author		Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:42:59 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2009-09-08 20:42:59 -0400
commit		d69d235b7da2778891640ee95efcd68075978904
tree		e7f22d38d8a742ddbca167af123f4987ada8926c	/drivers/dma/ioat
parent		9de6fc717bdc574cf5faf9d46ce0f9d6265c7952
ioat3: pq support
ioat3.2 adds support for raid6 syndrome generation (xor sum of Galois
field multiplication products) using up to 8 sources. It can also
perform a pq-zero-sum operation to validate whether the syndrome for a
given set of sources matches a previously computed syndrome.
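
For reference, a minimal software model of the two operations (illustrative
only, not part of this patch; gf_mul(), pq_gen() and pq_zero_sum() are
made-up names): P is the plain xor of the sources, Q is the xor of each
source multiplied by its per-source coefficient in GF(2^8) with the usual
raid6 polynomial (0x11d), and the zero-sum check simply regenerates the
syndrome and compares it against the stored P/Q. The coefficient array
plays the role of the scf argument in this patch.

#include <stdint.h>
#include <stddef.h>
#include <string.h>

/* toy GF(2^8) multiply, polynomial x^8 + x^4 + x^3 + x^2 + 1 (0x11d) */
static uint8_t gf_mul(uint8_t a, uint8_t b)
{
	uint8_t p = 0;

	while (b) {
		if (b & 1)
			p ^= a;
		a = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
		b >>= 1;
	}
	return p;
}

/* P = xor of sources, Q = xor of coef[s] * src[s] over GF(2^8) */
static void pq_gen(uint8_t *p, uint8_t *q, uint8_t **src,
		   const uint8_t *coef, int src_cnt, size_t len)
{
	size_t i;
	int s;

	for (i = 0; i < len; i++) {
		uint8_t px = 0, qx = 0;

		for (s = 0; s < src_cnt; s++) {
			px ^= src[s][i];
			qx ^= gf_mul(coef[s], src[s][i]);
		}
		p[i] = px;
		q[i] = qx;
	}
}

/* returns 0 when the regenerated syndrome matches the stored p/q */
static int pq_zero_sum(const uint8_t *p, const uint8_t *q, uint8_t **src,
		       const uint8_t *coef, int src_cnt, size_t len,
		       uint8_t *tmp_p, uint8_t *tmp_q)
{
	pq_gen(tmp_p, tmp_q, src, coef, src_cnt, len);
	return memcmp(p, tmp_p, len) || memcmp(q, tmp_q, len);
}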
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dma/ioat')
-rw-r--r--  drivers/dma/ioat/dma_v3.c | 265
1 file changed, 264 insertions(+), 1 deletion(-)
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 927c08b08861..ca2af0fa1c36 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -69,10 +69,12 @@
 #define src_cnt_to_hw(x) ((x) - 2)
 
 /* provide a lookup table for setting the source address in the base or
- * extended descriptor of an xor descriptor
+ * extended descriptor of an xor or pq descriptor
  */
 static const u8 xor_idx_to_desc __read_mostly = 0xd0;
 static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
+static const u8 pq_idx_to_desc __read_mostly = 0xf8;
+static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 };
 
 static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
 {
@@ -89,6 +91,23 @@ static void xor_set_src(struct ioat_raw_descriptor *descs[2],
 	raw->field[xor_idx_to_field[idx]] = addr + offset;
 }
 
+static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	return raw->field[pq_idx_to_field[idx]];
+}
+
+static void pq_set_src(struct ioat_raw_descriptor *descs[2],
+		       dma_addr_t addr, u32 offset, u8 coef, int idx)
+{
+	struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0];
+	struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1];
+
+	raw->field[pq_idx_to_field[idx]] = addr + offset;
+	pq->coef[idx] = coef;
+}
+
 static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 			    struct ioat_ring_ent *desc, int idx)
 {
@@ -148,6 +167,58 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 				   PCI_DMA_FROMDEVICE, flags, 1);
 		break;
 	}
+	case IOAT_OP_PQ_VAL:
+	case IOAT_OP_PQ: {
+		struct ioat_pq_descriptor *pq = desc->pq;
+		struct ioat_ring_ent *ext;
+		struct ioat_pq_ext_descriptor *pq_ex = NULL;
+		int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		if (src_cnt > 3) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			pq_ex = ext->pq_ex;
+		}
+
+		/* in the 'continue' case don't unmap the dests as sources */
+		if (dmaf_p_disabled_continue(flags))
+			src_cnt--;
+		else if (dmaf_continue(flags))
+			src_cnt -= 3;
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) pq;
+			descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = pq_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* the dests are sources in pq validate operations */
+			if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
+				if (!(flags & DMA_PREP_PQ_DISABLE_P))
+					ioat_unmap(pdev, pq->p_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+					ioat_unmap(pdev, pq->q_addr - offset,
+						   len, PCI_DMA_TODEVICE, flags, 0);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+			if (!(flags & DMA_PREP_PQ_DISABLE_P))
+				ioat_unmap(pdev, pq->p_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+			if (!(flags & DMA_PREP_PQ_DISABLE_Q))
+				ioat_unmap(pdev, pq->q_addr - offset, len,
+					   PCI_DMA_BIDIRECTIONAL, flags, 1);
+		}
+		break;
+	}
 	default:
 		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 			__func__, desc->hw->ctl_f.op);
@@ -164,6 +235,12 @@ static bool desc_has_ext(struct ioat_ring_ent *desc)
 
 		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
 			return true;
+	} else if (hw->ctl_f.op == IOAT_OP_PQ ||
+		   hw->ctl_f.op == IOAT_OP_PQ_VAL) {
+		struct ioat_pq_descriptor *pq = desc->pq;
+
+		if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3)
+			return true;
 	}
 
 	return false;
@@ -513,6 +590,182 @@ ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
 			       src_cnt - 1, len, flags);
 }
 
+static void
+dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext)
+{
+	struct device *dev = to_dev(&ioat->base);
+	struct ioat_pq_descriptor *pq = desc->pq;
+	struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL;
+	struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex };
+	int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt);
+	int i;
+
+	dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
+		" sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n",
+		desc_id(desc), (unsigned long long) desc->txd.phys,
+		(unsigned long long) (pq_ex ? pq_ex->next : pq->next),
+		desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
+		pq->ctl_f.compl_write,
+		pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
+		pq->ctl_f.src_cnt);
+	for (i = 0; i < src_cnt; i++)
+		dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
+			(unsigned long long) pq_get_src(descs, i), pq->coef[i]);
+	dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
+	dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
+}
+
+static struct dma_async_tx_descriptor *
+__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
+		     const dma_addr_t *dst, const dma_addr_t *src,
+		     unsigned int src_cnt, const unsigned char *scf,
+		     size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_chan_common *chan = &ioat->base;
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_pq_descriptor *pq;
+	struct ioat_pq_ext_descriptor *pq_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	u32 offset = 0;
+	int num_descs;
+	int with_ext;
+	int i, s;
+	u16 idx;
+	u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
+
+	dev_dbg(to_dev(chan), "%s\n", __func__);
+	/* the engine requires at least two sources (we provide
+	 * at least 1 implied source in the DMA_PREP_CONTINUE case)
+	 */
+	BUG_ON(src_cnt + dmaf_continue(flags) < 2);
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	/* we need 2x the number of descriptors to cover greater than 3
+	 * sources
+	 */
+	if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) &&
+	    ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+		/* pass */;
+	else
+		return NULL;
+	for (i = 0; i < num_descs; i += 1 + with_ext) {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		pq = desc->pq;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor pq_set_src() knows to not write
+		 * to it in the single descriptor case
+		 */
+		ext = ioat2_get_ring_ent(ioat, idx + i + with_ext);
+		pq_ex = ext->pq_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) pq;
+		descs[1] = (struct ioat_raw_descriptor *) pq_ex;
+
+		for (s = 0; s < src_cnt; s++)
+			pq_set_src(descs, src[s], offset, scf[s], s);
+
+		/* see the comment for dma_maxpq in include/linux/dmaengine.h */
+		if (dmaf_p_disabled_continue(flags))
+			pq_set_src(descs, dst[1], offset, 1, s++);
+		else if (dmaf_continue(flags)) {
+			pq_set_src(descs, dst[0], offset, 0, s++);
+			pq_set_src(descs, dst[1], offset, 1, s++);
+			pq_set_src(descs, dst[1], offset, 0, s++);
+		}
+		pq->size = xfer_size;
+		pq->p_addr = dst[0] + offset;
+		pq->q_addr = dst[1] + offset;
+		pq->ctl = 0;
+		pq->ctl_f.op = op;
+		pq->ctl_f.src_cnt = src_cnt_to_hw(s);
+		pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
+		pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
+
+		len -= xfer_size;
+		offset += xfer_size;
+	}
+
+	/* last pq descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+	dump_pq_desc_dbg(ioat, desc, ext);
+
+	/* completion descriptor carries interrupt bit */
+	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+	hw = compl_desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	dump_desc_dbg(ioat, compl_desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
+	      unsigned int src_cnt, const unsigned char *scf, size_t len,
+	      unsigned long flags)
+{
+	/* handle the single source multiply case from the raid6
+	 * recovery path
+	 */
+	if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) {
+		dma_addr_t single_source[2];
+		unsigned char single_source_coef[2];
+
+		BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q);
+		single_source[0] = src[0];
+		single_source[1] = src[0];
+		single_source_coef[0] = scf[0];
+		single_source_coef[1] = 0;
+
+		return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
+					    single_source_coef, len, flags);
+	} else
+		return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf,
+					    len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
+		  unsigned int src_cnt, const unsigned char *scf, size_t len,
+		  enum sum_check_flags *pqres, unsigned long flags)
+{
+	/* the cleanup routine only sets bits on validate failure, it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*pqres = 0;
+
+	return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
+				    flags);
+}
+
 static void __devinit ioat3_dma_test_callback(void *dma_async_param)
 {
 	struct completion *cmp = dma_async_param;
@@ -822,6 +1075,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
 		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
 	}
+	if (cap & IOAT_CAP_PQ) {
+		dma_set_maxpq(dma, 8, 0);
+		dma->pq_align = 2;
+
+		dma_cap_set(DMA_PQ, dma->cap_mask);
+		dma->device_prep_dma_pq = ioat3_prep_pq;
+
+		dma_cap_set(DMA_PQ_VAL, dma->cap_mask);
+		dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
+	}
 
 	/* -= IOAT ver.3 workarounds =- */
 	/* Write CHANERRMSK_INT with 3E07h to mask out the errors
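
As a rough usage sketch (not from this patch, and heavily simplified): a raw
dmaengine client could exercise the DMA_PQ capability registered in
ioat3_dma_probe() along the lines below. In practice the async_tx layer is
what drives device_prep_dma_pq(); the function name here is made up,
completion handling and unmapping are omitted, and the scf array would
normally hold the raid6 generator powers for each source.

#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>

/* hypothetical helper: generate P/Q for 'disks' pages, last two being P and Q */
static int pq_demo_submit(struct page **blocks, int disks, size_t len,
			  const unsigned char *scf)
{
	struct dma_async_tx_descriptor *tx;
	dma_addr_t src[8], dst[2];
	dma_cap_mask_t mask;
	struct dma_chan *chan;
	struct device *dev;
	int i, src_cnt = disks - 2;	/* assumes disks <= 10 */

	dma_cap_zero(mask);
	dma_cap_set(DMA_PQ, mask);
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;
	dev = chan->device->dev;

	for (i = 0; i < src_cnt; i++)
		src[i] = dma_map_page(dev, blocks[i], 0, len, DMA_TO_DEVICE);
	dst[0] = dma_map_page(dev, blocks[disks - 2], 0, len, DMA_BIDIRECTIONAL); /* P */
	dst[1] = dma_map_page(dev, blocks[disks - 1], 0, len, DMA_BIDIRECTIONAL); /* Q */

	tx = chan->device->device_prep_dma_pq(chan, dst, src, src_cnt, scf,
					      len, DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	async_tx_ack(tx);		/* no dependency chaining in this sketch */
	tx->tx_submit(tx);
	dma_async_issue_pending(chan);
	return 0;
}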
