author     Dan Williams <dan.j.williams@intel.com>   2009-09-08 20:42:57 -0400
committer  Dan Williams <dan.j.williams@intel.com>   2009-09-08 20:42:57 -0400
commit     b094ad3be564e7cc59cca4ff0256550d3a55dd3b
tree       d69f515b2ee6af2b0f12bb3028d7c7f5b3390794 /drivers/dma
parent     e61dacaeb3918cd00cd642e8fb0828324ac59819
ioat3: xor support
ioat3.2 adds xor offload support for up to 8 sources. It can also
perform an xor-zero-sum operation to validate whether all given sources
sum (xor) to zero, without writing a result to a destination. Xor
operations differ from memcpy in that a single operation may require
multiple descriptors, depending on the number of sources: when the
number of sources exceeds 5, an extended descriptor is needed in
addition to the base descriptor. These extra descriptors must be
accounted for when updating the DMA_COUNT register.
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
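
To make the descriptor accounting concrete, here is an illustrative sketch (not part of the patch) of the ring-slot math that __ioat3_prep_xor_lock() performs inline. The helper name ioat3_xor_ring_slots and its standalone parameters are assumptions for illustration; in the driver the values come from the channel's xfercap_log via ioat2_xferlen_to_descs(). Every slot counted here is what ultimately has to be reflected when the ring head is published through the DMA_COUNT register.

#include <linux/kernel.h>
#include <linux/types.h>

/* sketch only: how many ring slots one xor/xor-validate operation consumes */
static int ioat3_xor_ring_slots(unsigned int src_cnt, size_t len,
				unsigned int xfercap_log)
{
	/* base xor descriptors needed to cover 'len' in xfercap-sized chunks */
	int num_descs = DIV_ROUND_UP(len, 1ul << xfercap_log);

	/* more than 5 sources spill into an extended descriptor per chunk */
	if (src_cnt > 5)
		num_descs *= 2;

	/* plus one null descriptor that orders the completion write */
	return num_descs + 1;
}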
Diffstat (limited to 'drivers/dma')
-rw-r--r--  drivers/dma/ioat/dma_v2.c      2
-rw-r--r--  drivers/dma/ioat/dma_v2.h      3
-rw-r--r--  drivers/dma/ioat/dma_v3.c    218
-rw-r--r--  drivers/dma/ioat/registers.h   2
4 files changed, 222 insertions, 3 deletions
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 80ce32de8d32..ee295d48ba2c 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -48,7 +48,7 @@ module_param(ioat_ring_max_alloc_order, int, 0644);
 MODULE_PARM_DESC(ioat_ring_max_alloc_order,
 		 "ioat2+: upper limit for dynamic ring resizing (default: n=16)");
 
-static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
 {
 	void * __iomem reg_base = ioat->base.reg_base;
 
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index fa030f8e1f27..e23027d3dcbd 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -127,6 +127,7 @@ static inline u16 ioat2_xferlen_to_descs(struct ioat2_dma_chan *ioat, size_t len
  * @raw: hardware raw (un-typed) descriptor
  * @txd: the generic software descriptor for all engines
  * @len: total transaction length for unmap
+ * @result: asynchronous result of validate operations
  * @id: identifier for debug
  */
 
@@ -143,6 +144,7 @@ struct ioat_ring_ent {
 	};
 	struct dma_async_tx_descriptor txd;
 	size_t len;
+	enum sum_check_flags *result;
 	#ifdef DEBUG
 	int id;
 	#endif
@@ -180,5 +182,6 @@ enum dma_status ioat2_is_complete(struct dma_chan *c, dma_cookie_t cookie,
 			      dma_cookie_t *done, dma_cookie_t *used);
 void __ioat2_restart_chan(struct ioat2_dma_chan *ioat);
 bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
+void __ioat2_issue_pending(struct ioat2_dma_chan *ioat);
 extern struct kobj_type ioat2_ktype;
 #endif /* IOATDMA_V2_H */
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 0913d11e09ee..957c205f91d0 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -64,8 +64,33 @@
 #include "dma.h"
 #include "dma_v2.h"
 
+/* ioat hardware assumes at least two sources for raid operations */
+#define src_cnt_to_sw(x) ((x) + 2)
+#define src_cnt_to_hw(x) ((x) - 2)
+
+/* provide a lookup table for setting the source address in the base or
+ * extended descriptor of an xor descriptor
+ */
+static const u8 xor_idx_to_desc __read_mostly = 0xe0;
+static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 };
+
+static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+	return raw->field[xor_idx_to_field[idx]];
+}
+
+static void xor_set_src(struct ioat_raw_descriptor *descs[2],
+			dma_addr_t addr, u32 offset, int idx)
+{
+	struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1];
+
+	raw->field[xor_idx_to_field[idx]] = addr + offset;
+}
+
 static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
-			    struct ioat_ring_ent *desc)
+			    struct ioat_ring_ent *desc, int idx)
 {
 	struct ioat_chan_common *chan = &ioat->base;
 	struct pci_dev *pdev = chan->device->pdev;
@@ -86,13 +111,71 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
 				      PCI_DMA_FROMDEVICE, flags, 1);
 		break;
 	}
+	case IOAT_OP_XOR_VAL:
+	case IOAT_OP_XOR: {
+		struct ioat_xor_descriptor *xor = desc->xor;
+		struct ioat_ring_ent *ext;
+		struct ioat_xor_ext_descriptor *xor_ex = NULL;
+		int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt);
+		struct ioat_raw_descriptor *descs[2];
+		int i;
+
+		if (src_cnt > 5) {
+			ext = ioat2_get_ring_ent(ioat, idx + 1);
+			xor_ex = ext->xor_ex;
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+			descs[0] = (struct ioat_raw_descriptor *) xor;
+			descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+			for (i = 0; i < src_cnt; i++) {
+				dma_addr_t src = xor_get_src(descs, i);
+
+				ioat_unmap(pdev, src - offset, len,
+					   PCI_DMA_TODEVICE, flags, 0);
+			}
+
+			/* dest is a source in xor validate operations */
+			if (xor->ctl_f.op == IOAT_OP_XOR_VAL) {
+				ioat_unmap(pdev, xor->dst_addr - offset, len,
+					   PCI_DMA_TODEVICE, flags, 1);
+				break;
+			}
+		}
+
+		if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
+			ioat_unmap(pdev, xor->dst_addr - offset, len,
+				   PCI_DMA_FROMDEVICE, flags, 1);
+		break;
+	}
 	default:
 		dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
 			__func__, desc->hw->ctl_f.op);
 	}
 }
 
+static bool desc_has_ext(struct ioat_ring_ent *desc)
+{
+	struct ioat_dma_descriptor *hw = desc->hw;
+
+	if (hw->ctl_f.op == IOAT_OP_XOR ||
+	    hw->ctl_f.op == IOAT_OP_XOR_VAL) {
+		struct ioat_xor_descriptor *xor = desc->xor;
 
+		if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * __cleanup - reclaim used descriptors
+ * @ioat: channel (ring) to clean
+ *
+ * The difference from the dma_v2.c __cleanup() is that this routine
+ * handles extended descriptors and dma-unmapping raid operations.
+ */
 static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 {
 	struct ioat_chan_common *chan = &ioat->base;
@@ -114,7 +197,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 		tx = &desc->txd;
 		if (tx->cookie) {
 			chan->completed_cookie = tx->cookie;
-			ioat3_dma_unmap(ioat, desc);
+			ioat3_dma_unmap(ioat, desc, ioat->tail + i);
 			tx->cookie = 0;
 			if (tx->callback) {
 				tx->callback(tx->callback_param);
@@ -124,6 +207,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
 
 		if (tx->phys == phys_complete)
 			seen_current = true;
+
+		/* skip extended descriptors */
+		if (desc_has_ext(desc)) {
+			BUG_ON(i + 1 >= active);
+			i++;
+		}
 	}
 	ioat->tail += i;
 	BUG_ON(!seen_current); /* no active descs have written a completion? */
@@ -309,6 +398,121 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
 	return &desc->txd;
 }
 
+static struct dma_async_tx_descriptor *
+__ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
+		      dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt,
+		      size_t len, unsigned long flags)
+{
+	struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
+	struct ioat_ring_ent *compl_desc;
+	struct ioat_ring_ent *desc;
+	struct ioat_ring_ent *ext;
+	size_t total_len = len;
+	struct ioat_xor_descriptor *xor;
+	struct ioat_xor_ext_descriptor *xor_ex = NULL;
+	struct ioat_dma_descriptor *hw;
+	u32 offset = 0;
+	int num_descs;
+	int with_ext;
+	int i;
+	u16 idx;
+	u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
+
+	BUG_ON(src_cnt < 2);
+
+	num_descs = ioat2_xferlen_to_descs(ioat, len);
+	/* we need 2x the number of descriptors to cover greater than 5
+	 * sources
+	 */
+	if (src_cnt > 5) {
+		with_ext = 1;
+		num_descs *= 2;
+	} else
+		with_ext = 0;
+
+	/* completion writes from the raid engine may pass completion
+	 * writes from the legacy engine, so we need one extra null
+	 * (legacy) descriptor to ensure all completion writes arrive in
+	 * order.
+	 */
+	if (likely(num_descs) &&
+	    ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0)
+		/* pass */;
+	else
+		return NULL;
+	for (i = 0; i < num_descs; i += 1 + with_ext) {
+		struct ioat_raw_descriptor *descs[2];
+		size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
+		int s;
+
+		desc = ioat2_get_ring_ent(ioat, idx + i);
+		xor = desc->xor;
+
+		/* save a branch by unconditionally retrieving the
+		 * extended descriptor xor_set_src() knows to not write
+		 * to it in the single descriptor case
+		 */
+		ext = ioat2_get_ring_ent(ioat, idx + i + 1);
+		xor_ex = ext->xor_ex;
+
+		descs[0] = (struct ioat_raw_descriptor *) xor;
+		descs[1] = (struct ioat_raw_descriptor *) xor_ex;
+		for (s = 0; s < src_cnt; s++)
+			xor_set_src(descs, src[s], offset, s);
+		xor->size = xfer_size;
+		xor->dst_addr = dest + offset;
+		xor->ctl = 0;
+		xor->ctl_f.op = op;
+		xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt);
+
+		len -= xfer_size;
+		offset += xfer_size;
+		dump_desc_dbg(ioat, desc);
+	}
+
+	/* last xor descriptor carries the unmap parameters and fence bit */
+	desc->txd.flags = flags;
+	desc->len = total_len;
+	if (result)
+		desc->result = result;
+	xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
+
+	/* completion descriptor carries interrupt bit */
+	compl_desc = ioat2_get_ring_ent(ioat, idx + i);
+	compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT;
+	hw = compl_desc->hw;
+	hw->ctl = 0;
+	hw->ctl_f.null = 1;
+	hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
+	hw->ctl_f.compl_write = 1;
+	hw->size = NULL_DESC_BUFFER_SIZE;
+	dump_desc_dbg(ioat, compl_desc);
+
+	/* we leave the channel locked to ensure in order submission */
+	return &desc->txd;
+}
+
+static struct dma_async_tx_descriptor *
+ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+	       unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags);
+}
+
+struct dma_async_tx_descriptor *
+ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src,
+		   unsigned int src_cnt, size_t len,
+		   enum sum_check_flags *result, unsigned long flags)
+{
+	/* the cleanup routine only sets bits on validate failure, it
+	 * does not clear bits on validate success... so clear it here
+	 */
+	*result = 0;
+
+	return __ioat3_prep_xor_lock(chan, result, src[0], &src[1],
+				     src_cnt - 1, len, flags);
+}
+
 int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
 {
 	struct pci_dev *pdev = device->pdev;
@@ -333,6 +537,16 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
 		dma_cap_set(DMA_MEMSET, dma->cap_mask);
 		dma->device_prep_dma_memset = ioat3_prep_memset_lock;
 	}
+	if (cap & IOAT_CAP_XOR) {
+		dma->max_xor = 8;
+		dma->xor_align = 2;
+
+		dma_cap_set(DMA_XOR, dma->cap_mask);
+		dma->device_prep_dma_xor = ioat3_prep_xor;
+
+		dma_cap_set(DMA_XOR_VAL, dma->cap_mask);
+		dma->device_prep_dma_xor_val = ioat3_prep_xor_val;
+	}
 
 	/* -= IOAT ver.3 workarounds =- */
 	/* Write CHANERRMSK_INT with 3E07h to mask out the errors
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 97d26ea6d72f..63038e18ab03 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -243,6 +243,8 @@
 #define IOAT_CHANERR_XOR_Q_ERR			0x20000
 #define IOAT_CHANERR_DESCRIPTOR_COUNT_ERR	0x40000
 
+#define IOAT_CHANERR_HANDLE_MASK (IOAT_CHANERR_XOR_P_OR_CRC_ERR | IOAT_CHANERR_XOR_Q_ERR)
+
 #define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
 
 #endif /* _IOAT_REGISTERS_H_ */
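
For completeness, here is a hedged client-side sketch (not from this patch) of how the newly advertised DMA_XOR_VAL capability would be consumed through the dmaengine API of this era. The function name xor_val_check and the assumption that srcs[] already holds src_cnt DMA-mapped buffers of len bytes are illustrative; a real client would typically go through the async_tx layer rather than drive the channel directly.

#include <linux/dmaengine.h>
#include <linux/errno.h>

/* sketch only: validate that src_cnt mapped buffers xor to zero */
static int xor_val_check(struct dma_chan *chan, dma_addr_t *srcs,
			 unsigned int src_cnt, size_t len)
{
	struct dma_device *dma = chan->device;
	struct dma_async_tx_descriptor *tx;
	enum sum_check_flags result = 0;
	dma_cookie_t cookie;

	/* ioat3_prep_xor_val() treats srcs[0] as the dest that is re-read
	 * as a source, and clears *result before building descriptors
	 */
	tx = dma->device_prep_dma_xor_val(chan, srcs, src_cnt, len,
					  &result, DMA_PREP_INTERRUPT);
	if (!tx)
		return -ENOMEM;

	cookie = tx->tx_submit(tx);
	dma_async_issue_pending(chan);

	if (dma_sync_wait(chan, cookie) != DMA_SUCCESS)
		return -EIO;

	/* the cleanup path only sets bits on validate failure */
	return (result & SUM_CHECK_P_RESULT) ? -EILSEQ : 0;
}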