diff options
-rw-r--r-- | drivers/dma/ioat/dma.h | 17 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v2.h | 2 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 394 | ||||
-rw-r--r-- | drivers/dma/ioat/hw.h | 43 | ||||
-rw-r--r-- | drivers/dma/ioat/pci.c | 3 | ||||
-rw-r--r-- | drivers/dma/ioat/registers.h | 1 |
6 files changed, 438 insertions, 22 deletions
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 976eba8c06c7..35d74028773a 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h | |||
@@ -81,6 +81,9 @@ struct ioatdma_device { | |||
81 | void __iomem *reg_base; | 81 | void __iomem *reg_base; |
82 | struct pci_pool *dma_pool; | 82 | struct pci_pool *dma_pool; |
83 | struct pci_pool *completion_pool; | 83 | struct pci_pool *completion_pool; |
84 | #define MAX_SED_POOLS 5 | ||
85 | struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; | ||
86 | struct kmem_cache *sed_pool; | ||
84 | struct dma_device common; | 87 | struct dma_device common; |
85 | u8 version; | 88 | u8 version; |
86 | struct msix_entry msix_entries[4]; | 89 | struct msix_entry msix_entries[4]; |
@@ -141,6 +144,20 @@ struct ioat_dma_chan { | |||
141 | u16 active; | 144 | u16 active; |
142 | }; | 145 | }; |
143 | 146 | ||
/**
 * struct ioat_sed_ent - wrapper around super extended hardware descriptor
 * @hw: hardware SED
 * @dma: dma address for the SED
 * @parent: point to the dma descriptor that's the parent
 * @hw_pool: index into the device's sed_hw_pool[] that @hw was allocated
 *	from; needed again to return @hw to the same pool on free
 */
struct ioat_sed_ent {
	struct ioat_sed_raw_descriptor *hw;
	dma_addr_t dma;
	struct ioat_ring_ent *parent;
	unsigned int hw_pool;
};
160 | |||
144 | static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) | 161 | static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) |
145 | { | 162 | { |
146 | return container_of(c, struct ioat_chan_common, common); | 163 | return container_of(c, struct ioat_chan_common, common); |
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index e100f644e344..29bf9448035d 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h | |||
@@ -137,6 +137,7 @@ struct ioat_ring_ent { | |||
137 | #ifdef DEBUG | 137 | #ifdef DEBUG |
138 | int id; | 138 | int id; |
139 | #endif | 139 | #endif |
140 | struct ioat_sed_ent *sed; | ||
140 | }; | 141 | }; |
141 | 142 | ||
142 | static inline struct ioat_ring_ent * | 143 | static inline struct ioat_ring_ent * |
@@ -157,6 +158,7 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) | |||
157 | 158 | ||
158 | int ioat2_dma_probe(struct ioatdma_device *dev, int dca); | 159 | int ioat2_dma_probe(struct ioatdma_device *dev, int dca); |
159 | int ioat3_dma_probe(struct ioatdma_device *dev, int dca); | 160 | int ioat3_dma_probe(struct ioatdma_device *dev, int dca); |
161 | void ioat3_dma_remove(struct ioatdma_device *dev); | ||
160 | struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); | 162 | struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); |
161 | struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); | 163 | struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); |
162 | int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); | 164 | int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); |
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 639311598f35..71e113dfc8cc 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c | |||
@@ -55,7 +55,7 @@ | |||
55 | /* | 55 | /* |
56 | * Support routines for v3+ hardware | 56 | * Support routines for v3+ hardware |
57 | */ | 57 | */ |
58 | 58 | #include <linux/module.h> | |
59 | #include <linux/pci.h> | 59 | #include <linux/pci.h> |
60 | #include <linux/gfp.h> | 60 | #include <linux/gfp.h> |
61 | #include <linux/dmaengine.h> | 61 | #include <linux/dmaengine.h> |
@@ -70,6 +70,10 @@ | |||
70 | /* ioat hardware assumes at least two sources for raid operations */ | 70 | /* ioat hardware assumes at least two sources for raid operations */ |
71 | #define src_cnt_to_sw(x) ((x) + 2) | 71 | #define src_cnt_to_sw(x) ((x) + 2) |
72 | #define src_cnt_to_hw(x) ((x) - 2) | 72 | #define src_cnt_to_hw(x) ((x) - 2) |
/* destination count conversion: the hw value is the sw count minus one */
#define ndest_to_sw(x) ((x) + 1)
#define ndest_to_hw(x) ((x) - 1)
/* 16-source pq source count conversion: the hw value is the sw count minus 9 */
#define src16_cnt_to_sw(x) ((x) + 9)
#define src16_cnt_to_hw(x) ((x) - 9)
73 | 77 | ||
74 | /* provide a lookup table for setting the source address in the base or | 78 | /* provide a lookup table for setting the source address in the base or |
75 | * extended descriptor of an xor or pq descriptor | 79 | * extended descriptor of an xor or pq descriptor |
@@ -77,7 +81,18 @@ | |||
77 | static const u8 xor_idx_to_desc = 0xe0; | 81 | static const u8 xor_idx_to_desc = 0xe0; |
78 | static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; | 82 | static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; |
79 | static const u8 pq_idx_to_desc = 0xf8; | 83 | static const u8 pq_idx_to_desc = 0xf8; |
/*
 * 16-source pq: for source index 0-15, which entry of the descs[] array
 * passed to pq16_get_src()/pq16_set_src() holds that source address
 */
static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
				       2, 2, 2, 2, 2, 2, 2 };
static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
/*
 * 16-source pq: for source index 0-15, which field[] slot of the descriptor
 * selected by pq16_idx_to_desc[] holds that source address
 */
static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
					0, 1, 2, 3, 4, 5, 6 };
89 | |||
90 | /* | ||
91 | * technically sources 1 and 2 do not require SED, but the op will have | ||
92 | * at least 9 descriptors so that's irrelevant. | ||
93 | */ | ||
94 | static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
95 | 1, 1, 1, 1, 1, 1, 1 }; | ||
81 | 96 | ||
82 | static void ioat3_eh(struct ioat2_dma_chan *ioat); | 97 | static void ioat3_eh(struct ioat2_dma_chan *ioat); |
83 | 98 | ||
@@ -103,6 +118,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) | |||
103 | return raw->field[pq_idx_to_field[idx]]; | 118 | return raw->field[pq_idx_to_field[idx]]; |
104 | } | 119 | } |
105 | 120 | ||
121 | static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx) | ||
122 | { | ||
123 | struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; | ||
124 | |||
125 | return raw->field[pq16_idx_to_field[idx]]; | ||
126 | } | ||
127 | |||
106 | static void pq_set_src(struct ioat_raw_descriptor *descs[2], | 128 | static void pq_set_src(struct ioat_raw_descriptor *descs[2], |
107 | dma_addr_t addr, u32 offset, u8 coef, int idx) | 129 | dma_addr_t addr, u32 offset, u8 coef, int idx) |
108 | { | 130 | { |
@@ -113,6 +135,12 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2], | |||
113 | pq->coef[idx] = coef; | 135 | pq->coef[idx] = coef; |
114 | } | 136 | } |
115 | 137 | ||
138 | static int sed_get_pq16_pool_idx(int src_cnt) | ||
139 | { | ||
140 | |||
141 | return pq16_idx_to_sed[src_cnt]; | ||
142 | } | ||
143 | |||
116 | static bool is_jf_ioat(struct pci_dev *pdev) | 144 | static bool is_jf_ioat(struct pci_dev *pdev) |
117 | { | 145 | { |
118 | switch (pdev->device) { | 146 | switch (pdev->device) { |
@@ -210,6 +238,52 @@ static bool is_bwd_ioat(struct pci_dev *pdev) | |||
210 | } | 238 | } |
211 | } | 239 | } |
212 | 240 | ||
241 | static void pq16_set_src(struct ioat_raw_descriptor *desc[3], | ||
242 | dma_addr_t addr, u32 offset, u8 coef, int idx) | ||
243 | { | ||
244 | struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0]; | ||
245 | struct ioat_pq16a_descriptor *pq16 = | ||
246 | (struct ioat_pq16a_descriptor *)desc[1]; | ||
247 | struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]]; | ||
248 | |||
249 | raw->field[pq16_idx_to_field[idx]] = addr + offset; | ||
250 | |||
251 | if (idx < 8) | ||
252 | pq->coef[idx] = coef; | ||
253 | else | ||
254 | pq16->coef[idx - 8] = coef; | ||
255 | } | ||
256 | |||
257 | struct ioat_sed_ent * | ||
258 | ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) | ||
259 | { | ||
260 | struct ioat_sed_ent *sed; | ||
261 | gfp_t flags = __GFP_ZERO | GFP_ATOMIC; | ||
262 | |||
263 | sed = kmem_cache_alloc(device->sed_pool, flags); | ||
264 | if (!sed) | ||
265 | return NULL; | ||
266 | |||
267 | sed->hw_pool = hw_pool; | ||
268 | sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], | ||
269 | flags, &sed->dma); | ||
270 | if (!sed->hw) { | ||
271 | kmem_cache_free(device->sed_pool, sed); | ||
272 | return NULL; | ||
273 | } | ||
274 | |||
275 | return sed; | ||
276 | } | ||
277 | |||
278 | void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed) | ||
279 | { | ||
280 | if (!sed) | ||
281 | return; | ||
282 | |||
283 | dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); | ||
284 | kmem_cache_free(device->sed_pool, sed); | ||
285 | } | ||
286 | |||
213 | static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, | 287 | static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, |
214 | struct ioat_ring_ent *desc, int idx) | 288 | struct ioat_ring_ent *desc, int idx) |
215 | { | 289 | { |
@@ -322,6 +396,54 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, | |||
322 | } | 396 | } |
323 | break; | 397 | break; |
324 | } | 398 | } |
399 | case IOAT_OP_PQ_16S: | ||
400 | case IOAT_OP_PQ_VAL_16S: { | ||
401 | struct ioat_pq_descriptor *pq = desc->pq; | ||
402 | int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); | ||
403 | struct ioat_raw_descriptor *descs[4]; | ||
404 | int i; | ||
405 | |||
406 | /* in the 'continue' case don't unmap the dests as sources */ | ||
407 | if (dmaf_p_disabled_continue(flags)) | ||
408 | src_cnt--; | ||
409 | else if (dmaf_continue(flags)) | ||
410 | src_cnt -= 3; | ||
411 | |||
412 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
413 | descs[0] = (struct ioat_raw_descriptor *)pq; | ||
414 | descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw); | ||
415 | descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]); | ||
416 | for (i = 0; i < src_cnt; i++) { | ||
417 | dma_addr_t src = pq16_get_src(descs, i); | ||
418 | |||
419 | ioat_unmap(pdev, src - offset, len, | ||
420 | PCI_DMA_TODEVICE, flags, 0); | ||
421 | } | ||
422 | |||
423 | /* the dests are sources in pq validate operations */ | ||
424 | if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
425 | if (!(flags & DMA_PREP_PQ_DISABLE_P)) | ||
426 | ioat_unmap(pdev, pq->p_addr - offset, | ||
427 | len, PCI_DMA_TODEVICE, | ||
428 | flags, 0); | ||
429 | if (!(flags & DMA_PREP_PQ_DISABLE_Q)) | ||
430 | ioat_unmap(pdev, pq->q_addr - offset, | ||
431 | len, PCI_DMA_TODEVICE, | ||
432 | flags, 0); | ||
433 | break; | ||
434 | } | ||
435 | } | ||
436 | |||
437 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
438 | if (!(flags & DMA_PREP_PQ_DISABLE_P)) | ||
439 | ioat_unmap(pdev, pq->p_addr - offset, len, | ||
440 | PCI_DMA_BIDIRECTIONAL, flags, 1); | ||
441 | if (!(flags & DMA_PREP_PQ_DISABLE_Q)) | ||
442 | ioat_unmap(pdev, pq->q_addr - offset, len, | ||
443 | PCI_DMA_BIDIRECTIONAL, flags, 1); | ||
444 | } | ||
445 | break; | ||
446 | } | ||
325 | default: | 447 | default: |
326 | dev_err(&pdev->dev, "%s: unknown op type: %#x\n", | 448 | dev_err(&pdev->dev, "%s: unknown op type: %#x\n", |
327 | __func__, desc->hw->ctl_f.op); | 449 | __func__, desc->hw->ctl_f.op); |
@@ -386,6 +508,7 @@ static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan, | |||
386 | static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) | 508 | static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) |
387 | { | 509 | { |
388 | struct ioat_chan_common *chan = &ioat->base; | 510 | struct ioat_chan_common *chan = &ioat->base; |
511 | struct ioatdma_device *device = chan->device; | ||
389 | struct ioat_ring_ent *desc; | 512 | struct ioat_ring_ent *desc; |
390 | bool seen_current = false; | 513 | bool seen_current = false; |
391 | int idx = ioat->tail, i; | 514 | int idx = ioat->tail, i; |
@@ -430,6 +553,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) | |||
430 | BUG_ON(i + 1 >= active); | 553 | BUG_ON(i + 1 >= active); |
431 | i++; | 554 | i++; |
432 | } | 555 | } |
556 | |||
557 | /* cleanup super extended descriptors */ | ||
558 | if (desc->sed) { | ||
559 | ioat3_free_sed(device, desc->sed); | ||
560 | desc->sed = NULL; | ||
561 | } | ||
433 | } | 562 | } |
434 | smp_mb(); /* finish all descriptor reads before incrementing tail */ | 563 | smp_mb(); /* finish all descriptor reads before incrementing tail */ |
435 | ioat->tail = idx + i; | 564 | ioat->tail = idx + i; |
@@ -522,6 +651,7 @@ static void ioat3_eh(struct ioat2_dma_chan *ioat) | |||
522 | } | 651 | } |
523 | break; | 652 | break; |
524 | case IOAT_OP_PQ_VAL: | 653 | case IOAT_OP_PQ_VAL: |
654 | case IOAT_OP_PQ_VAL_16S: | ||
525 | if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { | 655 | if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { |
526 | *desc->result |= SUM_CHECK_P_RESULT; | 656 | *desc->result |= SUM_CHECK_P_RESULT; |
527 | err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; | 657 | err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; |
@@ -814,7 +944,8 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct | |||
814 | int i; | 944 | int i; |
815 | 945 | ||
816 | dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" | 946 | dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" |
817 | " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", | 947 | " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" |
948 | " src_cnt: %d)\n", | ||
818 | desc_id(desc), (unsigned long long) desc->txd.phys, | 949 | desc_id(desc), (unsigned long long) desc->txd.phys, |
819 | (unsigned long long) (pq_ex ? pq_ex->next : pq->next), | 950 | (unsigned long long) (pq_ex ? pq_ex->next : pq->next), |
820 | desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, | 951 | desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, |
@@ -829,6 +960,41 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct | |||
829 | dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); | 960 | dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); |
830 | } | 961 | } |
831 | 962 | ||
963 | static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat, | ||
964 | struct ioat_ring_ent *desc) | ||
965 | { | ||
966 | struct device *dev = to_dev(&ioat->base); | ||
967 | struct ioat_pq_descriptor *pq = desc->pq; | ||
968 | struct ioat_raw_descriptor *descs[] = { (void *)pq, | ||
969 | (void *)pq, | ||
970 | (void *)pq }; | ||
971 | int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); | ||
972 | int i; | ||
973 | |||
974 | if (desc->sed) { | ||
975 | descs[1] = (void *)desc->sed->hw; | ||
976 | descs[2] = (void *)desc->sed->hw + 64; | ||
977 | } | ||
978 | |||
979 | dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" | ||
980 | " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'" | ||
981 | " src_cnt: %d)\n", | ||
982 | desc_id(desc), (unsigned long long) desc->txd.phys, | ||
983 | (unsigned long long) pq->next, | ||
984 | desc->txd.flags, pq->size, pq->ctl, | ||
985 | pq->ctl_f.op, pq->ctl_f.int_en, | ||
986 | pq->ctl_f.compl_write, | ||
987 | pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", | ||
988 | pq->ctl_f.src_cnt); | ||
989 | for (i = 0; i < src_cnt; i++) { | ||
990 | dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, | ||
991 | (unsigned long long) pq16_get_src(descs, i), | ||
992 | pq->coef[i]); | ||
993 | } | ||
994 | dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); | ||
995 | dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); | ||
996 | } | ||
997 | |||
832 | static struct dma_async_tx_descriptor * | 998 | static struct dma_async_tx_descriptor * |
833 | __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, | 999 | __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, |
834 | const dma_addr_t *dst, const dma_addr_t *src, | 1000 | const dma_addr_t *dst, const dma_addr_t *src, |
@@ -951,10 +1117,114 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, | |||
951 | } | 1117 | } |
952 | 1118 | ||
953 | static struct dma_async_tx_descriptor * | 1119 | static struct dma_async_tx_descriptor * |
1120 | __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, | ||
1121 | const dma_addr_t *dst, const dma_addr_t *src, | ||
1122 | unsigned int src_cnt, const unsigned char *scf, | ||
1123 | size_t len, unsigned long flags) | ||
1124 | { | ||
1125 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
1126 | struct ioat_chan_common *chan = &ioat->base; | ||
1127 | struct ioatdma_device *device = chan->device; | ||
1128 | struct ioat_ring_ent *desc; | ||
1129 | size_t total_len = len; | ||
1130 | struct ioat_pq_descriptor *pq; | ||
1131 | u32 offset = 0; | ||
1132 | u8 op; | ||
1133 | int i, s, idx, num_descs; | ||
1134 | |||
1135 | /* this function only handles src_cnt 9 - 16 */ | ||
1136 | BUG_ON(src_cnt < 9); | ||
1137 | |||
1138 | /* this function is only called with 9-16 sources */ | ||
1139 | op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; | ||
1140 | |||
1141 | dev_dbg(to_dev(chan), "%s\n", __func__); | ||
1142 | |||
1143 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
1144 | |||
1145 | /* | ||
1146 | * 16 source pq is only available on cb3.3 and has no completion | ||
1147 | * write hw bug. | ||
1148 | */ | ||
1149 | if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0) | ||
1150 | idx = ioat->head; | ||
1151 | else | ||
1152 | return NULL; | ||
1153 | |||
1154 | i = 0; | ||
1155 | |||
1156 | do { | ||
1157 | struct ioat_raw_descriptor *descs[4]; | ||
1158 | size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
1159 | |||
1160 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
1161 | pq = desc->pq; | ||
1162 | |||
1163 | descs[0] = (struct ioat_raw_descriptor *) pq; | ||
1164 | |||
1165 | desc->sed = ioat3_alloc_sed(device, | ||
1166 | sed_get_pq16_pool_idx(src_cnt)); | ||
1167 | if (!desc->sed) { | ||
1168 | dev_err(to_dev(chan), | ||
1169 | "%s: no free sed entries\n", __func__); | ||
1170 | return NULL; | ||
1171 | } | ||
1172 | |||
1173 | pq->sed_addr = desc->sed->dma; | ||
1174 | desc->sed->parent = desc; | ||
1175 | |||
1176 | descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw; | ||
1177 | descs[2] = (void *)descs[1] + 64; | ||
1178 | |||
1179 | for (s = 0; s < src_cnt; s++) | ||
1180 | pq16_set_src(descs, src[s], offset, scf[s], s); | ||
1181 | |||
1182 | /* see the comment for dma_maxpq in include/linux/dmaengine.h */ | ||
1183 | if (dmaf_p_disabled_continue(flags)) | ||
1184 | pq16_set_src(descs, dst[1], offset, 1, s++); | ||
1185 | else if (dmaf_continue(flags)) { | ||
1186 | pq16_set_src(descs, dst[0], offset, 0, s++); | ||
1187 | pq16_set_src(descs, dst[1], offset, 1, s++); | ||
1188 | pq16_set_src(descs, dst[1], offset, 0, s++); | ||
1189 | } | ||
1190 | |||
1191 | pq->size = xfer_size; | ||
1192 | pq->p_addr = dst[0] + offset; | ||
1193 | pq->q_addr = dst[1] + offset; | ||
1194 | pq->ctl = 0; | ||
1195 | pq->ctl_f.op = op; | ||
1196 | pq->ctl_f.src_cnt = src16_cnt_to_hw(s); | ||
1197 | pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); | ||
1198 | pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); | ||
1199 | |||
1200 | len -= xfer_size; | ||
1201 | offset += xfer_size; | ||
1202 | } while (++i < num_descs); | ||
1203 | |||
1204 | /* last pq descriptor carries the unmap parameters and fence bit */ | ||
1205 | desc->txd.flags = flags; | ||
1206 | desc->len = total_len; | ||
1207 | if (result) | ||
1208 | desc->result = result; | ||
1209 | pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
1210 | |||
1211 | /* with cb3.3 we should be able to do completion w/o a null desc */ | ||
1212 | pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
1213 | pq->ctl_f.compl_write = 1; | ||
1214 | |||
1215 | dump_pq16_desc_dbg(ioat, desc); | ||
1216 | |||
1217 | /* we leave the channel locked to ensure in order submission */ | ||
1218 | return &desc->txd; | ||
1219 | } | ||
1220 | |||
1221 | static struct dma_async_tx_descriptor * | ||
954 | ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | 1222 | ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, |
955 | unsigned int src_cnt, const unsigned char *scf, size_t len, | 1223 | unsigned int src_cnt, const unsigned char *scf, size_t len, |
956 | unsigned long flags) | 1224 | unsigned long flags) |
957 | { | 1225 | { |
1226 | struct dma_device *dma = chan->device; | ||
1227 | |||
958 | /* specify valid address for disabled result */ | 1228 | /* specify valid address for disabled result */ |
959 | if (flags & DMA_PREP_PQ_DISABLE_P) | 1229 | if (flags & DMA_PREP_PQ_DISABLE_P) |
960 | dst[0] = dst[1]; | 1230 | dst[0] = dst[1]; |
@@ -974,11 +1244,20 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | |||
974 | single_source_coef[0] = scf[0]; | 1244 | single_source_coef[0] = scf[0]; |
975 | single_source_coef[1] = 0; | 1245 | single_source_coef[1] = 0; |
976 | 1246 | ||
977 | return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, | 1247 | return (src_cnt > 8) && (dma->max_pq > 8) ? |
978 | single_source_coef, len, flags); | 1248 | __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, |
979 | } else | 1249 | 2, single_source_coef, len, |
980 | return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, | 1250 | flags) : |
981 | len, flags); | 1251 | __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, |
1252 | single_source_coef, len, flags); | ||
1253 | |||
1254 | } else { | ||
1255 | return (src_cnt > 8) && (dma->max_pq > 8) ? | ||
1256 | __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, | ||
1257 | scf, len, flags) : | ||
1258 | __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, | ||
1259 | scf, len, flags); | ||
1260 | } | ||
982 | } | 1261 | } |
983 | 1262 | ||
984 | struct dma_async_tx_descriptor * | 1263 | struct dma_async_tx_descriptor * |
@@ -986,6 +1265,8 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | |||
986 | unsigned int src_cnt, const unsigned char *scf, size_t len, | 1265 | unsigned int src_cnt, const unsigned char *scf, size_t len, |
987 | enum sum_check_flags *pqres, unsigned long flags) | 1266 | enum sum_check_flags *pqres, unsigned long flags) |
988 | { | 1267 | { |
1268 | struct dma_device *dma = chan->device; | ||
1269 | |||
989 | /* specify valid address for disabled result */ | 1270 | /* specify valid address for disabled result */ |
990 | if (flags & DMA_PREP_PQ_DISABLE_P) | 1271 | if (flags & DMA_PREP_PQ_DISABLE_P) |
991 | pq[0] = pq[1]; | 1272 | pq[0] = pq[1]; |
@@ -997,14 +1278,18 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | |||
997 | */ | 1278 | */ |
998 | *pqres = 0; | 1279 | *pqres = 0; |
999 | 1280 | ||
1000 | return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, | 1281 | return (src_cnt > 8) && (dma->max_pq > 8) ? |
1001 | flags); | 1282 | __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, |
1283 | flags) : | ||
1284 | __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, | ||
1285 | flags); | ||
1002 | } | 1286 | } |
1003 | 1287 | ||
1004 | static struct dma_async_tx_descriptor * | 1288 | static struct dma_async_tx_descriptor * |
1005 | ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, | 1289 | ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, |
1006 | unsigned int src_cnt, size_t len, unsigned long flags) | 1290 | unsigned int src_cnt, size_t len, unsigned long flags) |
1007 | { | 1291 | { |
1292 | struct dma_device *dma = chan->device; | ||
1008 | unsigned char scf[src_cnt]; | 1293 | unsigned char scf[src_cnt]; |
1009 | dma_addr_t pq[2]; | 1294 | dma_addr_t pq[2]; |
1010 | 1295 | ||
@@ -1013,8 +1298,11 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, | |||
1013 | flags |= DMA_PREP_PQ_DISABLE_Q; | 1298 | flags |= DMA_PREP_PQ_DISABLE_Q; |
1014 | pq[1] = dst; /* specify valid address for disabled result */ | 1299 | pq[1] = dst; /* specify valid address for disabled result */ |
1015 | 1300 | ||
1016 | return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, | 1301 | return (src_cnt > 8) && (dma->max_pq > 8) ? |
1017 | flags); | 1302 | __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, |
1303 | flags) : | ||
1304 | __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, | ||
1305 | flags); | ||
1018 | } | 1306 | } |
1019 | 1307 | ||
1020 | struct dma_async_tx_descriptor * | 1308 | struct dma_async_tx_descriptor * |
@@ -1022,6 +1310,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, | |||
1022 | unsigned int src_cnt, size_t len, | 1310 | unsigned int src_cnt, size_t len, |
1023 | enum sum_check_flags *result, unsigned long flags) | 1311 | enum sum_check_flags *result, unsigned long flags) |
1024 | { | 1312 | { |
1313 | struct dma_device *dma = chan->device; | ||
1025 | unsigned char scf[src_cnt]; | 1314 | unsigned char scf[src_cnt]; |
1026 | dma_addr_t pq[2]; | 1315 | dma_addr_t pq[2]; |
1027 | 1316 | ||
@@ -1035,8 +1324,12 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, | |||
1035 | flags |= DMA_PREP_PQ_DISABLE_Q; | 1324 | flags |= DMA_PREP_PQ_DISABLE_Q; |
1036 | pq[1] = pq[0]; /* specify valid address for disabled result */ | 1325 | pq[1] = pq[0]; /* specify valid address for disabled result */ |
1037 | 1326 | ||
1038 | return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, | 1327 | |
1039 | len, flags); | 1328 | return (src_cnt > 8) && (dma->max_pq > 8) ? |
1329 | __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, | ||
1330 | scf, len, flags) : | ||
1331 | __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, | ||
1332 | scf, len, flags); | ||
1040 | } | 1333 | } |
1041 | 1334 | ||
1042 | static struct dma_async_tx_descriptor * | 1335 | static struct dma_async_tx_descriptor * |
@@ -1533,11 +1826,17 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) | |||
1533 | 1826 | ||
1534 | if (cap & IOAT_CAP_PQ) { | 1827 | if (cap & IOAT_CAP_PQ) { |
1535 | is_raid_device = true; | 1828 | is_raid_device = true; |
1536 | dma_set_maxpq(dma, 8, 0); | 1829 | |
1537 | if (is_xeon_cb32(pdev)) | 1830 | if (cap & IOAT_CAP_RAID16SS) { |
1538 | dma->pq_align = 6; | 1831 | dma_set_maxpq(dma, 16, 0); |
1539 | else | ||
1540 | dma->pq_align = 0; | 1832 | dma->pq_align = 0; |
1833 | } else { | ||
1834 | dma_set_maxpq(dma, 8, 0); | ||
1835 | if (is_xeon_cb32(pdev)) | ||
1836 | dma->pq_align = 6; | ||
1837 | else | ||
1838 | dma->pq_align = 0; | ||
1839 | } | ||
1541 | 1840 | ||
1542 | dma_cap_set(DMA_PQ, dma->cap_mask); | 1841 | dma_cap_set(DMA_PQ, dma->cap_mask); |
1543 | dma->device_prep_dma_pq = ioat3_prep_pq; | 1842 | dma->device_prep_dma_pq = ioat3_prep_pq; |
@@ -1546,11 +1845,16 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) | |||
1546 | dma->device_prep_dma_pq_val = ioat3_prep_pq_val; | 1845 | dma->device_prep_dma_pq_val = ioat3_prep_pq_val; |
1547 | 1846 | ||
1548 | if (!(cap & IOAT_CAP_XOR)) { | 1847 | if (!(cap & IOAT_CAP_XOR)) { |
1549 | dma->max_xor = 8; | 1848 | if (cap & IOAT_CAP_RAID16SS) { |
1550 | if (is_xeon_cb32(pdev)) | 1849 | dma->max_xor = 16; |
1551 | dma->xor_align = 6; | ||
1552 | else | ||
1553 | dma->xor_align = 0; | 1850 | dma->xor_align = 0; |
1851 | } else { | ||
1852 | dma->max_xor = 8; | ||
1853 | if (is_xeon_cb32(pdev)) | ||
1854 | dma->xor_align = 6; | ||
1855 | else | ||
1856 | dma->xor_align = 0; | ||
1857 | } | ||
1554 | 1858 | ||
1555 | dma_cap_set(DMA_XOR, dma->cap_mask); | 1859 | dma_cap_set(DMA_XOR, dma->cap_mask); |
1556 | dma->device_prep_dma_xor = ioat3_prep_pqxor; | 1860 | dma->device_prep_dma_xor = ioat3_prep_pqxor; |
@@ -1578,6 +1882,30 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) | |||
1578 | dma->device_prep_dma_pq_val = NULL; | 1882 | dma->device_prep_dma_pq_val = NULL; |
1579 | } | 1883 | } |
1580 | 1884 | ||
1885 | /* starting with CB3.3 super extended descriptors are supported */ | ||
1886 | if (cap & IOAT_CAP_RAID16SS) { | ||
1887 | char pool_name[14]; | ||
1888 | int i; | ||
1889 | |||
1890 | /* allocate sw descriptor pool for SED */ | ||
1891 | device->sed_pool = kmem_cache_create("ioat_sed", | ||
1892 | sizeof(struct ioat_sed_ent), 0, 0, NULL); | ||
1893 | if (!device->sed_pool) | ||
1894 | return -ENOMEM; | ||
1895 | |||
1896 | for (i = 0; i < MAX_SED_POOLS; i++) { | ||
1897 | snprintf(pool_name, 14, "ioat_hw%d_sed", i); | ||
1898 | |||
1899 | /* allocate SED DMA pool */ | ||
1900 | device->sed_hw_pool[i] = dma_pool_create(pool_name, | ||
1901 | &pdev->dev, | ||
1902 | SED_SIZE * (i + 1), 64, 0); | ||
1903 | if (!device->sed_hw_pool[i]) | ||
1904 | goto sed_pool_cleanup; | ||
1905 | |||
1906 | } | ||
1907 | } | ||
1908 | |||
1581 | err = ioat_probe(device); | 1909 | err = ioat_probe(device); |
1582 | if (err) | 1910 | if (err) |
1583 | return err; | 1911 | return err; |
@@ -1599,4 +1927,28 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) | |||
1599 | device->dca = ioat3_dca_init(pdev, device->reg_base); | 1927 | device->dca = ioat3_dca_init(pdev, device->reg_base); |
1600 | 1928 | ||
1601 | return 0; | 1929 | return 0; |
1930 | |||
1931 | sed_pool_cleanup: | ||
1932 | if (device->sed_pool) { | ||
1933 | int i; | ||
1934 | kmem_cache_destroy(device->sed_pool); | ||
1935 | |||
1936 | for (i = 0; i < MAX_SED_POOLS; i++) | ||
1937 | if (device->sed_hw_pool[i]) | ||
1938 | dma_pool_destroy(device->sed_hw_pool[i]); | ||
1939 | } | ||
1940 | |||
1941 | return -ENOMEM; | ||
1942 | } | ||
1943 | |||
1944 | void ioat3_dma_remove(struct ioatdma_device *device) | ||
1945 | { | ||
1946 | if (device->sed_pool) { | ||
1947 | int i; | ||
1948 | kmem_cache_destroy(device->sed_pool); | ||
1949 | |||
1950 | for (i = 0; i < MAX_SED_POOLS; i++) | ||
1951 | if (device->sed_hw_pool[i]) | ||
1952 | dma_pool_destroy(device->sed_hw_pool[i]); | ||
1953 | } | ||
1602 | } | 1954 | } |
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h index ce431f5a9b2a..d10570db6e7d 100644 --- a/drivers/dma/ioat/hw.h +++ b/drivers/dma/ioat/hw.h | |||
@@ -183,6 +183,8 @@ struct ioat_pq_descriptor { | |||
183 | unsigned int rsvd:11; | 183 | unsigned int rsvd:11; |
184 | #define IOAT_OP_PQ 0x89 | 184 | #define IOAT_OP_PQ 0x89 |
185 | #define IOAT_OP_PQ_VAL 0x8a | 185 | #define IOAT_OP_PQ_VAL 0x8a |
186 | #define IOAT_OP_PQ_16S 0xa0 | ||
187 | #define IOAT_OP_PQ_VAL_16S 0xa1 | ||
186 | unsigned int op:8; | 188 | unsigned int op:8; |
187 | } ctl_f; | 189 | } ctl_f; |
188 | }; | 190 | }; |
@@ -190,7 +192,10 @@ struct ioat_pq_descriptor { | |||
190 | uint64_t p_addr; | 192 | uint64_t p_addr; |
191 | uint64_t next; | 193 | uint64_t next; |
192 | uint64_t src_addr2; | 194 | uint64_t src_addr2; |
193 | uint64_t src_addr3; | 195 | union { |
196 | uint64_t src_addr3; | ||
197 | uint64_t sed_addr; | ||
198 | }; | ||
194 | uint8_t coef[8]; | 199 | uint8_t coef[8]; |
195 | uint64_t q_addr; | 200 | uint64_t q_addr; |
196 | }; | 201 | }; |
@@ -239,4 +244,40 @@ struct ioat_pq_update_descriptor { | |||
239 | struct ioat_raw_descriptor { | 244 | struct ioat_raw_descriptor { |
240 | uint64_t field[8]; | 245 | uint64_t field[8]; |
241 | }; | 246 | }; |
247 | |||
/*
 * First 64-byte block of the super extended descriptor used by 16-source
 * pq operations: eight coefficient bytes followed by seven 64-bit source
 * addresses.  pq16_set_src() stores the coefficients of source indices
 * 8 and up in @coef.
 */
struct ioat_pq16a_descriptor {
	uint8_t coef[8];
	uint64_t src_addr3;
	uint64_t src_addr4;
	uint64_t src_addr5;
	uint64_t src_addr6;
	uint64_t src_addr7;
	uint64_t src_addr8;
	uint64_t src_addr9;
};
258 | |||
/*
 * Second 64-byte block of the super extended descriptor used by 16-source
 * pq operations: seven 64-bit source addresses and one reserved qword.
 */
struct ioat_pq16b_descriptor {
	uint64_t src_addr10;
	uint64_t src_addr11;
	uint64_t src_addr12;
	uint64_t src_addr13;
	uint64_t src_addr14;
	uint64_t src_addr15;
	uint64_t src_addr16;
	uint64_t rsvd;
};
269 | |||
/* overlay of the two 64-byte pq16 super extended descriptor layouts */
union ioat_sed_pq_descriptor {
	struct ioat_pq16a_descriptor a;
	struct ioat_pq16b_descriptor b;
};
274 | |||
275 | #define SED_SIZE 64 | ||
276 | |||
/*
 * Untyped view of a super extended descriptor as three consecutive
 * blocks of eight 64-bit words (64 bytes each).
 */
struct ioat_sed_raw_descriptor {
	uint64_t a[8];
	uint64_t b[8];
	uint64_t c[8];
};
282 | |||
242 | #endif | 283 | #endif |
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 1f632968d4fb..2c8d560e6334 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c | |||
@@ -207,6 +207,9 @@ static void ioat_remove(struct pci_dev *pdev) | |||
207 | if (!device) | 207 | if (!device) |
208 | return; | 208 | return; |
209 | 209 | ||
210 | if (device->version >= IOAT_VER_3_0) | ||
211 | ioat3_dma_remove(device); | ||
212 | |||
210 | dev_err(&pdev->dev, "Removing dma and dca services\n"); | 213 | dev_err(&pdev->dev, "Removing dma and dca services\n"); |
211 | if (device->dca) { | 214 | if (device->dca) { |
212 | unregister_dca_provider(device->dca, &pdev->dev); | 215 | unregister_dca_provider(device->dca, &pdev->dev); |
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index c1ad1946809e..efdd47e47b82 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h | |||
@@ -79,6 +79,7 @@ | |||
79 | #define IOAT_CAP_APIC 0x00000080 | 79 | #define IOAT_CAP_APIC 0x00000080 |
80 | #define IOAT_CAP_XOR 0x00000100 | 80 | #define IOAT_CAP_XOR 0x00000100 |
81 | #define IOAT_CAP_PQ 0x00000200 | 81 | #define IOAT_CAP_PQ 0x00000200 |
82 | #define IOAT_CAP_RAID16SS 0x00020000 | ||
82 | 83 | ||
83 | #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ | 84 | #define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ |
84 | 85 | ||