aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDave Jiang <dave.jiang@intel.com>2013-04-15 13:25:56 -0400
committerVinod Koul <vinod.koul@intel.com>2013-04-15 13:16:15 -0400
commit7727eaa4490b7244934fe31f05e7329f30715267 (patch)
tree32f34dec6650b20fcaa452930f655907cdd01ac3 /drivers
parente0884772d323b745c65baa65df391b1c70829410 (diff)
ioatdma: Adding support for 16 src PQ ops and super extended descriptors
v3.3 introduced 16-source PQ operations. This also introduced super extended descriptors (SEDs) to support the 16-source operations. This patch adds support for the 16-source ops and, in turn, adds the super extended descriptors for those ops. 5 SED pools are created depending on the descriptor sizes. An SED can be a 64-byte descriptor or larger and must be physically contiguous. A kmem cache pool is created for allocating the software descriptor that manages the hardware descriptor. The super extended descriptor takes the place of the extended descriptor under certain operations and is "attached" to the op descriptor during the operation. This is a new feature for ioatdma v3.3. Signed-off-by: Dave Jiang <dave.jiang@intel.com> Acked-by: Dan Williams <djbw@fb.com> Acked-by: Dan Williams <djbw@fb.com> Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/dma/ioat/dma.h17
-rw-r--r--drivers/dma/ioat/dma_v2.h2
-rw-r--r--drivers/dma/ioat/dma_v3.c394
-rw-r--r--drivers/dma/ioat/hw.h43
-rw-r--r--drivers/dma/ioat/pci.c3
-rw-r--r--drivers/dma/ioat/registers.h1
6 files changed, 438 insertions, 22 deletions
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 976eba8c06c7..35d74028773a 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -81,6 +81,9 @@ struct ioatdma_device {
81 void __iomem *reg_base; 81 void __iomem *reg_base;
82 struct pci_pool *dma_pool; 82 struct pci_pool *dma_pool;
83 struct pci_pool *completion_pool; 83 struct pci_pool *completion_pool;
84#define MAX_SED_POOLS 5
85 struct dma_pool *sed_hw_pool[MAX_SED_POOLS];
86 struct kmem_cache *sed_pool;
84 struct dma_device common; 87 struct dma_device common;
85 u8 version; 88 u8 version;
86 struct msix_entry msix_entries[4]; 89 struct msix_entry msix_entries[4];
@@ -141,6 +144,20 @@ struct ioat_dma_chan {
141 u16 active; 144 u16 active;
142}; 145};
143 146
147/**
148 * struct ioat_sed_ent - wrapper around super extended hardware descriptor
149 * @hw: hardware SED
150 * @dma: dma address for the SED
151 * @parent: point to the dma descriptor that's the parent
152 * @hw_pool: index into the device's sed_hw_pool[] that @hw was allocated from
153 */
154struct ioat_sed_ent {
155 struct ioat_sed_raw_descriptor *hw;
156 dma_addr_t dma;
157 struct ioat_ring_ent *parent;
158 unsigned int hw_pool;
159};
160
144static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c) 161static inline struct ioat_chan_common *to_chan_common(struct dma_chan *c)
145{ 162{
146 return container_of(c, struct ioat_chan_common, common); 163 return container_of(c, struct ioat_chan_common, common);
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index e100f644e344..29bf9448035d 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -137,6 +137,7 @@ struct ioat_ring_ent {
137 #ifdef DEBUG 137 #ifdef DEBUG
138 int id; 138 int id;
139 #endif 139 #endif
140 struct ioat_sed_ent *sed;
140}; 141};
141 142
142static inline struct ioat_ring_ent * 143static inline struct ioat_ring_ent *
@@ -157,6 +158,7 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr)
157 158
158int ioat2_dma_probe(struct ioatdma_device *dev, int dca); 159int ioat2_dma_probe(struct ioatdma_device *dev, int dca);
159int ioat3_dma_probe(struct ioatdma_device *dev, int dca); 160int ioat3_dma_probe(struct ioatdma_device *dev, int dca);
161void ioat3_dma_remove(struct ioatdma_device *dev);
160struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); 162struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
161struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); 163struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
162int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); 164int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs);
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 639311598f35..71e113dfc8cc 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -55,7 +55,7 @@
55/* 55/*
56 * Support routines for v3+ hardware 56 * Support routines for v3+ hardware
57 */ 57 */
58 58#include <linux/module.h>
59#include <linux/pci.h> 59#include <linux/pci.h>
60#include <linux/gfp.h> 60#include <linux/gfp.h>
61#include <linux/dmaengine.h> 61#include <linux/dmaengine.h>
@@ -70,6 +70,10 @@
70/* ioat hardware assumes at least two sources for raid operations */ 70/* ioat hardware assumes at least two sources for raid operations */
71#define src_cnt_to_sw(x) ((x) + 2) 71#define src_cnt_to_sw(x) ((x) + 2)
72#define src_cnt_to_hw(x) ((x) - 2) 72#define src_cnt_to_hw(x) ((x) - 2)
73#define ndest_to_sw(x) ((x) + 1)
74#define ndest_to_hw(x) ((x) - 1)
75#define src16_cnt_to_sw(x) ((x) + 9)
76#define src16_cnt_to_hw(x) ((x) - 9)
73 77
74/* provide a lookup table for setting the source address in the base or 78/* provide a lookup table for setting the source address in the base or
75 * extended descriptor of an xor or pq descriptor 79 * extended descriptor of an xor or pq descriptor
@@ -77,7 +81,18 @@
77static const u8 xor_idx_to_desc = 0xe0; 81static const u8 xor_idx_to_desc = 0xe0;
78static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 }; 82static const u8 xor_idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };
79static const u8 pq_idx_to_desc = 0xf8; 83static const u8 pq_idx_to_desc = 0xf8;
84static const u8 pq16_idx_to_desc[] = { 0, 0, 1, 1, 1, 1, 1, 1, 1,
85 2, 2, 2, 2, 2, 2, 2 };
80static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; 86static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 };
87static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7,
88 0, 1, 2, 3, 4, 5, 6 };
89
90/*
91 * technically sources 1 and 2 do not require SED, but the op will have
92 * at least 9 sources so that's irrelevant.
93 */
94static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0,
95 1, 1, 1, 1, 1, 1, 1 };
81 96
82static void ioat3_eh(struct ioat2_dma_chan *ioat); 97static void ioat3_eh(struct ioat2_dma_chan *ioat);
83 98
@@ -103,6 +118,13 @@ static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx)
103 return raw->field[pq_idx_to_field[idx]]; 118 return raw->field[pq_idx_to_field[idx]];
104} 119}
105 120
121static dma_addr_t pq16_get_src(struct ioat_raw_descriptor *desc[3], int idx)
122{
123 struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
124
125 return raw->field[pq16_idx_to_field[idx]];
126}
127
106static void pq_set_src(struct ioat_raw_descriptor *descs[2], 128static void pq_set_src(struct ioat_raw_descriptor *descs[2],
107 dma_addr_t addr, u32 offset, u8 coef, int idx) 129 dma_addr_t addr, u32 offset, u8 coef, int idx)
108{ 130{
@@ -113,6 +135,12 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2],
113 pq->coef[idx] = coef; 135 pq->coef[idx] = coef;
114} 136}
115 137
138static int sed_get_pq16_pool_idx(int src_cnt)
139{
140
141 return pq16_idx_to_sed[src_cnt];
142}
143
116static bool is_jf_ioat(struct pci_dev *pdev) 144static bool is_jf_ioat(struct pci_dev *pdev)
117{ 145{
118 switch (pdev->device) { 146 switch (pdev->device) {
@@ -210,6 +238,52 @@ static bool is_bwd_ioat(struct pci_dev *pdev)
210 } 238 }
211} 239}
212 240
241static void pq16_set_src(struct ioat_raw_descriptor *desc[3],
242 dma_addr_t addr, u32 offset, u8 coef, int idx)
243{
244 struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *)desc[0];
245 struct ioat_pq16a_descriptor *pq16 =
246 (struct ioat_pq16a_descriptor *)desc[1];
247 struct ioat_raw_descriptor *raw = desc[pq16_idx_to_desc[idx]];
248
249 raw->field[pq16_idx_to_field[idx]] = addr + offset;
250
251 if (idx < 8)
252 pq->coef[idx] = coef;
253 else
254 pq16->coef[idx - 8] = coef;
255}
256
257struct ioat_sed_ent *
258ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool)
259{
260 struct ioat_sed_ent *sed;
261 gfp_t flags = __GFP_ZERO | GFP_ATOMIC;
262
263 sed = kmem_cache_alloc(device->sed_pool, flags);
264 if (!sed)
265 return NULL;
266
267 sed->hw_pool = hw_pool;
268 sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool],
269 flags, &sed->dma);
270 if (!sed->hw) {
271 kmem_cache_free(device->sed_pool, sed);
272 return NULL;
273 }
274
275 return sed;
276}
277
278void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *sed)
279{
280 if (!sed)
281 return;
282
283 dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma);
284 kmem_cache_free(device->sed_pool, sed);
285}
286
213static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, 287static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
214 struct ioat_ring_ent *desc, int idx) 288 struct ioat_ring_ent *desc, int idx)
215{ 289{
@@ -322,6 +396,54 @@ static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat,
322 } 396 }
323 break; 397 break;
324 } 398 }
399 case IOAT_OP_PQ_16S:
400 case IOAT_OP_PQ_VAL_16S: {
401 struct ioat_pq_descriptor *pq = desc->pq;
402 int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
403 struct ioat_raw_descriptor *descs[4];
404 int i;
405
406 /* in the 'continue' case don't unmap the dests as sources */
407 if (dmaf_p_disabled_continue(flags))
408 src_cnt--;
409 else if (dmaf_continue(flags))
410 src_cnt -= 3;
411
412 if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
413 descs[0] = (struct ioat_raw_descriptor *)pq;
414 descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw);
415 descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]);
416 for (i = 0; i < src_cnt; i++) {
417 dma_addr_t src = pq16_get_src(descs, i);
418
419 ioat_unmap(pdev, src - offset, len,
420 PCI_DMA_TODEVICE, flags, 0);
421 }
422
423 /* the dests are sources in pq validate operations */
424 if (pq->ctl_f.op == IOAT_OP_XOR_VAL) {
425 if (!(flags & DMA_PREP_PQ_DISABLE_P))
426 ioat_unmap(pdev, pq->p_addr - offset,
427 len, PCI_DMA_TODEVICE,
428 flags, 0);
429 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
430 ioat_unmap(pdev, pq->q_addr - offset,
431 len, PCI_DMA_TODEVICE,
432 flags, 0);
433 break;
434 }
435 }
436
437 if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
438 if (!(flags & DMA_PREP_PQ_DISABLE_P))
439 ioat_unmap(pdev, pq->p_addr - offset, len,
440 PCI_DMA_BIDIRECTIONAL, flags, 1);
441 if (!(flags & DMA_PREP_PQ_DISABLE_Q))
442 ioat_unmap(pdev, pq->q_addr - offset, len,
443 PCI_DMA_BIDIRECTIONAL, flags, 1);
444 }
445 break;
446 }
325 default: 447 default:
326 dev_err(&pdev->dev, "%s: unknown op type: %#x\n", 448 dev_err(&pdev->dev, "%s: unknown op type: %#x\n",
327 __func__, desc->hw->ctl_f.op); 449 __func__, desc->hw->ctl_f.op);
@@ -386,6 +508,7 @@ static bool ioat3_cleanup_preamble(struct ioat_chan_common *chan,
386static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) 508static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
387{ 509{
388 struct ioat_chan_common *chan = &ioat->base; 510 struct ioat_chan_common *chan = &ioat->base;
511 struct ioatdma_device *device = chan->device;
389 struct ioat_ring_ent *desc; 512 struct ioat_ring_ent *desc;
390 bool seen_current = false; 513 bool seen_current = false;
391 int idx = ioat->tail, i; 514 int idx = ioat->tail, i;
@@ -430,6 +553,12 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete)
430 BUG_ON(i + 1 >= active); 553 BUG_ON(i + 1 >= active);
431 i++; 554 i++;
432 } 555 }
556
557 /* cleanup super extended descriptors */
558 if (desc->sed) {
559 ioat3_free_sed(device, desc->sed);
560 desc->sed = NULL;
561 }
433 } 562 }
434 smp_mb(); /* finish all descriptor reads before incrementing tail */ 563 smp_mb(); /* finish all descriptor reads before incrementing tail */
435 ioat->tail = idx + i; 564 ioat->tail = idx + i;
@@ -522,6 +651,7 @@ static void ioat3_eh(struct ioat2_dma_chan *ioat)
522 } 651 }
523 break; 652 break;
524 case IOAT_OP_PQ_VAL: 653 case IOAT_OP_PQ_VAL:
654 case IOAT_OP_PQ_VAL_16S:
525 if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) { 655 if (chanerr & IOAT_CHANERR_XOR_P_OR_CRC_ERR) {
526 *desc->result |= SUM_CHECK_P_RESULT; 656 *desc->result |= SUM_CHECK_P_RESULT;
527 err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR; 657 err_handled |= IOAT_CHANERR_XOR_P_OR_CRC_ERR;
@@ -814,7 +944,8 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct
814 int i; 944 int i;
815 945
816 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" 946 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
817 " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", 947 " sz: %#10.8x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
948 " src_cnt: %d)\n",
818 desc_id(desc), (unsigned long long) desc->txd.phys, 949 desc_id(desc), (unsigned long long) desc->txd.phys,
819 (unsigned long long) (pq_ex ? pq_ex->next : pq->next), 950 (unsigned long long) (pq_ex ? pq_ex->next : pq->next),
820 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, 951 desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en,
@@ -829,6 +960,41 @@ dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct
829 dev_dbg(dev, "\tNEXT: %#llx\n", pq->next); 960 dev_dbg(dev, "\tNEXT: %#llx\n", pq->next);
830} 961}
831 962
963static void dump_pq16_desc_dbg(struct ioat2_dma_chan *ioat,
964 struct ioat_ring_ent *desc)
965{
966 struct device *dev = to_dev(&ioat->base);
967 struct ioat_pq_descriptor *pq = desc->pq;
968 struct ioat_raw_descriptor *descs[] = { (void *)pq,
969 (void *)pq,
970 (void *)pq };
971 int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt);
972 int i;
973
974 if (desc->sed) {
975 descs[1] = (void *)desc->sed->hw;
976 descs[2] = (void *)desc->sed->hw + 64;
977 }
978
979 dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x"
980 " sz: %#x ctl: %#x (op: %#x int: %d compl: %d pq: '%s%s'"
981 " src_cnt: %d)\n",
982 desc_id(desc), (unsigned long long) desc->txd.phys,
983 (unsigned long long) pq->next,
984 desc->txd.flags, pq->size, pq->ctl,
985 pq->ctl_f.op, pq->ctl_f.int_en,
986 pq->ctl_f.compl_write,
987 pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q",
988 pq->ctl_f.src_cnt);
989 for (i = 0; i < src_cnt; i++) {
990 dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i,
991 (unsigned long long) pq16_get_src(descs, i),
992 pq->coef[i]);
993 }
994 dev_dbg(dev, "\tP: %#llx\n", pq->p_addr);
995 dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr);
996}
997
832static struct dma_async_tx_descriptor * 998static struct dma_async_tx_descriptor *
833__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, 999__ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
834 const dma_addr_t *dst, const dma_addr_t *src, 1000 const dma_addr_t *dst, const dma_addr_t *src,
@@ -951,10 +1117,114 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
951} 1117}
952 1118
953static struct dma_async_tx_descriptor * 1119static struct dma_async_tx_descriptor *
1120__ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result,
1121 const dma_addr_t *dst, const dma_addr_t *src,
1122 unsigned int src_cnt, const unsigned char *scf,
1123 size_t len, unsigned long flags)
1124{
1125 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
1126 struct ioat_chan_common *chan = &ioat->base;
1127 struct ioatdma_device *device = chan->device;
1128 struct ioat_ring_ent *desc;
1129 size_t total_len = len;
1130 struct ioat_pq_descriptor *pq;
1131 u32 offset = 0;
1132 u8 op;
1133 int i, s, idx, num_descs;
1134
1135 /* this function only handles src_cnt 9 - 16 */
1136 BUG_ON(src_cnt < 9);
1137
1138 /* this function is only called with 9-16 sources */
1139 op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S;
1140
1141 dev_dbg(to_dev(chan), "%s\n", __func__);
1142
1143 num_descs = ioat2_xferlen_to_descs(ioat, len);
1144
1145 /*
1146 * 16 source pq is only available on cb3.3 and has no completion
1147 * write hw bug.
1148 */
1149 if (num_descs && ioat2_check_space_lock(ioat, num_descs) == 0)
1150 idx = ioat->head;
1151 else
1152 return NULL;
1153
1154 i = 0;
1155
1156 do {
1157 struct ioat_raw_descriptor *descs[4];
1158 size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log);
1159
1160 desc = ioat2_get_ring_ent(ioat, idx + i);
1161 pq = desc->pq;
1162
1163 descs[0] = (struct ioat_raw_descriptor *) pq;
1164
1165 desc->sed = ioat3_alloc_sed(device,
1166 sed_get_pq16_pool_idx(src_cnt));
1167 if (!desc->sed) {
1168 dev_err(to_dev(chan),
1169 "%s: no free sed entries\n", __func__);
1170 return NULL;
1171 }
1172
1173 pq->sed_addr = desc->sed->dma;
1174 desc->sed->parent = desc;
1175
1176 descs[1] = (struct ioat_raw_descriptor *)desc->sed->hw;
1177 descs[2] = (void *)descs[1] + 64;
1178
1179 for (s = 0; s < src_cnt; s++)
1180 pq16_set_src(descs, src[s], offset, scf[s], s);
1181
1182 /* see the comment for dma_maxpq in include/linux/dmaengine.h */
1183 if (dmaf_p_disabled_continue(flags))
1184 pq16_set_src(descs, dst[1], offset, 1, s++);
1185 else if (dmaf_continue(flags)) {
1186 pq16_set_src(descs, dst[0], offset, 0, s++);
1187 pq16_set_src(descs, dst[1], offset, 1, s++);
1188 pq16_set_src(descs, dst[1], offset, 0, s++);
1189 }
1190
1191 pq->size = xfer_size;
1192 pq->p_addr = dst[0] + offset;
1193 pq->q_addr = dst[1] + offset;
1194 pq->ctl = 0;
1195 pq->ctl_f.op = op;
1196 pq->ctl_f.src_cnt = src16_cnt_to_hw(s);
1197 pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P);
1198 pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q);
1199
1200 len -= xfer_size;
1201 offset += xfer_size;
1202 } while (++i < num_descs);
1203
1204 /* last pq descriptor carries the unmap parameters and fence bit */
1205 desc->txd.flags = flags;
1206 desc->len = total_len;
1207 if (result)
1208 desc->result = result;
1209 pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE);
1210
1211 /* with cb3.3 we should be able to do completion w/o a null desc */
1212 pq->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
1213 pq->ctl_f.compl_write = 1;
1214
1215 dump_pq16_desc_dbg(ioat, desc);
1216
1217 /* we leave the channel locked to ensure in order submission */
1218 return &desc->txd;
1219}
1220
1221static struct dma_async_tx_descriptor *
954ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, 1222ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
955 unsigned int src_cnt, const unsigned char *scf, size_t len, 1223 unsigned int src_cnt, const unsigned char *scf, size_t len,
956 unsigned long flags) 1224 unsigned long flags)
957{ 1225{
1226 struct dma_device *dma = chan->device;
1227
958 /* specify valid address for disabled result */ 1228 /* specify valid address for disabled result */
959 if (flags & DMA_PREP_PQ_DISABLE_P) 1229 if (flags & DMA_PREP_PQ_DISABLE_P)
960 dst[0] = dst[1]; 1230 dst[0] = dst[1];
@@ -974,11 +1244,20 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
974 single_source_coef[0] = scf[0]; 1244 single_source_coef[0] = scf[0];
975 single_source_coef[1] = 0; 1245 single_source_coef[1] = 0;
976 1246
977 return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, 1247 return (src_cnt > 8) && (dma->max_pq > 8) ?
978 single_source_coef, len, flags); 1248 __ioat3_prep_pq16_lock(chan, NULL, dst, single_source,
979 } else 1249 2, single_source_coef, len,
980 return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, 1250 flags) :
981 len, flags); 1251 __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2,
1252 single_source_coef, len, flags);
1253
1254 } else {
1255 return (src_cnt > 8) && (dma->max_pq > 8) ?
1256 __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt,
1257 scf, len, flags) :
1258 __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt,
1259 scf, len, flags);
1260 }
982} 1261}
983 1262
984struct dma_async_tx_descriptor * 1263struct dma_async_tx_descriptor *
@@ -986,6 +1265,8 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
986 unsigned int src_cnt, const unsigned char *scf, size_t len, 1265 unsigned int src_cnt, const unsigned char *scf, size_t len,
987 enum sum_check_flags *pqres, unsigned long flags) 1266 enum sum_check_flags *pqres, unsigned long flags)
988{ 1267{
1268 struct dma_device *dma = chan->device;
1269
989 /* specify valid address for disabled result */ 1270 /* specify valid address for disabled result */
990 if (flags & DMA_PREP_PQ_DISABLE_P) 1271 if (flags & DMA_PREP_PQ_DISABLE_P)
991 pq[0] = pq[1]; 1272 pq[0] = pq[1];
@@ -997,14 +1278,18 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
997 */ 1278 */
998 *pqres = 0; 1279 *pqres = 0;
999 1280
1000 return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, 1281 return (src_cnt > 8) && (dma->max_pq > 8) ?
1001 flags); 1282 __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len,
1283 flags) :
1284 __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len,
1285 flags);
1002} 1286}
1003 1287
1004static struct dma_async_tx_descriptor * 1288static struct dma_async_tx_descriptor *
1005ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, 1289ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
1006 unsigned int src_cnt, size_t len, unsigned long flags) 1290 unsigned int src_cnt, size_t len, unsigned long flags)
1007{ 1291{
1292 struct dma_device *dma = chan->device;
1008 unsigned char scf[src_cnt]; 1293 unsigned char scf[src_cnt];
1009 dma_addr_t pq[2]; 1294 dma_addr_t pq[2];
1010 1295
@@ -1013,8 +1298,11 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src,
1013 flags |= DMA_PREP_PQ_DISABLE_Q; 1298 flags |= DMA_PREP_PQ_DISABLE_Q;
1014 pq[1] = dst; /* specify valid address for disabled result */ 1299 pq[1] = dst; /* specify valid address for disabled result */
1015 1300
1016 return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, 1301 return (src_cnt > 8) && (dma->max_pq > 8) ?
1017 flags); 1302 __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len,
1303 flags) :
1304 __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len,
1305 flags);
1018} 1306}
1019 1307
1020struct dma_async_tx_descriptor * 1308struct dma_async_tx_descriptor *
@@ -1022,6 +1310,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
1022 unsigned int src_cnt, size_t len, 1310 unsigned int src_cnt, size_t len,
1023 enum sum_check_flags *result, unsigned long flags) 1311 enum sum_check_flags *result, unsigned long flags)
1024{ 1312{
1313 struct dma_device *dma = chan->device;
1025 unsigned char scf[src_cnt]; 1314 unsigned char scf[src_cnt];
1026 dma_addr_t pq[2]; 1315 dma_addr_t pq[2];
1027 1316
@@ -1035,8 +1324,12 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src,
1035 flags |= DMA_PREP_PQ_DISABLE_Q; 1324 flags |= DMA_PREP_PQ_DISABLE_Q;
1036 pq[1] = pq[0]; /* specify valid address for disabled result */ 1325 pq[1] = pq[0]; /* specify valid address for disabled result */
1037 1326
1038 return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, 1327
1039 len, flags); 1328 return (src_cnt > 8) && (dma->max_pq > 8) ?
1329 __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1,
1330 scf, len, flags) :
1331 __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1,
1332 scf, len, flags);
1040} 1333}
1041 1334
1042static struct dma_async_tx_descriptor * 1335static struct dma_async_tx_descriptor *
@@ -1533,11 +1826,17 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1533 1826
1534 if (cap & IOAT_CAP_PQ) { 1827 if (cap & IOAT_CAP_PQ) {
1535 is_raid_device = true; 1828 is_raid_device = true;
1536 dma_set_maxpq(dma, 8, 0); 1829
1537 if (is_xeon_cb32(pdev)) 1830 if (cap & IOAT_CAP_RAID16SS) {
1538 dma->pq_align = 6; 1831 dma_set_maxpq(dma, 16, 0);
1539 else
1540 dma->pq_align = 0; 1832 dma->pq_align = 0;
1833 } else {
1834 dma_set_maxpq(dma, 8, 0);
1835 if (is_xeon_cb32(pdev))
1836 dma->pq_align = 6;
1837 else
1838 dma->pq_align = 0;
1839 }
1541 1840
1542 dma_cap_set(DMA_PQ, dma->cap_mask); 1841 dma_cap_set(DMA_PQ, dma->cap_mask);
1543 dma->device_prep_dma_pq = ioat3_prep_pq; 1842 dma->device_prep_dma_pq = ioat3_prep_pq;
@@ -1546,11 +1845,16 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1546 dma->device_prep_dma_pq_val = ioat3_prep_pq_val; 1845 dma->device_prep_dma_pq_val = ioat3_prep_pq_val;
1547 1846
1548 if (!(cap & IOAT_CAP_XOR)) { 1847 if (!(cap & IOAT_CAP_XOR)) {
1549 dma->max_xor = 8; 1848 if (cap & IOAT_CAP_RAID16SS) {
1550 if (is_xeon_cb32(pdev)) 1849 dma->max_xor = 16;
1551 dma->xor_align = 6;
1552 else
1553 dma->xor_align = 0; 1850 dma->xor_align = 0;
1851 } else {
1852 dma->max_xor = 8;
1853 if (is_xeon_cb32(pdev))
1854 dma->xor_align = 6;
1855 else
1856 dma->xor_align = 0;
1857 }
1554 1858
1555 dma_cap_set(DMA_XOR, dma->cap_mask); 1859 dma_cap_set(DMA_XOR, dma->cap_mask);
1556 dma->device_prep_dma_xor = ioat3_prep_pqxor; 1860 dma->device_prep_dma_xor = ioat3_prep_pqxor;
@@ -1578,6 +1882,30 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1578 dma->device_prep_dma_pq_val = NULL; 1882 dma->device_prep_dma_pq_val = NULL;
1579 } 1883 }
1580 1884
1885 /* starting with CB3.3 super extended descriptors are supported */
1886 if (cap & IOAT_CAP_RAID16SS) {
1887 char pool_name[14];
1888 int i;
1889
1890 /* allocate sw descriptor pool for SED */
1891 device->sed_pool = kmem_cache_create("ioat_sed",
1892 sizeof(struct ioat_sed_ent), 0, 0, NULL);
1893 if (!device->sed_pool)
1894 return -ENOMEM;
1895
1896 for (i = 0; i < MAX_SED_POOLS; i++) {
1897 snprintf(pool_name, 14, "ioat_hw%d_sed", i);
1898
1899 /* allocate SED DMA pool */
1900 device->sed_hw_pool[i] = dma_pool_create(pool_name,
1901 &pdev->dev,
1902 SED_SIZE * (i + 1), 64, 0);
1903 if (!device->sed_hw_pool[i])
1904 goto sed_pool_cleanup;
1905
1906 }
1907 }
1908
1581 err = ioat_probe(device); 1909 err = ioat_probe(device);
1582 if (err) 1910 if (err)
1583 return err; 1911 return err;
@@ -1599,4 +1927,28 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca)
1599 device->dca = ioat3_dca_init(pdev, device->reg_base); 1927 device->dca = ioat3_dca_init(pdev, device->reg_base);
1600 1928
1601 return 0; 1929 return 0;
1930
1931sed_pool_cleanup:
1932 if (device->sed_pool) {
1933 int i;
1934 kmem_cache_destroy(device->sed_pool);
1935
1936 for (i = 0; i < MAX_SED_POOLS; i++)
1937 if (device->sed_hw_pool[i])
1938 dma_pool_destroy(device->sed_hw_pool[i]);
1939 }
1940
1941 return -ENOMEM;
1942}
1943
1944void ioat3_dma_remove(struct ioatdma_device *device)
1945{
1946 if (device->sed_pool) {
1947 int i;
1948 kmem_cache_destroy(device->sed_pool);
1949
1950 for (i = 0; i < MAX_SED_POOLS; i++)
1951 if (device->sed_hw_pool[i])
1952 dma_pool_destroy(device->sed_hw_pool[i]);
1953 }
1602} 1954}
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index ce431f5a9b2a..d10570db6e7d 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -183,6 +183,8 @@ struct ioat_pq_descriptor {
183 unsigned int rsvd:11; 183 unsigned int rsvd:11;
184 #define IOAT_OP_PQ 0x89 184 #define IOAT_OP_PQ 0x89
185 #define IOAT_OP_PQ_VAL 0x8a 185 #define IOAT_OP_PQ_VAL 0x8a
186 #define IOAT_OP_PQ_16S 0xa0
187 #define IOAT_OP_PQ_VAL_16S 0xa1
186 unsigned int op:8; 188 unsigned int op:8;
187 } ctl_f; 189 } ctl_f;
188 }; 190 };
@@ -190,7 +192,10 @@ struct ioat_pq_descriptor {
190 uint64_t p_addr; 192 uint64_t p_addr;
191 uint64_t next; 193 uint64_t next;
192 uint64_t src_addr2; 194 uint64_t src_addr2;
193 uint64_t src_addr3; 195 union {
196 uint64_t src_addr3;
197 uint64_t sed_addr;
198 };
194 uint8_t coef[8]; 199 uint8_t coef[8];
195 uint64_t q_addr; 200 uint64_t q_addr;
196}; 201};
@@ -239,4 +244,40 @@ struct ioat_pq_update_descriptor {
239struct ioat_raw_descriptor { 244struct ioat_raw_descriptor {
240 uint64_t field[8]; 245 uint64_t field[8];
241}; 246};
247
/*
 * First 64-byte chunk of the SED used by 16-source pq operations.
 * pq16_set_src() stores the coefficients of software sources 8-15 in
 * coef[], and the pq16 lookup tables place software sources 2-8 in
 * src_addr3..src_addr9 (raw fields 1-7).
 */
248struct ioat_pq16a_descriptor {
249 uint8_t coef[8];
250 uint64_t src_addr3;
251 uint64_t src_addr4;
252 uint64_t src_addr5;
253 uint64_t src_addr6;
254 uint64_t src_addr7;
255 uint64_t src_addr8;
256 uint64_t src_addr9;
257};
258
/*
 * Second 64-byte chunk of the SED used by 16-source pq operations.  The
 * pq16 lookup tables place software sources 9-15 in src_addr10..src_addr16
 * (raw fields 0-6); the last field is reserved.
 */
259struct ioat_pq16b_descriptor {
260 uint64_t src_addr10;
261 uint64_t src_addr11;
262 uint64_t src_addr12;
263 uint64_t src_addr13;
264 uint64_t src_addr14;
265 uint64_t src_addr15;
266 uint64_t src_addr16;
267 uint64_t rsvd;
268};
269
/* Overlay of the two 64-byte pq16 SED chunk layouts. */
270union ioat_sed_pq_descriptor {
271 struct ioat_pq16a_descriptor a;
272 struct ioat_pq16b_descriptor b;
273};
274
275#define SED_SIZE 64
276
/*
 * Raw view of a super extended descriptor as consecutive 64-byte chunks
 * (8 x uint64_t each).  The pq16 code uses the start of the SED as its
 * second raw descriptor and b[], 64 bytes in, as its third.
 */
277struct ioat_sed_raw_descriptor {
278 uint64_t a[8];
279 uint64_t b[8];
280 uint64_t c[8];
281};
282
242#endif 283#endif
diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c
index 1f632968d4fb..2c8d560e6334 100644
--- a/drivers/dma/ioat/pci.c
+++ b/drivers/dma/ioat/pci.c
@@ -207,6 +207,9 @@ static void ioat_remove(struct pci_dev *pdev)
207 if (!device) 207 if (!device)
208 return; 208 return;
209 209
210 if (device->version >= IOAT_VER_3_0)
211 ioat3_dma_remove(device);
212
210 dev_err(&pdev->dev, "Removing dma and dca services\n"); 213 dev_err(&pdev->dev, "Removing dma and dca services\n");
211 if (device->dca) { 214 if (device->dca) {
212 unregister_dca_provider(device->dca, &pdev->dev); 215 unregister_dca_provider(device->dca, &pdev->dev);
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index c1ad1946809e..efdd47e47b82 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -79,6 +79,7 @@
79#define IOAT_CAP_APIC 0x00000080 79#define IOAT_CAP_APIC 0x00000080
80#define IOAT_CAP_XOR 0x00000100 80#define IOAT_CAP_XOR 0x00000100
81#define IOAT_CAP_PQ 0x00000200 81#define IOAT_CAP_PQ 0x00000200
82#define IOAT_CAP_RAID16SS 0x00020000
82 83
83#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ 84#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
84 85