diff options
Diffstat (limited to 'drivers/staging')
-rw-r--r-- | drivers/staging/Kconfig | 2 | ||||
-rw-r--r-- | drivers/staging/Makefile | 1 | ||||
-rw-r--r-- | drivers/staging/altpciechdma/Kconfig | 10 | ||||
-rw-r--r-- | drivers/staging/altpciechdma/Makefile | 2 | ||||
-rw-r--r-- | drivers/staging/altpciechdma/TODO | 15 | ||||
-rw-r--r-- | drivers/staging/altpciechdma/altpciechdma.c | 1184 |
6 files changed, 1214 insertions, 0 deletions
diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index 11d003d37ad..48e2e064a65 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig | |||
@@ -79,5 +79,7 @@ source "drivers/staging/asus_oled/Kconfig" | |||
79 | 79 | ||
80 | source "drivers/staging/panel/Kconfig" | 80 | source "drivers/staging/panel/Kconfig" |
81 | 81 | ||
82 | source "drivers/staging/altpciechdma/Kconfig" | ||
83 | |||
82 | endif # !STAGING_EXCLUDE_BUILD | 84 | endif # !STAGING_EXCLUDE_BUILD |
83 | endif # STAGING | 85 | endif # STAGING |
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index a738bb34c8a..b301be8af94 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile | |||
@@ -22,3 +22,4 @@ obj-$(CONFIG_BENET) += benet/ | |||
22 | obj-$(CONFIG_COMEDI) += comedi/ | 22 | obj-$(CONFIG_COMEDI) += comedi/ |
23 | obj-$(CONFIG_ASUS_OLED) += asus_oled/ | 23 | obj-$(CONFIG_ASUS_OLED) += asus_oled/ |
24 | obj-$(CONFIG_PANEL) += panel/ | 24 | obj-$(CONFIG_PANEL) += panel/ |
25 | obj-$(CONFIG_ALTERA_PCIE_CHDMA) += altpciechdma/ | ||
diff --git a/drivers/staging/altpciechdma/Kconfig b/drivers/staging/altpciechdma/Kconfig new file mode 100644 index 00000000000..0f4bf92cbbf --- /dev/null +++ b/drivers/staging/altpciechdma/Kconfig | |||
@@ -0,0 +1,10 @@ | |||
1 | config ALTERA_PCIE_CHDMA | ||
2 | tristate "Altera PCI Express Chaining DMA driver" | ||
3 | depends on PCI | ||
4 | default N | ||
5 | ---help--- | ||
6 | A reference driver that exercises the Chaining DMA logic reference | ||
7 | design generated along the Altera FPGA PCI Express soft or hard core, | ||
8 | only if instantiated using the MegaWizard, not the SOPC builder, of | ||
9 | Quartus 8.1. | ||
10 | |||
diff --git a/drivers/staging/altpciechdma/Makefile b/drivers/staging/altpciechdma/Makefile new file mode 100644 index 00000000000..c08c8437f4d --- /dev/null +++ b/drivers/staging/altpciechdma/Makefile | |||
@@ -0,0 +1,2 @@ | |||
1 | obj-$(CONFIG_ALTERA_PCIE_CHDMA) += altpciechdma.o | ||
2 | |||
diff --git a/drivers/staging/altpciechdma/TODO b/drivers/staging/altpciechdma/TODO new file mode 100644 index 00000000000..12c945fd61e --- /dev/null +++ b/drivers/staging/altpciechdma/TODO | |||
@@ -0,0 +1,15 @@ | |||
1 | DONE: | ||
2 | - functionality similar to logic testbench | ||
3 | |||
4 | TODO: | ||
5 | - checkpatch.pl cleanups. | ||
6 | - keep state of DMA engines. | ||
7 | - keep data structure that keeps state of each transfer. | ||
8 | - interrupt handler should iterate over outstanding descriptor tables. | ||
9 | - complete userspace cdev to read/write using the DMA engines. | ||
10 | - split off the DMA support functions in a module, re-usable by custom | ||
11 | drivers. | ||
12 | |||
13 | Please coordinate work with, and send patches to | ||
14 | Leon Woestenberg <leon@sidebranch.com> | ||
15 | |||
diff --git a/drivers/staging/altpciechdma/altpciechdma.c b/drivers/staging/altpciechdma/altpciechdma.c new file mode 100644 index 00000000000..8e2b4ca0651 --- /dev/null +++ b/drivers/staging/altpciechdma/altpciechdma.c | |||
@@ -0,0 +1,1184 @@ | |||
1 | /** | ||
2 | * Driver for Altera PCIe core chaining DMA reference design. | ||
3 | * | ||
4 | * Copyright (C) 2008 Leon Woestenberg <leon.woestenberg@axon.tv> | ||
5 | * Copyright (C) 2008 Nickolas Heppermann <heppermannwdt@gmail.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
15 | * GNU General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License along | ||
18 | * with this program; if not, write to the Free Software Foundation, Inc., | ||
19 | * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||
20 | * | ||
21 | * | ||
22 | * Rationale: This driver exercises the chaining DMA read and write engine | ||
23 | * in the reference design. It is meant as a complementary reference | ||
24 | * driver that can be used for testing early designs as well as a basis to | ||
25 | * write your custom driver. | ||
26 | * | ||
27 | * Status: Test results from Leon Woestenberg <leon.woestenberg@axon.tv>: | ||
28 | * | ||
29 | * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a | ||
30 | * Dell Precision 370 PC, x86, kernel 2.6.20 from Ubuntu 7.04. | ||
31 | * | ||
32 | * Sendero Board w/ Cyclone II EP2C35F672C6N, PX1011A PCIe x1 PHY on a | ||
33 | * Freescale MPC8313E-RDB board, PowerPC, 2.6.24 w/ Freescale patches. | ||
34 | * | ||
35 | * Driver tests passed with PCIe Compiler 8.1. With PCIe 8.0 the DMA | ||
36 | * loopback test had reproducible compare errors. I assume a change | ||
37 | * in the compiler or reference design, but could not find evidence nor | ||
38 | * documentation on a change or fix in that direction. | ||
39 | * | ||
40 | * The reference design does not have readable locations and thus a | ||
41 | * dummy read, used to flush PCI posted writes, cannot be performed. | ||
42 | * | ||
43 | */ | ||
44 | |||
#include <linux/kernel.h>
#include <linux/cdev.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/pci.h>
56 | |||
57 | |||
58 | /* by default do not build the character device interface */ | ||
59 | /* XXX It is non-functional yet */ | ||
60 | #ifndef ALTPCIECHDMA_CDEV | ||
61 | # define ALTPCIECHDMA_CDEV 0 | ||
62 | #endif | ||
63 | |||
64 | /* build the character device interface? */ | ||
65 | #if ALTPCIECHDMA_CDEV | ||
66 | # define MAX_CHDMA_SIZE (8 * 1024 * 1024) | ||
67 | # include "mapper_user_to_sg.h" | ||
68 | #endif | ||
69 | |||
70 | /** driver name, mimics Altera naming of the reference design */ | ||
71 | #define DRV_NAME "altpciechdma" | ||
72 | /** number of BARs on the device */ | ||
73 | #define APE_BAR_NUM (6) | ||
74 | /** BAR number where the RCSLAVE memory sits */ | ||
75 | #define APE_BAR_RCSLAVE (0) | ||
76 | /** BAR number where the Descriptor Header sits */ | ||
77 | #define APE_BAR_HEADER (2) | ||
78 | |||
79 | /** maximum size in bytes of the descriptor table, chdma logic limit */ | ||
80 | #define APE_CHDMA_TABLE_SIZE (4096) | ||
81 | /* single transfer must not exceed 255 table entries. worst case this can be | ||
82 | * achieved by 255 scattered pages, with only a single byte in the head and | ||
83 | * tail pages. 253 * PAGE_SIZE is a safe upper bound for the transfer size. | ||
84 | */ | ||
85 | #define APE_CHDMA_MAX_TRANSFER_LEN (253 * PAGE_SIZE) | ||
86 | |||
87 | /** | ||
88 | * Specifies those BARs to be mapped and the length of each mapping. | ||
89 | * | ||
90 | * Zero (0) means do not map, otherwise specifies the BAR lengths to be mapped. | ||
91 | * If the actual BAR length is less, this is considered an error; then | ||
92 | * reconfigure your PCIe core. | ||
93 | * | ||
94 | * @see ug_pci_express 8.0, table 7-2 at page 7-13. | ||
95 | */ | ||
96 | static const unsigned long bar_min_len[APE_BAR_NUM] = | ||
97 | { 32768, 0, 256, 0, 32768, 0 }; | ||
98 | |||
99 | /** | ||
100 | * Descriptor Header, controls the DMA read engine or write engine. | ||
101 | * | ||
102 | * The descriptor header is the main data structure for starting DMA transfers. | ||
103 | * | ||
104 | * It sits in End Point (FPGA) memory BAR[2] for 32-bit or BAR[3:2] for 64-bit. | ||
105 | * It references a descriptor table which exists in Root Complex (PC) memory. | ||
106 | * Writing the rclast field starts the DMA operation, thus all other structures | ||
107 | * and fields must be setup before doing so. | ||
108 | * | ||
109 | * @see ug_pci_express 8.0, tables 7-3, 7-4 and 7-5 at page 7-14. | ||
110 | * @note This header must be written in four 32-bit (PCI DWORD) writes. | ||
111 | */ | ||
112 | struct ape_chdma_header { | ||
113 | /** | ||
114 | * w0 consists of two 16-bit fields: | ||
115 | * lsb u16 number; number of descriptors in ape_chdma_table | ||
116 | * msb u16 control; global control flags | ||
117 | */ | ||
118 | u32 w0; | ||
119 | /* bus address to ape_chdma_table in Root Complex memory */ | ||
120 | u32 bdt_addr_h; | ||
121 | u32 bdt_addr_l; | ||
122 | /** | ||
123 | * w3 consists of two 16-bit fields: | ||
124 | * - lsb u16 rclast; last descriptor number available in Root Complex | ||
125 | * - zero (0) means the first descriptor is ready, | ||
126 | * - one (1) means two descriptors are ready, etc. | ||
127 | * - msb u16 reserved; | ||
128 | * | ||
129 | * @note writing to this memory location starts the DMA operation! | ||
130 | */ | ||
131 | u32 w3; | ||
132 | } __attribute__ ((packed)); | ||
133 | |||
134 | /** | ||
135 | * Descriptor Entry, describing a (non-scattered) single memory block transfer. | ||
136 | * | ||
137 | * There is one descriptor for each memory block involved in the transfer, a | ||
138 | * block being a contiguous address range on the bus. | ||
139 | * | ||
140 | * Multiple descriptors are chained by means of the ape_chdma_table data | ||
141 | * structure. | ||
142 | * | ||
143 | * @see ug_pci_express 8.0, tables 7-6, 7-7 and 7-8 at page 7-14 and page 7-15. | ||
144 | */ | ||
145 | struct ape_chdma_desc { | ||
146 | /** | ||
147 | * w0 consists of two 16-bit fields: | ||
148 | * number of DWORDS to transfer | ||
149 | * - lsb u16 length; | ||
150 | * global control | ||
151 | * - msb u16 control; | ||
152 | */ | ||
153 | u32 w0; | ||
154 | /* address of memory in the End Point */ | ||
155 | u32 ep_addr; | ||
156 | /* bus address of source or destination memory in the Root Complex */ | ||
157 | u32 rc_addr_h; | ||
158 | u32 rc_addr_l; | ||
159 | } __attribute__ ((packed)); | ||
160 | |||
161 | /** | ||
162 | * Descriptor Table, an array of descriptors describing a chained transfer. | ||
163 | * | ||
164 | * An array of descriptors, preceded by workspace for the End Point. | ||
165 | * It exists in Root Complex memory. | ||
166 | * | ||
167 | * The End Point can update its last completed descriptor number in the | ||
168 | * eplast field if requested by setting the EPLAST_ENA bit either | ||
169 | * globally in the header's or locally in any descriptor's control field. | ||
170 | * | ||
171 | * @note this structure may not exceed 4096 bytes. This results in a | ||
172 | * maximum of 4096 / (4 * 4) - 1 = 255 descriptors per chained transfer. | ||
173 | * | ||
174 | * @see ug_pci_express 8.0, tables 7-9, 7-10 and 7-11 at page 7-17 and page 7-18. | ||
175 | */ | ||
176 | struct ape_chdma_table { | ||
177 | /* workspace 0x00-0x0b, reserved */ | ||
178 | u32 reserved1[3]; | ||
179 | /* workspace 0x0c-0x0f, last descriptor handled by End Point */ | ||
180 | u32 w3; | ||
181 | /* the actual array of descriptors | ||
182 | * 0x10-0x1f, 0x20-0x2f, ... 0xff0-0xfff (255 entries) | ||
183 | */ | ||
184 | struct ape_chdma_desc desc[255]; | ||
185 | } __attribute__ ((packed)); | ||
186 | |||
187 | /** | ||
188 | * Altera PCI Express ('ape') board specific book keeping data | ||
189 | * | ||
190 | * Keeps state of the PCIe core and the Chaining DMA controller | ||
191 | * application. | ||
192 | */ | ||
193 | struct ape_dev { | ||
194 | /** the kernel pci device data structure provided by probe() */ | ||
195 | struct pci_dev *pci_dev; | ||
196 | /** | ||
197 | * kernel virtual address of the mapped BAR memory and IO regions of | ||
198 | * the End Point. Used by map_bars()/unmap_bars(). | ||
199 | */ | ||
200 | void * __iomem bar[APE_BAR_NUM]; | ||
201 | /** kernel virtual address for Descriptor Table in Root Complex memory */ | ||
202 | struct ape_chdma_table *table_virt; | ||
203 | /** | ||
204 | * bus address for the Descriptor Table in Root Complex memory, in | ||
205 | * CPU-native endianess | ||
206 | */ | ||
207 | dma_addr_t table_bus; | ||
208 | /* if the device regions could not be allocated, assume and remember it | ||
209 | * is in use by another driver; this driver must not disable the device. | ||
210 | */ | ||
211 | int in_use; | ||
212 | /* whether this driver enabled msi for the device */ | ||
213 | int msi_enabled; | ||
214 | /* whether this driver could obtain the regions */ | ||
215 | int got_regions; | ||
216 | /* irq line succesfully requested by this driver, -1 otherwise */ | ||
217 | int irq_line; | ||
218 | /* board revision */ | ||
219 | u8 revision; | ||
220 | /* interrupt count, incremented by the interrupt handler */ | ||
221 | int irq_count; | ||
222 | #if ALTPCIECHDMA_CDEV | ||
223 | /* character device */ | ||
224 | dev_t cdevno; | ||
225 | struct cdev cdev; | ||
226 | /* user space scatter gather mapper */ | ||
227 | struct sg_mapping_t *sgm; | ||
228 | #endif | ||
229 | }; | ||
230 | |||
231 | /** | ||
232 | * Using the subsystem vendor id and subsystem id, it is possible to | ||
233 | * distinguish between different cards bases around the same | ||
234 | * (third-party) logic core. | ||
235 | * | ||
236 | * Default Altera vendor and device ID's, and some (non-reserved) | ||
237 | * ID's are now used here that are used amongst the testers/developers. | ||
238 | */ | ||
239 | static const struct pci_device_id ids[] = { | ||
240 | { PCI_DEVICE(0x1172, 0xE001), }, | ||
241 | { PCI_DEVICE(0x2071, 0x2071), }, | ||
242 | { 0, } | ||
243 | }; | ||
244 | MODULE_DEVICE_TABLE(pci, ids); | ||
245 | |||
246 | #if ALTPCIECHDMA_CDEV | ||
247 | /* prototypes for character device */ | ||
248 | static int sg_init(struct ape_dev *ape); | ||
249 | static void sg_exit(struct ape_dev *ape); | ||
250 | #endif | ||
251 | |||
252 | /** | ||
253 | * altpciechdma_isr() - Interrupt handler | ||
254 | * | ||
255 | */ | ||
256 | static irqreturn_t altpciechdma_isr(int irq, void *dev_id) | ||
257 | { | ||
258 | struct ape_dev *ape = (struct ape_dev *)dev_id; | ||
259 | if (!ape) | ||
260 | return IRQ_NONE; | ||
261 | ape->irq_count++; | ||
262 | return IRQ_HANDLED; | ||
263 | } | ||
264 | |||
265 | static int __devinit scan_bars(struct ape_dev *ape, struct pci_dev *dev) | ||
266 | { | ||
267 | int i; | ||
268 | for (i = 0; i < APE_BAR_NUM; i++) { | ||
269 | unsigned long bar_start = pci_resource_start(dev, i); | ||
270 | if (bar_start) { | ||
271 | unsigned long bar_end = pci_resource_end(dev, i); | ||
272 | unsigned long bar_flags = pci_resource_flags(dev, i); | ||
273 | printk(KERN_DEBUG "BAR%d 0x%08lx-0x%08lx flags 0x%08lx\n", | ||
274 | i, bar_start, bar_end, bar_flags); | ||
275 | } | ||
276 | } | ||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | /** | ||
281 | * Unmap the BAR regions that had been mapped earlier using map_bars() | ||
282 | */ | ||
283 | static void unmap_bars(struct ape_dev *ape, struct pci_dev *dev) | ||
284 | { | ||
285 | int i; | ||
286 | for (i = 0; i < APE_BAR_NUM; i++) { | ||
287 | /* is this BAR mapped? */ | ||
288 | if (ape->bar[i]) { | ||
289 | /* unmap BAR */ | ||
290 | pci_iounmap(dev, ape->bar[i]); | ||
291 | ape->bar[i] = NULL; | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /** | ||
297 | * Map the device memory regions into kernel virtual address space after | ||
298 | * verifying their sizes respect the minimum sizes needed, given by the | ||
299 | * bar_min_len[] array. | ||
300 | */ | ||
301 | static int __devinit map_bars(struct ape_dev *ape, struct pci_dev *dev) | ||
302 | { | ||
303 | int rc; | ||
304 | int i; | ||
305 | /* iterate through all the BARs */ | ||
306 | for (i = 0; i < APE_BAR_NUM; i++) { | ||
307 | unsigned long bar_start = pci_resource_start(dev, i); | ||
308 | unsigned long bar_end = pci_resource_end(dev, i); | ||
309 | unsigned long bar_length = bar_end - bar_start + 1; | ||
310 | ape->bar[i] = NULL; | ||
311 | /* do not map, and skip, BARs with length 0 */ | ||
312 | if (!bar_min_len[i]) | ||
313 | continue; | ||
314 | /* do not map BARs with address 0 */ | ||
315 | if (!bar_start || !bar_end) { | ||
316 | printk(KERN_DEBUG "BAR #%d is not present?!\n", i); | ||
317 | rc = -1; | ||
318 | goto fail; | ||
319 | } | ||
320 | bar_length = bar_end - bar_start + 1; | ||
321 | /* BAR length is less than driver requires? */ | ||
322 | if (bar_length < bar_min_len[i]) { | ||
323 | printk(KERN_DEBUG "BAR #%d length = %lu bytes but driver " | ||
324 | "requires at least %lu bytes\n", i, bar_length, bar_min_len[i]); | ||
325 | rc = -1; | ||
326 | goto fail; | ||
327 | } | ||
328 | /* map the device memory or IO region into kernel virtual | ||
329 | * address space */ | ||
330 | ape->bar[i] = pci_iomap(dev, i, bar_min_len[i]); | ||
331 | if (!ape->bar[i]) { | ||
332 | printk(KERN_DEBUG "Could not map BAR #%d.\n", i); | ||
333 | rc = -1; | ||
334 | goto fail; | ||
335 | } | ||
336 | printk(KERN_DEBUG "BAR[%d] mapped at 0x%p with length %lu(/%lu).\n", i, | ||
337 | ape->bar[i], bar_min_len[i], bar_length); | ||
338 | } | ||
339 | /* succesfully mapped all required BAR regions */ | ||
340 | rc = 0; | ||
341 | goto success; | ||
342 | fail: | ||
343 | /* unmap any BARs that we did map */ | ||
344 | unmap_bars(ape, dev); | ||
345 | success: | ||
346 | return rc; | ||
347 | } | ||
348 | |||
#if 0 /* not yet implemented fully FIXME add opcode */
/**
 * rcslave_test() - Write/read-back test of the RCSLAVE memory BAR.
 *
 * Writes an incrementing 32-bit pattern, seeded from jiffies, to the words
 * with index 1024 up to (but excluding) 32768 / 4 of BAR APE_BAR_RCSLAVE,
 * then reads the words back and logs the first mismatch, if any.
 *
 * The logged addresses are computed with the same 32-bit word stride as the
 * actual accesses.
 */
static void __devinit rcslave_test(struct ape_dev *ape, struct pci_dev *dev)
{
	u32 *rcslave_mem = (u32 *)ape->bar[APE_BAR_RCSLAVE];
	u32 result = 0;
	/** this number is assumed to be different each time this test runs */
	u32 seed = (u32)jiffies;
	u32 value = seed;
	int i;

	/* write loop */
	value = seed;
	for (i = 1024; i < 32768 / 4 ; i++) {
		printk(KERN_DEBUG "Writing 0x%08x to 0x%p.\n",
			(u32)value, (void *)(rcslave_mem + i));
		iowrite32(value, rcslave_mem + i);
		value++;
	}
	/* read-back loop */
	value = seed;
	for (i = 1024; i < 32768 / 4; i++) {
		result = ioread32(rcslave_mem + i);
		if (result != value) {
			printk(KERN_DEBUG "Wrote 0x%08x to 0x%p, but read back 0x%08x.\n",
				(u32)value, (void *)(rcslave_mem + i), (u32)result);
			break;
		}
		value++;
	}
}
#endif
380 | |||
/* obtain the 32 most significant (high) bits of a 32-bit or 64-bit address;
 * shifting twice by 16 keeps the expression well defined when the address
 * type is only 32 bits wide. The argument is parenthesized so that
 * expression arguments are evaluated as a whole.
 */
#define pci_dma_h(addr) (((addr) >> 16) >> 16)
/* obtain the 32 least significant (low) bits of a 32-bit or 64-bit address */
#define pci_dma_l(addr) ((addr) & 0xffffffffUL)
385 | |||
386 | /* ape_fill_chdma_desc() - Fill a Altera PCI Express Chaining DMA descriptor | ||
387 | * | ||
388 | * @desc pointer to descriptor to be filled | ||
389 | * @addr root complex address | ||
390 | * @ep_addr end point address | ||
391 | * @len number of bytes, must be a multiple of 4. | ||
392 | */ | ||
393 | static inline void ape_chdma_desc_set(struct ape_chdma_desc *desc, dma_addr_t addr, u32 ep_addr, int len) | ||
394 | { | ||
395 | BUG_ON(len & 3); | ||
396 | desc->w0 = cpu_to_le32(len / 4); | ||
397 | desc->ep_addr = cpu_to_le32(ep_addr); | ||
398 | desc->rc_addr_h = cpu_to_le32(pci_dma_h(addr)); | ||
399 | desc->rc_addr_l = cpu_to_le32(pci_dma_l(addr)); | ||
400 | } | ||
401 | |||
402 | /* | ||
403 | * ape_sg_to_chdma_table() - Create a device descriptor table from a scatterlist. | ||
404 | * | ||
405 | * The scatterlist must have been mapped by pci_map_sg(sgm->sgl). | ||
406 | * | ||
407 | * @sgl scatterlist. | ||
408 | * @nents Number of entries in the scatterlist. | ||
409 | * @first Start index in the scatterlist sgm->sgl. | ||
410 | * @ep_addr End Point address for the scatter/gather transfer. | ||
411 | * @desc pointer to first descriptor | ||
412 | * | ||
413 | * Returns Number of entries in the table on success, -1 on error. | ||
414 | */ | ||
415 | static int ape_sg_to_chdma_table(struct scatterlist *sgl, int nents, int first, struct ape_chdma_desc *desc, u32 ep_addr) | ||
416 | { | ||
417 | int i = first, j = 0; | ||
418 | /* inspect first entry */ | ||
419 | dma_addr_t addr = sg_dma_address(&sgl[i]); | ||
420 | unsigned int len = sg_dma_len(&sgl[i]); | ||
421 | /* contiguous block */ | ||
422 | dma_addr_t cont_addr = addr; | ||
423 | unsigned int cont_len = len; | ||
424 | /* iterate over remaining entries */ | ||
425 | for (; j < 25 && i < nents - 1; i++) { | ||
426 | /* bus address of next entry i + 1 */ | ||
427 | dma_addr_t next = sg_dma_address(&sgl[i + 1]); | ||
428 | /* length of this entry i */ | ||
429 | len = sg_dma_len(&sgl[i]); | ||
430 | printk(KERN_DEBUG "%04d: addr=0x%08x length=0x%08x\n", i, addr, len); | ||
431 | /* entry i + 1 is non-contiguous with entry i? */ | ||
432 | if (next != addr + len) { | ||
433 | /* TODO create entry here (we could overwrite i) */ | ||
434 | printk(KERN_DEBUG "%4d: cont_addr=0x%08x cont_len=0x%08x\n", j, cont_addr, cont_len); | ||
435 | /* set descriptor for contiguous transfer */ | ||
436 | ape_chdma_desc_set(&desc[j], cont_addr, ep_addr, cont_len); | ||
437 | /* next end point memory address */ | ||
438 | ep_addr += cont_len; | ||
439 | /* start new contiguous block */ | ||
440 | cont_addr = next; | ||
441 | cont_len = 0; | ||
442 | j++; | ||
443 | } | ||
444 | /* add entry i + 1 to current contiguous block */ | ||
445 | cont_len += len; | ||
446 | /* goto entry i + 1 */ | ||
447 | addr = next; | ||
448 | } | ||
449 | /* TODO create entry here (we could overwrite i) */ | ||
450 | printk(KERN_DEBUG "%04d: addr=0x%08x length=0x%08x\n", i, addr, len); | ||
451 | printk(KERN_DEBUG "%4d: cont_addr=0x%08x length=0x%08x\n", j, cont_addr, cont_len); | ||
452 | j++; | ||
453 | return j; | ||
454 | } | ||
455 | |||
456 | /* compare buffers */ | ||
457 | static inline int compare(u32 *p, u32 *q, int len) | ||
458 | { | ||
459 | int result = -1; | ||
460 | int fail = 0; | ||
461 | int i; | ||
462 | for (i = 0; i < len / 4; i++) { | ||
463 | if (*p == *q) { | ||
464 | /* every so many u32 words, show equals */ | ||
465 | if ((i & 255) == 0) | ||
466 | printk(KERN_DEBUG "[%p] = 0x%08x [%p] = 0x%08x\n", p, *p, q, *q); | ||
467 | } else { | ||
468 | fail++; | ||
469 | /* show the first few miscompares */ | ||
470 | if (fail < 10) { | ||
471 | printk(KERN_DEBUG "[%p] = 0x%08x != [%p] = 0x%08x ?!\n", p, *p, q, *q); | ||
472 | /* but stop after a while */ | ||
473 | } else if (fail == 10) { | ||
474 | printk(KERN_DEBUG "---more errors follow! not printed---\n"); | ||
475 | } else { | ||
476 | /* stop compare after this many errors */ | ||
477 | break; | ||
478 | } | ||
479 | } | ||
480 | p++; | ||
481 | q++; | ||
482 | } | ||
483 | if (!fail) | ||
484 | result = 0; | ||
485 | return result; | ||
486 | } | ||
487 | |||
488 | /* dma_test() - Perform DMA loop back test to end point and back to root complex. | ||
489 | * | ||
490 | * Allocate a cache-coherent buffer in host memory, consisting of four pages. | ||
491 | * | ||
492 | * Fill the four memory pages such that each 32-bit word contains its own address. | ||
493 | * | ||
494 | * Now perform a loop back test, have the end point device copy the first buffer | ||
495 | * half to end point memory, then have it copy back into the second half. | ||
496 | * | ||
497 | * Create a descriptor table to copy the first buffer half into End Point | ||
498 | * memory. Instruct the End Point to do a DMA read using that table. | ||
499 | * | ||
500 | * Create a descriptor table to copy End Point memory to the second buffer | ||
501 | * half. Instruct the End Point to do a DMA write using that table. | ||
502 | * | ||
503 | * Compare results, fail or pass. | ||
504 | * | ||
505 | */ | ||
506 | static int __devinit dma_test(struct ape_dev *ape, struct pci_dev *dev) | ||
507 | { | ||
508 | /* test result; guilty until proven innocent */ | ||
509 | int result = -1; | ||
510 | /* the DMA read header sits at address 0x00 of the DMA engine BAR */ | ||
511 | struct ape_chdma_header *write_header = (struct ape_chdma_header *)ape->bar[APE_BAR_HEADER]; | ||
512 | /* the write DMA header sits after the read header at address 0x10 */ | ||
513 | struct ape_chdma_header *read_header = write_header + 1; | ||
514 | /* virtual address of the allocated buffer */ | ||
515 | u8 *buffer_virt = 0; | ||
516 | /* bus address of the allocated buffer */ | ||
517 | dma_addr_t buffer_bus = 0; | ||
518 | int i, n = 0, irq_count; | ||
519 | |||
520 | /* temporary value used to construct 32-bit data words */ | ||
521 | u32 w; | ||
522 | |||
523 | printk(KERN_DEBUG "bar_tests(), PAGE_SIZE = 0x%0x\n", (int)PAGE_SIZE); | ||
524 | printk(KERN_DEBUG "write_header = 0x%p.\n", write_header); | ||
525 | printk(KERN_DEBUG "read_header = 0x%p.\n", read_header); | ||
526 | printk(KERN_DEBUG "&write_header->w3 = 0x%p\n", &write_header->w3); | ||
527 | printk(KERN_DEBUG "&read_header->w3 = 0x%p\n", &read_header->w3); | ||
528 | printk(KERN_DEBUG "ape->table_virt = 0x%p.\n", ape->table_virt); | ||
529 | |||
530 | if (!write_header || !read_header || !ape->table_virt) | ||
531 | goto fail; | ||
532 | |||
533 | /* allocate and map coherently-cached memory for a DMA-able buffer */ | ||
534 | /* @see 2.6.26.2/Documentation/DMA-mapping.txt line 318 */ | ||
535 | buffer_virt = (u8 *)pci_alloc_consistent(dev, PAGE_SIZE * 4, &buffer_bus); | ||
536 | if (!buffer_virt) { | ||
537 | printk(KERN_DEBUG "Could not allocate coherent DMA buffer.\n"); | ||
538 | goto fail; | ||
539 | } | ||
540 | printk(KERN_DEBUG "Allocated cache-coherent DMA buffer (virtual address = 0x%016llx, bus address = 0x%016llx).\n", | ||
541 | (u64)buffer_virt, (u64)buffer_bus); | ||
542 | |||
543 | /* fill first half of buffer with its virtual address as data */ | ||
544 | for (i = 0; i < 4 * PAGE_SIZE; i += 4) | ||
545 | #if 0 | ||
546 | *(u32 *)(buffer_virt + i) = i / PAGE_SIZE + 1; | ||
547 | #else | ||
548 | *(u32 *)(buffer_virt + i) = (buffer_virt + i); | ||
549 | #endif | ||
550 | #if 0 | ||
551 | compare((u32 *)buffer_virt, (u32 *)(buffer_virt + 2 * PAGE_SIZE), 8192); | ||
552 | #endif | ||
553 | |||
554 | #if 0 | ||
555 | /* fill second half of buffer with zeroes */ | ||
556 | for (i = 2 * PAGE_SIZE; i < 4 * PAGE_SIZE; i += 4) | ||
557 | *(u32 *)(buffer_virt + i) = 0; | ||
558 | #endif | ||
559 | |||
560 | /* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */ | ||
561 | ape->table_virt->w3 = cpu_to_le32(0x0000FADE); | ||
562 | |||
563 | /* fill in first descriptor */ | ||
564 | n = 0; | ||
565 | /* read 8192 bytes from RC buffer to EP address 4096 */ | ||
566 | ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus, 4096, 2 * PAGE_SIZE); | ||
567 | #if 1 | ||
568 | for (i = 0; i < 255; i++) { | ||
569 | ape_chdma_desc_set(&ape->table_virt->desc[i], buffer_bus, 4096, 2 * PAGE_SIZE); | ||
570 | } | ||
571 | /* index of last descriptor */ | ||
572 | n = i - 1; | ||
573 | #endif | ||
574 | #if 0 | ||
575 | /* fill in next descriptor */ | ||
576 | n++; | ||
577 | /* read 1024 bytes from RC buffer to EP address 4096 + 1024 */ | ||
578 | ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus + 1024, 4096 + 1024, 1024); | ||
579 | #endif | ||
580 | |||
581 | #if 1 | ||
582 | /* enable MSI after the last descriptor is completed */ | ||
583 | if (ape->msi_enabled) | ||
584 | ape->table_virt->desc[n].w0 |= cpu_to_le32(1UL << 16)/*local MSI*/; | ||
585 | #endif | ||
586 | #if 0 | ||
587 | /* dump descriptor table for debugging */ | ||
588 | printk(KERN_DEBUG "Descriptor Table (Read, in Root Complex Memory, # = %d)\n", n + 1); | ||
589 | for (i = 0; i < 4 + (n + 1) * 4; i += 4) { | ||
590 | u32 *p = (u32 *)ape->table_virt; | ||
591 | p += i; | ||
592 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32)p, (u32)p & 15, *p, 4 * le32_to_cpu(*p)); | ||
593 | p++; | ||
594 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p)); | ||
595 | p++; | ||
596 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p)); | ||
597 | p++; | ||
598 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p)); | ||
599 | } | ||
600 | #endif | ||
601 | /* set available number of descriptors in table */ | ||
602 | w = (u32)(n + 1); | ||
603 | w |= (1UL << 18)/*global EPLAST_EN*/; | ||
604 | #if 0 | ||
605 | if (ape->msi_enabled) | ||
606 | w |= (1UL << 17)/*global MSI*/; | ||
607 | #endif | ||
608 | printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", w, (void *)&read_header->w0); | ||
609 | iowrite32(w, &read_header->w0); | ||
610 | |||
611 | /* write table address (higher 32-bits) */ | ||
612 | printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)((ape->table_bus >> 16) >> 16), (void *)&read_header->bdt_addr_h); | ||
613 | iowrite32(pci_dma_h(ape->table_bus), &read_header->bdt_addr_h); | ||
614 | |||
615 | /* write table address (lower 32-bits) */ | ||
616 | printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)(ape->table_bus & 0xffffffffUL), (void *)&read_header->bdt_addr_l); | ||
617 | iowrite32(pci_dma_l(ape->table_bus), &read_header->bdt_addr_l); | ||
618 | |||
619 | /* memory write barrier */ | ||
620 | wmb(); | ||
621 | printk(KERN_DEBUG "Flush posted writes\n"); | ||
622 | /** FIXME Add dummy read to flush posted writes but need a readable location! */ | ||
623 | #if 0 | ||
624 | (void)ioread32(); | ||
625 | #endif | ||
626 | |||
627 | /* remember IRQ count before the transfer */ | ||
628 | irq_count = ape->irq_count; | ||
629 | /* write number of descriptors - this starts the DMA */ | ||
630 | printk(KERN_DEBUG "\nStart DMA read\n"); | ||
631 | printk(KERN_DEBUG "writing 0x%08x to 0x%p\n", (u32)n, (void *)&read_header->w3); | ||
632 | iowrite32(n, &read_header->w3); | ||
633 | printk(KERN_DEBUG "EPLAST = %lu\n", le32_to_cpu(*(u32 *)&ape->table_virt->w3) & 0xffffUL); | ||
634 | |||
635 | /** memory write barrier */ | ||
636 | wmb(); | ||
637 | /* dummy read to flush posted writes */ | ||
638 | /* FIXME Need a readable location! */ | ||
639 | #if 0 | ||
640 | (void)ioread32(); | ||
641 | #endif | ||
642 | printk(KERN_DEBUG "POLL FOR READ:\n"); | ||
643 | /* poll for chain completion, 1000 times 1 millisecond */ | ||
644 | for (i = 0; i < 100; i++) { | ||
645 | volatile u32 *p = &ape->table_virt->w3; | ||
646 | u32 eplast = le32_to_cpu(*p) & 0xffffUL; | ||
647 | printk(KERN_DEBUG "EPLAST = %u, n = %d\n", eplast, n); | ||
648 | if (eplast == n) { | ||
649 | printk(KERN_DEBUG "DONE\n"); | ||
650 | /* print IRQ count before the transfer */ | ||
651 | printk(KERN_DEBUG "#IRQs during transfer: %d\n", ape->irq_count - irq_count); | ||
652 | break; | ||
653 | } | ||
654 | udelay(100); | ||
655 | } | ||
656 | |||
657 | /* invalidate EPLAST, outside 0-255, 0xFADE is from the testbench */ | ||
658 | ape->table_virt->w3 = cpu_to_le32(0x0000FADE); | ||
659 | |||
660 | /* setup first descriptor */ | ||
661 | n = 0; | ||
662 | ape_chdma_desc_set(&ape->table_virt->desc[n], buffer_bus + 8192, 4096, 2 * PAGE_SIZE); | ||
663 | #if 1 | ||
664 | for (i = 0; i < 255; i++) { | ||
665 | ape_chdma_desc_set(&ape->table_virt->desc[i], buffer_bus + 8192, 4096, 2 * PAGE_SIZE); | ||
666 | } | ||
667 | /* index of last descriptor */ | ||
668 | n = i - 1; | ||
669 | #endif | ||
670 | #if 1 /* test variable, make a module option later */ | ||
671 | if (ape->msi_enabled) | ||
672 | ape->table_virt->desc[n].w0 |= cpu_to_le32(1UL << 16)/*local MSI*/; | ||
673 | #endif | ||
674 | #if 0 | ||
675 | /* dump descriptor table for debugging */ | ||
676 | printk(KERN_DEBUG "Descriptor Table (Write, in Root Complex Memory, # = %d)\n", n + 1); | ||
677 | for (i = 0; i < 4 + (n + 1) * 4; i += 4) { | ||
678 | u32 *p = (u32 *)ape->table_virt; | ||
679 | p += i; | ||
680 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (LEN=0x%x)\n", (u32)p, (u32)p & 15, *p, 4 * le32_to_cpu(*p)); | ||
681 | p++; | ||
682 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (EPA=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p)); | ||
683 | p++; | ||
684 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCH=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p)); | ||
685 | p++; | ||
686 | printk(KERN_DEBUG "0x%08x/0x%02x: 0x%08x (RCL=0x%x)\n", (u32)p, (u32)p & 15, *p, le32_to_cpu(*p)); | ||
687 | } | ||
688 | #endif | ||
689 | |||
690 | /* set number of available descriptors in the table */ | ||
691 | w = (u32)(n + 1); | ||
692 | /* enable updates of eplast for each descriptor completion */ | ||
693 | w |= (u32)(1UL << 18)/*global EPLAST_EN*/; | ||
694 | #if 0 // test variable, make a module option later | ||
695 | /* enable MSI for each descriptor completion */ | ||
696 | if (ape->msi_enabled) | ||
697 | w |= (1UL << 17)/*global MSI*/; | ||
698 | #endif | ||
699 | iowrite32(w, &write_header->w0); | ||
700 | iowrite32(pci_dma_h(ape->table_bus), &write_header->bdt_addr_h); | ||
701 | iowrite32(pci_dma_l(ape->table_bus), &write_header->bdt_addr_l); | ||
702 | |||
703 | /** memory write barrier and flush posted writes */ | ||
704 | wmb(); | ||
705 | /* dummy read to flush posted writes */ | ||
706 | /* FIXME Need a readable location! */ | ||
707 | #if 0 | ||
708 | (void)ioread32(); | ||
709 | #endif | ||
710 | irq_count = ape->irq_count; | ||
711 | |||
712 | printk(KERN_DEBUG "\nStart DMA write\n"); | ||
713 | iowrite32(n, &write_header->w3); | ||
714 | |||
715 | /** memory write barrier */ | ||
716 | wmb(); | ||
717 | /** dummy read to flush posted writes */ | ||
718 | //(void)ioread32(); | ||
719 | |||
720 | printk(KERN_DEBUG "POLL FOR WRITE:\n"); | ||
721 | /* poll for completion, at most 100 times with a 100 microsecond delay */ | ||
722 | for (i = 0; i < 100; i++) { | ||
723 | volatile u32 *p = &ape->table_virt->w3; | ||
724 | u32 eplast = le32_to_cpu(*p) & 0xffffUL; | ||
725 | printk(KERN_DEBUG "EPLAST = %u, n = %d\n", eplast, n); | ||
726 | if (eplast == n) { | ||
727 | printk(KERN_DEBUG "DONE\n"); | ||
728 | /* print IRQ count before the transfer */ | ||
729 | printk(KERN_DEBUG "#IRQs during transfer: %d\n", ape->irq_count - irq_count); | ||
730 | break; | ||
731 | } | ||
732 | udelay(100); | ||
733 | } | ||
734 | /* soft-reset DMA write engine */ | ||
735 | iowrite32(0x0000ffffUL, &write_header->w0); | ||
736 | /* soft-reset DMA read engine */ | ||
737 | iowrite32(0x0000ffffUL, &read_header->w0); | ||
738 | |||
739 | /** memory write barrier */ | ||
740 | wmb(); | ||
741 | /* dummy read to flush posted writes */ | ||
742 | /* FIXME Need a readable location! */ | ||
743 | #if 0 | ||
744 | (void)ioread32(); | ||
745 | #endif | ||
746 | /* compare first half of buffer with second half, should be identical */ | ||
747 | result = compare((u32 *)buffer_virt, (u32 *)(buffer_virt + 2 * PAGE_SIZE), 8192); | ||
748 | printk(KERN_DEBUG "DMA loop back test %s.\n", result ? "FAILED" : "PASSED"); | ||
749 | |||
750 | pci_free_consistent(dev, 4 * PAGE_SIZE, buffer_virt, buffer_bus); | ||
751 | fail: | ||
752 | printk(KERN_DEBUG "bar_tests() end, result %d\n", result); | ||
753 | return result; | ||
754 | } | ||
755 | |||
756 | /* Called when the PCI sub system thinks we can control the given device. | ||
757 | * Inspect if we can support the device and if so take control of it. | ||
758 | * | ||
759 | * Return 0 when we have taken control of the given device. | ||
760 | * | ||
761 | * - allocate board specific bookkeeping | ||
762 | * - allocate coherently-mapped memory for the descriptor table | ||
763 | * - enable the board | ||
764 | * - verify board revision | ||
765 | * - request regions | ||
766 | * - query DMA mask | ||
767 | * - obtain and request irq | ||
768 | * - map regions into kernel address space | ||
769 | */ | ||
770 | static int __devinit probe(struct pci_dev *dev, const struct pci_device_id *id) | ||
771 | { | ||
772 | int rc = 0; | ||
773 | struct ape_dev *ape = NULL; | ||
774 | u8 irq_pin, irq_line; | ||
775 | printk(KERN_DEBUG "probe(dev = 0x%p, pciid = 0x%p)\n", dev, id); | ||
776 | |||
777 | /* allocate memory for per-board book keeping */ | ||
778 | ape = kzalloc(sizeof(struct ape_dev), GFP_KERNEL); | ||
779 | if (!ape) { | ||
780 | printk(KERN_DEBUG "Could not kzalloc()ate memory.\n"); | ||
781 | goto err_ape; | ||
782 | } | ||
783 | ape->pci_dev = dev; | ||
784 | dev->dev.driver_data = (void *)ape; | ||
785 | printk(KERN_DEBUG "probe() ape = 0x%p\n", ape); | ||
786 | |||
787 | printk(KERN_DEBUG "sizeof(struct ape_chdma_table) = %d.\n", | ||
788 | (int)sizeof(struct ape_chdma_table)); | ||
789 | /* the reference design has a size restriction on the table size */ | ||
790 | BUG_ON(sizeof(struct ape_chdma_table) > APE_CHDMA_TABLE_SIZE); | ||
791 | |||
792 | /* allocate and map coherently-cached memory for a descriptor table */ | ||
793 | /* @see LDD3 page 446 */ | ||
794 | ape->table_virt = (struct ape_chdma_table *)pci_alloc_consistent(dev, | ||
795 | APE_CHDMA_TABLE_SIZE, &ape->table_bus); | ||
796 | /* could not allocate table? */ | ||
797 | if (!ape->table_virt) { | ||
798 | printk(KERN_DEBUG "Could not dma_alloc()ate_coherent memory.\n"); | ||
799 | goto err_table; | ||
800 | } | ||
801 | |||
802 | printk(KERN_DEBUG "table_virt = 0x%16llx, table_bus = 0x%16llx.\n", | ||
803 | (u64)ape->table_virt, (u64)ape->table_bus); | ||
804 | |||
805 | /* enable device */ | ||
806 | rc = pci_enable_device(dev); | ||
807 | if (rc) { | ||
808 | printk(KERN_DEBUG "pci_enable_device() failed\n"); | ||
809 | goto err_enable; | ||
810 | } | ||
811 | |||
812 | /* enable bus master capability on device */ | ||
813 | pci_set_master(dev); | ||
814 | /* enable message signaled interrupts */ | ||
815 | rc = pci_enable_msi(dev); | ||
816 | /* could not use MSI? */ | ||
817 | if (rc) { | ||
818 | /* resort to legacy interrupts */ | ||
819 | printk(KERN_DEBUG "Could not enable MSI interrupting.\n"); | ||
820 | ape->msi_enabled = 0; | ||
821 | /* MSI enabled, remember for cleanup */ | ||
822 | } else { | ||
823 | printk(KERN_DEBUG "Enabled MSI interrupting.\n"); | ||
824 | ape->msi_enabled = 1; | ||
825 | } | ||
826 | |||
827 | pci_read_config_byte(dev, PCI_REVISION_ID, &ape->revision); | ||
828 | #if 0 /* example */ | ||
829 | /* (for example) this driver does not support revision 0x42 */ | ||
830 | if (ape->revision == 0x42) { | ||
831 | printk(KERN_DEBUG "Revision 0x42 is not supported by this driver.\n"); | ||
832 | rc = -ENODEV; | ||
833 | goto err_rev; | ||
834 | } | ||
835 | #endif | ||
836 | /** XXX check for native or legacy PCIe endpoint? */ | ||
837 | |||
838 | rc = pci_request_regions(dev, DRV_NAME); | ||
839 | /* could not request all regions? */ | ||
840 | if (rc) { | ||
841 | /* assume device is in use (and do not disable it later!) */ | ||
842 | ape->in_use = 1; | ||
843 | goto err_regions; | ||
844 | } | ||
845 | ape->got_regions = 1; | ||
846 | |||
847 | #if 1 // @todo For now, disable 64-bit, because I do not understand the implications (DAC!) | ||
848 | /* query for DMA transfer */ | ||
849 | /* @see Documentation/DMA-mapping.txt */ | ||
850 | if (!pci_set_dma_mask(dev, DMA_64BIT_MASK)) { | ||
851 | pci_set_consistent_dma_mask(dev, DMA_64BIT_MASK); | ||
852 | /* use 64-bit DMA */ | ||
853 | printk(KERN_DEBUG "Using a 64-bit DMA mask.\n"); | ||
854 | } else | ||
855 | #endif | ||
856 | if (!pci_set_dma_mask(dev, DMA_32BIT_MASK)) { | ||
857 | printk(KERN_DEBUG "Could not set 64-bit DMA mask.\n"); | ||
858 | pci_set_consistent_dma_mask(dev, DMA_32BIT_MASK); | ||
859 | /* use 32-bit DMA */ | ||
860 | printk(KERN_DEBUG "Using a 32-bit DMA mask.\n"); | ||
861 | } else { | ||
862 | printk(KERN_DEBUG "No suitable DMA possible.\n"); | ||
863 | /** @todo Choose proper error return code */ | ||
864 | rc = -1; | ||
865 | goto err_mask; | ||
866 | } | ||
867 | |||
868 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &irq_pin); | ||
869 | /* could not read? */ | ||
870 | if (rc) | ||
871 | goto err_irq; | ||
872 | printk(KERN_DEBUG "IRQ pin #%d (0=none, 1=INTA#...4=INTD#).\n", irq_pin); | ||
873 | |||
874 | /* @see LDD3, page 318 */ | ||
875 | rc = pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq_line); | ||
876 | /* could not read? */ | ||
877 | if (rc) { | ||
878 | printk(KERN_DEBUG "Could not query PCI_INTERRUPT_LINE, error %d\n", rc); | ||
879 | goto err_irq; | ||
880 | } | ||
881 | printk(KERN_DEBUG "IRQ line #%d.\n", irq_line); | ||
882 | #if 1 | ||
883 | irq_line = dev->irq; | ||
884 | /* @see LDD3, page 259 */ | ||
885 | rc = request_irq(irq_line, altpciechdma_isr, IRQF_SHARED, DRV_NAME, (void *)ape); | ||
886 | if (rc) { | ||
887 | printk(KERN_DEBUG "Could not request IRQ #%d, error %d\n", irq_line, rc); | ||
888 | ape->irq_line = -1; | ||
889 | goto err_irq; | ||
890 | } | ||
891 | /* remember which irq we allocated */ | ||
892 | ape->irq_line = (int)irq_line; | ||
893 | printk(KERN_DEBUG "Succesfully requested IRQ #%d with dev_id 0x%p\n", irq_line, ape); | ||
894 | #endif | ||
895 | /* show BARs */ | ||
896 | scan_bars(ape, dev); | ||
897 | /* map BARs */ | ||
898 | rc = map_bars(ape, dev); | ||
899 | if (rc) | ||
900 | goto err_map; | ||
901 | #if ALTPCIECHDMA_CDEV | ||
902 | /* initialize character device */ | ||
903 | rc = sg_init(ape); | ||
904 | if (rc) | ||
905 | goto err_cdev; | ||
906 | #endif | ||
907 | /* perform DMA engines loop back test */ | ||
908 | rc = dma_test(ape, dev); | ||
909 | (void)rc; | ||
910 | /* succesfully took the device */ | ||
911 | rc = 0; | ||
912 | printk(KERN_DEBUG "probe() successful.\n"); | ||
913 | goto end; | ||
914 | err_cdev: | ||
915 | /* unmap the BARs */ | ||
916 | unmap_bars(ape, dev); | ||
917 | err_map: | ||
918 | /* free allocated irq */ | ||
919 | if (ape->irq_line >= 0) | ||
920 | free_irq(ape->irq_line, (void *)ape); | ||
921 | err_irq: | ||
922 | if (ape->msi_enabled) | ||
923 | pci_disable_msi(dev); | ||
924 | /* disable the device iff it is not in use */ | ||
925 | if (!ape->in_use) | ||
926 | pci_disable_device(dev); | ||
927 | if (ape->got_regions) | ||
928 | pci_release_regions(dev); | ||
929 | err_mask: | ||
930 | err_regions: | ||
931 | err_rev: | ||
932 | /* clean up everything before device enable() */ | ||
933 | err_enable: | ||
934 | if (ape->table_virt) | ||
935 | pci_free_consistent(dev, APE_CHDMA_TABLE_SIZE, ape->table_virt, ape->table_bus); | ||
936 | /* clean up everything before allocating descriptor table */ | ||
937 | err_table: | ||
938 | if (ape) | ||
939 | kfree(ape); | ||
940 | err_ape: | ||
941 | end: | ||
942 | return rc; | ||
943 | } | ||
944 | |||
945 | static void __devexit remove(struct pci_dev *dev) | ||
946 | { | ||
947 | struct ape_dev *ape; | ||
948 | printk(KERN_DEBUG "remove(0x%p)\n", dev); | ||
949 | if ((dev == 0) || (dev->dev.driver_data == 0)) { | ||
950 | printk(KERN_DEBUG "remove(dev = 0x%p) dev->dev.driver_data = 0x%p\n", dev, dev->dev.driver_data); | ||
951 | return; | ||
952 | } | ||
953 | ape = (struct ape_dev *)dev->dev.driver_data; | ||
954 | printk(KERN_DEBUG "remove(dev = 0x%p) where dev->dev.driver_data = 0x%p\n", dev, ape); | ||
955 | if (ape->pci_dev != dev) { | ||
956 | printk(KERN_DEBUG "dev->dev.driver_data->pci_dev (0x%08lx) != dev (0x%08lx)\n", | ||
957 | (unsigned long)ape->pci_dev, (unsigned long)dev); | ||
958 | } | ||
959 | /* remove character device */ | ||
960 | #if ALTPCIECHDMA_CDEV | ||
961 | sg_exit(ape); | ||
962 | #endif | ||
963 | |||
964 | if (ape->table_virt) | ||
965 | pci_free_consistent(dev, APE_CHDMA_TABLE_SIZE, ape->table_virt, ape->table_bus); | ||
966 | |||
967 | /* free IRQ | ||
968 | * @see LDD3 page 279 | ||
969 | */ | ||
970 | if (ape->irq_line >= 0) { | ||
971 | printk(KERN_DEBUG "Freeing IRQ #%d for dev_id 0x%08lx.\n", | ||
972 | ape->irq_line, (unsigned long)ape); | ||
973 | free_irq(ape->irq_line, (void *)ape); | ||
974 | } | ||
975 | /* MSI was enabled? */ | ||
976 | if (ape->msi_enabled) { | ||
977 | /* Disable MSI @see Documentation/MSI-HOWTO.txt */ | ||
978 | pci_disable_msi(dev); | ||
979 | ape->msi_enabled = 0; | ||
980 | } | ||
981 | /* unmap the BARs */ | ||
982 | unmap_bars(ape, dev); | ||
983 | if (!ape->in_use) | ||
984 | pci_disable_device(dev); | ||
985 | if (ape->got_regions) | ||
986 | /* to be called after device disable */ | ||
987 | pci_release_regions(dev); | ||
988 | } | ||
989 | |||
990 | #if ALTPCIECHDMA_CDEV | ||
991 | |||
992 | /* | ||
993 | * Called when the device goes from unused to used. | ||
994 | */ | ||
995 | static int sg_open(struct inode *inode, struct file *file) | ||
996 | { | ||
997 | struct ape_dev *ape; | ||
998 | printk(KERN_DEBUG DRV_NAME "_open()\n"); | ||
999 | /* pointer to containing data structure of the character device inode */ | ||
1000 | ape = container_of(inode->i_cdev, struct ape_dev, cdev); | ||
1001 | /* create a reference to our device state in the opened file */ | ||
1002 | file->private_data = ape; | ||
1003 | /* create virtual memory mapper */ | ||
1004 | ape->sgm = sg_create_mapper(MAX_CHDMA_SIZE); | ||
1005 | return 0; | ||
1006 | } | ||
1007 | |||
1008 | /* | ||
1009 | * Called when the device goes from used to unused. | ||
1010 | */ | ||
1011 | static int sg_close(struct inode *inode, struct file *file) | ||
1012 | { | ||
1013 | /* fetch device specific data stored earlier during open */ | ||
1014 | struct ape_dev *ape = (struct ape_dev *)file->private_data; | ||
1015 | printk(KERN_DEBUG DRV_NAME "_close()\n"); | ||
1016 | /* destroy virtual memory mapper */ | ||
1017 | sg_destroy_mapper(ape->sgm); | ||
1018 | return 0; | ||
1019 | } | ||
1020 | |||
1021 | static ssize_t sg_read(struct file *file, char __user *buf, size_t count, loff_t *pos) | ||
1022 | { | ||
1023 | /* fetch device specific data stored earlier during open */ | ||
1024 | struct ape_dev *ape = (struct ape_dev *)file->private_data; | ||
1025 | (void)ape; | ||
1026 | printk(KERN_DEBUG DRV_NAME "_read(buf=0x%p, count=%lld, pos=%llu)\n", buf, (s64)count, (u64)*pos); | ||
1027 | return count; | ||
1028 | } | ||
1029 | |||
1030 | /* sg_write() - Write to the device | ||
1031 | * | ||
1032 | * @buf userspace buffer | ||
1033 | * @count number of bytes in the userspace buffer | ||
1034 | * | ||
1035 | * Iterate over the userspace buffer, taking at most 255 * PAGE_SIZE bytes for | ||
1036 | * each DMA transfer. | ||
1037 | * For each transfer, get the user pages, build a sglist, map, build a | ||
1038 | * descriptor table. submit the transfer. wait for the interrupt handler | ||
1039 | * to wake us on completion. | ||
1040 | */ | ||
1041 | static ssize_t sg_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) | ||
1042 | { | ||
1043 | int hwnents, tents; | ||
1044 | size_t transfer_len, remaining = count, done = 0; | ||
1045 | u64 transfer_addr = (u64)buf; | ||
1046 | /* fetch device specific data stored earlier during open */ | ||
1047 | struct ape_dev *ape = (struct ape_dev *)file->private_data; | ||
1048 | printk(KERN_DEBUG DRV_NAME "_write(buf=0x%p, count=%lld, pos=%llu)\n", | ||
1049 | buf, (s64)count, (u64)*pos); | ||
1050 | /* TODO transfer boundaries at PAGE_SIZE granularity */ | ||
1051 | while (remaining > 0) | ||
1052 | { | ||
1053 | /* limit DMA transfer size */ | ||
1054 | transfer_len = (remaining < APE_CHDMA_MAX_TRANSFER_LEN)? remaining: | ||
1055 | APE_CHDMA_MAX_TRANSFER_LEN; | ||
1056 | /* get all user space buffer pages and create a scattergather list */ | ||
1057 | sgm_map_user_pages(ape->sgm, transfer_addr, transfer_len, 0/*read from userspace*/); | ||
1058 | printk(KERN_DEBUG DRV_NAME "mapped_pages=%d\n", ape->sgm->mapped_pages); | ||
1059 | /* map all entries in the scattergather list */ | ||
1060 | hwnents = pci_map_sg(ape->pci_dev, ape->sgm->sgl, ape->sgm->mapped_pages, DMA_TO_DEVICE); | ||
1061 | printk(KERN_DEBUG DRV_NAME "hwnents=%d\n", hwnents); | ||
1062 | /* build device descriptor tables and submit them to the DMA engine */ | ||
1063 | tents = ape_sg_to_chdma_table(ape->sgm->sgl, hwnents, 0, &ape->table_virt->desc[0], 4096); | ||
1064 | printk(KERN_DEBUG DRV_NAME "tents=%d\n", hwnents); | ||
1065 | #if 0 | ||
1066 | while (tables) { | ||
1067 | /* TODO build table */ | ||
1068 | /* TODO submit table to the device */ | ||
1069 | /* if engine stopped and unfinished work then start engine */ | ||
1070 | } | ||
1071 | put ourselves on wait queue | ||
1072 | #endif | ||
1073 | |||
1074 | dma_unmap_sg(NULL, ape->sgm->sgl, ape->sgm->mapped_pages, DMA_TO_DEVICE); | ||
1075 | /* dirty and free the pages */ | ||
1076 | sgm_unmap_user_pages(ape->sgm, 1/*dirtied*/); | ||
1077 | /* book keeping */ | ||
1078 | transfer_addr += transfer_len; | ||
1079 | remaining -= transfer_len; | ||
1080 | done += transfer_len; | ||
1081 | } | ||
1082 | return done; | ||
1083 | } | ||
1084 | |||
1085 | /* | ||
1086 | * character device file operations | ||
1087 | */ | ||
1088 | static struct file_operations sg_fops = { | ||
1089 | .owner = THIS_MODULE, | ||
1090 | .open = sg_open, | ||
1091 | .release = sg_close, | ||
1092 | .read = sg_read, | ||
1093 | .write = sg_write, | ||
1094 | }; | ||
1095 | |||
1096 | /* sg_init() - Initialize character device | ||
1097 | * | ||
1098 | * XXX Should ideally be tied to the device, on device probe, not module init. | ||
1099 | */ | ||
1100 | static int sg_init(struct ape_dev *ape) | ||
1101 | { | ||
1102 | int rc; | ||
1103 | printk(KERN_DEBUG DRV_NAME " sg_init()\n"); | ||
1104 | /* allocate a dynamically allocated character device node */ | ||
1105 | rc = alloc_chrdev_region(&ape->cdevno, 0/*requested minor*/, 1/*count*/, DRV_NAME); | ||
1106 | /* allocation failed? */ | ||
1107 | if (rc < 0) { | ||
1108 | printk("alloc_chrdev_region() = %d\n", rc); | ||
1109 | goto fail_alloc; | ||
1110 | } | ||
1111 | /* couple the device file operations to the character device */ | ||
1112 | cdev_init(&ape->cdev, &sg_fops); | ||
1113 | ape->cdev.owner = THIS_MODULE; | ||
1114 | /* bring character device live */ | ||
1115 | rc = cdev_add(&ape->cdev, ape->cdevno, 1/*count*/); | ||
1116 | if (rc < 0) { | ||
1117 | printk("cdev_add() = %d\n", rc); | ||
1118 | goto fail_add; | ||
1119 | } | ||
1120 | printk(KERN_DEBUG "altpciechdma = %d:%d\n", MAJOR(ape->cdevno), MINOR(ape->cdevno)); | ||
1121 | return 0; | ||
1122 | fail_add: | ||
1123 | /* free the dynamically allocated character device node */ | ||
1124 | unregister_chrdev_region(ape->cdevno, 1/*count*/); | ||
1125 | fail_alloc: | ||
1126 | return -1; | ||
1127 | } | ||
1128 | |||
1129 | /* sg_exit() - Cleanup character device | ||
1130 | * | ||
1131 | * XXX Should ideally be tied to the device, on device remove, not module exit. | ||
1132 | */ | ||
1133 | |||
1134 | static void sg_exit(struct ape_dev *ape) | ||
1135 | { | ||
1136 | printk(KERN_DEBUG DRV_NAME " sg_exit()\n"); | ||
1137 | /* remove the character device */ | ||
1138 | cdev_del(&ape->cdev); | ||
1139 | /* free the dynamically allocated character device node */ | ||
1140 | unregister_chrdev_region(ape->cdevno, 1/*count*/); | ||
1141 | } | ||
1142 | |||
1143 | #endif /* ALTPCIECHDMA_CDEV */ | ||
1144 | |||
1145 | /* used to register the driver with the PCI kernel sub system | ||
1146 | * @see LDD3 page 311 | ||
1147 | */ | ||
1148 | static struct pci_driver pci_driver = { | ||
1149 | .name = DRV_NAME, | ||
1150 | .id_table = ids, | ||
1151 | .probe = probe, | ||
1152 | .remove = remove, | ||
1153 | /* resume, suspend are optional */ | ||
1154 | }; | ||
1155 | |||
1156 | /** | ||
1157 | * alterapciechdma_init() - Module initialization, registers devices. | ||
1158 | */ | ||
1159 | static int __init alterapciechdma_init(void) | ||
1160 | { | ||
1161 | int rc = 0; | ||
1162 | printk(KERN_DEBUG DRV_NAME " init(), built at " __DATE__ " " __TIME__ "\n"); | ||
1163 | /* register this driver with the PCI bus driver */ | ||
1164 | rc = pci_register_driver(&pci_driver); | ||
1165 | if (rc < 0) | ||
1166 | return rc; | ||
1167 | return 0; | ||
1168 | } | ||
1169 | |||
1170 | /** | ||
1171 | * alterapciechdma_exit() - Module cleanup, unregisters devices. | ||
1172 | */ | ||
1173 | static void __exit alterapciechdma_exit(void) | ||
1174 | { | ||
1175 | printk(KERN_DEBUG DRV_NAME " exit(), built at " __DATE__ " " __TIME__ "\n"); | ||
1176 | /* unregister this driver from the PCI bus driver */ | ||
1177 | pci_unregister_driver(&pci_driver); | ||
1178 | } | ||
1179 | |||
1180 | MODULE_LICENSE("GPL"); | ||
1181 | |||
1182 | module_init(alterapciechdma_init); | ||
1183 | module_exit(alterapciechdma_exit); | ||
1184 | |||