diff options
author | Jarod Wilson <jwilson@redhat.com> | 2007-12-03 13:43:12 -0500 |
---|---|---|
committer | Stefan Richter <stefanr@s5r6.in-berlin.de> | 2007-12-10 15:55:19 -0500 |
commit | a186b4a6b22fdc96a1ed63da483d267b5d00839e (patch) | |
tree | 3d609e60aa00a921475b94c7b133e44068d15a99 | |
parent | 41f81e88e01eb959f439f8537c58078e4bfc5291 (diff) |
firewire: OHCI 1.0 Isochronous Receive support
Third rendition of FireWire OHCI 1.0 Isochronous Receive support, using a
zero-copy method similar to OHCI 1.1 which puts the IR data payload directly
into the userspace buffer. The zero-copy implementation eliminates the
video artifacts, audio popping, and buffer underrun problems seen with
version 1 of this patch, as well as fixing a regression in OHCI 1.1 support
introduced by version 2 of this patch.
Successfully tested in OHCI 1.1 mode on the following chipsets:
- NEC uPD72847 (rev 01), OHCI 1.1 (PCI)
- Ti XIO2200(A) (rev 01), OHCI 1.1 (PCIe)
- Ti TSB41AB2 (rev 01), OHCI 1.1 (PCI on SB Audigy)
- Apple UniNorth 2 (rev 81), OHCI 1.1 (PowerBook G4 onboard)
Successfully tested in OHCI 1.0 mode on the following chipsets:
- Agere FW323 (rev 06), OHCI 1.0 (Mac Mini onboard)
- Agere FW323 (rev 06), OHCI 1.0 (PCI)
- Via VT6306 (rev 46), OHCI 1.0 (PCI)
- NEC OrangeLink (rev 01), OHCI 1.0 (PCI)
- NEC uPD72847 (rev 01), OHCI 1.1 (PCI)
- Ti XIO2200(A) (rev 01), OHCI 1.1 (PCIe)
The bulk of testing was done on an x86_64 system, but was also successfully
sanity-tested on other systems, including a PPC(32) PowerBook G4 and an i686
EPIA M10k. Crude benchmarking (watching top during capture) puts the cpu
utilization during capture on the EPIA's 1GHz Via C3 processor around 13%,
which is down from 30% with the v1 code.
Some implementation details:
To maintain the same userspace API as dual-buffer mode, we set up two
descriptors for every incoming packet. The first is an INPUT_MORE descriptor,
pointing to a buffer large enough to hold just the packet's iso headers,
immediately followed by an INPUT_LAST descriptor, pointing to a chunk of the
userspace buffer big enough for the packet's data payload. With this setup,
each incoming packet fills in these two descriptors in a manner that very
closely emulates dual-buffer receive, to the point where the bulk of the
handle_ir_* code is now identical between the two (and probably primed for
some restructuring to share code between them).
The only caveat I have at the moment is that neither of my OHCI 1.0 Via
VT6307-based FireWire controllers works particularly well with this code
for reasons I have yet to figure out.
Signed-off-by: Jarod Wilson <jwilson@redhat.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
-rw-r--r-- | drivers/firewire/fw-ohci.c | 175 |
1 files changed, 155 insertions, 20 deletions
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c index c9b9081831da..436a855a4c60 100644 --- a/drivers/firewire/fw-ohci.c +++ b/drivers/firewire/fw-ohci.c | |||
@@ -437,6 +437,21 @@ static void ar_context_run(struct ar_context *ctx) | |||
437 | flush_writes(ctx->ohci); | 437 | flush_writes(ctx->ohci); |
438 | } | 438 | } |
439 | 439 | ||
440 | static struct descriptor * | ||
441 | find_branch_descriptor(struct descriptor *d, int z) | ||
442 | { | ||
443 | int b, key; | ||
444 | |||
445 | b = (le16_to_cpu(d->control) & DESCRIPTOR_BRANCH_ALWAYS) >> 2; | ||
446 | key = (le16_to_cpu(d->control) & DESCRIPTOR_KEY_IMMEDIATE) >> 8; | ||
447 | |||
448 | /* figure out which descriptor the branch address goes in */ | ||
449 | if (z == 2 && (b == 3 || key == 2)) | ||
450 | return d; | ||
451 | else | ||
452 | return d + z - 1; | ||
453 | } | ||
454 | |||
440 | static void context_tasklet(unsigned long data) | 455 | static void context_tasklet(unsigned long data) |
441 | { | 456 | { |
442 | struct context *ctx = (struct context *) data; | 457 | struct context *ctx = (struct context *) data; |
@@ -455,7 +470,7 @@ static void context_tasklet(unsigned long data) | |||
455 | address = le32_to_cpu(last->branch_address); | 470 | address = le32_to_cpu(last->branch_address); |
456 | z = address & 0xf; | 471 | z = address & 0xf; |
457 | d = ctx->buffer + (address - ctx->buffer_bus) / sizeof(*d); | 472 | d = ctx->buffer + (address - ctx->buffer_bus) / sizeof(*d); |
458 | last = (z == 2) ? d : d + z - 1; | 473 | last = find_branch_descriptor(d, z); |
459 | 474 | ||
460 | if (!ctx->callback(ctx, d, last)) | 475 | if (!ctx->callback(ctx, d, last)) |
461 | break; | 476 | break; |
@@ -566,7 +581,7 @@ static void context_append(struct context *ctx, | |||
566 | 581 | ||
567 | ctx->head_descriptor = d + z + extra; | 582 | ctx->head_descriptor = d + z + extra; |
568 | ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z); | 583 | ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z); |
569 | ctx->prev_descriptor = z == 2 ? d : d + z - 1; | 584 | ctx->prev_descriptor = find_branch_descriptor(d, z); |
570 | 585 | ||
571 | dma_sync_single_for_device(ctx->ohci->card.device, ctx->buffer_bus, | 586 | dma_sync_single_for_device(ctx->ohci->card.device, ctx->buffer_bus, |
572 | ctx->buffer_size, DMA_TO_DEVICE); | 587 | ctx->buffer_size, DMA_TO_DEVICE); |
@@ -655,7 +670,7 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet) | |||
655 | driver_data = (struct driver_data *) &d[3]; | 670 | driver_data = (struct driver_data *) &d[3]; |
656 | driver_data->packet = packet; | 671 | driver_data->packet = packet; |
657 | packet->driver_data = driver_data; | 672 | packet->driver_data = driver_data; |
658 | 673 | ||
659 | if (packet->payload_length > 0) { | 674 | if (packet->payload_length > 0) { |
660 | payload_bus = | 675 | payload_bus = |
661 | dma_map_single(ohci->card.device, packet->payload, | 676 | dma_map_single(ohci->card.device, packet->payload, |
@@ -903,7 +918,7 @@ at_context_transmit(struct context *ctx, struct fw_packet *packet) | |||
903 | 918 | ||
904 | if (retval < 0) | 919 | if (retval < 0) |
905 | packet->callback(packet, &ctx->ohci->card, packet->ack); | 920 | packet->callback(packet, &ctx->ohci->card, packet->ack); |
906 | 921 | ||
907 | } | 922 | } |
908 | 923 | ||
909 | static void bus_reset_tasklet(unsigned long data) | 924 | static void bus_reset_tasklet(unsigned long data) |
@@ -1431,6 +1446,57 @@ static int handle_ir_dualbuffer_packet(struct context *context, | |||
1431 | return 1; | 1446 | return 1; |
1432 | } | 1447 | } |
1433 | 1448 | ||
1449 | static int handle_ir_packet_per_buffer(struct context *context, | ||
1450 | struct descriptor *d, | ||
1451 | struct descriptor *last) | ||
1452 | { | ||
1453 | struct iso_context *ctx = | ||
1454 | container_of(context, struct iso_context, context); | ||
1455 | struct descriptor *pd = d + 1; | ||
1456 | __le32 *ir_header; | ||
1457 | size_t header_length; | ||
1458 | void *p, *end; | ||
1459 | int i, z; | ||
1460 | |||
1461 | if (pd->res_count == pd->req_count) | ||
1462 | /* Descriptor(s) not done yet, stop iteration */ | ||
1463 | return 0; | ||
1464 | |||
1465 | header_length = le16_to_cpu(d->req_count); | ||
1466 | |||
1467 | i = ctx->header_length; | ||
1468 | z = le32_to_cpu(pd->branch_address) & 0xf; | ||
1469 | p = d + z; | ||
1470 | end = p + header_length; | ||
1471 | |||
1472 | while (p < end && i + ctx->base.header_size <= PAGE_SIZE) { | ||
1473 | /* | ||
1474 | * The iso header is byteswapped to little endian by | ||
1475 | * the controller, but the remaining header quadlets | ||
1476 | * are big endian. We want to present all the headers | ||
1477 | * as big endian, so we have to swap the first quadlet. | ||
1478 | */ | ||
1479 | *(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4)); | ||
1480 | memcpy(ctx->header + i + 4, p + 8, ctx->base.header_size - 4); | ||
1481 | i += ctx->base.header_size; | ||
1482 | p += ctx->base.header_size + 4; | ||
1483 | } | ||
1484 | |||
1485 | ctx->header_length = i; | ||
1486 | |||
1487 | if (le16_to_cpu(pd->control) & DESCRIPTOR_IRQ_ALWAYS) { | ||
1488 | ir_header = (__le32 *) (d + z); | ||
1489 | ctx->base.callback(&ctx->base, | ||
1490 | le32_to_cpu(ir_header[0]) & 0xffff, | ||
1491 | ctx->header_length, ctx->header, | ||
1492 | ctx->base.callback_data); | ||
1493 | ctx->header_length = 0; | ||
1494 | } | ||
1495 | |||
1496 | |||
1497 | return 1; | ||
1498 | } | ||
1499 | |||
1434 | static int handle_it_packet(struct context *context, | 1500 | static int handle_it_packet(struct context *context, |
1435 | struct descriptor *d, | 1501 | struct descriptor *d, |
1436 | struct descriptor *last) | 1502 | struct descriptor *last) |
@@ -1466,14 +1532,12 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size) | |||
1466 | } else { | 1532 | } else { |
1467 | mask = &ohci->ir_context_mask; | 1533 | mask = &ohci->ir_context_mask; |
1468 | list = ohci->ir_context_list; | 1534 | list = ohci->ir_context_list; |
1469 | callback = handle_ir_dualbuffer_packet; | 1535 | if (ohci->version >= OHCI_VERSION_1_1) |
1536 | callback = handle_ir_dualbuffer_packet; | ||
1537 | else | ||
1538 | callback = handle_ir_packet_per_buffer; | ||
1470 | } | 1539 | } |
1471 | 1540 | ||
1472 | /* FIXME: We need a fallback for pre 1.1 OHCI. */ | ||
1473 | if (callback == handle_ir_dualbuffer_packet && | ||
1474 | ohci->version < OHCI_VERSION_1_1) | ||
1475 | return ERR_PTR(-ENOSYS); | ||
1476 | |||
1477 | spin_lock_irqsave(&ohci->lock, flags); | 1541 | spin_lock_irqsave(&ohci->lock, flags); |
1478 | index = ffs(*mask) - 1; | 1542 | index = ffs(*mask) - 1; |
1479 | if (index >= 0) | 1543 | if (index >= 0) |
@@ -1532,7 +1596,9 @@ static int ohci_start_iso(struct fw_iso_context *base, | |||
1532 | context_run(&ctx->context, match); | 1596 | context_run(&ctx->context, match); |
1533 | } else { | 1597 | } else { |
1534 | index = ctx - ohci->ir_context_list; | 1598 | index = ctx - ohci->ir_context_list; |
1535 | control = IR_CONTEXT_DUAL_BUFFER_MODE | IR_CONTEXT_ISOCH_HEADER; | 1599 | control = IR_CONTEXT_ISOCH_HEADER; |
1600 | if (ohci->version >= OHCI_VERSION_1_1) | ||
1601 | control |= IR_CONTEXT_DUAL_BUFFER_MODE; | ||
1536 | match = (tags << 28) | (sync << 8) | ctx->base.channel; | 1602 | match = (tags << 28) | (sync << 8) | ctx->base.channel; |
1537 | if (cycle >= 0) { | 1603 | if (cycle >= 0) { |
1538 | match |= (cycle & 0x07fff) << 12; | 1604 | match |= (cycle & 0x07fff) << 12; |
@@ -1738,7 +1804,6 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, | |||
1738 | offset = payload & ~PAGE_MASK; | 1804 | offset = payload & ~PAGE_MASK; |
1739 | rest = p->payload_length; | 1805 | rest = p->payload_length; |
1740 | 1806 | ||
1741 | /* FIXME: OHCI 1.0 doesn't support dual buffer receive */ | ||
1742 | /* FIXME: make packet-per-buffer/dual-buffer a context option */ | 1807 | /* FIXME: make packet-per-buffer/dual-buffer a context option */ |
1743 | while (rest > 0) { | 1808 | while (rest > 0) { |
1744 | d = context_get_descriptors(&ctx->context, | 1809 | d = context_get_descriptors(&ctx->context, |
@@ -1777,6 +1842,81 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base, | |||
1777 | } | 1842 | } |
1778 | 1843 | ||
1779 | static int | 1844 | static int |
1845 | ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base, | ||
1846 | struct fw_iso_packet *packet, | ||
1847 | struct fw_iso_buffer *buffer, | ||
1848 | unsigned long payload) | ||
1849 | { | ||
1850 | struct iso_context *ctx = container_of(base, struct iso_context, base); | ||
1851 | struct descriptor *d = NULL, *pd = NULL; | ||
1852 | struct fw_iso_packet *p; | ||
1853 | dma_addr_t d_bus, page_bus; | ||
1854 | u32 z, header_z, rest; | ||
1855 | int i, page, offset, packet_count, header_size; | ||
1856 | |||
1857 | if (packet->skip) { | ||
1858 | d = context_get_descriptors(&ctx->context, 1, &d_bus); | ||
1859 | if (d == NULL) | ||
1860 | return -ENOMEM; | ||
1861 | |||
1862 | d->control = cpu_to_le16(DESCRIPTOR_STATUS | | ||
1863 | DESCRIPTOR_INPUT_LAST | | ||
1864 | DESCRIPTOR_BRANCH_ALWAYS | | ||
1865 | DESCRIPTOR_WAIT); | ||
1866 | context_append(&ctx->context, d, 1, 0); | ||
1867 | } | ||
1868 | |||
1869 | /* one descriptor for header, one for payload */ | ||
1870 | /* FIXME: handle cases where we need multiple desc. for payload */ | ||
1871 | z = 2; | ||
1872 | p = packet; | ||
1873 | |||
1874 | /* | ||
1875 | * The OHCI controller puts the status word in the | ||
1876 | * buffer too, so we need 4 extra bytes per packet. | ||
1877 | */ | ||
1878 | packet_count = p->header_length / ctx->base.header_size; | ||
1879 | header_size = packet_count * (ctx->base.header_size + 4); | ||
1880 | |||
1881 | /* Get header size in number of descriptors. */ | ||
1882 | header_z = DIV_ROUND_UP(header_size, sizeof(*d)); | ||
1883 | page = payload >> PAGE_SHIFT; | ||
1884 | offset = payload & ~PAGE_MASK; | ||
1885 | rest = p->payload_length; | ||
1886 | |||
1887 | for (i = 0; i < packet_count; i++) { | ||
1888 | /* d points to the header descriptor */ | ||
1889 | d = context_get_descriptors(&ctx->context, | ||
1890 | z + header_z, &d_bus); | ||
1891 | if (d == NULL) | ||
1892 | return -ENOMEM; | ||
1893 | |||
1894 | d->control = cpu_to_le16(DESCRIPTOR_INPUT_MORE); | ||
1895 | d->req_count = cpu_to_le16(header_size); | ||
1896 | d->res_count = d->req_count; | ||
1897 | d->data_address = cpu_to_le32(d_bus + (z * sizeof(*d))); | ||
1898 | |||
1899 | /* pd points to the payload descriptor */ | ||
1900 | pd = d + 1; | ||
1901 | pd->control = cpu_to_le16(DESCRIPTOR_STATUS | | ||
1902 | DESCRIPTOR_INPUT_LAST | | ||
1903 | DESCRIPTOR_BRANCH_ALWAYS); | ||
1904 | if (p->interrupt) | ||
1905 | pd->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS); | ||
1906 | |||
1907 | pd->req_count = cpu_to_le16(rest); | ||
1908 | pd->res_count = pd->req_count; | ||
1909 | |||
1910 | page_bus = page_private(buffer->pages[page]); | ||
1911 | pd->data_address = cpu_to_le32(page_bus + offset); | ||
1912 | |||
1913 | context_append(&ctx->context, d, z, header_z); | ||
1914 | } | ||
1915 | |||
1916 | return 0; | ||
1917 | } | ||
1918 | |||
1919 | static int | ||
1780 | ohci_queue_iso(struct fw_iso_context *base, | 1920 | ohci_queue_iso(struct fw_iso_context *base, |
1781 | struct fw_iso_packet *packet, | 1921 | struct fw_iso_packet *packet, |
1782 | struct fw_iso_buffer *buffer, | 1922 | struct fw_iso_buffer *buffer, |
@@ -1790,8 +1930,9 @@ ohci_queue_iso(struct fw_iso_context *base, | |||
1790 | return ohci_queue_iso_receive_dualbuffer(base, packet, | 1930 | return ohci_queue_iso_receive_dualbuffer(base, packet, |
1791 | buffer, payload); | 1931 | buffer, payload); |
1792 | else | 1932 | else |
1793 | /* FIXME: Implement fallback for OHCI 1.0 controllers. */ | 1933 | return ohci_queue_iso_receive_packet_per_buffer(base, packet, |
1794 | return -ENOSYS; | 1934 | buffer, |
1935 | payload); | ||
1795 | } | 1936 | } |
1796 | 1937 | ||
1797 | static const struct fw_card_driver ohci_driver = { | 1938 | static const struct fw_card_driver ohci_driver = { |
@@ -1911,12 +2052,6 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent) | |||
1911 | ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; | 2052 | ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; |
1912 | fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", | 2053 | fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", |
1913 | dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff); | 2054 | dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff); |
1914 | if (ohci->version < OHCI_VERSION_1_1) { | ||
1915 | fw_notify(" Isochronous I/O is not yet implemented for " | ||
1916 | "OHCI 1.0 chips.\n"); | ||
1917 | fw_notify(" Cameras, audio devices etc. won't work on " | ||
1918 | "this controller with this driver version.\n"); | ||
1919 | } | ||
1920 | return 0; | 2055 | return 0; |
1921 | 2056 | ||
1922 | fail_self_id: | 2057 | fail_self_id: |