aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJarod Wilson <jwilson@redhat.com>2007-12-03 13:43:12 -0500
committerStefan Richter <stefanr@s5r6.in-berlin.de>2007-12-10 15:55:19 -0500
commita186b4a6b22fdc96a1ed63da483d267b5d00839e (patch)
tree3d609e60aa00a921475b94c7b133e44068d15a99
parent41f81e88e01eb959f439f8537c58078e4bfc5291 (diff)
firewire: OHCI 1.0 Isochronous Receive support
Third rendition of FireWire OHCI 1.0 Isochronous Receive support, using a zero-copy method similar to OHCI 1.1 which puts the IR data payload directly into the userspace buffer. The zero-copy implementation eliminates the video artifacts, audio popping, and buffer underrun problems seen with version 1 of this patch, as well as fixing a regression in OHCI 1.1 support introduced by version 2 of this patch. Successfully tested in OHCI 1.1 mode on the following chipsets: - NEC uPD72847 (rev 01), OHCI 1.1 (PCI) - Ti XIO2200(A) (rev 01), OHCI 1.1 (PCIe) - Ti TSB41AB2 (rev 01), OHCI 1.1 (PCI on SB Audigy) - Apple UniNorth 2 (rev 81), OHCI 1.1 (PowerBook G4 onboard) Successfully tested in OHCI 1.0 mode on the following chipsets: - Agere FW323 (rev 06), OHCI 1.0 (Mac Mini onboard) - Agere FW323 (rev 06), OHCI 1.0 (PCI) - Via VT6306 (rev 46), OHCI 1.0 (PCI) - NEC OrangeLink (rev 01), OHCI 1.0 (PCI) - NEC uPD72847 (rev 01), OHCI 1.1 (PCI) - Ti XIO2200(A) (rev 01), OHCI 1.1 (PCIe) The bulk of testing was done in an x86_64 system, but was also successfully sanity-tested on other systems, including a PPC(32) PowerBook G4 and an i686 EPIA M10k. Crude benchmarking (watching top during capture) puts the CPU utilization during capture on the EPIA's 1GHz Via C3 processor around 13%, which is down from 30% with the v1 code. Some implementation details: To maintain the same userspace API as dual-buffer mode, we set up two descriptors for every incoming packet. The first is an INPUT_MORE descriptor, pointing to a buffer large enough to hold just the packet's iso headers, immediately followed by an INPUT_LAST descriptor, pointing to a chunk of the userspace buffer big enough for the packet's data payload. With this setup, each incoming packet fills in these two descriptors in a manner that very closely emulates dual-buffer receive, to the point where the bulk of the handle_ir_* code is now identical between the two (and probably primed for some restructuring to share code between them).
The only caveat I have at the moment is that neither of my OHCI 1.0 Via VT6307-based FireWire controllers works particularly well with this code for reasons I have yet to figure out. Signed-off-by: Jarod Wilson <jwilson@redhat.com> Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
-rw-r--r--drivers/firewire/fw-ohci.c175
1 files changed, 155 insertions, 20 deletions
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index c9b9081831d..436a855a4c6 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -437,6 +437,21 @@ static void ar_context_run(struct ar_context *ctx)
437 flush_writes(ctx->ohci); 437 flush_writes(ctx->ohci);
438} 438}
439 439
440static struct descriptor *
441find_branch_descriptor(struct descriptor *d, int z)
442{
443 int b, key;
444
445 b = (le16_to_cpu(d->control) & DESCRIPTOR_BRANCH_ALWAYS) >> 2;
446 key = (le16_to_cpu(d->control) & DESCRIPTOR_KEY_IMMEDIATE) >> 8;
447
448 /* figure out which descriptor the branch address goes in */
449 if (z == 2 && (b == 3 || key == 2))
450 return d;
451 else
452 return d + z - 1;
453}
454
440static void context_tasklet(unsigned long data) 455static void context_tasklet(unsigned long data)
441{ 456{
442 struct context *ctx = (struct context *) data; 457 struct context *ctx = (struct context *) data;
@@ -455,7 +470,7 @@ static void context_tasklet(unsigned long data)
455 address = le32_to_cpu(last->branch_address); 470 address = le32_to_cpu(last->branch_address);
456 z = address & 0xf; 471 z = address & 0xf;
457 d = ctx->buffer + (address - ctx->buffer_bus) / sizeof(*d); 472 d = ctx->buffer + (address - ctx->buffer_bus) / sizeof(*d);
458 last = (z == 2) ? d : d + z - 1; 473 last = find_branch_descriptor(d, z);
459 474
460 if (!ctx->callback(ctx, d, last)) 475 if (!ctx->callback(ctx, d, last))
461 break; 476 break;
@@ -566,7 +581,7 @@ static void context_append(struct context *ctx,
566 581
567 ctx->head_descriptor = d + z + extra; 582 ctx->head_descriptor = d + z + extra;
568 ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z); 583 ctx->prev_descriptor->branch_address = cpu_to_le32(d_bus | z);
569 ctx->prev_descriptor = z == 2 ? d : d + z - 1; 584 ctx->prev_descriptor = find_branch_descriptor(d, z);
570 585
571 dma_sync_single_for_device(ctx->ohci->card.device, ctx->buffer_bus, 586 dma_sync_single_for_device(ctx->ohci->card.device, ctx->buffer_bus,
572 ctx->buffer_size, DMA_TO_DEVICE); 587 ctx->buffer_size, DMA_TO_DEVICE);
@@ -655,7 +670,7 @@ at_context_queue_packet(struct context *ctx, struct fw_packet *packet)
655 driver_data = (struct driver_data *) &d[3]; 670 driver_data = (struct driver_data *) &d[3];
656 driver_data->packet = packet; 671 driver_data->packet = packet;
657 packet->driver_data = driver_data; 672 packet->driver_data = driver_data;
658 673
659 if (packet->payload_length > 0) { 674 if (packet->payload_length > 0) {
660 payload_bus = 675 payload_bus =
661 dma_map_single(ohci->card.device, packet->payload, 676 dma_map_single(ohci->card.device, packet->payload,
@@ -903,7 +918,7 @@ at_context_transmit(struct context *ctx, struct fw_packet *packet)
903 918
904 if (retval < 0) 919 if (retval < 0)
905 packet->callback(packet, &ctx->ohci->card, packet->ack); 920 packet->callback(packet, &ctx->ohci->card, packet->ack);
906 921
907} 922}
908 923
909static void bus_reset_tasklet(unsigned long data) 924static void bus_reset_tasklet(unsigned long data)
@@ -1431,6 +1446,57 @@ static int handle_ir_dualbuffer_packet(struct context *context,
1431 return 1; 1446 return 1;
1432} 1447}
1433 1448
1449static int handle_ir_packet_per_buffer(struct context *context,
1450 struct descriptor *d,
1451 struct descriptor *last)
1452{
1453 struct iso_context *ctx =
1454 container_of(context, struct iso_context, context);
1455 struct descriptor *pd = d + 1;
1456 __le32 *ir_header;
1457 size_t header_length;
1458 void *p, *end;
1459 int i, z;
1460
1461 if (pd->res_count == pd->req_count)
1462 /* Descriptor(s) not done yet, stop iteration */
1463 return 0;
1464
1465 header_length = le16_to_cpu(d->req_count);
1466
1467 i = ctx->header_length;
1468 z = le32_to_cpu(pd->branch_address) & 0xf;
1469 p = d + z;
1470 end = p + header_length;
1471
1472 while (p < end && i + ctx->base.header_size <= PAGE_SIZE) {
1473 /*
1474 * The iso header is byteswapped to little endian by
1475 * the controller, but the remaining header quadlets
1476 * are big endian. We want to present all the headers
1477 * as big endian, so we have to swap the first quadlet.
1478 */
1479 *(u32 *) (ctx->header + i) = __swab32(*(u32 *) (p + 4));
1480 memcpy(ctx->header + i + 4, p + 8, ctx->base.header_size - 4);
1481 i += ctx->base.header_size;
1482 p += ctx->base.header_size + 4;
1483 }
1484
1485 ctx->header_length = i;
1486
1487 if (le16_to_cpu(pd->control) & DESCRIPTOR_IRQ_ALWAYS) {
1488 ir_header = (__le32 *) (d + z);
1489 ctx->base.callback(&ctx->base,
1490 le32_to_cpu(ir_header[0]) & 0xffff,
1491 ctx->header_length, ctx->header,
1492 ctx->base.callback_data);
1493 ctx->header_length = 0;
1494 }
1495
1496
1497 return 1;
1498}
1499
1434static int handle_it_packet(struct context *context, 1500static int handle_it_packet(struct context *context,
1435 struct descriptor *d, 1501 struct descriptor *d,
1436 struct descriptor *last) 1502 struct descriptor *last)
@@ -1466,14 +1532,12 @@ ohci_allocate_iso_context(struct fw_card *card, int type, size_t header_size)
1466 } else { 1532 } else {
1467 mask = &ohci->ir_context_mask; 1533 mask = &ohci->ir_context_mask;
1468 list = ohci->ir_context_list; 1534 list = ohci->ir_context_list;
1469 callback = handle_ir_dualbuffer_packet; 1535 if (ohci->version >= OHCI_VERSION_1_1)
1536 callback = handle_ir_dualbuffer_packet;
1537 else
1538 callback = handle_ir_packet_per_buffer;
1470 } 1539 }
1471 1540
1472 /* FIXME: We need a fallback for pre 1.1 OHCI. */
1473 if (callback == handle_ir_dualbuffer_packet &&
1474 ohci->version < OHCI_VERSION_1_1)
1475 return ERR_PTR(-ENOSYS);
1476
1477 spin_lock_irqsave(&ohci->lock, flags); 1541 spin_lock_irqsave(&ohci->lock, flags);
1478 index = ffs(*mask) - 1; 1542 index = ffs(*mask) - 1;
1479 if (index >= 0) 1543 if (index >= 0)
@@ -1532,7 +1596,9 @@ static int ohci_start_iso(struct fw_iso_context *base,
1532 context_run(&ctx->context, match); 1596 context_run(&ctx->context, match);
1533 } else { 1597 } else {
1534 index = ctx - ohci->ir_context_list; 1598 index = ctx - ohci->ir_context_list;
1535 control = IR_CONTEXT_DUAL_BUFFER_MODE | IR_CONTEXT_ISOCH_HEADER; 1599 control = IR_CONTEXT_ISOCH_HEADER;
1600 if (ohci->version >= OHCI_VERSION_1_1)
1601 control |= IR_CONTEXT_DUAL_BUFFER_MODE;
1536 match = (tags << 28) | (sync << 8) | ctx->base.channel; 1602 match = (tags << 28) | (sync << 8) | ctx->base.channel;
1537 if (cycle >= 0) { 1603 if (cycle >= 0) {
1538 match |= (cycle & 0x07fff) << 12; 1604 match |= (cycle & 0x07fff) << 12;
@@ -1738,7 +1804,6 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base,
1738 offset = payload & ~PAGE_MASK; 1804 offset = payload & ~PAGE_MASK;
1739 rest = p->payload_length; 1805 rest = p->payload_length;
1740 1806
1741 /* FIXME: OHCI 1.0 doesn't support dual buffer receive */
1742 /* FIXME: make packet-per-buffer/dual-buffer a context option */ 1807 /* FIXME: make packet-per-buffer/dual-buffer a context option */
1743 while (rest > 0) { 1808 while (rest > 0) {
1744 d = context_get_descriptors(&ctx->context, 1809 d = context_get_descriptors(&ctx->context,
@@ -1777,6 +1842,81 @@ ohci_queue_iso_receive_dualbuffer(struct fw_iso_context *base,
1777} 1842}
1778 1843
1779static int 1844static int
1845ohci_queue_iso_receive_packet_per_buffer(struct fw_iso_context *base,
1846 struct fw_iso_packet *packet,
1847 struct fw_iso_buffer *buffer,
1848 unsigned long payload)
1849{
1850 struct iso_context *ctx = container_of(base, struct iso_context, base);
1851 struct descriptor *d = NULL, *pd = NULL;
1852 struct fw_iso_packet *p;
1853 dma_addr_t d_bus, page_bus;
1854 u32 z, header_z, rest;
1855 int i, page, offset, packet_count, header_size;
1856
1857 if (packet->skip) {
1858 d = context_get_descriptors(&ctx->context, 1, &d_bus);
1859 if (d == NULL)
1860 return -ENOMEM;
1861
1862 d->control = cpu_to_le16(DESCRIPTOR_STATUS |
1863 DESCRIPTOR_INPUT_LAST |
1864 DESCRIPTOR_BRANCH_ALWAYS |
1865 DESCRIPTOR_WAIT);
1866 context_append(&ctx->context, d, 1, 0);
1867 }
1868
1869 /* one descriptor for header, one for payload */
1870 /* FIXME: handle cases where we need multiple desc. for payload */
1871 z = 2;
1872 p = packet;
1873
1874 /*
1875 * The OHCI controller puts the status word in the
1876 * buffer too, so we need 4 extra bytes per packet.
1877 */
1878 packet_count = p->header_length / ctx->base.header_size;
1879 header_size = packet_count * (ctx->base.header_size + 4);
1880
1881 /* Get header size in number of descriptors. */
1882 header_z = DIV_ROUND_UP(header_size, sizeof(*d));
1883 page = payload >> PAGE_SHIFT;
1884 offset = payload & ~PAGE_MASK;
1885 rest = p->payload_length;
1886
1887 for (i = 0; i < packet_count; i++) {
1888 /* d points to the header descriptor */
1889 d = context_get_descriptors(&ctx->context,
1890 z + header_z, &d_bus);
1891 if (d == NULL)
1892 return -ENOMEM;
1893
1894 d->control = cpu_to_le16(DESCRIPTOR_INPUT_MORE);
1895 d->req_count = cpu_to_le16(header_size);
1896 d->res_count = d->req_count;
1897 d->data_address = cpu_to_le32(d_bus + (z * sizeof(*d)));
1898
1899 /* pd points to the payload descriptor */
1900 pd = d + 1;
1901 pd->control = cpu_to_le16(DESCRIPTOR_STATUS |
1902 DESCRIPTOR_INPUT_LAST |
1903 DESCRIPTOR_BRANCH_ALWAYS);
1904 if (p->interrupt)
1905 pd->control |= cpu_to_le16(DESCRIPTOR_IRQ_ALWAYS);
1906
1907 pd->req_count = cpu_to_le16(rest);
1908 pd->res_count = pd->req_count;
1909
1910 page_bus = page_private(buffer->pages[page]);
1911 pd->data_address = cpu_to_le32(page_bus + offset);
1912
1913 context_append(&ctx->context, d, z, header_z);
1914 }
1915
1916 return 0;
1917}
1918
1919static int
1780ohci_queue_iso(struct fw_iso_context *base, 1920ohci_queue_iso(struct fw_iso_context *base,
1781 struct fw_iso_packet *packet, 1921 struct fw_iso_packet *packet,
1782 struct fw_iso_buffer *buffer, 1922 struct fw_iso_buffer *buffer,
@@ -1790,8 +1930,9 @@ ohci_queue_iso(struct fw_iso_context *base,
1790 return ohci_queue_iso_receive_dualbuffer(base, packet, 1930 return ohci_queue_iso_receive_dualbuffer(base, packet,
1791 buffer, payload); 1931 buffer, payload);
1792 else 1932 else
1793 /* FIXME: Implement fallback for OHCI 1.0 controllers. */ 1933 return ohci_queue_iso_receive_packet_per_buffer(base, packet,
1794 return -ENOSYS; 1934 buffer,
1935 payload);
1795} 1936}
1796 1937
1797static const struct fw_card_driver ohci_driver = { 1938static const struct fw_card_driver ohci_driver = {
@@ -1911,12 +2052,6 @@ pci_probe(struct pci_dev *dev, const struct pci_device_id *ent)
1911 ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff; 2052 ohci->version = reg_read(ohci, OHCI1394_Version) & 0x00ff00ff;
1912 fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n", 2053 fw_notify("Added fw-ohci device %s, OHCI version %x.%x\n",
1913 dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff); 2054 dev->dev.bus_id, ohci->version >> 16, ohci->version & 0xff);
1914 if (ohci->version < OHCI_VERSION_1_1) {
1915 fw_notify(" Isochronous I/O is not yet implemented for "
1916 "OHCI 1.0 chips.\n");
1917 fw_notify(" Cameras, audio devices etc. won't work on "
1918 "this controller with this driver version.\n");
1919 }
1920 return 0; 2055 return 0;
1921 2056
1922 fail_self_id: 2057 fail_self_id: