aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/usb
diff options
context:
space:
mode:
authorSarah Sharp <sarah.a.sharp@linux.intel.com>2009-04-27 22:59:19 -0400
committerGreg Kroah-Hartman <gregkh@suse.de>2009-06-16 00:44:49 -0400
commit8a96c052283e68fe91a6c657c175b39bfed80bed (patch)
treecf32b4462b2f3f59554b3531d866b60be2c4f6a1 /drivers/usb
parente04748e3a87271fcf30d383e3780c5d3ee1c1618 (diff)
USB: xhci: Scatter gather list support for bulk transfers.
Add support for bulk URBs that pass scatter gather lists to xHCI. This allows xHCI to more efficiently enqueue these transfers, and allows the host controller to take advantage of USB 3.0 "bursts" for bulk endpoints. Use requested length to calculate the number of TRBs needed for a scatter gather list transfer, instead of using the number of sglist entries. The application can pass down a scatter gather list that is bigger than it needs for the requested transfer. Scatter gather entries can cross 64KB boundaries, so be careful to set up TRBs such that no buffer crosses a 64KB boundary. Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Diffstat (limited to 'drivers/usb')
-rw-r--r--drivers/usb/host/xhci-ring.c245
1 files changed, 217 insertions, 28 deletions
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 3364381ee6ca..c948288042e2 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -64,6 +64,7 @@
64 * endpoint rings; it generates events on the event ring for these. 64 * endpoint rings; it generates events on the event ring for these.
65 */ 65 */
66 66
67#include <linux/scatterlist.h>
67#include "xhci.h" 68#include "xhci.h"
68 69
69/* 70/*
@@ -758,6 +759,211 @@ int xhci_prepare_transfer(struct xhci_hcd *xhci,
758 return 0; 759 return 0;
759} 760}
760 761
762unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
763{
764 int num_sgs, num_trbs, running_total, temp, i;
765 struct scatterlist *sg;
766
767 sg = NULL;
768 num_sgs = urb->num_sgs;
769 temp = urb->transfer_buffer_length;
770
771 xhci_dbg(xhci, "count sg list trbs: \n");
772 num_trbs = 0;
773 for_each_sg(urb->sg->sg, sg, num_sgs, i) {
774 unsigned int previous_total_trbs = num_trbs;
775 unsigned int len = sg_dma_len(sg);
776
777 /* Scatter gather list entries may cross 64KB boundaries */
778 running_total = TRB_MAX_BUFF_SIZE -
779 (sg_dma_address(sg) & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
780 if (running_total != 0)
781 num_trbs++;
782
783 /* How many more 64KB chunks to transfer, how many more TRBs? */
784 while (running_total < sg_dma_len(sg)) {
785 num_trbs++;
786 running_total += TRB_MAX_BUFF_SIZE;
787 }
788 xhci_dbg(xhci, " sg #%d: dma = %#x, len = %#x (%d), num_trbs = %d\n",
789 i, sg_dma_address(sg), len, len,
790 num_trbs - previous_total_trbs);
791
792 len = min_t(int, len, temp);
793 temp -= len;
794 if (temp == 0)
795 break;
796 }
797 xhci_dbg(xhci, "\n");
798 if (!in_interrupt())
799 dev_dbg(&urb->dev->dev, "ep %#x - urb len = %d, sglist used, num_trbs = %d\n",
800 urb->ep->desc.bEndpointAddress,
801 urb->transfer_buffer_length,
802 num_trbs);
803 return num_trbs;
804}
805
806void check_trb_math(struct urb *urb, int num_trbs, int running_total)
807{
808 if (num_trbs != 0)
809 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated number of "
810 "TRBs, %d left\n", __func__,
811 urb->ep->desc.bEndpointAddress, num_trbs);
812 if (running_total != urb->transfer_buffer_length)
813 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated tx length, "
814 "queued %#x (%d), asked for %#x (%d)\n",
815 __func__,
816 urb->ep->desc.bEndpointAddress,
817 running_total, running_total,
818 urb->transfer_buffer_length,
819 urb->transfer_buffer_length);
820}
821
822void giveback_first_trb(struct xhci_hcd *xhci, int slot_id,
823 unsigned int ep_index, int start_cycle,
824 struct xhci_generic_trb *start_trb, struct xhci_td *td)
825{
826 u32 field;
827
828 /*
829 * Pass all the TRBs to the hardware at once and make sure this write
830 * isn't reordered.
831 */
832 wmb();
833 start_trb->field[3] |= start_cycle;
834 field = xhci_readl(xhci, &xhci->dba->doorbell[slot_id]) & DB_MASK;
835 xhci_writel(xhci, field | EPI_TO_DB(ep_index),
836 &xhci->dba->doorbell[slot_id]);
837 /* Flush PCI posted writes */
838 xhci_readl(xhci, &xhci->dba->doorbell[slot_id]);
839}
840
841int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
842 struct urb *urb, int slot_id, unsigned int ep_index)
843{
844 struct xhci_ring *ep_ring;
845 unsigned int num_trbs;
846 struct xhci_td *td;
847 struct scatterlist *sg;
848 int num_sgs;
849 int trb_buff_len, this_sg_len, running_total;
850 bool first_trb;
851 u64 addr;
852
853 struct xhci_generic_trb *start_trb;
854 int start_cycle;
855
856 ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
857 num_trbs = count_sg_trbs_needed(xhci, urb);
858 num_sgs = urb->num_sgs;
859
860 trb_buff_len = xhci_prepare_transfer(xhci, xhci->devs[slot_id],
861 ep_index, num_trbs, urb, &td, mem_flags);
862 if (trb_buff_len < 0)
863 return trb_buff_len;
864 /*
865 * Don't give the first TRB to the hardware (by toggling the cycle bit)
866 * until we've finished creating all the other TRBs. The ring's cycle
867 * state may change as we enqueue the other TRBs, so save it too.
868 */
869 start_trb = &ep_ring->enqueue->generic;
870 start_cycle = ep_ring->cycle_state;
871
872 running_total = 0;
873 /*
874 * How much data is in the first TRB?
875 *
876 * There are three forces at work for TRB buffer pointers and lengths:
877 * 1. We don't want to walk off the end of this sg-list entry buffer.
878 * 2. The transfer length that the driver requested may be smaller than
879 * the amount of memory allocated for this scatter-gather list.
880 * 3. TRBs buffers can't cross 64KB boundaries.
881 */
882 sg = urb->sg->sg;
883 addr = (u64) sg_dma_address(sg);
884 this_sg_len = sg_dma_len(sg);
885 trb_buff_len = TRB_MAX_BUFF_SIZE -
886 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
887 trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
888 if (trb_buff_len > urb->transfer_buffer_length)
889 trb_buff_len = urb->transfer_buffer_length;
890 xhci_dbg(xhci, "First length to xfer from 1st sglist entry = %u\n",
891 trb_buff_len);
892
893 first_trb = true;
894 /* Queue the first TRB, even if it's zero-length */
895 do {
896 u32 field = 0;
897
898 /* Don't change the cycle bit of the first TRB until later */
899 if (first_trb)
900 first_trb = false;
901 else
902 field |= ep_ring->cycle_state;
903
904 /* Chain all the TRBs together; clear the chain bit in the last
905 * TRB to indicate it's the last TRB in the chain.
906 */
907 if (num_trbs > 1) {
908 field |= TRB_CHAIN;
909 } else {
910 /* FIXME - add check for ZERO_PACKET flag before this */
911 td->last_trb = ep_ring->enqueue;
912 field |= TRB_IOC;
913 }
914 xhci_dbg(xhci, " sg entry: dma = %#x, len = %#x (%d), "
915 "64KB boundary at %#x, end dma = %#x\n",
916 (unsigned int) addr, trb_buff_len, trb_buff_len,
917 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
918 (unsigned int) addr + trb_buff_len);
919 if (TRB_MAX_BUFF_SIZE -
920 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1)) < trb_buff_len) {
921 xhci_warn(xhci, "WARN: sg dma xfer crosses 64KB boundaries!\n");
922 xhci_dbg(xhci, "Next boundary at %#x, end dma = %#x\n",
923 (unsigned int) (addr + TRB_MAX_BUFF_SIZE) & ~(TRB_MAX_BUFF_SIZE - 1),
924 (unsigned int) addr + trb_buff_len);
925 }
926 queue_trb(xhci, ep_ring, false,
927 (u32) addr,
928 (u32) ((u64) addr >> 32),
929 TRB_LEN(trb_buff_len) | TRB_INTR_TARGET(0),
930 /* We always want to know if the TRB was short,
931 * or we won't get an event when it completes.
932 * (Unless we use event data TRBs, which are a
933 * waste of space and HC resources.)
934 */
935 field | TRB_ISP | TRB_TYPE(TRB_NORMAL));
936 --num_trbs;
937 running_total += trb_buff_len;
938
939 /* Calculate length for next transfer --
940 * Are we done queueing all the TRBs for this sg entry?
941 */
942 this_sg_len -= trb_buff_len;
943 if (this_sg_len == 0) {
944 --num_sgs;
945 if (num_sgs == 0)
946 break;
947 sg = sg_next(sg);
948 addr = (u64) sg_dma_address(sg);
949 this_sg_len = sg_dma_len(sg);
950 } else {
951 addr += trb_buff_len;
952 }
953
954 trb_buff_len = TRB_MAX_BUFF_SIZE -
955 (addr & ((1 << TRB_MAX_BUFF_SHIFT) - 1));
956 trb_buff_len = min_t(int, trb_buff_len, this_sg_len);
957 if (running_total + trb_buff_len > urb->transfer_buffer_length)
958 trb_buff_len =
959 urb->transfer_buffer_length - running_total;
960 } while (running_total < urb->transfer_buffer_length);
961
962 check_trb_math(urb, num_trbs, running_total);
963 giveback_first_trb(xhci, slot_id, ep_index, start_cycle, start_trb, td);
964 return 0;
965}
966
761/* This is very similar to what ehci-q.c qtd_fill() does */ 967/* This is very similar to what ehci-q.c qtd_fill() does */
762int queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, 968int queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
763 struct urb *urb, int slot_id, unsigned int ep_index) 969 struct urb *urb, int slot_id, unsigned int ep_index)
@@ -773,6 +979,9 @@ int queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
773 int running_total, trb_buff_len, ret; 979 int running_total, trb_buff_len, ret;
774 u64 addr; 980 u64 addr;
775 981
982 if (urb->sg)
983 return queue_bulk_sg_tx(xhci, mem_flags, urb, slot_id, ep_index);
984
776 ep_ring = xhci->devs[slot_id]->ep_rings[ep_index]; 985 ep_ring = xhci->devs[slot_id]->ep_rings[ep_index];
777 986
778 num_trbs = 0; 987 num_trbs = 0;
@@ -793,10 +1002,13 @@ int queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
793 /* FIXME: this doesn't deal with URB_ZERO_PACKET - need one more */ 1002 /* FIXME: this doesn't deal with URB_ZERO_PACKET - need one more */
794 1003
795 if (!in_interrupt()) 1004 if (!in_interrupt())
796 dev_dbg(&urb->dev->dev, "ep %#x - urb len = %d, addr = %#x, num_trbs = %d\n", 1005 dev_dbg(&urb->dev->dev, "ep %#x - urb len = %#x (%d), addr = %#x, num_trbs = %d\n",
797 urb->ep->desc.bEndpointAddress, 1006 urb->ep->desc.bEndpointAddress,
798 urb->transfer_buffer_length, urb->transfer_dma, 1007 urb->transfer_buffer_length,
1008 urb->transfer_buffer_length,
1009 urb->transfer_dma,
799 num_trbs); 1010 num_trbs);
1011
800 ret = xhci_prepare_transfer(xhci, xhci->devs[slot_id], ep_index, 1012 ret = xhci_prepare_transfer(xhci, xhci->devs[slot_id], ep_index,
801 num_trbs, urb, &td, mem_flags); 1013 num_trbs, urb, &td, mem_flags);
802 if (ret < 0) 1014 if (ret < 0)
@@ -860,21 +1072,8 @@ int queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
860 trb_buff_len = TRB_MAX_BUFF_SIZE; 1072 trb_buff_len = TRB_MAX_BUFF_SIZE;
861 } while (running_total < urb->transfer_buffer_length); 1073 } while (running_total < urb->transfer_buffer_length);
862 1074
863 if (num_trbs != 0) 1075 check_trb_math(urb, num_trbs, running_total);
864 dev_dbg(&urb->dev->dev, "%s - ep %#x - Miscalculated number of " 1076 giveback_first_trb(xhci, slot_id, ep_index, start_cycle, start_trb, td);
865 "TRBs, %d left\n", __FUNCTION__,
866 urb->ep->desc.bEndpointAddress, num_trbs);
867 /*
868 * Pass all the TRBs to the hardware at once and make sure this write
869 * isn't reordered.
870 */
871 wmb();
872 start_trb->field[3] |= start_cycle;
873 field = xhci_readl(xhci, &xhci->dba->doorbell[slot_id]) & DB_MASK;
874 xhci_writel(xhci, field | EPI_TO_DB(ep_index), &xhci->dba->doorbell[slot_id]);
875 /* Flush PCI posted writes */
876 xhci_readl(xhci, &xhci->dba->doorbell[slot_id]);
877
878 return 0; 1077 return 0;
879} 1078}
880 1079
@@ -965,17 +1164,7 @@ int queue_ctrl_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
965 /* Event on completion */ 1164 /* Event on completion */
966 field | TRB_IOC | TRB_TYPE(TRB_STATUS) | ep_ring->cycle_state); 1165 field | TRB_IOC | TRB_TYPE(TRB_STATUS) | ep_ring->cycle_state);
967 1166
968 /* 1167 giveback_first_trb(xhci, slot_id, ep_index, start_cycle, start_trb, td);
969 * Pass all the TRBs to the hardware at once and make sure this write
970 * isn't reordered.
971 */
972 wmb();
973 start_trb->field[3] |= start_cycle;
974 field = xhci_readl(xhci, &xhci->dba->doorbell[slot_id]) & DB_MASK;
975 xhci_writel(xhci, field | EPI_TO_DB(ep_index), &xhci->dba->doorbell[slot_id]);
976 /* Flush PCI posted writes */
977 xhci_readl(xhci, &xhci->dba->doorbell[slot_id]);
978
979 return 0; 1168 return 0;
980} 1169}
981 1170