about summary refs log tree commit diff stats
path: root/drivers/net/ethernet/sun/sunvnet.c
diff options
context:
space:
mode:
author: David L Stevens <david.stevens@oracle.com> 2014-09-29 19:48:11 -0400
committer: David S. Miller <davem@davemloft.net> 2014-09-30 17:10:39 -0400
commit: 8e845f4cbbd2ef81846c2ab5dca46d88fb2717ee (patch)
tree: 59f30a451145d5e95bf308c4b06cacb61a996417 /drivers/net/ethernet/sun/sunvnet.c
parent: e4defc775424a3501caf98d266a8d7616fa53819 (diff)
sunvnet: make transmit path zero-copy in the kernel
This patch removes pre-allocated transmit buffers and instead directly maps pending packets on demand. This saves O(n^2) maximum-sized transmit buffers, for n hosts on a vswitch, as well as a copy to those buffers. Single-stream TCP throughput linux-solaris dropped ~5% for 1500-byte MTU, but linux-linux at 1500-bytes increased ~20%. Signed-off-by: David L Stevens <david.stevens@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/sun/sunvnet.c')
-rw-r--r-- drivers/net/ethernet/sun/sunvnet.c | 218
1 file changed, 174 insertions(+), 44 deletions(-)
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index b1abcadb42ff..8f5f4e3291a4 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -780,6 +780,117 @@ struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
780 return ret; 780 return ret;
781} 781}
782 782
/* Walk the TX dring backwards from the most recently produced slot and
 * reclaim completed transmits.
 *
 * Descriptors found in VIO_DESC_DONE state are returned to VIO_DESC_FREE;
 * the skb attached to such a slot is detached, its LDC mapping torn down
 * via ldc_unmap(), and the skb is prepended (through skb->next) onto the
 * list returned to the caller.  The skbs are NOT freed here — the caller
 * frees them (see vnet_free_skbs()) so that can happen outside the lock.
 *
 * *pending is set to the count of descriptors still in VIO_DESC_READY
 * state, i.e. handed to the peer LDOM but not yet consumed.
 *
 * NOTE(review): callers hold port->vio.lock around this — confirm that is
 * a documented requirement for all future call sites.
 */
static struct sk_buff *vnet_clean_tx_ring(struct vnet_port *port,
					  unsigned *pending)
{
	struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
	struct sk_buff *skb = NULL;
	int i, txi;

	*pending = 0;

	/* Start one slot before dr->prod (the newest descriptor),
	 * wrapping around the ring.
	 */
	txi = dr->prod-1;
	if (txi < 0)
		txi = VNET_TX_RING_SIZE-1;

	/* Visit at most every slot once, walking backwards. */
	for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
		struct vio_net_desc *d;

		d = vio_dring_entry(dr, txi);

		if (d->hdr.state == VIO_DESC_DONE) {
			if (port->tx_bufs[txi].skb) {
				/* a slot never holds an already-chained skb */
				BUG_ON(port->tx_bufs[txi].skb->next);

				/* prepend to the to-be-freed list */
				port->tx_bufs[txi].skb->next = skb;
				skb = port->tx_bufs[txi].skb;
				port->tx_bufs[txi].skb = NULL;

				ldc_unmap(port->vio.lp,
					  port->tx_bufs[txi].cookies,
					  port->tx_bufs[txi].ncookies);
			}
			d->hdr.state = VIO_DESC_FREE;
		} else if (d->hdr.state == VIO_DESC_READY) {
			/* still owned by the peer */
			(*pending)++;
		} else if (d->hdr.state == VIO_DESC_FREE) {
			/* older slots are already clean — stop early */
			break;
		}
		--txi;
		if (txi < 0)
			txi = VNET_TX_RING_SIZE-1;
	}
	return skb;
}
825
826static inline void vnet_free_skbs(struct sk_buff *skb)
827{
828 struct sk_buff *next;
829
830 while (skb) {
831 next = skb->next;
832 skb->next = NULL;
833 dev_kfree_skb(skb);
834 skb = next;
835 }
836}
837
838static void vnet_clean_timer_expire(unsigned long port0)
839{
840 struct vnet_port *port = (struct vnet_port *)port0;
841 struct sk_buff *freeskbs;
842 unsigned pending;
843 unsigned long flags;
844
845 spin_lock_irqsave(&port->vio.lock, flags);
846 freeskbs = vnet_clean_tx_ring(port, &pending);
847 spin_unlock_irqrestore(&port->vio.lock, flags);
848
849 vnet_free_skbs(freeskbs);
850
851 if (pending)
852 (void)mod_timer(&port->clean_timer,
853 jiffies + VNET_CLEAN_TIMEOUT);
854 else
855 del_timer(&port->clean_timer);
856}
857
858static inline struct sk_buff *vnet_skb_shape(struct sk_buff *skb, void **pstart,
859 int *plen)
860{
861 struct sk_buff *nskb;
862 int len, pad;
863
864 len = skb->len;
865 pad = 0;
866 if (len < ETH_ZLEN) {
867 pad += ETH_ZLEN - skb->len;
868 len += pad;
869 }
870 len += VNET_PACKET_SKIP;
871 pad += 8 - (len & 7);
872 len += 8 - (len & 7);
873
874 if (((unsigned long)skb->data & 7) != VNET_PACKET_SKIP ||
875 skb_tailroom(skb) < pad ||
876 skb_headroom(skb) < VNET_PACKET_SKIP) {
877 nskb = alloc_and_align_skb(skb->dev, skb->len);
878 skb_reserve(nskb, VNET_PACKET_SKIP);
879 if (skb_copy_bits(skb, 0, nskb->data, skb->len)) {
880 dev_kfree_skb(nskb);
881 dev_kfree_skb(skb);
882 return NULL;
883 }
884 (void)skb_put(nskb, skb->len);
885 dev_kfree_skb(skb);
886 skb = nskb;
887 }
888
889 *pstart = skb->data - VNET_PACKET_SKIP;
890 *plen = len;
891 return skb;
892}
893
783static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) 894static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
784{ 895{
785 struct vnet *vp = netdev_priv(dev); 896 struct vnet *vp = netdev_priv(dev);
@@ -788,12 +899,20 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
788 struct vio_net_desc *d; 899 struct vio_net_desc *d;
789 unsigned long flags; 900 unsigned long flags;
790 unsigned int len; 901 unsigned int len;
791 void *tx_buf; 902 struct sk_buff *freeskbs = NULL;
792 int i, err; 903 int i, err, txi;
904 void *start = NULL;
905 int nlen = 0;
906 unsigned pending = 0;
793 907
794 if (unlikely(!port)) 908 if (unlikely(!port))
795 goto out_dropped; 909 goto out_dropped;
796 910
911 skb = vnet_skb_shape(skb, &start, &nlen);
912
913 if (unlikely(!skb))
914 goto out_dropped;
915
797 spin_lock_irqsave(&port->vio.lock, flags); 916 spin_lock_irqsave(&port->vio.lock, flags);
798 917
799 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 918 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
@@ -811,14 +930,27 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
811 930
812 d = vio_dring_cur(dr); 931 d = vio_dring_cur(dr);
813 932
814 tx_buf = port->tx_bufs[dr->prod].buf; 933 txi = dr->prod;
815 skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP, skb->len); 934
935 freeskbs = vnet_clean_tx_ring(port, &pending);
936
937 BUG_ON(port->tx_bufs[txi].skb);
816 938
817 len = skb->len; 939 len = skb->len;
818 if (len < ETH_ZLEN) { 940 if (len < ETH_ZLEN)
819 len = ETH_ZLEN; 941 len = ETH_ZLEN;
820 memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len); 942
943 port->tx_bufs[txi].skb = skb;
944 skb = NULL;
945
946 err = ldc_map_single(port->vio.lp, start, nlen,
947 port->tx_bufs[txi].cookies, 2,
948 (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW));
949 if (err < 0) {
950 netdev_info(dev, "tx buffer map error %d\n", err);
951 goto out_dropped_unlock;
821 } 952 }
953 port->tx_bufs[txi].ncookies = err;
822 954
823 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(), 955 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
824 * thus it is safe to not set VIO_ACK_ENABLE for each transmission: 956 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
@@ -830,9 +962,9 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
830 */ 962 */
831 d->hdr.ack = VIO_ACK_DISABLE; 963 d->hdr.ack = VIO_ACK_DISABLE;
832 d->size = len; 964 d->size = len;
833 d->ncookies = port->tx_bufs[dr->prod].ncookies; 965 d->ncookies = port->tx_bufs[txi].ncookies;
834 for (i = 0; i < d->ncookies; i++) 966 for (i = 0; i < d->ncookies; i++)
835 d->cookies[i] = port->tx_bufs[dr->prod].cookies[i]; 967 d->cookies[i] = port->tx_bufs[txi].cookies[i];
836 968
837 /* This has to be a non-SMP write barrier because we are writing 969 /* This has to be a non-SMP write barrier because we are writing
838 * to memory which is shared with the peer LDOM. 970 * to memory which is shared with the peer LDOM.
@@ -876,7 +1008,7 @@ ldc_start_done:
876 port->start_cons = false; 1008 port->start_cons = false;
877 1009
878 dev->stats.tx_packets++; 1010 dev->stats.tx_packets++;
879 dev->stats.tx_bytes += skb->len; 1011 dev->stats.tx_bytes += port->tx_bufs[txi].skb->len;
880 1012
881 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1); 1013 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
882 if (unlikely(vnet_tx_dring_avail(dr) < 2)) { 1014 if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
@@ -887,7 +1019,9 @@ ldc_start_done:
887 1019
888 spin_unlock_irqrestore(&port->vio.lock, flags); 1020 spin_unlock_irqrestore(&port->vio.lock, flags);
889 1021
890 dev_kfree_skb(skb); 1022 vnet_free_skbs(freeskbs);
1023
1024 (void)mod_timer(&port->clean_timer, jiffies + VNET_CLEAN_TIMEOUT);
891 1025
892 return NETDEV_TX_OK; 1026 return NETDEV_TX_OK;
893 1027
@@ -895,7 +1029,14 @@ out_dropped_unlock:
895 spin_unlock_irqrestore(&port->vio.lock, flags); 1029 spin_unlock_irqrestore(&port->vio.lock, flags);
896 1030
897out_dropped: 1031out_dropped:
898 dev_kfree_skb(skb); 1032 if (skb)
1033 dev_kfree_skb(skb);
1034 vnet_free_skbs(freeskbs);
1035 if (pending)
1036 (void)mod_timer(&port->clean_timer,
1037 jiffies + VNET_CLEAN_TIMEOUT);
1038 else
1039 del_timer(&port->clean_timer);
899 dev->stats.tx_dropped++; 1040 dev->stats.tx_dropped++;
900 return NETDEV_TX_OK; 1041 return NETDEV_TX_OK;
901} 1042}
@@ -1097,17 +1238,22 @@ static void vnet_port_free_tx_bufs(struct vnet_port *port)
1097 } 1238 }
1098 1239
1099 for (i = 0; i < VNET_TX_RING_SIZE; i++) { 1240 for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1100 void *buf = port->tx_bufs[i].buf; 1241 struct vio_net_desc *d;
1242 void *skb = port->tx_bufs[i].skb;
1101 1243
1102 if (!buf) 1244 if (!skb)
1103 continue; 1245 continue;
1104 1246
1247 d = vio_dring_entry(dr, i);
1248 if (d->hdr.state == VIO_DESC_READY)
1249 pr_warn("active transmit buffers freed\n");
1250
1105 ldc_unmap(port->vio.lp, 1251 ldc_unmap(port->vio.lp,
1106 port->tx_bufs[i].cookies, 1252 port->tx_bufs[i].cookies,
1107 port->tx_bufs[i].ncookies); 1253 port->tx_bufs[i].ncookies);
1108 1254 dev_kfree_skb(skb);
1109 kfree(buf); 1255 port->tx_bufs[i].skb = NULL;
1110 port->tx_bufs[i].buf = NULL; 1256 d->hdr.state = VIO_DESC_FREE;
1111 } 1257 }
1112} 1258}
1113 1259
@@ -1118,34 +1264,6 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
1118 int i, err, ncookies; 1264 int i, err, ncookies;
1119 void *dring; 1265 void *dring;
1120 1266
1121 for (i = 0; i < VNET_TX_RING_SIZE; i++) {
1122 void *buf = kzalloc(VNET_MAXPACKET + 8, GFP_KERNEL);
1123 int map_len = (VNET_MAXPACKET + 7) & ~7;
1124
1125 err = -ENOMEM;
1126 if (!buf)
1127 goto err_out;
1128
1129 err = -EFAULT;
1130 if ((unsigned long)buf & (8UL - 1)) {
1131 pr_err("TX buffer misaligned\n");
1132 kfree(buf);
1133 goto err_out;
1134 }
1135
1136 err = ldc_map_single(port->vio.lp, buf, map_len,
1137 port->tx_bufs[i].cookies, 2,
1138 (LDC_MAP_SHADOW |
1139 LDC_MAP_DIRECT |
1140 LDC_MAP_RW));
1141 if (err < 0) {
1142 kfree(buf);
1143 goto err_out;
1144 }
1145 port->tx_bufs[i].buf = buf;
1146 port->tx_bufs[i].ncookies = err;
1147 }
1148
1149 dr = &port->vio.drings[VIO_DRIVER_TX_RING]; 1267 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
1150 1268
1151 len = (VNET_TX_RING_SIZE * 1269 len = (VNET_TX_RING_SIZE *
@@ -1172,6 +1290,12 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port)
1172 dr->pending = VNET_TX_RING_SIZE; 1290 dr->pending = VNET_TX_RING_SIZE;
1173 dr->ncookies = ncookies; 1291 dr->ncookies = ncookies;
1174 1292
1293 for (i = 0; i < VNET_TX_RING_SIZE; ++i) {
1294 struct vio_net_desc *d;
1295
1296 d = vio_dring_entry(dr, i);
1297 d->hdr.state = VIO_DESC_FREE;
1298 }
1175 return 0; 1299 return 0;
1176 1300
1177err_out: 1301err_out:
@@ -1203,6 +1327,8 @@ static struct vnet *vnet_new(const u64 *local_mac)
1203 dev = alloc_etherdev(sizeof(*vp)); 1327 dev = alloc_etherdev(sizeof(*vp));
1204 if (!dev) 1328 if (!dev)
1205 return ERR_PTR(-ENOMEM); 1329 return ERR_PTR(-ENOMEM);
1330 dev->needed_headroom = VNET_PACKET_SKIP + 8;
1331 dev->needed_tailroom = 8;
1206 1332
1207 for (i = 0; i < ETH_ALEN; i++) 1333 for (i = 0; i < ETH_ALEN; i++)
1208 dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff; 1334 dev->dev_addr[i] = (*local_mac >> (5 - i) * 8) & 0xff;
@@ -1397,6 +1523,9 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
1397 pr_info("%s: PORT ( remote-mac %pM%s )\n", 1523 pr_info("%s: PORT ( remote-mac %pM%s )\n",
1398 vp->dev->name, port->raddr, switch_port ? " switch-port" : ""); 1524 vp->dev->name, port->raddr, switch_port ? " switch-port" : "");
1399 1525
1526 setup_timer(&port->clean_timer, vnet_clean_timer_expire,
1527 (unsigned long)port);
1528
1400 vio_port_up(&port->vio); 1529 vio_port_up(&port->vio);
1401 1530
1402 mdesc_release(hp); 1531 mdesc_release(hp);
@@ -1423,6 +1552,7 @@ static int vnet_port_remove(struct vio_dev *vdev)
1423 unsigned long flags; 1552 unsigned long flags;
1424 1553
1425 del_timer_sync(&port->vio.timer); 1554 del_timer_sync(&port->vio.timer);
1555 del_timer_sync(&port->clean_timer);
1426 1556
1427 spin_lock_irqsave(&vp->lock, flags); 1557 spin_lock_irqsave(&vp->lock, flags);
1428 list_del(&port->list); 1558 list_del(&port->list);