aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sridhar Samudrala <sri@us.ibm.com> 2010-02-04 23:24:10 -0500
committer: David S. Miller <davem@davemloft.net> 2010-02-04 23:24:10 -0500
commit: bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 (patch)
tree: 63d19326b491b507f246ea4c3ef410d62602e794
parent: 746079dabcf74be2a16dc983ac597156e3d2e051 (diff)
packet: Add GSO/csum offload support.
This patch adds GSO/checksum offload to af_packet sockets using
virtio_net_hdr. Based on Rusty's patch to add this support to tun.
It allows GSO/checksum offload to be enabled when using a raw socket
backend with virtio_net.

Adds the PACKET_VNET_HDR socket option to prepend virtio_net_hdr in the
receive path and process/skip virtio_net_hdr in the send path. This
option is only allowed with SOCK_RAW sockets attached to ethernet type
devices.

v2 updates
----------
Michael's Comments
- Perform length check in packet_snd() when GSO is off even when
  vnet_hdr is present.
- Check for SKB_GSO_FCOE type and return -EINVAL.
- Don't allow tx/rx ring when vnet_hdr is enabled.
Herbert's Comments
- Removed ethernet specific code.
- Protocol value is assumed to be passed in by the caller.

Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/if_packet.h | 1
-rw-r--r-- net/packet/af_packet.c | 187
2 files changed, 177 insertions, 11 deletions
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 4021d47cc437..aa57a5f993fc 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -46,6 +46,7 @@ struct sockaddr_ll {
46#define PACKET_RESERVE 12 46#define PACKET_RESERVE 12
47#define PACKET_TX_RING 13 47#define PACKET_TX_RING 13
48#define PACKET_LOSS 14 48#define PACKET_LOSS 14
49#define PACKET_VNET_HDR 15
49 50
50struct tpacket_stats { 51struct tpacket_stats {
51 unsigned int tp_packets; 52 unsigned int tp_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 53633c5fdb1d..178e2937bbaa 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -80,6 +80,7 @@
80#include <linux/init.h> 80#include <linux/init.h>
81#include <linux/mutex.h> 81#include <linux/mutex.h>
82#include <linux/if_vlan.h> 82#include <linux/if_vlan.h>
83#include <linux/virtio_net.h>
83 84
84#ifdef CONFIG_INET 85#ifdef CONFIG_INET
85#include <net/inet_common.h> 86#include <net/inet_common.h>
@@ -193,7 +194,8 @@ struct packet_sock {
193 struct mutex pg_vec_lock; 194 struct mutex pg_vec_lock;
194 unsigned int running:1, /* prot_hook is attached*/ 195 unsigned int running:1, /* prot_hook is attached*/
195 auxdata:1, 196 auxdata:1,
196 origdev:1; 197 origdev:1,
198 has_vnet_hdr:1;
197 int ifindex; /* bound device */ 199 int ifindex; /* bound device */
198 __be16 num; 200 __be16 num;
199 struct packet_mclist *mclist; 201 struct packet_mclist *mclist;
@@ -1056,6 +1058,30 @@ out:
1056} 1058}
1057#endif 1059#endif
1058 1060
1061static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1062 size_t reserve, size_t len,
1063 size_t linear, int noblock,
1064 int *err)
1065{
1066 struct sk_buff *skb;
1067
1068 /* Under a page? Don't bother with paged skb. */
1069 if (prepad + len < PAGE_SIZE || !linear)
1070 linear = len;
1071
1072 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1073 err);
1074 if (!skb)
1075 return NULL;
1076
1077 skb_reserve(skb, reserve);
1078 skb_put(skb, linear);
1079 skb->data_len = len - linear;
1080 skb->len += len - linear;
1081
1082 return skb;
1083}
1084
1059static int packet_snd(struct socket *sock, 1085static int packet_snd(struct socket *sock,
1060 struct msghdr *msg, size_t len) 1086 struct msghdr *msg, size_t len)
1061{ 1087{
@@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock,
1066 __be16 proto; 1092 __be16 proto;
1067 unsigned char *addr; 1093 unsigned char *addr;
1068 int ifindex, err, reserve = 0; 1094 int ifindex, err, reserve = 0;
1095 struct virtio_net_hdr vnet_hdr = { 0 };
1096 int offset = 0;
1097 int vnet_hdr_len;
1098 struct packet_sock *po = pkt_sk(sk);
1099 unsigned short gso_type = 0;
1069 1100
1070 /* 1101 /*
1071 * Get and verify the address. 1102 * Get and verify the address.
1072 */ 1103 */
1073 1104
1074 if (saddr == NULL) { 1105 if (saddr == NULL) {
1075 struct packet_sock *po = pkt_sk(sk);
1076
1077 ifindex = po->ifindex; 1106 ifindex = po->ifindex;
1078 proto = po->num; 1107 proto = po->num;
1079 addr = NULL; 1108 addr = NULL;
@@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock,
1100 if (!(dev->flags & IFF_UP)) 1129 if (!(dev->flags & IFF_UP))
1101 goto out_unlock; 1130 goto out_unlock;
1102 1131
1132 if (po->has_vnet_hdr) {
1133 vnet_hdr_len = sizeof(vnet_hdr);
1134
1135 err = -EINVAL;
1136 if (len < vnet_hdr_len)
1137 goto out_unlock;
1138
1139 len -= vnet_hdr_len;
1140
1141 err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
1142 vnet_hdr_len);
1143 if (err < 0)
1144 goto out_unlock;
1145
1146 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1147 (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
1148 vnet_hdr.hdr_len))
1149 vnet_hdr.hdr_len = vnet_hdr.csum_start +
1150 vnet_hdr.csum_offset + 2;
1151
1152 err = -EINVAL;
1153 if (vnet_hdr.hdr_len > len)
1154 goto out_unlock;
1155
1156 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1157 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1158 case VIRTIO_NET_HDR_GSO_TCPV4:
1159 gso_type = SKB_GSO_TCPV4;
1160 break;
1161 case VIRTIO_NET_HDR_GSO_TCPV6:
1162 gso_type = SKB_GSO_TCPV6;
1163 break;
1164 case VIRTIO_NET_HDR_GSO_UDP:
1165 gso_type = SKB_GSO_UDP;
1166 break;
1167 default:
1168 goto out_unlock;
1169 }
1170
1171 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
1172 gso_type |= SKB_GSO_TCP_ECN;
1173
1174 if (vnet_hdr.gso_size == 0)
1175 goto out_unlock;
1176
1177 }
1178 }
1179
1103 err = -EMSGSIZE; 1180 err = -EMSGSIZE;
1104 if (len > dev->mtu+reserve) 1181 if (!gso_type && (len > dev->mtu+reserve))
1105 goto out_unlock; 1182 goto out_unlock;
1106 1183
1107 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), 1184 err = -ENOBUFS;
1108 msg->msg_flags & MSG_DONTWAIT, &err); 1185 skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
1186 LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
1187 msg->msg_flags & MSG_DONTWAIT, &err);
1109 if (skb == NULL) 1188 if (skb == NULL)
1110 goto out_unlock; 1189 goto out_unlock;
1111 1190
1112 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1191 skb_set_network_header(skb, reserve);
1113 skb_reset_network_header(skb);
1114 1192
1115 err = -EINVAL; 1193 err = -EINVAL;
1116 if (sock->type == SOCK_DGRAM && 1194 if (sock->type == SOCK_DGRAM &&
1117 dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) 1195 (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
1118 goto out_free; 1196 goto out_free;
1119 1197
1120 /* Returns -EFAULT on error */ 1198 /* Returns -EFAULT on error */
1121 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1199 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1122 if (err) 1200 if (err)
1123 goto out_free; 1201 goto out_free;
1124 1202
@@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock,
1127 skb->priority = sk->sk_priority; 1205 skb->priority = sk->sk_priority;
1128 skb->mark = sk->sk_mark; 1206 skb->mark = sk->sk_mark;
1129 1207
1208 if (po->has_vnet_hdr) {
1209 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1210 if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
1211 vnet_hdr.csum_offset)) {
1212 err = -EINVAL;
1213 goto out_free;
1214 }
1215 }
1216
1217 skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
1218 skb_shinfo(skb)->gso_type = gso_type;
1219
1220 /* Header must be checked, and gso_segs computed. */
1221 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1222 skb_shinfo(skb)->gso_segs = 0;
1223
1224 len += vnet_hdr_len;
1225 }
1226
1130 /* 1227 /*
1131 * Now send it 1228 * Now send it
1132 */ 1229 */
@@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1420 struct sk_buff *skb; 1517 struct sk_buff *skb;
1421 int copied, err; 1518 int copied, err;
1422 struct sockaddr_ll *sll; 1519 struct sockaddr_ll *sll;
1520 int vnet_hdr_len = 0;
1423 1521
1424 err = -EINVAL; 1522 err = -EINVAL;
1425 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) 1523 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
@@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1451 if (skb == NULL) 1549 if (skb == NULL)
1452 goto out; 1550 goto out;
1453 1551
1552 if (pkt_sk(sk)->has_vnet_hdr) {
1553 struct virtio_net_hdr vnet_hdr = { 0 };
1554
1555 err = -EINVAL;
1556 vnet_hdr_len = sizeof(vnet_hdr);
1557 if ((len -= vnet_hdr_len) < 0)
1558 goto out_free;
1559
1560 if (skb_is_gso(skb)) {
1561 struct skb_shared_info *sinfo = skb_shinfo(skb);
1562
1563 /* This is a hint as to how much should be linear. */
1564 vnet_hdr.hdr_len = skb_headlen(skb);
1565 vnet_hdr.gso_size = sinfo->gso_size;
1566 if (sinfo->gso_type & SKB_GSO_TCPV4)
1567 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1568 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1569 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1570 else if (sinfo->gso_type & SKB_GSO_UDP)
1571 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1572 else if (sinfo->gso_type & SKB_GSO_FCOE)
1573 goto out_free;
1574 else
1575 BUG();
1576 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1577 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1578 } else
1579 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1580
1581 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1582 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1583 vnet_hdr.csum_start = skb->csum_start -
1584 skb_headroom(skb);
1585 vnet_hdr.csum_offset = skb->csum_offset;
1586 } /* else everything is zero */
1587
1588 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1589 vnet_hdr_len);
1590 if (err < 0)
1591 goto out_free;
1592 }
1593
1454 /* 1594 /*
1455 * If the address length field is there to be filled in, we fill 1595 * If the address length field is there to be filled in, we fill
1456 * it in now. 1596 * it in now.
@@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1502 * Free or return the buffer as appropriate. Again this 1642 * Free or return the buffer as appropriate. Again this
1503 * hides all the races and re-entrancy issues from us. 1643 * hides all the races and re-entrancy issues from us.
1504 */ 1644 */
1505 err = (flags&MSG_TRUNC) ? skb->len : copied; 1645 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
1506 1646
1507out_free: 1647out_free:
1508 skb_free_datagram(sk, skb); 1648 skb_free_datagram(sk, skb);
@@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1740 1880
1741 if (optlen < sizeof(req)) 1881 if (optlen < sizeof(req))
1742 return -EINVAL; 1882 return -EINVAL;
1883 if (pkt_sk(sk)->has_vnet_hdr)
1884 return -EINVAL;
1743 if (copy_from_user(&req, optval, sizeof(req))) 1885 if (copy_from_user(&req, optval, sizeof(req)))
1744 return -EFAULT; 1886 return -EFAULT;
1745 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); 1887 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
@@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1826 po->origdev = !!val; 1968 po->origdev = !!val;
1827 return 0; 1969 return 0;
1828 } 1970 }
1971 case PACKET_VNET_HDR:
1972 {
1973 int val;
1974
1975 if (sock->type != SOCK_RAW)
1976 return -EINVAL;
1977 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
1978 return -EBUSY;
1979 if (optlen < sizeof(val))
1980 return -EINVAL;
1981 if (copy_from_user(&val, optval, sizeof(val)))
1982 return -EFAULT;
1983
1984 po->has_vnet_hdr = !!val;
1985 return 0;
1986 }
1829 default: 1987 default:
1830 return -ENOPROTOOPT; 1988 return -ENOPROTOOPT;
1831 } 1989 }
@@ -1876,6 +2034,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
1876 2034
1877 data = &val; 2035 data = &val;
1878 break; 2036 break;
2037 case PACKET_VNET_HDR:
2038 if (len > sizeof(int))
2039 len = sizeof(int);
2040 val = po->has_vnet_hdr;
2041
2042 data = &val;
2043 break;
1879#ifdef CONFIG_PACKET_MMAP 2044#ifdef CONFIG_PACKET_MMAP
1880 case PACKET_VERSION: 2045 case PACKET_VERSION:
1881 if (len > sizeof(int)) 2046 if (len > sizeof(int))