diff options
author | Sridhar Samudrala <sri@us.ibm.com> | 2010-02-04 23:24:10 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-02-04 23:24:10 -0500 |
commit | bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 (patch) | |
tree | 63d19326b491b507f246ea4c3ef410d62602e794 | |
parent | 746079dabcf74be2a16dc983ac597156e3d2e051 (diff) |
packet: Add GSO/csum offload support.
This patch adds GSO/checksum offload to af_packet sockets using
virtio_net_hdr. It is based on Rusty's patch that added this support to tun.
It allows GSO/checksum offload to be enabled when using a raw socket
backend with virtio_net.
It adds the PACKET_VNET_HDR socket option, which prepends a virtio_net_hdr in
the receive path and processes/skips the virtio_net_hdr in the send path. This
option is only allowed with SOCK_RAW sockets attached to Ethernet-type
devices.
v2 updates
----------
Michael's comments:
- Perform the length check in packet_snd() when GSO is off, even when
  a vnet_hdr is present.
- Check for the SKB_GSO_FCOE type and return -EINVAL.
- Don't allow a tx/rx ring when vnet_hdr is enabled.
Herbert's comments:
- Removed Ethernet-specific code.
- The protocol value is assumed to be passed in by the caller.
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/if_packet.h | 1 | ||||
-rw-r--r-- | net/packet/af_packet.c | 187 |
2 files changed, 177 insertions, 11 deletions
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 4021d47cc437..aa57a5f993fc 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h | |||
@@ -46,6 +46,7 @@ struct sockaddr_ll { | |||
46 | #define PACKET_RESERVE 12 | 46 | #define PACKET_RESERVE 12 |
47 | #define PACKET_TX_RING 13 | 47 | #define PACKET_TX_RING 13 |
48 | #define PACKET_LOSS 14 | 48 | #define PACKET_LOSS 14 |
49 | #define PACKET_VNET_HDR 15 | ||
49 | 50 | ||
50 | struct tpacket_stats { | 51 | struct tpacket_stats { |
51 | unsigned int tp_packets; | 52 | unsigned int tp_packets; |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 53633c5fdb1d..178e2937bbaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -80,6 +80,7 @@ | |||
80 | #include <linux/init.h> | 80 | #include <linux/init.h> |
81 | #include <linux/mutex.h> | 81 | #include <linux/mutex.h> |
82 | #include <linux/if_vlan.h> | 82 | #include <linux/if_vlan.h> |
83 | #include <linux/virtio_net.h> | ||
83 | 84 | ||
84 | #ifdef CONFIG_INET | 85 | #ifdef CONFIG_INET |
85 | #include <net/inet_common.h> | 86 | #include <net/inet_common.h> |
@@ -193,7 +194,8 @@ struct packet_sock { | |||
193 | struct mutex pg_vec_lock; | 194 | struct mutex pg_vec_lock; |
194 | unsigned int running:1, /* prot_hook is attached*/ | 195 | unsigned int running:1, /* prot_hook is attached*/ |
195 | auxdata:1, | 196 | auxdata:1, |
196 | origdev:1; | 197 | origdev:1, |
198 | has_vnet_hdr:1; | ||
197 | int ifindex; /* bound device */ | 199 | int ifindex; /* bound device */ |
198 | __be16 num; | 200 | __be16 num; |
199 | struct packet_mclist *mclist; | 201 | struct packet_mclist *mclist; |
@@ -1056,6 +1058,30 @@ out: | |||
1056 | } | 1058 | } |
1057 | #endif | 1059 | #endif |
1058 | 1060 | ||
1061 | static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, | ||
1062 | size_t reserve, size_t len, | ||
1063 | size_t linear, int noblock, | ||
1064 | int *err) | ||
1065 | { | ||
1066 | struct sk_buff *skb; | ||
1067 | |||
1068 | /* Under a page? Don't bother with paged skb. */ | ||
1069 | if (prepad + len < PAGE_SIZE || !linear) | ||
1070 | linear = len; | ||
1071 | |||
1072 | skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, | ||
1073 | err); | ||
1074 | if (!skb) | ||
1075 | return NULL; | ||
1076 | |||
1077 | skb_reserve(skb, reserve); | ||
1078 | skb_put(skb, linear); | ||
1079 | skb->data_len = len - linear; | ||
1080 | skb->len += len - linear; | ||
1081 | |||
1082 | return skb; | ||
1083 | } | ||
1084 | |||
1059 | static int packet_snd(struct socket *sock, | 1085 | static int packet_snd(struct socket *sock, |
1060 | struct msghdr *msg, size_t len) | 1086 | struct msghdr *msg, size_t len) |
1061 | { | 1087 | { |
@@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock, | |||
1066 | __be16 proto; | 1092 | __be16 proto; |
1067 | unsigned char *addr; | 1093 | unsigned char *addr; |
1068 | int ifindex, err, reserve = 0; | 1094 | int ifindex, err, reserve = 0; |
1095 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
1096 | int offset = 0; | ||
1097 | int vnet_hdr_len; | ||
1098 | struct packet_sock *po = pkt_sk(sk); | ||
1099 | unsigned short gso_type = 0; | ||
1069 | 1100 | ||
1070 | /* | 1101 | /* |
1071 | * Get and verify the address. | 1102 | * Get and verify the address. |
1072 | */ | 1103 | */ |
1073 | 1104 | ||
1074 | if (saddr == NULL) { | 1105 | if (saddr == NULL) { |
1075 | struct packet_sock *po = pkt_sk(sk); | ||
1076 | |||
1077 | ifindex = po->ifindex; | 1106 | ifindex = po->ifindex; |
1078 | proto = po->num; | 1107 | proto = po->num; |
1079 | addr = NULL; | 1108 | addr = NULL; |
@@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock, | |||
1100 | if (!(dev->flags & IFF_UP)) | 1129 | if (!(dev->flags & IFF_UP)) |
1101 | goto out_unlock; | 1130 | goto out_unlock; |
1102 | 1131 | ||
1132 | if (po->has_vnet_hdr) { | ||
1133 | vnet_hdr_len = sizeof(vnet_hdr); | ||
1134 | |||
1135 | err = -EINVAL; | ||
1136 | if (len < vnet_hdr_len) | ||
1137 | goto out_unlock; | ||
1138 | |||
1139 | len -= vnet_hdr_len; | ||
1140 | |||
1141 | err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, | ||
1142 | vnet_hdr_len); | ||
1143 | if (err < 0) | ||
1144 | goto out_unlock; | ||
1145 | |||
1146 | if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && | ||
1147 | (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > | ||
1148 | vnet_hdr.hdr_len)) | ||
1149 | vnet_hdr.hdr_len = vnet_hdr.csum_start + | ||
1150 | vnet_hdr.csum_offset + 2; | ||
1151 | |||
1152 | err = -EINVAL; | ||
1153 | if (vnet_hdr.hdr_len > len) | ||
1154 | goto out_unlock; | ||
1155 | |||
1156 | if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { | ||
1157 | switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { | ||
1158 | case VIRTIO_NET_HDR_GSO_TCPV4: | ||
1159 | gso_type = SKB_GSO_TCPV4; | ||
1160 | break; | ||
1161 | case VIRTIO_NET_HDR_GSO_TCPV6: | ||
1162 | gso_type = SKB_GSO_TCPV6; | ||
1163 | break; | ||
1164 | case VIRTIO_NET_HDR_GSO_UDP: | ||
1165 | gso_type = SKB_GSO_UDP; | ||
1166 | break; | ||
1167 | default: | ||
1168 | goto out_unlock; | ||
1169 | } | ||
1170 | |||
1171 | if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) | ||
1172 | gso_type |= SKB_GSO_TCP_ECN; | ||
1173 | |||
1174 | if (vnet_hdr.gso_size == 0) | ||
1175 | goto out_unlock; | ||
1176 | |||
1177 | } | ||
1178 | } | ||
1179 | |||
1103 | err = -EMSGSIZE; | 1180 | err = -EMSGSIZE; |
1104 | if (len > dev->mtu+reserve) | 1181 | if (!gso_type && (len > dev->mtu+reserve)) |
1105 | goto out_unlock; | 1182 | goto out_unlock; |
1106 | 1183 | ||
1107 | skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), | 1184 | err = -ENOBUFS; |
1108 | msg->msg_flags & MSG_DONTWAIT, &err); | 1185 | skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), |
1186 | LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, | ||
1187 | msg->msg_flags & MSG_DONTWAIT, &err); | ||
1109 | if (skb == NULL) | 1188 | if (skb == NULL) |
1110 | goto out_unlock; | 1189 | goto out_unlock; |
1111 | 1190 | ||
1112 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 1191 | skb_set_network_header(skb, reserve); |
1113 | skb_reset_network_header(skb); | ||
1114 | 1192 | ||
1115 | err = -EINVAL; | 1193 | err = -EINVAL; |
1116 | if (sock->type == SOCK_DGRAM && | 1194 | if (sock->type == SOCK_DGRAM && |
1117 | dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) | 1195 | (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) |
1118 | goto out_free; | 1196 | goto out_free; |
1119 | 1197 | ||
1120 | /* Returns -EFAULT on error */ | 1198 | /* Returns -EFAULT on error */ |
1121 | err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | 1199 | err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); |
1122 | if (err) | 1200 | if (err) |
1123 | goto out_free; | 1201 | goto out_free; |
1124 | 1202 | ||
@@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock, | |||
1127 | skb->priority = sk->sk_priority; | 1205 | skb->priority = sk->sk_priority; |
1128 | skb->mark = sk->sk_mark; | 1206 | skb->mark = sk->sk_mark; |
1129 | 1207 | ||
1208 | if (po->has_vnet_hdr) { | ||
1209 | if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { | ||
1210 | if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, | ||
1211 | vnet_hdr.csum_offset)) { | ||
1212 | err = -EINVAL; | ||
1213 | goto out_free; | ||
1214 | } | ||
1215 | } | ||
1216 | |||
1217 | skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; | ||
1218 | skb_shinfo(skb)->gso_type = gso_type; | ||
1219 | |||
1220 | /* Header must be checked, and gso_segs computed. */ | ||
1221 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
1222 | skb_shinfo(skb)->gso_segs = 0; | ||
1223 | |||
1224 | len += vnet_hdr_len; | ||
1225 | } | ||
1226 | |||
1130 | /* | 1227 | /* |
1131 | * Now send it | 1228 | * Now send it |
1132 | */ | 1229 | */ |
@@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1420 | struct sk_buff *skb; | 1517 | struct sk_buff *skb; |
1421 | int copied, err; | 1518 | int copied, err; |
1422 | struct sockaddr_ll *sll; | 1519 | struct sockaddr_ll *sll; |
1520 | int vnet_hdr_len = 0; | ||
1423 | 1521 | ||
1424 | err = -EINVAL; | 1522 | err = -EINVAL; |
1425 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) | 1523 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) |
@@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1451 | if (skb == NULL) | 1549 | if (skb == NULL) |
1452 | goto out; | 1550 | goto out; |
1453 | 1551 | ||
1552 | if (pkt_sk(sk)->has_vnet_hdr) { | ||
1553 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
1554 | |||
1555 | err = -EINVAL; | ||
1556 | vnet_hdr_len = sizeof(vnet_hdr); | ||
1557 | if ((len -= vnet_hdr_len) < 0) | ||
1558 | goto out_free; | ||
1559 | |||
1560 | if (skb_is_gso(skb)) { | ||
1561 | struct skb_shared_info *sinfo = skb_shinfo(skb); | ||
1562 | |||
1563 | /* This is a hint as to how much should be linear. */ | ||
1564 | vnet_hdr.hdr_len = skb_headlen(skb); | ||
1565 | vnet_hdr.gso_size = sinfo->gso_size; | ||
1566 | if (sinfo->gso_type & SKB_GSO_TCPV4) | ||
1567 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; | ||
1568 | else if (sinfo->gso_type & SKB_GSO_TCPV6) | ||
1569 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; | ||
1570 | else if (sinfo->gso_type & SKB_GSO_UDP) | ||
1571 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; | ||
1572 | else if (sinfo->gso_type & SKB_GSO_FCOE) | ||
1573 | goto out_free; | ||
1574 | else | ||
1575 | BUG(); | ||
1576 | if (sinfo->gso_type & SKB_GSO_TCP_ECN) | ||
1577 | vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; | ||
1578 | } else | ||
1579 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; | ||
1580 | |||
1581 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
1582 | vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | ||
1583 | vnet_hdr.csum_start = skb->csum_start - | ||
1584 | skb_headroom(skb); | ||
1585 | vnet_hdr.csum_offset = skb->csum_offset; | ||
1586 | } /* else everything is zero */ | ||
1587 | |||
1588 | err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, | ||
1589 | vnet_hdr_len); | ||
1590 | if (err < 0) | ||
1591 | goto out_free; | ||
1592 | } | ||
1593 | |||
1454 | /* | 1594 | /* |
1455 | * If the address length field is there to be filled in, we fill | 1595 | * If the address length field is there to be filled in, we fill |
1456 | * it in now. | 1596 | * it in now. |
@@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1502 | * Free or return the buffer as appropriate. Again this | 1642 | * Free or return the buffer as appropriate. Again this |
1503 | * hides all the races and re-entrancy issues from us. | 1643 | * hides all the races and re-entrancy issues from us. |
1504 | */ | 1644 | */ |
1505 | err = (flags&MSG_TRUNC) ? skb->len : copied; | 1645 | err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); |
1506 | 1646 | ||
1507 | out_free: | 1647 | out_free: |
1508 | skb_free_datagram(sk, skb); | 1648 | skb_free_datagram(sk, skb); |
@@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1740 | 1880 | ||
1741 | if (optlen < sizeof(req)) | 1881 | if (optlen < sizeof(req)) |
1742 | return -EINVAL; | 1882 | return -EINVAL; |
1883 | if (pkt_sk(sk)->has_vnet_hdr) | ||
1884 | return -EINVAL; | ||
1743 | if (copy_from_user(&req, optval, sizeof(req))) | 1885 | if (copy_from_user(&req, optval, sizeof(req))) |
1744 | return -EFAULT; | 1886 | return -EFAULT; |
1745 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); | 1887 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); |
@@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1826 | po->origdev = !!val; | 1968 | po->origdev = !!val; |
1827 | return 0; | 1969 | return 0; |
1828 | } | 1970 | } |
1971 | case PACKET_VNET_HDR: | ||
1972 | { | ||
1973 | int val; | ||
1974 | |||
1975 | if (sock->type != SOCK_RAW) | ||
1976 | return -EINVAL; | ||
1977 | if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) | ||
1978 | return -EBUSY; | ||
1979 | if (optlen < sizeof(val)) | ||
1980 | return -EINVAL; | ||
1981 | if (copy_from_user(&val, optval, sizeof(val))) | ||
1982 | return -EFAULT; | ||
1983 | |||
1984 | po->has_vnet_hdr = !!val; | ||
1985 | return 0; | ||
1986 | } | ||
1829 | default: | 1987 | default: |
1830 | return -ENOPROTOOPT; | 1988 | return -ENOPROTOOPT; |
1831 | } | 1989 | } |
@@ -1876,6 +2034,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
1876 | 2034 | ||
1877 | data = &val; | 2035 | data = &val; |
1878 | break; | 2036 | break; |
2037 | case PACKET_VNET_HDR: | ||
2038 | if (len > sizeof(int)) | ||
2039 | len = sizeof(int); | ||
2040 | val = po->has_vnet_hdr; | ||
2041 | |||
2042 | data = &val; | ||
2043 | break; | ||
1879 | #ifdef CONFIG_PACKET_MMAP | 2044 | #ifdef CONFIG_PACKET_MMAP |
1880 | case PACKET_VERSION: | 2045 | case PACKET_VERSION: |
1881 | if (len > sizeof(int)) | 2046 | if (len > sizeof(int)) |