diff options
| author | Sridhar Samudrala <sri@us.ibm.com> | 2010-02-04 23:24:10 -0500 |
|---|---|---|
| committer | David S. Miller <davem@davemloft.net> | 2010-02-04 23:24:10 -0500 |
| commit | bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 (patch) | |
| tree | 63d19326b491b507f246ea4c3ef410d62602e794 /net/packet | |
| parent | 746079dabcf74be2a16dc983ac597156e3d2e051 (diff) | |
packet: Add GSO/csum offload support.
This patch adds GSO/checksum offload to af_packet sockets using
virtio_net_hdr. It is based on Rusty's patch that added this support to tun.
It allows GSO/checksum offload to be enabled when using a raw socket
backend with virtio_net.
Adds a PACKET_VNET_HDR socket option to prepend a virtio_net_hdr in the
receive path and to process/skip the virtio_net_hdr in the send path. This
option is only allowed with SOCK_RAW sockets attached to ethernet
type devices.
v2 updates
----------
Michael's Comments
- Perform the length check in packet_snd() when GSO is off, even when
vnet_hdr is present.
- Check for the SKB_GSO_FCOE type and return -EINVAL.
- Don't allow a tx/rx ring when vnet_hdr is enabled.
Herbert's Comments
- Removed ethernet-specific code.
- The protocol value is assumed to be passed in by the caller.
Signed-off-by: Sridhar Samudrala <sri@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/packet')
| -rw-r--r-- | net/packet/af_packet.c | 187 |
1 files changed, 176 insertions, 11 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 53633c5fdb1d..178e2937bbaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
| @@ -80,6 +80,7 @@ | |||
| 80 | #include <linux/init.h> | 80 | #include <linux/init.h> |
| 81 | #include <linux/mutex.h> | 81 | #include <linux/mutex.h> |
| 82 | #include <linux/if_vlan.h> | 82 | #include <linux/if_vlan.h> |
| 83 | #include <linux/virtio_net.h> | ||
| 83 | 84 | ||
| 84 | #ifdef CONFIG_INET | 85 | #ifdef CONFIG_INET |
| 85 | #include <net/inet_common.h> | 86 | #include <net/inet_common.h> |
| @@ -193,7 +194,8 @@ struct packet_sock { | |||
| 193 | struct mutex pg_vec_lock; | 194 | struct mutex pg_vec_lock; |
| 194 | unsigned int running:1, /* prot_hook is attached*/ | 195 | unsigned int running:1, /* prot_hook is attached*/ |
| 195 | auxdata:1, | 196 | auxdata:1, |
| 196 | origdev:1; | 197 | origdev:1, |
| 198 | has_vnet_hdr:1; | ||
| 197 | int ifindex; /* bound device */ | 199 | int ifindex; /* bound device */ |
| 198 | __be16 num; | 200 | __be16 num; |
| 199 | struct packet_mclist *mclist; | 201 | struct packet_mclist *mclist; |
| @@ -1056,6 +1058,30 @@ out: | |||
| 1056 | } | 1058 | } |
| 1057 | #endif | 1059 | #endif |
| 1058 | 1060 | ||
| 1061 | static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, | ||
| 1062 | size_t reserve, size_t len, | ||
| 1063 | size_t linear, int noblock, | ||
| 1064 | int *err) | ||
| 1065 | { | ||
| 1066 | struct sk_buff *skb; | ||
| 1067 | |||
| 1068 | /* Under a page? Don't bother with paged skb. */ | ||
| 1069 | if (prepad + len < PAGE_SIZE || !linear) | ||
| 1070 | linear = len; | ||
| 1071 | |||
| 1072 | skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, | ||
| 1073 | err); | ||
| 1074 | if (!skb) | ||
| 1075 | return NULL; | ||
| 1076 | |||
| 1077 | skb_reserve(skb, reserve); | ||
| 1078 | skb_put(skb, linear); | ||
| 1079 | skb->data_len = len - linear; | ||
| 1080 | skb->len += len - linear; | ||
| 1081 | |||
| 1082 | return skb; | ||
| 1083 | } | ||
| 1084 | |||
| 1059 | static int packet_snd(struct socket *sock, | 1085 | static int packet_snd(struct socket *sock, |
| 1060 | struct msghdr *msg, size_t len) | 1086 | struct msghdr *msg, size_t len) |
| 1061 | { | 1087 | { |
| @@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock, | |||
| 1066 | __be16 proto; | 1092 | __be16 proto; |
| 1067 | unsigned char *addr; | 1093 | unsigned char *addr; |
| 1068 | int ifindex, err, reserve = 0; | 1094 | int ifindex, err, reserve = 0; |
| 1095 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
| 1096 | int offset = 0; | ||
| 1097 | int vnet_hdr_len; | ||
| 1098 | struct packet_sock *po = pkt_sk(sk); | ||
| 1099 | unsigned short gso_type = 0; | ||
| 1069 | 1100 | ||
| 1070 | /* | 1101 | /* |
| 1071 | * Get and verify the address. | 1102 | * Get and verify the address. |
| 1072 | */ | 1103 | */ |
| 1073 | 1104 | ||
| 1074 | if (saddr == NULL) { | 1105 | if (saddr == NULL) { |
| 1075 | struct packet_sock *po = pkt_sk(sk); | ||
| 1076 | |||
| 1077 | ifindex = po->ifindex; | 1106 | ifindex = po->ifindex; |
| 1078 | proto = po->num; | 1107 | proto = po->num; |
| 1079 | addr = NULL; | 1108 | addr = NULL; |
| @@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock, | |||
| 1100 | if (!(dev->flags & IFF_UP)) | 1129 | if (!(dev->flags & IFF_UP)) |
| 1101 | goto out_unlock; | 1130 | goto out_unlock; |
| 1102 | 1131 | ||
| 1132 | if (po->has_vnet_hdr) { | ||
| 1133 | vnet_hdr_len = sizeof(vnet_hdr); | ||
| 1134 | |||
| 1135 | err = -EINVAL; | ||
| 1136 | if (len < vnet_hdr_len) | ||
| 1137 | goto out_unlock; | ||
| 1138 | |||
| 1139 | len -= vnet_hdr_len; | ||
| 1140 | |||
| 1141 | err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, | ||
| 1142 | vnet_hdr_len); | ||
| 1143 | if (err < 0) | ||
| 1144 | goto out_unlock; | ||
| 1145 | |||
| 1146 | if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && | ||
| 1147 | (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > | ||
| 1148 | vnet_hdr.hdr_len)) | ||
| 1149 | vnet_hdr.hdr_len = vnet_hdr.csum_start + | ||
| 1150 | vnet_hdr.csum_offset + 2; | ||
| 1151 | |||
| 1152 | err = -EINVAL; | ||
| 1153 | if (vnet_hdr.hdr_len > len) | ||
| 1154 | goto out_unlock; | ||
| 1155 | |||
| 1156 | if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { | ||
| 1157 | switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { | ||
| 1158 | case VIRTIO_NET_HDR_GSO_TCPV4: | ||
| 1159 | gso_type = SKB_GSO_TCPV4; | ||
| 1160 | break; | ||
| 1161 | case VIRTIO_NET_HDR_GSO_TCPV6: | ||
| 1162 | gso_type = SKB_GSO_TCPV6; | ||
| 1163 | break; | ||
| 1164 | case VIRTIO_NET_HDR_GSO_UDP: | ||
| 1165 | gso_type = SKB_GSO_UDP; | ||
| 1166 | break; | ||
| 1167 | default: | ||
| 1168 | goto out_unlock; | ||
| 1169 | } | ||
| 1170 | |||
| 1171 | if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) | ||
| 1172 | gso_type |= SKB_GSO_TCP_ECN; | ||
| 1173 | |||
| 1174 | if (vnet_hdr.gso_size == 0) | ||
| 1175 | goto out_unlock; | ||
| 1176 | |||
| 1177 | } | ||
| 1178 | } | ||
| 1179 | |||
| 1103 | err = -EMSGSIZE; | 1180 | err = -EMSGSIZE; |
| 1104 | if (len > dev->mtu+reserve) | 1181 | if (!gso_type && (len > dev->mtu+reserve)) |
| 1105 | goto out_unlock; | 1182 | goto out_unlock; |
| 1106 | 1183 | ||
| 1107 | skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), | 1184 | err = -ENOBUFS; |
| 1108 | msg->msg_flags & MSG_DONTWAIT, &err); | 1185 | skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), |
| 1186 | LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, | ||
| 1187 | msg->msg_flags & MSG_DONTWAIT, &err); | ||
| 1109 | if (skb == NULL) | 1188 | if (skb == NULL) |
| 1110 | goto out_unlock; | 1189 | goto out_unlock; |
| 1111 | 1190 | ||
| 1112 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 1191 | skb_set_network_header(skb, reserve); |
| 1113 | skb_reset_network_header(skb); | ||
| 1114 | 1192 | ||
| 1115 | err = -EINVAL; | 1193 | err = -EINVAL; |
| 1116 | if (sock->type == SOCK_DGRAM && | 1194 | if (sock->type == SOCK_DGRAM && |
| 1117 | dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) | 1195 | (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) |
| 1118 | goto out_free; | 1196 | goto out_free; |
| 1119 | 1197 | ||
| 1120 | /* Returns -EFAULT on error */ | 1198 | /* Returns -EFAULT on error */ |
| 1121 | err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | 1199 | err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); |
| 1122 | if (err) | 1200 | if (err) |
| 1123 | goto out_free; | 1201 | goto out_free; |
| 1124 | 1202 | ||
| @@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock, | |||
| 1127 | skb->priority = sk->sk_priority; | 1205 | skb->priority = sk->sk_priority; |
| 1128 | skb->mark = sk->sk_mark; | 1206 | skb->mark = sk->sk_mark; |
| 1129 | 1207 | ||
| 1208 | if (po->has_vnet_hdr) { | ||
| 1209 | if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { | ||
| 1210 | if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, | ||
| 1211 | vnet_hdr.csum_offset)) { | ||
| 1212 | err = -EINVAL; | ||
| 1213 | goto out_free; | ||
| 1214 | } | ||
| 1215 | } | ||
| 1216 | |||
| 1217 | skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; | ||
| 1218 | skb_shinfo(skb)->gso_type = gso_type; | ||
| 1219 | |||
| 1220 | /* Header must be checked, and gso_segs computed. */ | ||
| 1221 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
| 1222 | skb_shinfo(skb)->gso_segs = 0; | ||
| 1223 | |||
| 1224 | len += vnet_hdr_len; | ||
| 1225 | } | ||
| 1226 | |||
| 1130 | /* | 1227 | /* |
| 1131 | * Now send it | 1228 | * Now send it |
| 1132 | */ | 1229 | */ |
| @@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1420 | struct sk_buff *skb; | 1517 | struct sk_buff *skb; |
| 1421 | int copied, err; | 1518 | int copied, err; |
| 1422 | struct sockaddr_ll *sll; | 1519 | struct sockaddr_ll *sll; |
| 1520 | int vnet_hdr_len = 0; | ||
| 1423 | 1521 | ||
| 1424 | err = -EINVAL; | 1522 | err = -EINVAL; |
| 1425 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) | 1523 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) |
| @@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1451 | if (skb == NULL) | 1549 | if (skb == NULL) |
| 1452 | goto out; | 1550 | goto out; |
| 1453 | 1551 | ||
| 1552 | if (pkt_sk(sk)->has_vnet_hdr) { | ||
| 1553 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
| 1554 | |||
| 1555 | err = -EINVAL; | ||
| 1556 | vnet_hdr_len = sizeof(vnet_hdr); | ||
| 1557 | if ((len -= vnet_hdr_len) < 0) | ||
| 1558 | goto out_free; | ||
| 1559 | |||
| 1560 | if (skb_is_gso(skb)) { | ||
| 1561 | struct skb_shared_info *sinfo = skb_shinfo(skb); | ||
| 1562 | |||
| 1563 | /* This is a hint as to how much should be linear. */ | ||
| 1564 | vnet_hdr.hdr_len = skb_headlen(skb); | ||
| 1565 | vnet_hdr.gso_size = sinfo->gso_size; | ||
| 1566 | if (sinfo->gso_type & SKB_GSO_TCPV4) | ||
| 1567 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; | ||
| 1568 | else if (sinfo->gso_type & SKB_GSO_TCPV6) | ||
| 1569 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; | ||
| 1570 | else if (sinfo->gso_type & SKB_GSO_UDP) | ||
| 1571 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; | ||
| 1572 | else if (sinfo->gso_type & SKB_GSO_FCOE) | ||
| 1573 | goto out_free; | ||
| 1574 | else | ||
| 1575 | BUG(); | ||
| 1576 | if (sinfo->gso_type & SKB_GSO_TCP_ECN) | ||
| 1577 | vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; | ||
| 1578 | } else | ||
| 1579 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; | ||
| 1580 | |||
| 1581 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
| 1582 | vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | ||
| 1583 | vnet_hdr.csum_start = skb->csum_start - | ||
| 1584 | skb_headroom(skb); | ||
| 1585 | vnet_hdr.csum_offset = skb->csum_offset; | ||
| 1586 | } /* else everything is zero */ | ||
| 1587 | |||
| 1588 | err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, | ||
| 1589 | vnet_hdr_len); | ||
| 1590 | if (err < 0) | ||
| 1591 | goto out_free; | ||
| 1592 | } | ||
| 1593 | |||
| 1454 | /* | 1594 | /* |
| 1455 | * If the address length field is there to be filled in, we fill | 1595 | * If the address length field is there to be filled in, we fill |
| 1456 | * it in now. | 1596 | * it in now. |
| @@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
| 1502 | * Free or return the buffer as appropriate. Again this | 1642 | * Free or return the buffer as appropriate. Again this |
| 1503 | * hides all the races and re-entrancy issues from us. | 1643 | * hides all the races and re-entrancy issues from us. |
| 1504 | */ | 1644 | */ |
| 1505 | err = (flags&MSG_TRUNC) ? skb->len : copied; | 1645 | err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); |
| 1506 | 1646 | ||
| 1507 | out_free: | 1647 | out_free: |
| 1508 | skb_free_datagram(sk, skb); | 1648 | skb_free_datagram(sk, skb); |
| @@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
| 1740 | 1880 | ||
| 1741 | if (optlen < sizeof(req)) | 1881 | if (optlen < sizeof(req)) |
| 1742 | return -EINVAL; | 1882 | return -EINVAL; |
| 1883 | if (pkt_sk(sk)->has_vnet_hdr) | ||
| 1884 | return -EINVAL; | ||
| 1743 | if (copy_from_user(&req, optval, sizeof(req))) | 1885 | if (copy_from_user(&req, optval, sizeof(req))) |
| 1744 | return -EFAULT; | 1886 | return -EFAULT; |
| 1745 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); | 1887 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); |
| @@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
| 1826 | po->origdev = !!val; | 1968 | po->origdev = !!val; |
| 1827 | return 0; | 1969 | return 0; |
| 1828 | } | 1970 | } |
| 1971 | case PACKET_VNET_HDR: | ||
| 1972 | { | ||
| 1973 | int val; | ||
| 1974 | |||
| 1975 | if (sock->type != SOCK_RAW) | ||
| 1976 | return -EINVAL; | ||
| 1977 | if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) | ||
| 1978 | return -EBUSY; | ||
| 1979 | if (optlen < sizeof(val)) | ||
| 1980 | return -EINVAL; | ||
| 1981 | if (copy_from_user(&val, optval, sizeof(val))) | ||
| 1982 | return -EFAULT; | ||
| 1983 | |||
| 1984 | po->has_vnet_hdr = !!val; | ||
| 1985 | return 0; | ||
| 1986 | } | ||
| 1829 | default: | 1987 | default: |
| 1830 | return -ENOPROTOOPT; | 1988 | return -ENOPROTOOPT; |
| 1831 | } | 1989 | } |
| @@ -1876,6 +2034,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
| 1876 | 2034 | ||
| 1877 | data = &val; | 2035 | data = &val; |
| 1878 | break; | 2036 | break; |
| 2037 | case PACKET_VNET_HDR: | ||
| 2038 | if (len > sizeof(int)) | ||
| 2039 | len = sizeof(int); | ||
| 2040 | val = po->has_vnet_hdr; | ||
| 2041 | |||
| 2042 | data = &val; | ||
| 2043 | break; | ||
| 1879 | #ifdef CONFIG_PACKET_MMAP | 2044 | #ifdef CONFIG_PACKET_MMAP |
| 1880 | case PACKET_VERSION: | 2045 | case PACKET_VERSION: |
| 1881 | if (len > sizeof(int)) | 2046 | if (len > sizeof(int)) |
