about | summary | refs | log | tree | commit | diff | stats
path: root/net/packet/af_packet.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r-- net/packet/af_packet.c 413
1 file changed, 315 insertions, 98 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f126d18dbdc4..9a17f28b1253 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -60,6 +60,7 @@
60#include <linux/wireless.h> 60#include <linux/wireless.h>
61#include <linux/kernel.h> 61#include <linux/kernel.h>
62#include <linux/kmod.h> 62#include <linux/kmod.h>
63#include <linux/slab.h>
63#include <net/net_namespace.h> 64#include <net/net_namespace.h>
64#include <net/ip.h> 65#include <net/ip.h>
65#include <net/protocol.h> 66#include <net/protocol.h>
@@ -80,6 +81,9 @@
80#include <linux/init.h> 81#include <linux/init.h>
81#include <linux/mutex.h> 82#include <linux/mutex.h>
82#include <linux/if_vlan.h> 83#include <linux/if_vlan.h>
84#include <linux/virtio_net.h>
85#include <linux/errqueue.h>
86#include <linux/net_tstamp.h>
83 87
84#ifdef CONFIG_INET 88#ifdef CONFIG_INET
85#include <net/inet_common.h> 89#include <net/inet_common.h>
@@ -156,7 +160,6 @@ struct packet_mreq_max {
156 unsigned char mr_address[MAX_ADDR_LEN]; 160 unsigned char mr_address[MAX_ADDR_LEN];
157}; 161};
158 162
159#ifdef CONFIG_PACKET_MMAP
160static int packet_set_ring(struct sock *sk, struct tpacket_req *req, 163static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
161 int closing, int tx_ring); 164 int closing, int tx_ring);
162 165
@@ -176,7 +179,6 @@ struct packet_ring_buffer {
176 179
177struct packet_sock; 180struct packet_sock;
178static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); 181static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
179#endif
180 182
181static void packet_flush_mclist(struct sock *sk); 183static void packet_flush_mclist(struct sock *sk);
182 184
@@ -184,26 +186,24 @@ struct packet_sock {
184 /* struct sock has to be the first member of packet_sock */ 186 /* struct sock has to be the first member of packet_sock */
185 struct sock sk; 187 struct sock sk;
186 struct tpacket_stats stats; 188 struct tpacket_stats stats;
187#ifdef CONFIG_PACKET_MMAP
188 struct packet_ring_buffer rx_ring; 189 struct packet_ring_buffer rx_ring;
189 struct packet_ring_buffer tx_ring; 190 struct packet_ring_buffer tx_ring;
190 int copy_thresh; 191 int copy_thresh;
191#endif
192 spinlock_t bind_lock; 192 spinlock_t bind_lock;
193 struct mutex pg_vec_lock; 193 struct mutex pg_vec_lock;
194 unsigned int running:1, /* prot_hook is attached*/ 194 unsigned int running:1, /* prot_hook is attached*/
195 auxdata:1, 195 auxdata:1,
196 origdev:1; 196 origdev:1,
197 has_vnet_hdr:1;
197 int ifindex; /* bound device */ 198 int ifindex; /* bound device */
198 __be16 num; 199 __be16 num;
199 struct packet_mclist *mclist; 200 struct packet_mclist *mclist;
200#ifdef CONFIG_PACKET_MMAP
201 atomic_t mapped; 201 atomic_t mapped;
202 enum tpacket_versions tp_version; 202 enum tpacket_versions tp_version;
203 unsigned int tp_hdrlen; 203 unsigned int tp_hdrlen;
204 unsigned int tp_reserve; 204 unsigned int tp_reserve;
205 unsigned int tp_loss:1; 205 unsigned int tp_loss:1;
206#endif 206 unsigned int tp_tstamp;
207 struct packet_type prot_hook ____cacheline_aligned_in_smp; 207 struct packet_type prot_hook ____cacheline_aligned_in_smp;
208}; 208};
209 209
@@ -217,8 +217,6 @@ struct packet_skb_cb {
217 217
218#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) 218#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
219 219
220#ifdef CONFIG_PACKET_MMAP
221
222static void __packet_set_status(struct packet_sock *po, void *frame, int status) 220static void __packet_set_status(struct packet_sock *po, void *frame, int status)
223{ 221{
224 union { 222 union {
@@ -313,8 +311,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff)
313 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; 311 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
314} 312}
315 313
316#endif
317
318static inline struct packet_sock *pkt_sk(struct sock *sk) 314static inline struct packet_sock *pkt_sk(struct sock *sk)
319{ 315{
320 return (struct packet_sock *)sk; 316 return (struct packet_sock *)sk;
@@ -322,6 +318,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
322 318
323static void packet_sock_destruct(struct sock *sk) 319static void packet_sock_destruct(struct sock *sk)
324{ 320{
321 skb_queue_purge(&sk->sk_error_queue);
322
325 WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 323 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
326 WARN_ON(atomic_read(&sk->sk_wmem_alloc)); 324 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
327 325
@@ -490,6 +488,9 @@ retry:
490 skb->dev = dev; 488 skb->dev = dev;
491 skb->priority = sk->sk_priority; 489 skb->priority = sk->sk_priority;
492 skb->mark = sk->sk_mark; 490 skb->mark = sk->sk_mark;
491 err = sock_tx_timestamp(msg, sk, skb_tx(skb));
492 if (err < 0)
493 goto out_unlock;
493 494
494 dev_queue_xmit(skb); 495 dev_queue_xmit(skb);
495 rcu_read_unlock(); 496 rcu_read_unlock();
@@ -508,7 +509,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
508 struct sk_filter *filter; 509 struct sk_filter *filter;
509 510
510 rcu_read_lock_bh(); 511 rcu_read_lock_bh();
511 filter = rcu_dereference(sk->sk_filter); 512 filter = rcu_dereference_bh(sk->sk_filter);
512 if (filter != NULL) 513 if (filter != NULL)
513 res = sk_run_filter(skb, filter->insns, filter->len); 514 res = sk_run_filter(skb, filter->insns, filter->len);
514 rcu_read_unlock_bh(); 515 rcu_read_unlock_bh();
@@ -638,7 +639,6 @@ drop:
638 return 0; 639 return 0;
639} 640}
640 641
641#ifdef CONFIG_PACKET_MMAP
642static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, 642static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
643 struct packet_type *pt, struct net_device *orig_dev) 643 struct packet_type *pt, struct net_device *orig_dev)
644{ 644{
@@ -658,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
658 struct sk_buff *copy_skb = NULL; 658 struct sk_buff *copy_skb = NULL;
659 struct timeval tv; 659 struct timeval tv;
660 struct timespec ts; 660 struct timespec ts;
661 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
661 662
662 if (skb->pkt_type == PACKET_LOOPBACK) 663 if (skb->pkt_type == PACKET_LOOPBACK)
663 goto drop; 664 goto drop;
@@ -739,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
739 h.h1->tp_snaplen = snaplen; 740 h.h1->tp_snaplen = snaplen;
740 h.h1->tp_mac = macoff; 741 h.h1->tp_mac = macoff;
741 h.h1->tp_net = netoff; 742 h.h1->tp_net = netoff;
742 if (skb->tstamp.tv64) 743 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
744 && shhwtstamps->syststamp.tv64)
745 tv = ktime_to_timeval(shhwtstamps->syststamp);
746 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
747 && shhwtstamps->hwtstamp.tv64)
748 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
749 else if (skb->tstamp.tv64)
743 tv = ktime_to_timeval(skb->tstamp); 750 tv = ktime_to_timeval(skb->tstamp);
744 else 751 else
745 do_gettimeofday(&tv); 752 do_gettimeofday(&tv);
@@ -752,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
752 h.h2->tp_snaplen = snaplen; 759 h.h2->tp_snaplen = snaplen;
753 h.h2->tp_mac = macoff; 760 h.h2->tp_mac = macoff;
754 h.h2->tp_net = netoff; 761 h.h2->tp_net = netoff;
755 if (skb->tstamp.tv64) 762 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
763 && shhwtstamps->syststamp.tv64)
764 ts = ktime_to_timespec(shhwtstamps->syststamp);
765 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
766 && shhwtstamps->hwtstamp.tv64)
767 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
768 else if (skb->tstamp.tv64)
756 ts = ktime_to_timespec(skb->tstamp); 769 ts = ktime_to_timespec(skb->tstamp);
757 else 770 else
758 getnstimeofday(&ts); 771 getnstimeofday(&ts);
@@ -1054,7 +1067,30 @@ out:
1054 mutex_unlock(&po->pg_vec_lock); 1067 mutex_unlock(&po->pg_vec_lock);
1055 return err; 1068 return err;
1056} 1069}
1057#endif 1070
1071static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1072 size_t reserve, size_t len,
1073 size_t linear, int noblock,
1074 int *err)
1075{
1076 struct sk_buff *skb;
1077
1078 /* Under a page? Don't bother with paged skb. */
1079 if (prepad + len < PAGE_SIZE || !linear)
1080 linear = len;
1081
1082 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1083 err);
1084 if (!skb)
1085 return NULL;
1086
1087 skb_reserve(skb, reserve);
1088 skb_put(skb, linear);
1089 skb->data_len = len - linear;
1090 skb->len += len - linear;
1091
1092 return skb;
1093}
1058 1094
1059static int packet_snd(struct socket *sock, 1095static int packet_snd(struct socket *sock,
1060 struct msghdr *msg, size_t len) 1096 struct msghdr *msg, size_t len)
@@ -1066,14 +1102,17 @@ static int packet_snd(struct socket *sock,
1066 __be16 proto; 1102 __be16 proto;
1067 unsigned char *addr; 1103 unsigned char *addr;
1068 int ifindex, err, reserve = 0; 1104 int ifindex, err, reserve = 0;
1105 struct virtio_net_hdr vnet_hdr = { 0 };
1106 int offset = 0;
1107 int vnet_hdr_len;
1108 struct packet_sock *po = pkt_sk(sk);
1109 unsigned short gso_type = 0;
1069 1110
1070 /* 1111 /*
1071 * Get and verify the address. 1112 * Get and verify the address.
1072 */ 1113 */
1073 1114
1074 if (saddr == NULL) { 1115 if (saddr == NULL) {
1075 struct packet_sock *po = pkt_sk(sk);
1076
1077 ifindex = po->ifindex; 1116 ifindex = po->ifindex;
1078 proto = po->num; 1117 proto = po->num;
1079 addr = NULL; 1118 addr = NULL;
@@ -1100,33 +1139,104 @@ static int packet_snd(struct socket *sock,
1100 if (!(dev->flags & IFF_UP)) 1139 if (!(dev->flags & IFF_UP))
1101 goto out_unlock; 1140 goto out_unlock;
1102 1141
1142 if (po->has_vnet_hdr) {
1143 vnet_hdr_len = sizeof(vnet_hdr);
1144
1145 err = -EINVAL;
1146 if (len < vnet_hdr_len)
1147 goto out_unlock;
1148
1149 len -= vnet_hdr_len;
1150
1151 err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
1152 vnet_hdr_len);
1153 if (err < 0)
1154 goto out_unlock;
1155
1156 if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
1157 (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
1158 vnet_hdr.hdr_len))
1159 vnet_hdr.hdr_len = vnet_hdr.csum_start +
1160 vnet_hdr.csum_offset + 2;
1161
1162 err = -EINVAL;
1163 if (vnet_hdr.hdr_len > len)
1164 goto out_unlock;
1165
1166 if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
1167 switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
1168 case VIRTIO_NET_HDR_GSO_TCPV4:
1169 gso_type = SKB_GSO_TCPV4;
1170 break;
1171 case VIRTIO_NET_HDR_GSO_TCPV6:
1172 gso_type = SKB_GSO_TCPV6;
1173 break;
1174 case VIRTIO_NET_HDR_GSO_UDP:
1175 gso_type = SKB_GSO_UDP;
1176 break;
1177 default:
1178 goto out_unlock;
1179 }
1180
1181 if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
1182 gso_type |= SKB_GSO_TCP_ECN;
1183
1184 if (vnet_hdr.gso_size == 0)
1185 goto out_unlock;
1186
1187 }
1188 }
1189
1103 err = -EMSGSIZE; 1190 err = -EMSGSIZE;
1104 if (len > dev->mtu+reserve) 1191 if (!gso_type && (len > dev->mtu+reserve))
1105 goto out_unlock; 1192 goto out_unlock;
1106 1193
1107 skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), 1194 err = -ENOBUFS;
1108 msg->msg_flags & MSG_DONTWAIT, &err); 1195 skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
1196 LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
1197 msg->msg_flags & MSG_DONTWAIT, &err);
1109 if (skb == NULL) 1198 if (skb == NULL)
1110 goto out_unlock; 1199 goto out_unlock;
1111 1200
1112 skb_reserve(skb, LL_RESERVED_SPACE(dev)); 1201 skb_set_network_header(skb, reserve);
1113 skb_reset_network_header(skb);
1114 1202
1115 err = -EINVAL; 1203 err = -EINVAL;
1116 if (sock->type == SOCK_DGRAM && 1204 if (sock->type == SOCK_DGRAM &&
1117 dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) 1205 (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
1118 goto out_free; 1206 goto out_free;
1119 1207
1120 /* Returns -EFAULT on error */ 1208 /* Returns -EFAULT on error */
1121 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1122 if (err) 1210 if (err)
1123 goto out_free; 1211 goto out_free;
1212 err = sock_tx_timestamp(msg, sk, skb_tx(skb));
1213 if (err < 0)
1214 goto out_free;
1124 1215
1125 skb->protocol = proto; 1216 skb->protocol = proto;
1126 skb->dev = dev; 1217 skb->dev = dev;
1127 skb->priority = sk->sk_priority; 1218 skb->priority = sk->sk_priority;
1128 skb->mark = sk->sk_mark; 1219 skb->mark = sk->sk_mark;
1129 1220
1221 if (po->has_vnet_hdr) {
1222 if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
1223 if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
1224 vnet_hdr.csum_offset)) {
1225 err = -EINVAL;
1226 goto out_free;
1227 }
1228 }
1229
1230 skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
1231 skb_shinfo(skb)->gso_type = gso_type;
1232
1233 /* Header must be checked, and gso_segs computed. */
1234 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
1235 skb_shinfo(skb)->gso_segs = 0;
1236
1237 len += vnet_hdr_len;
1238 }
1239
1130 /* 1240 /*
1131 * Now send it 1241 * Now send it
1132 */ 1242 */
@@ -1151,13 +1261,11 @@ out:
1151static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, 1261static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1152 struct msghdr *msg, size_t len) 1262 struct msghdr *msg, size_t len)
1153{ 1263{
1154#ifdef CONFIG_PACKET_MMAP
1155 struct sock *sk = sock->sk; 1264 struct sock *sk = sock->sk;
1156 struct packet_sock *po = pkt_sk(sk); 1265 struct packet_sock *po = pkt_sk(sk);
1157 if (po->tx_ring.pg_vec) 1266 if (po->tx_ring.pg_vec)
1158 return tpacket_snd(po, msg); 1267 return tpacket_snd(po, msg);
1159 else 1268 else
1160#endif
1161 return packet_snd(sock, msg, len); 1269 return packet_snd(sock, msg, len);
1162} 1270}
1163 1271
@@ -1171,9 +1279,7 @@ static int packet_release(struct socket *sock)
1171 struct sock *sk = sock->sk; 1279 struct sock *sk = sock->sk;
1172 struct packet_sock *po; 1280 struct packet_sock *po;
1173 struct net *net; 1281 struct net *net;
1174#ifdef CONFIG_PACKET_MMAP
1175 struct tpacket_req req; 1282 struct tpacket_req req;
1176#endif
1177 1283
1178 if (!sk) 1284 if (!sk)
1179 return 0; 1285 return 0;
@@ -1181,28 +1287,25 @@ static int packet_release(struct socket *sock)
1181 net = sock_net(sk); 1287 net = sock_net(sk);
1182 po = pkt_sk(sk); 1288 po = pkt_sk(sk);
1183 1289
1184 write_lock_bh(&net->packet.sklist_lock); 1290 spin_lock_bh(&net->packet.sklist_lock);
1185 sk_del_node_init(sk); 1291 sk_del_node_init_rcu(sk);
1186 sock_prot_inuse_add(net, sk->sk_prot, -1); 1292 sock_prot_inuse_add(net, sk->sk_prot, -1);
1187 write_unlock_bh(&net->packet.sklist_lock); 1293 spin_unlock_bh(&net->packet.sklist_lock);
1188
1189 /*
1190 * Unhook packet receive handler.
1191 */
1192 1294
1295 spin_lock(&po->bind_lock);
1193 if (po->running) { 1296 if (po->running) {
1194 /* 1297 /*
1195 * Remove the protocol hook 1298 * Remove from protocol table
1196 */ 1299 */
1197 dev_remove_pack(&po->prot_hook);
1198 po->running = 0; 1300 po->running = 0;
1199 po->num = 0; 1301 po->num = 0;
1302 __dev_remove_pack(&po->prot_hook);
1200 __sock_put(sk); 1303 __sock_put(sk);
1201 } 1304 }
1305 spin_unlock(&po->bind_lock);
1202 1306
1203 packet_flush_mclist(sk); 1307 packet_flush_mclist(sk);
1204 1308
1205#ifdef CONFIG_PACKET_MMAP
1206 memset(&req, 0, sizeof(req)); 1309 memset(&req, 0, sizeof(req));
1207 1310
1208 if (po->rx_ring.pg_vec) 1311 if (po->rx_ring.pg_vec)
@@ -1210,12 +1313,11 @@ static int packet_release(struct socket *sock)
1210 1313
1211 if (po->tx_ring.pg_vec) 1314 if (po->tx_ring.pg_vec)
1212 packet_set_ring(sk, &req, 1, 1); 1315 packet_set_ring(sk, &req, 1, 1);
1213#endif
1214 1316
1317 synchronize_net();
1215 /* 1318 /*
1216 * Now the socket is dead. No more input will appear. 1319 * Now the socket is dead. No more input will appear.
1217 */ 1320 */
1218
1219 sock_orphan(sk); 1321 sock_orphan(sk);
1220 sock->sk = NULL; 1322 sock->sk = NULL;
1221 1323
@@ -1399,15 +1501,61 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
1399 po->running = 1; 1501 po->running = 1;
1400 } 1502 }
1401 1503
1402 write_lock_bh(&net->packet.sklist_lock); 1504 spin_lock_bh(&net->packet.sklist_lock);
1403 sk_add_node(sk, &net->packet.sklist); 1505 sk_add_node_rcu(sk, &net->packet.sklist);
1404 sock_prot_inuse_add(net, &packet_proto, 1); 1506 sock_prot_inuse_add(net, &packet_proto, 1);
1405 write_unlock_bh(&net->packet.sklist_lock); 1507 spin_unlock_bh(&net->packet.sklist_lock);
1508
1406 return 0; 1509 return 0;
1407out: 1510out:
1408 return err; 1511 return err;
1409} 1512}
1410 1513
1514static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
1515{
1516 struct sock_exterr_skb *serr;
1517 struct sk_buff *skb, *skb2;
1518 int copied, err;
1519
1520 err = -EAGAIN;
1521 skb = skb_dequeue(&sk->sk_error_queue);
1522 if (skb == NULL)
1523 goto out;
1524
1525 copied = skb->len;
1526 if (copied > len) {
1527 msg->msg_flags |= MSG_TRUNC;
1528 copied = len;
1529 }
1530 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1531 if (err)
1532 goto out_free_skb;
1533
1534 sock_recv_timestamp(msg, sk, skb);
1535
1536 serr = SKB_EXT_ERR(skb);
1537 put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
1538 sizeof(serr->ee), &serr->ee);
1539
1540 msg->msg_flags |= MSG_ERRQUEUE;
1541 err = copied;
1542
1543 /* Reset and regenerate socket error */
1544 spin_lock_bh(&sk->sk_error_queue.lock);
1545 sk->sk_err = 0;
1546 if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
1547 sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
1548 spin_unlock_bh(&sk->sk_error_queue.lock);
1549 sk->sk_error_report(sk);
1550 } else
1551 spin_unlock_bh(&sk->sk_error_queue.lock);
1552
1553out_free_skb:
1554 kfree_skb(skb);
1555out:
1556 return err;
1557}
1558
1411/* 1559/*
1412 * Pull a packet from our receive queue and hand it to the user. 1560 * Pull a packet from our receive queue and hand it to the user.
1413 * If necessary we block. 1561 * If necessary we block.
@@ -1420,9 +1568,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1420 struct sk_buff *skb; 1568 struct sk_buff *skb;
1421 int copied, err; 1569 int copied, err;
1422 struct sockaddr_ll *sll; 1570 struct sockaddr_ll *sll;
1571 int vnet_hdr_len = 0;
1423 1572
1424 err = -EINVAL; 1573 err = -EINVAL;
1425 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) 1574 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
1426 goto out; 1575 goto out;
1427 1576
1428#if 0 1577#if 0
@@ -1431,6 +1580,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1431 return -ENODEV; 1580 return -ENODEV;
1432#endif 1581#endif
1433 1582
1583 if (flags & MSG_ERRQUEUE) {
1584 err = packet_recv_error(sk, msg, len);
1585 goto out;
1586 }
1587
1434 /* 1588 /*
1435 * Call the generic datagram receiver. This handles all sorts 1589 * Call the generic datagram receiver. This handles all sorts
1436 * of horrible races and re-entrancy so we can forget about it 1590 * of horrible races and re-entrancy so we can forget about it
@@ -1451,6 +1605,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1451 if (skb == NULL) 1605 if (skb == NULL)
1452 goto out; 1606 goto out;
1453 1607
1608 if (pkt_sk(sk)->has_vnet_hdr) {
1609 struct virtio_net_hdr vnet_hdr = { 0 };
1610
1611 err = -EINVAL;
1612 vnet_hdr_len = sizeof(vnet_hdr);
1613 if ((len -= vnet_hdr_len) < 0)
1614 goto out_free;
1615
1616 if (skb_is_gso(skb)) {
1617 struct skb_shared_info *sinfo = skb_shinfo(skb);
1618
1619 /* This is a hint as to how much should be linear. */
1620 vnet_hdr.hdr_len = skb_headlen(skb);
1621 vnet_hdr.gso_size = sinfo->gso_size;
1622 if (sinfo->gso_type & SKB_GSO_TCPV4)
1623 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1624 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1625 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1626 else if (sinfo->gso_type & SKB_GSO_UDP)
1627 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1628 else if (sinfo->gso_type & SKB_GSO_FCOE)
1629 goto out_free;
1630 else
1631 BUG();
1632 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1633 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1634 } else
1635 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1636
1637 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1638 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1639 vnet_hdr.csum_start = skb->csum_start -
1640 skb_headroom(skb);
1641 vnet_hdr.csum_offset = skb->csum_offset;
1642 } /* else everything is zero */
1643
1644 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1645 vnet_hdr_len);
1646 if (err < 0)
1647 goto out_free;
1648 }
1649
1454 /* 1650 /*
1455 * If the address length field is there to be filled in, we fill 1651 * If the address length field is there to be filled in, we fill
1456 * it in now. 1652 * it in now.
@@ -1502,7 +1698,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1502 * Free or return the buffer as appropriate. Again this 1698 * Free or return the buffer as appropriate. Again this
1503 * hides all the races and re-entrancy issues from us. 1699 * hides all the races and re-entrancy issues from us.
1504 */ 1700 */
1505 err = (flags&MSG_TRUNC) ? skb->len : copied; 1701 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
1506 1702
1507out_free: 1703out_free:
1508 skb_free_datagram(sk, skb); 1704 skb_free_datagram(sk, skb);
@@ -1567,10 +1763,12 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1567{ 1763{
1568 switch (i->type) { 1764 switch (i->type) {
1569 case PACKET_MR_MULTICAST: 1765 case PACKET_MR_MULTICAST:
1766 if (i->alen != dev->addr_len)
1767 return -EINVAL;
1570 if (what > 0) 1768 if (what > 0)
1571 return dev_mc_add(dev, i->addr, i->alen, 0); 1769 return dev_mc_add(dev, i->addr);
1572 else 1770 else
1573 return dev_mc_delete(dev, i->addr, i->alen, 0); 1771 return dev_mc_del(dev, i->addr);
1574 break; 1772 break;
1575 case PACKET_MR_PROMISC: 1773 case PACKET_MR_PROMISC:
1576 return dev_set_promiscuity(dev, what); 1774 return dev_set_promiscuity(dev, what);
@@ -1579,10 +1777,12 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1579 return dev_set_allmulti(dev, what); 1777 return dev_set_allmulti(dev, what);
1580 break; 1778 break;
1581 case PACKET_MR_UNICAST: 1779 case PACKET_MR_UNICAST:
1780 if (i->alen != dev->addr_len)
1781 return -EINVAL;
1582 if (what > 0) 1782 if (what > 0)
1583 return dev_unicast_add(dev, i->addr); 1783 return dev_uc_add(dev, i->addr);
1584 else 1784 else
1585 return dev_unicast_delete(dev, i->addr); 1785 return dev_uc_del(dev, i->addr);
1586 break; 1786 break;
1587 default: 1787 default:
1588 break; 1788 break;
@@ -1732,7 +1932,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1732 return ret; 1932 return ret;
1733 } 1933 }
1734 1934
1735#ifdef CONFIG_PACKET_MMAP
1736 case PACKET_RX_RING: 1935 case PACKET_RX_RING:
1737 case PACKET_TX_RING: 1936 case PACKET_TX_RING:
1738 { 1937 {
@@ -1740,6 +1939,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1740 1939
1741 if (optlen < sizeof(req)) 1940 if (optlen < sizeof(req))
1742 return -EINVAL; 1941 return -EINVAL;
1942 if (pkt_sk(sk)->has_vnet_hdr)
1943 return -EINVAL;
1743 if (copy_from_user(&req, optval, sizeof(req))) 1944 if (copy_from_user(&req, optval, sizeof(req)))
1744 return -EFAULT; 1945 return -EFAULT;
1745 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); 1946 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
@@ -1801,7 +2002,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1801 po->tp_loss = !!val; 2002 po->tp_loss = !!val;
1802 return 0; 2003 return 0;
1803 } 2004 }
1804#endif
1805 case PACKET_AUXDATA: 2005 case PACKET_AUXDATA:
1806 { 2006 {
1807 int val; 2007 int val;
@@ -1826,6 +2026,34 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1826 po->origdev = !!val; 2026 po->origdev = !!val;
1827 return 0; 2027 return 0;
1828 } 2028 }
2029 case PACKET_VNET_HDR:
2030 {
2031 int val;
2032
2033 if (sock->type != SOCK_RAW)
2034 return -EINVAL;
2035 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2036 return -EBUSY;
2037 if (optlen < sizeof(val))
2038 return -EINVAL;
2039 if (copy_from_user(&val, optval, sizeof(val)))
2040 return -EFAULT;
2041
2042 po->has_vnet_hdr = !!val;
2043 return 0;
2044 }
2045 case PACKET_TIMESTAMP:
2046 {
2047 int val;
2048
2049 if (optlen != sizeof(val))
2050 return -EINVAL;
2051 if (copy_from_user(&val, optval, sizeof(val)))
2052 return -EFAULT;
2053
2054 po->tp_tstamp = val;
2055 return 0;
2056 }
1829 default: 2057 default:
1830 return -ENOPROTOOPT; 2058 return -ENOPROTOOPT;
1831 } 2059 }
@@ -1876,7 +2104,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
1876 2104
1877 data = &val; 2105 data = &val;
1878 break; 2106 break;
1879#ifdef CONFIG_PACKET_MMAP 2107 case PACKET_VNET_HDR:
2108 if (len > sizeof(int))
2109 len = sizeof(int);
2110 val = po->has_vnet_hdr;
2111
2112 data = &val;
2113 break;
1880 case PACKET_VERSION: 2114 case PACKET_VERSION:
1881 if (len > sizeof(int)) 2115 if (len > sizeof(int))
1882 len = sizeof(int); 2116 len = sizeof(int);
@@ -1912,7 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
1912 val = po->tp_loss; 2146 val = po->tp_loss;
1913 data = &val; 2147 data = &val;
1914 break; 2148 break;
1915#endif 2149 case PACKET_TIMESTAMP:
2150 if (len > sizeof(int))
2151 len = sizeof(int);
2152 val = po->tp_tstamp;
2153 data = &val;
2154 break;
1916 default: 2155 default:
1917 return -ENOPROTOOPT; 2156 return -ENOPROTOOPT;
1918 } 2157 }
@@ -1932,8 +2171,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
1932 struct net_device *dev = data; 2171 struct net_device *dev = data;
1933 struct net *net = dev_net(dev); 2172 struct net *net = dev_net(dev);
1934 2173
1935 read_lock(&net->packet.sklist_lock); 2174 rcu_read_lock();
1936 sk_for_each(sk, node, &net->packet.sklist) { 2175 sk_for_each_rcu(sk, node, &net->packet.sklist) {
1937 struct packet_sock *po = pkt_sk(sk); 2176 struct packet_sock *po = pkt_sk(sk);
1938 2177
1939 switch (msg) { 2178 switch (msg) {
@@ -1961,18 +2200,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
1961 } 2200 }
1962 break; 2201 break;
1963 case NETDEV_UP: 2202 case NETDEV_UP:
1964 spin_lock(&po->bind_lock); 2203 if (dev->ifindex == po->ifindex) {
1965 if (dev->ifindex == po->ifindex && po->num && 2204 spin_lock(&po->bind_lock);
1966 !po->running) { 2205 if (po->num && !po->running) {
1967 dev_add_pack(&po->prot_hook); 2206 dev_add_pack(&po->prot_hook);
1968 sock_hold(sk); 2207 sock_hold(sk);
1969 po->running = 1; 2208 po->running = 1;
2209 }
2210 spin_unlock(&po->bind_lock);
1970 } 2211 }
1971 spin_unlock(&po->bind_lock);
1972 break; 2212 break;
1973 } 2213 }
1974 } 2214 }
1975 read_unlock(&net->packet.sklist_lock); 2215 rcu_read_unlock();
1976 return NOTIFY_DONE; 2216 return NOTIFY_DONE;
1977} 2217}
1978 2218
@@ -2021,8 +2261,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
2021 case SIOCGIFDSTADDR: 2261 case SIOCGIFDSTADDR:
2022 case SIOCSIFDSTADDR: 2262 case SIOCSIFDSTADDR:
2023 case SIOCSIFFLAGS: 2263 case SIOCSIFFLAGS:
2024 if (!net_eq(sock_net(sk), &init_net))
2025 return -ENOIOCTLCMD;
2026 return inet_dgram_ops.ioctl(sock, cmd, arg); 2264 return inet_dgram_ops.ioctl(sock, cmd, arg);
2027#endif 2265#endif
2028 2266
@@ -2032,11 +2270,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
2032 return 0; 2270 return 0;
2033} 2271}
2034 2272
2035#ifndef CONFIG_PACKET_MMAP
2036#define packet_mmap sock_no_mmap
2037#define packet_poll datagram_poll
2038#else
2039
2040static unsigned int packet_poll(struct file *file, struct socket *sock, 2273static unsigned int packet_poll(struct file *file, struct socket *sock,
2041 poll_table *wait) 2274 poll_table *wait)
2042{ 2275{
@@ -2318,8 +2551,6 @@ out:
2318 mutex_unlock(&po->pg_vec_lock); 2551 mutex_unlock(&po->pg_vec_lock);
2319 return err; 2552 return err;
2320} 2553}
2321#endif
2322
2323 2554
2324static const struct proto_ops packet_ops_spkt = { 2555static const struct proto_ops packet_ops_spkt = {
2325 .family = PF_PACKET, 2556 .family = PF_PACKET,
@@ -2374,40 +2605,26 @@ static struct notifier_block packet_netdev_notifier = {
2374}; 2605};
2375 2606
2376#ifdef CONFIG_PROC_FS 2607#ifdef CONFIG_PROC_FS
2377static inline struct sock *packet_seq_idx(struct net *net, loff_t off)
2378{
2379 struct sock *s;
2380 struct hlist_node *node;
2381
2382 sk_for_each(s, node, &net->packet.sklist) {
2383 if (!off--)
2384 return s;
2385 }
2386 return NULL;
2387}
2388 2608
2389static void *packet_seq_start(struct seq_file *seq, loff_t *pos) 2609static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
2390 __acquires(seq_file_net(seq)->packet.sklist_lock) 2610 __acquires(RCU)
2391{ 2611{
2392 struct net *net = seq_file_net(seq); 2612 struct net *net = seq_file_net(seq);
2393 read_lock(&net->packet.sklist_lock); 2613
2394 return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; 2614 rcu_read_lock();
2615 return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
2395} 2616}
2396 2617
2397static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2618static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2398{ 2619{
2399 struct net *net = seq_file_net(seq); 2620 struct net *net = seq_file_net(seq);
2400 ++*pos; 2621 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
2401 return (v == SEQ_START_TOKEN)
2402 ? sk_head(&net->packet.sklist)
2403 : sk_next((struct sock *)v) ;
2404} 2622}
2405 2623
2406static void packet_seq_stop(struct seq_file *seq, void *v) 2624static void packet_seq_stop(struct seq_file *seq, void *v)
2407 __releases(seq_file_net(seq)->packet.sklist_lock) 2625 __releases(RCU)
2408{ 2626{
2409 struct net *net = seq_file_net(seq); 2627 rcu_read_unlock();
2410 read_unlock(&net->packet.sklist_lock);
2411} 2628}
2412 2629
2413static int packet_seq_show(struct seq_file *seq, void *v) 2630static int packet_seq_show(struct seq_file *seq, void *v)
@@ -2415,7 +2632,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
2415 if (v == SEQ_START_TOKEN) 2632 if (v == SEQ_START_TOKEN)
2416 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); 2633 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
2417 else { 2634 else {
2418 struct sock *s = v; 2635 struct sock *s = sk_entry(v);
2419 const struct packet_sock *po = pkt_sk(s); 2636 const struct packet_sock *po = pkt_sk(s);
2420 2637
2421 seq_printf(seq, 2638 seq_printf(seq,
@@ -2457,9 +2674,9 @@ static const struct file_operations packet_seq_fops = {
2457 2674
2458#endif 2675#endif
2459 2676
2460static int packet_net_init(struct net *net) 2677static int __net_init packet_net_init(struct net *net)
2461{ 2678{
2462 rwlock_init(&net->packet.sklist_lock); 2679 spin_lock_init(&net->packet.sklist_lock);
2463 INIT_HLIST_HEAD(&net->packet.sklist); 2680 INIT_HLIST_HEAD(&net->packet.sklist);
2464 2681
2465 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) 2682 if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
@@ -2468,7 +2685,7 @@ static int packet_net_init(struct net *net)
2468 return 0; 2685 return 0;
2469} 2686}
2470 2687
2471static void packet_net_exit(struct net *net) 2688static void __net_exit packet_net_exit(struct net *net)
2472{ 2689{
2473 proc_net_remove(net, "packet"); 2690 proc_net_remove(net, "packet");
2474} 2691}