diff options
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r-- | net/packet/af_packet.c | 466 |
1 files changed, 297 insertions, 169 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f2d116a5cb35..243946d4809d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <linux/wireless.h> | 60 | #include <linux/wireless.h> |
61 | #include <linux/kernel.h> | 61 | #include <linux/kernel.h> |
62 | #include <linux/kmod.h> | 62 | #include <linux/kmod.h> |
63 | #include <linux/slab.h> | ||
63 | #include <net/net_namespace.h> | 64 | #include <net/net_namespace.h> |
64 | #include <net/ip.h> | 65 | #include <net/ip.h> |
65 | #include <net/protocol.h> | 66 | #include <net/protocol.h> |
@@ -79,6 +80,8 @@ | |||
79 | #include <linux/module.h> | 80 | #include <linux/module.h> |
80 | #include <linux/init.h> | 81 | #include <linux/init.h> |
81 | #include <linux/mutex.h> | 82 | #include <linux/mutex.h> |
83 | #include <linux/if_vlan.h> | ||
84 | #include <linux/virtio_net.h> | ||
82 | 85 | ||
83 | #ifdef CONFIG_INET | 86 | #ifdef CONFIG_INET |
84 | #include <net/inet_common.h> | 87 | #include <net/inet_common.h> |
@@ -155,7 +158,6 @@ struct packet_mreq_max { | |||
155 | unsigned char mr_address[MAX_ADDR_LEN]; | 158 | unsigned char mr_address[MAX_ADDR_LEN]; |
156 | }; | 159 | }; |
157 | 160 | ||
158 | #ifdef CONFIG_PACKET_MMAP | ||
159 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, | 161 | static int packet_set_ring(struct sock *sk, struct tpacket_req *req, |
160 | int closing, int tx_ring); | 162 | int closing, int tx_ring); |
161 | 163 | ||
@@ -175,7 +177,6 @@ struct packet_ring_buffer { | |||
175 | 177 | ||
176 | struct packet_sock; | 178 | struct packet_sock; |
177 | static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); | 179 | static int tpacket_snd(struct packet_sock *po, struct msghdr *msg); |
178 | #endif | ||
179 | 180 | ||
180 | static void packet_flush_mclist(struct sock *sk); | 181 | static void packet_flush_mclist(struct sock *sk); |
181 | 182 | ||
@@ -183,27 +184,24 @@ struct packet_sock { | |||
183 | /* struct sock has to be the first member of packet_sock */ | 184 | /* struct sock has to be the first member of packet_sock */ |
184 | struct sock sk; | 185 | struct sock sk; |
185 | struct tpacket_stats stats; | 186 | struct tpacket_stats stats; |
186 | #ifdef CONFIG_PACKET_MMAP | ||
187 | struct packet_ring_buffer rx_ring; | 187 | struct packet_ring_buffer rx_ring; |
188 | struct packet_ring_buffer tx_ring; | 188 | struct packet_ring_buffer tx_ring; |
189 | int copy_thresh; | 189 | int copy_thresh; |
190 | #endif | ||
191 | struct packet_type prot_hook; | ||
192 | spinlock_t bind_lock; | 190 | spinlock_t bind_lock; |
193 | struct mutex pg_vec_lock; | 191 | struct mutex pg_vec_lock; |
194 | unsigned int running:1, /* prot_hook is attached*/ | 192 | unsigned int running:1, /* prot_hook is attached*/ |
195 | auxdata:1, | 193 | auxdata:1, |
196 | origdev:1; | 194 | origdev:1, |
195 | has_vnet_hdr:1; | ||
197 | int ifindex; /* bound device */ | 196 | int ifindex; /* bound device */ |
198 | __be16 num; | 197 | __be16 num; |
199 | struct packet_mclist *mclist; | 198 | struct packet_mclist *mclist; |
200 | #ifdef CONFIG_PACKET_MMAP | ||
201 | atomic_t mapped; | 199 | atomic_t mapped; |
202 | enum tpacket_versions tp_version; | 200 | enum tpacket_versions tp_version; |
203 | unsigned int tp_hdrlen; | 201 | unsigned int tp_hdrlen; |
204 | unsigned int tp_reserve; | 202 | unsigned int tp_reserve; |
205 | unsigned int tp_loss:1; | 203 | unsigned int tp_loss:1; |
206 | #endif | 204 | struct packet_type prot_hook ____cacheline_aligned_in_smp; |
207 | }; | 205 | }; |
208 | 206 | ||
209 | struct packet_skb_cb { | 207 | struct packet_skb_cb { |
@@ -216,8 +214,6 @@ struct packet_skb_cb { | |||
216 | 214 | ||
217 | #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) | 215 | #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) |
218 | 216 | ||
219 | #ifdef CONFIG_PACKET_MMAP | ||
220 | |||
221 | static void __packet_set_status(struct packet_sock *po, void *frame, int status) | 217 | static void __packet_set_status(struct packet_sock *po, void *frame, int status) |
222 | { | 218 | { |
223 | union { | 219 | union { |
@@ -312,8 +308,6 @@ static inline void packet_increment_head(struct packet_ring_buffer *buff) | |||
312 | buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; | 308 | buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; |
313 | } | 309 | } |
314 | 310 | ||
315 | #endif | ||
316 | |||
317 | static inline struct packet_sock *pkt_sk(struct sock *sk) | 311 | static inline struct packet_sock *pkt_sk(struct sock *sk) |
318 | { | 312 | { |
319 | return (struct packet_sock *)sk; | 313 | return (struct packet_sock *)sk; |
@@ -364,7 +358,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, | |||
364 | if (skb->pkt_type == PACKET_LOOPBACK) | 358 | if (skb->pkt_type == PACKET_LOOPBACK) |
365 | goto out; | 359 | goto out; |
366 | 360 | ||
367 | if (dev_net(dev) != sock_net(sk)) | 361 | if (!net_eq(dev_net(dev), sock_net(sk))) |
368 | goto out; | 362 | goto out; |
369 | 363 | ||
370 | skb = skb_share_check(skb, GFP_ATOMIC); | 364 | skb = skb_share_check(skb, GFP_ATOMIC); |
@@ -414,7 +408,7 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, | |||
414 | { | 408 | { |
415 | struct sock *sk = sock->sk; | 409 | struct sock *sk = sock->sk; |
416 | struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; | 410 | struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name; |
417 | struct sk_buff *skb; | 411 | struct sk_buff *skb = NULL; |
418 | struct net_device *dev; | 412 | struct net_device *dev; |
419 | __be16 proto = 0; | 413 | __be16 proto = 0; |
420 | int err; | 414 | int err; |
@@ -436,7 +430,9 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, | |||
436 | */ | 430 | */ |
437 | 431 | ||
438 | saddr->spkt_device[13] = 0; | 432 | saddr->spkt_device[13] = 0; |
439 | dev = dev_get_by_name(sock_net(sk), saddr->spkt_device); | 433 | retry: |
434 | rcu_read_lock(); | ||
435 | dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); | ||
440 | err = -ENODEV; | 436 | err = -ENODEV; |
441 | if (dev == NULL) | 437 | if (dev == NULL) |
442 | goto out_unlock; | 438 | goto out_unlock; |
@@ -454,58 +450,48 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, | |||
454 | if (len > dev->mtu + dev->hard_header_len) | 450 | if (len > dev->mtu + dev->hard_header_len) |
455 | goto out_unlock; | 451 | goto out_unlock; |
456 | 452 | ||
457 | err = -ENOBUFS; | 453 | if (!skb) { |
458 | skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL); | 454 | size_t reserved = LL_RESERVED_SPACE(dev); |
459 | 455 | unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; | |
460 | /* | 456 | |
461 | * If the write buffer is full, then tough. At this level the user | 457 | rcu_read_unlock(); |
462 | * gets to deal with the problem - do your own algorithmic backoffs. | 458 | skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL); |
463 | * That's far more flexible. | 459 | if (skb == NULL) |
464 | */ | 460 | return -ENOBUFS; |
465 | 461 | /* FIXME: Save some space for broken drivers that write a hard | |
466 | if (skb == NULL) | 462 | * header at transmission time by themselves. PPP is the notable |
467 | goto out_unlock; | 463 | * one here. This should really be fixed at the driver level. |
468 | 464 | */ | |
469 | /* | 465 | skb_reserve(skb, reserved); |
470 | * Fill it in | 466 | skb_reset_network_header(skb); |
471 | */ | 467 | |
472 | 468 | /* Try to align data part correctly */ | |
473 | /* FIXME: Save some space for broken drivers that write a | 469 | if (hhlen) { |
474 | * hard header at transmission time by themselves. PPP is the | 470 | skb->data -= hhlen; |
475 | * notable one here. This should really be fixed at the driver level. | 471 | skb->tail -= hhlen; |
476 | */ | 472 | if (len < hhlen) |
477 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 473 | skb_reset_network_header(skb); |
478 | skb_reset_network_header(skb); | 474 | } |
479 | 475 | err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | |
480 | /* Try to align data part correctly */ | 476 | if (err) |
481 | if (dev->header_ops) { | 477 | goto out_free; |
482 | skb->data -= dev->hard_header_len; | 478 | goto retry; |
483 | skb->tail -= dev->hard_header_len; | ||
484 | if (len < dev->hard_header_len) | ||
485 | skb_reset_network_header(skb); | ||
486 | } | 479 | } |
487 | 480 | ||
488 | /* Returns -EFAULT on error */ | 481 | |
489 | err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | ||
490 | skb->protocol = proto; | 482 | skb->protocol = proto; |
491 | skb->dev = dev; | 483 | skb->dev = dev; |
492 | skb->priority = sk->sk_priority; | 484 | skb->priority = sk->sk_priority; |
493 | if (err) | 485 | skb->mark = sk->sk_mark; |
494 | goto out_free; | ||
495 | |||
496 | /* | ||
497 | * Now send it | ||
498 | */ | ||
499 | 486 | ||
500 | dev_queue_xmit(skb); | 487 | dev_queue_xmit(skb); |
501 | dev_put(dev); | 488 | rcu_read_unlock(); |
502 | return len; | 489 | return len; |
503 | 490 | ||
491 | out_unlock: | ||
492 | rcu_read_unlock(); | ||
504 | out_free: | 493 | out_free: |
505 | kfree_skb(skb); | 494 | kfree_skb(skb); |
506 | out_unlock: | ||
507 | if (dev) | ||
508 | dev_put(dev); | ||
509 | return err; | 495 | return err; |
510 | } | 496 | } |
511 | 497 | ||
@@ -515,7 +501,7 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, | |||
515 | struct sk_filter *filter; | 501 | struct sk_filter *filter; |
516 | 502 | ||
517 | rcu_read_lock_bh(); | 503 | rcu_read_lock_bh(); |
518 | filter = rcu_dereference(sk->sk_filter); | 504 | filter = rcu_dereference_bh(sk->sk_filter); |
519 | if (filter != NULL) | 505 | if (filter != NULL) |
520 | res = sk_run_filter(skb, filter->insns, filter->len); | 506 | res = sk_run_filter(skb, filter->insns, filter->len); |
521 | rcu_read_unlock_bh(); | 507 | rcu_read_unlock_bh(); |
@@ -551,7 +537,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |||
551 | sk = pt->af_packet_priv; | 537 | sk = pt->af_packet_priv; |
552 | po = pkt_sk(sk); | 538 | po = pkt_sk(sk); |
553 | 539 | ||
554 | if (dev_net(dev) != sock_net(sk)) | 540 | if (!net_eq(dev_net(dev), sock_net(sk))) |
555 | goto drop; | 541 | goto drop; |
556 | 542 | ||
557 | skb->dev = dev; | 543 | skb->dev = dev; |
@@ -626,15 +612,14 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |||
626 | 612 | ||
627 | spin_lock(&sk->sk_receive_queue.lock); | 613 | spin_lock(&sk->sk_receive_queue.lock); |
628 | po->stats.tp_packets++; | 614 | po->stats.tp_packets++; |
615 | skb->dropcount = atomic_read(&sk->sk_drops); | ||
629 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 616 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
630 | spin_unlock(&sk->sk_receive_queue.lock); | 617 | spin_unlock(&sk->sk_receive_queue.lock); |
631 | sk->sk_data_ready(sk, skb->len); | 618 | sk->sk_data_ready(sk, skb->len); |
632 | return 0; | 619 | return 0; |
633 | 620 | ||
634 | drop_n_acct: | 621 | drop_n_acct: |
635 | spin_lock(&sk->sk_receive_queue.lock); | 622 | po->stats.tp_drops = atomic_inc_return(&sk->sk_drops); |
636 | po->stats.tp_drops++; | ||
637 | spin_unlock(&sk->sk_receive_queue.lock); | ||
638 | 623 | ||
639 | drop_n_restore: | 624 | drop_n_restore: |
640 | if (skb_head != skb->data && skb_shared(skb)) { | 625 | if (skb_head != skb->data && skb_shared(skb)) { |
@@ -646,7 +631,6 @@ drop: | |||
646 | return 0; | 631 | return 0; |
647 | } | 632 | } |
648 | 633 | ||
649 | #ifdef CONFIG_PACKET_MMAP | ||
650 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | 634 | static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, |
651 | struct packet_type *pt, struct net_device *orig_dev) | 635 | struct packet_type *pt, struct net_device *orig_dev) |
652 | { | 636 | { |
@@ -673,7 +657,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
673 | sk = pt->af_packet_priv; | 657 | sk = pt->af_packet_priv; |
674 | po = pkt_sk(sk); | 658 | po = pkt_sk(sk); |
675 | 659 | ||
676 | if (dev_net(dev) != sock_net(sk)) | 660 | if (!net_eq(dev_net(dev), sock_net(sk))) |
677 | goto drop; | 661 | goto drop; |
678 | 662 | ||
679 | if (dev->header_ops) { | 663 | if (dev->header_ops) { |
@@ -766,7 +750,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, | |||
766 | getnstimeofday(&ts); | 750 | getnstimeofday(&ts); |
767 | h.h2->tp_sec = ts.tv_sec; | 751 | h.h2->tp_sec = ts.tv_sec; |
768 | h.h2->tp_nsec = ts.tv_nsec; | 752 | h.h2->tp_nsec = ts.tv_nsec; |
769 | h.h2->tp_vlan_tci = skb->vlan_tci; | 753 | h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); |
770 | hdrlen = sizeof(*h.h2); | 754 | hdrlen = sizeof(*h.h2); |
771 | break; | 755 | break; |
772 | default: | 756 | default: |
@@ -856,6 +840,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, | |||
856 | skb->protocol = proto; | 840 | skb->protocol = proto; |
857 | skb->dev = dev; | 841 | skb->dev = dev; |
858 | skb->priority = po->sk.sk_priority; | 842 | skb->priority = po->sk.sk_priority; |
843 | skb->mark = po->sk.sk_mark; | ||
859 | skb_shinfo(skb)->destructor_arg = ph.raw; | 844 | skb_shinfo(skb)->destructor_arg = ph.raw; |
860 | 845 | ||
861 | switch (po->tp_version) { | 846 | switch (po->tp_version) { |
@@ -1028,20 +1013,30 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) | |||
1028 | 1013 | ||
1029 | status = TP_STATUS_SEND_REQUEST; | 1014 | status = TP_STATUS_SEND_REQUEST; |
1030 | err = dev_queue_xmit(skb); | 1015 | err = dev_queue_xmit(skb); |
1031 | if (unlikely(err > 0 && (err = net_xmit_errno(err)) != 0)) | 1016 | if (unlikely(err > 0)) { |
1032 | goto out_xmit; | 1017 | err = net_xmit_errno(err); |
1018 | if (err && __packet_get_status(po, ph) == | ||
1019 | TP_STATUS_AVAILABLE) { | ||
1020 | /* skb was destructed already */ | ||
1021 | skb = NULL; | ||
1022 | goto out_status; | ||
1023 | } | ||
1024 | /* | ||
1025 | * skb was dropped but not destructed yet; | ||
1026 | * let's treat it like congestion or err < 0 | ||
1027 | */ | ||
1028 | err = 0; | ||
1029 | } | ||
1033 | packet_increment_head(&po->tx_ring); | 1030 | packet_increment_head(&po->tx_ring); |
1034 | len_sum += tp_len; | 1031 | len_sum += tp_len; |
1035 | } while (likely((ph != NULL) || ((!(msg->msg_flags & MSG_DONTWAIT)) | 1032 | } while (likely((ph != NULL) || |
1036 | && (atomic_read(&po->tx_ring.pending)))) | 1033 | ((!(msg->msg_flags & MSG_DONTWAIT)) && |
1037 | ); | 1034 | (atomic_read(&po->tx_ring.pending)))) |
1035 | ); | ||
1038 | 1036 | ||
1039 | err = len_sum; | 1037 | err = len_sum; |
1040 | goto out_put; | 1038 | goto out_put; |
1041 | 1039 | ||
1042 | out_xmit: | ||
1043 | skb->destructor = sock_wfree; | ||
1044 | atomic_dec(&po->tx_ring.pending); | ||
1045 | out_status: | 1040 | out_status: |
1046 | __packet_set_status(po, ph, status); | 1041 | __packet_set_status(po, ph, status); |
1047 | kfree_skb(skb); | 1042 | kfree_skb(skb); |
@@ -1051,7 +1046,30 @@ out: | |||
1051 | mutex_unlock(&po->pg_vec_lock); | 1046 | mutex_unlock(&po->pg_vec_lock); |
1052 | return err; | 1047 | return err; |
1053 | } | 1048 | } |
1054 | #endif | 1049 | |
1050 | static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, | ||
1051 | size_t reserve, size_t len, | ||
1052 | size_t linear, int noblock, | ||
1053 | int *err) | ||
1054 | { | ||
1055 | struct sk_buff *skb; | ||
1056 | |||
1057 | /* Under a page? Don't bother with paged skb. */ | ||
1058 | if (prepad + len < PAGE_SIZE || !linear) | ||
1059 | linear = len; | ||
1060 | |||
1061 | skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, | ||
1062 | err); | ||
1063 | if (!skb) | ||
1064 | return NULL; | ||
1065 | |||
1066 | skb_reserve(skb, reserve); | ||
1067 | skb_put(skb, linear); | ||
1068 | skb->data_len = len - linear; | ||
1069 | skb->len += len - linear; | ||
1070 | |||
1071 | return skb; | ||
1072 | } | ||
1055 | 1073 | ||
1056 | static int packet_snd(struct socket *sock, | 1074 | static int packet_snd(struct socket *sock, |
1057 | struct msghdr *msg, size_t len) | 1075 | struct msghdr *msg, size_t len) |
@@ -1063,14 +1081,17 @@ static int packet_snd(struct socket *sock, | |||
1063 | __be16 proto; | 1081 | __be16 proto; |
1064 | unsigned char *addr; | 1082 | unsigned char *addr; |
1065 | int ifindex, err, reserve = 0; | 1083 | int ifindex, err, reserve = 0; |
1084 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
1085 | int offset = 0; | ||
1086 | int vnet_hdr_len; | ||
1087 | struct packet_sock *po = pkt_sk(sk); | ||
1088 | unsigned short gso_type = 0; | ||
1066 | 1089 | ||
1067 | /* | 1090 | /* |
1068 | * Get and verify the address. | 1091 | * Get and verify the address. |
1069 | */ | 1092 | */ |
1070 | 1093 | ||
1071 | if (saddr == NULL) { | 1094 | if (saddr == NULL) { |
1072 | struct packet_sock *po = pkt_sk(sk); | ||
1073 | |||
1074 | ifindex = po->ifindex; | 1095 | ifindex = po->ifindex; |
1075 | proto = po->num; | 1096 | proto = po->num; |
1076 | addr = NULL; | 1097 | addr = NULL; |
@@ -1097,31 +1118,100 @@ static int packet_snd(struct socket *sock, | |||
1097 | if (!(dev->flags & IFF_UP)) | 1118 | if (!(dev->flags & IFF_UP)) |
1098 | goto out_unlock; | 1119 | goto out_unlock; |
1099 | 1120 | ||
1121 | if (po->has_vnet_hdr) { | ||
1122 | vnet_hdr_len = sizeof(vnet_hdr); | ||
1123 | |||
1124 | err = -EINVAL; | ||
1125 | if (len < vnet_hdr_len) | ||
1126 | goto out_unlock; | ||
1127 | |||
1128 | len -= vnet_hdr_len; | ||
1129 | |||
1130 | err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, | ||
1131 | vnet_hdr_len); | ||
1132 | if (err < 0) | ||
1133 | goto out_unlock; | ||
1134 | |||
1135 | if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && | ||
1136 | (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > | ||
1137 | vnet_hdr.hdr_len)) | ||
1138 | vnet_hdr.hdr_len = vnet_hdr.csum_start + | ||
1139 | vnet_hdr.csum_offset + 2; | ||
1140 | |||
1141 | err = -EINVAL; | ||
1142 | if (vnet_hdr.hdr_len > len) | ||
1143 | goto out_unlock; | ||
1144 | |||
1145 | if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { | ||
1146 | switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { | ||
1147 | case VIRTIO_NET_HDR_GSO_TCPV4: | ||
1148 | gso_type = SKB_GSO_TCPV4; | ||
1149 | break; | ||
1150 | case VIRTIO_NET_HDR_GSO_TCPV6: | ||
1151 | gso_type = SKB_GSO_TCPV6; | ||
1152 | break; | ||
1153 | case VIRTIO_NET_HDR_GSO_UDP: | ||
1154 | gso_type = SKB_GSO_UDP; | ||
1155 | break; | ||
1156 | default: | ||
1157 | goto out_unlock; | ||
1158 | } | ||
1159 | |||
1160 | if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) | ||
1161 | gso_type |= SKB_GSO_TCP_ECN; | ||
1162 | |||
1163 | if (vnet_hdr.gso_size == 0) | ||
1164 | goto out_unlock; | ||
1165 | |||
1166 | } | ||
1167 | } | ||
1168 | |||
1100 | err = -EMSGSIZE; | 1169 | err = -EMSGSIZE; |
1101 | if (len > dev->mtu+reserve) | 1170 | if (!gso_type && (len > dev->mtu+reserve)) |
1102 | goto out_unlock; | 1171 | goto out_unlock; |
1103 | 1172 | ||
1104 | skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), | 1173 | err = -ENOBUFS; |
1105 | msg->msg_flags & MSG_DONTWAIT, &err); | 1174 | skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), |
1175 | LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, | ||
1176 | msg->msg_flags & MSG_DONTWAIT, &err); | ||
1106 | if (skb == NULL) | 1177 | if (skb == NULL) |
1107 | goto out_unlock; | 1178 | goto out_unlock; |
1108 | 1179 | ||
1109 | skb_reserve(skb, LL_RESERVED_SPACE(dev)); | 1180 | skb_set_network_header(skb, reserve); |
1110 | skb_reset_network_header(skb); | ||
1111 | 1181 | ||
1112 | err = -EINVAL; | 1182 | err = -EINVAL; |
1113 | if (sock->type == SOCK_DGRAM && | 1183 | if (sock->type == SOCK_DGRAM && |
1114 | dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) | 1184 | (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) |
1115 | goto out_free; | 1185 | goto out_free; |
1116 | 1186 | ||
1117 | /* Returns -EFAULT on error */ | 1187 | /* Returns -EFAULT on error */ |
1118 | err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); | 1188 | err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); |
1119 | if (err) | 1189 | if (err) |
1120 | goto out_free; | 1190 | goto out_free; |
1121 | 1191 | ||
1122 | skb->protocol = proto; | 1192 | skb->protocol = proto; |
1123 | skb->dev = dev; | 1193 | skb->dev = dev; |
1124 | skb->priority = sk->sk_priority; | 1194 | skb->priority = sk->sk_priority; |
1195 | skb->mark = sk->sk_mark; | ||
1196 | |||
1197 | if (po->has_vnet_hdr) { | ||
1198 | if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { | ||
1199 | if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, | ||
1200 | vnet_hdr.csum_offset)) { | ||
1201 | err = -EINVAL; | ||
1202 | goto out_free; | ||
1203 | } | ||
1204 | } | ||
1205 | |||
1206 | skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; | ||
1207 | skb_shinfo(skb)->gso_type = gso_type; | ||
1208 | |||
1209 | /* Header must be checked, and gso_segs computed. */ | ||
1210 | skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; | ||
1211 | skb_shinfo(skb)->gso_segs = 0; | ||
1212 | |||
1213 | len += vnet_hdr_len; | ||
1214 | } | ||
1125 | 1215 | ||
1126 | /* | 1216 | /* |
1127 | * Now send it | 1217 | * Now send it |
@@ -1147,13 +1237,11 @@ out: | |||
1147 | static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, | 1237 | static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, |
1148 | struct msghdr *msg, size_t len) | 1238 | struct msghdr *msg, size_t len) |
1149 | { | 1239 | { |
1150 | #ifdef CONFIG_PACKET_MMAP | ||
1151 | struct sock *sk = sock->sk; | 1240 | struct sock *sk = sock->sk; |
1152 | struct packet_sock *po = pkt_sk(sk); | 1241 | struct packet_sock *po = pkt_sk(sk); |
1153 | if (po->tx_ring.pg_vec) | 1242 | if (po->tx_ring.pg_vec) |
1154 | return tpacket_snd(po, msg); | 1243 | return tpacket_snd(po, msg); |
1155 | else | 1244 | else |
1156 | #endif | ||
1157 | return packet_snd(sock, msg, len); | 1245 | return packet_snd(sock, msg, len); |
1158 | } | 1246 | } |
1159 | 1247 | ||
@@ -1167,9 +1255,7 @@ static int packet_release(struct socket *sock) | |||
1167 | struct sock *sk = sock->sk; | 1255 | struct sock *sk = sock->sk; |
1168 | struct packet_sock *po; | 1256 | struct packet_sock *po; |
1169 | struct net *net; | 1257 | struct net *net; |
1170 | #ifdef CONFIG_PACKET_MMAP | ||
1171 | struct tpacket_req req; | 1258 | struct tpacket_req req; |
1172 | #endif | ||
1173 | 1259 | ||
1174 | if (!sk) | 1260 | if (!sk) |
1175 | return 0; | 1261 | return 0; |
@@ -1177,28 +1263,25 @@ static int packet_release(struct socket *sock) | |||
1177 | net = sock_net(sk); | 1263 | net = sock_net(sk); |
1178 | po = pkt_sk(sk); | 1264 | po = pkt_sk(sk); |
1179 | 1265 | ||
1180 | write_lock_bh(&net->packet.sklist_lock); | 1266 | spin_lock_bh(&net->packet.sklist_lock); |
1181 | sk_del_node_init(sk); | 1267 | sk_del_node_init_rcu(sk); |
1182 | sock_prot_inuse_add(net, sk->sk_prot, -1); | 1268 | sock_prot_inuse_add(net, sk->sk_prot, -1); |
1183 | write_unlock_bh(&net->packet.sklist_lock); | 1269 | spin_unlock_bh(&net->packet.sklist_lock); |
1184 | |||
1185 | /* | ||
1186 | * Unhook packet receive handler. | ||
1187 | */ | ||
1188 | 1270 | ||
1271 | spin_lock(&po->bind_lock); | ||
1189 | if (po->running) { | 1272 | if (po->running) { |
1190 | /* | 1273 | /* |
1191 | * Remove the protocol hook | 1274 | * Remove from protocol table |
1192 | */ | 1275 | */ |
1193 | dev_remove_pack(&po->prot_hook); | ||
1194 | po->running = 0; | 1276 | po->running = 0; |
1195 | po->num = 0; | 1277 | po->num = 0; |
1278 | __dev_remove_pack(&po->prot_hook); | ||
1196 | __sock_put(sk); | 1279 | __sock_put(sk); |
1197 | } | 1280 | } |
1281 | spin_unlock(&po->bind_lock); | ||
1198 | 1282 | ||
1199 | packet_flush_mclist(sk); | 1283 | packet_flush_mclist(sk); |
1200 | 1284 | ||
1201 | #ifdef CONFIG_PACKET_MMAP | ||
1202 | memset(&req, 0, sizeof(req)); | 1285 | memset(&req, 0, sizeof(req)); |
1203 | 1286 | ||
1204 | if (po->rx_ring.pg_vec) | 1287 | if (po->rx_ring.pg_vec) |
@@ -1206,12 +1289,11 @@ static int packet_release(struct socket *sock) | |||
1206 | 1289 | ||
1207 | if (po->tx_ring.pg_vec) | 1290 | if (po->tx_ring.pg_vec) |
1208 | packet_set_ring(sk, &req, 1, 1); | 1291 | packet_set_ring(sk, &req, 1, 1); |
1209 | #endif | ||
1210 | 1292 | ||
1293 | synchronize_net(); | ||
1211 | /* | 1294 | /* |
1212 | * Now the socket is dead. No more input will appear. | 1295 | * Now the socket is dead. No more input will appear. |
1213 | */ | 1296 | */ |
1214 | |||
1215 | sock_orphan(sk); | 1297 | sock_orphan(sk); |
1216 | sock->sk = NULL; | 1298 | sock->sk = NULL; |
1217 | 1299 | ||
@@ -1341,7 +1423,8 @@ static struct proto packet_proto = { | |||
1341 | * Create a packet of type SOCK_PACKET. | 1423 | * Create a packet of type SOCK_PACKET. |
1342 | */ | 1424 | */ |
1343 | 1425 | ||
1344 | static int packet_create(struct net *net, struct socket *sock, int protocol) | 1426 | static int packet_create(struct net *net, struct socket *sock, int protocol, |
1427 | int kern) | ||
1345 | { | 1428 | { |
1346 | struct sock *sk; | 1429 | struct sock *sk; |
1347 | struct packet_sock *po; | 1430 | struct packet_sock *po; |
@@ -1394,10 +1477,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol) | |||
1394 | po->running = 1; | 1477 | po->running = 1; |
1395 | } | 1478 | } |
1396 | 1479 | ||
1397 | write_lock_bh(&net->packet.sklist_lock); | 1480 | spin_lock_bh(&net->packet.sklist_lock); |
1398 | sk_add_node(sk, &net->packet.sklist); | 1481 | sk_add_node_rcu(sk, &net->packet.sklist); |
1399 | sock_prot_inuse_add(net, &packet_proto, 1); | 1482 | sock_prot_inuse_add(net, &packet_proto, 1); |
1400 | write_unlock_bh(&net->packet.sklist_lock); | 1483 | spin_unlock_bh(&net->packet.sklist_lock); |
1484 | |||
1401 | return 0; | 1485 | return 0; |
1402 | out: | 1486 | out: |
1403 | return err; | 1487 | return err; |
@@ -1415,6 +1499,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1415 | struct sk_buff *skb; | 1499 | struct sk_buff *skb; |
1416 | int copied, err; | 1500 | int copied, err; |
1417 | struct sockaddr_ll *sll; | 1501 | struct sockaddr_ll *sll; |
1502 | int vnet_hdr_len = 0; | ||
1418 | 1503 | ||
1419 | err = -EINVAL; | 1504 | err = -EINVAL; |
1420 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) | 1505 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) |
@@ -1446,6 +1531,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1446 | if (skb == NULL) | 1531 | if (skb == NULL) |
1447 | goto out; | 1532 | goto out; |
1448 | 1533 | ||
1534 | if (pkt_sk(sk)->has_vnet_hdr) { | ||
1535 | struct virtio_net_hdr vnet_hdr = { 0 }; | ||
1536 | |||
1537 | err = -EINVAL; | ||
1538 | vnet_hdr_len = sizeof(vnet_hdr); | ||
1539 | if ((len -= vnet_hdr_len) < 0) | ||
1540 | goto out_free; | ||
1541 | |||
1542 | if (skb_is_gso(skb)) { | ||
1543 | struct skb_shared_info *sinfo = skb_shinfo(skb); | ||
1544 | |||
1545 | /* This is a hint as to how much should be linear. */ | ||
1546 | vnet_hdr.hdr_len = skb_headlen(skb); | ||
1547 | vnet_hdr.gso_size = sinfo->gso_size; | ||
1548 | if (sinfo->gso_type & SKB_GSO_TCPV4) | ||
1549 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; | ||
1550 | else if (sinfo->gso_type & SKB_GSO_TCPV6) | ||
1551 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; | ||
1552 | else if (sinfo->gso_type & SKB_GSO_UDP) | ||
1553 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; | ||
1554 | else if (sinfo->gso_type & SKB_GSO_FCOE) | ||
1555 | goto out_free; | ||
1556 | else | ||
1557 | BUG(); | ||
1558 | if (sinfo->gso_type & SKB_GSO_TCP_ECN) | ||
1559 | vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; | ||
1560 | } else | ||
1561 | vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; | ||
1562 | |||
1563 | if (skb->ip_summed == CHECKSUM_PARTIAL) { | ||
1564 | vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; | ||
1565 | vnet_hdr.csum_start = skb->csum_start - | ||
1566 | skb_headroom(skb); | ||
1567 | vnet_hdr.csum_offset = skb->csum_offset; | ||
1568 | } /* else everything is zero */ | ||
1569 | |||
1570 | err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, | ||
1571 | vnet_hdr_len); | ||
1572 | if (err < 0) | ||
1573 | goto out_free; | ||
1574 | } | ||
1575 | |||
1449 | /* | 1576 | /* |
1450 | * If the address length field is there to be filled in, we fill | 1577 | * If the address length field is there to be filled in, we fill |
1451 | * it in now. | 1578 | * it in now. |
@@ -1472,7 +1599,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1472 | if (err) | 1599 | if (err) |
1473 | goto out_free; | 1600 | goto out_free; |
1474 | 1601 | ||
1475 | sock_recv_timestamp(msg, sk, skb); | 1602 | sock_recv_ts_and_drops(msg, sk, skb); |
1476 | 1603 | ||
1477 | if (msg->msg_name) | 1604 | if (msg->msg_name) |
1478 | memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, | 1605 | memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, |
@@ -1488,7 +1615,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1488 | aux.tp_snaplen = skb->len; | 1615 | aux.tp_snaplen = skb->len; |
1489 | aux.tp_mac = 0; | 1616 | aux.tp_mac = 0; |
1490 | aux.tp_net = skb_network_offset(skb); | 1617 | aux.tp_net = skb_network_offset(skb); |
1491 | aux.tp_vlan_tci = skb->vlan_tci; | 1618 | aux.tp_vlan_tci = vlan_tx_tag_get(skb); |
1492 | 1619 | ||
1493 | put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); | 1620 | put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); |
1494 | } | 1621 | } |
@@ -1497,7 +1624,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1497 | * Free or return the buffer as appropriate. Again this | 1624 | * Free or return the buffer as appropriate. Again this |
1498 | * hides all the races and re-entrancy issues from us. | 1625 | * hides all the races and re-entrancy issues from us. |
1499 | */ | 1626 | */ |
1500 | err = (flags&MSG_TRUNC) ? skb->len : copied; | 1627 | err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); |
1501 | 1628 | ||
1502 | out_free: | 1629 | out_free: |
1503 | skb_free_datagram(sk, skb); | 1630 | skb_free_datagram(sk, skb); |
@@ -1515,12 +1642,13 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, | |||
1515 | return -EOPNOTSUPP; | 1642 | return -EOPNOTSUPP; |
1516 | 1643 | ||
1517 | uaddr->sa_family = AF_PACKET; | 1644 | uaddr->sa_family = AF_PACKET; |
1518 | dev = dev_get_by_index(sock_net(sk), pkt_sk(sk)->ifindex); | 1645 | rcu_read_lock(); |
1519 | if (dev) { | 1646 | dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); |
1647 | if (dev) | ||
1520 | strlcpy(uaddr->sa_data, dev->name, 15); | 1648 | strlcpy(uaddr->sa_data, dev->name, 15); |
1521 | dev_put(dev); | 1649 | else |
1522 | } else | ||
1523 | memset(uaddr->sa_data, 0, 14); | 1650 | memset(uaddr->sa_data, 0, 14); |
1651 | rcu_read_unlock(); | ||
1524 | *uaddr_len = sizeof(*uaddr); | 1652 | *uaddr_len = sizeof(*uaddr); |
1525 | 1653 | ||
1526 | return 0; | 1654 | return 0; |
@@ -1532,7 +1660,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, | |||
1532 | struct net_device *dev; | 1660 | struct net_device *dev; |
1533 | struct sock *sk = sock->sk; | 1661 | struct sock *sk = sock->sk; |
1534 | struct packet_sock *po = pkt_sk(sk); | 1662 | struct packet_sock *po = pkt_sk(sk); |
1535 | struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; | 1663 | DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); |
1536 | 1664 | ||
1537 | if (peer) | 1665 | if (peer) |
1538 | return -EOPNOTSUPP; | 1666 | return -EOPNOTSUPP; |
@@ -1540,16 +1668,17 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, | |||
1540 | sll->sll_family = AF_PACKET; | 1668 | sll->sll_family = AF_PACKET; |
1541 | sll->sll_ifindex = po->ifindex; | 1669 | sll->sll_ifindex = po->ifindex; |
1542 | sll->sll_protocol = po->num; | 1670 | sll->sll_protocol = po->num; |
1543 | dev = dev_get_by_index(sock_net(sk), po->ifindex); | 1671 | rcu_read_lock(); |
1672 | dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); | ||
1544 | if (dev) { | 1673 | if (dev) { |
1545 | sll->sll_hatype = dev->type; | 1674 | sll->sll_hatype = dev->type; |
1546 | sll->sll_halen = dev->addr_len; | 1675 | sll->sll_halen = dev->addr_len; |
1547 | memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); | 1676 | memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len); |
1548 | dev_put(dev); | ||
1549 | } else { | 1677 | } else { |
1550 | sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ | 1678 | sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ |
1551 | sll->sll_halen = 0; | 1679 | sll->sll_halen = 0; |
1552 | } | 1680 | } |
1681 | rcu_read_unlock(); | ||
1553 | *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; | 1682 | *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; |
1554 | 1683 | ||
1555 | return 0; | 1684 | return 0; |
@@ -1560,6 +1689,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, | |||
1560 | { | 1689 | { |
1561 | switch (i->type) { | 1690 | switch (i->type) { |
1562 | case PACKET_MR_MULTICAST: | 1691 | case PACKET_MR_MULTICAST: |
1692 | if (i->alen != dev->addr_len) | ||
1693 | return -EINVAL; | ||
1563 | if (what > 0) | 1694 | if (what > 0) |
1564 | return dev_mc_add(dev, i->addr, i->alen, 0); | 1695 | return dev_mc_add(dev, i->addr, i->alen, 0); |
1565 | else | 1696 | else |
@@ -1572,6 +1703,8 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, | |||
1572 | return dev_set_allmulti(dev, what); | 1703 | return dev_set_allmulti(dev, what); |
1573 | break; | 1704 | break; |
1574 | case PACKET_MR_UNICAST: | 1705 | case PACKET_MR_UNICAST: |
1706 | if (i->alen != dev->addr_len) | ||
1707 | return -EINVAL; | ||
1575 | if (what > 0) | 1708 | if (what > 0) |
1576 | return dev_unicast_add(dev, i->addr); | 1709 | return dev_unicast_add(dev, i->addr); |
1577 | else | 1710 | else |
@@ -1659,11 +1792,9 @@ static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) | |||
1659 | if (--ml->count == 0) { | 1792 | if (--ml->count == 0) { |
1660 | struct net_device *dev; | 1793 | struct net_device *dev; |
1661 | *mlp = ml->next; | 1794 | *mlp = ml->next; |
1662 | dev = dev_get_by_index(sock_net(sk), ml->ifindex); | 1795 | dev = __dev_get_by_index(sock_net(sk), ml->ifindex); |
1663 | if (dev) { | 1796 | if (dev) |
1664 | packet_dev_mc(dev, ml, -1); | 1797 | packet_dev_mc(dev, ml, -1); |
1665 | dev_put(dev); | ||
1666 | } | ||
1667 | kfree(ml); | 1798 | kfree(ml); |
1668 | } | 1799 | } |
1669 | rtnl_unlock(); | 1800 | rtnl_unlock(); |
@@ -1687,11 +1818,9 @@ static void packet_flush_mclist(struct sock *sk) | |||
1687 | struct net_device *dev; | 1818 | struct net_device *dev; |
1688 | 1819 | ||
1689 | po->mclist = ml->next; | 1820 | po->mclist = ml->next; |
1690 | dev = dev_get_by_index(sock_net(sk), ml->ifindex); | 1821 | dev = __dev_get_by_index(sock_net(sk), ml->ifindex); |
1691 | if (dev != NULL) { | 1822 | if (dev != NULL) |
1692 | packet_dev_mc(dev, ml, -1); | 1823 | packet_dev_mc(dev, ml, -1); |
1693 | dev_put(dev); | ||
1694 | } | ||
1695 | kfree(ml); | 1824 | kfree(ml); |
1696 | } | 1825 | } |
1697 | rtnl_unlock(); | 1826 | rtnl_unlock(); |
@@ -1729,7 +1858,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1729 | return ret; | 1858 | return ret; |
1730 | } | 1859 | } |
1731 | 1860 | ||
1732 | #ifdef CONFIG_PACKET_MMAP | ||
1733 | case PACKET_RX_RING: | 1861 | case PACKET_RX_RING: |
1734 | case PACKET_TX_RING: | 1862 | case PACKET_TX_RING: |
1735 | { | 1863 | { |
@@ -1737,6 +1865,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1737 | 1865 | ||
1738 | if (optlen < sizeof(req)) | 1866 | if (optlen < sizeof(req)) |
1739 | return -EINVAL; | 1867 | return -EINVAL; |
1868 | if (pkt_sk(sk)->has_vnet_hdr) | ||
1869 | return -EINVAL; | ||
1740 | if (copy_from_user(&req, optval, sizeof(req))) | 1870 | if (copy_from_user(&req, optval, sizeof(req))) |
1741 | return -EFAULT; | 1871 | return -EFAULT; |
1742 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); | 1872 | return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); |
@@ -1798,7 +1928,6 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1798 | po->tp_loss = !!val; | 1928 | po->tp_loss = !!val; |
1799 | return 0; | 1929 | return 0; |
1800 | } | 1930 | } |
1801 | #endif | ||
1802 | case PACKET_AUXDATA: | 1931 | case PACKET_AUXDATA: |
1803 | { | 1932 | { |
1804 | int val; | 1933 | int val; |
@@ -1823,6 +1952,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv | |||
1823 | po->origdev = !!val; | 1952 | po->origdev = !!val; |
1824 | return 0; | 1953 | return 0; |
1825 | } | 1954 | } |
1955 | case PACKET_VNET_HDR: | ||
1956 | { | ||
1957 | int val; | ||
1958 | |||
1959 | if (sock->type != SOCK_RAW) | ||
1960 | return -EINVAL; | ||
1961 | if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) | ||
1962 | return -EBUSY; | ||
1963 | if (optlen < sizeof(val)) | ||
1964 | return -EINVAL; | ||
1965 | if (copy_from_user(&val, optval, sizeof(val))) | ||
1966 | return -EFAULT; | ||
1967 | |||
1968 | po->has_vnet_hdr = !!val; | ||
1969 | return 0; | ||
1970 | } | ||
1826 | default: | 1971 | default: |
1827 | return -ENOPROTOOPT; | 1972 | return -ENOPROTOOPT; |
1828 | } | 1973 | } |
@@ -1873,7 +2018,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
1873 | 2018 | ||
1874 | data = &val; | 2019 | data = &val; |
1875 | break; | 2020 | break; |
1876 | #ifdef CONFIG_PACKET_MMAP | 2021 | case PACKET_VNET_HDR: |
2022 | if (len > sizeof(int)) | ||
2023 | len = sizeof(int); | ||
2024 | val = po->has_vnet_hdr; | ||
2025 | |||
2026 | data = &val; | ||
2027 | break; | ||
1877 | case PACKET_VERSION: | 2028 | case PACKET_VERSION: |
1878 | if (len > sizeof(int)) | 2029 | if (len > sizeof(int)) |
1879 | len = sizeof(int); | 2030 | len = sizeof(int); |
@@ -1909,7 +2060,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, | |||
1909 | val = po->tp_loss; | 2060 | val = po->tp_loss; |
1910 | data = &val; | 2061 | data = &val; |
1911 | break; | 2062 | break; |
1912 | #endif | ||
1913 | default: | 2063 | default: |
1914 | return -ENOPROTOOPT; | 2064 | return -ENOPROTOOPT; |
1915 | } | 2065 | } |
@@ -1929,8 +2079,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void | |||
1929 | struct net_device *dev = data; | 2079 | struct net_device *dev = data; |
1930 | struct net *net = dev_net(dev); | 2080 | struct net *net = dev_net(dev); |
1931 | 2081 | ||
1932 | read_lock(&net->packet.sklist_lock); | 2082 | rcu_read_lock(); |
1933 | sk_for_each(sk, node, &net->packet.sklist) { | 2083 | sk_for_each_rcu(sk, node, &net->packet.sklist) { |
1934 | struct packet_sock *po = pkt_sk(sk); | 2084 | struct packet_sock *po = pkt_sk(sk); |
1935 | 2085 | ||
1936 | switch (msg) { | 2086 | switch (msg) { |
@@ -1958,18 +2108,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void | |||
1958 | } | 2108 | } |
1959 | break; | 2109 | break; |
1960 | case NETDEV_UP: | 2110 | case NETDEV_UP: |
1961 | spin_lock(&po->bind_lock); | 2111 | if (dev->ifindex == po->ifindex) { |
1962 | if (dev->ifindex == po->ifindex && po->num && | 2112 | spin_lock(&po->bind_lock); |
1963 | !po->running) { | 2113 | if (po->num && !po->running) { |
1964 | dev_add_pack(&po->prot_hook); | 2114 | dev_add_pack(&po->prot_hook); |
1965 | sock_hold(sk); | 2115 | sock_hold(sk); |
1966 | po->running = 1; | 2116 | po->running = 1; |
2117 | } | ||
2118 | spin_unlock(&po->bind_lock); | ||
1967 | } | 2119 | } |
1968 | spin_unlock(&po->bind_lock); | ||
1969 | break; | 2120 | break; |
1970 | } | 2121 | } |
1971 | } | 2122 | } |
1972 | read_unlock(&net->packet.sklist_lock); | 2123 | rcu_read_unlock(); |
1973 | return NOTIFY_DONE; | 2124 | return NOTIFY_DONE; |
1974 | } | 2125 | } |
1975 | 2126 | ||
@@ -2018,8 +2169,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, | |||
2018 | case SIOCGIFDSTADDR: | 2169 | case SIOCGIFDSTADDR: |
2019 | case SIOCSIFDSTADDR: | 2170 | case SIOCSIFDSTADDR: |
2020 | case SIOCSIFFLAGS: | 2171 | case SIOCSIFFLAGS: |
2021 | if (!net_eq(sock_net(sk), &init_net)) | ||
2022 | return -ENOIOCTLCMD; | ||
2023 | return inet_dgram_ops.ioctl(sock, cmd, arg); | 2172 | return inet_dgram_ops.ioctl(sock, cmd, arg); |
2024 | #endif | 2173 | #endif |
2025 | 2174 | ||
@@ -2029,11 +2178,6 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd, | |||
2029 | return 0; | 2178 | return 0; |
2030 | } | 2179 | } |
2031 | 2180 | ||
2032 | #ifndef CONFIG_PACKET_MMAP | ||
2033 | #define packet_mmap sock_no_mmap | ||
2034 | #define packet_poll datagram_poll | ||
2035 | #else | ||
2036 | |||
2037 | static unsigned int packet_poll(struct file *file, struct socket *sock, | 2181 | static unsigned int packet_poll(struct file *file, struct socket *sock, |
2038 | poll_table *wait) | 2182 | poll_table *wait) |
2039 | { | 2183 | { |
@@ -2315,8 +2459,6 @@ out: | |||
2315 | mutex_unlock(&po->pg_vec_lock); | 2459 | mutex_unlock(&po->pg_vec_lock); |
2316 | return err; | 2460 | return err; |
2317 | } | 2461 | } |
2318 | #endif | ||
2319 | |||
2320 | 2462 | ||
2321 | static const struct proto_ops packet_ops_spkt = { | 2463 | static const struct proto_ops packet_ops_spkt = { |
2322 | .family = PF_PACKET, | 2464 | .family = PF_PACKET, |
@@ -2360,7 +2502,7 @@ static const struct proto_ops packet_ops = { | |||
2360 | .sendpage = sock_no_sendpage, | 2502 | .sendpage = sock_no_sendpage, |
2361 | }; | 2503 | }; |
2362 | 2504 | ||
2363 | static struct net_proto_family packet_family_ops = { | 2505 | static const struct net_proto_family packet_family_ops = { |
2364 | .family = PF_PACKET, | 2506 | .family = PF_PACKET, |
2365 | .create = packet_create, | 2507 | .create = packet_create, |
2366 | .owner = THIS_MODULE, | 2508 | .owner = THIS_MODULE, |
@@ -2371,40 +2513,26 @@ static struct notifier_block packet_netdev_notifier = { | |||
2371 | }; | 2513 | }; |
2372 | 2514 | ||
2373 | #ifdef CONFIG_PROC_FS | 2515 | #ifdef CONFIG_PROC_FS |
2374 | static inline struct sock *packet_seq_idx(struct net *net, loff_t off) | ||
2375 | { | ||
2376 | struct sock *s; | ||
2377 | struct hlist_node *node; | ||
2378 | |||
2379 | sk_for_each(s, node, &net->packet.sklist) { | ||
2380 | if (!off--) | ||
2381 | return s; | ||
2382 | } | ||
2383 | return NULL; | ||
2384 | } | ||
2385 | 2516 | ||
2386 | static void *packet_seq_start(struct seq_file *seq, loff_t *pos) | 2517 | static void *packet_seq_start(struct seq_file *seq, loff_t *pos) |
2387 | __acquires(seq_file_net(seq)->packet.sklist_lock) | 2518 | __acquires(RCU) |
2388 | { | 2519 | { |
2389 | struct net *net = seq_file_net(seq); | 2520 | struct net *net = seq_file_net(seq); |
2390 | read_lock(&net->packet.sklist_lock); | 2521 | |
2391 | return *pos ? packet_seq_idx(net, *pos - 1) : SEQ_START_TOKEN; | 2522 | rcu_read_lock(); |
2523 | return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); | ||
2392 | } | 2524 | } |
2393 | 2525 | ||
2394 | static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 2526 | static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) |
2395 | { | 2527 | { |
2396 | struct net *net = seq_file_net(seq); | 2528 | struct net *net = seq_file_net(seq); |
2397 | ++*pos; | 2529 | return seq_hlist_next_rcu(v, &net->packet.sklist, pos); |
2398 | return (v == SEQ_START_TOKEN) | ||
2399 | ? sk_head(&net->packet.sklist) | ||
2400 | : sk_next((struct sock *)v) ; | ||
2401 | } | 2530 | } |
2402 | 2531 | ||
2403 | static void packet_seq_stop(struct seq_file *seq, void *v) | 2532 | static void packet_seq_stop(struct seq_file *seq, void *v) |
2404 | __releases(seq_file_net(seq)->packet.sklist_lock) | 2533 | __releases(RCU) |
2405 | { | 2534 | { |
2406 | struct net *net = seq_file_net(seq); | 2535 | rcu_read_unlock(); |
2407 | read_unlock(&net->packet.sklist_lock); | ||
2408 | } | 2536 | } |
2409 | 2537 | ||
2410 | static int packet_seq_show(struct seq_file *seq, void *v) | 2538 | static int packet_seq_show(struct seq_file *seq, void *v) |
@@ -2412,7 +2540,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) | |||
2412 | if (v == SEQ_START_TOKEN) | 2540 | if (v == SEQ_START_TOKEN) |
2413 | seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); | 2541 | seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n"); |
2414 | else { | 2542 | else { |
2415 | struct sock *s = v; | 2543 | struct sock *s = sk_entry(v); |
2416 | const struct packet_sock *po = pkt_sk(s); | 2544 | const struct packet_sock *po = pkt_sk(s); |
2417 | 2545 | ||
2418 | seq_printf(seq, | 2546 | seq_printf(seq, |
@@ -2454,9 +2582,9 @@ static const struct file_operations packet_seq_fops = { | |||
2454 | 2582 | ||
2455 | #endif | 2583 | #endif |
2456 | 2584 | ||
2457 | static int packet_net_init(struct net *net) | 2585 | static int __net_init packet_net_init(struct net *net) |
2458 | { | 2586 | { |
2459 | rwlock_init(&net->packet.sklist_lock); | 2587 | spin_lock_init(&net->packet.sklist_lock); |
2460 | INIT_HLIST_HEAD(&net->packet.sklist); | 2588 | INIT_HLIST_HEAD(&net->packet.sklist); |
2461 | 2589 | ||
2462 | if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) | 2590 | if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) |
@@ -2465,7 +2593,7 @@ static int packet_net_init(struct net *net) | |||
2465 | return 0; | 2593 | return 0; |
2466 | } | 2594 | } |
2467 | 2595 | ||
2468 | static void packet_net_exit(struct net *net) | 2596 | static void __net_exit packet_net_exit(struct net *net) |
2469 | { | 2597 | { |
2470 | proc_net_remove(net, "packet"); | 2598 | proc_net_remove(net, "packet"); |
2471 | } | 2599 | } |