Diffstat (limited to 'net/packet')

 net/packet/af_packet.c | 225 ++++++++++++++++++++++++++++++---------------
 1 file changed, 153 insertions(+), 72 deletions(-)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b1253..c0c3cda19712 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -61,6 +61,7 @@
 #include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <net/net_namespace.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -163,8 +164,12 @@ struct packet_mreq_max {
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring);
 
+struct pgv {
+	char *buffer;
+};
+
 struct packet_ring_buffer {
-	char **pg_vec;
+	struct pgv *pg_vec;
 	unsigned int head;
 	unsigned int frames_per_block;
 	unsigned int frame_size;
@@ -217,6 +222,13 @@ struct packet_skb_cb {
 
 #define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
 
+static inline __pure struct page *pgv_to_page(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		return vmalloc_to_page(addr);
+	return virt_to_page(addr);
+}
+
 static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 {
 	union {
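The new pgv_to_page() helper is what allows ring blocks to come from two different allocators: virt_to_page() is only valid for addresses in the kernel's linear mapping, while buffers obtained through the vmalloc fallback (introduced further down) need vmalloc_to_page() to walk the page tables. A minimal sketch of the pattern every converted call site follows; flush_block() and its parameters are illustrative, not part of the patch:

    /* Flush every page backing one ring block, whether the block came
     * from __get_free_pages() (linear map) or from vzalloc().
     */
    static void flush_block(char *buffer, size_t block_size)
    {
            char *addr;

            for (addr = buffer; addr < buffer + block_size; addr += PAGE_SIZE)
                    flush_dcache_page(pgv_to_page(addr));
    }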
@@ -229,11 +241,11 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
 	switch (po->tp_version) {
 	case TPACKET_V1:
 		h.h1->tp_status = status;
-		flush_dcache_page(virt_to_page(&h.h1->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
 		break;
 	case TPACKET_V2:
 		h.h2->tp_status = status;
-		flush_dcache_page(virt_to_page(&h.h2->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 		break;
 	default:
 		pr_err("TPACKET version not supported\n");
@@ -256,10 +268,10 @@ static int __packet_get_status(struct packet_sock *po, void *frame)
 	h.raw = frame;
 	switch (po->tp_version) {
 	case TPACKET_V1:
-		flush_dcache_page(virt_to_page(&h.h1->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
 		return h.h1->tp_status;
 	case TPACKET_V2:
-		flush_dcache_page(virt_to_page(&h.h2->tp_status));
+		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
 		return h.h2->tp_status;
 	default:
 		pr_err("TPACKET version not supported\n");
@@ -283,7 +295,8 @@ static void *packet_lookup_frame(struct packet_sock *po,
 	pg_vec_pos = position / rb->frames_per_block;
 	frame_offset = position % rb->frames_per_block;
 
-	h.raw = rb->pg_vec[pg_vec_pos] + (frame_offset * rb->frame_size);
+	h.raw = rb->pg_vec[pg_vec_pos].buffer +
+		(frame_offset * rb->frame_size);
 
 	if (status != __packet_get_status(po, h.raw))
 		return NULL;
@@ -452,7 +465,7 @@ retry:
 	 */
 
 	err = -EMSGSIZE;
-	if (len > dev->mtu + dev->hard_header_len)
+	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN)
 		goto out_unlock;
 
 	if (!skb) {
@@ -483,12 +496,25 @@ retry:
 		goto retry;
 	}
 
+	if (len > (dev->mtu + dev->hard_header_len)) {
+		/* Earlier code assumed this would be a VLAN pkt,
+		 * double-check this now that we have the actual
+		 * packet in hand.
+		 */
+		struct ethhdr *ehdr;
+		skb_reset_mac_header(skb);
+		ehdr = eth_hdr(skb);
+		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
+			err = -EMSGSIZE;
+			goto out_unlock;
+		}
+	}
 
 	skb->protocol = proto;
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
-	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 	if (err < 0)
 		goto out_unlock;
 
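The two hunks above work as a pair: the length check is relaxed by VLAN_HLEN (4 bytes) up front, and once the payload has been copied in, the ethertype is re-checked to confirm that an oversized frame really carries an 802.1Q tag. A sketch of the frame layout that motivates the extra allowance; the struct name and the 1500-byte payload are illustrative:

    #include <stdint.h>

    /* A full-MTU payload plus an 802.1Q tag is exactly VLAN_HLEN (4)
     * bytes larger than dev->mtu + dev->hard_header_len, which the
     * old check rejected outright.
     */
    struct vlan_frame {
            uint8_t  dst[6];
            uint8_t  src[6];
            uint16_t tpid;          /* 0x8100, i.e. ETH_P_8021Q */
            uint16_t tci;           /* PCP/DEI/VLAN ID */
            uint16_t encap_proto;   /* the encapsulated ethertype */
            uint8_t  payload[1500]; /* up to the device MTU */
    } __attribute__((packed));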
@@ -503,30 +529,31 @@ out_free:
 	return err;
 }
 
-static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
+static inline unsigned int run_filter(const struct sk_buff *skb,
+				      const struct sock *sk,
 				      unsigned int res)
 {
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	rcu_read_lock();
+	filter = rcu_dereference(sk->sk_filter);
 	if (filter != NULL)
-		res = sk_run_filter(skb, filter->insns, filter->len);
-	rcu_read_unlock_bh();
+		res = SK_RUN_FILTER(filter, skb);
+	rcu_read_unlock();
 
 	return res;
 }
 
 /*
-   This function makes lazy skb cloning in hope that most of packets
-   are discarded by BPF.
-
-   Note tricky part: we DO mangle shared skb! skb->data, skb->len
-   and skb->cb are mangled. It works because (and until) packets
-   falling here are owned by current CPU. Output packets are cloned
-   by dev_queue_xmit_nit(), input packets are processed by net_bh
-   sequencially, so that if we return skb to original state on exit,
-   we will not harm anyone.
+ * This function makes lazy skb cloning in hope that most of packets
+ * are discarded by BPF.
+ *
+ * Note tricky part: we DO mangle shared skb! skb->data, skb->len
+ * and skb->cb are mangled. It works because (and until) packets
+ * falling here are owned by current CPU. Output packets are cloned
+ * by dev_queue_xmit_nit(), input packets are processed by net_bh
+ * sequencially, so that if we return skb to original state on exit,
+ * we will not harm anyone.
  */
 
 static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
@@ -552,11 +579,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	if (dev->header_ops) {
 		/* The device has an explicit notion of ll header,
-		   exported to higher levels.
-
-		   Otherwise, the device hides datails of it frame
-		   structure, so that corresponding packet head
-		   never delivered to user.
+		 * exported to higher levels.
+		 *
+		 * Otherwise, the device hides details of its frame
+		 * structure, so that corresponding packet head is
+		 * never delivered to user.
 		 */
 		if (sk->sk_type != SOCK_DGRAM)
 			skb_push(skb, skb->data - skb_mac_header(skb));
@@ -771,7 +798,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 		getnstimeofday(&ts);
 		h.h2->tp_sec = ts.tv_sec;
 		h.h2->tp_nsec = ts.tv_nsec;
-		h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+		if (vlan_tx_tag_present(skb)) {
+			h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+			status |= TP_STATUS_VLAN_VALID;
+		} else {
+			h.h2->tp_vlan_tci = 0;
+		}
+		h.h2->tp_padding = 0;
 		hdrlen = sizeof(*h.h2);
 		break;
 	default:
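With TP_STATUS_VLAN_VALID set alongside tp_vlan_tci, a TCI of zero is no longer ambiguous between "frame had no tag" and "frame tagged with TCI 0", and the newly zeroed tp_padding stops uninitialized bytes from leaking to user space. A minimal sketch of the matching check in a TPACKET_V2 ring consumer, assuming uapi headers recent enough to define the flag:

    #include <stdint.h>
    #include <linux/if_packet.h>

    /* Returns 0 and stores the 802.1Q TCI if the frame carried a
     * VLAN tag, -1 if it did not.
     */
    static int frame_vlan_tci(const struct tpacket2_hdr *h, uint16_t *tci)
    {
            if (!(h->tp_status & TP_STATUS_VLAN_VALID))
                    return -1;
            *tci = h->tp_vlan_tci;
            return 0;
    }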
@@ -791,17 +824,15 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	__packet_set_status(po, h.raw, status);
 	smp_mb();
+#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 	{
-		struct page *p_start, *p_end;
-		u8 *h_end = h.raw + macoff + snaplen - 1;
-
-		p_start = virt_to_page(h.raw);
-		p_end = virt_to_page(h_end);
-		while (p_start <= p_end) {
-			flush_dcache_page(p_start);
-			p_start++;
-		}
+		u8 *start, *end;
+
+		end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
+		for (start = h.raw; start < end; start += PAGE_SIZE)
+			flush_dcache_page(pgv_to_page(start));
 	}
+#endif
 
 	sk->sk_data_ready(sk, 0);
 
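The rewritten flush strides one PAGE_SIZE at a time from the start of the frame and rounds the end address up with PAGE_ALIGN(), so the page holding the frame's last byte is always reached even when the frame ends mid-page; the new #if also compiles the block out entirely on architectures whose flush_dcache_page() is a no-op. A worked example of the bound arithmetic; the MY_-prefixed macros are stand-alone stand-ins for the kernel's, assuming 4 KiB pages:

    #define MY_PAGE_SIZE    4096UL
    #define MY_PAGE_ALIGN(x) (((x) + MY_PAGE_SIZE - 1) & ~(MY_PAGE_SIZE - 1))

    /* A frame occupying 0x1800..0x23ff: MY_PAGE_ALIGN(0x2400) = 0x3000,
     * so the loop visits 0x1800 and 0x2800 and flushes both pages the
     * frame touches. The old loop walked struct page pointers between
     * virt_to_page() of the two ends, which is invalid for vmalloc
     * addresses.
     */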
@@ -907,7 +938,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	}
 
 	err = -EFAULT;
-	page = virt_to_page(data);
 	offset = offset_in_page(data);
 	len_max = PAGE_SIZE - offset;
 	len = ((to_write > len_max) ? len_max : to_write);
@@ -926,11 +956,11 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 			return -EFAULT;
 		}
 
+		page = pgv_to_page(data);
+		data += len;
 		flush_dcache_page(page);
 		get_page(page);
-		skb_fill_page_desc(skb,
-				nr_frags,
-				page++, offset, len);
+		skb_fill_page_desc(skb, nr_frags, page, offset, len);
 		to_write -= len;
 		offset = 0;
 		len_max = PAGE_SIZE;
@@ -942,7 +972,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 
 static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 {
-	struct socket *sock;
 	struct sk_buff *skb;
 	struct net_device *dev;
 	__be16 proto;
@@ -954,8 +983,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	int len_sum = 0;
 	int status = 0;
 
-	sock = po->sk.sk_socket;
-
 	mutex_lock(&po->pg_vec_lock);
 
 	err = -EBUSY;
@@ -1188,7 +1215,7 @@ static int packet_snd(struct socket *sock,
 	}
 
 	err = -EMSGSIZE;
-	if (!gso_type && (len > dev->mtu+reserve))
+	if (!gso_type && (len > dev->mtu + reserve + VLAN_HLEN))
 		goto out_unlock;
 
 	err = -ENOBUFS;
@@ -1209,10 +1236,24 @@ static int packet_snd(struct socket *sock,
 	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
 	if (err)
 		goto out_free;
-	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 	if (err < 0)
 		goto out_free;
 
+	if (!gso_type && (len > dev->mtu + reserve)) {
+		/* Earlier code assumed this would be a VLAN pkt,
+		 * double-check this now that we have the actual
+		 * packet in hand.
+		 */
+		struct ethhdr *ehdr;
+		skb_reset_mac_header(skb);
+		ehdr = eth_hdr(skb);
+		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
+			err = -EMSGSIZE;
+			goto out_free;
+		}
+	}
+
 	skb->protocol = proto;
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
@@ -1610,9 +1651,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	err = -EINVAL;
 	vnet_hdr_len = sizeof(vnet_hdr);
-	if ((len -= vnet_hdr_len) < 0)
+	if (len < vnet_hdr_len)
 		goto out_free;
 
+	len -= vnet_hdr_len;
+
 	if (skb_is_gso(skb)) {
 		struct skb_shared_info *sinfo = skb_shinfo(skb);
 
@@ -1636,8 +1679,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		vnet_hdr.csum_start = skb->csum_start -
-					skb_headroom(skb);
+		vnet_hdr.csum_start = skb_checksum_start_offset(skb);
 		vnet_hdr.csum_offset = skb->csum_offset;
 	} /* else everything is zero */
 
@@ -1689,8 +1731,13 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
 		aux.tp_net = skb_network_offset(skb);
-		aux.tp_vlan_tci = vlan_tx_tag_get(skb);
-
+		if (vlan_tx_tag_present(skb)) {
+			aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+			aux.tp_status |= TP_STATUS_VLAN_VALID;
+		} else {
+			aux.tp_vlan_tci = 0;
+		}
+		aux.tp_padding = 0;
 		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
 
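For sockets that read via recvmsg() rather than the mmap ring, the same VLAN information is delivered through the tpacket_auxdata control message. A sketch of the receive side, assuming PACKET_AUXDATA has been enabled on the socket with setsockopt(); use_vlan_tci() is a placeholder for the application's handling:

    #include <sys/socket.h>
    #include <linux/if_packet.h>

    static void handle_aux(struct msghdr *msg)
    {
            struct cmsghdr *cmsg;

            for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
                    struct tpacket_auxdata *aux;

                    if (cmsg->cmsg_level != SOL_PACKET ||
                        cmsg->cmsg_type != PACKET_AUXDATA)
                            continue;
                    aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
                    if (aux->tp_status & TP_STATUS_VLAN_VALID)
                            use_vlan_tci(aux->tp_vlan_tci);
            }
    }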
@@ -1719,7 +1766,7 @@ static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
 	if (dev)
-		strlcpy(uaddr->sa_data, dev->name, 15);
+		strncpy(uaddr->sa_data, dev->name, 14);
 	else
 		memset(uaddr->sa_data, 0, 14);
 	rcu_read_unlock();
@@ -1742,6 +1789,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
 	sll->sll_family = AF_PACKET;
 	sll->sll_ifindex = po->ifindex;
 	sll->sll_protocol = po->num;
+	sll->sll_pkttype = 0;
 	rcu_read_lock();
 	dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
 	if (dev) {
@@ -2322,37 +2370,70 @@ static const struct vm_operations_struct packet_mmap_ops = {
 	.close = packet_mm_close,
 };
 
-static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
+			unsigned int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
-		if (likely(pg_vec[i]))
-			free_pages((unsigned long) pg_vec[i], order);
+		if (likely(pg_vec[i].buffer)) {
+			if (is_vmalloc_addr(pg_vec[i].buffer))
+				vfree(pg_vec[i].buffer);
+			else
+				free_pages((unsigned long)pg_vec[i].buffer,
+					   order);
+			pg_vec[i].buffer = NULL;
+		}
 	}
 	kfree(pg_vec);
 }
 
 static inline char *alloc_one_pg_vec_page(unsigned long order)
 {
-	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO | __GFP_NOWARN;
+	char *buffer = NULL;
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
+			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
+
+	buffer = (char *) __get_free_pages(gfp_flags, order);
 
-	return (char *) __get_free_pages(gfp_flags, order);
+	if (buffer)
+		return buffer;
+
+	/*
+	 * __get_free_pages failed, fall back to vmalloc
+	 */
+	buffer = vzalloc((1 << order) * PAGE_SIZE);
+
+	if (buffer)
+		return buffer;
+
+	/*
+	 * vmalloc failed, lets dig into swap here
+	 */
+	gfp_flags &= ~__GFP_NORETRY;
+	buffer = (char *)__get_free_pages(gfp_flags, order);
+	if (buffer)
+		return buffer;
+
+	/*
+	 * complete and utter failure
+	 */
+	return NULL;
 }
 
-static char **alloc_pg_vec(struct tpacket_req *req, int order)
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
 {
 	unsigned int block_nr = req->tp_block_nr;
-	char **pg_vec;
+	struct pgv *pg_vec;
 	int i;
 
-	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
+	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
 	if (unlikely(!pg_vec))
 		goto out;
 
 	for (i = 0; i < block_nr; i++) {
-		pg_vec[i] = alloc_one_pg_vec_page(order);
-		if (unlikely(!pg_vec[i]))
+		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
+		if (unlikely(!pg_vec[i].buffer))
 			goto out_free_pgvec;
 	}
 
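alloc_one_pg_vec_page() now tries three strategies in order: a cheap page allocation that gives up early (__GFP_NORETRY), then vzalloc(), and only as a last resort a page allocation allowed to push into reclaim and swap. The order argument keeps the two paths equivalent in size, since vzalloc() is asked for (1 << order) pages. A small sketch of the caller's side of that contract; in the real packet_set_ring() the order is likewise derived from the user-requested block size, and req stands for the tpacket_req being processed:

    /* get_order() converts a byte count into a page-allocation order,
     * e.g. a 131072-byte block -> order 5 with 4 KiB pages (32 pages),
     * so both allocation paths yield blocks of identical size.
     */
    unsigned int order = get_order(req->tp_block_size);
    struct pgv *pg_vec = alloc_pg_vec(req, order);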
@@ -2368,7 +2449,7 @@ out_free_pgvec:
 static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 		int closing, int tx_ring)
 {
-	char **pg_vec = NULL;
+	struct pgv *pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
 	int was_running, order = 0;
 	struct packet_ring_buffer *rb;
@@ -2453,22 +2534,20 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
 	mutex_lock(&po->pg_vec_lock);
 	if (closing || atomic_read(&po->mapped) == 0) {
 		err = 0;
-#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
 		spin_lock_bh(&rb_queue->lock);
-		pg_vec = XC(rb->pg_vec, pg_vec);
+		swap(rb->pg_vec, pg_vec);
 		rb->frame_max = (req->tp_frame_nr - 1);
 		rb->head = 0;
 		rb->frame_size = req->tp_frame_size;
 		spin_unlock_bh(&rb_queue->lock);
 
-		order = XC(rb->pg_vec_order, order);
-		req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
+		swap(rb->pg_vec_order, order);
+		swap(rb->pg_vec_len, req->tp_block_nr);
 
 		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
 		po->prot_hook.func = (po->rx_ring.pg_vec) ?
 						tpacket_rcv : packet_rcv;
 		skb_queue_purge(rb_queue);
-#undef XC
 		if (atomic_read(&po->mapped))
 			pr_err("packet_mmap: vma is busy: %d\n",
 			       atomic_read(&po->mapped));
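The open-coded XC() exchange macro is replaced by the generic swap() helper from <linux/kernel.h>, which is roughly:

    #define swap(a, b) \
            do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

Unlike XC(), swap() does not yield the old value as an expression, so each use above exchanges a ring field with its local counterpart in place, which is all this code ever needed.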
@@ -2530,15 +2609,17 @@ static int packet_mmap(struct file *file, struct socket *sock,
 			continue;
 
 		for (i = 0; i < rb->pg_vec_len; i++) {
-			struct page *page = virt_to_page(rb->pg_vec[i]);
+			struct page *page;
+			void *kaddr = rb->pg_vec[i].buffer;
 			int pg_num;
 
-			for (pg_num = 0; pg_num < rb->pg_vec_pages;
-					pg_num++, page++) {
+			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
+				page = pgv_to_page(kaddr);
 				err = vm_insert_page(vma, start, page);
 				if (unlikely(err))
 					goto out;
 				start += PAGE_SIZE;
+				kaddr += PAGE_SIZE;
 			}
 		}
 	}
@@ -2636,7 +2717,7 @@ static int packet_seq_show(struct seq_file *seq, void *v)
 		const struct packet_sock *po = pkt_sk(s);
 
 		seq_printf(seq,
-			   "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
+			   "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
 			   s,
 			   atomic_read(&s->sk_refcnt),
 			   s->sk_type,
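Printing the socket with %pK instead of %p makes the address shown in /proc/net/packet subject to the kptr_restrict sysctl: with kptr_restrict >= 1, unprivileged readers see zeros rather than a real kernel pointer. The same convention applies to any seq_file show routine that exposes object addresses; a one-line sketch, not taken from this file:

    /* %pK: the real pointer is shown only when kptr_restrict permits */
    seq_printf(seq, "%pK refcnt=%d\n", sk, atomic_read(&sk->sk_refcnt));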