aboutsummaryrefslogtreecommitdiffstats
path: root/net/packet/af_packet.c
diff options
context:
space:
mode:
authorPatrick McHardy <kaber@trash.net>2008-07-15 01:50:15 -0400
committerDavid S. Miller <davem@davemloft.net>2008-07-15 01:50:15 -0400
commitbbd6ef87c544d88c30e4b762b1b61ef267a7d279 (patch)
treedd9fdfbde65332d3212290d1b8783666475bd861 /net/packet/af_packet.c
parentbc1d0411b804ad190cdadabac48a10067f17b9e6 (diff)
packet: support extensible, 64-bit clean mmapped ring structure
The tpacket_hdr is not 64 bit clean due to use of an unsigned long and can't be extended because the following struct sockaddr_ll needs to be at a fixed offset. Add support for a version 2 tpacket protocol that removes these limitations. Userspace can query the header size through a new getsockopt option and change the protocol version through a setsockopt option. The changes needed to switch to the new protocol version are: 1. replace struct tpacket_hdr by struct tpacket2_hdr 2. query header len and save 3. set protocol version to 2 - set up ring as usual 4. for getting the sockaddr_ll, use (void *)hdr + TPACKET_ALIGN(hdrlen) instead of (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr)) Steps 2 and 4 can be omitted if the struct sockaddr_ll isn't needed. Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/packet/af_packet.c')
-rw-r--r--net/packet/af_packet.c179
1 files changed, 146 insertions, 33 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9f226916668..4f059775d48 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -186,6 +186,8 @@ struct packet_sock {
186 unsigned int pg_vec_order; 186 unsigned int pg_vec_order;
187 unsigned int pg_vec_pages; 187 unsigned int pg_vec_pages;
188 unsigned int pg_vec_len; 188 unsigned int pg_vec_len;
189 enum tpacket_versions tp_version;
190 unsigned int tp_hdrlen;
189#endif 191#endif
190}; 192};
191 193
@@ -201,14 +203,52 @@ struct packet_skb_cb {
201 203
202#ifdef CONFIG_PACKET_MMAP 204#ifdef CONFIG_PACKET_MMAP
203 205
204static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position) 206static void *packet_lookup_frame(struct packet_sock *po, unsigned int position,
207 int status)
205{ 208{
206 unsigned int pg_vec_pos, frame_offset; 209 unsigned int pg_vec_pos, frame_offset;
210 union {
211 struct tpacket_hdr *h1;
212 struct tpacket2_hdr *h2;
213 void *raw;
214 } h;
207 215
208 pg_vec_pos = position / po->frames_per_block; 216 pg_vec_pos = position / po->frames_per_block;
209 frame_offset = position % po->frames_per_block; 217 frame_offset = position % po->frames_per_block;
210 218
211 return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size)); 219 h.raw = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
220 switch (po->tp_version) {
221 case TPACKET_V1:
222 if (status != h.h1->tp_status ? TP_STATUS_USER :
223 TP_STATUS_KERNEL)
224 return NULL;
225 break;
226 case TPACKET_V2:
227 if (status != h.h2->tp_status ? TP_STATUS_USER :
228 TP_STATUS_KERNEL)
229 return NULL;
230 break;
231 }
232 return h.raw;
233}
234
235static void __packet_set_status(struct packet_sock *po, void *frame, int status)
236{
237 union {
238 struct tpacket_hdr *h1;
239 struct tpacket2_hdr *h2;
240 void *raw;
241 } h;
242
243 h.raw = frame;
244 switch (po->tp_version) {
245 case TPACKET_V1:
246 h.h1->tp_status = status;
247 break;
248 case TPACKET_V2:
249 h.h2->tp_status = status;
250 break;
251 }
212} 252}
213#endif 253#endif
214 254
@@ -551,14 +591,19 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
551 struct sock *sk; 591 struct sock *sk;
552 struct packet_sock *po; 592 struct packet_sock *po;
553 struct sockaddr_ll *sll; 593 struct sockaddr_ll *sll;
554 struct tpacket_hdr *h; 594 union {
595 struct tpacket_hdr *h1;
596 struct tpacket2_hdr *h2;
597 void *raw;
598 } h;
555 u8 * skb_head = skb->data; 599 u8 * skb_head = skb->data;
556 int skb_len = skb->len; 600 int skb_len = skb->len;
557 unsigned int snaplen, res; 601 unsigned int snaplen, res;
558 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; 602 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
559 unsigned short macoff, netoff; 603 unsigned short macoff, netoff, hdrlen;
560 struct sk_buff *copy_skb = NULL; 604 struct sk_buff *copy_skb = NULL;
561 struct timeval tv; 605 struct timeval tv;
606 struct timespec ts;
562 607
563 if (skb->pkt_type == PACKET_LOOPBACK) 608 if (skb->pkt_type == PACKET_LOOPBACK)
564 goto drop; 609 goto drop;
@@ -590,10 +635,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
590 snaplen = res; 635 snaplen = res;
591 636
592 if (sk->sk_type == SOCK_DGRAM) { 637 if (sk->sk_type == SOCK_DGRAM) {
593 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; 638 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16;
594 } else { 639 } else {
595 unsigned maclen = skb_network_offset(skb); 640 unsigned maclen = skb_network_offset(skb);
596 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen)); 641 netoff = TPACKET_ALIGN(po->tp_hdrlen +
642 (maclen < 16 ? 16 : maclen));
597 macoff = netoff - maclen; 643 macoff = netoff - maclen;
598 } 644 }
599 645
@@ -616,9 +662,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
616 } 662 }
617 663
618 spin_lock(&sk->sk_receive_queue.lock); 664 spin_lock(&sk->sk_receive_queue.lock);
619 h = packet_lookup_frame(po, po->head); 665 h.raw = packet_lookup_frame(po, po->head, TP_STATUS_KERNEL);
620 666 if (!h.raw)
621 if (h->tp_status)
622 goto ring_is_full; 667 goto ring_is_full;
623 po->head = po->head != po->frame_max ? po->head+1 : 0; 668 po->head = po->head != po->frame_max ? po->head+1 : 0;
624 po->stats.tp_packets++; 669 po->stats.tp_packets++;
@@ -630,20 +675,40 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
630 status &= ~TP_STATUS_LOSING; 675 status &= ~TP_STATUS_LOSING;
631 spin_unlock(&sk->sk_receive_queue.lock); 676 spin_unlock(&sk->sk_receive_queue.lock);
632 677
633 skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen); 678 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
634 679
635 h->tp_len = skb->len; 680 switch (po->tp_version) {
636 h->tp_snaplen = snaplen; 681 case TPACKET_V1:
637 h->tp_mac = macoff; 682 h.h1->tp_len = skb->len;
638 h->tp_net = netoff; 683 h.h1->tp_snaplen = snaplen;
639 if (skb->tstamp.tv64) 684 h.h1->tp_mac = macoff;
640 tv = ktime_to_timeval(skb->tstamp); 685 h.h1->tp_net = netoff;
641 else 686 if (skb->tstamp.tv64)
642 do_gettimeofday(&tv); 687 tv = ktime_to_timeval(skb->tstamp);
643 h->tp_sec = tv.tv_sec; 688 else
644 h->tp_usec = tv.tv_usec; 689 do_gettimeofday(&tv);
690 h.h1->tp_sec = tv.tv_sec;
691 h.h1->tp_usec = tv.tv_usec;
692 hdrlen = sizeof(*h.h1);
693 break;
694 case TPACKET_V2:
695 h.h2->tp_len = skb->len;
696 h.h2->tp_snaplen = snaplen;
697 h.h2->tp_mac = macoff;
698 h.h2->tp_net = netoff;
699 if (skb->tstamp.tv64)
700 ts = ktime_to_timespec(skb->tstamp);
701 else
702 getnstimeofday(&ts);
703 h.h2->tp_sec = ts.tv_sec;
704 h.h2->tp_nsec = ts.tv_nsec;
705 hdrlen = sizeof(*h.h2);
706 break;
707 default:
708 BUG();
709 }
645 710
646 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h))); 711 sll = h.raw + TPACKET_ALIGN(hdrlen);
647 sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 712 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
648 sll->sll_family = AF_PACKET; 713 sll->sll_family = AF_PACKET;
649 sll->sll_hatype = dev->type; 714 sll->sll_hatype = dev->type;
@@ -654,14 +719,14 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
654 else 719 else
655 sll->sll_ifindex = dev->ifindex; 720 sll->sll_ifindex = dev->ifindex;
656 721
657 h->tp_status = status; 722 __packet_set_status(po, h.raw, status);
658 smp_mb(); 723 smp_mb();
659 724
660 { 725 {
661 struct page *p_start, *p_end; 726 struct page *p_start, *p_end;
662 u8 *h_end = (u8 *)h + macoff + snaplen - 1; 727 u8 *h_end = h.raw + macoff + snaplen - 1;
663 728
664 p_start = virt_to_page(h); 729 p_start = virt_to_page(h.raw);
665 p_end = virt_to_page(h_end); 730 p_end = virt_to_page(h_end);
666 while (p_start <= p_end) { 731 while (p_start <= p_end) {
667 flush_dcache_page(p_start); 732 flush_dcache_page(p_start);
@@ -1362,6 +1427,25 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
1362 pkt_sk(sk)->copy_thresh = val; 1427 pkt_sk(sk)->copy_thresh = val;
1363 return 0; 1428 return 0;
1364 } 1429 }
1430 case PACKET_VERSION:
1431 {
1432 int val;
1433
1434 if (optlen != sizeof(val))
1435 return -EINVAL;
1436 if (po->pg_vec)
1437 return -EBUSY;
1438 if (copy_from_user(&val, optval, sizeof(val)))
1439 return -EFAULT;
1440 switch (val) {
1441 case TPACKET_V1:
1442 case TPACKET_V2:
1443 po->tp_version = val;
1444 return 0;
1445 default:
1446 return -EINVAL;
1447 }
1448 }
1365#endif 1449#endif
1366 case PACKET_AUXDATA: 1450 case PACKET_AUXDATA:
1367 { 1451 {
@@ -1437,6 +1521,31 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
1437 1521
1438 data = &val; 1522 data = &val;
1439 break; 1523 break;
1524#ifdef CONFIG_PACKET_MMAP
1525 case PACKET_VERSION:
1526 if (len > sizeof(int))
1527 len = sizeof(int);
1528 val = po->tp_version;
1529 data = &val;
1530 break;
1531 case PACKET_HDRLEN:
1532 if (len > sizeof(int))
1533 len = sizeof(int);
1534 if (copy_from_user(&val, optval, len))
1535 return -EFAULT;
1536 switch (val) {
1537 case TPACKET_V1:
1538 val = sizeof(struct tpacket_hdr);
1539 break;
1540 case TPACKET_V2:
1541 val = sizeof(struct tpacket2_hdr);
1542 break;
1543 default:
1544 return -EINVAL;
1545 }
1546 data = &val;
1547 break;
1548#endif
1440 default: 1549 default:
1441 return -ENOPROTOOPT; 1550 return -ENOPROTOOPT;
1442 } 1551 }
@@ -1570,11 +1679,8 @@ static unsigned int packet_poll(struct file * file, struct socket *sock,
1570 spin_lock_bh(&sk->sk_receive_queue.lock); 1679 spin_lock_bh(&sk->sk_receive_queue.lock);
1571 if (po->pg_vec) { 1680 if (po->pg_vec) {
1572 unsigned last = po->head ? po->head-1 : po->frame_max; 1681 unsigned last = po->head ? po->head-1 : po->frame_max;
1573 struct tpacket_hdr *h;
1574
1575 h = packet_lookup_frame(po, last);
1576 1682
1577 if (h->tp_status) 1683 if (packet_lookup_frame(po, last, TP_STATUS_USER))
1578 mask |= POLLIN | POLLRDNORM; 1684 mask |= POLLIN | POLLRDNORM;
1579 } 1685 }
1580 spin_unlock_bh(&sk->sk_receive_queue.lock); 1686 spin_unlock_bh(&sk->sk_receive_queue.lock);
@@ -1669,11 +1775,20 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
1669 if (unlikely(po->pg_vec)) 1775 if (unlikely(po->pg_vec))
1670 return -EBUSY; 1776 return -EBUSY;
1671 1777
1778 switch (po->tp_version) {
1779 case TPACKET_V1:
1780 po->tp_hdrlen = TPACKET_HDRLEN;
1781 break;
1782 case TPACKET_V2:
1783 po->tp_hdrlen = TPACKET2_HDRLEN;
1784 break;
1785 }
1786
1672 if (unlikely((int)req->tp_block_size <= 0)) 1787 if (unlikely((int)req->tp_block_size <= 0))
1673 return -EINVAL; 1788 return -EINVAL;
1674 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1))) 1789 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
1675 return -EINVAL; 1790 return -EINVAL;
1676 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN)) 1791 if (unlikely(req->tp_frame_size < po->tp_hdrlen))
1677 return -EINVAL; 1792 return -EINVAL;
1678 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 1793 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
1679 return -EINVAL; 1794 return -EINVAL;
@@ -1692,13 +1807,11 @@ static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing
1692 goto out; 1807 goto out;
1693 1808
1694 for (i = 0; i < req->tp_block_nr; i++) { 1809 for (i = 0; i < req->tp_block_nr; i++) {
1695 char *ptr = pg_vec[i]; 1810 void *ptr = pg_vec[i];
1696 struct tpacket_hdr *header;
1697 int k; 1811 int k;
1698 1812
1699 for (k = 0; k < po->frames_per_block; k++) { 1813 for (k = 0; k < po->frames_per_block; k++) {
1700 header = (struct tpacket_hdr *) ptr; 1814 __packet_set_status(po, ptr, TP_STATUS_KERNEL);
1701 header->tp_status = TP_STATUS_KERNEL;
1702 ptr += req->tp_frame_size; 1815 ptr += req->tp_frame_size;
1703 } 1816 }
1704 } 1817 }