diff options
author | Neil Horman <nhorman@tuxdriver.com> | 2009-10-02 02:56:41 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-10-05 03:21:55 -0400 |
commit | 977750076d98c7ff6cbda51858bb5a5894a9d9ab (patch) | |
tree | 71b2fca8c6739e7d177996354b99504702a1b946 /net | |
parent | 69ef9694099802f7feeb23182dfb869e7c5f76f0 (diff) |
af_packet: add interframe drop cmsg (v6)
Add Ancillary data to better represent loss information
I've had a few requests recently to provide more detail regarding frame loss
during an AF_PACKET packet capture session. Specifically the requestors want to
see where in a packet sequence frames were lost, i.e. they want to see that 40
frames were lost between frames 302 and 303 in a packet capture file. In order
to do this we need:
1) The kernel to export this data to user space
2) The applications to make use of it
This patch addresses item (1). It does this by doing the following:
A) Anytime we drop a frame for which we would increment po->stats.tp_drops, we
also now increment a stat called po->stats.tp_gap.
B) Every time we successfully enqueue a frame to sk_receive_queue, we record the
value of po->stats.tp_gap in skb->mark. skb->cb would nominally be the place to
record this, but since all the space there is used up, we're overloading
skb->mark. It's safe to do since any enqueued packet is guaranteed to be
unshared at this point, and skb->mark isn't used for anything else in the rx
path to the application. After we record tp_gap in the skb, we zero
po->stats.tp_gap. This allows us to keep a counter of the number of frames lost
between any two enqueued packets.
C) When the application goes to dequeue a frame from the packet socket, we look
at skb->mark for that frame. If it is non-zero, we add a cmsg chunk to the
msghdr of level SOL_PACKET and type PACKET_GAPDATA. It's a 32-bit integer that
represents the number of frames lost between this packet and the last previous
frame received.
Note there is a chance that if there is frame loss after a receive, and then the
socket is closed, some gap data might be lost. This is covered by the use of
the PACKET_AUXDATA socket option, which gives total loss data. With a bit of
math, the final gap can be determined that way.
I've tested this patch myself, and it works well.
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
include/linux/if_packet.h | 2 ++
net/packet/af_packet.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 35 insertions(+)
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/packet/af_packet.c | 33 |
1 files changed, 33 insertions, 0 deletions
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d7ecca0a0c07..d398a9bf6903 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -524,6 +524,31 @@ static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, | |||
524 | } | 524 | } |
525 | 525 | ||
526 | /* | 526 | /* |
527 | * If we've lost frames since the last time we queued one to the | ||
528 | * sk_receive_queue, we need to record it here. | ||
529 | * This must be called under the protection of the socket lock | ||
530 | * to prevent racing with other softirqs and user space | ||
531 | */ | ||
532 | static inline void record_packet_gap(struct sk_buff *skb, | ||
533 | struct packet_sock *po) | ||
534 | { | ||
535 | /* | ||
536 | * We overload the mark field here, since we're about | ||
537 | * to enqueue to a receive queue and no body else will | ||
538 | * use this field at this point | ||
539 | */ | ||
540 | skb->mark = po->stats.tp_gap; | ||
541 | po->stats.tp_gap = 0; | ||
542 | return; | ||
543 | |||
544 | } | ||
545 | |||
546 | static inline __u32 check_packet_gap(struct sk_buff *skb) | ||
547 | { | ||
548 | return skb->mark; | ||
549 | } | ||
550 | |||
551 | /* | ||
527 | This function makes lazy skb cloning in hope that most of packets | 552 | This function makes lazy skb cloning in hope that most of packets |
528 | are discarded by BPF. | 553 | are discarded by BPF. |
529 | 554 | ||
@@ -626,6 +651,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |||
626 | 651 | ||
627 | spin_lock(&sk->sk_receive_queue.lock); | 652 | spin_lock(&sk->sk_receive_queue.lock); |
628 | po->stats.tp_packets++; | 653 | po->stats.tp_packets++; |
654 | record_packet_gap(skb, po); | ||
629 | __skb_queue_tail(&sk->sk_receive_queue, skb); | 655 | __skb_queue_tail(&sk->sk_receive_queue, skb); |
630 | spin_unlock(&sk->sk_receive_queue.lock); | 656 | spin_unlock(&sk->sk_receive_queue.lock); |
631 | sk->sk_data_ready(sk, skb->len); | 657 | sk->sk_data_ready(sk, skb->len); |
@@ -634,6 +660,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, | |||
634 | drop_n_acct: | 660 | drop_n_acct: |
635 | spin_lock(&sk->sk_receive_queue.lock); | 661 | spin_lock(&sk->sk_receive_queue.lock); |
636 | po->stats.tp_drops++; | 662 | po->stats.tp_drops++; |
663 | po->stats.tp_gap++; | ||
637 | spin_unlock(&sk->sk_receive_queue.lock); | 664 | spin_unlock(&sk->sk_receive_queue.lock); |
638 | 665 | ||
639 | drop_n_restore: | 666 | drop_n_restore: |
@@ -811,6 +838,7 @@ drop: | |||
811 | 838 | ||
812 | ring_is_full: | 839 | ring_is_full: |
813 | po->stats.tp_drops++; | 840 | po->stats.tp_drops++; |
841 | po->stats.tp_gap++; | ||
814 | spin_unlock(&sk->sk_receive_queue.lock); | 842 | spin_unlock(&sk->sk_receive_queue.lock); |
815 | 843 | ||
816 | sk->sk_data_ready(sk, 0); | 844 | sk->sk_data_ready(sk, 0); |
@@ -1418,6 +1446,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1418 | struct sk_buff *skb; | 1446 | struct sk_buff *skb; |
1419 | int copied, err; | 1447 | int copied, err; |
1420 | struct sockaddr_ll *sll; | 1448 | struct sockaddr_ll *sll; |
1449 | __u32 gap; | ||
1421 | 1450 | ||
1422 | err = -EINVAL; | 1451 | err = -EINVAL; |
1423 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) | 1452 | if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) |
@@ -1496,6 +1525,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1496 | put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); | 1525 | put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); |
1497 | } | 1526 | } |
1498 | 1527 | ||
1528 | gap = check_packet_gap(skb); | ||
1529 | if (gap) | ||
1530 | put_cmsg(msg, SOL_PACKET, PACKET_GAPDATA, sizeof(__u32), &gap); | ||
1531 | |||
1499 | /* | 1532 | /* |
1500 | * Free or return the buffer as appropriate. Again this | 1533 | * Free or return the buffer as appropriate. Again this |
1501 | * hides all the races and re-entrancy issues from us. | 1534 | * hides all the races and re-entrancy issues from us. |