diff options
author | Daniel Borkmann <dborkman@redhat.com> | 2013-06-06 10:08:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-06-07 17:39:05 -0400 |
commit | d70a3f887a38cb0acc7233a1d05f15c2b6e0be2b (patch) | |
tree | 942562a2ce0e048ae72c8c202835fee2c418b11b /Documentation/networking | |
parent | 93a306aef5eef70e524500884ccca52c5dd19e17 (diff) |
doc: packet: simplify tpacket example code
This patch simplifies the tpacket_v3 example code a bit by getting rid
of unnecessary macro wrappers, removing some debugging code so that it is
more to the point, and also adds a header comment. Now this example code
is the very minimum one needs to start from when dealing with tpacket_v3
and ~100 lines smaller than before.
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/packet_mmap.txt | 133 |
1 files changed, 28 insertions, 105 deletions
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 23dd80e82b8e..8572796b1eb6 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt | |||
@@ -704,6 +704,12 @@ So it seems to be a good candidate to be used with packet fanout. | |||
704 | Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile | 704 | Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile |
705 | it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.): | 705 | it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.): |
706 | 706 | ||
707 | /* Written from scratch, but kernel-to-user space API usage | ||
708 | * dissected from lolpcap: | ||
709 | * Copyright 2011, Chetan Loke <loke.chetan@gmail.com> | ||
710 | * License: GPL, version 2.0 | ||
711 | */ | ||
712 | |||
707 | #include <stdio.h> | 713 | #include <stdio.h> |
708 | #include <stdlib.h> | 714 | #include <stdlib.h> |
709 | #include <stdint.h> | 715 | #include <stdint.h> |
@@ -722,27 +728,6 @@ it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.): | |||
722 | #include <linux/if_ether.h> | 728 | #include <linux/if_ether.h> |
723 | #include <linux/ip.h> | 729 | #include <linux/ip.h> |
724 | 730 | ||
725 | #define BLOCK_SIZE (1 << 22) | ||
726 | #define FRAME_SIZE 2048 | ||
727 | |||
728 | #define NUM_BLOCKS 64 | ||
729 | #define NUM_FRAMES ((BLOCK_SIZE * NUM_BLOCKS) / FRAME_SIZE) | ||
730 | |||
731 | #define BLOCK_RETIRE_TOV_IN_MS 64 | ||
732 | #define BLOCK_PRIV_AREA_SZ 13 | ||
733 | |||
734 | #define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1)) | ||
735 | |||
736 | #define BLOCK_STATUS(x) ((x)->h1.block_status) | ||
737 | #define BLOCK_NUM_PKTS(x) ((x)->h1.num_pkts) | ||
738 | #define BLOCK_O2FP(x) ((x)->h1.offset_to_first_pkt) | ||
739 | #define BLOCK_LEN(x) ((x)->h1.blk_len) | ||
740 | #define BLOCK_SNUM(x) ((x)->h1.seq_num) | ||
741 | #define BLOCK_O2PRIV(x) ((x)->offset_to_priv) | ||
742 | #define BLOCK_PRIV(x) ((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x))) | ||
743 | #define BLOCK_HDR_LEN (ALIGN_8(sizeof(struct block_desc))) | ||
744 | #define BLOCK_PLUS_PRIV(sz_pri) (BLOCK_HDR_LEN + ALIGN_8((sz_pri))) | ||
745 | |||
746 | #ifndef likely | 731 | #ifndef likely |
747 | # define likely(x) __builtin_expect(!!(x), 1) | 732 | # define likely(x) __builtin_expect(!!(x), 1) |
748 | #endif | 733 | #endif |
@@ -765,7 +750,7 @@ struct ring { | |||
765 | static unsigned long packets_total = 0, bytes_total = 0; | 750 | static unsigned long packets_total = 0, bytes_total = 0; |
766 | static sig_atomic_t sigint = 0; | 751 | static sig_atomic_t sigint = 0; |
767 | 752 | ||
768 | void sighandler(int num) | 753 | static void sighandler(int num) |
769 | { | 754 | { |
770 | sigint = 1; | 755 | sigint = 1; |
771 | } | 756 | } |
@@ -774,6 +759,8 @@ static int setup_socket(struct ring *ring, char *netdev) | |||
774 | { | 759 | { |
775 | int err, i, fd, v = TPACKET_V3; | 760 | int err, i, fd, v = TPACKET_V3; |
776 | struct sockaddr_ll ll; | 761 | struct sockaddr_ll ll; |
762 | unsigned int blocksiz = 1 << 22, framesiz = 1 << 11; | ||
763 | unsigned int blocknum = 64; | ||
777 | 764 | ||
778 | fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); | 765 | fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); |
779 | if (fd < 0) { | 766 | if (fd < 0) { |
@@ -788,13 +775,12 @@ static int setup_socket(struct ring *ring, char *netdev) | |||
788 | } | 775 | } |
789 | 776 | ||
790 | memset(&ring->req, 0, sizeof(ring->req)); | 777 | memset(&ring->req, 0, sizeof(ring->req)); |
791 | ring->req.tp_block_size = BLOCK_SIZE; | 778 | ring->req.tp_block_size = blocksiz; |
792 | ring->req.tp_frame_size = FRAME_SIZE; | 779 | ring->req.tp_frame_size = framesiz; |
793 | ring->req.tp_block_nr = NUM_BLOCKS; | 780 | ring->req.tp_block_nr = blocknum; |
794 | ring->req.tp_frame_nr = NUM_FRAMES; | 781 | ring->req.tp_frame_nr = (blocksiz * blocknum) / framesiz; |
795 | ring->req.tp_retire_blk_tov = BLOCK_RETIRE_TOV_IN_MS; | 782 | ring->req.tp_retire_blk_tov = 60; |
796 | ring->req.tp_sizeof_priv = BLOCK_PRIV_AREA_SZ; | 783 | ring->req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; |
797 | ring->req.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH; | ||
798 | 784 | ||
799 | err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req, | 785 | err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req, |
800 | sizeof(ring->req)); | 786 | sizeof(ring->req)); |
@@ -804,8 +790,7 @@ static int setup_socket(struct ring *ring, char *netdev) | |||
804 | } | 790 | } |
805 | 791 | ||
806 | ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr, | 792 | ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr, |
807 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, | 793 | PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0); |
808 | fd, 0); | ||
809 | if (ring->map == MAP_FAILED) { | 794 | if (ring->map == MAP_FAILED) { |
810 | perror("mmap"); | 795 | perror("mmap"); |
811 | exit(1); | 796 | exit(1); |
@@ -835,58 +820,6 @@ static int setup_socket(struct ring *ring, char *netdev) | |||
835 | return fd; | 820 | return fd; |
836 | } | 821 | } |
837 | 822 | ||
838 | #ifdef __checked | ||
839 | static uint64_t prev_block_seq_num = 0; | ||
840 | |||
841 | void assert_block_seq_num(struct block_desc *pbd) | ||
842 | { | ||
843 | if (unlikely(prev_block_seq_num + 1 != BLOCK_SNUM(pbd))) { | ||
844 | printf("prev_block_seq_num:%"PRIu64", expected seq:%"PRIu64" != " | ||
845 | "actual seq:%"PRIu64"\n", prev_block_seq_num, | ||
846 | prev_block_seq_num + 1, (uint64_t) BLOCK_SNUM(pbd)); | ||
847 | exit(1); | ||
848 | } | ||
849 | |||
850 | prev_block_seq_num = BLOCK_SNUM(pbd); | ||
851 | } | ||
852 | |||
853 | static void assert_block_len(struct block_desc *pbd, uint32_t bytes, int block_num) | ||
854 | { | ||
855 | if (BLOCK_NUM_PKTS(pbd)) { | ||
856 | if (unlikely(bytes != BLOCK_LEN(pbd))) { | ||
857 | printf("block:%u with %upackets, expected len:%u != actual len:%u\n", | ||
858 | block_num, BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd)); | ||
859 | exit(1); | ||
860 | } | ||
861 | } else { | ||
862 | if (unlikely(BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(BLOCK_PRIV_AREA_SZ))) { | ||
863 | printf("block:%u, expected len:%lu != actual len:%u\n", | ||
864 | block_num, BLOCK_HDR_LEN, BLOCK_LEN(pbd)); | ||
865 | exit(1); | ||
866 | } | ||
867 | } | ||
868 | } | ||
869 | |||
870 | static void assert_block_header(struct block_desc *pbd, const int block_num) | ||
871 | { | ||
872 | uint32_t block_status = BLOCK_STATUS(pbd); | ||
873 | |||
874 | if (unlikely((block_status & TP_STATUS_USER) == 0)) { | ||
875 | printf("block:%u, not in TP_STATUS_USER\n", block_num); | ||
876 | exit(1); | ||
877 | } | ||
878 | |||
879 | assert_block_seq_num(pbd); | ||
880 | } | ||
881 | #else | ||
882 | static inline void assert_block_header(struct block_desc *pbd, const int block_num) | ||
883 | { | ||
884 | } | ||
885 | static void assert_block_len(struct block_desc *pbd, uint32_t bytes, int block_num) | ||
886 | { | ||
887 | } | ||
888 | #endif | ||
889 | |||
890 | static void display(struct tpacket3_hdr *ppd) | 823 | static void display(struct tpacket3_hdr *ppd) |
891 | { | 824 | { |
892 | struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac); | 825 | struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac); |
@@ -916,37 +849,27 @@ static void display(struct tpacket3_hdr *ppd) | |||
916 | 849 | ||
917 | static void walk_block(struct block_desc *pbd, const int block_num) | 850 | static void walk_block(struct block_desc *pbd, const int block_num) |
918 | { | 851 | { |
919 | int num_pkts = BLOCK_NUM_PKTS(pbd), i; | 852 | int num_pkts = pbd->h1.num_pkts, i; |
920 | unsigned long bytes = 0; | 853 | unsigned long bytes = 0; |
921 | unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(BLOCK_PRIV_AREA_SZ); | ||
922 | struct tpacket3_hdr *ppd; | 854 | struct tpacket3_hdr *ppd; |
923 | 855 | ||
924 | assert_block_header(pbd, block_num); | 856 | ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + |
925 | 857 | pbd->h1.offset_to_first_pkt); | |
926 | ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd)); | ||
927 | for (i = 0; i < num_pkts; ++i) { | 858 | for (i = 0; i < num_pkts; ++i) { |
928 | bytes += ppd->tp_snaplen; | 859 | bytes += ppd->tp_snaplen; |
929 | if (ppd->tp_next_offset) | ||
930 | bytes_with_padding += ppd->tp_next_offset; | ||
931 | else | ||
932 | bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac); | ||
933 | |||
934 | display(ppd); | 860 | display(ppd); |
935 | 861 | ||
936 | ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset); | 862 | ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + |
937 | __sync_synchronize(); | 863 | ppd->tp_next_offset); |
938 | } | 864 | } |
939 | 865 | ||
940 | assert_block_len(pbd, bytes_with_padding, block_num); | ||
941 | |||
942 | packets_total += num_pkts; | 866 | packets_total += num_pkts; |
943 | bytes_total += bytes; | 867 | bytes_total += bytes; |
944 | } | 868 | } |
945 | 869 | ||
946 | void flush_block(struct block_desc *pbd) | 870 | static void flush_block(struct block_desc *pbd) |
947 | { | 871 | { |
948 | BLOCK_STATUS(pbd) = TP_STATUS_KERNEL; | 872 | pbd->h1.block_status = TP_STATUS_KERNEL; |
949 | __sync_synchronize(); | ||
950 | } | 873 | } |
951 | 874 | ||
952 | static void teardown_socket(struct ring *ring, int fd) | 875 | static void teardown_socket(struct ring *ring, int fd) |
@@ -962,7 +885,7 @@ int main(int argc, char **argp) | |||
962 | socklen_t len; | 885 | socklen_t len; |
963 | struct ring ring; | 886 | struct ring ring; |
964 | struct pollfd pfd; | 887 | struct pollfd pfd; |
965 | unsigned int block_num = 0; | 888 | unsigned int block_num = 0, blocks = 64; |
966 | struct block_desc *pbd; | 889 | struct block_desc *pbd; |
967 | struct tpacket_stats_v3 stats; | 890 | struct tpacket_stats_v3 stats; |
968 | 891 | ||
@@ -984,15 +907,15 @@ int main(int argc, char **argp) | |||
984 | 907 | ||
985 | while (likely(!sigint)) { | 908 | while (likely(!sigint)) { |
986 | pbd = (struct block_desc *) ring.rd[block_num].iov_base; | 909 | pbd = (struct block_desc *) ring.rd[block_num].iov_base; |
987 | retry_block: | 910 | |
988 | if ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0) { | 911 | if ((pbd->h1.block_status & TP_STATUS_USER) == 0) { |
989 | poll(&pfd, 1, -1); | 912 | poll(&pfd, 1, -1); |
990 | goto retry_block; | 913 | continue; |
991 | } | 914 | } |
992 | 915 | ||
993 | walk_block(pbd, block_num); | 916 | walk_block(pbd, block_num); |
994 | flush_block(pbd); | 917 | flush_block(pbd); |
995 | block_num = (block_num + 1) % NUM_BLOCKS; | 918 | block_num = (block_num + 1) % blocks; |
996 | } | 919 | } |
997 | 920 | ||
998 | len = sizeof(stats); | 921 | len = sizeof(stats); |