aboutsummaryrefslogtreecommitdiffstats
path: root/Documentation/networking
diff options
context:
space:
mode:
author Daniel Borkmann <dborkman@redhat.com> 2013-06-06 10:08:13 -0400
committer David S. Miller <davem@davemloft.net> 2013-06-07 17:39:05 -0400
commit d70a3f887a38cb0acc7233a1d05f15c2b6e0be2b (patch)
tree 942562a2ce0e048ae72c8c202835fee2c418b11b /Documentation/networking
parent 93a306aef5eef70e524500884ccca52c5dd19e17 (diff)
doc: packet: simplify tpacket example code
This patch simplifies the tpacket_v3 example code a bit by getting rid of unnecessary macro wrappers, removing some debugging code so that it is more to the point, and also adds a header comment. Now this example code is the very minimum one needs to start from when dealing with tpacket_v3 and is ~100 lines smaller than before. Signed-off-by: Daniel Borkmann <dborkman@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- Documentation/networking/packet_mmap.txt 133
1 files changed, 28 insertions, 105 deletions
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 23dd80e82b8e..8572796b1eb6 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -704,6 +704,12 @@ So it seems to be a good candidate to be used with packet fanout.
704Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile 704Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile
705it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.): 705it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.):
706 706
707/* Written from scratch, but kernel-to-user space API usage
708 * dissected from lolpcap:
709 * Copyright 2011, Chetan Loke <loke.chetan@gmail.com>
710 * License: GPL, version 2.0
711 */
712
707#include <stdio.h> 713#include <stdio.h>
708#include <stdlib.h> 714#include <stdlib.h>
709#include <stdint.h> 715#include <stdint.h>
@@ -722,27 +728,6 @@ it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.):
722#include <linux/if_ether.h> 728#include <linux/if_ether.h>
723#include <linux/ip.h> 729#include <linux/ip.h>
724 730
725#define BLOCK_SIZE (1 << 22)
726#define FRAME_SIZE 2048
727
728#define NUM_BLOCKS 64
729#define NUM_FRAMES ((BLOCK_SIZE * NUM_BLOCKS) / FRAME_SIZE)
730
731#define BLOCK_RETIRE_TOV_IN_MS 64
732#define BLOCK_PRIV_AREA_SZ 13
733
734#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1))
735
736#define BLOCK_STATUS(x) ((x)->h1.block_status)
737#define BLOCK_NUM_PKTS(x) ((x)->h1.num_pkts)
738#define BLOCK_O2FP(x) ((x)->h1.offset_to_first_pkt)
739#define BLOCK_LEN(x) ((x)->h1.blk_len)
740#define BLOCK_SNUM(x) ((x)->h1.seq_num)
741#define BLOCK_O2PRIV(x) ((x)->offset_to_priv)
742#define BLOCK_PRIV(x) ((void *) ((uint8_t *) (x) + BLOCK_O2PRIV(x)))
743#define BLOCK_HDR_LEN (ALIGN_8(sizeof(struct block_desc)))
744#define BLOCK_PLUS_PRIV(sz_pri) (BLOCK_HDR_LEN + ALIGN_8((sz_pri)))
745
746#ifndef likely 731#ifndef likely
747# define likely(x) __builtin_expect(!!(x), 1) 732# define likely(x) __builtin_expect(!!(x), 1)
748#endif 733#endif
@@ -765,7 +750,7 @@ struct ring {
765static unsigned long packets_total = 0, bytes_total = 0; 750static unsigned long packets_total = 0, bytes_total = 0;
766static sig_atomic_t sigint = 0; 751static sig_atomic_t sigint = 0;
767 752
768void sighandler(int num) 753static void sighandler(int num)
769{ 754{
770 sigint = 1; 755 sigint = 1;
771} 756}
@@ -774,6 +759,8 @@ static int setup_socket(struct ring *ring, char *netdev)
774{ 759{
775 int err, i, fd, v = TPACKET_V3; 760 int err, i, fd, v = TPACKET_V3;
776 struct sockaddr_ll ll; 761 struct sockaddr_ll ll;
762 unsigned int blocksiz = 1 << 22, framesiz = 1 << 11;
763 unsigned int blocknum = 64;
777 764
778 fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)); 765 fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
779 if (fd < 0) { 766 if (fd < 0) {
@@ -788,13 +775,12 @@ static int setup_socket(struct ring *ring, char *netdev)
788 } 775 }
789 776
790 memset(&ring->req, 0, sizeof(ring->req)); 777 memset(&ring->req, 0, sizeof(ring->req));
791 ring->req.tp_block_size = BLOCK_SIZE; 778 ring->req.tp_block_size = blocksiz;
792 ring->req.tp_frame_size = FRAME_SIZE; 779 ring->req.tp_frame_size = framesiz;
793 ring->req.tp_block_nr = NUM_BLOCKS; 780 ring->req.tp_block_nr = blocknum;
794 ring->req.tp_frame_nr = NUM_FRAMES; 781 ring->req.tp_frame_nr = (blocksiz * blocknum) / framesiz;
795 ring->req.tp_retire_blk_tov = BLOCK_RETIRE_TOV_IN_MS; 782 ring->req.tp_retire_blk_tov = 60;
796 ring->req.tp_sizeof_priv = BLOCK_PRIV_AREA_SZ; 783 ring->req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
797 ring->req.tp_feature_req_word |= TP_FT_REQ_FILL_RXHASH;
798 784
799 err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req, 785 err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req,
800 sizeof(ring->req)); 786 sizeof(ring->req));
@@ -804,8 +790,7 @@ static int setup_socket(struct ring *ring, char *netdev)
804 } 790 }
805 791
806 ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr, 792 ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr,
807 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, 793 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0);
808 fd, 0);
809 if (ring->map == MAP_FAILED) { 794 if (ring->map == MAP_FAILED) {
810 perror("mmap"); 795 perror("mmap");
811 exit(1); 796 exit(1);
@@ -835,58 +820,6 @@ static int setup_socket(struct ring *ring, char *netdev)
835 return fd; 820 return fd;
836} 821}
837 822
838#ifdef __checked
839static uint64_t prev_block_seq_num = 0;
840
841void assert_block_seq_num(struct block_desc *pbd)
842{
843 if (unlikely(prev_block_seq_num + 1 != BLOCK_SNUM(pbd))) {
844 printf("prev_block_seq_num:%"PRIu64", expected seq:%"PRIu64" != "
845 "actual seq:%"PRIu64"\n", prev_block_seq_num,
846 prev_block_seq_num + 1, (uint64_t) BLOCK_SNUM(pbd));
847 exit(1);
848 }
849
850 prev_block_seq_num = BLOCK_SNUM(pbd);
851}
852
853static void assert_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
854{
855 if (BLOCK_NUM_PKTS(pbd)) {
856 if (unlikely(bytes != BLOCK_LEN(pbd))) {
857 printf("block:%u with %upackets, expected len:%u != actual len:%u\n",
858 block_num, BLOCK_NUM_PKTS(pbd), bytes, BLOCK_LEN(pbd));
859 exit(1);
860 }
861 } else {
862 if (unlikely(BLOCK_LEN(pbd) != BLOCK_PLUS_PRIV(BLOCK_PRIV_AREA_SZ))) {
863 printf("block:%u, expected len:%lu != actual len:%u\n",
864 block_num, BLOCK_HDR_LEN, BLOCK_LEN(pbd));
865 exit(1);
866 }
867 }
868}
869
870static void assert_block_header(struct block_desc *pbd, const int block_num)
871{
872 uint32_t block_status = BLOCK_STATUS(pbd);
873
874 if (unlikely((block_status & TP_STATUS_USER) == 0)) {
875 printf("block:%u, not in TP_STATUS_USER\n", block_num);
876 exit(1);
877 }
878
879 assert_block_seq_num(pbd);
880}
881#else
882static inline void assert_block_header(struct block_desc *pbd, const int block_num)
883{
884}
885static void assert_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
886{
887}
888#endif
889
890static void display(struct tpacket3_hdr *ppd) 823static void display(struct tpacket3_hdr *ppd)
891{ 824{
892 struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac); 825 struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac);
@@ -916,37 +849,27 @@ static void display(struct tpacket3_hdr *ppd)
916 849
917static void walk_block(struct block_desc *pbd, const int block_num) 850static void walk_block(struct block_desc *pbd, const int block_num)
918{ 851{
919 int num_pkts = BLOCK_NUM_PKTS(pbd), i; 852 int num_pkts = pbd->h1.num_pkts, i;
920 unsigned long bytes = 0; 853 unsigned long bytes = 0;
921 unsigned long bytes_with_padding = BLOCK_PLUS_PRIV(BLOCK_PRIV_AREA_SZ);
922 struct tpacket3_hdr *ppd; 854 struct tpacket3_hdr *ppd;
923 855
924 assert_block_header(pbd, block_num); 856 ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
925 857 pbd->h1.offset_to_first_pkt);
926 ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd + BLOCK_O2FP(pbd));
927 for (i = 0; i < num_pkts; ++i) { 858 for (i = 0; i < num_pkts; ++i) {
928 bytes += ppd->tp_snaplen; 859 bytes += ppd->tp_snaplen;
929 if (ppd->tp_next_offset)
930 bytes_with_padding += ppd->tp_next_offset;
931 else
932 bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
933
934 display(ppd); 860 display(ppd);
935 861
936 ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset); 862 ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd +
937 __sync_synchronize(); 863 ppd->tp_next_offset);
938 } 864 }
939 865
940 assert_block_len(pbd, bytes_with_padding, block_num);
941
942 packets_total += num_pkts; 866 packets_total += num_pkts;
943 bytes_total += bytes; 867 bytes_total += bytes;
944} 868}
945 869
946void flush_block(struct block_desc *pbd) 870static void flush_block(struct block_desc *pbd)
947{ 871{
948 BLOCK_STATUS(pbd) = TP_STATUS_KERNEL; 872 pbd->h1.block_status = TP_STATUS_KERNEL;
949 __sync_synchronize();
950} 873}
951 874
952static void teardown_socket(struct ring *ring, int fd) 875static void teardown_socket(struct ring *ring, int fd)
@@ -962,7 +885,7 @@ int main(int argc, char **argp)
962 socklen_t len; 885 socklen_t len;
963 struct ring ring; 886 struct ring ring;
964 struct pollfd pfd; 887 struct pollfd pfd;
965 unsigned int block_num = 0; 888 unsigned int block_num = 0, blocks = 64;
966 struct block_desc *pbd; 889 struct block_desc *pbd;
967 struct tpacket_stats_v3 stats; 890 struct tpacket_stats_v3 stats;
968 891
@@ -984,15 +907,15 @@ int main(int argc, char **argp)
984 907
985 while (likely(!sigint)) { 908 while (likely(!sigint)) {
986 pbd = (struct block_desc *) ring.rd[block_num].iov_base; 909 pbd = (struct block_desc *) ring.rd[block_num].iov_base;
987retry_block: 910
988 if ((BLOCK_STATUS(pbd) & TP_STATUS_USER) == 0) { 911 if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
989 poll(&pfd, 1, -1); 912 poll(&pfd, 1, -1);
990 goto retry_block; 913 continue;
991 } 914 }
992 915
993 walk_block(pbd, block_num); 916 walk_block(pbd, block_num);
994 flush_block(pbd); 917 flush_block(pbd);
995 block_num = (block_num + 1) % NUM_BLOCKS; 918 block_num = (block_num + 1) % blocks;
996 } 919 }
997 920
998 len = sizeof(stats); 921 len = sizeof(stats);