diff options
author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2007-12-06 10:18:11 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2008-01-28 17:56:43 -0500 |
commit | b84a2189c4e1835c51fd6b974a0497be9bc4ba87 (patch) | |
tree | d488b0a45618ac37c605b10b093f8f03a050a7fc /net/dccp/ccids/ccid3.c | |
parent | 30a0eacd479f1c7c15fe0496585ff29f76de3378 (diff) |
[TFRC]: New rx history code
Credit here goes to Gerrit Renker, who provided the initial implementation for
this new codebase.
I modified it just to try to make it closer to the existing API: renaming some
functions, adding namespacing and fixing one bug where tfrc_rx_hist_alloc was
not freeing the allocated ring entries on the error path.
Original changeset comment from Gerrit:
-----------
This provides a new, self-contained and generic RX history service for TFRC
based protocols.
Details:
* new data structure, initialisation and cleanup routines;
* allocation of dccp_rx_hist entries local to packet_history.c,
as a service exported by the dccp_tfrc_lib module.
* interface to automatically track highest-received seqno;
* receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1);
* a generic function to test for `data packets' as per RFC 4340, sec. 7.7.
Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/dccp/ccids/ccid3.c')
-rw-r--r-- | net/dccp/ccids/ccid3.c | 282 |
1 files changed, 99 insertions, 183 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index f5cfc2e2d7b2..bf95c3292d5b 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c | |||
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, | |||
641 | /* | 641 | /* |
642 | * Receiver Half-Connection Routines | 642 | * Receiver Half-Connection Routines |
643 | */ | 643 | */ |
644 | |||
645 | /* CCID3 feedback types */ | ||
646 | enum ccid3_fback_type { | ||
647 | CCID3_FBACK_NONE = 0, | ||
648 | CCID3_FBACK_INITIAL, | ||
649 | CCID3_FBACK_PERIODIC, | ||
650 | CCID3_FBACK_PARAM_CHANGE | ||
651 | }; | ||
652 | |||
644 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG | 653 | #ifdef CONFIG_IP_DCCP_CCID3_DEBUG |
645 | static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) | 654 | static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) |
646 | { | 655 | { |
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk, | |||
667 | hcrx->ccid3hcrx_state = state; | 676 | hcrx->ccid3hcrx_state = state; |
668 | } | 677 | } |
669 | 678 | ||
670 | static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) | 679 | static void ccid3_hc_rx_send_feedback(struct sock *sk, |
671 | { | 680 | const struct sk_buff *skb, |
672 | if (likely(len > 0)) /* don't update on empty packets (e.g. ACKs) */ | 681 | enum ccid3_fback_type fbtype) |
673 | hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9); | ||
674 | } | ||
675 | |||
676 | static void ccid3_hc_rx_send_feedback(struct sock *sk) | ||
677 | { | 682 | { |
678 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 683 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
679 | struct dccp_sock *dp = dccp_sk(sk); | 684 | struct dccp_sock *dp = dccp_sk(sk); |
680 | struct tfrc_rx_hist_entry *packet; | ||
681 | ktime_t now; | 685 | ktime_t now; |
682 | suseconds_t delta; | 686 | s64 delta = 0; |
683 | 687 | ||
684 | ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); | 688 | ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); |
685 | 689 | ||
690 | if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM)) | ||
691 | return; | ||
692 | |||
686 | now = ktime_get_real(); | 693 | now = ktime_get_real(); |
687 | 694 | ||
688 | switch (hcrx->ccid3hcrx_state) { | 695 | switch (fbtype) { |
689 | case TFRC_RSTATE_NO_DATA: | 696 | case CCID3_FBACK_INITIAL: |
690 | hcrx->ccid3hcrx_x_recv = 0; | 697 | hcrx->ccid3hcrx_x_recv = 0; |
698 | hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ | ||
691 | break; | 699 | break; |
692 | case TFRC_RSTATE_DATA: | 700 | case CCID3_FBACK_PARAM_CHANGE: |
693 | delta = ktime_us_delta(now, | 701 | /* |
694 | hcrx->ccid3hcrx_tstamp_last_feedback); | 702 | * When parameters change (new loss or p > p_prev), we do not |
695 | DCCP_BUG_ON(delta < 0); | 703 | * have a reliable estimate for R_m of [RFC 3448, 6.2] and so |
696 | hcrx->ccid3hcrx_x_recv = | 704 | * need to reuse the previous value of X_recv. However, when |
697 | scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); | 705 | * X_recv was 0 (due to early loss), this would kill X down to |
706 | * s/t_mbi (i.e. one packet in 64 seconds). | ||
707 | * To avoid such drastic reduction, we approximate X_recv as | ||
708 | * the number of bytes since last feedback. | ||
709 | * This is a safe fallback, since X is bounded above by X_calc. | ||
710 | */ | ||
711 | if (hcrx->ccid3hcrx_x_recv > 0) | ||
712 | break; | ||
713 | /* fall through */ | ||
714 | case CCID3_FBACK_PERIODIC: | ||
715 | delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback); | ||
716 | if (delta <= 0) | ||
717 | DCCP_BUG("delta (%ld) <= 0", (long)delta); | ||
718 | else | ||
719 | hcrx->ccid3hcrx_x_recv = | ||
720 | scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); | ||
698 | break; | 721 | break; |
699 | case TFRC_RSTATE_TERM: | 722 | default: |
700 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | ||
701 | return; | 723 | return; |
702 | } | 724 | } |
703 | 725 | ||
704 | packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); | 726 | ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta, |
705 | if (unlikely(packet == NULL)) { | 727 | hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv); |
706 | DCCP_WARN("%s(%p), no data packet in history!\n", | ||
707 | dccp_role(sk), sk); | ||
708 | return; | ||
709 | } | ||
710 | 728 | ||
711 | hcrx->ccid3hcrx_tstamp_last_feedback = now; | 729 | hcrx->ccid3hcrx_tstamp_last_feedback = now; |
712 | hcrx->ccid3hcrx_ccval_last_counter = packet->tfrchrx_ccval; | 730 | hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval; |
713 | hcrx->ccid3hcrx_bytes_recv = 0; | 731 | hcrx->ccid3hcrx_bytes_recv = 0; |
714 | 732 | ||
715 | if (hcrx->ccid3hcrx_p == 0) | ||
716 | hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */ | ||
717 | else if (hcrx->ccid3hcrx_p > 1000000) { | ||
718 | DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p); | ||
719 | hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */ | ||
720 | } else | ||
721 | hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p; | ||
722 | |||
723 | dp->dccps_hc_rx_insert_options = 1; | 733 | dp->dccps_hc_rx_insert_options = 1; |
724 | dccp_send_ack(sk); | 734 | dccp_send_ack(sk); |
725 | } | 735 | } |
@@ -750,165 +760,74 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) | |||
750 | return 0; | 760 | return 0; |
751 | } | 761 | } |
752 | 762 | ||
753 | static int ccid3_hc_rx_detect_loss(struct sock *sk, | 763 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) |
754 | struct tfrc_rx_hist_entry *packet) | ||
755 | { | 764 | { |
756 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | 765 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); |
757 | struct tfrc_rx_hist_entry *rx_hist = | 766 | enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE; |
758 | tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist); | 767 | const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp; |
759 | u64 seqno = packet->tfrchrx_seqno; | 768 | const bool is_data_packet = dccp_data_packet(skb); |
760 | u64 tmp_seqno; | 769 | |
761 | int loss = 0; | 770 | if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) { |
762 | u8 ccval; | 771 | if (is_data_packet) { |
763 | 772 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; | |
764 | 773 | do_feedback = CCID3_FBACK_INITIAL; | |
765 | tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; | 774 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); |
766 | 775 | hcrx->ccid3hcrx_s = payload; | |
767 | if (!rx_hist || | 776 | /* |
768 | follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { | 777 | * Not necessary to update ccid3hcrx_bytes_recv here, |
769 | hcrx->ccid3hcrx_seqno_nonloss = seqno; | 778 | * since X_recv = 0 for the first feedback packet (cf. |
770 | hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval; | 779 | * RFC 3448, 6.3) -- gerrit |
771 | goto detect_out; | 780 | */ |
772 | } | ||
773 | |||
774 | |||
775 | while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno) | ||
776 | > TFRC_RECV_NUM_LATE_LOSS) { | ||
777 | loss = 1; | ||
778 | dccp_li_update_li(sk, | ||
779 | &hcrx->ccid3hcrx_li_hist, | ||
780 | &hcrx->ccid3hcrx_hist, | ||
781 | hcrx->ccid3hcrx_tstamp_last_feedback, | ||
782 | hcrx->ccid3hcrx_s, | ||
783 | hcrx->ccid3hcrx_bytes_recv, | ||
784 | hcrx->ccid3hcrx_x_recv, | ||
785 | hcrx->ccid3hcrx_seqno_nonloss, | ||
786 | hcrx->ccid3hcrx_ccval_nonloss); | ||
787 | tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; | ||
788 | dccp_inc_seqno(&tmp_seqno); | ||
789 | hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; | ||
790 | dccp_inc_seqno(&tmp_seqno); | ||
791 | while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist, | ||
792 | tmp_seqno, &ccval)) { | ||
793 | hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno; | ||
794 | hcrx->ccid3hcrx_ccval_nonloss = ccval; | ||
795 | dccp_inc_seqno(&tmp_seqno); | ||
796 | } | 781 | } |
782 | goto update_records; | ||
797 | } | 783 | } |
798 | 784 | ||
799 | /* FIXME - this code could be simplified with above while */ | 785 | if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb)) |
800 | /* but works at moment */ | 786 | return; /* done receiving */ |
801 | if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { | ||
802 | hcrx->ccid3hcrx_seqno_nonloss = seqno; | ||
803 | hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval; | ||
804 | } | ||
805 | |||
806 | detect_out: | ||
807 | tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, | ||
808 | &hcrx->ccid3hcrx_li_hist, packet, | ||
809 | hcrx->ccid3hcrx_seqno_nonloss); | ||
810 | return loss; | ||
811 | } | ||
812 | |||
813 | static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) | ||
814 | { | ||
815 | struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); | ||
816 | const struct dccp_options_received *opt_recv; | ||
817 | struct tfrc_rx_hist_entry *packet; | ||
818 | u32 p_prev, r_sample, rtt_prev; | ||
819 | int loss, payload_size; | ||
820 | ktime_t now; | ||
821 | |||
822 | opt_recv = &dccp_sk(sk)->dccps_options_received; | ||
823 | 787 | ||
824 | switch (DCCP_SKB_CB(skb)->dccpd_type) { | 788 | if (is_data_packet) { |
825 | case DCCP_PKT_ACK: | 789 | const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4; |
826 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | 790 | /* |
827 | return; | 791 | * Update moving-average of s and the sum of received payload bytes |
828 | case DCCP_PKT_DATAACK: | 792 | */ |
829 | if (opt_recv->dccpor_timestamp_echo == 0) | 793 | hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9); |
830 | break; | 794 | hcrx->ccid3hcrx_bytes_recv += payload; |
831 | r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo; | ||
832 | rtt_prev = hcrx->ccid3hcrx_rtt; | ||
833 | r_sample = dccp_sample_rtt(sk, 10 * r_sample); | ||
834 | |||
835 | if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) | ||
836 | hcrx->ccid3hcrx_rtt = r_sample; | ||
837 | else | ||
838 | hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 + | ||
839 | r_sample / 10; | ||
840 | |||
841 | if (rtt_prev != hcrx->ccid3hcrx_rtt) | ||
842 | ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n", | ||
843 | dccp_role(sk), sk, hcrx->ccid3hcrx_rtt, | ||
844 | opt_recv->dccpor_elapsed_time); | ||
845 | break; | ||
846 | case DCCP_PKT_DATA: | ||
847 | break; | ||
848 | default: /* We're not interested in other packet types, move along */ | ||
849 | return; | ||
850 | } | ||
851 | |||
852 | packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC); | ||
853 | if (unlikely(packet == NULL)) { | ||
854 | DCCP_WARN("%s(%p), Not enough mem to add rx packet " | ||
855 | "to history, consider it lost!\n", dccp_role(sk), sk); | ||
856 | return; | ||
857 | } | 795 | } |
858 | 796 | ||
859 | loss = ccid3_hc_rx_detect_loss(sk, packet); | 797 | /* |
860 | 798 | * Handle pending losses and otherwise check for new loss | |
861 | if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) | 799 | */ |
862 | return; | 800 | if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp)) |
863 | 801 | goto update_records; | |
864 | payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4; | ||
865 | ccid3_hc_rx_update_s(hcrx, payload_size); | ||
866 | 802 | ||
867 | switch (hcrx->ccid3hcrx_state) { | 803 | /* |
868 | case TFRC_RSTATE_NO_DATA: | 804 | * Handle data packets: RTT sampling and monitoring p |
869 | ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " | 805 | */ |
870 | "feedback\n", dccp_role(sk), sk, | 806 | if (unlikely(!is_data_packet)) |
871 | dccp_state_name(sk->sk_state), skb); | 807 | goto update_records; |
872 | ccid3_hc_rx_send_feedback(sk); | ||
873 | ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA); | ||
874 | return; | ||
875 | case TFRC_RSTATE_DATA: | ||
876 | hcrx->ccid3hcrx_bytes_recv += payload_size; | ||
877 | if (loss) | ||
878 | break; | ||
879 | 808 | ||
880 | now = ktime_get_real(); | 809 | if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss so far: p = 0 */ |
881 | if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - | 810 | const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb); |
882 | (s64)hcrx->ccid3hcrx_rtt) >= 0) { | 811 | /* |
883 | hcrx->ccid3hcrx_tstamp_last_ack = now; | 812 | * Empty loss history: no loss so far, hence p stays 0. |
884 | ccid3_hc_rx_send_feedback(sk); | 813 | * Sample RTT values, since an RTT estimate is required for the |
885 | } | 814 | * computation of p when the first loss occurs; RFC 3448, 6.3.1. |
886 | return; | 815 | */ |
887 | case TFRC_RSTATE_TERM: | 816 | if (sample != 0) |
888 | DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); | 817 | hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9); |
889 | return; | ||
890 | } | 818 | } |
891 | 819 | ||
892 | /* Dealing with packet loss */ | 820 | /* |
893 | ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", | 821 | * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3 |
894 | dccp_role(sk), sk, dccp_state_name(sk->sk_state)); | 822 | */ |
895 | 823 | if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3) | |
896 | p_prev = hcrx->ccid3hcrx_p; | 824 | do_feedback = CCID3_FBACK_PERIODIC; |
897 | |||
898 | /* Calculate loss event rate */ | ||
899 | if (!list_empty(&hcrx->ccid3hcrx_li_hist)) { | ||
900 | u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist); | ||
901 | 825 | ||
902 | /* Scaling up by 1000000 as fixed decimal */ | 826 | update_records: |
903 | if (i_mean != 0) | 827 | tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp); |
904 | hcrx->ccid3hcrx_p = 1000000 / i_mean; | ||
905 | } else | ||
906 | DCCP_BUG("empty loss history"); | ||
907 | 828 | ||
908 | if (hcrx->ccid3hcrx_p > p_prev) { | 829 | if (do_feedback) |
909 | ccid3_hc_rx_send_feedback(sk); | 830 | ccid3_hc_rx_send_feedback(sk, skb, do_feedback); |
910 | return; | ||
911 | } | ||
912 | } | 831 | } |
913 | 832 | ||
914 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) | 833 | static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) |
@@ -918,11 +837,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) | |||
918 | ccid3_pr_debug("entry\n"); | 837 | ccid3_pr_debug("entry\n"); |
919 | 838 | ||
920 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; | 839 | hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; |
921 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist); | ||
922 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); | 840 | INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); |
923 | hcrx->ccid3hcrx_tstamp_last_feedback = | 841 | return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist); |
924 | hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real(); | ||
925 | return 0; | ||
926 | } | 842 | } |
927 | 843 | ||
928 | static void ccid3_hc_rx_exit(struct sock *sk) | 844 | static void ccid3_hc_rx_exit(struct sock *sk) |