aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@redhat.com>2007-12-06 10:18:11 -0500
committerDavid S. Miller <davem@davemloft.net>2008-01-28 17:56:43 -0500
commitb84a2189c4e1835c51fd6b974a0497be9bc4ba87 (patch)
treed488b0a45618ac37c605b10b093f8f03a050a7fc
parent30a0eacd479f1c7c15fe0496585ff29f76de3378 (diff)
[TFRC]: New rx history code
Credit here goes to Gerrit Renker, that provided the initial implementation for this new codebase. I modified it just to try to make it closer to the existing API, renaming some functions, add namespacing and fix one bug where the tfrc_rx_hist_alloc was not freeing the allocated ring entries on the error path. Original changeset comment from Gerrit: ----------- This provides a new, self-contained and generic RX history service for TFRC based protocols. Details: * new data structure, initialisation and cleanup routines; * allocation of dccp_rx_hist entries local to packet_history.c, as a service exported by the dccp_tfrc_lib module. * interface to automatically track highest-received seqno; * receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1); * a generic function to test for `data packets' as per RFC 4340, sec. 7.7. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/dccp/ccids/ccid3.c282
-rw-r--r--net/dccp/ccids/ccid3.h14
-rw-r--r--net/dccp/ccids/lib/loss_interval.c13
-rw-r--r--net/dccp/ccids/lib/packet_history.c290
-rw-r--r--net/dccp/ccids/lib/packet_history.h83
5 files changed, 327 insertions, 355 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f5cfc2e2d7b2..bf95c3292d5b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
641/* 641/*
642 * Receiver Half-Connection Routines 642 * Receiver Half-Connection Routines
643 */ 643 */
644
645/* CCID3 feedback types */
646enum ccid3_fback_type {
647 CCID3_FBACK_NONE = 0,
648 CCID3_FBACK_INITIAL,
649 CCID3_FBACK_PERIODIC,
650 CCID3_FBACK_PARAM_CHANGE
651};
652
644#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 653#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
645static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) 654static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
646{ 655{
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
667 hcrx->ccid3hcrx_state = state; 676 hcrx->ccid3hcrx_state = state;
668} 677}
669 678
670static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) 679static void ccid3_hc_rx_send_feedback(struct sock *sk,
671{ 680 const struct sk_buff *skb,
672 if (likely(len > 0)) /* don't update on empty packets (e.g. ACKs) */ 681 enum ccid3_fback_type fbtype)
673 hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9);
674}
675
676static void ccid3_hc_rx_send_feedback(struct sock *sk)
677{ 682{
678 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 683 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
679 struct dccp_sock *dp = dccp_sk(sk); 684 struct dccp_sock *dp = dccp_sk(sk);
680 struct tfrc_rx_hist_entry *packet;
681 ktime_t now; 685 ktime_t now;
682 suseconds_t delta; 686 s64 delta = 0;
683 687
684 ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); 688 ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
685 689
690 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
691 return;
692
686 now = ktime_get_real(); 693 now = ktime_get_real();
687 694
688 switch (hcrx->ccid3hcrx_state) { 695 switch (fbtype) {
689 case TFRC_RSTATE_NO_DATA: 696 case CCID3_FBACK_INITIAL:
690 hcrx->ccid3hcrx_x_recv = 0; 697 hcrx->ccid3hcrx_x_recv = 0;
698 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
691 break; 699 break;
692 case TFRC_RSTATE_DATA: 700 case CCID3_FBACK_PARAM_CHANGE:
693 delta = ktime_us_delta(now, 701 /*
694 hcrx->ccid3hcrx_tstamp_last_feedback); 702 * When parameters change (new loss or p > p_prev), we do not
695 DCCP_BUG_ON(delta < 0); 703 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
696 hcrx->ccid3hcrx_x_recv = 704 * need to reuse the previous value of X_recv. However, when
697 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); 705 * X_recv was 0 (due to early loss), this would kill X down to
706 * s/t_mbi (i.e. one packet in 64 seconds).
707 * To avoid such drastic reduction, we approximate X_recv as
708 * the number of bytes since last feedback.
709 * This is a safe fallback, since X is bounded above by X_calc.
710 */
711 if (hcrx->ccid3hcrx_x_recv > 0)
712 break;
713 /* fall through */
714 case CCID3_FBACK_PERIODIC:
715 delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
716 if (delta <= 0)
717 DCCP_BUG("delta (%ld) <= 0", (long)delta);
718 else
719 hcrx->ccid3hcrx_x_recv =
720 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
698 break; 721 break;
699 case TFRC_RSTATE_TERM: 722 default:
700 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
701 return; 723 return;
702 } 724 }
703 725
704 packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); 726 ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
705 if (unlikely(packet == NULL)) { 727 hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
706 DCCP_WARN("%s(%p), no data packet in history!\n",
707 dccp_role(sk), sk);
708 return;
709 }
710 728
711 hcrx->ccid3hcrx_tstamp_last_feedback = now; 729 hcrx->ccid3hcrx_tstamp_last_feedback = now;
712 hcrx->ccid3hcrx_ccval_last_counter = packet->tfrchrx_ccval; 730 hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval;
713 hcrx->ccid3hcrx_bytes_recv = 0; 731 hcrx->ccid3hcrx_bytes_recv = 0;
714 732
715 if (hcrx->ccid3hcrx_p == 0)
716 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
717 else if (hcrx->ccid3hcrx_p > 1000000) {
718 DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
719 hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
720 } else
721 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
722
723 dp->dccps_hc_rx_insert_options = 1; 733 dp->dccps_hc_rx_insert_options = 1;
724 dccp_send_ack(sk); 734 dccp_send_ack(sk);
725} 735}
@@ -750,165 +760,74 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
750 return 0; 760 return 0;
751} 761}
752 762
753static int ccid3_hc_rx_detect_loss(struct sock *sk, 763static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
754 struct tfrc_rx_hist_entry *packet)
755{ 764{
756 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 765 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
757 struct tfrc_rx_hist_entry *rx_hist = 766 enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
758 tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist); 767 const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
759 u64 seqno = packet->tfrchrx_seqno; 768 const bool is_data_packet = dccp_data_packet(skb);
760 u64 tmp_seqno; 769
761 int loss = 0; 770 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
762 u8 ccval; 771 if (is_data_packet) {
763 772 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
764 773 do_feedback = CCID3_FBACK_INITIAL;
765 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; 774 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
766 775 hcrx->ccid3hcrx_s = payload;
767 if (!rx_hist || 776 /*
768 follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { 777 * Not necessary to update ccid3hcrx_bytes_recv here,
769 hcrx->ccid3hcrx_seqno_nonloss = seqno; 778 * since X_recv = 0 for the first feedback packet (cf.
770 hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval; 779 * RFC 3448, 6.3) -- gerrit
771 goto detect_out; 780 */
772 }
773
774
775 while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
776 > TFRC_RECV_NUM_LATE_LOSS) {
777 loss = 1;
778 dccp_li_update_li(sk,
779 &hcrx->ccid3hcrx_li_hist,
780 &hcrx->ccid3hcrx_hist,
781 hcrx->ccid3hcrx_tstamp_last_feedback,
782 hcrx->ccid3hcrx_s,
783 hcrx->ccid3hcrx_bytes_recv,
784 hcrx->ccid3hcrx_x_recv,
785 hcrx->ccid3hcrx_seqno_nonloss,
786 hcrx->ccid3hcrx_ccval_nonloss);
787 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
788 dccp_inc_seqno(&tmp_seqno);
789 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
790 dccp_inc_seqno(&tmp_seqno);
791 while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
792 tmp_seqno, &ccval)) {
793 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
794 hcrx->ccid3hcrx_ccval_nonloss = ccval;
795 dccp_inc_seqno(&tmp_seqno);
796 } 781 }
782 goto update_records;
797 } 783 }
798 784
799 /* FIXME - this code could be simplified with above while */ 785 if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
800 /* but works at moment */ 786 return; /* done receiving */
801 if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
802 hcrx->ccid3hcrx_seqno_nonloss = seqno;
803 hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
804 }
805
806detect_out:
807 tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
808 &hcrx->ccid3hcrx_li_hist, packet,
809 hcrx->ccid3hcrx_seqno_nonloss);
810 return loss;
811}
812
813static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
814{
815 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
816 const struct dccp_options_received *opt_recv;
817 struct tfrc_rx_hist_entry *packet;
818 u32 p_prev, r_sample, rtt_prev;
819 int loss, payload_size;
820 ktime_t now;
821
822 opt_recv = &dccp_sk(sk)->dccps_options_received;
823 787
824 switch (DCCP_SKB_CB(skb)->dccpd_type) { 788 if (is_data_packet) {
825 case DCCP_PKT_ACK: 789 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
826 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 790 /*
827 return; 791 * Update moving-average of s and the sum of received payload bytes
828 case DCCP_PKT_DATAACK: 792 */
829 if (opt_recv->dccpor_timestamp_echo == 0) 793 hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
830 break; 794 hcrx->ccid3hcrx_bytes_recv += payload;
831 r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo;
832 rtt_prev = hcrx->ccid3hcrx_rtt;
833 r_sample = dccp_sample_rtt(sk, 10 * r_sample);
834
835 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
836 hcrx->ccid3hcrx_rtt = r_sample;
837 else
838 hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
839 r_sample / 10;
840
841 if (rtt_prev != hcrx->ccid3hcrx_rtt)
842 ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
843 dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
844 opt_recv->dccpor_elapsed_time);
845 break;
846 case DCCP_PKT_DATA:
847 break;
848 default: /* We're not interested in other packet types, move along */
849 return;
850 }
851
852 packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
853 if (unlikely(packet == NULL)) {
854 DCCP_WARN("%s(%p), Not enough mem to add rx packet "
855 "to history, consider it lost!\n", dccp_role(sk), sk);
856 return;
857 } 795 }
858 796
859 loss = ccid3_hc_rx_detect_loss(sk, packet); 797 /*
860 798 * Handle pending losses and otherwise check for new loss
861 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) 799 */
862 return; 800 if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
863 801 goto update_records;
864 payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
865 ccid3_hc_rx_update_s(hcrx, payload_size);
866 802
867 switch (hcrx->ccid3hcrx_state) { 803 /*
868 case TFRC_RSTATE_NO_DATA: 804 * Handle data packets: RTT sampling and monitoring p
869 ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " 805 */
870 "feedback\n", dccp_role(sk), sk, 806 if (unlikely(!is_data_packet))
871 dccp_state_name(sk->sk_state), skb); 807 goto update_records;
872 ccid3_hc_rx_send_feedback(sk);
873 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
874 return;
875 case TFRC_RSTATE_DATA:
876 hcrx->ccid3hcrx_bytes_recv += payload_size;
877 if (loss)
878 break;
879 808
880 now = ktime_get_real(); 809 if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss so far: p = 0 */
881 if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - 810 const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
882 (s64)hcrx->ccid3hcrx_rtt) >= 0) { 811 /*
883 hcrx->ccid3hcrx_tstamp_last_ack = now; 812 * Empty loss history: no loss so far, hence p stays 0.
884 ccid3_hc_rx_send_feedback(sk); 813 * Sample RTT values, since an RTT estimate is required for the
885 } 814 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
886 return; 815 */
887 case TFRC_RSTATE_TERM: 816 if (sample != 0)
888 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); 817 hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
889 return;
890 } 818 }
891 819
892 /* Dealing with packet loss */ 820 /*
893 ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", 821 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
894 dccp_role(sk), sk, dccp_state_name(sk->sk_state)); 822 */
895 823 if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
896 p_prev = hcrx->ccid3hcrx_p; 824 do_feedback = CCID3_FBACK_PERIODIC;
897
898 /* Calculate loss event rate */
899 if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
900 u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
901 825
902 /* Scaling up by 1000000 as fixed decimal */ 826update_records:
903 if (i_mean != 0) 827 tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
904 hcrx->ccid3hcrx_p = 1000000 / i_mean;
905 } else
906 DCCP_BUG("empty loss history");
907 828
908 if (hcrx->ccid3hcrx_p > p_prev) { 829 if (do_feedback)
909 ccid3_hc_rx_send_feedback(sk); 830 ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
910 return;
911 }
912} 831}
913 832
914static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) 833static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
@@ -918,11 +837,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
918 ccid3_pr_debug("entry\n"); 837 ccid3_pr_debug("entry\n");
919 838
920 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; 839 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
921 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
922 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); 840 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
923 hcrx->ccid3hcrx_tstamp_last_feedback = 841 return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
924 hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
925 return 0;
926} 842}
927 843
928static void ccid3_hc_rx_exit(struct sock *sk) 844static void ccid3_hc_rx_exit(struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index b842a7dd99de..3c33dc670cf9 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -1,7 +1,8 @@
1/* 1/*
2 * net/dccp/ccids/ccid3.h 2 * net/dccp/ccids/ccid3.h
3 * 3 *
4 * Copyright (c) 2005-6 The University of Waikato, Hamilton, New Zealand. 4 * Copyright (c) 2005-7 The University of Waikato, Hamilton, New Zealand.
5 * Copyright (c) 2007 The University of Aberdeen, Scotland, UK
5 * 6 *
6 * An implementation of the DCCP protocol 7 * An implementation of the DCCP protocol
7 * 8 *
@@ -135,9 +136,7 @@ enum ccid3_hc_rx_states {
135 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3) 136 * @ccid3hcrx_x_recv - Receiver estimate of send rate (RFC 3448 4.3)
136 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard) 137 * @ccid3hcrx_rtt - Receiver estimate of rtt (non-standard)
137 * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4) 138 * @ccid3hcrx_p - current loss event rate (RFC 3448 5.4)
138 * @ccid3hcrx_seqno_nonloss - Last received non-loss sequence number 139 * @ccid3hcrx_last_counter - Tracks window counter (RFC 4342, 8.1)
139 * @ccid3hcrx_ccval_nonloss - Last received non-loss Window CCVal
140 * @ccid3hcrx_ccval_last_counter - Tracks window counter (RFC 4342, 8.1)
141 * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states 140 * @ccid3hcrx_state - receiver state, one of %ccid3_hc_rx_states
142 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes 141 * @ccid3hcrx_bytes_recv - Total sum of DCCP payload bytes
143 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent 142 * @ccid3hcrx_tstamp_last_feedback - Time at which last feedback was sent
@@ -152,14 +151,11 @@ struct ccid3_hc_rx_sock {
152#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv 151#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv
153#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt 152#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt
154#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p 153#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p
155 u64 ccid3hcrx_seqno_nonloss:48, 154 u8 ccid3hcrx_last_counter:4;
156 ccid3hcrx_ccval_nonloss:4,
157 ccid3hcrx_ccval_last_counter:4;
158 enum ccid3_hc_rx_states ccid3hcrx_state:8; 155 enum ccid3_hc_rx_states ccid3hcrx_state:8;
159 u32 ccid3hcrx_bytes_recv; 156 u32 ccid3hcrx_bytes_recv;
160 ktime_t ccid3hcrx_tstamp_last_feedback; 157 ktime_t ccid3hcrx_tstamp_last_feedback;
161 ktime_t ccid3hcrx_tstamp_last_ack; 158 struct tfrc_rx_hist ccid3hcrx_hist;
162 struct list_head ccid3hcrx_hist;
163 struct list_head ccid3hcrx_li_hist; 159 struct list_head ccid3hcrx_li_hist;
164 u16 ccid3hcrx_s; 160 u16 ccid3hcrx_s;
165 u32 ccid3hcrx_pinv; 161 u32 ccid3hcrx_pinv;
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index a5f59af8df43..c0a933a1d28e 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -129,6 +129,13 @@ static u32 dccp_li_calc_first_li(struct sock *sk,
129 u16 s, u32 bytes_recv, 129 u16 s, u32 bytes_recv,
130 u32 previous_x_recv) 130 u32 previous_x_recv)
131{ 131{
132/*
133 * FIXME:
134 * Will be rewritten in the upcoming new loss intervals code.
135 * Has to be commented ou because it relies on the old rx history
136 * data structures
137 */
138#if 0
132 struct tfrc_rx_hist_entry *entry, *next, *tail = NULL; 139 struct tfrc_rx_hist_entry *entry, *next, *tail = NULL;
133 u32 x_recv, p; 140 u32 x_recv, p;
134 suseconds_t rtt, delta; 141 suseconds_t rtt, delta;
@@ -216,10 +223,10 @@ found:
216 dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied " 223 dccp_pr_debug("%s(%p), receive rate=%u bytes/s, implied "
217 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p); 224 "loss rate=%u\n", dccp_role(sk), sk, x_recv, p);
218 225
219 if (p == 0) 226 if (p != 0)
220 return ~0;
221 else
222 return 1000000 / p; 227 return 1000000 / p;
228#endif
229 return ~0;
223} 230}
224 231
225void dccp_li_update_li(struct sock *sk, 232void dccp_li_update_li(struct sock *sk,
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 255cca1ca737..4eddb2da8334 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -36,7 +36,9 @@
36 */ 36 */
37 37
38#include <linux/string.h> 38#include <linux/string.h>
39#include <linux/slab.h>
39#include "packet_history.h" 40#include "packet_history.h"
41#include "../../dccp.h"
40 42
41/** 43/**
42 * tfrc_tx_hist_entry - Simple singly-linked TX history list 44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
@@ -111,154 +113,214 @@ u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
111} 113}
112EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt); 114EXPORT_SYMBOL_GPL(tfrc_tx_hist_rtt);
113 115
116
117/**
118 * tfrc_rx_hist_index - index to reach n-th entry after loss_start
119 */
120static inline u8 tfrc_rx_hist_index(const struct tfrc_rx_hist *h, const u8 n)
121{
122 return (h->loss_start + n) & TFRC_NDUPACK;
123}
124
125/**
126 * tfrc_rx_hist_last_rcv - entry with highest-received-seqno so far
127 */
128static inline struct tfrc_rx_hist_entry *
129 tfrc_rx_hist_last_rcv(const struct tfrc_rx_hist *h)
130{
131 return h->ring[tfrc_rx_hist_index(h, h->loss_count)];
132}
133
114/* 134/*
115 * Receiver History Routines 135 * Receiver History Routines
116 */ 136 */
117static struct kmem_cache *tfrc_rx_hist_slab; 137static struct kmem_cache *tfrc_rx_hist_slab;
118 138
119struct tfrc_rx_hist_entry *tfrc_rx_hist_entry_new(const u32 ndp, 139void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
120 const struct sk_buff *skb, 140 const struct sk_buff *skb,
121 const gfp_t prio) 141 const u32 ndp)
122{ 142{
123 struct tfrc_rx_hist_entry *entry = kmem_cache_alloc(tfrc_rx_hist_slab, 143 struct tfrc_rx_hist_entry *entry = tfrc_rx_hist_last_rcv(h);
124 prio); 144 const struct dccp_hdr *dh = dccp_hdr(skb);
125 145
126 if (entry != NULL) { 146 entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
127 const struct dccp_hdr *dh = dccp_hdr(skb); 147 entry->tfrchrx_ccval = dh->dccph_ccval;
128 148 entry->tfrchrx_type = dh->dccph_type;
129 entry->tfrchrx_seqno = DCCP_SKB_CB(skb)->dccpd_seq; 149 entry->tfrchrx_ndp = ndp;
130 entry->tfrchrx_ccval = dh->dccph_ccval; 150 entry->tfrchrx_tstamp = ktime_get_real();
131 entry->tfrchrx_type = dh->dccph_type;
132 entry->tfrchrx_ndp = ndp;
133 entry->tfrchrx_tstamp = ktime_get_real();
134 }
135
136 return entry;
137} 151}
138EXPORT_SYMBOL_GPL(tfrc_rx_hist_entry_new); 152EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet);
139 153
140static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry) 154static inline void tfrc_rx_hist_entry_delete(struct tfrc_rx_hist_entry *entry)
141{ 155{
142 kmem_cache_free(tfrc_rx_hist_slab, entry); 156 kmem_cache_free(tfrc_rx_hist_slab, entry);
143} 157}
144 158
145int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq, 159/**
146 u8 *ccval) 160 * tfrc_rx_hist_entry - return the n-th history entry after loss_start
161 */
162static inline struct tfrc_rx_hist_entry *
163 tfrc_rx_hist_entry(const struct tfrc_rx_hist *h, const u8 n)
147{ 164{
148 struct tfrc_rx_hist_entry *packet = NULL, *entry; 165 return h->ring[tfrc_rx_hist_index(h, n)];
166}
149 167
150 list_for_each_entry(entry, list, tfrchrx_node) 168/**
151 if (entry->tfrchrx_seqno == seq) { 169 * tfrc_rx_hist_loss_prev - entry with highest-received-seqno before loss was detected
152 packet = entry; 170 */
153 break; 171static inline struct tfrc_rx_hist_entry *
154 } 172 tfrc_rx_hist_loss_prev(const struct tfrc_rx_hist *h)
173{
174 return h->ring[h->loss_start];
175}
176
177/* has the packet contained in skb been seen before? */
178int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb)
179{
180 const u64 seq = DCCP_SKB_CB(skb)->dccpd_seq;
181 int i;
182
183 if (dccp_delta_seqno(tfrc_rx_hist_loss_prev(h)->tfrchrx_seqno, seq) <= 0)
184 return 1;
155 185
156 if (packet) 186 for (i = 1; i <= h->loss_count; i++)
157 *ccval = packet->tfrchrx_ccval; 187 if (tfrc_rx_hist_entry(h, i)->tfrchrx_seqno == seq)
188 return 1;
158 189
159 return packet != NULL; 190 return 0;
160} 191}
192EXPORT_SYMBOL_GPL(tfrc_rx_hist_duplicate);
161 193
162EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_entry); 194/* initialise loss detection and disable RTT sampling */
163struct tfrc_rx_hist_entry * 195static inline void tfrc_rx_hist_loss_indicated(struct tfrc_rx_hist *h)
164 tfrc_rx_hist_find_data_packet(const struct list_head *list)
165{ 196{
166 struct tfrc_rx_hist_entry *entry, *packet = NULL; 197 h->loss_count = 1;
198}
167 199
168 list_for_each_entry(entry, list, tfrchrx_node) 200/* indicate whether previously a packet was detected missing */
169 if (entry->tfrchrx_type == DCCP_PKT_DATA || 201static inline int tfrc_rx_hist_loss_pending(const struct tfrc_rx_hist *h)
170 entry->tfrchrx_type == DCCP_PKT_DATAACK) { 202{
171 packet = entry; 203 return h->loss_count;
172 break; 204}
173 } 205
206/* any data packets missing between last reception and skb ? */
207int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
208 const struct sk_buff *skb, u32 ndp)
209{
210 int delta = dccp_delta_seqno(tfrc_rx_hist_last_rcv(h)->tfrchrx_seqno,
211 DCCP_SKB_CB(skb)->dccpd_seq);
212
213 if (delta > 1 && ndp < delta)
214 tfrc_rx_hist_loss_indicated(h);
174 215
175 return packet; 216 return tfrc_rx_hist_loss_pending(h);
176} 217}
218EXPORT_SYMBOL_GPL(tfrc_rx_hist_new_loss_indicated);
177 219
178EXPORT_SYMBOL_GPL(tfrc_rx_hist_find_data_packet); 220int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h)
221{
222 int i;
223
224 for (i = 0; i <= TFRC_NDUPACK; i++) {
225 h->ring[i] = kmem_cache_alloc(tfrc_rx_hist_slab, GFP_ATOMIC);
226 if (h->ring[i] == NULL)
227 goto out_free;
228 }
229
230 h->loss_count = h->loss_start = 0;
231 return 0;
232
233out_free:
234 while (i-- != 0) {
235 kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
236 h->ring[i] = NULL;
237 }
238 return -ENOBUFS;
239}
240EXPORT_SYMBOL_GPL(tfrc_rx_hist_alloc);
179 241
180void tfrc_rx_hist_add_packet(struct list_head *rx_list, 242void tfrc_rx_hist_purge(struct tfrc_rx_hist *h)
181 struct list_head *li_list,
182 struct tfrc_rx_hist_entry *packet,
183 u64 nonloss_seqno)
184{ 243{
185 struct tfrc_rx_hist_entry *entry, *next; 244 int i;
186 u8 num_later = 0; 245
187 246 for (i = 0; i <= TFRC_NDUPACK; ++i)
188 list_add(&packet->tfrchrx_node, rx_list); 247 if (h->ring[i] != NULL) {
189 248 kmem_cache_free(tfrc_rx_hist_slab, h->ring[i]);
190 num_later = TFRC_RECV_NUM_LATE_LOSS + 1; 249 h->ring[i] = NULL;
191
192 if (!list_empty(li_list)) {
193 list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
194 if (num_later == 0) {
195 if (after48(nonloss_seqno,
196 entry->tfrchrx_seqno)) {
197 list_del_init(&entry->tfrchrx_node);
198 tfrc_rx_hist_entry_delete(entry);
199 }
200 } else if (tfrc_rx_hist_entry_data_packet(entry))
201 --num_later;
202 }
203 } else {
204 int step = 0;
205 u8 win_count = 0; /* Not needed, but lets shut up gcc */
206 int tmp;
207 /*
208 * We have no loss interval history so we need at least one
209 * rtt:s of data packets to approximate rtt.
210 */
211 list_for_each_entry_safe(entry, next, rx_list, tfrchrx_node) {
212 if (num_later == 0) {
213 switch (step) {
214 case 0:
215 step = 1;
216 /* OK, find next data packet */
217 num_later = 1;
218 break;
219 case 1:
220 step = 2;
221 /* OK, find next data packet */
222 num_later = 1;
223 win_count = entry->tfrchrx_ccval;
224 break;
225 case 2:
226 tmp = win_count - entry->tfrchrx_ccval;
227 if (tmp < 0)
228 tmp += TFRC_WIN_COUNT_LIMIT;
229 if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
230 /*
231 * We have found a packet older
232 * than one rtt remove the rest
233 */
234 step = 3;
235 } else /* OK, find next data packet */
236 num_later = 1;
237 break;
238 case 3:
239 list_del_init(&entry->tfrchrx_node);
240 tfrc_rx_hist_entry_delete(entry);
241 break;
242 }
243 } else if (tfrc_rx_hist_entry_data_packet(entry))
244 --num_later;
245 } 250 }
246 }
247} 251}
252EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge);
248 253
249EXPORT_SYMBOL_GPL(tfrc_rx_hist_add_packet); 254/**
255 * tfrc_rx_hist_rtt_last_s - reference entry to compute RTT samples against
256 */
257static inline struct tfrc_rx_hist_entry *
258 tfrc_rx_hist_rtt_last_s(const struct tfrc_rx_hist *h)
259{
260 return h->ring[0];
261}
250 262
251void tfrc_rx_hist_purge(struct list_head *list) 263/**
264 * tfrc_rx_hist_rtt_prev_s: previously suitable (wrt rtt_last_s) RTT-sampling entry
265 */
266static inline struct tfrc_rx_hist_entry *
267 tfrc_rx_hist_rtt_prev_s(const struct tfrc_rx_hist *h)
252{ 268{
253 struct tfrc_rx_hist_entry *entry, *next; 269 return h->ring[h->rtt_sample_prev];
270}
254 271
255 list_for_each_entry_safe(entry, next, list, tfrchrx_node) { 272/**
256 list_del_init(&entry->tfrchrx_node); 273 * tfrc_rx_hist_sample_rtt - Sample RTT from timestamp / CCVal
257 tfrc_rx_hist_entry_delete(entry); 274 * Based on ideas presented in RFC 4342, 8.1. Returns 0 if it was not able
275 * to compute a sample with given data - calling function should check this.
276 */
277u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h, const struct sk_buff *skb)
278{
279 u32 sample = 0,
280 delta_v = SUB16(dccp_hdr(skb)->dccph_ccval,
281 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
282
283 if (delta_v < 1 || delta_v > 4) { /* unsuitable CCVal delta */
284 if (h->rtt_sample_prev == 2) { /* previous candidate stored */
285 sample = SUB16(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
286 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
287 if (sample)
288 sample = 4 / sample *
289 ktime_us_delta(tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_tstamp,
290 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp);
291 else /*
292 * FIXME: This condition is in principle not
293 * possible but occurs when CCID is used for
294 * two-way data traffic. I have tried to trace
295 * it, but the cause does not seem to be here.
296 */
297 DCCP_BUG("please report to dccp@vger.kernel.org"
298 " => prev = %u, last = %u",
299 tfrc_rx_hist_rtt_prev_s(h)->tfrchrx_ccval,
300 tfrc_rx_hist_rtt_last_s(h)->tfrchrx_ccval);
301 } else if (delta_v < 1) {
302 h->rtt_sample_prev = 1;
303 goto keep_ref_for_next_time;
304 }
305
306 } else if (delta_v == 4) /* optimal match */
307 sample = ktime_to_us(net_timedelta(tfrc_rx_hist_rtt_last_s(h)->tfrchrx_tstamp));
308 else { /* suboptimal match */
309 h->rtt_sample_prev = 2;
310 goto keep_ref_for_next_time;
258 } 311 }
259}
260 312
261EXPORT_SYMBOL_GPL(tfrc_rx_hist_purge); 313 if (unlikely(sample > DCCP_SANE_RTT_MAX)) {
314 DCCP_WARN("RTT sample %u too large, using max\n", sample);
315 sample = DCCP_SANE_RTT_MAX;
316 }
317
318 h->rtt_sample_prev = 0; /* use current entry as next reference */
319keep_ref_for_next_time:
320
321 return sample;
322}
323EXPORT_SYMBOL_GPL(tfrc_rx_hist_sample_rtt);
262 324
263__init int packet_history_init(void) 325__init int packet_history_init(void)
264{ 326{
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 5b0b9834340d..3dfd182b0e64 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -37,15 +37,9 @@
37#define _DCCP_PKT_HIST_ 37#define _DCCP_PKT_HIST_
38 38
39#include <linux/ktime.h> 39#include <linux/ktime.h>
40#include <linux/list.h> 40#include <linux/types.h>
41#include <linux/slab.h>
42#include "tfrc.h"
43 41
44/* Number of later packets received before one is considered lost */ 42struct sk_buff;
45#define TFRC_RECV_NUM_LATE_LOSS 3
46
47#define TFRC_WIN_COUNT_PER_RTT 4
48#define TFRC_WIN_COUNT_LIMIT 16
49 43
50struct tfrc_tx_hist_entry; 44struct tfrc_tx_hist_entry;
51 45
@@ -54,11 +48,20 @@ extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
54extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, 48extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
55 const u64 seqno, const ktime_t now); 49 const u64 seqno, const ktime_t now);
56 50
57/* 51/* Subtraction a-b modulo-16, respects circular wrap-around */
58 * Receiver History data structures and declarations 52#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
53
54/* Number of packets to wait after a missing packet (RFC 4342, 6.1) */
55#define TFRC_NDUPACK 3
56
57/**
58 * tfrc_rx_hist_entry - Store information about a single received packet
59 * @tfrchrx_seqno: DCCP packet sequence number
60 * @tfrchrx_ccval: window counter value of packet (RFC 4342, 8.1)
61 * @tfrchrx_ndp: the NDP count (if any) of the packet
62 * @tfrchrx_tstamp: actual receive time of packet
59 */ 63 */
60struct tfrc_rx_hist_entry { 64struct tfrc_rx_hist_entry {
61 struct list_head tfrchrx_node;
62 u64 tfrchrx_seqno:48, 65 u64 tfrchrx_seqno:48,
63 tfrchrx_ccval:4, 66 tfrchrx_ccval:4,
64 tfrchrx_type:4; 67 tfrchrx_type:4;
@@ -66,42 +69,30 @@ struct tfrc_rx_hist_entry {
66 ktime_t tfrchrx_tstamp; 69 ktime_t tfrchrx_tstamp;
67}; 70};
68 71
69extern struct tfrc_rx_hist_entry * 72/**
70 tfrc_rx_hist_entry_new(const u32 ndp, 73 * tfrc_rx_hist - RX history structure for TFRC-based protocols
71 const struct sk_buff *skb, 74 *
72 const gfp_t prio); 75 * @ring: Packet history for RTT sampling and loss detection
73 76 * @loss_count: Number of entries in circular history
74static inline struct tfrc_rx_hist_entry * 77 * @loss_start: Movable index (for loss detection)
75 tfrc_rx_hist_head(struct list_head *list) 78 * @rtt_sample_prev: Used during RTT sampling, points to candidate entry
76{ 79 */
77 struct tfrc_rx_hist_entry *head = NULL; 80struct tfrc_rx_hist {
78 81 struct tfrc_rx_hist_entry *ring[TFRC_NDUPACK + 1];
79 if (!list_empty(list)) 82 u8 loss_count:2,
80 head = list_entry(list->next, struct tfrc_rx_hist_entry, 83 loss_start:2;
81 tfrchrx_node); 84#define rtt_sample_prev loss_start
82 return head; 85};
83}
84
85extern int tfrc_rx_hist_find_entry(const struct list_head *list, const u64 seq,
86 u8 *ccval);
87extern struct tfrc_rx_hist_entry *
88 tfrc_rx_hist_find_data_packet(const struct list_head *list);
89
90extern void tfrc_rx_hist_add_packet(struct list_head *rx_list,
91 struct list_head *li_list,
92 struct tfrc_rx_hist_entry *packet,
93 u64 nonloss_seqno);
94
95extern void tfrc_rx_hist_purge(struct list_head *list);
96 86
97static inline int 87extern void tfrc_rx_hist_add_packet(struct tfrc_rx_hist *h,
98 tfrc_rx_hist_entry_data_packet(const struct tfrc_rx_hist_entry *entry) 88 const struct sk_buff *skb, const u32 ndp);
99{
100 return entry->tfrchrx_type == DCCP_PKT_DATA ||
101 entry->tfrchrx_type == DCCP_PKT_DATAACK;
102}
103 89
104extern u64 tfrc_rx_hist_detect_loss(struct list_head *rx_list, 90extern int tfrc_rx_hist_duplicate(struct tfrc_rx_hist *h, struct sk_buff *skb);
105 struct list_head *li_list, u8 *win_loss); 91extern int tfrc_rx_hist_new_loss_indicated(struct tfrc_rx_hist *h,
92 const struct sk_buff *skb, u32 ndp);
93extern u32 tfrc_rx_hist_sample_rtt(struct tfrc_rx_hist *h,
94 const struct sk_buff *skb);
95extern int tfrc_rx_hist_alloc(struct tfrc_rx_hist *h);
96extern void tfrc_rx_hist_purge(struct tfrc_rx_hist *h);
106 97
107#endif /* _DCCP_PKT_HIST_ */ 98#endif /* _DCCP_PKT_HIST_ */