aboutsummaryrefslogtreecommitdiffstats
path: root/net/dccp/ccids/ccid3.c
diff options
context:
space:
mode:
author Arnaldo Carvalho de Melo <acme@redhat.com> 2007-12-06 10:18:11 -0500
committer David S. Miller <davem@davemloft.net> 2008-01-28 17:56:43 -0500
commit b84a2189c4e1835c51fd6b974a0497be9bc4ba87 (patch)
tree d488b0a45618ac37c605b10b093f8f03a050a7fc /net/dccp/ccids/ccid3.c
parent 30a0eacd479f1c7c15fe0496585ff29f76de3378 (diff)
[TFRC]: New rx history code
Credit here goes to Gerrit Renker, who provided the initial implementation for this new codebase. I modified it just to try to make it closer to the existing API: renaming some functions, adding namespacing, and fixing one bug where tfrc_rx_hist_alloc was not freeing the allocated ring entries on the error path. Original changeset comment from Gerrit: ----------- This provides a new, self-contained and generic RX history service for TFRC-based protocols. Details: * new data structure, initialisation and cleanup routines; * allocation of dccp_rx_hist entries local to packet_history.c, as a service exported by the dccp_tfrc_lib module; * interface to automatically track highest-received seqno; * receiver-based RTT estimation (needed for instance by RFC 3448, 6.3.1); * a generic function to test for `data packets' as per RFC 4340, sec. 7.7. Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/dccp/ccids/ccid3.c')
-rw-r--r-- net/dccp/ccids/ccid3.c 282
1 files changed, 99 insertions, 183 deletions
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f5cfc2e2d7b2..bf95c3292d5b 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -641,6 +641,15 @@ static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
641/* 641/*
642 * Receiver Half-Connection Routines 642 * Receiver Half-Connection Routines
643 */ 643 */
644
645/* CCID3 feedback types */
646enum ccid3_fback_type {
647 CCID3_FBACK_NONE = 0,
648 CCID3_FBACK_INITIAL,
649 CCID3_FBACK_PERIODIC,
650 CCID3_FBACK_PARAM_CHANGE
651};
652
644#ifdef CONFIG_IP_DCCP_CCID3_DEBUG 653#ifdef CONFIG_IP_DCCP_CCID3_DEBUG
645static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state) 654static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
646{ 655{
@@ -667,59 +676,60 @@ static void ccid3_hc_rx_set_state(struct sock *sk,
667 hcrx->ccid3hcrx_state = state; 676 hcrx->ccid3hcrx_state = state;
668} 677}
669 678
670static inline void ccid3_hc_rx_update_s(struct ccid3_hc_rx_sock *hcrx, int len) 679static void ccid3_hc_rx_send_feedback(struct sock *sk,
671{ 680 const struct sk_buff *skb,
672 if (likely(len > 0)) /* don't update on empty packets (e.g. ACKs) */ 681 enum ccid3_fback_type fbtype)
673 hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, len, 9);
674}
675
676static void ccid3_hc_rx_send_feedback(struct sock *sk)
677{ 682{
678 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 683 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
679 struct dccp_sock *dp = dccp_sk(sk); 684 struct dccp_sock *dp = dccp_sk(sk);
680 struct tfrc_rx_hist_entry *packet;
681 ktime_t now; 685 ktime_t now;
682 suseconds_t delta; 686 s64 delta = 0;
683 687
684 ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk); 688 ccid3_pr_debug("%s(%p) - entry \n", dccp_role(sk), sk);
685 689
690 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_TERM))
691 return;
692
686 now = ktime_get_real(); 693 now = ktime_get_real();
687 694
688 switch (hcrx->ccid3hcrx_state) { 695 switch (fbtype) {
689 case TFRC_RSTATE_NO_DATA: 696 case CCID3_FBACK_INITIAL:
690 hcrx->ccid3hcrx_x_recv = 0; 697 hcrx->ccid3hcrx_x_recv = 0;
698 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
691 break; 699 break;
692 case TFRC_RSTATE_DATA: 700 case CCID3_FBACK_PARAM_CHANGE:
693 delta = ktime_us_delta(now, 701 /*
694 hcrx->ccid3hcrx_tstamp_last_feedback); 702 * When parameters change (new loss or p > p_prev), we do not
695 DCCP_BUG_ON(delta < 0); 703 * have a reliable estimate for R_m of [RFC 3448, 6.2] and so
696 hcrx->ccid3hcrx_x_recv = 704 * need to reuse the previous value of X_recv. However, when
697 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta); 705 * X_recv was 0 (due to early loss), this would kill X down to
706 * s/t_mbi (i.e. one packet in 64 seconds).
707 * To avoid such drastic reduction, we approximate X_recv as
708 * the number of bytes since last feedback.
709 * This is a safe fallback, since X is bounded above by X_calc.
710 */
711 if (hcrx->ccid3hcrx_x_recv > 0)
712 break;
713 /* fall through */
714 case CCID3_FBACK_PERIODIC:
715 delta = ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_feedback);
716 if (delta <= 0)
717 DCCP_BUG("delta (%ld) <= 0", (long)delta);
718 else
719 hcrx->ccid3hcrx_x_recv =
720 scaled_div32(hcrx->ccid3hcrx_bytes_recv, delta);
698 break; 721 break;
699 case TFRC_RSTATE_TERM: 722 default:
700 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
701 return; 723 return;
702 } 724 }
703 725
704 packet = tfrc_rx_hist_find_data_packet(&hcrx->ccid3hcrx_hist); 726 ccid3_pr_debug("Interval %ldusec, X_recv=%u, 1/p=%u\n", (long)delta,
705 if (unlikely(packet == NULL)) { 727 hcrx->ccid3hcrx_x_recv, hcrx->ccid3hcrx_pinv);
706 DCCP_WARN("%s(%p), no data packet in history!\n",
707 dccp_role(sk), sk);
708 return;
709 }
710 728
711 hcrx->ccid3hcrx_tstamp_last_feedback = now; 729 hcrx->ccid3hcrx_tstamp_last_feedback = now;
712 hcrx->ccid3hcrx_ccval_last_counter = packet->tfrchrx_ccval; 730 hcrx->ccid3hcrx_last_counter = dccp_hdr(skb)->dccph_ccval;
713 hcrx->ccid3hcrx_bytes_recv = 0; 731 hcrx->ccid3hcrx_bytes_recv = 0;
714 732
715 if (hcrx->ccid3hcrx_p == 0)
716 hcrx->ccid3hcrx_pinv = ~0U; /* see RFC 4342, 8.5 */
717 else if (hcrx->ccid3hcrx_p > 1000000) {
718 DCCP_WARN("p (%u) > 100%%\n", hcrx->ccid3hcrx_p);
719 hcrx->ccid3hcrx_pinv = 1; /* use 100% in this case */
720 } else
721 hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
722
723 dp->dccps_hc_rx_insert_options = 1; 733 dp->dccps_hc_rx_insert_options = 1;
724 dccp_send_ack(sk); 734 dccp_send_ack(sk);
725} 735}
@@ -750,165 +760,74 @@ static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
750 return 0; 760 return 0;
751} 761}
752 762
753static int ccid3_hc_rx_detect_loss(struct sock *sk, 763static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
754 struct tfrc_rx_hist_entry *packet)
755{ 764{
756 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); 765 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
757 struct tfrc_rx_hist_entry *rx_hist = 766 enum ccid3_fback_type do_feedback = CCID3_FBACK_NONE;
758 tfrc_rx_hist_head(&hcrx->ccid3hcrx_hist); 767 const u32 ndp = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
759 u64 seqno = packet->tfrchrx_seqno; 768 const bool is_data_packet = dccp_data_packet(skb);
760 u64 tmp_seqno; 769
761 int loss = 0; 770 if (unlikely(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)) {
762 u8 ccval; 771 if (is_data_packet) {
763 772 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
764 773 do_feedback = CCID3_FBACK_INITIAL;
765 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss; 774 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
766 775 hcrx->ccid3hcrx_s = payload;
767 if (!rx_hist || 776 /*
768 follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) { 777 * Not necessary to update ccid3hcrx_bytes_recv here,
769 hcrx->ccid3hcrx_seqno_nonloss = seqno; 778 * since X_recv = 0 for the first feedback packet (cf.
770 hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval; 779 * RFC 3448, 6.3) -- gerrit
771 goto detect_out; 780 */
772 }
773
774
775 while (dccp_delta_seqno(hcrx->ccid3hcrx_seqno_nonloss, seqno)
776 > TFRC_RECV_NUM_LATE_LOSS) {
777 loss = 1;
778 dccp_li_update_li(sk,
779 &hcrx->ccid3hcrx_li_hist,
780 &hcrx->ccid3hcrx_hist,
781 hcrx->ccid3hcrx_tstamp_last_feedback,
782 hcrx->ccid3hcrx_s,
783 hcrx->ccid3hcrx_bytes_recv,
784 hcrx->ccid3hcrx_x_recv,
785 hcrx->ccid3hcrx_seqno_nonloss,
786 hcrx->ccid3hcrx_ccval_nonloss);
787 tmp_seqno = hcrx->ccid3hcrx_seqno_nonloss;
788 dccp_inc_seqno(&tmp_seqno);
789 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
790 dccp_inc_seqno(&tmp_seqno);
791 while (tfrc_rx_hist_find_entry(&hcrx->ccid3hcrx_hist,
792 tmp_seqno, &ccval)) {
793 hcrx->ccid3hcrx_seqno_nonloss = tmp_seqno;
794 hcrx->ccid3hcrx_ccval_nonloss = ccval;
795 dccp_inc_seqno(&tmp_seqno);
796 } 781 }
782 goto update_records;
797 } 783 }
798 784
799 /* FIXME - this code could be simplified with above while */ 785 if (tfrc_rx_hist_duplicate(&hcrx->ccid3hcrx_hist, skb))
800 /* but works at moment */ 786 return; /* done receiving */
801 if (follows48(packet->tfrchrx_seqno, hcrx->ccid3hcrx_seqno_nonloss)) {
802 hcrx->ccid3hcrx_seqno_nonloss = seqno;
803 hcrx->ccid3hcrx_ccval_nonloss = packet->tfrchrx_ccval;
804 }
805
806detect_out:
807 tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist,
808 &hcrx->ccid3hcrx_li_hist, packet,
809 hcrx->ccid3hcrx_seqno_nonloss);
810 return loss;
811}
812
813static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
814{
815 struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
816 const struct dccp_options_received *opt_recv;
817 struct tfrc_rx_hist_entry *packet;
818 u32 p_prev, r_sample, rtt_prev;
819 int loss, payload_size;
820 ktime_t now;
821
822 opt_recv = &dccp_sk(sk)->dccps_options_received;
823 787
824 switch (DCCP_SKB_CB(skb)->dccpd_type) { 788 if (is_data_packet) {
825 case DCCP_PKT_ACK: 789 const u32 payload = skb->len - dccp_hdr(skb)->dccph_doff * 4;
826 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA) 790 /*
827 return; 791 * Update moving-average of s and the sum of received payload bytes
828 case DCCP_PKT_DATAACK: 792 */
829 if (opt_recv->dccpor_timestamp_echo == 0) 793 hcrx->ccid3hcrx_s = tfrc_ewma(hcrx->ccid3hcrx_s, payload, 9);
830 break; 794 hcrx->ccid3hcrx_bytes_recv += payload;
831 r_sample = dccp_timestamp() - opt_recv->dccpor_timestamp_echo;
832 rtt_prev = hcrx->ccid3hcrx_rtt;
833 r_sample = dccp_sample_rtt(sk, 10 * r_sample);
834
835 if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
836 hcrx->ccid3hcrx_rtt = r_sample;
837 else
838 hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
839 r_sample / 10;
840
841 if (rtt_prev != hcrx->ccid3hcrx_rtt)
842 ccid3_pr_debug("%s(%p), New RTT=%uus, elapsed time=%u\n",
843 dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
844 opt_recv->dccpor_elapsed_time);
845 break;
846 case DCCP_PKT_DATA:
847 break;
848 default: /* We're not interested in other packet types, move along */
849 return;
850 }
851
852 packet = tfrc_rx_hist_entry_new(opt_recv->dccpor_ndp, skb, GFP_ATOMIC);
853 if (unlikely(packet == NULL)) {
854 DCCP_WARN("%s(%p), Not enough mem to add rx packet "
855 "to history, consider it lost!\n", dccp_role(sk), sk);
856 return;
857 } 795 }
858 796
859 loss = ccid3_hc_rx_detect_loss(sk, packet); 797 /*
860 798 * Handle pending losses and otherwise check for new loss
861 if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK) 799 */
862 return; 800 if (tfrc_rx_hist_new_loss_indicated(&hcrx->ccid3hcrx_hist, skb, ndp))
863 801 goto update_records;
864 payload_size = skb->len - dccp_hdr(skb)->dccph_doff * 4;
865 ccid3_hc_rx_update_s(hcrx, payload_size);
866 802
867 switch (hcrx->ccid3hcrx_state) { 803 /*
868 case TFRC_RSTATE_NO_DATA: 804 * Handle data packets: RTT sampling and monitoring p
869 ccid3_pr_debug("%s(%p, state=%s), skb=%p, sending initial " 805 */
870 "feedback\n", dccp_role(sk), sk, 806 if (unlikely(!is_data_packet))
871 dccp_state_name(sk->sk_state), skb); 807 goto update_records;
872 ccid3_hc_rx_send_feedback(sk);
873 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
874 return;
875 case TFRC_RSTATE_DATA:
876 hcrx->ccid3hcrx_bytes_recv += payload_size;
877 if (loss)
878 break;
879 808
880 now = ktime_get_real(); 809 if (list_empty(&hcrx->ccid3hcrx_li_hist)) { /* no loss so far: p = 0 */
881 if ((ktime_us_delta(now, hcrx->ccid3hcrx_tstamp_last_ack) - 810 const u32 sample = tfrc_rx_hist_sample_rtt(&hcrx->ccid3hcrx_hist, skb);
882 (s64)hcrx->ccid3hcrx_rtt) >= 0) { 811 /*
883 hcrx->ccid3hcrx_tstamp_last_ack = now; 812 * Empty loss history: no loss so far, hence p stays 0.
884 ccid3_hc_rx_send_feedback(sk); 813 * Sample RTT values, since an RTT estimate is required for the
885 } 814 * computation of p when the first loss occurs; RFC 3448, 6.3.1.
886 return; 815 */
887 case TFRC_RSTATE_TERM: 816 if (sample != 0)
888 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk); 817 hcrx->ccid3hcrx_rtt = tfrc_ewma(hcrx->ccid3hcrx_rtt, sample, 9);
889 return;
890 } 818 }
891 819
892 /* Dealing with packet loss */ 820 /*
893 ccid3_pr_debug("%s(%p, state=%s), data loss! Reacting...\n", 821 * Check if the periodic once-per-RTT feedback is due; RFC 4342, 10.3
894 dccp_role(sk), sk, dccp_state_name(sk->sk_state)); 822 */
895 823 if (SUB16(dccp_hdr(skb)->dccph_ccval, hcrx->ccid3hcrx_last_counter) > 3)
896 p_prev = hcrx->ccid3hcrx_p; 824 do_feedback = CCID3_FBACK_PERIODIC;
897
898 /* Calculate loss event rate */
899 if (!list_empty(&hcrx->ccid3hcrx_li_hist)) {
900 u32 i_mean = dccp_li_hist_calc_i_mean(&hcrx->ccid3hcrx_li_hist);
901 825
902 /* Scaling up by 1000000 as fixed decimal */ 826update_records:
903 if (i_mean != 0) 827 tfrc_rx_hist_add_packet(&hcrx->ccid3hcrx_hist, skb, ndp);
904 hcrx->ccid3hcrx_p = 1000000 / i_mean;
905 } else
906 DCCP_BUG("empty loss history");
907 828
908 if (hcrx->ccid3hcrx_p > p_prev) { 829 if (do_feedback)
909 ccid3_hc_rx_send_feedback(sk); 830 ccid3_hc_rx_send_feedback(sk, skb, do_feedback);
910 return;
911 }
912} 831}
913 832
914static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk) 833static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
@@ -918,11 +837,8 @@ static int ccid3_hc_rx_init(struct ccid *ccid, struct sock *sk)
918 ccid3_pr_debug("entry\n"); 837 ccid3_pr_debug("entry\n");
919 838
920 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA; 839 hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
921 INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
922 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist); 840 INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
923 hcrx->ccid3hcrx_tstamp_last_feedback = 841 return tfrc_rx_hist_alloc(&hcrx->ccid3hcrx_hist);
924 hcrx->ccid3hcrx_tstamp_last_ack = ktime_get_real();
925 return 0;
926} 842}
927 843
928static void ccid3_hc_rx_exit(struct sock *sk) 844static void ccid3_hc_rx_exit(struct sock *sk)