aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--net/sched/sch_qfq.c85
1 files changed, 56 insertions, 29 deletions
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index a7ab323849b6..8056fb4e618a 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -113,7 +113,6 @@
113 113
114#define FRAC_BITS 30 /* fixed point arithmetic */ 114#define FRAC_BITS 30 /* fixed point arithmetic */
115#define ONE_FP (1UL << FRAC_BITS) 115#define ONE_FP (1UL << FRAC_BITS)
116#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
117 116
118#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ 117#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
119#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ 118#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */
@@ -189,6 +188,7 @@ struct qfq_sched {
189 struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ 188 struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
190 u32 num_active_agg; /* Num. of active aggregates */ 189 u32 num_active_agg; /* Num. of active aggregates */
191 u32 wsum; /* weight sum */ 190 u32 wsum; /* weight sum */
191 u32 iwsum; /* inverse weight sum */
192 192
193 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ 193 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
194 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ 194 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
@@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
314 314
315 q->wsum += 315 q->wsum +=
316 (int) agg->class_weight * (new_num_classes - agg->num_classes); 316 (int) agg->class_weight * (new_num_classes - agg->num_classes);
317 q->iwsum = ONE_FP / q->wsum;
317 318
318 agg->num_classes = new_num_classes; 319 agg->num_classes = new_num_classes;
319} 320}
@@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
340{ 341{
341 if (!hlist_unhashed(&agg->nonfull_next)) 342 if (!hlist_unhashed(&agg->nonfull_next))
342 hlist_del_init(&agg->nonfull_next); 343 hlist_del_init(&agg->nonfull_next);
344 q->wsum -= agg->class_weight;
345 if (q->wsum != 0)
346 q->iwsum = ONE_FP / q->wsum;
347
343 if (q->in_serv_agg == agg) 348 if (q->in_serv_agg == agg)
344 q->in_serv_agg = qfq_choose_next_agg(q); 349 q->in_serv_agg = qfq_choose_next_agg(q);
345 kfree(agg); 350 kfree(agg);
@@ -834,38 +839,60 @@ static void qfq_make_eligible(struct qfq_sched *q)
834 } 839 }
835} 840}
836 841
837
838/* 842/*
839 * The index of the slot in which the aggregate is to be inserted must 843 * The index of the slot in which the input aggregate agg is to be
840 * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' 844 * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2'
841 * because the start time of the group may be moved backward by one 845 * and not a '-1' because the start time of the group may be moved
842 * slot after the aggregate has been inserted, and this would cause 846 * backward by one slot after the aggregate has been inserted, and
843 * non-empty slots to be right-shifted by one position. 847 * this would cause non-empty slots to be right-shifted by one
848 * position.
849 *
850 * QFQ+ fully satisfies this bound to the slot index if the parameters
851 * of the classes are not changed dynamically, and if QFQ+ never
852 * happens to postpone the service of agg unjustly, i.e., it never
853 * happens that the aggregate becomes backlogged and eligible, or just
854 * eligible, while an aggregate with a higher approximated finish time
855 * is being served. In particular, in this case QFQ+ guarantees that
856 * the timestamps of agg are low enough that the slot index is never
857 * higher than 2. Unfortunately, QFQ+ cannot provide the same
858 * guarantee if it happens to unjustly postpone the service of agg, or
859 * if the parameters of some class are changed.
860 *
861 * As for the first event, i.e., an out-of-order service, the
862 * upper bound to the slot index guaranteed by QFQ+ grows to
863 * 2 +
864 * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
865 * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1.
844 * 866 *
845 * If the weight and lmax (max_pkt_size) of the classes do not change, 867 * The following function deals with this problem by backward-shifting
846 * then QFQ+ does meet the above contraint according to the current 868 * the timestamps of agg, if needed, so as to guarantee that the slot
847 * values of its parameters. In fact, if the weight and lmax of the 869 * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may
848 * classes do not change, then, from the theory, QFQ+ guarantees that 870 * cause the service of other aggregates to be postponed, yet the
849 * the slot index is never higher than 871 * worst-case guarantees of these aggregates are not violated. In
850 * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * 872 * fact, in case of no out-of-order service, the timestamps of agg
851 * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18 873 * would have been even lower than they are after the backward shift,
874 * because QFQ+ would have guaranteed a maximum value equal to 2 for
875 * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose
876 * service is postponed because of the backward-shift would have
877 * however waited for the service of agg before being served.
852 * 878 *
853 * When the weight of a class is increased or the lmax of the class is 879 * The other event that may cause the slot index to be higher than 2
854 * decreased, a new aggregate with smaller slot size than the original 880 * for agg is a recent change of the parameters of some class. If the
855 * parent aggregate of the class may happen to be activated. The 881 * weight of a class is increased or the lmax (max_pkt_size) of the
856 * activation of this aggregate should be properly delayed to when the 882 * class is decreased, then a new aggregate with smaller slot size
857 * service of the class has finished in the ideal system tracked by 883 * than the original parent aggregate of the class may happen to be
858 * QFQ+. If the activation of the aggregate is not delayed to this 884 * activated. The activation of this aggregate should be properly
859 * reference time instant, then this aggregate may be unjustly served 885 * delayed to when the service of the class has finished in the ideal
860 * before other aggregates waiting for service. This may cause the 886 * system tracked by QFQ+. If the activation of the aggregate is not
861 * above bound to the slot index to be violated for some of these 887 * delayed to this reference time instant, then this aggregate may be
862 * unlucky aggregates. 888 * unjustly served before other aggregates waiting for service. This
889 * may cause the above bound to the slot index to be violated for some
890 * of these unlucky aggregates.
863 * 891 *
864 * Instead of delaying the activation of the new aggregate, which is 892 * Instead of delaying the activation of the new aggregate, which is
865 * quite complex, the following inaccurate but simple solution is used: 893 * quite complex, the above-discussed capping of the slot index is
866 * if the slot index is higher than QFQ_MAX_SLOTS-2, then the 894 * used to handle also the consequences of a change of the parameters
867 * timestamps of the aggregate are shifted backward so as to let the 895 * of a class.
868 * slot index become equal to QFQ_MAX_SLOTS-2.
869 */ 896 */
870static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, 897static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
871 u64 roundedS) 898 u64 roundedS)
@@ -1136,7 +1163,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
1136 else 1163 else
1137 in_serv_agg->budget -= len; 1164 in_serv_agg->budget -= len;
1138 1165
1139 q->V += (u64)len * IWSUM; 1166 q->V += (u64)len * q->iwsum;
1140 pr_debug("qfq dequeue: len %u F %lld now %lld\n", 1167 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
1141 len, (unsigned long long) in_serv_agg->F, 1168 len, (unsigned long long) in_serv_agg->F,
1142 (unsigned long long) q->V); 1169 (unsigned long long) q->V);