aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
authorPaolo Valente <paolo.valente@unimore.it>2012-11-05 15:29:24 -0500
committerDavid S. Miller <davem@davemloft.net>2012-11-07 15:37:04 -0500
commit3015f3d2a3cd9614294025849d3ed89fd2f3a7f5 (patch)
treee6190a5287b9df1e823f86f46d45ee1ffc5276da /net/sched
parentcacb6ba0f36ab14a507f4ee7697e8332899015d2 (diff)
pkt_sched: enable QFQ to support TSO/GSO
If the max packet size for some class (configured through tc) is violated by the actual size of the packets of that class, then QFQ would not schedule classes correctly, and the data structures implementing the bucket lists may get corrupted. This problem occurs with TSO/GSO even if the max packet size is set to the MTU, and is, e.g., the cause of the failure reported in [1]. Two patches have been proposed to solve this problem in [2], one of them is a preliminary version of this patch. This patch addresses the above issues by: 1) setting QFQ parameters to proper values for supporting TSO/GSO (in particular, setting the maximum possible packet size to 64KB), 2) automatically increasing the max packet size for a class, lmax, when a packet with a larger size than the current value of lmax arrives. The drawback of the first point is that the maximum weight for a class is now limited to 4096, which is equal to 1/16 of the maximum weight sum. Finally, this patch also forcibly caps the timestamps of a class if they are too high to be stored in the bucket list. This capping, taken from QFQ+ [3], handles the unfrequent case described in the comment to the function slot_insert. [1] http://marc.info/?l=linux-netdev&m=134968777902077&w=2 [2] http://marc.info/?l=linux-netdev&m=135096573507936&w=2 [3] http://marc.info/?l=linux-netdev&m=134902691421670&w=2 Signed-off-by: Paolo Valente <paolo.valente@unimore.it> Tested-by: Cong Wang <amwang@redhat.com> Acked-by: Stephen Hemminger <shemminger@vyatta.com> Acked-by: Stephen Hemminger <shemminger@vyatta.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/sch_qfq.c109
1 files changed, 79 insertions, 30 deletions
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index f0dd83cff906..9687fa1c2275 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -84,18 +84,19 @@
84 * grp->index is the index of the group; and grp->slot_shift 84 * grp->index is the index of the group; and grp->slot_shift
85 * is the shift for the corresponding (scaled) sigma_i. 85 * is the shift for the corresponding (scaled) sigma_i.
86 */ 86 */
87#define QFQ_MAX_INDEX 19 87#define QFQ_MAX_INDEX 24
88#define QFQ_MAX_WSHIFT 16 88#define QFQ_MAX_WSHIFT 12
89 89
90#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT) 90#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
91#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT) 91#define QFQ_MAX_WSUM (16*QFQ_MAX_WEIGHT)
92 92
93#define FRAC_BITS 30 /* fixed point arithmetic */ 93#define FRAC_BITS 30 /* fixed point arithmetic */
94#define ONE_FP (1UL << FRAC_BITS) 94#define ONE_FP (1UL << FRAC_BITS)
95#define IWSUM (ONE_FP/QFQ_MAX_WSUM) 95#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
96 96
97#define QFQ_MTU_SHIFT 11 97#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
98#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) 98#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
99#define QFQ_MIN_LMAX 256 /* min possible lmax for a class */
99 100
100/* 101/*
101 * Possible group states. These values are used as indexes for the bitmaps 102 * Possible group states. These values are used as indexes for the bitmaps
@@ -231,6 +232,32 @@ static void qfq_update_class_params(struct qfq_sched *q, struct qfq_class *cl,
231 q->wsum += delta_w; 232 q->wsum += delta_w;
232} 233}
233 234
235static void qfq_update_reactivate_class(struct qfq_sched *q,
236 struct qfq_class *cl,
237 u32 inv_w, u32 lmax, int delta_w)
238{
239 bool need_reactivation = false;
240 int i = qfq_calc_index(inv_w, lmax);
241
242 if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) {
243 /*
244 * shift cl->F back, to not charge the
245 * class for the not-yet-served head
246 * packet
247 */
248 cl->F = cl->S;
249 /* remove class from its slot in the old group */
250 qfq_deactivate_class(q, cl);
251 need_reactivation = true;
252 }
253
254 qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
255
256 if (need_reactivation) /* activate in new group */
257 qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
258}
259
260
234static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, 261static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
235 struct nlattr **tca, unsigned long *arg) 262 struct nlattr **tca, unsigned long *arg)
236{ 263{
@@ -238,7 +265,7 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
238 struct qfq_class *cl = (struct qfq_class *)*arg; 265 struct qfq_class *cl = (struct qfq_class *)*arg;
239 struct nlattr *tb[TCA_QFQ_MAX + 1]; 266 struct nlattr *tb[TCA_QFQ_MAX + 1];
240 u32 weight, lmax, inv_w; 267 u32 weight, lmax, inv_w;
241 int i, err; 268 int err;
242 int delta_w; 269 int delta_w;
243 270
244 if (tca[TCA_OPTIONS] == NULL) { 271 if (tca[TCA_OPTIONS] == NULL) {
@@ -270,16 +297,14 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
270 297
271 if (tb[TCA_QFQ_LMAX]) { 298 if (tb[TCA_QFQ_LMAX]) {
272 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); 299 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
273 if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) { 300 if (lmax < QFQ_MIN_LMAX || lmax > (1UL << QFQ_MTU_SHIFT)) {
274 pr_notice("qfq: invalid max length %u\n", lmax); 301 pr_notice("qfq: invalid max length %u\n", lmax);
275 return -EINVAL; 302 return -EINVAL;
276 } 303 }
277 } else 304 } else
278 lmax = 1UL << QFQ_MTU_SHIFT; 305 lmax = psched_mtu(qdisc_dev(sch));
279 306
280 if (cl != NULL) { 307 if (cl != NULL) {
281 bool need_reactivation = false;
282
283 if (tca[TCA_RATE]) { 308 if (tca[TCA_RATE]) {
284 err = gen_replace_estimator(&cl->bstats, &cl->rate_est, 309 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
285 qdisc_root_sleeping_lock(sch), 310 qdisc_root_sleeping_lock(sch),
@@ -291,24 +316,8 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
291 if (lmax == cl->lmax && inv_w == cl->inv_w) 316 if (lmax == cl->lmax && inv_w == cl->inv_w)
292 return 0; /* nothing to update */ 317 return 0; /* nothing to update */
293 318
294 i = qfq_calc_index(inv_w, lmax);
295 sch_tree_lock(sch); 319 sch_tree_lock(sch);
296 if (&q->groups[i] != cl->grp && cl->qdisc->q.qlen > 0) { 320 qfq_update_reactivate_class(q, cl, inv_w, lmax, delta_w);
297 /*
298 * shift cl->F back, to not charge the
299 * class for the not-yet-served head
300 * packet
301 */
302 cl->F = cl->S;
303 /* remove class from its slot in the old group */
304 qfq_deactivate_class(q, cl);
305 need_reactivation = true;
306 }
307
308 qfq_update_class_params(q, cl, lmax, inv_w, delta_w);
309
310 if (need_reactivation) /* activate in new group */
311 qfq_activate_class(q, cl, qdisc_peek_len(cl->qdisc));
312 sch_tree_unlock(sch); 321 sch_tree_unlock(sch);
313 322
314 return 0; 323 return 0;
@@ -663,15 +672,48 @@ static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
663 672
664 673
665/* 674/*
666 * XXX we should make sure that slot becomes less than 32. 675 * If the weight and lmax (max_pkt_size) of the classes do not change,
667 * This is guaranteed by the input values. 676 * then QFQ guarantees that the slot index is never higher than
668 * roundedS is always cl->S rounded on grp->slot_shift bits. 677 * 2 + ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM).
678 *
679 * With the current values of the above constants, the index is
680 * then guaranteed to never be higher than 2 + 256 * (1 / 16) = 18.
681 *
682 * When the weight of a class is increased or the lmax of the class is
683 * decreased, a new class with smaller slot size may happen to be
684 * activated. The activation of this class should be properly delayed
685 * to when the service of the class has finished in the ideal system
686 * tracked by QFQ. If the activation of the class is not delayed to
687 * this reference time instant, then this class may be unjustly served
688 * before other classes waiting for service. This may cause
689 * (unfrequently) the above bound to the slot index to be violated for
690 * some of these unlucky classes.
691 *
692 * Instead of delaying the activation of the new class, which is quite
693 * complex, the following inaccurate but simple solution is used: if
694 * the slot index is higher than QFQ_MAX_SLOTS-2, then the timestamps
695 * of the class are shifted backward so as to let the slot index
696 * become equal to QFQ_MAX_SLOTS-2. This threshold is used because, if
697 * the slot index is above it, then the data structure implementing
698 * the bucket list either gets immediately corrupted or may get
699 * corrupted on a possible next packet arrival that causes the start
700 * time of the group to be shifted backward.
669 */ 701 */
670static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, 702static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
671 u64 roundedS) 703 u64 roundedS)
672{ 704{
673 u64 slot = (roundedS - grp->S) >> grp->slot_shift; 705 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
674 unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; 706 unsigned int i; /* slot index in the bucket list */
707
708 if (unlikely(slot > QFQ_MAX_SLOTS - 2)) {
709 u64 deltaS = roundedS - grp->S -
710 ((u64)(QFQ_MAX_SLOTS - 2)<<grp->slot_shift);
711 cl->S -= deltaS;
712 cl->F -= deltaS;
713 slot = QFQ_MAX_SLOTS - 2;
714 }
715
716 i = (grp->front + slot) % QFQ_MAX_SLOTS;
675 717
676 hlist_add_head(&cl->next, &grp->slots[i]); 718 hlist_add_head(&cl->next, &grp->slots[i]);
677 __set_bit(slot, &grp->full_slots); 719 __set_bit(slot, &grp->full_slots);
@@ -892,6 +934,13 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
892 } 934 }
893 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); 935 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
894 936
937 if (unlikely(cl->lmax < qdisc_pkt_len(skb))) {
938 pr_debug("qfq: increasing maxpkt from %u to %u for class %u",
939 cl->lmax, qdisc_pkt_len(skb), cl->common.classid);
940 qfq_update_reactivate_class(q, cl, cl->inv_w,
941 qdisc_pkt_len(skb), 0);
942 }
943
895 err = qdisc_enqueue(skb, cl->qdisc); 944 err = qdisc_enqueue(skb, cl->qdisc);
896 if (unlikely(err != NET_XMIT_SUCCESS)) { 945 if (unlikely(err != NET_XMIT_SUCCESS)) {
897 pr_debug("qfq_enqueue: enqueue failed %d\n", err); 946 pr_debug("qfq_enqueue: enqueue failed %d\n", err);