aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJesper Dangaard Brouer <brouer@redhat.com>2013-08-14 17:47:11 -0400
committerDavid S. Miller <davem@davemloft.net>2013-08-15 04:43:08 -0400
commit8a8e3d84b1719a56f9151909e80ea6ebc5b8e318 (patch)
treebe050c259ea5bfc41731ac0581df4f57034cb7a3
parent09a8f03197d4799bc9969b35240e5606c026ded6 (diff)
net_sched: restore "linklayer atm" handling
commit 56b765b79 ("htb: improved accuracy at high rates") broke the "linklayer atm" handling. tc class add ... htb rate X ceil Y linklayer atm The linklayer setting is implemented by modifying the rate table which is send to the kernel. No direct parameter were transferred to the kernel indicating the linklayer setting. The commit 56b765b79 ("htb: improved accuracy at high rates") removed the use of the rate table system. To keep compatible with older iproute2 utils, this patch detects the linklayer by parsing the rate table. It also supports future versions of iproute2 to send this linklayer parameter to the kernel directly. This is done by using the __reserved field in struct tc_ratespec, to convey the choosen linklayer option, but only using the lower 4 bits of this field. Linklayer detection is limited to speeds below 100Mbit/s, because at high rates the rtab is gets too inaccurate, so bad that several fields contain the same values, this resembling the ATM detect. Fields even start to contain "0" time to send, e.g. at 1000Mbit/s sending a 96 bytes packet cost "0", thus the rtab have been more broken than we first realized. Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/sch_generic.h9
-rw-r--r--include/uapi/linux/pkt_sched.h10
-rw-r--r--net/sched/sch_api.c41
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_htb.c13
5 files changed, 72 insertions, 2 deletions
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6eab63363e59..e5ae0c50fa9c 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -683,13 +683,19 @@ struct psched_ratecfg {
683 u64 rate_bytes_ps; /* bytes per second */ 683 u64 rate_bytes_ps; /* bytes per second */
684 u32 mult; 684 u32 mult;
685 u16 overhead; 685 u16 overhead;
686 u8 linklayer;
686 u8 shift; 687 u8 shift;
687}; 688};
688 689
689static inline u64 psched_l2t_ns(const struct psched_ratecfg *r, 690static inline u64 psched_l2t_ns(const struct psched_ratecfg *r,
690 unsigned int len) 691 unsigned int len)
691{ 692{
692 return ((u64)(len + r->overhead) * r->mult) >> r->shift; 693 len += r->overhead;
694
695 if (unlikely(r->linklayer == TC_LINKLAYER_ATM))
696 return ((u64)(DIV_ROUND_UP(len,48)*53) * r->mult) >> r->shift;
697
698 return ((u64)len * r->mult) >> r->shift;
693} 699}
694 700
695extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf); 701extern void psched_ratecfg_precompute(struct psched_ratecfg *r, const struct tc_ratespec *conf);
@@ -700,6 +706,7 @@ static inline void psched_ratecfg_getrate(struct tc_ratespec *res,
700 memset(res, 0, sizeof(*res)); 706 memset(res, 0, sizeof(*res));
701 res->rate = r->rate_bytes_ps; 707 res->rate = r->rate_bytes_ps;
702 res->overhead = r->overhead; 708 res->overhead = r->overhead;
709 res->linklayer = (r->linklayer & TC_LINKLAYER_MASK);
703} 710}
704 711
705#endif 712#endif
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index dbd71b0c7d8c..09d62b9228ff 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -73,9 +73,17 @@ struct tc_estimator {
73#define TC_H_ROOT (0xFFFFFFFFU) 73#define TC_H_ROOT (0xFFFFFFFFU)
74#define TC_H_INGRESS (0xFFFFFFF1U) 74#define TC_H_INGRESS (0xFFFFFFF1U)
75 75
76/* Need to corrospond to iproute2 tc/tc_core.h "enum link_layer" */
77enum tc_link_layer {
78 TC_LINKLAYER_UNAWARE, /* Indicate unaware old iproute2 util */
79 TC_LINKLAYER_ETHERNET,
80 TC_LINKLAYER_ATM,
81};
82#define TC_LINKLAYER_MASK 0x0F /* limit use to lower 4 bits */
83
76struct tc_ratespec { 84struct tc_ratespec {
77 unsigned char cell_log; 85 unsigned char cell_log;
78 unsigned char __reserved; 86 __u8 linklayer; /* lower 4 bits */
79 unsigned short overhead; 87 unsigned short overhead;
80 short cell_align; 88 short cell_align;
81 unsigned short mpu; 89 unsigned short mpu;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 281c1bded1f6..51b968d3febb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
285 return q; 285 return q;
286} 286}
287 287
288/* The linklayer setting were not transferred from iproute2, in older
289 * versions, and the rate tables lookup systems have been dropped in
290 * the kernel. To keep backward compatible with older iproute2 tc
291 * utils, we detect the linklayer setting by detecting if the rate
292 * table were modified.
293 *
294 * For linklayer ATM table entries, the rate table will be aligned to
295 * 48 bytes, thus some table entries will contain the same value. The
296 * mpu (min packet unit) is also encoded into the old rate table, thus
297 * starting from the mpu, we find low and high table entries for
298 * mapping this cell. If these entries contain the same value, when
299 * the rate tables have been modified for linklayer ATM.
300 *
301 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
302 * and then roundup to the next cell, calc the table entry one below,
303 * and compare.
304 */
305static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
306{
307 int low = roundup(r->mpu, 48);
308 int high = roundup(low+1, 48);
309 int cell_low = low >> r->cell_log;
310 int cell_high = (high >> r->cell_log) - 1;
311
312 /* rtab is too inaccurate at rates > 100Mbit/s */
313 if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
314 pr_debug("TC linklayer: Giving up ATM detection\n");
315 return TC_LINKLAYER_ETHERNET;
316 }
317
318 if ((cell_high > cell_low) && (cell_high < 256)
319 && (rtab[cell_low] == rtab[cell_high])) {
320 pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
321 cell_low, cell_high, rtab[cell_high]);
322 return TC_LINKLAYER_ATM;
323 }
324 return TC_LINKLAYER_ETHERNET;
325}
326
288static struct qdisc_rate_table *qdisc_rtab_list; 327static struct qdisc_rate_table *qdisc_rtab_list;
289 328
290struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) 329struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
@@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta
308 rtab->rate = *r; 347 rtab->rate = *r;
309 rtab->refcnt = 1; 348 rtab->refcnt = 1;
310 memcpy(rtab->data, nla_data(tab), 1024); 349 memcpy(rtab->data, nla_data(tab), 1024);
350 if (r->linklayer == TC_LINKLAYER_UNAWARE)
351 r->linklayer = __detect_linklayer(r, rtab->data);
311 rtab->next = qdisc_rtab_list; 352 rtab->next = qdisc_rtab_list;
312 qdisc_rtab_list = rtab; 353 qdisc_rtab_list = rtab;
313 } 354 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index eeb8276d7a89..48be3d5c0d92 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -909,6 +909,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
909 memset(r, 0, sizeof(*r)); 909 memset(r, 0, sizeof(*r));
910 r->overhead = conf->overhead; 910 r->overhead = conf->overhead;
911 r->rate_bytes_ps = conf->rate; 911 r->rate_bytes_ps = conf->rate;
912 r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
912 r->mult = 1; 913 r->mult = 1;
913 /* 914 /*
914 * The deal here is to replace a divide by a reciprocal one 915 * The deal here is to replace a divide by a reciprocal one
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 45e751527dfc..c2178b15ca6e 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1329,6 +1329,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1329 struct htb_sched *q = qdisc_priv(sch); 1329 struct htb_sched *q = qdisc_priv(sch);
1330 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1330 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1331 struct nlattr *opt = tca[TCA_OPTIONS]; 1331 struct nlattr *opt = tca[TCA_OPTIONS];
1332 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1332 struct nlattr *tb[TCA_HTB_MAX + 1]; 1333 struct nlattr *tb[TCA_HTB_MAX + 1];
1333 struct tc_htb_opt *hopt; 1334 struct tc_htb_opt *hopt;
1334 1335
@@ -1350,6 +1351,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1350 if (!hopt->rate.rate || !hopt->ceil.rate) 1351 if (!hopt->rate.rate || !hopt->ceil.rate)
1351 goto failure; 1352 goto failure;
1352 1353
1354 /* Keeping backward compatible with rate_table based iproute2 tc */
1355 if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
1356 rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
1357 if (rtab)
1358 qdisc_put_rtab(rtab);
1359 }
1360 if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
1361 ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
1362 if (ctab)
1363 qdisc_put_rtab(ctab);
1364 }
1365
1353 if (!cl) { /* new class */ 1366 if (!cl) { /* new class */
1354 struct Qdisc *new_q; 1367 struct Qdisc *new_q;
1355 int prio; 1368 int prio;