author    Linus Torvalds <torvalds@linux-foundation.org>  2008-12-28 15:49:40 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2008-12-28 15:49:40 -0500
commit    0191b625ca5a46206d2fb862bb08f36f2fcb3b31 (patch)
tree      454d1842b1833d976da62abcbd5c47521ebe9bd7 /net/sched
parent    54a696bd07c14d3b1192d03ce7269bc59b45209a (diff)
parent    eb56092fc168bf5af199d47af50c0d84a96db898 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1429 commits)
  net: Allow dependancies of FDDI & Tokenring to be modular.
  igb: Fix build warning when DCA is disabled.
  net: Fix warning fallout from recent NAPI interface changes.
  gro: Fix potential use after free
  sfc: If AN is enabled, always read speed/duplex from the AN advertising bits
  sfc: When disabling the NIC, close the device rather than unregistering it
  sfc: SFT9001: Add cable diagnostics
  sfc: Add support for multiple PHY self-tests
  sfc: Merge top-level functions for self-tests
  sfc: Clean up PHY mode management in loopback self-test
  sfc: Fix unreliable link detection in some loopback modes
  sfc: Generate unique names for per-NIC workqueues
  802.3ad: use standard ethhdr instead of ad_header
  802.3ad: generalize out mac address initializer
  802.3ad: initialize ports LACPDU from const initializer
  802.3ad: remove typedef around ad_system
  802.3ad: turn ports is_individual into a bool
  802.3ad: turn ports is_enabled into a bool
  802.3ad: make ntt bool
  ixgbe: Fix set_ringparam in ixgbe to use the same memory pools.
  ...

Fixed trivial IPv4/6 address printing conflicts in fs/cifs/connect.c due
to the conversion to %pI (in this networking merge) and the addition of
doing IPv6 addresses (from the earlier merge of CIFS).
Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/Kconfig          |  22
-rw-r--r--  net/sched/Makefile         |   2
-rw-r--r--  net/sched/act_api.c        |  18
-rw-r--r--  net/sched/act_gact.c       |   4
-rw-r--r--  net/sched/act_ipt.c        |   4
-rw-r--r--  net/sched/act_mirred.c     |   4
-rw-r--r--  net/sched/act_nat.c        |   4
-rw-r--r--  net/sched/act_pedit.c      |   4
-rw-r--r--  net/sched/act_police.c     |  33
-rw-r--r--  net/sched/act_simple.c     |   4
-rw-r--r--  net/sched/act_skbedit.c    |   4
-rw-r--r--  net/sched/cls_api.c        |   3
-rw-r--r--  net/sched/cls_basic.c      |   2
-rw-r--r--  net/sched/cls_cgroup.c     | 288
-rw-r--r--  net/sched/cls_fw.c         |   2
-rw-r--r--  net/sched/cls_route.c      |   2
-rw-r--r--  net/sched/cls_tcindex.c    |   6
-rw-r--r--  net/sched/cls_u32.c        |  11
-rw-r--r--  net/sched/ematch.c         |  18
-rw-r--r--  net/sched/sch_api.c        |  50
-rw-r--r--  net/sched/sch_atm.c        |  36
-rw-r--r--  net/sched/sch_blackhole.c  |   1
-rw-r--r--  net/sched/sch_cbq.c        |  76
-rw-r--r--  net/sched/sch_drr.c        | 519
-rw-r--r--  net/sched/sch_dsmark.c     |  22
-rw-r--r--  net/sched/sch_fifo.c       |   4
-rw-r--r--  net/sched/sch_generic.c    |  40
-rw-r--r--  net/sched/sch_gred.c       |  22
-rw-r--r--  net/sched/sch_hfsc.c       |  64
-rw-r--r--  net/sched/sch_htb.c        | 171
-rw-r--r--  net/sched/sch_multiq.c     |  82
-rw-r--r--  net/sched/sch_netem.c      | 160
-rw-r--r--  net/sched/sch_prio.c       |  50
-rw-r--r--  net/sched/sch_red.c        |  33
-rw-r--r--  net/sched/sch_sfq.c        |  71
-rw-r--r--  net/sched/sch_tbf.c        |  44
-rw-r--r--  net/sched/sch_teql.c       |  19
37 files changed, 1216 insertions, 683 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 6767e54155db..4f7ef0db302b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -194,6 +194,17 @@ config NET_SCH_NETEM
194 194
195 If unsure, say N. 195 If unsure, say N.
196 196
197config NET_SCH_DRR
198 tristate "Deficit Round Robin scheduler (DRR)"
199 help
200 Say Y here if you want to use the Deficit Round Robin (DRR) packet
201 scheduling algorithm.
202
203 To compile this driver as a module, choose M here: the module
204 will be called sch_drr.
205
206 If unsure, say N.
207
197config NET_SCH_INGRESS 208config NET_SCH_INGRESS
198 tristate "Ingress Qdisc" 209 tristate "Ingress Qdisc"
199 depends on NET_CLS_ACT 210 depends on NET_CLS_ACT
@@ -316,6 +327,17 @@ config NET_CLS_FLOW
316 To compile this code as a module, choose M here: the 327 To compile this code as a module, choose M here: the
317 module will be called cls_flow. 328 module will be called cls_flow.
318 329
330config NET_CLS_CGROUP
331 bool "Control Group Classifier"
332 select NET_CLS
333 depends on CGROUPS
334 ---help---
335 Say Y here if you want to classify packets based on the control
336 cgroup of their process.
337
338 To compile this code as a module, choose M here: the
339 module will be called cls_cgroup.
340
319config NET_EMATCH 341config NET_EMATCH
320 bool "Extended Matches" 342 bool "Extended Matches"
321 select NET_CLS 343 select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index e60c9925b269..54d950cd4b8d 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
30obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o 30obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
31obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 31obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
32obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 32obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
33obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
33obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 34obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
34obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 35obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
35obj-$(CONFIG_NET_CLS_FW) += cls_fw.o 36obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
@@ -38,6 +39,7 @@ obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
38obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o 39obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
39obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o 40obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
40obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o 41obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
42obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
41obj-$(CONFIG_NET_EMATCH) += ematch.o 43obj-$(CONFIG_NET_EMATCH) += ematch.o
42obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o 44obj-$(CONFIG_NET_EMATCH_CMP) += em_cmp.o
43obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o 45obj-$(CONFIG_NET_EMATCH_NBYTE) += em_nbyte.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 8f457f1e0acf..9d03cc33b6cc 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -214,12 +214,14 @@ struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind,
214} 214}
215EXPORT_SYMBOL(tcf_hash_check); 215EXPORT_SYMBOL(tcf_hash_check);
216 216
217struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) 217struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est,
218 struct tc_action *a, int size, int bind,
219 u32 *idx_gen, struct tcf_hashinfo *hinfo)
218{ 220{
219 struct tcf_common *p = kzalloc(size, GFP_KERNEL); 221 struct tcf_common *p = kzalloc(size, GFP_KERNEL);
220 222
221 if (unlikely(!p)) 223 if (unlikely(!p))
222 return p; 224 return ERR_PTR(-ENOMEM);
223 p->tcfc_refcnt = 1; 225 p->tcfc_refcnt = 1;
224 if (bind) 226 if (bind)
225 p->tcfc_bindcnt = 1; 227 p->tcfc_bindcnt = 1;
@@ -228,9 +230,15 @@ struct tcf_common *tcf_hash_create(u32 index, struct nlattr *est, struct tc_acti
228 p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); 230 p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo);
229 p->tcfc_tm.install = jiffies; 231 p->tcfc_tm.install = jiffies;
230 p->tcfc_tm.lastuse = jiffies; 232 p->tcfc_tm.lastuse = jiffies;
231 if (est) 233 if (est) {
232 gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, 234 int err = gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est,
233 &p->tcfc_lock, est); 235 &p->tcfc_lock, est);
236 if (err) {
237 kfree(p);
238 return ERR_PTR(err);
239 }
240 }
241
234 a->priv = (void *) p; 242 a->priv = (void *) p;
235 return p; 243 return p;
236} 244}
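
Note: the tcf_hash_create() change above stops returning NULL on failure and instead encodes the errno in the returned pointer, which is why every action module below swaps its "if (unlikely(!pc)) return -ENOMEM" check for IS_ERR()/PTR_ERR(). A minimal sketch of that kernel convention follows; ERR_PTR(), IS_ERR() and PTR_ERR() are the real <linux/err.h> helpers, while foo_create() and its caller are hypothetical and only illustrate the pattern, they are not part of the patch.

	#include <linux/err.h>	/* ERR_PTR(), IS_ERR(), PTR_ERR() */
	#include <linux/slab.h>	/* kzalloc(), kfree() */

	struct foo { int dummy; };	/* stand-in for struct tcf_common */

	/* On failure, encode the errno in the returned pointer instead of NULL. */
	static struct foo *foo_create(gfp_t gfp)
	{
		struct foo *p = kzalloc(sizeof(*p), gfp);

		if (!p)
			return ERR_PTR(-ENOMEM);
		return p;
	}

	/* Caller pattern adopted by act_gact, act_ipt, act_mirred, ... below. */
	static int foo_init(void)
	{
		struct foo *p = foo_create(GFP_KERNEL);

		if (IS_ERR(p))
			return PTR_ERR(p);	/* recover the negative errno */
		kfree(p);
		return 0;
	}

This lets tcf_hash_create() report why it failed (allocation vs. estimator setup) without changing the success path for its callers.
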
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index ac04289da5d7..e7f796aec657 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -88,8 +88,8 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
88 if (!pc) { 88 if (!pc) {
89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), 89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
90 bind, &gact_idx_gen, &gact_hash_info); 90 bind, &gact_idx_gen, &gact_hash_info);
91 if (unlikely(!pc)) 91 if (IS_ERR(pc))
92 return -ENOMEM; 92 return PTR_ERR(pc);
93 ret = ACT_P_CREATED; 93 ret = ACT_P_CREATED;
94 } else { 94 } else {
95 if (!ovr) { 95 if (!ovr) {
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 0453d79ebf57..082c520b0def 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -136,8 +136,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
136 if (!pc) { 136 if (!pc) {
137 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, 137 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
138 &ipt_idx_gen, &ipt_hash_info); 138 &ipt_idx_gen, &ipt_hash_info);
139 if (unlikely(!pc)) 139 if (IS_ERR(pc))
140 return -ENOMEM; 140 return PTR_ERR(pc);
141 ret = ACT_P_CREATED; 141 ret = ACT_P_CREATED;
142 } else { 142 } else {
143 if (!ovr) { 143 if (!ovr) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 70341c020b6d..b9aaab4e0354 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -105,8 +105,8 @@ static int tcf_mirred_init(struct nlattr *nla, struct nlattr *est,
105 return -EINVAL; 105 return -EINVAL;
106 pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, 106 pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind,
107 &mirred_idx_gen, &mirred_hash_info); 107 &mirred_idx_gen, &mirred_hash_info);
108 if (unlikely(!pc)) 108 if (IS_ERR(pc))
109 return -ENOMEM; 109 return PTR_ERR(pc);
110 ret = ACT_P_CREATED; 110 ret = ACT_P_CREATED;
111 } else { 111 } else {
112 if (!ovr) { 112 if (!ovr) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 7b39ed485bca..d885ba311564 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -68,8 +68,8 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
68 if (!pc) { 68 if (!pc) {
69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70 &nat_idx_gen, &nat_hash_info); 70 &nat_idx_gen, &nat_hash_info);
71 if (unlikely(!pc)) 71 if (IS_ERR(pc))
72 return -ENOMEM; 72 return PTR_ERR(pc);
73 p = to_tcf_nat(pc); 73 p = to_tcf_nat(pc);
74 ret = ACT_P_CREATED; 74 ret = ACT_P_CREATED;
75 } else { 75 } else {
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d5f4e3404864..96c0ed115e2a 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -68,8 +68,8 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
68 return -EINVAL; 68 return -EINVAL;
69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70 &pedit_idx_gen, &pedit_hash_info); 70 &pedit_idx_gen, &pedit_hash_info);
71 if (unlikely(!pc)) 71 if (IS_ERR(pc))
72 return -ENOMEM; 72 return PTR_ERR(pc);
73 p = to_pedit(pc); 73 p = to_pedit(pc);
74 keys = kmalloc(ksize, GFP_KERNEL); 74 keys = kmalloc(ksize, GFP_KERNEL);
75 if (keys == NULL) { 75 if (keys == NULL) {
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 38015b493947..5c72a116b1a4 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -182,17 +182,32 @@ override:
182 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]); 182 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE]);
183 if (R_tab == NULL) 183 if (R_tab == NULL)
184 goto failure; 184 goto failure;
185
186 if (!est && (ret == ACT_P_CREATED ||
187 !gen_estimator_active(&police->tcf_bstats,
188 &police->tcf_rate_est))) {
189 err = -EINVAL;
190 goto failure;
191 }
192
185 if (parm->peakrate.rate) { 193 if (parm->peakrate.rate) {
186 P_tab = qdisc_get_rtab(&parm->peakrate, 194 P_tab = qdisc_get_rtab(&parm->peakrate,
187 tb[TCA_POLICE_PEAKRATE]); 195 tb[TCA_POLICE_PEAKRATE]);
188 if (P_tab == NULL) { 196 if (P_tab == NULL)
189 qdisc_put_rtab(R_tab);
190 goto failure; 197 goto failure;
191 }
192 } 198 }
193 } 199 }
194 /* No failure allowed after this point */ 200
195 spin_lock_bh(&police->tcf_lock); 201 spin_lock_bh(&police->tcf_lock);
202 if (est) {
203 err = gen_replace_estimator(&police->tcf_bstats,
204 &police->tcf_rate_est,
205 &police->tcf_lock, est);
206 if (err)
207 goto failure_unlock;
208 }
209
210 /* No failure allowed after this point */
196 if (R_tab != NULL) { 211 if (R_tab != NULL) {
197 qdisc_put_rtab(police->tcfp_R_tab); 212 qdisc_put_rtab(police->tcfp_R_tab);
198 police->tcfp_R_tab = R_tab; 213 police->tcfp_R_tab = R_tab;
@@ -217,10 +232,6 @@ override:
217 232
218 if (tb[TCA_POLICE_AVRATE]) 233 if (tb[TCA_POLICE_AVRATE])
219 police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]); 234 police->tcfp_ewma_rate = nla_get_u32(tb[TCA_POLICE_AVRATE]);
220 if (est)
221 gen_replace_estimator(&police->tcf_bstats,
222 &police->tcf_rate_est,
223 &police->tcf_lock, est);
224 235
225 spin_unlock_bh(&police->tcf_lock); 236 spin_unlock_bh(&police->tcf_lock);
226 if (ret != ACT_P_CREATED) 237 if (ret != ACT_P_CREATED)
@@ -238,7 +249,13 @@ override:
238 a->priv = police; 249 a->priv = police;
239 return ret; 250 return ret;
240 251
252failure_unlock:
253 spin_unlock_bh(&police->tcf_lock);
241failure: 254failure:
255 if (P_tab)
256 qdisc_put_rtab(P_tab);
257 if (R_tab)
258 qdisc_put_rtab(R_tab);
242 if (ret == ACT_P_CREATED) 259 if (ret == ACT_P_CREATED)
243 kfree(police); 260 kfree(police);
244 return err; 261 return err;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index e7851ce92cfe..8daa1ebc7413 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -124,8 +124,8 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
124 if (!pc) { 124 if (!pc) {
125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
126 &simp_idx_gen, &simp_hash_info); 126 &simp_idx_gen, &simp_hash_info);
127 if (unlikely(!pc)) 127 if (IS_ERR(pc))
128 return -ENOMEM; 128 return PTR_ERR(pc);
129 129
130 d = to_defact(pc); 130 d = to_defact(pc);
131 ret = alloc_defdata(d, defdata); 131 ret = alloc_defdata(d, defdata);
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fe9777e77f35..4ab916b8074b 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -104,8 +104,8 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
104 if (!pc) { 104 if (!pc) {
105 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 105 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
106 &skbedit_idx_gen, &skbedit_hash_info); 106 &skbedit_idx_gen, &skbedit_hash_info);
107 if (unlikely(!pc)) 107 if (IS_ERR(pc))
108 return -ENOMEM; 108 return PTR_ERR(pc);
109 109
110 d = to_skbedit(pc); 110 d = to_skbedit(pc);
111 ret = ACT_P_CREATED; 111 ret = ACT_P_CREATED;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 16e7ac9774e5..173fcc4b050d 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -531,7 +531,8 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
531 if (src->action) { 531 if (src->action) {
532 struct tc_action *act; 532 struct tc_action *act;
533 tcf_tree_lock(tp); 533 tcf_tree_lock(tp);
534 act = xchg(&dst->action, src->action); 534 act = dst->action;
535 dst->action = src->action;
535 tcf_tree_unlock(tp); 536 tcf_tree_unlock(tp);
536 if (act) 537 if (act)
537 tcf_action_destroy(act, TCA_ACT_UNBIND); 538 tcf_action_destroy(act, TCA_ACT_UNBIND);
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 956915c217d6..4e2bda854119 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -102,7 +102,7 @@ static inline void basic_delete_filter(struct tcf_proto *tp,
102 102
103static void basic_destroy(struct tcf_proto *tp) 103static void basic_destroy(struct tcf_proto *tp)
104{ 104{
105 struct basic_head *head = (struct basic_head *) xchg(&tp->root, NULL); 105 struct basic_head *head = tp->root;
106 struct basic_filter *f, *n; 106 struct basic_filter *f, *n;
107 107
108 list_for_each_entry_safe(f, n, &head->flist, link) { 108 list_for_each_entry_safe(f, n, &head->flist, link) {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
new file mode 100644
index 000000000000..0d68b1975983
--- /dev/null
+++ b/net/sched/cls_cgroup.c
@@ -0,0 +1,288 @@
1/*
2 * net/sched/cls_cgroup.c Control Group Classifier
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/skbuff.h>
17#include <linux/cgroup.h>
18#include <net/rtnetlink.h>
19#include <net/pkt_cls.h>
20
21struct cgroup_cls_state
22{
23 struct cgroup_subsys_state css;
24 u32 classid;
25};
26
27static inline struct cgroup_cls_state *net_cls_state(struct cgroup *cgrp)
28{
29 return (struct cgroup_cls_state *)
30 cgroup_subsys_state(cgrp, net_cls_subsys_id);
31}
32
33static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
34 struct cgroup *cgrp)
35{
36 struct cgroup_cls_state *cs;
37
38 if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL)))
39 return ERR_PTR(-ENOMEM);
40
41 if (cgrp->parent)
42 cs->classid = net_cls_state(cgrp->parent)->classid;
43
44 return &cs->css;
45}
46
47static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
48{
49 kfree(ss);
50}
51
52static u64 read_classid(struct cgroup *cgrp, struct cftype *cft)
53{
54 return net_cls_state(cgrp)->classid;
55}
56
57static int write_classid(struct cgroup *cgrp, struct cftype *cft, u64 value)
58{
59 if (!cgroup_lock_live_group(cgrp))
60 return -ENODEV;
61
62 net_cls_state(cgrp)->classid = (u32) value;
63
64 cgroup_unlock();
65
66 return 0;
67}
68
69static struct cftype ss_files[] = {
70 {
71 .name = "classid",
72 .read_u64 = read_classid,
73 .write_u64 = write_classid,
74 },
75};
76
77static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
78{
79 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
80}
81
82struct cgroup_subsys net_cls_subsys = {
83 .name = "net_cls",
84 .create = cgrp_create,
85 .destroy = cgrp_destroy,
86 .populate = cgrp_populate,
87 .subsys_id = net_cls_subsys_id,
88};
89
90struct cls_cgroup_head
91{
92 u32 handle;
93 struct tcf_exts exts;
94 struct tcf_ematch_tree ematches;
95};
96
97static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
98 struct tcf_result *res)
99{
100 struct cls_cgroup_head *head = tp->root;
101 struct cgroup_cls_state *cs;
102 int ret = 0;
103
104 /*
105 * Due to the nature of the classifier it is required to ignore all
106 * packets originating from softirq context as accessing `current'
107 * would lead to false results.
108 *
109 * This test assumes that all callers of dev_queue_xmit() explicitely
110 * disable bh. Knowing this, it is possible to detect softirq based
111 * calls by looking at the number of nested bh disable calls because
112 * softirqs always disables bh.
113 */
114 if (softirq_count() != SOFTIRQ_OFFSET)
115 return -1;
116
117 rcu_read_lock();
118 cs = (struct cgroup_cls_state *) task_subsys_state(current,
119 net_cls_subsys_id);
120 if (cs->classid && tcf_em_tree_match(skb, &head->ematches, NULL)) {
121 res->classid = cs->classid;
122 res->class = 0;
123 ret = tcf_exts_exec(skb, &head->exts, res);
124 } else
125 ret = -1;
126
127 rcu_read_unlock();
128
129 return ret;
130}
131
132static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle)
133{
134 return 0UL;
135}
136
137static void cls_cgroup_put(struct tcf_proto *tp, unsigned long f)
138{
139}
140
141static int cls_cgroup_init(struct tcf_proto *tp)
142{
143 return 0;
144}
145
146static const struct tcf_ext_map cgroup_ext_map = {
147 .action = TCA_CGROUP_ACT,
148 .police = TCA_CGROUP_POLICE,
149};
150
151static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = {
152 [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED },
153};
154
155static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
156 u32 handle, struct nlattr **tca,
157 unsigned long *arg)
158{
159 struct nlattr *tb[TCA_CGROUP_MAX+1];
160 struct cls_cgroup_head *head = tp->root;
161 struct tcf_ematch_tree t;
162 struct tcf_exts e;
163 int err;
164
165 if (head == NULL) {
166 if (!handle)
167 return -EINVAL;
168
169 head = kzalloc(sizeof(*head), GFP_KERNEL);
170 if (head == NULL)
171 return -ENOBUFS;
172
173 head->handle = handle;
174
175 tcf_tree_lock(tp);
176 tp->root = head;
177 tcf_tree_unlock(tp);
178 }
179
180 if (handle != head->handle)
181 return -ENOENT;
182
183 err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS],
184 cgroup_policy);
185 if (err < 0)
186 return err;
187
188 err = tcf_exts_validate(tp, tb, tca[TCA_RATE], &e, &cgroup_ext_map);
189 if (err < 0)
190 return err;
191
192 err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t);
193 if (err < 0)
194 return err;
195
196 tcf_exts_change(tp, &head->exts, &e);
197 tcf_em_tree_change(tp, &head->ematches, &t);
198
199 return 0;
200}
201
202static void cls_cgroup_destroy(struct tcf_proto *tp)
203{
204 struct cls_cgroup_head *head = tp->root;
205
206 if (head) {
207 tcf_exts_destroy(tp, &head->exts);
208 tcf_em_tree_destroy(tp, &head->ematches);
209 kfree(head);
210 }
211}
212
213static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg)
214{
215 return -EOPNOTSUPP;
216}
217
218static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
219{
220 struct cls_cgroup_head *head = tp->root;
221
222 if (arg->count < arg->skip)
223 goto skip;
224
225 if (arg->fn(tp, (unsigned long) head, arg) < 0) {
226 arg->stop = 1;
227 return;
228 }
229skip:
230 arg->count++;
231}
232
233static int cls_cgroup_dump(struct tcf_proto *tp, unsigned long fh,
234 struct sk_buff *skb, struct tcmsg *t)
235{
236 struct cls_cgroup_head *head = tp->root;
237 unsigned char *b = skb_tail_pointer(skb);
238 struct nlattr *nest;
239
240 t->tcm_handle = head->handle;
241
242 nest = nla_nest_start(skb, TCA_OPTIONS);
243 if (nest == NULL)
244 goto nla_put_failure;
245
246 if (tcf_exts_dump(skb, &head->exts, &cgroup_ext_map) < 0 ||
247 tcf_em_tree_dump(skb, &head->ematches, TCA_CGROUP_EMATCHES) < 0)
248 goto nla_put_failure;
249
250 nla_nest_end(skb, nest);
251
252 if (tcf_exts_dump_stats(skb, &head->exts, &cgroup_ext_map) < 0)
253 goto nla_put_failure;
254
255 return skb->len;
256
257nla_put_failure:
258 nlmsg_trim(skb, b);
259 return -1;
260}
261
262static struct tcf_proto_ops cls_cgroup_ops __read_mostly = {
263 .kind = "cgroup",
264 .init = cls_cgroup_init,
265 .change = cls_cgroup_change,
266 .classify = cls_cgroup_classify,
267 .destroy = cls_cgroup_destroy,
268 .get = cls_cgroup_get,
269 .put = cls_cgroup_put,
270 .delete = cls_cgroup_delete,
271 .walk = cls_cgroup_walk,
272 .dump = cls_cgroup_dump,
273 .owner = THIS_MODULE,
274};
275
276static int __init init_cgroup_cls(void)
277{
278 return register_tcf_proto_ops(&cls_cgroup_ops);
279}
280
281static void __exit exit_cgroup_cls(void)
282{
283 unregister_tcf_proto_ops(&cls_cgroup_ops);
284}
285
286module_init(init_cgroup_cls);
287module_exit(exit_cgroup_cls);
288MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index b0f90e593af0..6d6e87585fb1 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -148,7 +148,7 @@ fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
148 148
149static void fw_destroy(struct tcf_proto *tp) 149static void fw_destroy(struct tcf_proto *tp)
150{ 150{
151 struct fw_head *head = (struct fw_head*)xchg(&tp->root, NULL); 151 struct fw_head *head = tp->root;
152 struct fw_filter *f; 152 struct fw_filter *f;
153 int h; 153 int h;
154 154
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index e3d8455eebc2..bdf1f4172eef 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -260,7 +260,7 @@ route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
260 260
261static void route4_destroy(struct tcf_proto *tp) 261static void route4_destroy(struct tcf_proto *tp)
262{ 262{
263 struct route4_head *head = xchg(&tp->root, NULL); 263 struct route4_head *head = tp->root;
264 int h1, h2; 264 int h1, h2;
265 265
266 if (head == NULL) 266 if (head == NULL)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 7a7bff5ded24..e806f2314b5e 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -13,12 +13,6 @@
13#include <net/netlink.h> 13#include <net/netlink.h>
14#include <net/pkt_cls.h> 14#include <net/pkt_cls.h>
15 15
16
17/*
18 * Not quite sure if we need all the xchgs Alexey uses when accessing things.
19 * Can always add them later ... :)
20 */
21
22/* 16/*
23 * Passing parameters to the root seems to be done more awkwardly than really 17 * Passing parameters to the root seems to be done more awkwardly than really
24 * necessary. At least, u32 doesn't seem to use such dirty hacks. To be 18 * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 246f9065ce34..05d178008cbc 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -387,7 +387,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
387static void u32_destroy(struct tcf_proto *tp) 387static void u32_destroy(struct tcf_proto *tp)
388{ 388{
389 struct tc_u_common *tp_c = tp->data; 389 struct tc_u_common *tp_c = tp->data;
390 struct tc_u_hnode *root_ht = xchg(&tp->root, NULL); 390 struct tc_u_hnode *root_ht = tp->root;
391 391
392 WARN_ON(root_ht == NULL); 392 WARN_ON(root_ht == NULL);
393 393
@@ -479,7 +479,7 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
479 err = -EINVAL; 479 err = -EINVAL;
480 if (tb[TCA_U32_LINK]) { 480 if (tb[TCA_U32_LINK]) {
481 u32 handle = nla_get_u32(tb[TCA_U32_LINK]); 481 u32 handle = nla_get_u32(tb[TCA_U32_LINK]);
482 struct tc_u_hnode *ht_down = NULL; 482 struct tc_u_hnode *ht_down = NULL, *ht_old;
483 483
484 if (TC_U32_KEY(handle)) 484 if (TC_U32_KEY(handle))
485 goto errout; 485 goto errout;
@@ -493,11 +493,12 @@ static int u32_set_parms(struct tcf_proto *tp, unsigned long base,
493 } 493 }
494 494
495 tcf_tree_lock(tp); 495 tcf_tree_lock(tp);
496 ht_down = xchg(&n->ht_down, ht_down); 496 ht_old = n->ht_down;
497 n->ht_down = ht_down;
497 tcf_tree_unlock(tp); 498 tcf_tree_unlock(tp);
498 499
499 if (ht_down) 500 if (ht_old)
500 ht_down->refcnt--; 501 ht_old->refcnt--;
501 } 502 }
502 if (tb[TCA_U32_CLASSID]) { 503 if (tb[TCA_U32_CLASSID]) {
503 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]); 504 n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e82519e548d7..aab59409728b 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -71,7 +71,7 @@
71 * 71 *
72 * static void __exit exit_my_ematch(void) 72 * static void __exit exit_my_ematch(void)
73 * { 73 * {
74 * return tcf_em_unregister(&my_ops); 74 * tcf_em_unregister(&my_ops);
75 * } 75 * }
76 * 76 *
77 * module_init(init_my_ematch); 77 * module_init(init_my_ematch);
@@ -154,23 +154,11 @@ EXPORT_SYMBOL(tcf_em_register);
154 * 154 *
155 * Returns -ENOENT if no matching ematch was found. 155 * Returns -ENOENT if no matching ematch was found.
156 */ 156 */
157int tcf_em_unregister(struct tcf_ematch_ops *ops) 157void tcf_em_unregister(struct tcf_ematch_ops *ops)
158{ 158{
159 int err = 0;
160 struct tcf_ematch_ops *e;
161
162 write_lock(&ematch_mod_lock); 159 write_lock(&ematch_mod_lock);
163 list_for_each_entry(e, &ematch_ops, link) { 160 list_del(&ops->link);
164 if (e == ops) {
165 list_del(&e->link);
166 goto out;
167 }
168 }
169
170 err = -ENOENT;
171out:
172 write_unlock(&ematch_mod_lock); 161 write_unlock(&ematch_mod_lock);
173 return err;
174} 162}
175EXPORT_SYMBOL(tcf_em_unregister); 163EXPORT_SYMBOL(tcf_em_unregister);
176 164
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6ab4a2f92ca0..0fc4a18fd96f 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -97,10 +97,9 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
97 97
98 Auxiliary routines: 98 Auxiliary routines:
99 99
100 ---requeue 100 ---peek
101 101
102 requeues once dequeued packet. It is used for non-standard or 102 like dequeue but without removing a packet from the queue
103 just buggy devices, which can defer output even if netif_queue_stopped()=0.
104 103
105 ---reset 104 ---reset
106 105
@@ -147,8 +146,14 @@ int register_qdisc(struct Qdisc_ops *qops)
147 146
148 if (qops->enqueue == NULL) 147 if (qops->enqueue == NULL)
149 qops->enqueue = noop_qdisc_ops.enqueue; 148 qops->enqueue = noop_qdisc_ops.enqueue;
150 if (qops->requeue == NULL) 149 if (qops->peek == NULL) {
151 qops->requeue = noop_qdisc_ops.requeue; 150 if (qops->dequeue == NULL) {
151 qops->peek = noop_qdisc_ops.peek;
152 } else {
153 rc = -EINVAL;
154 goto out;
155 }
156 }
152 if (qops->dequeue == NULL) 157 if (qops->dequeue == NULL)
153 qops->dequeue = noop_qdisc_ops.dequeue; 158 qops->dequeue = noop_qdisc_ops.dequeue;
154 159
@@ -184,7 +189,7 @@ EXPORT_SYMBOL(unregister_qdisc);
184 (root qdisc, all its children, children of children etc.) 189 (root qdisc, all its children, children of children etc.)
185 */ 190 */
186 191
187struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) 192static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
188{ 193{
189 struct Qdisc *q; 194 struct Qdisc *q;
190 195
@@ -199,28 +204,16 @@ struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
199 return NULL; 204 return NULL;
200} 205}
201 206
202/*
203 * This lock is needed until some qdiscs stop calling qdisc_tree_decrease_qlen()
204 * without rtnl_lock(); currently hfsc_dequeue(), netem_dequeue(), tbf_dequeue()
205 */
206static DEFINE_SPINLOCK(qdisc_list_lock);
207
208static void qdisc_list_add(struct Qdisc *q) 207static void qdisc_list_add(struct Qdisc *q)
209{ 208{
210 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 209 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
211 spin_lock_bh(&qdisc_list_lock);
212 list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); 210 list_add_tail(&q->list, &qdisc_root_sleeping(q)->list);
213 spin_unlock_bh(&qdisc_list_lock);
214 }
215} 211}
216 212
217void qdisc_list_del(struct Qdisc *q) 213void qdisc_list_del(struct Qdisc *q)
218{ 214{
219 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { 215 if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
220 spin_lock_bh(&qdisc_list_lock);
221 list_del(&q->list); 216 list_del(&q->list);
222 spin_unlock_bh(&qdisc_list_lock);
223 }
224} 217}
225EXPORT_SYMBOL(qdisc_list_del); 218EXPORT_SYMBOL(qdisc_list_del);
226 219
@@ -229,22 +222,17 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
229 unsigned int i; 222 unsigned int i;
230 struct Qdisc *q; 223 struct Qdisc *q;
231 224
232 spin_lock_bh(&qdisc_list_lock);
233
234 for (i = 0; i < dev->num_tx_queues; i++) { 225 for (i = 0; i < dev->num_tx_queues; i++) {
235 struct netdev_queue *txq = netdev_get_tx_queue(dev, i); 226 struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
236 struct Qdisc *txq_root = txq->qdisc_sleeping; 227 struct Qdisc *txq_root = txq->qdisc_sleeping;
237 228
238 q = qdisc_match_from_root(txq_root, handle); 229 q = qdisc_match_from_root(txq_root, handle);
239 if (q) 230 if (q)
240 goto unlock; 231 goto out;
241 } 232 }
242 233
243 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); 234 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle);
244 235out:
245unlock:
246 spin_unlock_bh(&qdisc_list_lock);
247
248 return q; 236 return q;
249} 237}
250 238
@@ -462,7 +450,6 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
462 timer); 450 timer);
463 451
464 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 452 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
465 smp_wmb();
466 __netif_schedule(qdisc_root(wd->qdisc)); 453 __netif_schedule(qdisc_root(wd->qdisc));
467 454
468 return HRTIMER_NORESTART; 455 return HRTIMER_NORESTART;
@@ -892,9 +879,12 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
892 sch->stab = stab; 879 sch->stab = stab;
893 880
894 if (tca[TCA_RATE]) 881 if (tca[TCA_RATE])
882 /* NB: ignores errors from replace_estimator
883 because change can't be undone. */
895 gen_replace_estimator(&sch->bstats, &sch->rate_est, 884 gen_replace_estimator(&sch->bstats, &sch->rate_est,
896 qdisc_root_sleeping_lock(sch), 885 qdisc_root_sleeping_lock(sch),
897 tca[TCA_RATE]); 886 tca[TCA_RATE]);
887
898 return 0; 888 return 0;
899} 889}
900 890
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 43d37256c15e..2a8b83af7c47 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -62,7 +62,7 @@ struct atm_qdisc_data {
62 struct atm_flow_data link; /* unclassified skbs go here */ 62 struct atm_flow_data link; /* unclassified skbs go here */
63 struct atm_flow_data *flows; /* NB: "link" is also on this 63 struct atm_flow_data *flows; /* NB: "link" is also on this
64 list */ 64 list */
65 struct tasklet_struct task; /* requeue tasklet */ 65 struct tasklet_struct task; /* dequeue tasklet */
66}; 66};
67 67
68/* ------------------------- Class/flow operations ------------------------- */ 68/* ------------------------- Class/flow operations ------------------------- */
@@ -102,7 +102,8 @@ static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
102 return -EINVAL; 102 return -EINVAL;
103 if (!new) 103 if (!new)
104 new = &noop_qdisc; 104 new = &noop_qdisc;
105 *old = xchg(&flow->q, new); 105 *old = flow->q;
106 flow->q = new;
106 if (*old) 107 if (*old)
107 qdisc_reset(*old); 108 qdisc_reset(*old);
108 return 0; 109 return 0;
@@ -480,11 +481,14 @@ static void sch_atm_dequeue(unsigned long data)
480 * If traffic is properly shaped, this won't generate nasty 481 * If traffic is properly shaped, this won't generate nasty
481 * little bursts. Otherwise, it may ... (but that's okay) 482 * little bursts. Otherwise, it may ... (but that's okay)
482 */ 483 */
483 while ((skb = flow->q->dequeue(flow->q))) { 484 while ((skb = flow->q->ops->peek(flow->q))) {
484 if (!atm_may_send(flow->vcc, skb->truesize)) { 485 if (!atm_may_send(flow->vcc, skb->truesize))
485 (void)flow->q->ops->requeue(skb, flow->q);
486 break; 486 break;
487 } 487
488 skb = qdisc_dequeue_peeked(flow->q);
489 if (unlikely(!skb))
490 break;
491
488 pr_debug("atm_tc_dequeue: sending on class %p\n", flow); 492 pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
489 /* remove any LL header somebody else has attached */ 493 /* remove any LL header somebody else has attached */
490 skb_pull(skb, skb_network_offset(skb)); 494 skb_pull(skb, skb_network_offset(skb));
@@ -516,27 +520,19 @@ static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
516 520
517 pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p); 521 pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
518 tasklet_schedule(&p->task); 522 tasklet_schedule(&p->task);
519 skb = p->link.q->dequeue(p->link.q); 523 skb = qdisc_dequeue_peeked(p->link.q);
520 if (skb) 524 if (skb)
521 sch->q.qlen--; 525 sch->q.qlen--;
522 return skb; 526 return skb;
523} 527}
524 528
525static int atm_tc_requeue(struct sk_buff *skb, struct Qdisc *sch) 529static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
526{ 530{
527 struct atm_qdisc_data *p = qdisc_priv(sch); 531 struct atm_qdisc_data *p = qdisc_priv(sch);
528 int ret;
529 532
530 pr_debug("atm_tc_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p); 533 pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
531 ret = p->link.q->ops->requeue(skb, p->link.q); 534
532 if (!ret) { 535 return p->link.q->ops->peek(p->link.q);
533 sch->q.qlen++;
534 sch->qstats.requeues++;
535 } else if (net_xmit_drop_count(ret)) {
536 sch->qstats.drops++;
537 p->link.qstats.drops++;
538 }
539 return ret;
540} 536}
541 537
542static unsigned int atm_tc_drop(struct Qdisc *sch) 538static unsigned int atm_tc_drop(struct Qdisc *sch)
@@ -694,7 +690,7 @@ static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
694 .priv_size = sizeof(struct atm_qdisc_data), 690 .priv_size = sizeof(struct atm_qdisc_data),
695 .enqueue = atm_tc_enqueue, 691 .enqueue = atm_tc_enqueue,
696 .dequeue = atm_tc_dequeue, 692 .dequeue = atm_tc_dequeue,
697 .requeue = atm_tc_requeue, 693 .peek = atm_tc_peek,
698 .drop = atm_tc_drop, 694 .drop = atm_tc_drop,
699 .init = atm_tc_init, 695 .init = atm_tc_init,
700 .reset = atm_tc_reset, 696 .reset = atm_tc_reset,
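
Note: the sch_atm hunks above show the theme running through this merge for net/sched: the ->requeue operation is removed and replaced by ->peek, so a qdisc inspects the head packet and only dequeues it once it knows the packet can actually be sent, rather than dequeuing and pushing it back. A rough sketch of that idiom follows, assuming the qdisc_dequeue_peeked() helper that sch_atm_dequeue() above and drr_dequeue() below rely on; the example_dequeue() wrapper itself is illustrative only.

	#include <linux/types.h>
	#include <net/sch_generic.h>	/* struct Qdisc, qdisc_dequeue_peeked() */

	/* Peek-then-dequeue: never take a packet we might have to put back. */
	static struct sk_buff *example_dequeue(struct Qdisc *sch, struct Qdisc *child,
					       bool (*can_send)(const struct sk_buff *skb))
	{
		struct sk_buff *skb = child->ops->peek(child);	/* look, do not remove */

		if (skb == NULL || !can_send(skb))
			return NULL;				/* nothing to requeue later */

		skb = qdisc_dequeue_peeked(child);		/* now commit and remove it */
		if (likely(skb != NULL))
			sch->q.qlen--;
		return skb;
	}

The same structure appears in sch_atm_dequeue() above and drr_dequeue() further down, while sch_cbq, sch_fifo and the other schedulers simply point ->peek at a generic helper (qdisc_peek_dequeued() or qdisc_peek_head()).
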
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 507fb488bc98..094a874b48bc 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -33,6 +33,7 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
33 .priv_size = 0, 33 .priv_size = 0,
34 .enqueue = blackhole_enqueue, 34 .enqueue = blackhole_enqueue,
35 .dequeue = blackhole_dequeue, 35 .dequeue = blackhole_dequeue,
36 .peek = blackhole_dequeue,
36 .owner = THIS_MODULE, 37 .owner = THIS_MODULE,
37}; 38};
38 39
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 03e389e8d945..9e43ed949167 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -405,40 +405,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
405 return ret; 405 return ret;
406} 406}
407 407
408static int
409cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
410{
411 struct cbq_sched_data *q = qdisc_priv(sch);
412 struct cbq_class *cl;
413 int ret;
414
415 if ((cl = q->tx_class) == NULL) {
416 kfree_skb(skb);
417 sch->qstats.drops++;
418 return NET_XMIT_CN;
419 }
420 q->tx_class = NULL;
421
422 cbq_mark_toplevel(q, cl);
423
424#ifdef CONFIG_NET_CLS_ACT
425 q->rx_class = cl;
426 cl->q->__parent = sch;
427#endif
428 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
429 sch->q.qlen++;
430 sch->qstats.requeues++;
431 if (!cl->next_alive)
432 cbq_activate_class(cl);
433 return 0;
434 }
435 if (net_xmit_drop_count(ret)) {
436 sch->qstats.drops++;
437 cl->qstats.drops++;
438 }
439 return ret;
440}
441
442/* Overlimit actions */ 408/* Overlimit actions */
443 409
444/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */ 410/* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
@@ -1669,7 +1635,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1669#endif 1635#endif
1670 } 1636 }
1671 sch_tree_lock(sch); 1637 sch_tree_lock(sch);
1672 *old = xchg(&cl->q, new); 1638 *old = cl->q;
1639 cl->q = new;
1673 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 1640 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1674 qdisc_reset(*old); 1641 qdisc_reset(*old);
1675 sch_tree_unlock(sch); 1642 sch_tree_unlock(sch);
@@ -1798,11 +1765,23 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1798 } 1765 }
1799 1766
1800 if (tb[TCA_CBQ_RATE]) { 1767 if (tb[TCA_CBQ_RATE]) {
1801 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]); 1768 rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1769 tb[TCA_CBQ_RTAB]);
1802 if (rtab == NULL) 1770 if (rtab == NULL)
1803 return -EINVAL; 1771 return -EINVAL;
1804 } 1772 }
1805 1773
1774 if (tca[TCA_RATE]) {
1775 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1776 qdisc_root_sleeping_lock(sch),
1777 tca[TCA_RATE]);
1778 if (err) {
1779 if (rtab)
1780 qdisc_put_rtab(rtab);
1781 return err;
1782 }
1783 }
1784
1806 /* Change class parameters */ 1785 /* Change class parameters */
1807 sch_tree_lock(sch); 1786 sch_tree_lock(sch);
1808 1787
@@ -1810,8 +1789,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1810 cbq_deactivate_class(cl); 1789 cbq_deactivate_class(cl);
1811 1790
1812 if (rtab) { 1791 if (rtab) {
1813 rtab = xchg(&cl->R_tab, rtab); 1792 qdisc_put_rtab(cl->R_tab);
1814 qdisc_put_rtab(rtab); 1793 cl->R_tab = rtab;
1815 } 1794 }
1816 1795
1817 if (tb[TCA_CBQ_LSSOPT]) 1796 if (tb[TCA_CBQ_LSSOPT])
@@ -1838,10 +1817,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1838 1817
1839 sch_tree_unlock(sch); 1818 sch_tree_unlock(sch);
1840 1819
1841 if (tca[TCA_RATE])
1842 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1843 qdisc_root_sleeping_lock(sch),
1844 tca[TCA_RATE]);
1845 return 0; 1820 return 0;
1846 } 1821 }
1847 1822
@@ -1888,6 +1863,17 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1888 cl = kzalloc(sizeof(*cl), GFP_KERNEL); 1863 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1889 if (cl == NULL) 1864 if (cl == NULL)
1890 goto failure; 1865 goto failure;
1866
1867 if (tca[TCA_RATE]) {
1868 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1869 qdisc_root_sleeping_lock(sch),
1870 tca[TCA_RATE]);
1871 if (err) {
1872 kfree(cl);
1873 goto failure;
1874 }
1875 }
1876
1891 cl->R_tab = rtab; 1877 cl->R_tab = rtab;
1892 rtab = NULL; 1878 rtab = NULL;
1893 cl->refcnt = 1; 1879 cl->refcnt = 1;
@@ -1929,10 +1915,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1929 1915
1930 qdisc_class_hash_grow(sch, &q->clhash); 1916 qdisc_class_hash_grow(sch, &q->clhash);
1931 1917
1932 if (tca[TCA_RATE])
1933 gen_new_estimator(&cl->bstats, &cl->rate_est,
1934 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1935
1936 *arg = (unsigned long)cl; 1918 *arg = (unsigned long)cl;
1937 return 0; 1919 return 0;
1938 1920
@@ -2066,7 +2048,7 @@ static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2066 .priv_size = sizeof(struct cbq_sched_data), 2048 .priv_size = sizeof(struct cbq_sched_data),
2067 .enqueue = cbq_enqueue, 2049 .enqueue = cbq_enqueue,
2068 .dequeue = cbq_dequeue, 2050 .dequeue = cbq_dequeue,
2069 .requeue = cbq_requeue, 2051 .peek = qdisc_peek_dequeued,
2070 .drop = cbq_drop, 2052 .drop = cbq_drop,
2071 .init = cbq_init, 2053 .init = cbq_init,
2072 .reset = cbq_reset, 2054 .reset = cbq_reset,
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
new file mode 100644
index 000000000000..f6b4fa97df70
--- /dev/null
+++ b/net/sched/sch_drr.c
@@ -0,0 +1,519 @@
1/*
2 * net/sched/sch_drr.c Deficit Round Robin scheduler
3 *
4 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/errno.h>
14#include <linux/netdevice.h>
15#include <linux/pkt_sched.h>
16#include <net/sch_generic.h>
17#include <net/pkt_sched.h>
18#include <net/pkt_cls.h>
19
20struct drr_class {
21 struct Qdisc_class_common common;
22 unsigned int refcnt;
23 unsigned int filter_cnt;
24
25 struct gnet_stats_basic bstats;
26 struct gnet_stats_queue qstats;
27 struct gnet_stats_rate_est rate_est;
28 struct list_head alist;
29 struct Qdisc *qdisc;
30
31 u32 quantum;
32 u32 deficit;
33};
34
35struct drr_sched {
36 struct list_head active;
37 struct tcf_proto *filter_list;
38 struct Qdisc_class_hash clhash;
39};
40
41static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
42{
43 struct drr_sched *q = qdisc_priv(sch);
44 struct Qdisc_class_common *clc;
45
46 clc = qdisc_class_find(&q->clhash, classid);
47 if (clc == NULL)
48 return NULL;
49 return container_of(clc, struct drr_class, common);
50}
51
52static void drr_purge_queue(struct drr_class *cl)
53{
54 unsigned int len = cl->qdisc->q.qlen;
55
56 qdisc_reset(cl->qdisc);
57 qdisc_tree_decrease_qlen(cl->qdisc, len);
58}
59
60static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
61 [TCA_DRR_QUANTUM] = { .type = NLA_U32 },
62};
63
64static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
65 struct nlattr **tca, unsigned long *arg)
66{
67 struct drr_sched *q = qdisc_priv(sch);
68 struct drr_class *cl = (struct drr_class *)*arg;
69 struct nlattr *tb[TCA_DRR_MAX + 1];
70 u32 quantum;
71 int err;
72
73 err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy);
74 if (err < 0)
75 return err;
76
77 if (tb[TCA_DRR_QUANTUM]) {
78 quantum = nla_get_u32(tb[TCA_DRR_QUANTUM]);
79 if (quantum == 0)
80 return -EINVAL;
81 } else
82 quantum = psched_mtu(qdisc_dev(sch));
83
84 if (cl != NULL) {
85 if (tca[TCA_RATE]) {
86 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
87 qdisc_root_sleeping_lock(sch),
88 tca[TCA_RATE]);
89 if (err)
90 return err;
91 }
92
93 sch_tree_lock(sch);
94 if (tb[TCA_DRR_QUANTUM])
95 cl->quantum = quantum;
96 sch_tree_unlock(sch);
97
98 return 0;
99 }
100
101 cl = kzalloc(sizeof(struct drr_class), GFP_KERNEL);
102 if (cl == NULL)
103 return -ENOBUFS;
104
105 cl->refcnt = 1;
106 cl->common.classid = classid;
107 cl->quantum = quantum;
108 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
109 &pfifo_qdisc_ops, classid);
110 if (cl->qdisc == NULL)
111 cl->qdisc = &noop_qdisc;
112
113 if (tca[TCA_RATE]) {
114 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
115 qdisc_root_sleeping_lock(sch),
116 tca[TCA_RATE]);
117 if (err) {
118 qdisc_destroy(cl->qdisc);
119 kfree(cl);
120 return err;
121 }
122 }
123
124 sch_tree_lock(sch);
125 qdisc_class_hash_insert(&q->clhash, &cl->common);
126 sch_tree_unlock(sch);
127
128 qdisc_class_hash_grow(sch, &q->clhash);
129
130 *arg = (unsigned long)cl;
131 return 0;
132}
133
134static void drr_destroy_class(struct Qdisc *sch, struct drr_class *cl)
135{
136 gen_kill_estimator(&cl->bstats, &cl->rate_est);
137 qdisc_destroy(cl->qdisc);
138 kfree(cl);
139}
140
141static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
142{
143 struct drr_sched *q = qdisc_priv(sch);
144 struct drr_class *cl = (struct drr_class *)arg;
145
146 if (cl->filter_cnt > 0)
147 return -EBUSY;
148
149 sch_tree_lock(sch);
150
151 drr_purge_queue(cl);
152 qdisc_class_hash_remove(&q->clhash, &cl->common);
153
154 if (--cl->refcnt == 0)
155 drr_destroy_class(sch, cl);
156
157 sch_tree_unlock(sch);
158 return 0;
159}
160
161static unsigned long drr_get_class(struct Qdisc *sch, u32 classid)
162{
163 struct drr_class *cl = drr_find_class(sch, classid);
164
165 if (cl != NULL)
166 cl->refcnt++;
167
168 return (unsigned long)cl;
169}
170
171static void drr_put_class(struct Qdisc *sch, unsigned long arg)
172{
173 struct drr_class *cl = (struct drr_class *)arg;
174
175 if (--cl->refcnt == 0)
176 drr_destroy_class(sch, cl);
177}
178
179static struct tcf_proto **drr_tcf_chain(struct Qdisc *sch, unsigned long cl)
180{
181 struct drr_sched *q = qdisc_priv(sch);
182
183 if (cl)
184 return NULL;
185
186 return &q->filter_list;
187}
188
189static unsigned long drr_bind_tcf(struct Qdisc *sch, unsigned long parent,
190 u32 classid)
191{
192 struct drr_class *cl = drr_find_class(sch, classid);
193
194 if (cl != NULL)
195 cl->filter_cnt++;
196
197 return (unsigned long)cl;
198}
199
200static void drr_unbind_tcf(struct Qdisc *sch, unsigned long arg)
201{
202 struct drr_class *cl = (struct drr_class *)arg;
203
204 cl->filter_cnt--;
205}
206
207static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
208 struct Qdisc *new, struct Qdisc **old)
209{
210 struct drr_class *cl = (struct drr_class *)arg;
211
212 if (new == NULL) {
213 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
214 &pfifo_qdisc_ops, cl->common.classid);
215 if (new == NULL)
216 new = &noop_qdisc;
217 }
218
219 sch_tree_lock(sch);
220 drr_purge_queue(cl);
221 *old = cl->qdisc;
222 cl->qdisc = new;
223 sch_tree_unlock(sch);
224 return 0;
225}
226
227static struct Qdisc *drr_class_leaf(struct Qdisc *sch, unsigned long arg)
228{
229 struct drr_class *cl = (struct drr_class *)arg;
230
231 return cl->qdisc;
232}
233
234static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg)
235{
236 struct drr_class *cl = (struct drr_class *)arg;
237
238 if (cl->qdisc->q.qlen == 0)
239 list_del(&cl->alist);
240}
241
242static int drr_dump_class(struct Qdisc *sch, unsigned long arg,
243 struct sk_buff *skb, struct tcmsg *tcm)
244{
245 struct drr_class *cl = (struct drr_class *)arg;
246 struct nlattr *nest;
247
248 tcm->tcm_parent = TC_H_ROOT;
249 tcm->tcm_handle = cl->common.classid;
250 tcm->tcm_info = cl->qdisc->handle;
251
252 nest = nla_nest_start(skb, TCA_OPTIONS);
253 if (nest == NULL)
254 goto nla_put_failure;
255 NLA_PUT_U32(skb, TCA_DRR_QUANTUM, cl->quantum);
256 return nla_nest_end(skb, nest);
257
258nla_put_failure:
259 nla_nest_cancel(skb, nest);
260 return -EMSGSIZE;
261}
262
263static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
264 struct gnet_dump *d)
265{
266 struct drr_class *cl = (struct drr_class *)arg;
267 struct tc_drr_stats xstats;
268
269 memset(&xstats, 0, sizeof(xstats));
270 if (cl->qdisc->q.qlen)
271 xstats.deficit = cl->deficit;
272
273 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
274 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
275 gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
276 return -1;
277
278 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
279}
280
281static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
282{
283 struct drr_sched *q = qdisc_priv(sch);
284 struct drr_class *cl;
285 struct hlist_node *n;
286 unsigned int i;
287
288 if (arg->stop)
289 return;
290
291 for (i = 0; i < q->clhash.hashsize; i++) {
292 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
293 if (arg->count < arg->skip) {
294 arg->count++;
295 continue;
296 }
297 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
298 arg->stop = 1;
299 return;
300 }
301 arg->count++;
302 }
303 }
304}
305
306static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
307 int *qerr)
308{
309 struct drr_sched *q = qdisc_priv(sch);
310 struct drr_class *cl;
311 struct tcf_result res;
312 int result;
313
314 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
315 cl = drr_find_class(sch, skb->priority);
316 if (cl != NULL)
317 return cl;
318 }
319
320 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
321 result = tc_classify(skb, q->filter_list, &res);
322 if (result >= 0) {
323#ifdef CONFIG_NET_CLS_ACT
324 switch (result) {
325 case TC_ACT_QUEUED:
326 case TC_ACT_STOLEN:
327 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
328 case TC_ACT_SHOT:
329 return NULL;
330 }
331#endif
332 cl = (struct drr_class *)res.class;
333 if (cl == NULL)
334 cl = drr_find_class(sch, res.classid);
335 return cl;
336 }
337 return NULL;
338}
339
340static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
341{
342 struct drr_sched *q = qdisc_priv(sch);
343 struct drr_class *cl;
344 unsigned int len;
345 int err;
346
347 cl = drr_classify(skb, sch, &err);
348 if (cl == NULL) {
349 if (err & __NET_XMIT_BYPASS)
350 sch->qstats.drops++;
351 kfree_skb(skb);
352 return err;
353 }
354
355 len = qdisc_pkt_len(skb);
356 err = qdisc_enqueue(skb, cl->qdisc);
357 if (unlikely(err != NET_XMIT_SUCCESS)) {
358 if (net_xmit_drop_count(err)) {
359 cl->qstats.drops++;
360 sch->qstats.drops++;
361 }
362 return err;
363 }
364
365 if (cl->qdisc->q.qlen == 1) {
366 list_add_tail(&cl->alist, &q->active);
367 cl->deficit = cl->quantum;
368 }
369
370 cl->bstats.packets++;
371 cl->bstats.bytes += len;
372 sch->bstats.packets++;
373 sch->bstats.bytes += len;
374
375 sch->q.qlen++;
376 return err;
377}
378
379static struct sk_buff *drr_dequeue(struct Qdisc *sch)
380{
381 struct drr_sched *q = qdisc_priv(sch);
382 struct drr_class *cl;
383 struct sk_buff *skb;
384 unsigned int len;
385
386 if (list_empty(&q->active))
387 goto out;
388 while (1) {
389 cl = list_first_entry(&q->active, struct drr_class, alist);
390 skb = cl->qdisc->ops->peek(cl->qdisc);
391 if (skb == NULL)
392 goto out;
393
394 len = qdisc_pkt_len(skb);
395 if (len <= cl->deficit) {
396 cl->deficit -= len;
397 skb = qdisc_dequeue_peeked(cl->qdisc);
398 if (cl->qdisc->q.qlen == 0)
399 list_del(&cl->alist);
400 sch->q.qlen--;
401 return skb;
402 }
403
404 cl->deficit += cl->quantum;
405 list_move_tail(&cl->alist, &q->active);
406 }
407out:
408 return NULL;
409}
410
411static unsigned int drr_drop(struct Qdisc *sch)
412{
413 struct drr_sched *q = qdisc_priv(sch);
414 struct drr_class *cl;
415 unsigned int len;
416
417 list_for_each_entry(cl, &q->active, alist) {
418 if (cl->qdisc->ops->drop) {
419 len = cl->qdisc->ops->drop(cl->qdisc);
420 if (len > 0) {
421 sch->q.qlen--;
422 if (cl->qdisc->q.qlen == 0)
423 list_del(&cl->alist);
424 return len;
425 }
426 }
427 }
428 return 0;
429}
430
431static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
432{
433 struct drr_sched *q = qdisc_priv(sch);
434 int err;
435
436 err = qdisc_class_hash_init(&q->clhash);
437 if (err < 0)
438 return err;
439 INIT_LIST_HEAD(&q->active);
440 return 0;
441}
442
443static void drr_reset_qdisc(struct Qdisc *sch)
444{
445 struct drr_sched *q = qdisc_priv(sch);
446 struct drr_class *cl;
447 struct hlist_node *n;
448 unsigned int i;
449
450 for (i = 0; i < q->clhash.hashsize; i++) {
451 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
452 if (cl->qdisc->q.qlen)
453 list_del(&cl->alist);
454 qdisc_reset(cl->qdisc);
455 }
456 }
457 sch->q.qlen = 0;
458}
459
460static void drr_destroy_qdisc(struct Qdisc *sch)
461{
462 struct drr_sched *q = qdisc_priv(sch);
463 struct drr_class *cl;
464 struct hlist_node *n, *next;
465 unsigned int i;
466
467 tcf_destroy_chain(&q->filter_list);
468
469 for (i = 0; i < q->clhash.hashsize; i++) {
470 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
471 common.hnode)
472 drr_destroy_class(sch, cl);
473 }
474 qdisc_class_hash_destroy(&q->clhash);
475}
476
477static const struct Qdisc_class_ops drr_class_ops = {
478 .change = drr_change_class,
479 .delete = drr_delete_class,
480 .get = drr_get_class,
481 .put = drr_put_class,
482 .tcf_chain = drr_tcf_chain,
483 .bind_tcf = drr_bind_tcf,
484 .unbind_tcf = drr_unbind_tcf,
485 .graft = drr_graft_class,
486 .leaf = drr_class_leaf,
487 .qlen_notify = drr_qlen_notify,
488 .dump = drr_dump_class,
489 .dump_stats = drr_dump_class_stats,
490 .walk = drr_walk,
491};
492
493static struct Qdisc_ops drr_qdisc_ops __read_mostly = {
494 .cl_ops = &drr_class_ops,
495 .id = "drr",
496 .priv_size = sizeof(struct drr_sched),
497 .enqueue = drr_enqueue,
498 .dequeue = drr_dequeue,
499 .peek = qdisc_peek_dequeued,
500 .drop = drr_drop,
501 .init = drr_init_qdisc,
502 .reset = drr_reset_qdisc,
503 .destroy = drr_destroy_qdisc,
504 .owner = THIS_MODULE,
505};
506
507static int __init drr_init(void)
508{
509 return register_qdisc(&drr_qdisc_ops);
510}
511
512static void __exit drr_exit(void)
513{
514 unregister_qdisc(&drr_qdisc_ops);
515}
516
517module_init(drr_init);
518module_exit(drr_exit);
519MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index ba43aab3a851..d303daa45d49 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -68,7 +68,8 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
68 } 68 }
69 69
70 sch_tree_lock(sch); 70 sch_tree_lock(sch);
71 *old = xchg(&p->q, new); 71 *old = p->q;
72 p->q = new;
72 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 73 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
73 qdisc_reset(*old); 74 qdisc_reset(*old);
74 sch_tree_unlock(sch); 75 sch_tree_unlock(sch);
@@ -313,24 +314,13 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
313 return skb; 314 return skb;
314} 315}
315 316
316static int dsmark_requeue(struct sk_buff *skb, struct Qdisc *sch) 317static struct sk_buff *dsmark_peek(struct Qdisc *sch)
317{ 318{
318 struct dsmark_qdisc_data *p = qdisc_priv(sch); 319 struct dsmark_qdisc_data *p = qdisc_priv(sch);
319 int err;
320
321 pr_debug("dsmark_requeue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
322
323 err = p->q->ops->requeue(skb, p->q);
324 if (err != NET_XMIT_SUCCESS) {
325 if (net_xmit_drop_count(err))
326 sch->qstats.drops++;
327 return err;
328 }
329 320
330 sch->q.qlen++; 321 pr_debug("dsmark_peek(sch %p,[qdisc %p])\n", sch, p);
331 sch->qstats.requeues++;
332 322
333 return NET_XMIT_SUCCESS; 323 return p->q->ops->peek(p->q);
334} 324}
335 325
336static unsigned int dsmark_drop(struct Qdisc *sch) 326static unsigned int dsmark_drop(struct Qdisc *sch)
@@ -496,7 +486,7 @@ static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
496 .priv_size = sizeof(struct dsmark_qdisc_data), 486 .priv_size = sizeof(struct dsmark_qdisc_data),
497 .enqueue = dsmark_enqueue, 487 .enqueue = dsmark_enqueue,
498 .dequeue = dsmark_dequeue, 488 .dequeue = dsmark_dequeue,
499 .requeue = dsmark_requeue, 489 .peek = dsmark_peek,
500 .drop = dsmark_drop, 490 .drop = dsmark_drop,
501 .init = dsmark_init, 491 .init = dsmark_init,
502 .reset = dsmark_reset, 492 .reset = dsmark_reset,
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 23d258bfe8ac..92cfc9d7e3b9 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -83,7 +83,7 @@ struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
83 .priv_size = sizeof(struct fifo_sched_data), 83 .priv_size = sizeof(struct fifo_sched_data),
84 .enqueue = pfifo_enqueue, 84 .enqueue = pfifo_enqueue,
85 .dequeue = qdisc_dequeue_head, 85 .dequeue = qdisc_dequeue_head,
86 .requeue = qdisc_requeue, 86 .peek = qdisc_peek_head,
87 .drop = qdisc_queue_drop, 87 .drop = qdisc_queue_drop,
88 .init = fifo_init, 88 .init = fifo_init,
89 .reset = qdisc_reset_queue, 89 .reset = qdisc_reset_queue,
@@ -98,7 +98,7 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
98 .priv_size = sizeof(struct fifo_sched_data), 98 .priv_size = sizeof(struct fifo_sched_data),
99 .enqueue = bfifo_enqueue, 99 .enqueue = bfifo_enqueue,
100 .dequeue = qdisc_dequeue_head, 100 .dequeue = qdisc_dequeue_head,
101 .requeue = qdisc_requeue, 101 .peek = qdisc_peek_head,
102 .drop = qdisc_queue_drop, 102 .drop = qdisc_queue_drop,
103 .init = fifo_init, 103 .init = fifo_init,
104 .reset = qdisc_reset_queue, 104 .reset = qdisc_reset_queue,
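The FIFO qdiscs keep their packets directly on sch->q, so their .peek does not need the dequeue-and-stash helper; qdisc_peek_head() is enough. A minimal sketch of the assumed helper shape:

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
{
	return skb_peek(&sch->q);	/* NULL when the queue is empty */
}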
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cdcd16fcfeda..5f5efe4e6072 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -224,7 +224,7 @@ static void dev_watchdog(unsigned long arg)
224 char drivername[64]; 224 char drivername[64];
225 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n", 225 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
226 dev->name, netdev_drivername(dev, drivername, 64)); 226 dev->name, netdev_drivername(dev, drivername, 64));
227 dev->tx_timeout(dev); 227 dev->netdev_ops->ndo_tx_timeout(dev);
228 } 228 }
229 if (!mod_timer(&dev->watchdog_timer, 229 if (!mod_timer(&dev->watchdog_timer,
230 round_jiffies(jiffies + 230 round_jiffies(jiffies +
@@ -239,7 +239,7 @@ static void dev_watchdog(unsigned long arg)
239 239
240void __netdev_watchdog_up(struct net_device *dev) 240void __netdev_watchdog_up(struct net_device *dev)
241{ 241{
242 if (dev->tx_timeout) { 242 if (dev->netdev_ops->ndo_tx_timeout) {
243 if (dev->watchdog_timeo <= 0) 243 if (dev->watchdog_timeo <= 0)
244 dev->watchdog_timeo = 5*HZ; 244 dev->watchdog_timeo = 5*HZ;
245 if (!mod_timer(&dev->watchdog_timer, 245 if (!mod_timer(&dev->watchdog_timer,
@@ -311,21 +311,12 @@ static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
311 return NULL; 311 return NULL;
312} 312}
313 313
314static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
315{
316 if (net_ratelimit())
317 printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
318 skb->dev->name);
319 kfree_skb(skb);
320 return NET_XMIT_CN;
321}
322
323struct Qdisc_ops noop_qdisc_ops __read_mostly = { 314struct Qdisc_ops noop_qdisc_ops __read_mostly = {
324 .id = "noop", 315 .id = "noop",
325 .priv_size = 0, 316 .priv_size = 0,
326 .enqueue = noop_enqueue, 317 .enqueue = noop_enqueue,
327 .dequeue = noop_dequeue, 318 .dequeue = noop_dequeue,
328 .requeue = noop_requeue, 319 .peek = noop_dequeue,
329 .owner = THIS_MODULE, 320 .owner = THIS_MODULE,
330}; 321};
331 322
@@ -340,7 +331,6 @@ struct Qdisc noop_qdisc = {
340 .flags = TCQ_F_BUILTIN, 331 .flags = TCQ_F_BUILTIN,
341 .ops = &noop_qdisc_ops, 332 .ops = &noop_qdisc_ops,
342 .list = LIST_HEAD_INIT(noop_qdisc.list), 333 .list = LIST_HEAD_INIT(noop_qdisc.list),
343 .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
344 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), 334 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
345 .dev_queue = &noop_netdev_queue, 335 .dev_queue = &noop_netdev_queue,
346}; 336};
@@ -351,7 +341,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
351 .priv_size = 0, 341 .priv_size = 0,
352 .enqueue = noop_enqueue, 342 .enqueue = noop_enqueue,
353 .dequeue = noop_dequeue, 343 .dequeue = noop_dequeue,
354 .requeue = noop_requeue, 344 .peek = noop_dequeue,
355 .owner = THIS_MODULE, 345 .owner = THIS_MODULE,
356}; 346};
357 347
@@ -367,7 +357,6 @@ static struct Qdisc noqueue_qdisc = {
367 .flags = TCQ_F_BUILTIN, 357 .flags = TCQ_F_BUILTIN,
368 .ops = &noqueue_qdisc_ops, 358 .ops = &noqueue_qdisc_ops,
369 .list = LIST_HEAD_INIT(noqueue_qdisc.list), 359 .list = LIST_HEAD_INIT(noqueue_qdisc.list),
370 .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
371 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), 360 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
372 .dev_queue = &noqueue_netdev_queue, 361 .dev_queue = &noqueue_netdev_queue,
373}; 362};
@@ -416,10 +405,17 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
416 return NULL; 405 return NULL;
417} 406}
418 407
419static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc) 408static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
420{ 409{
421 qdisc->q.qlen++; 410 int prio;
422 return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc)); 411 struct sk_buff_head *list = qdisc_priv(qdisc);
412
413 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
414 if (!skb_queue_empty(list + prio))
415 return skb_peek(list + prio);
416 }
417
418 return NULL;
423} 419}
424 420
425static void pfifo_fast_reset(struct Qdisc* qdisc) 421static void pfifo_fast_reset(struct Qdisc* qdisc)
@@ -462,7 +458,7 @@ static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
462 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head), 458 .priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
463 .enqueue = pfifo_fast_enqueue, 459 .enqueue = pfifo_fast_enqueue,
464 .dequeue = pfifo_fast_dequeue, 460 .dequeue = pfifo_fast_dequeue,
465 .requeue = pfifo_fast_requeue, 461 .peek = pfifo_fast_peek,
466 .init = pfifo_fast_init, 462 .init = pfifo_fast_init,
467 .reset = pfifo_fast_reset, 463 .reset = pfifo_fast_reset,
468 .dump = pfifo_fast_dump, 464 .dump = pfifo_fast_dump,
@@ -488,7 +484,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
488 sch->padded = (char *) sch - (char *) p; 484 sch->padded = (char *) sch - (char *) p;
489 485
490 INIT_LIST_HEAD(&sch->list); 486 INIT_LIST_HEAD(&sch->list);
491 skb_queue_head_init(&sch->requeue);
492 skb_queue_head_init(&sch->q); 487 skb_queue_head_init(&sch->q);
493 sch->ops = ops; 488 sch->ops = ops;
494 sch->enqueue = ops->enqueue; 489 sch->enqueue = ops->enqueue;
@@ -531,6 +526,9 @@ void qdisc_reset(struct Qdisc *qdisc)
531 526
532 if (ops->reset) 527 if (ops->reset)
533 ops->reset(qdisc); 528 ops->reset(qdisc);
529
530 kfree_skb(qdisc->gso_skb);
531 qdisc->gso_skb = NULL;
534} 532}
535EXPORT_SYMBOL(qdisc_reset); 533EXPORT_SYMBOL(qdisc_reset);
536 534
@@ -557,8 +555,6 @@ void qdisc_destroy(struct Qdisc *qdisc)
557 dev_put(qdisc_dev(qdisc)); 555 dev_put(qdisc_dev(qdisc));
558 556
559 kfree_skb(qdisc->gso_skb); 557 kfree_skb(qdisc->gso_skb);
560 __skb_queue_purge(&qdisc->requeue);
561
562 kfree((char *) qdisc - qdisc->padded); 558 kfree((char *) qdisc - qdisc->padded);
563} 559}
564EXPORT_SYMBOL(qdisc_destroy); 560EXPORT_SYMBOL(qdisc_destroy);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index c1ad6b8de105..40408d595c08 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -240,26 +240,6 @@ congestion_drop:
240 return NET_XMIT_CN; 240 return NET_XMIT_CN;
241} 241}
242 242
243static int gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
244{
245 struct gred_sched *t = qdisc_priv(sch);
246 struct gred_sched_data *q;
247 u16 dp = tc_index_to_dp(skb);
248
249 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
250 if (net_ratelimit())
251 printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x "
252 "for requeue, screwing up backlog.\n",
253 tc_index_to_dp(skb));
254 } else {
255 if (red_is_idling(&q->parms))
256 red_end_of_idle_period(&q->parms);
257 q->backlog += qdisc_pkt_len(skb);
258 }
259
260 return qdisc_requeue(skb, sch);
261}
262
263static struct sk_buff *gred_dequeue(struct Qdisc* sch) 243static struct sk_buff *gred_dequeue(struct Qdisc* sch)
264{ 244{
265 struct sk_buff *skb; 245 struct sk_buff *skb;
@@ -602,7 +582,7 @@ static struct Qdisc_ops gred_qdisc_ops __read_mostly = {
602 .priv_size = sizeof(struct gred_sched), 582 .priv_size = sizeof(struct gred_sched),
603 .enqueue = gred_enqueue, 583 .enqueue = gred_enqueue,
604 .dequeue = gred_dequeue, 584 .dequeue = gred_dequeue,
605 .requeue = gred_requeue, 585 .peek = qdisc_peek_head,
606 .drop = gred_drop, 586 .drop = gred_drop,
607 .init = gred_init, 587 .init = gred_init,
608 .reset = gred_reset, 588 .reset = gred_reset,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index c1e77da8cd09..45c31b1a4e1d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -184,7 +184,6 @@ struct hfsc_sched
184 struct rb_root eligible; /* eligible tree */ 184 struct rb_root eligible; /* eligible tree */
185 struct list_head droplist; /* active leaf class list (for 185 struct list_head droplist; /* active leaf class list (for
186 dropping) */ 186 dropping) */
187 struct sk_buff_head requeue; /* requeued packet */
188 struct qdisc_watchdog watchdog; /* watchdog timer */ 187 struct qdisc_watchdog watchdog; /* watchdog timer */
189}; 188};
190 189
@@ -880,28 +879,20 @@ set_passive(struct hfsc_class *cl)
880 */ 879 */
881} 880}
882 881
883/*
884 * hack to get length of first packet in queue.
885 */
886static unsigned int 882static unsigned int
887qdisc_peek_len(struct Qdisc *sch) 883qdisc_peek_len(struct Qdisc *sch)
888{ 884{
889 struct sk_buff *skb; 885 struct sk_buff *skb;
890 unsigned int len; 886 unsigned int len;
891 887
892 skb = sch->dequeue(sch); 888 skb = sch->ops->peek(sch);
893 if (skb == NULL) { 889 if (skb == NULL) {
894 if (net_ratelimit()) 890 if (net_ratelimit())
895 printk("qdisc_peek_len: non work-conserving qdisc ?\n"); 891 printk("qdisc_peek_len: non work-conserving qdisc ?\n");
896 return 0; 892 return 0;
897 } 893 }
898 len = qdisc_pkt_len(skb); 894 len = qdisc_pkt_len(skb);
899 if (unlikely(sch->ops->requeue(skb, sch) != NET_XMIT_SUCCESS)) { 895
900 if (net_ratelimit())
901 printk("qdisc_peek_len: failed to requeue\n");
902 qdisc_tree_decrease_qlen(sch, 1);
903 return 0;
904 }
905 return len; 896 return len;
906} 897}
907 898
@@ -1027,6 +1018,14 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1027 } 1018 }
1028 cur_time = psched_get_time(); 1019 cur_time = psched_get_time();
1029 1020
1021 if (tca[TCA_RATE]) {
1022 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1023 qdisc_root_sleeping_lock(sch),
1024 tca[TCA_RATE]);
1025 if (err)
1026 return err;
1027 }
1028
1030 sch_tree_lock(sch); 1029 sch_tree_lock(sch);
1031 if (rsc != NULL) 1030 if (rsc != NULL)
1032 hfsc_change_rsc(cl, rsc, cur_time); 1031 hfsc_change_rsc(cl, rsc, cur_time);
@@ -1043,10 +1042,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1043 } 1042 }
1044 sch_tree_unlock(sch); 1043 sch_tree_unlock(sch);
1045 1044
1046 if (tca[TCA_RATE])
1047 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1048 qdisc_root_sleeping_lock(sch),
1049 tca[TCA_RATE]);
1050 return 0; 1045 return 0;
1051 } 1046 }
1052 1047
@@ -1072,6 +1067,16 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1072 if (cl == NULL) 1067 if (cl == NULL)
1073 return -ENOBUFS; 1068 return -ENOBUFS;
1074 1069
1070 if (tca[TCA_RATE]) {
1071 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1072 qdisc_root_sleeping_lock(sch),
1073 tca[TCA_RATE]);
1074 if (err) {
1075 kfree(cl);
1076 return err;
1077 }
1078 }
1079
1075 if (rsc != NULL) 1080 if (rsc != NULL)
1076 hfsc_change_rsc(cl, rsc, 0); 1081 hfsc_change_rsc(cl, rsc, 0);
1077 if (fsc != NULL) 1082 if (fsc != NULL)
@@ -1102,9 +1107,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1102 1107
1103 qdisc_class_hash_grow(sch, &q->clhash); 1108 qdisc_class_hash_grow(sch, &q->clhash);
1104 1109
1105 if (tca[TCA_RATE])
1106 gen_new_estimator(&cl->bstats, &cl->rate_est,
1107 qdisc_root_sleeping_lock(sch), tca[TCA_RATE]);
1108 *arg = (unsigned long)cl; 1110 *arg = (unsigned long)cl;
1109 return 0; 1111 return 0;
1110} 1112}
@@ -1211,7 +1213,8 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1211 1213
1212 sch_tree_lock(sch); 1214 sch_tree_lock(sch);
1213 hfsc_purge_queue(sch, cl); 1215 hfsc_purge_queue(sch, cl);
1214 *old = xchg(&cl->qdisc, new); 1216 *old = cl->qdisc;
1217 cl->qdisc = new;
1215 sch_tree_unlock(sch); 1218 sch_tree_unlock(sch);
1216 return 0; 1219 return 0;
1217} 1220}
@@ -1440,7 +1443,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1440 return err; 1443 return err;
1441 q->eligible = RB_ROOT; 1444 q->eligible = RB_ROOT;
1442 INIT_LIST_HEAD(&q->droplist); 1445 INIT_LIST_HEAD(&q->droplist);
1443 skb_queue_head_init(&q->requeue);
1444 1446
1445 q->root.cl_common.classid = sch->handle; 1447 q->root.cl_common.classid = sch->handle;
1446 q->root.refcnt = 1; 1448 q->root.refcnt = 1;
@@ -1525,7 +1527,6 @@ hfsc_reset_qdisc(struct Qdisc *sch)
1525 hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode) 1527 hlist_for_each_entry(cl, n, &q->clhash.hash[i], cl_common.hnode)
1526 hfsc_reset_class(cl); 1528 hfsc_reset_class(cl);
1527 } 1529 }
1528 __skb_queue_purge(&q->requeue);
1529 q->eligible = RB_ROOT; 1530 q->eligible = RB_ROOT;
1530 INIT_LIST_HEAD(&q->droplist); 1531 INIT_LIST_HEAD(&q->droplist);
1531 qdisc_watchdog_cancel(&q->watchdog); 1532 qdisc_watchdog_cancel(&q->watchdog);
@@ -1550,7 +1551,6 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
1550 hfsc_destroy_class(sch, cl); 1551 hfsc_destroy_class(sch, cl);
1551 } 1552 }
1552 qdisc_class_hash_destroy(&q->clhash); 1553 qdisc_class_hash_destroy(&q->clhash);
1553 __skb_queue_purge(&q->requeue);
1554 qdisc_watchdog_cancel(&q->watchdog); 1554 qdisc_watchdog_cancel(&q->watchdog);
1555} 1555}
1556 1556
@@ -1574,7 +1574,7 @@ static int
1574hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) 1574hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1575{ 1575{
1576 struct hfsc_class *cl; 1576 struct hfsc_class *cl;
1577 int err; 1577 int uninitialized_var(err);
1578 1578
1579 cl = hfsc_classify(skb, sch, &err); 1579 cl = hfsc_classify(skb, sch, &err);
1580 if (cl == NULL) { 1580 if (cl == NULL) {
@@ -1617,8 +1617,6 @@ hfsc_dequeue(struct Qdisc *sch)
1617 1617
1618 if (sch->q.qlen == 0) 1618 if (sch->q.qlen == 0)
1619 return NULL; 1619 return NULL;
1620 if ((skb = __skb_dequeue(&q->requeue)))
1621 goto out;
1622 1620
1623 cur_time = psched_get_time(); 1621 cur_time = psched_get_time();
1624 1622
@@ -1642,7 +1640,7 @@ hfsc_dequeue(struct Qdisc *sch)
1642 } 1640 }
1643 } 1641 }
1644 1642
1645 skb = cl->qdisc->dequeue(cl->qdisc); 1643 skb = qdisc_dequeue_peeked(cl->qdisc);
1646 if (skb == NULL) { 1644 if (skb == NULL) {
1647 if (net_ratelimit()) 1645 if (net_ratelimit())
1648 printk("HFSC: Non-work-conserving qdisc ?\n"); 1646 printk("HFSC: Non-work-conserving qdisc ?\n");
@@ -1667,24 +1665,12 @@ hfsc_dequeue(struct Qdisc *sch)
1667 set_passive(cl); 1665 set_passive(cl);
1668 } 1666 }
1669 1667
1670 out:
1671 sch->flags &= ~TCQ_F_THROTTLED; 1668 sch->flags &= ~TCQ_F_THROTTLED;
1672 sch->q.qlen--; 1669 sch->q.qlen--;
1673 1670
1674 return skb; 1671 return skb;
1675} 1672}
1676 1673
1677static int
1678hfsc_requeue(struct sk_buff *skb, struct Qdisc *sch)
1679{
1680 struct hfsc_sched *q = qdisc_priv(sch);
1681
1682 __skb_queue_head(&q->requeue, skb);
1683 sch->q.qlen++;
1684 sch->qstats.requeues++;
1685 return NET_XMIT_SUCCESS;
1686}
1687
1688static unsigned int 1674static unsigned int
1689hfsc_drop(struct Qdisc *sch) 1675hfsc_drop(struct Qdisc *sch)
1690{ 1676{
@@ -1735,7 +1721,7 @@ static struct Qdisc_ops hfsc_qdisc_ops __read_mostly = {
1735 .dump = hfsc_dump_qdisc, 1721 .dump = hfsc_dump_qdisc,
1736 .enqueue = hfsc_enqueue, 1722 .enqueue = hfsc_enqueue,
1737 .dequeue = hfsc_dequeue, 1723 .dequeue = hfsc_dequeue,
1738 .requeue = hfsc_requeue, 1724 .peek = qdisc_peek_dequeued,
1739 .drop = hfsc_drop, 1725 .drop = hfsc_drop,
1740 .cl_ops = &hfsc_class_ops, 1726 .cl_ops = &hfsc_class_ops,
1741 .priv_size = sizeof(struct hfsc_sched), 1727 .priv_size = sizeof(struct hfsc_sched),
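The hfsc conversion shows the idiom the series converges on: look at the head packet with ops->peek(), and only pull it with qdisc_dequeue_peeked() once the scheduler has committed to sending it, so the packet measured by qdisc_peek_len() is the one actually dequeued. A minimal sketch of that pattern (hypothetical helper, not taken verbatim from the patch):

/* Hypothetical caller: dequeue the head packet of a child qdisc only
 * if it fits the remaining budget, otherwise leave it queued. */
static struct sk_buff *dequeue_if_within_budget(struct Qdisc *child,
						unsigned int budget)
{
	struct sk_buff *skb = child->ops->peek(child);

	if (skb == NULL || qdisc_pkt_len(skb) > budget)
		return NULL;			/* nothing sent, nothing lost */

	return qdisc_dequeue_peeked(child);	/* commit to this packet */
}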
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index d14f02056ae6..5070643ce534 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -84,12 +84,12 @@ struct htb_class {
84 unsigned int children; 84 unsigned int children;
85 struct htb_class *parent; /* parent class */ 85 struct htb_class *parent; /* parent class */
86 86
87 int prio; /* these two are used only by leaves... */
88 int quantum; /* but stored for parent-to-leaf return */
89
87 union { 90 union {
88 struct htb_class_leaf { 91 struct htb_class_leaf {
89 struct Qdisc *q; 92 struct Qdisc *q;
90 int prio;
91 int aprio;
92 int quantum;
93 int deficit[TC_HTB_MAXDEPTH]; 93 int deficit[TC_HTB_MAXDEPTH];
94 struct list_head drop_list; 94 struct list_head drop_list;
95 } leaf; 95 } leaf;
@@ -123,19 +123,8 @@ struct htb_class {
123 psched_tdiff_t mbuffer; /* max wait time */ 123 psched_tdiff_t mbuffer; /* max wait time */
124 long tokens, ctokens; /* current number of tokens */ 124 long tokens, ctokens; /* current number of tokens */
125 psched_time_t t_c; /* checkpoint time */ 125 psched_time_t t_c; /* checkpoint time */
126
127 int prio; /* For parent to leaf return possible here */
128 int quantum; /* we do backup. Finally full replacement */
129 /* of un.leaf originals should be done. */
130}; 126};
131 127
132static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate,
133 int size)
134{
135 long result = qdisc_l2t(rate, size);
136 return result;
137}
138
139struct htb_sched { 128struct htb_sched {
140 struct Qdisc_class_hash clhash; 129 struct Qdisc_class_hash clhash;
141 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ 130 struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */
@@ -152,9 +141,6 @@ struct htb_sched {
152 /* time of nearest event per level (row) */ 141 /* time of nearest event per level (row) */
153 psched_time_t near_ev_cache[TC_HTB_MAXDEPTH]; 142 psched_time_t near_ev_cache[TC_HTB_MAXDEPTH];
154 143
155 /* whether we hit non-work conserving class during this dequeue; we use */
156 int nwc_hit; /* this to disable mindelay complaint in dequeue */
157
158 int defcls; /* class where unclassified flows go to */ 144 int defcls; /* class where unclassified flows go to */
159 145
160 /* filters for qdisc itself */ 146 /* filters for qdisc itself */
@@ -527,10 +513,10 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl)
527 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen); 513 WARN_ON(cl->level || !cl->un.leaf.q || !cl->un.leaf.q->q.qlen);
528 514
529 if (!cl->prio_activity) { 515 if (!cl->prio_activity) {
530 cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); 516 cl->prio_activity = 1 << cl->prio;
531 htb_activate_prios(q, cl); 517 htb_activate_prios(q, cl);
532 list_add_tail(&cl->un.leaf.drop_list, 518 list_add_tail(&cl->un.leaf.drop_list,
533 q->drops + cl->un.leaf.aprio); 519 q->drops + cl->prio);
534 } 520 }
535} 521}
536 522
@@ -551,7 +537,7 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl)
551 537
552static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) 538static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
553{ 539{
554 int ret; 540 int uninitialized_var(ret);
555 struct htb_sched *q = qdisc_priv(sch); 541 struct htb_sched *q = qdisc_priv(sch);
556 struct htb_class *cl = htb_classify(skb, sch, &ret); 542 struct htb_class *cl = htb_classify(skb, sch, &ret);
557 543
@@ -591,45 +577,30 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
591 return NET_XMIT_SUCCESS; 577 return NET_XMIT_SUCCESS;
592} 578}
593 579
594/* TODO: requeuing packet charges it to policers again !! */ 580static inline void htb_accnt_tokens(struct htb_class *cl, int bytes, long diff)
595static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
596{ 581{
597 int ret; 582 long toks = diff + cl->tokens;
598 struct htb_sched *q = qdisc_priv(sch);
599 struct htb_class *cl = htb_classify(skb, sch, &ret);
600 struct sk_buff *tskb;
601 583
602 if (cl == HTB_DIRECT) { 584 if (toks > cl->buffer)
603 /* enqueue to helper queue */ 585 toks = cl->buffer;
604 if (q->direct_queue.qlen < q->direct_qlen) { 586 toks -= (long) qdisc_l2t(cl->rate, bytes);
605 __skb_queue_head(&q->direct_queue, skb); 587 if (toks <= -cl->mbuffer)
606 } else { 588 toks = 1 - cl->mbuffer;
607 __skb_queue_head(&q->direct_queue, skb);
608 tskb = __skb_dequeue_tail(&q->direct_queue);
609 kfree_skb(tskb);
610 sch->qstats.drops++;
611 return NET_XMIT_CN;
612 }
613#ifdef CONFIG_NET_CLS_ACT
614 } else if (!cl) {
615 if (ret & __NET_XMIT_BYPASS)
616 sch->qstats.drops++;
617 kfree_skb(skb);
618 return ret;
619#endif
620 } else if ((ret = cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q)) !=
621 NET_XMIT_SUCCESS) {
622 if (net_xmit_drop_count(ret)) {
623 sch->qstats.drops++;
624 cl->qstats.drops++;
625 }
626 return ret;
627 } else
628 htb_activate(q, cl);
629 589
630 sch->q.qlen++; 590 cl->tokens = toks;
631 sch->qstats.requeues++; 591}
632 return NET_XMIT_SUCCESS; 592
593static inline void htb_accnt_ctokens(struct htb_class *cl, int bytes, long diff)
594{
595 long toks = diff + cl->ctokens;
596
597 if (toks > cl->cbuffer)
598 toks = cl->cbuffer;
599 toks -= (long) qdisc_l2t(cl->ceil, bytes);
600 if (toks <= -cl->mbuffer)
601 toks = 1 - cl->mbuffer;
602
603 cl->ctokens = toks;
633} 604}
634 605
635/** 606/**
@@ -647,26 +618,20 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
647 int level, struct sk_buff *skb) 618 int level, struct sk_buff *skb)
648{ 619{
649 int bytes = qdisc_pkt_len(skb); 620 int bytes = qdisc_pkt_len(skb);
650 long toks, diff;
651 enum htb_cmode old_mode; 621 enum htb_cmode old_mode;
652 622 long diff;
653#define HTB_ACCNT(T,B,R) toks = diff + cl->T; \
654 if (toks > cl->B) toks = cl->B; \
655 toks -= L2T(cl, cl->R, bytes); \
656 if (toks <= -cl->mbuffer) toks = 1-cl->mbuffer; \
657 cl->T = toks
658 623
659 while (cl) { 624 while (cl) {
660 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer); 625 diff = psched_tdiff_bounded(q->now, cl->t_c, cl->mbuffer);
661 if (cl->level >= level) { 626 if (cl->level >= level) {
662 if (cl->level == level) 627 if (cl->level == level)
663 cl->xstats.lends++; 628 cl->xstats.lends++;
664 HTB_ACCNT(tokens, buffer, rate); 629 htb_accnt_tokens(cl, bytes, diff);
665 } else { 630 } else {
666 cl->xstats.borrows++; 631 cl->xstats.borrows++;
667 cl->tokens += diff; /* we moved t_c; update tokens */ 632 cl->tokens += diff; /* we moved t_c; update tokens */
668 } 633 }
669 HTB_ACCNT(ctokens, cbuffer, ceil); 634 htb_accnt_ctokens(cl, bytes, diff);
670 cl->t_c = q->now; 635 cl->t_c = q->now;
671 636
672 old_mode = cl->cmode; 637 old_mode = cl->cmode;
@@ -733,14 +698,14 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
733 while (n) { 698 while (n) {
734 struct htb_class *cl = 699 struct htb_class *cl =
735 rb_entry(n, struct htb_class, node[prio]); 700 rb_entry(n, struct htb_class, node[prio]);
736 if (id == cl->common.classid)
737 return n;
738 701
739 if (id > cl->common.classid) { 702 if (id > cl->common.classid) {
740 n = n->rb_right; 703 n = n->rb_right;
741 } else { 704 } else if (id < cl->common.classid) {
742 r = n; 705 r = n;
743 n = n->rb_left; 706 n = n->rb_left;
707 } else {
708 return n;
744 } 709 }
745 } 710 }
746 return r; 711 return r;
@@ -761,7 +726,7 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
761 u32 *pid; 726 u32 *pid;
762 } stk[TC_HTB_MAXDEPTH], *sp = stk; 727 } stk[TC_HTB_MAXDEPTH], *sp = stk;
763 728
764 WARN_ON(!tree->rb_node); 729 BUG_ON(!tree->rb_node);
765 sp->root = tree->rb_node; 730 sp->root = tree->rb_node;
766 sp->pptr = pptr; 731 sp->pptr = pptr;
767 sp->pid = pid; 732 sp->pid = pid;
@@ -781,9 +746,10 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
781 *sp->pptr = (*sp->pptr)->rb_left; 746 *sp->pptr = (*sp->pptr)->rb_left;
782 if (sp > stk) { 747 if (sp > stk) {
783 sp--; 748 sp--;
784 WARN_ON(!*sp->pptr); 749 if (!*sp->pptr) {
785 if (!*sp->pptr) 750 WARN_ON(1);
786 return NULL; 751 return NULL;
752 }
787 htb_next_rb_node(sp->pptr); 753 htb_next_rb_node(sp->pptr);
788 } 754 }
789 } else { 755 } else {
@@ -814,8 +780,7 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
814 780
815 do { 781 do {
816next: 782next:
817 WARN_ON(!cl); 783 if (unlikely(!cl))
818 if (!cl)
819 return NULL; 784 return NULL;
820 785
821 /* class can be empty - it is unlikely but can be true if leaf 786 /* class can be empty - it is unlikely but can be true if leaf
@@ -849,7 +814,7 @@ next:
849 cl->common.classid); 814 cl->common.classid);
850 cl->warned = 1; 815 cl->warned = 1;
851 } 816 }
852 q->nwc_hit++; 817
853 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> 818 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
854 ptr[0]) + prio); 819 ptr[0]) + prio);
855 cl = htb_lookup_leaf(q->row[level] + prio, prio, 820 cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -861,7 +826,7 @@ next:
861 if (likely(skb != NULL)) { 826 if (likely(skb != NULL)) {
862 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb); 827 cl->un.leaf.deficit[level] -= qdisc_pkt_len(skb);
863 if (cl->un.leaf.deficit[level] < 0) { 828 if (cl->un.leaf.deficit[level] < 0) {
864 cl->un.leaf.deficit[level] += cl->un.leaf.quantum; 829 cl->un.leaf.deficit[level] += cl->quantum;
865 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> 830 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
866 ptr[0]) + prio); 831 ptr[0]) + prio);
867 } 832 }
@@ -894,7 +859,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
894 q->now = psched_get_time(); 859 q->now = psched_get_time();
895 860
896 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC; 861 next_event = q->now + 5 * PSCHED_TICKS_PER_SEC;
897 q->nwc_hit = 0; 862
898 for (level = 0; level < TC_HTB_MAXDEPTH; level++) { 863 for (level = 0; level < TC_HTB_MAXDEPTH; level++) {
899 /* common case optimization - skip event handler quickly */ 864 /* common case optimization - skip event handler quickly */
900 int m; 865 int m;
@@ -1095,8 +1060,8 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
1095 opt.buffer = cl->buffer; 1060 opt.buffer = cl->buffer;
1096 opt.ceil = cl->ceil->rate; 1061 opt.ceil = cl->ceil->rate;
1097 opt.cbuffer = cl->cbuffer; 1062 opt.cbuffer = cl->cbuffer;
1098 opt.quantum = cl->un.leaf.quantum; 1063 opt.quantum = cl->quantum;
1099 opt.prio = cl->un.leaf.prio; 1064 opt.prio = cl->prio;
1100 opt.level = cl->level; 1065 opt.level = cl->level;
1101 NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); 1066 NLA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt);
1102 1067
@@ -1141,7 +1106,9 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1141 == NULL) 1106 == NULL)
1142 return -ENOBUFS; 1107 return -ENOBUFS;
1143 sch_tree_lock(sch); 1108 sch_tree_lock(sch);
1144 if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { 1109 *old = cl->un.leaf.q;
1110 cl->un.leaf.q = new;
1111 if (*old != NULL) {
1145 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 1112 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1146 qdisc_reset(*old); 1113 qdisc_reset(*old);
1147 } 1114 }
@@ -1198,8 +1165,6 @@ static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
1198 memset(&parent->un.inner, 0, sizeof(parent->un.inner)); 1165 memset(&parent->un.inner, 0, sizeof(parent->un.inner));
1199 INIT_LIST_HEAD(&parent->un.leaf.drop_list); 1166 INIT_LIST_HEAD(&parent->un.leaf.drop_list);
1200 parent->un.leaf.q = new_q ? new_q : &noop_qdisc; 1167 parent->un.leaf.q = new_q ? new_q : &noop_qdisc;
1201 parent->un.leaf.quantum = parent->quantum;
1202 parent->un.leaf.prio = parent->prio;
1203 parent->tokens = parent->buffer; 1168 parent->tokens = parent->buffer;
1204 parent->ctokens = parent->cbuffer; 1169 parent->ctokens = parent->cbuffer;
1205 parent->t_c = psched_get_time(); 1170 parent->t_c = psched_get_time();
@@ -1371,9 +1336,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1371 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) 1336 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL)
1372 goto failure; 1337 goto failure;
1373 1338
1374 gen_new_estimator(&cl->bstats, &cl->rate_est, 1339 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
1375 qdisc_root_sleeping_lock(sch), 1340 qdisc_root_sleeping_lock(sch),
1376 tca[TCA_RATE] ? : &est.nla); 1341 tca[TCA_RATE] ? : &est.nla);
1342 if (err) {
1343 kfree(cl);
1344 goto failure;
1345 }
1346
1377 cl->refcnt = 1; 1347 cl->refcnt = 1;
1378 cl->children = 0; 1348 cl->children = 0;
1379 INIT_LIST_HEAD(&cl->un.leaf.drop_list); 1349 INIT_LIST_HEAD(&cl->un.leaf.drop_list);
@@ -1425,37 +1395,36 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1425 if (parent) 1395 if (parent)
1426 parent->children++; 1396 parent->children++;
1427 } else { 1397 } else {
1428 if (tca[TCA_RATE]) 1398 if (tca[TCA_RATE]) {
1429 gen_replace_estimator(&cl->bstats, &cl->rate_est, 1399 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
1430 qdisc_root_sleeping_lock(sch), 1400 qdisc_root_sleeping_lock(sch),
1431 tca[TCA_RATE]); 1401 tca[TCA_RATE]);
1402 if (err)
1403 return err;
1404 }
1432 sch_tree_lock(sch); 1405 sch_tree_lock(sch);
1433 } 1406 }
1434 1407
1435 /* it used to be a nasty bug here, we have to check that node 1408 /* it used to be a nasty bug here, we have to check that node
1436 is really leaf before changing cl->un.leaf ! */ 1409 is really leaf before changing cl->un.leaf ! */
1437 if (!cl->level) { 1410 if (!cl->level) {
1438 cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; 1411 cl->quantum = rtab->rate.rate / q->rate2quantum;
1439 if (!hopt->quantum && cl->un.leaf.quantum < 1000) { 1412 if (!hopt->quantum && cl->quantum < 1000) {
1440 printk(KERN_WARNING 1413 printk(KERN_WARNING
1441 "HTB: quantum of class %X is small. Consider r2q change.\n", 1414 "HTB: quantum of class %X is small. Consider r2q change.\n",
1442 cl->common.classid); 1415 cl->common.classid);
1443 cl->un.leaf.quantum = 1000; 1416 cl->quantum = 1000;
1444 } 1417 }
1445 if (!hopt->quantum && cl->un.leaf.quantum > 200000) { 1418 if (!hopt->quantum && cl->quantum > 200000) {
1446 printk(KERN_WARNING 1419 printk(KERN_WARNING
1447 "HTB: quantum of class %X is big. Consider r2q change.\n", 1420 "HTB: quantum of class %X is big. Consider r2q change.\n",
1448 cl->common.classid); 1421 cl->common.classid);
1449 cl->un.leaf.quantum = 200000; 1422 cl->quantum = 200000;
1450 } 1423 }
1451 if (hopt->quantum) 1424 if (hopt->quantum)
1452 cl->un.leaf.quantum = hopt->quantum; 1425 cl->quantum = hopt->quantum;
1453 if ((cl->un.leaf.prio = hopt->prio) >= TC_HTB_NUMPRIO) 1426 if ((cl->prio = hopt->prio) >= TC_HTB_NUMPRIO)
1454 cl->un.leaf.prio = TC_HTB_NUMPRIO - 1; 1427 cl->prio = TC_HTB_NUMPRIO - 1;
1455
1456 /* backup for htb_parent_to_leaf */
1457 cl->quantum = cl->un.leaf.quantum;
1458 cl->prio = cl->un.leaf.prio;
1459 } 1428 }
1460 1429
1461 cl->buffer = hopt->buffer; 1430 cl->buffer = hopt->buffer;
@@ -1565,7 +1534,7 @@ static struct Qdisc_ops htb_qdisc_ops __read_mostly = {
1565 .priv_size = sizeof(struct htb_sched), 1534 .priv_size = sizeof(struct htb_sched),
1566 .enqueue = htb_enqueue, 1535 .enqueue = htb_enqueue,
1567 .dequeue = htb_dequeue, 1536 .dequeue = htb_dequeue,
1568 .requeue = htb_requeue, 1537 .peek = qdisc_peek_dequeued,
1569 .drop = htb_drop, 1538 .drop = htb_drop,
1570 .init = htb_init, 1539 .init = htb_init,
1571 .reset = htb_reset, 1540 .reset = htb_reset,
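The HTB_ACCNT macro is replaced by the htb_accnt_tokens()/htb_accnt_ctokens() helpers above; both share one shape: advance the token count by the elapsed time diff, clamp it to the configured burst, charge the packet via the rate table, and floor the result so a single large packet cannot drive the count much below -mbuffer. A generic restatement of that shape (illustrative only; cost stands for qdisc_l2t(rate, bytes)):

static long htb_account(long tokens, long diff, long burst,
			long max_debt, long cost)
{
	long toks = diff + tokens;	/* credit for elapsed time */

	if (toks > burst)
		toks = burst;		/* cannot bank more than one burst */
	toks -= cost;			/* charge this packet */
	if (toks <= -max_debt)
		toks = 1 - max_debt;	/* bound the accumulated debt */
	return toks;
}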
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 915f3149dde2..7e151861794b 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -92,40 +92,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
92 return ret; 92 return ret;
93} 93}
94 94
95
96static int
97multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
98{
99 struct Qdisc *qdisc;
100 struct multiq_sched_data *q = qdisc_priv(sch);
101 int ret;
102
103 qdisc = multiq_classify(skb, sch, &ret);
104#ifdef CONFIG_NET_CLS_ACT
105 if (qdisc == NULL) {
106 if (ret & __NET_XMIT_BYPASS)
107 sch->qstats.drops++;
108 kfree_skb(skb);
109 return ret;
110 }
111#endif
112
113 ret = qdisc->ops->requeue(skb, qdisc);
114 if (ret == NET_XMIT_SUCCESS) {
115 sch->q.qlen++;
116 sch->qstats.requeues++;
117 if (q->curband)
118 q->curband--;
119 else
120 q->curband = q->bands - 1;
121 return NET_XMIT_SUCCESS;
122 }
123 if (net_xmit_drop_count(ret))
124 sch->qstats.drops++;
125 return ret;
126}
127
128
129static struct sk_buff *multiq_dequeue(struct Qdisc *sch) 95static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
130{ 96{
131 struct multiq_sched_data *q = qdisc_priv(sch); 97 struct multiq_sched_data *q = qdisc_priv(sch);
@@ -140,7 +106,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
140 q->curband = 0; 106 q->curband = 0;
141 107
142 /* Check that target subqueue is available before 108 /* Check that target subqueue is available before
143 * pulling an skb to avoid excessive requeues 109 * pulling an skb to avoid head-of-line blocking.
144 */ 110 */
145 if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { 111 if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
146 qdisc = q->queues[q->curband]; 112 qdisc = q->queues[q->curband];
@@ -155,6 +121,34 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
155 121
156} 122}
157 123
124static struct sk_buff *multiq_peek(struct Qdisc *sch)
125{
126 struct multiq_sched_data *q = qdisc_priv(sch);
127 unsigned int curband = q->curband;
128 struct Qdisc *qdisc;
129 struct sk_buff *skb;
130 int band;
131
132 for (band = 0; band < q->bands; band++) {
133 /* cycle through bands to ensure fairness */
134 curband++;
135 if (curband >= q->bands)
136 curband = 0;
137
138 /* Check that target subqueue is available before
139 * pulling an skb to avoid head-of-line blocking.
140 */
141 if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
142 qdisc = q->queues[curband];
143 skb = qdisc->ops->peek(qdisc);
144 if (skb)
145 return skb;
146 }
147 }
148 return NULL;
149
150}
151
158static unsigned int multiq_drop(struct Qdisc *sch) 152static unsigned int multiq_drop(struct Qdisc *sch)
159{ 153{
160 struct multiq_sched_data *q = qdisc_priv(sch); 154 struct multiq_sched_data *q = qdisc_priv(sch);
@@ -220,7 +214,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
220 q->bands = qopt->bands; 214 q->bands = qopt->bands;
221 for (i = q->bands; i < q->max_bands; i++) { 215 for (i = q->bands; i < q->max_bands; i++) {
222 if (q->queues[i] != &noop_qdisc) { 216 if (q->queues[i] != &noop_qdisc) {
223 struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); 217 struct Qdisc *child = q->queues[i];
218 q->queues[i] = &noop_qdisc;
224 qdisc_tree_decrease_qlen(child, child->q.qlen); 219 qdisc_tree_decrease_qlen(child, child->q.qlen);
225 qdisc_destroy(child); 220 qdisc_destroy(child);
226 } 221 }
@@ -230,7 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
230 225
231 for (i = 0; i < q->bands; i++) { 226 for (i = 0; i < q->bands; i++) {
232 if (q->queues[i] == &noop_qdisc) { 227 if (q->queues[i] == &noop_qdisc) {
233 struct Qdisc *child; 228 struct Qdisc *child, *old;
234 child = qdisc_create_dflt(qdisc_dev(sch), 229 child = qdisc_create_dflt(qdisc_dev(sch),
235 sch->dev_queue, 230 sch->dev_queue,
236 &pfifo_qdisc_ops, 231 &pfifo_qdisc_ops,
@@ -238,12 +233,13 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
238 i + 1)); 233 i + 1));
239 if (child) { 234 if (child) {
240 sch_tree_lock(sch); 235 sch_tree_lock(sch);
241 child = xchg(&q->queues[i], child); 236 old = q->queues[i];
237 q->queues[i] = child;
242 238
243 if (child != &noop_qdisc) { 239 if (old != &noop_qdisc) {
244 qdisc_tree_decrease_qlen(child, 240 qdisc_tree_decrease_qlen(old,
245 child->q.qlen); 241 old->q.qlen);
246 qdisc_destroy(child); 242 qdisc_destroy(old);
247 } 243 }
248 sch_tree_unlock(sch); 244 sch_tree_unlock(sch);
249 } 245 }
@@ -451,7 +447,7 @@ static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
451 .priv_size = sizeof(struct multiq_sched_data), 447 .priv_size = sizeof(struct multiq_sched_data),
452 .enqueue = multiq_enqueue, 448 .enqueue = multiq_enqueue,
453 .dequeue = multiq_dequeue, 449 .dequeue = multiq_dequeue,
454 .requeue = multiq_requeue, 450 .peek = multiq_peek,
455 .drop = multiq_drop, 451 .drop = multiq_drop,
456 .init = multiq_init, 452 .init = multiq_init,
457 .reset = multiq_reset, 453 .reset = multiq_reset,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 98402f0efa47..d876b8734848 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -230,7 +230,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
230 */ 230 */
231 cb->time_to_send = psched_get_time(); 231 cb->time_to_send = psched_get_time();
232 q->counter = 0; 232 q->counter = 0;
233 ret = q->qdisc->ops->requeue(skb, q->qdisc); 233
234 __skb_queue_head(&q->qdisc->q, skb);
235 q->qdisc->qstats.backlog += qdisc_pkt_len(skb);
236 q->qdisc->qstats.requeues++;
237 ret = NET_XMIT_SUCCESS;
234 } 238 }
235 239
236 if (likely(ret == NET_XMIT_SUCCESS)) { 240 if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -245,20 +249,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
245 return ret; 249 return ret;
246} 250}
247 251
248/* Requeue packets but don't change time stamp */
249static int netem_requeue(struct sk_buff *skb, struct Qdisc *sch)
250{
251 struct netem_sched_data *q = qdisc_priv(sch);
252 int ret;
253
254 if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
255 sch->q.qlen++;
256 sch->qstats.requeues++;
257 }
258
259 return ret;
260}
261
262static unsigned int netem_drop(struct Qdisc* sch) 252static unsigned int netem_drop(struct Qdisc* sch)
263{ 253{
264 struct netem_sched_data *q = qdisc_priv(sch); 254 struct netem_sched_data *q = qdisc_priv(sch);
@@ -276,29 +266,25 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
276 struct netem_sched_data *q = qdisc_priv(sch); 266 struct netem_sched_data *q = qdisc_priv(sch);
277 struct sk_buff *skb; 267 struct sk_buff *skb;
278 268
279 smp_mb();
280 if (sch->flags & TCQ_F_THROTTLED) 269 if (sch->flags & TCQ_F_THROTTLED)
281 return NULL; 270 return NULL;
282 271
283 skb = q->qdisc->dequeue(q->qdisc); 272 skb = q->qdisc->ops->peek(q->qdisc);
284 if (skb) { 273 if (skb) {
285 const struct netem_skb_cb *cb = netem_skb_cb(skb); 274 const struct netem_skb_cb *cb = netem_skb_cb(skb);
286 psched_time_t now = psched_get_time(); 275 psched_time_t now = psched_get_time();
287 276
288 /* if more time remaining? */ 277 /* if more time remaining? */
289 if (cb->time_to_send <= now) { 278 if (cb->time_to_send <= now) {
279 skb = qdisc_dequeue_peeked(q->qdisc);
280 if (unlikely(!skb))
281 return NULL;
282
290 pr_debug("netem_dequeue: return skb=%p\n", skb); 283 pr_debug("netem_dequeue: return skb=%p\n", skb);
291 sch->q.qlen--; 284 sch->q.qlen--;
292 return skb; 285 return skb;
293 } 286 }
294 287
295 if (unlikely(q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS)) {
296 qdisc_tree_decrease_qlen(q->qdisc, 1);
297 sch->qstats.drops++;
298 printk(KERN_ERR "netem: %s could not requeue\n",
299 q->qdisc->ops->id);
300 }
301
302 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send); 288 qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
303 } 289 }
304 290
@@ -341,14 +327,13 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
341 root_lock = qdisc_root_sleeping_lock(sch); 327 root_lock = qdisc_root_sleeping_lock(sch);
342 328
343 spin_lock_bh(root_lock); 329 spin_lock_bh(root_lock);
344 d = xchg(&q->delay_dist, d); 330 kfree(q->delay_dist);
331 q->delay_dist = d;
345 spin_unlock_bh(root_lock); 332 spin_unlock_bh(root_lock);
346
347 kfree(d);
348 return 0; 333 return 0;
349} 334}
350 335
351static int get_correlation(struct Qdisc *sch, const struct nlattr *attr) 336static void get_correlation(struct Qdisc *sch, const struct nlattr *attr)
352{ 337{
353 struct netem_sched_data *q = qdisc_priv(sch); 338 struct netem_sched_data *q = qdisc_priv(sch);
354 const struct tc_netem_corr *c = nla_data(attr); 339 const struct tc_netem_corr *c = nla_data(attr);
@@ -356,27 +341,24 @@ static int get_correlation(struct Qdisc *sch, const struct nlattr *attr)
356 init_crandom(&q->delay_cor, c->delay_corr); 341 init_crandom(&q->delay_cor, c->delay_corr);
357 init_crandom(&q->loss_cor, c->loss_corr); 342 init_crandom(&q->loss_cor, c->loss_corr);
358 init_crandom(&q->dup_cor, c->dup_corr); 343 init_crandom(&q->dup_cor, c->dup_corr);
359 return 0;
360} 344}
361 345
362static int get_reorder(struct Qdisc *sch, const struct nlattr *attr) 346static void get_reorder(struct Qdisc *sch, const struct nlattr *attr)
363{ 347{
364 struct netem_sched_data *q = qdisc_priv(sch); 348 struct netem_sched_data *q = qdisc_priv(sch);
365 const struct tc_netem_reorder *r = nla_data(attr); 349 const struct tc_netem_reorder *r = nla_data(attr);
366 350
367 q->reorder = r->probability; 351 q->reorder = r->probability;
368 init_crandom(&q->reorder_cor, r->correlation); 352 init_crandom(&q->reorder_cor, r->correlation);
369 return 0;
370} 353}
371 354
372static int get_corrupt(struct Qdisc *sch, const struct nlattr *attr) 355static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
373{ 356{
374 struct netem_sched_data *q = qdisc_priv(sch); 357 struct netem_sched_data *q = qdisc_priv(sch);
375 const struct tc_netem_corrupt *r = nla_data(attr); 358 const struct tc_netem_corrupt *r = nla_data(attr);
376 359
377 q->corrupt = r->probability; 360 q->corrupt = r->probability;
378 init_crandom(&q->corrupt_cor, r->correlation); 361 init_crandom(&q->corrupt_cor, r->correlation);
379 return 0;
380} 362}
381 363
382static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 364static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
@@ -435,11 +417,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
435 if (q->gap) 417 if (q->gap)
436 q->reorder = ~0; 418 q->reorder = ~0;
437 419
438 if (tb[TCA_NETEM_CORR]) { 420 if (tb[TCA_NETEM_CORR])
439 ret = get_correlation(sch, tb[TCA_NETEM_CORR]); 421 get_correlation(sch, tb[TCA_NETEM_CORR]);
440 if (ret)
441 return ret;
442 }
443 422
444 if (tb[TCA_NETEM_DELAY_DIST]) { 423 if (tb[TCA_NETEM_DELAY_DIST]) {
445 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]); 424 ret = get_dist_table(sch, tb[TCA_NETEM_DELAY_DIST]);
@@ -447,17 +426,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
447 return ret; 426 return ret;
448 } 427 }
449 428
450 if (tb[TCA_NETEM_REORDER]) { 429 if (tb[TCA_NETEM_REORDER])
451 ret = get_reorder(sch, tb[TCA_NETEM_REORDER]); 430 get_reorder(sch, tb[TCA_NETEM_REORDER]);
452 if (ret)
453 return ret;
454 }
455 431
456 if (tb[TCA_NETEM_CORRUPT]) { 432 if (tb[TCA_NETEM_CORRUPT])
457 ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 433 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
458 if (ret)
459 return ret;
460 }
461 434
462 return 0; 435 return 0;
463} 436}
@@ -538,7 +511,7 @@ static struct Qdisc_ops tfifo_qdisc_ops __read_mostly = {
538 .priv_size = sizeof(struct fifo_sched_data), 511 .priv_size = sizeof(struct fifo_sched_data),
539 .enqueue = tfifo_enqueue, 512 .enqueue = tfifo_enqueue,
540 .dequeue = qdisc_dequeue_head, 513 .dequeue = qdisc_dequeue_head,
541 .requeue = qdisc_requeue, 514 .peek = qdisc_peek_head,
542 .drop = qdisc_queue_drop, 515 .drop = qdisc_queue_drop,
543 .init = tfifo_init, 516 .init = tfifo_init,
544 .reset = qdisc_reset_queue, 517 .reset = qdisc_reset_queue,
@@ -621,99 +594,12 @@ nla_put_failure:
621 return -1; 594 return -1;
622} 595}
623 596
624static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
625 struct sk_buff *skb, struct tcmsg *tcm)
626{
627 struct netem_sched_data *q = qdisc_priv(sch);
628
629 if (cl != 1) /* only one class */
630 return -ENOENT;
631
632 tcm->tcm_handle |= TC_H_MIN(1);
633 tcm->tcm_info = q->qdisc->handle;
634
635 return 0;
636}
637
638static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
639 struct Qdisc **old)
640{
641 struct netem_sched_data *q = qdisc_priv(sch);
642
643 if (new == NULL)
644 new = &noop_qdisc;
645
646 sch_tree_lock(sch);
647 *old = xchg(&q->qdisc, new);
648 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
649 qdisc_reset(*old);
650 sch_tree_unlock(sch);
651
652 return 0;
653}
654
655static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
656{
657 struct netem_sched_data *q = qdisc_priv(sch);
658 return q->qdisc;
659}
660
661static unsigned long netem_get(struct Qdisc *sch, u32 classid)
662{
663 return 1;
664}
665
666static void netem_put(struct Qdisc *sch, unsigned long arg)
667{
668}
669
670static int netem_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
671 struct nlattr **tca, unsigned long *arg)
672{
673 return -ENOSYS;
674}
675
676static int netem_delete(struct Qdisc *sch, unsigned long arg)
677{
678 return -ENOSYS;
679}
680
681static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
682{
683 if (!walker->stop) {
684 if (walker->count >= walker->skip)
685 if (walker->fn(sch, 1, walker) < 0) {
686 walker->stop = 1;
687 return;
688 }
689 walker->count++;
690 }
691}
692
693static struct tcf_proto **netem_find_tcf(struct Qdisc *sch, unsigned long cl)
694{
695 return NULL;
696}
697
698static const struct Qdisc_class_ops netem_class_ops = {
699 .graft = netem_graft,
700 .leaf = netem_leaf,
701 .get = netem_get,
702 .put = netem_put,
703 .change = netem_change_class,
704 .delete = netem_delete,
705 .walk = netem_walk,
706 .tcf_chain = netem_find_tcf,
707 .dump = netem_dump_class,
708};
709
710static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 597static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
711 .id = "netem", 598 .id = "netem",
712 .cl_ops = &netem_class_ops,
713 .priv_size = sizeof(struct netem_sched_data), 599 .priv_size = sizeof(struct netem_sched_data),
714 .enqueue = netem_enqueue, 600 .enqueue = netem_enqueue,
715 .dequeue = netem_dequeue, 601 .dequeue = netem_dequeue,
716 .requeue = netem_requeue, 602 .peek = qdisc_peek_dequeued,
717 .drop = netem_drop, 603 .drop = netem_drop,
718 .init = netem_init, 604 .init = netem_init,
719 .reset = netem_reset, 605 .reset = netem_reset,
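netem now follows the same two-step pattern as hfsc and tbf: peek at the head packet's stamped time_to_send, dequeue it with qdisc_dequeue_peeked() only once that time has passed, and otherwise arm the watchdog for the remaining delay. A compressed sketch of that control flow (hypothetical helper; the names mirror the patch, error handling trimmed):

static struct sk_buff *delayed_dequeue(struct Qdisc *child,
				       struct qdisc_watchdog *wd)
{
	struct sk_buff *skb = child->ops->peek(child);

	if (skb) {
		psched_time_t when = netem_skb_cb(skb)->time_to_send;

		if (when <= psched_get_time())
			return qdisc_dequeue_peeked(child); /* due: send it */

		qdisc_watchdog_schedule(wd, when);	/* not yet: sleep */
	}
	return NULL;
}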
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 504a78cdb718..94cecef70145 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -93,34 +93,20 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
93 return ret; 93 return ret;
94} 94}
95 95
96 96static struct sk_buff *prio_peek(struct Qdisc *sch)
97static int
98prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
99{ 97{
100 struct Qdisc *qdisc; 98 struct prio_sched_data *q = qdisc_priv(sch);
101 int ret; 99 int prio;
102
103 qdisc = prio_classify(skb, sch, &ret);
104#ifdef CONFIG_NET_CLS_ACT
105 if (qdisc == NULL) {
106 if (ret & __NET_XMIT_BYPASS)
107 sch->qstats.drops++;
108 kfree_skb(skb);
109 return ret;
110 }
111#endif
112 100
113 if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { 101 for (prio = 0; prio < q->bands; prio++) {
114 sch->q.qlen++; 102 struct Qdisc *qdisc = q->queues[prio];
115 sch->qstats.requeues++; 103 struct sk_buff *skb = qdisc->ops->peek(qdisc);
116 return NET_XMIT_SUCCESS; 104 if (skb)
105 return skb;
117 } 106 }
118 if (net_xmit_drop_count(ret)) 107 return NULL;
119 sch->qstats.drops++;
120 return ret;
121} 108}
122 109
123
124static struct sk_buff *prio_dequeue(struct Qdisc* sch) 110static struct sk_buff *prio_dequeue(struct Qdisc* sch)
125{ 111{
126 struct prio_sched_data *q = qdisc_priv(sch); 112 struct prio_sched_data *q = qdisc_priv(sch);
@@ -201,7 +187,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
201 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); 187 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
202 188
203 for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { 189 for (i=q->bands; i<TCQ_PRIO_BANDS; i++) {
204 struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); 190 struct Qdisc *child = q->queues[i];
191 q->queues[i] = &noop_qdisc;
205 if (child != &noop_qdisc) { 192 if (child != &noop_qdisc) {
206 qdisc_tree_decrease_qlen(child, child->q.qlen); 193 qdisc_tree_decrease_qlen(child, child->q.qlen);
207 qdisc_destroy(child); 194 qdisc_destroy(child);
@@ -211,18 +198,19 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
211 198
212 for (i=0; i<q->bands; i++) { 199 for (i=0; i<q->bands; i++) {
213 if (q->queues[i] == &noop_qdisc) { 200 if (q->queues[i] == &noop_qdisc) {
214 struct Qdisc *child; 201 struct Qdisc *child, *old;
215 child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 202 child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
216 &pfifo_qdisc_ops, 203 &pfifo_qdisc_ops,
217 TC_H_MAKE(sch->handle, i + 1)); 204 TC_H_MAKE(sch->handle, i + 1));
218 if (child) { 205 if (child) {
219 sch_tree_lock(sch); 206 sch_tree_lock(sch);
220 child = xchg(&q->queues[i], child); 207 old = q->queues[i];
208 q->queues[i] = child;
221 209
222 if (child != &noop_qdisc) { 210 if (old != &noop_qdisc) {
223 qdisc_tree_decrease_qlen(child, 211 qdisc_tree_decrease_qlen(old,
224 child->q.qlen); 212 old->q.qlen);
225 qdisc_destroy(child); 213 qdisc_destroy(old);
226 } 214 }
227 sch_tree_unlock(sch); 215 sch_tree_unlock(sch);
228 } 216 }
@@ -421,7 +409,7 @@ static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
421 .priv_size = sizeof(struct prio_sched_data), 409 .priv_size = sizeof(struct prio_sched_data),
422 .enqueue = prio_enqueue, 410 .enqueue = prio_enqueue,
423 .dequeue = prio_dequeue, 411 .dequeue = prio_dequeue,
424 .requeue = prio_requeue, 412 .peek = prio_peek,
425 .drop = prio_drop, 413 .drop = prio_drop,
426 .init = prio_init, 414 .init = prio_init,
427 .reset = prio_reset, 415 .reset = prio_reset,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 5da05839e225..2bdf241f6315 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -108,23 +108,6 @@ congestion_drop:
108 return NET_XMIT_CN; 108 return NET_XMIT_CN;
109} 109}
110 110
111static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
112{
113 struct red_sched_data *q = qdisc_priv(sch);
114 struct Qdisc *child = q->qdisc;
115 int ret;
116
117 if (red_is_idling(&q->parms))
118 red_end_of_idle_period(&q->parms);
119
120 ret = child->ops->requeue(skb, child);
121 if (likely(ret == NET_XMIT_SUCCESS)) {
122 sch->qstats.requeues++;
123 sch->q.qlen++;
124 }
125 return ret;
126}
127
128static struct sk_buff * red_dequeue(struct Qdisc* sch) 111static struct sk_buff * red_dequeue(struct Qdisc* sch)
129{ 112{
130 struct sk_buff *skb; 113 struct sk_buff *skb;
@@ -140,6 +123,14 @@ static struct sk_buff * red_dequeue(struct Qdisc* sch)
140 return skb; 123 return skb;
141} 124}
142 125
126static struct sk_buff * red_peek(struct Qdisc* sch)
127{
128 struct red_sched_data *q = qdisc_priv(sch);
129 struct Qdisc *child = q->qdisc;
130
131 return child->ops->peek(child);
132}
133
143static unsigned int red_drop(struct Qdisc* sch) 134static unsigned int red_drop(struct Qdisc* sch)
144{ 135{
145 struct red_sched_data *q = qdisc_priv(sch); 136 struct red_sched_data *q = qdisc_priv(sch);
@@ -211,7 +202,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
211 q->limit = ctl->limit; 202 q->limit = ctl->limit;
212 if (child) { 203 if (child) {
213 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 204 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
214 qdisc_destroy(xchg(&q->qdisc, child)); 205 qdisc_destroy(q->qdisc);
206 q->qdisc = child;
215 } 207 }
216 208
217 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, 209 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
@@ -292,7 +284,8 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
292 new = &noop_qdisc; 284 new = &noop_qdisc;
293 285
294 sch_tree_lock(sch); 286 sch_tree_lock(sch);
295 *old = xchg(&q->qdisc, new); 287 *old = q->qdisc;
288 q->qdisc = new;
296 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 289 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
297 qdisc_reset(*old); 290 qdisc_reset(*old);
298 sch_tree_unlock(sch); 291 sch_tree_unlock(sch);
@@ -361,7 +354,7 @@ static struct Qdisc_ops red_qdisc_ops __read_mostly = {
361 .cl_ops = &red_class_ops, 354 .cl_ops = &red_class_ops,
362 .enqueue = red_enqueue, 355 .enqueue = red_enqueue,
363 .dequeue = red_dequeue, 356 .dequeue = red_dequeue,
364 .requeue = red_requeue, 357 .peek = red_peek,
365 .drop = red_drop, 358 .drop = red_drop,
366 .init = red_init, 359 .init = red_init,
367 .reset = red_reset, 360 .reset = red_reset,
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index fe1508ef0d3d..f3965df00559 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -281,7 +281,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
281 struct sfq_sched_data *q = qdisc_priv(sch); 281 struct sfq_sched_data *q = qdisc_priv(sch);
282 unsigned int hash; 282 unsigned int hash;
283 sfq_index x; 283 sfq_index x;
284 int ret; 284 int uninitialized_var(ret);
285 285
286 hash = sfq_classify(skb, sch, &ret); 286 hash = sfq_classify(skb, sch, &ret);
287 if (hash == 0) { 287 if (hash == 0) {
@@ -329,71 +329,20 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
329 return NET_XMIT_CN; 329 return NET_XMIT_CN;
330} 330}
331 331
332static int 332static struct sk_buff *
333sfq_requeue(struct sk_buff *skb, struct Qdisc *sch) 333sfq_peek(struct Qdisc *sch)
334{ 334{
335 struct sfq_sched_data *q = qdisc_priv(sch); 335 struct sfq_sched_data *q = qdisc_priv(sch);
336 unsigned int hash; 336 sfq_index a;
337 sfq_index x;
338 int ret;
339
340 hash = sfq_classify(skb, sch, &ret);
341 if (hash == 0) {
342 if (ret & __NET_XMIT_BYPASS)
343 sch->qstats.drops++;
344 kfree_skb(skb);
345 return ret;
346 }
347 hash--;
348 337
349 x = q->ht[hash]; 338 /* No active slots */
350 if (x == SFQ_DEPTH) { 339 if (q->tail == SFQ_DEPTH)
351 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 340 return NULL;
352 q->hash[x] = hash;
353 }
354
355 sch->qstats.backlog += qdisc_pkt_len(skb);
356 __skb_queue_head(&q->qs[x], skb);
357 /* If selected queue has length q->limit+1, this means that
358 * all another queues are empty and we do simple tail drop.
359 * This packet is still requeued at head of queue, tail packet
360 * is dropped.
361 */
362 if (q->qs[x].qlen > q->limit) {
363 skb = q->qs[x].prev;
364 __skb_unlink(skb, &q->qs[x]);
365 sch->qstats.drops++;
366 sch->qstats.backlog -= qdisc_pkt_len(skb);
367 kfree_skb(skb);
368 return NET_XMIT_CN;
369 }
370
371 sfq_inc(q, x);
372 if (q->qs[x].qlen == 1) { /* The flow is new */
373 if (q->tail == SFQ_DEPTH) { /* It is the first flow */
374 q->tail = x;
375 q->next[x] = x;
376 q->allot[x] = q->quantum;
377 } else {
378 q->next[x] = q->next[q->tail];
379 q->next[q->tail] = x;
380 q->tail = x;
381 }
382 }
383
384 if (++sch->q.qlen <= q->limit) {
385 sch->qstats.requeues++;
386 return 0;
387 }
388 341
389 sch->qstats.drops++; 342 a = q->next[q->tail];
390 sfq_drop(sch); 343 return skb_peek(&q->qs[a]);
391 return NET_XMIT_CN;
392} 344}
393 345
394
395
396
397static struct sk_buff * 346static struct sk_buff *
398sfq_dequeue(struct Qdisc *sch) 347sfq_dequeue(struct Qdisc *sch)
399{ 348{
@@ -624,7 +573,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
624 .priv_size = sizeof(struct sfq_sched_data), 573 .priv_size = sizeof(struct sfq_sched_data),
625 .enqueue = sfq_enqueue, 574 .enqueue = sfq_enqueue,
626 .dequeue = sfq_dequeue, 575 .dequeue = sfq_dequeue,
627 .requeue = sfq_requeue, 576 .peek = sfq_peek,
628 .drop = sfq_drop, 577 .drop = sfq_drop,
629 .init = sfq_init, 578 .init = sfq_init,
630 .reset = sfq_reset, 579 .reset = sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 94c61598b86a..a2f93c09f3cc 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -139,19 +139,6 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
139 return 0; 139 return 0;
140} 140}
141 141
142static int tbf_requeue(struct sk_buff *skb, struct Qdisc* sch)
143{
144 struct tbf_sched_data *q = qdisc_priv(sch);
145 int ret;
146
147 if ((ret = q->qdisc->ops->requeue(skb, q->qdisc)) == 0) {
148 sch->q.qlen++;
149 sch->qstats.requeues++;
150 }
151
152 return ret;
153}
154
155static unsigned int tbf_drop(struct Qdisc* sch) 142static unsigned int tbf_drop(struct Qdisc* sch)
156{ 143{
157 struct tbf_sched_data *q = qdisc_priv(sch); 144 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -169,7 +156,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
169 struct tbf_sched_data *q = qdisc_priv(sch); 156 struct tbf_sched_data *q = qdisc_priv(sch);
170 struct sk_buff *skb; 157 struct sk_buff *skb;
171 158
172 skb = q->qdisc->dequeue(q->qdisc); 159 skb = q->qdisc->ops->peek(q->qdisc);
173 160
174 if (skb) { 161 if (skb) {
175 psched_time_t now; 162 psched_time_t now;
@@ -192,6 +179,10 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
192 toks -= L2T(q, len); 179 toks -= L2T(q, len);
193 180
194 if ((toks|ptoks) >= 0) { 181 if ((toks|ptoks) >= 0) {
182 skb = qdisc_dequeue_peeked(q->qdisc);
183 if (unlikely(!skb))
184 return NULL;
185
195 q->t_c = now; 186 q->t_c = now;
196 q->tokens = toks; 187 q->tokens = toks;
197 q->ptokens = ptoks; 188 q->ptokens = ptoks;
@@ -214,12 +205,6 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
214 (cf. CSZ, HPFQ, HFSC) 205 (cf. CSZ, HPFQ, HFSC)
215 */ 206 */
216 207
217 if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
218 /* When requeue fails skb is dropped */
219 qdisc_tree_decrease_qlen(q->qdisc, 1);
220 sch->qstats.drops++;
221 }
222
223 sch->qstats.overlimits++; 208 sch->qstats.overlimits++;
224 } 209 }
225 return NULL; 210 return NULL;
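The tbf_dequeue() hunks above change the shaping path from "dequeue, then requeue if the buckets cannot pay for the packet" to "peek first, and only dequeue once the buckets can pay", which is why the old requeue-and-drop fallback disappears entirely. A compressed sketch of the resulting flow follows; the unchanged token arithmetic and watchdog handling between the hunks are elided into comments, and the placeholder initialisers are not in the real function.

/* Compressed sketch of tbf_dequeue() after this change; assumes the
 * in-tree scheduler headers and the tbf_sched_data defined in sch_tbf.c.
 */
static struct sk_buff *tbf_dequeue_sketch(struct Qdisc *sch)
{
        struct tbf_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        long toks = 0, ptoks = 0;   /* really derived from q->t_c, q->buffer, ... */

        /* Look at the head packet without taking it from the child. */
        skb = q->qdisc->ops->peek(q->qdisc);
        if (skb == NULL)
                return NULL;

        /* ... recompute toks/ptoks for qdisc_pkt_len(skb) here ... */

        if ((toks | ptoks) >= 0) {
                /* Enough tokens: only now commit to removing the packet. */
                skb = qdisc_dequeue_peeked(q->qdisc);
                if (unlikely(!skb))
                        return NULL;
                /* ... charge the buckets, update sch->q.qlen, return skb ... */
                return skb;
        }

        /* Too few tokens: the packet simply stays queued in the child,
         * so there is nothing to requeue and nothing to drop here.
         */
        sch->qstats.overlimits++;
        return NULL;
}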
@@ -251,6 +236,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
251 struct tc_tbf_qopt *qopt; 236 struct tc_tbf_qopt *qopt;
252 struct qdisc_rate_table *rtab = NULL; 237 struct qdisc_rate_table *rtab = NULL;
253 struct qdisc_rate_table *ptab = NULL; 238 struct qdisc_rate_table *ptab = NULL;
239 struct qdisc_rate_table *tmp;
254 struct Qdisc *child = NULL; 240 struct Qdisc *child = NULL;
255 int max_size,n; 241 int max_size,n;
256 242
@@ -299,7 +285,8 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
299 sch_tree_lock(sch); 285 sch_tree_lock(sch);
300 if (child) { 286 if (child) {
301 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); 287 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
302 qdisc_destroy(xchg(&q->qdisc, child)); 288 qdisc_destroy(q->qdisc);
289 q->qdisc = child;
303 } 290 }
304 q->limit = qopt->limit; 291 q->limit = qopt->limit;
305 q->mtu = qopt->mtu; 292 q->mtu = qopt->mtu;
@@ -307,8 +294,14 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
307 q->buffer = qopt->buffer; 294 q->buffer = qopt->buffer;
308 q->tokens = q->buffer; 295 q->tokens = q->buffer;
309 q->ptokens = q->mtu; 296 q->ptokens = q->mtu;
310 rtab = xchg(&q->R_tab, rtab); 297
311 ptab = xchg(&q->P_tab, ptab); 298 tmp = q->R_tab;
299 q->R_tab = rtab;
300 rtab = tmp;
301
302 tmp = q->P_tab;
303 q->P_tab = ptab;
304 ptab = tmp;
312 sch_tree_unlock(sch); 305 sch_tree_unlock(sch);
313 err = 0; 306 err = 0;
314done: 307done:
@@ -398,7 +391,8 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
398 new = &noop_qdisc; 391 new = &noop_qdisc;
399 392
400 sch_tree_lock(sch); 393 sch_tree_lock(sch);
401 *old = xchg(&q->qdisc, new); 394 *old = q->qdisc;
395 q->qdisc = new;
402 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); 396 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
403 qdisc_reset(*old); 397 qdisc_reset(*old);
404 sch_tree_unlock(sch); 398 sch_tree_unlock(sch);
@@ -469,7 +463,7 @@ static struct Qdisc_ops tbf_qdisc_ops __read_mostly = {
469 .priv_size = sizeof(struct tbf_sched_data), 463 .priv_size = sizeof(struct tbf_sched_data),
470 .enqueue = tbf_enqueue, 464 .enqueue = tbf_enqueue,
471 .dequeue = tbf_dequeue, 465 .dequeue = tbf_dequeue,
472 .requeue = tbf_requeue, 466 .peek = qdisc_peek_dequeued,
473 .drop = tbf_drop, 467 .drop = tbf_drop,
474 .init = tbf_init, 468 .init = tbf_init,
475 .reset = tbf_reset, 469 .reset = tbf_reset,
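The ops table now points .peek at qdisc_peek_dequeued(), a generic helper for qdiscs that have no cheap native peek: the first peek really dequeues one packet and parks it, and the matching qdisc_dequeue_peeked() (used in tbf_dequeue above) later hands back the parked packet, or falls through to a normal dequeue if nothing was parked. The real helpers live in include/net/sch_generic.h and keep the parked skb inside struct Qdisc itself; since that field is not visible in this diff, the sketch below uses a hypothetical caller-supplied 'peeked' slot purely to show the idea.

/* Sketch of a dequeue-backed peek pair; 'peeked' is a stand-in for the
 * per-qdisc stash the real helpers use.
 */
static struct sk_buff *peek_dequeued_sketch(struct Qdisc *sch,
                                            struct sk_buff **peeked)
{
        if (*peeked == NULL) {
                /* No cached packet: really pull one from the qdisc ... */
                *peeked = sch->ops->dequeue(sch);
                /* ... but keep counting it as queued until it is claimed. */
                if (*peeked)
                        sch->q.qlen++;
        }
        return *peeked;
}

static struct sk_buff *dequeue_peeked_sketch(struct Qdisc *sch,
                                             struct sk_buff **peeked)
{
        struct sk_buff *skb = *peeked;

        if (skb) {
                /* Hand over the packet cached by the earlier peek. */
                *peeked = NULL;
                sch->q.qlen--;
        } else {
                skb = sch->ops->dequeue(sch);
        }
        return skb;
}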
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index d35ef059abb1..cfc8e7caba62 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -93,16 +93,6 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
93 return NET_XMIT_DROP; 93 return NET_XMIT_DROP;
94} 94}
95 95
96static int
97teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
98{
99 struct teql_sched_data *q = qdisc_priv(sch);
100
101 __skb_queue_head(&q->q, skb);
102 sch->qstats.requeues++;
103 return 0;
104}
105
106static struct sk_buff * 96static struct sk_buff *
107teql_dequeue(struct Qdisc* sch) 97teql_dequeue(struct Qdisc* sch)
108{ 98{
@@ -123,6 +113,13 @@ teql_dequeue(struct Qdisc* sch)
123 return skb; 113 return skb;
124} 114}
125 115
116static struct sk_buff *
117teql_peek(struct Qdisc* sch)
118{
119 /* teql is meant to be used as root qdisc */
120 return NULL;
121}
122
126static __inline__ void 123static __inline__ void
127teql_neigh_release(struct neighbour *n) 124teql_neigh_release(struct neighbour *n)
128{ 125{
@@ -433,7 +430,7 @@ static __init void teql_master_setup(struct net_device *dev)
433 430
434 ops->enqueue = teql_enqueue; 431 ops->enqueue = teql_enqueue;
435 ops->dequeue = teql_dequeue; 432 ops->dequeue = teql_dequeue;
436 ops->requeue = teql_requeue; 433 ops->peek = teql_peek;
437 ops->init = teql_qdisc_init; 434 ops->init = teql_qdisc_init;
438 ops->reset = teql_reset; 435 ops->reset = teql_reset;
439 ops->destroy = teql_destroy; 436 ops->destroy = teql_destroy;
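teql loses its requeue handler as well, and its new peek handler simply reports NULL. That is workable because ->peek is only consulted by code sitting above a qdisc (a parent qdisc or a helper such as qdisc_peek_dequeued), and teql is meant to be installed only as a device's root qdisc, as the comment in the hunk notes. A hypothetical caller-side sketch of why a NULL-returning peek is harmless:

/* Hypothetical caller-side sketch: any code that consults ->peek has to
 * tolerate NULL anyway, which is what makes a stub like teql_peek()
 * acceptable for a qdisc that is never used as someone's child.
 */
static struct sk_buff *peek_child_sketch(struct Qdisc *child)
{
        struct sk_buff *skb = child->ops->peek ? child->ops->peek(child) : NULL;

        if (!skb)
                return NULL;    /* nothing queued, or peek not meaningful */

        /* Inspect skb (length, timestamps, ...) without dequeueing it;
         * only dequeue once the parent decides it can actually send.
         */
        return skb;
}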