aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
commitc71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
treeecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/sched
parentea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig63
-rw-r--r--net/sched/Makefile6
-rw-r--r--net/sched/act_api.c55
-rw-r--r--net/sched/act_csum.c594
-rw-r--r--net/sched/act_gact.c8
-rw-r--r--net/sched/act_ipt.c33
-rw-r--r--net/sched/act_mirred.c7
-rw-r--r--net/sched/act_nat.c5
-rw-r--r--net/sched/act_pedit.c15
-rw-r--r--net/sched/act_police.c20
-rw-r--r--net/sched/act_simple.c13
-rw-r--r--net/sched/act_skbedit.c11
-rw-r--r--net/sched/cls_api.c33
-rw-r--r--net/sched/cls_basic.c21
-rw-r--r--net/sched/cls_cgroup.c12
-rw-r--r--net/sched/cls_flow.c76
-rw-r--r--net/sched/cls_fw.c38
-rw-r--r--net/sched/cls_route.c126
-rw-r--r--net/sched/cls_rsvp.h95
-rw-r--r--net/sched/cls_tcindex.c2
-rw-r--r--net/sched/cls_u32.c89
-rw-r--r--net/sched/em_cmp.c47
-rw-r--r--net/sched/em_meta.c56
-rw-r--r--net/sched/em_nbyte.c3
-rw-r--r--net/sched/em_text.c6
-rw-r--r--net/sched/em_u32.c2
-rw-r--r--net/sched/ematch.c37
-rw-r--r--net/sched/sch_api.c205
-rw-r--r--net/sched/sch_atm.c27
-rw-r--r--net/sched/sch_cbq.c379
-rw-r--r--net/sched/sch_choke.c688
-rw-r--r--net/sched/sch_drr.c12
-rw-r--r--net/sched/sch_dsmark.c30
-rw-r--r--net/sched/sch_fifo.c60
-rw-r--r--net/sched/sch_generic.c138
-rw-r--r--net/sched/sch_gred.c85
-rw-r--r--net/sched/sch_hfsc.c51
-rw-r--r--net/sched/sch_htb.c147
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_mq.c3
-rw-r--r--net/sched/sch_mqprio.c418
-rw-r--r--net/sched/sch_multiq.c14
-rw-r--r--net/sched/sch_netem.c417
-rw-r--r--net/sched/sch_prio.c39
-rw-r--r--net/sched/sch_qfq.c1137
-rw-r--r--net/sched/sch_red.c74
-rw-r--r--net/sched/sch_sfb.c709
-rw-r--r--net/sched/sch_sfq.c409
-rw-r--r--net/sched/sch_tbf.c42
-rw-r--r--net/sched/sch_teql.c77
50 files changed, 5384 insertions, 1253 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..2590e91b3289 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -24,7 +24,7 @@ menuconfig NET_SCHED
24 To administer these schedulers, you'll need the user-level utilities 24 To administer these schedulers, you'll need the user-level utilities
25 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. 25 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
26 That package also contains some documentation; for more, check out 26 That package also contains some documentation; for more, check out
27 <http://linux-net.osdl.org/index.php/Iproute2>. 27 <http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2>.
28 28
29 This Quality of Service (QoS) support will enable you to use 29 This Quality of Service (QoS) support will enable you to use
30 Differentiated Services (diffserv) and Resource Reservation Protocol 30 Differentiated Services (diffserv) and Resource Reservation Protocol
@@ -126,6 +126,17 @@ config NET_SCH_RED
126 To compile this code as a module, choose M here: the 126 To compile this code as a module, choose M here: the
127 module will be called sch_red. 127 module will be called sch_red.
128 128
129config NET_SCH_SFB
130 tristate "Stochastic Fair Blue (SFB)"
131 ---help---
132 Say Y here if you want to use the Stochastic Fair Blue (SFB)
133 packet scheduling algorithm.
134
135 See the top of <file:net/sched/sch_sfb.c> for more details.
136
137 To compile this code as a module, choose M here: the
138 module will be called sch_sfb.
139
129config NET_SCH_SFQ 140config NET_SCH_SFQ
130 tristate "Stochastic Fairness Queueing (SFQ)" 141 tristate "Stochastic Fairness Queueing (SFQ)"
131 ---help--- 142 ---help---
@@ -205,6 +216,40 @@ config NET_SCH_DRR
205 216
206 If unsure, say N. 217 If unsure, say N.
207 218
219config NET_SCH_MQPRIO
220 tristate "Multi-queue priority scheduler (MQPRIO)"
221 help
222 Say Y here if you want to use the Multi-queue Priority scheduler.
223 This scheduler allows QOS to be offloaded on NICs that have support
224 for offloading QOS schedulers.
225
226 To compile this driver as a module, choose M here: the module will
227 be called sch_mqprio.
228
229 If unsure, say N.
230
231config NET_SCH_CHOKE
232 tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
233 help
234 Say Y here if you want to use the CHOKe packet scheduler (CHOose
235 and Keep for responsive flows, CHOose and Kill for unresponsive
236 flows). This is a variation of RED which trys to penalize flows
237 that monopolize the queue.
238
239 To compile this code as a module, choose M here: the
240 module will be called sch_choke.
241
242config NET_SCH_QFQ
243 tristate "Quick Fair Queueing scheduler (QFQ)"
244 help
245 Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ)
246 packet scheduling algorithm.
247
248 To compile this driver as a module, choose M here: the module
249 will be called sch_qfq.
250
251 If unsure, say N.
252
208config NET_SCH_INGRESS 253config NET_SCH_INGRESS
209 tristate "Ingress Qdisc" 254 tristate "Ingress Qdisc"
210 depends on NET_CLS_ACT 255 depends on NET_CLS_ACT
@@ -243,7 +288,8 @@ config NET_CLS_TCINDEX
243 288
244config NET_CLS_ROUTE4 289config NET_CLS_ROUTE4
245 tristate "Routing decision (ROUTE)" 290 tristate "Routing decision (ROUTE)"
246 select NET_CLS_ROUTE 291 depends on INET
292 select IP_ROUTE_CLASSID
247 select NET_CLS 293 select NET_CLS
248 ---help--- 294 ---help---
249 If you say Y here, you will be able to classify packets 295 If you say Y here, you will be able to classify packets
@@ -252,9 +298,6 @@ config NET_CLS_ROUTE4
252 To compile this code as a module, choose M here: the 298 To compile this code as a module, choose M here: the
253 module will be called cls_route. 299 module will be called cls_route.
254 300
255config NET_CLS_ROUTE
256 bool
257
258config NET_CLS_FW 301config NET_CLS_FW
259 tristate "Netfilter mark (FW)" 302 tristate "Netfilter mark (FW)"
260 select NET_CLS 303 select NET_CLS
@@ -518,6 +561,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 561 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 562 module will be called act_skbedit.
520 563
564config NET_ACT_CSUM
565 tristate "Checksum Updating"
566 depends on NET_CLS_ACT && INET
567 ---help---
568 Say Y here to update some common checksum after some direct
569 packet alterations.
570
571 To compile this code as a module, choose M here: the
572 module will be called act_csum.
573
521config NET_CLS_IND 574config NET_CLS_IND
522 bool "Incoming device classification" 575 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 576 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..dc5889c0a15a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
@@ -23,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o
23obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o 24obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
24obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o 25obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
25obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o 26obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
27obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
26obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o 28obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
27obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o 29obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
28obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o 30obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
@@ -31,6 +33,10 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
31obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 33obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
32obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 34obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
33obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o 35obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
36obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
37obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
38obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
39
34obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 40obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
35obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 41obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
36obj-$(CONFIG_NET_CLS_FW) += cls_fw.o 42obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f89e7e0..a606025814a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,11 +26,6 @@
26#include <net/act_api.h> 26#include <net/act_api.h>
27#include <net/netlink.h> 27#include <net/netlink.h>
28 28
29static void tcf_common_free_rcu(struct rcu_head *head)
30{
31 kfree(container_of(head, struct tcf_common, tcfc_rcu));
32}
33
34void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 29void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
35{ 30{
36 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 31 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -47,7 +42,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
47 * gen_estimator est_timer() might access p->tcfc_lock 42 * gen_estimator est_timer() might access p->tcfc_lock
48 * or bstats, wait a RCU grace period before freeing p 43 * or bstats, wait a RCU grace period before freeing p
49 */ 44 */
50 call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); 45 kfree_rcu(p, tcfc_rcu);
51 return; 46 return;
52 } 47 }
53 } 48 }
@@ -78,7 +73,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
78 struct tc_action *a, struct tcf_hashinfo *hinfo) 73 struct tc_action *a, struct tcf_hashinfo *hinfo)
79{ 74{
80 struct tcf_common *p; 75 struct tcf_common *p;
81 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; 76 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
82 struct nlattr *nest; 77 struct nlattr *nest;
83 78
84 read_lock_bh(hinfo->lock); 79 read_lock_bh(hinfo->lock);
@@ -126,7 +121,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
126{ 121{
127 struct tcf_common *p, *s_p; 122 struct tcf_common *p, *s_p;
128 struct nlattr *nest; 123 struct nlattr *nest;
129 int i= 0, n_i = 0; 124 int i = 0, n_i = 0;
130 125
131 nest = nla_nest_start(skb, a->order); 126 nest = nla_nest_start(skb, a->order);
132 if (nest == NULL) 127 if (nest == NULL)
@@ -138,7 +133,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
138 while (p != NULL) { 133 while (p != NULL) {
139 s_p = p->tcfc_next; 134 s_p = p->tcfc_next;
140 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) 135 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
141 module_put(a->ops->owner); 136 module_put(a->ops->owner);
142 n_i++; 137 n_i++;
143 p = s_p; 138 p = s_p;
144 } 139 }
@@ -447,7 +442,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
447 nest = nla_nest_start(skb, TCA_OPTIONS); 442 nest = nla_nest_start(skb, TCA_OPTIONS);
448 if (nest == NULL) 443 if (nest == NULL)
449 goto nla_put_failure; 444 goto nla_put_failure;
450 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 445 err = tcf_action_dump_old(skb, a, bind, ref);
446 if (err > 0) {
451 nla_nest_end(skb, nest); 447 nla_nest_end(skb, nest);
452 return err; 448 return err;
453 } 449 }
@@ -491,7 +487,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
491 struct tc_action *a; 487 struct tc_action *a;
492 struct tc_action_ops *a_o; 488 struct tc_action_ops *a_o;
493 char act_name[IFNAMSIZ]; 489 char act_name[IFNAMSIZ];
494 struct nlattr *tb[TCA_ACT_MAX+1]; 490 struct nlattr *tb[TCA_ACT_MAX + 1];
495 struct nlattr *kind; 491 struct nlattr *kind;
496 int err; 492 int err;
497 493
@@ -549,9 +545,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
549 goto err_free; 545 goto err_free;
550 546
551 /* module count goes up only when brand new policy is created 547 /* module count goes up only when brand new policy is created
552 if it exists and is only bound to in a_o->init() then 548 * if it exists and is only bound to in a_o->init() then
553 ACT_P_CREATED is not returned (a zero is). 549 * ACT_P_CREATED is not returned (a zero is).
554 */ 550 */
555 if (err != ACT_P_CREATED) 551 if (err != ACT_P_CREATED)
556 module_put(a_o->owner); 552 module_put(a_o->owner);
557 a->ops = a_o; 553 a->ops = a_o;
@@ -569,7 +565,7 @@ err_out:
569struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, 565struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
570 char *name, int ovr, int bind) 566 char *name, int ovr, int bind)
571{ 567{
572 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 568 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
573 struct tc_action *head = NULL, *act, *act_prev = NULL; 569 struct tc_action *head = NULL, *act, *act_prev = NULL;
574 int err; 570 int err;
575 int i; 571 int i;
@@ -697,7 +693,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
697static struct tc_action * 693static struct tc_action *
698tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) 694tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
699{ 695{
700 struct nlattr *tb[TCA_ACT_MAX+1]; 696 struct nlattr *tb[TCA_ACT_MAX + 1];
701 struct tc_action *a; 697 struct tc_action *a;
702 int index; 698 int index;
703 int err; 699 int err;
@@ -770,7 +766,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
770 struct tcamsg *t; 766 struct tcamsg *t;
771 struct netlink_callback dcb; 767 struct netlink_callback dcb;
772 struct nlattr *nest; 768 struct nlattr *nest;
773 struct nlattr *tb[TCA_ACT_MAX+1]; 769 struct nlattr *tb[TCA_ACT_MAX + 1];
774 struct nlattr *kind; 770 struct nlattr *kind;
775 struct tc_action *a = create_a(0); 771 struct tc_action *a = create_a(0);
776 int err = -ENOMEM; 772 int err = -ENOMEM;
@@ -821,7 +817,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
821 nlh->nlmsg_flags |= NLM_F_ROOT; 817 nlh->nlmsg_flags |= NLM_F_ROOT;
822 module_put(a->ops->owner); 818 module_put(a->ops->owner);
823 kfree(a); 819 kfree(a);
824 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 820 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
821 n->nlmsg_flags & NLM_F_ECHO);
825 if (err > 0) 822 if (err > 0)
826 return 0; 823 return 0;
827 824
@@ -842,14 +839,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
842 u32 pid, int event) 839 u32 pid, int event)
843{ 840{
844 int i, ret; 841 int i, ret;
845 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 842 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
846 struct tc_action *head = NULL, *act, *act_prev = NULL; 843 struct tc_action *head = NULL, *act, *act_prev = NULL;
847 844
848 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); 845 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
849 if (ret < 0) 846 if (ret < 0)
850 return ret; 847 return ret;
851 848
852 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 849 if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
853 if (tb[1] != NULL) 850 if (tb[1] != NULL)
854 return tca_action_flush(net, tb[1], n, pid); 851 return tca_action_flush(net, tb[1], n, pid);
855 else 852 else
@@ -892,7 +889,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
892 /* now do the delete */ 889 /* now do the delete */
893 tcf_action_destroy(head, 0); 890 tcf_action_destroy(head, 0);
894 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, 891 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
895 n->nlmsg_flags&NLM_F_ECHO); 892 n->nlmsg_flags & NLM_F_ECHO);
896 if (ret > 0) 893 if (ret > 0)
897 return 0; 894 return 0;
898 return ret; 895 return ret;
@@ -936,7 +933,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
936 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 933 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
937 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 934 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
938 935
939 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 936 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
940 if (err > 0) 937 if (err > 0)
941 err = 0; 938 err = 0;
942 return err; 939 return err;
@@ -967,7 +964,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
967 964
968 /* dump then free all the actions after update; inserted policy 965 /* dump then free all the actions after update; inserted policy
969 * stays intact 966 * stays intact
970 * */ 967 */
971 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); 968 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
972 for (a = act; a; a = act) { 969 for (a = act; a; a = act) {
973 act = a->next; 970 act = a->next;
@@ -993,17 +990,16 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
993 return -EINVAL; 990 return -EINVAL;
994 } 991 }
995 992
996 /* n->nlmsg_flags&NLM_F_CREATE 993 /* n->nlmsg_flags & NLM_F_CREATE */
997 * */
998 switch (n->nlmsg_type) { 994 switch (n->nlmsg_type) {
999 case RTM_NEWACTION: 995 case RTM_NEWACTION:
1000 /* we are going to assume all other flags 996 /* we are going to assume all other flags
1001 * imply create only if it doesnt exist 997 * imply create only if it doesn't exist
1002 * Note that CREATE | EXCL implies that 998 * Note that CREATE | EXCL implies that
1003 * but since we want avoid ambiguity (eg when flags 999 * but since we want avoid ambiguity (eg when flags
1004 * is zero) then just set this 1000 * is zero) then just set this
1005 */ 1001 */
1006 if (n->nlmsg_flags&NLM_F_REPLACE) 1002 if (n->nlmsg_flags & NLM_F_REPLACE)
1007 ovr = 1; 1003 ovr = 1;
1008replay: 1004replay:
1009 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); 1005 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1024,7 @@ replay:
1028static struct nlattr * 1024static struct nlattr *
1029find_dump_kind(const struct nlmsghdr *n) 1025find_dump_kind(const struct nlmsghdr *n)
1030{ 1026{
1031 struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; 1027 struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
1032 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; 1028 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
1033 struct nlattr *nla[TCAA_MAX + 1]; 1029 struct nlattr *nla[TCAA_MAX + 1];
1034 struct nlattr *kind; 1030 struct nlattr *kind;
@@ -1071,9 +1067,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1071 } 1067 }
1072 1068
1073 a_o = tc_lookup_action(kind); 1069 a_o = tc_lookup_action(kind);
1074 if (a_o == NULL) { 1070 if (a_o == NULL)
1075 return 0; 1071 return 0;
1076 }
1077 1072
1078 memset(&a, 0, sizeof(struct tc_action)); 1073 memset(&a, 0, sizeof(struct tc_action));
1079 a.ops = a_o; 1074 a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..6cdf9abe475f
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,594 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
114 * Check the expected next layer availability in the specified sk_buff.
115 * Return the next layer pointer if pass, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
235 * Support both UDP and UDPLITE checksum algorithms, Don't use
236 * udph->len to get the real length without any protocol check,
237 * UDPLITE uses udph->len for another thing,
238 * Use iph->tot_len, or just ipl.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
286 * Support both UDP and UDPLITE checksum algorithms, Don't use
287 * udph->len to get the real length without any protocol check,
288 * UDPLITE uses udph->len for another thing,
289 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 bstats_update(&p->tcf_bstats, skb);
512 action = p->tcf_action;
513 update_flags = p->update_flags;
514 spin_unlock(&p->tcf_lock);
515
516 if (unlikely(action == TC_ACT_SHOT))
517 goto drop;
518
519 switch (skb->protocol) {
520 case cpu_to_be16(ETH_P_IP):
521 if (!tcf_csum_ipv4(skb, update_flags))
522 goto drop;
523 break;
524 case cpu_to_be16(ETH_P_IPV6):
525 if (!tcf_csum_ipv6(skb, update_flags))
526 goto drop;
527 break;
528 }
529
530 return action;
531
532drop:
533 spin_lock(&p->tcf_lock);
534 p->tcf_qstats.drops++;
535 spin_unlock(&p->tcf_lock);
536 return TC_ACT_SHOT;
537}
538
539static int tcf_csum_dump(struct sk_buff *skb,
540 struct tc_action *a, int bind, int ref)
541{
542 unsigned char *b = skb_tail_pointer(skb);
543 struct tcf_csum *p = a->priv;
544 struct tc_csum opt = {
545 .update_flags = p->update_flags,
546 .index = p->tcf_index,
547 .action = p->tcf_action,
548 .refcnt = p->tcf_refcnt - ref,
549 .bindcnt = p->tcf_bindcnt - bind,
550 };
551 struct tcf_t t;
552
553 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
554 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
555 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
556 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
557 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
558
559 return skb->len;
560
561nla_put_failure:
562 nlmsg_trim(skb, b);
563 return -1;
564}
565
566static struct tc_action_ops act_csum_ops = {
567 .kind = "csum",
568 .hinfo = &csum_hash_info,
569 .type = TCA_ACT_CSUM,
570 .capab = TCA_CAP_NONE,
571 .owner = THIS_MODULE,
572 .act = tcf_csum,
573 .dump = tcf_csum_dump,
574 .cleanup = tcf_csum_cleanup,
575 .lookup = tcf_hash_search,
576 .init = tcf_csum_init,
577 .walk = tcf_generic_walker
578};
579
580MODULE_DESCRIPTION("Checksum updating actions");
581MODULE_LICENSE("GPL");
582
583static int __init csum_init_module(void)
584{
585 return tcf_register_action(&act_csum_ops);
586}
587
588static void __exit csum_cleanup_module(void)
589{
590 tcf_unregister_action(&act_csum_ops);
591}
592
593module_init(csum_init_module);
594module_exit(csum_cleanup_module);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a4c0b4..2b4ab4b05ce8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
50} 50}
51 51
52typedef int (*g_rand)(struct tcf_gact *gact); 52typedef int (*g_rand)(struct tcf_gact *gact);
53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 53static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
54#endif /* CONFIG_GACT_PROB */ 54#endif /* CONFIG_GACT_PROB */
55 55
56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { 56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), 89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
90 bind, &gact_idx_gen, &gact_hash_info); 90 bind, &gact_idx_gen, &gact_hash_info);
91 if (IS_ERR(pc)) 91 if (IS_ERR(pc))
92 return PTR_ERR(pc); 92 return PTR_ERR(pc);
93 ret = ACT_P_CREATED; 93 ret = ACT_P_CREATED;
94 } else { 94 } else {
95 if (!ovr) { 95 if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
205static int __init gact_init_module(void) 205static int __init gact_init_module(void)
206{ 206{
207#ifdef CONFIG_GACT_PROB 207#ifdef CONFIG_GACT_PROB
208 printk(KERN_INFO "GACT probability on\n"); 208 pr_info("GACT probability on\n");
209#else 209#else
210 printk(KERN_INFO "GACT probability NOT on\n"); 210 pr_info("GACT probability NOT on\n");
211#endif 211#endif
212 return tcf_register_action(&act_gact_ops); 212 return tcf_register_action(&act_gact_ops);
213} 213}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c7e59e6ec349..9fc211a1b20e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -39,7 +39,7 @@ static struct tcf_hashinfo ipt_hash_info = {
39 .lock = &ipt_lock, 39 .lock = &ipt_lock,
40}; 40};
41 41
42static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) 42static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
43{ 43{
44 struct xt_tgchk_param par; 44 struct xt_tgchk_param par;
45 struct xt_target *target; 45 struct xt_target *target;
@@ -66,7 +66,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
66 return 0; 66 return 0;
67} 67}
68 68
69static void ipt_destroy_target(struct ipt_entry_target *t) 69static void ipt_destroy_target(struct xt_entry_target *t)
70{ 70{
71 struct xt_tgdtor_param par = { 71 struct xt_tgdtor_param par = {
72 .target = t->u.kernel.target, 72 .target = t->u.kernel.target,
@@ -99,7 +99,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, 99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
100 [TCA_IPT_HOOK] = { .type = NLA_U32 }, 100 [TCA_IPT_HOOK] = { .type = NLA_U32 },
101 [TCA_IPT_INDEX] = { .type = NLA_U32 }, 101 [TCA_IPT_INDEX] = { .type = NLA_U32 },
102 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) }, 102 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
103}; 103};
104 104
105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, 105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
@@ -108,7 +108,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
108 struct nlattr *tb[TCA_IPT_MAX + 1]; 108 struct nlattr *tb[TCA_IPT_MAX + 1];
109 struct tcf_ipt *ipt; 109 struct tcf_ipt *ipt;
110 struct tcf_common *pc; 110 struct tcf_common *pc;
111 struct ipt_entry_target *td, *t; 111 struct xt_entry_target *td, *t;
112 char *tname; 112 char *tname;
113 int ret = 0, err; 113 int ret = 0, err;
114 u32 hook = 0; 114 u32 hook = 0;
@@ -126,7 +126,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
126 if (tb[TCA_IPT_TARG] == NULL) 126 if (tb[TCA_IPT_TARG] == NULL)
127 return -EINVAL; 127 return -EINVAL;
128 128
129 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]); 129 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) 130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
131 return -EINVAL; 131 return -EINVAL;
132 132
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, 138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
139 &ipt_idx_gen, &ipt_hash_info); 139 &ipt_idx_gen, &ipt_hash_info);
140 if (IS_ERR(pc)) 140 if (IS_ERR(pc))
141 return PTR_ERR(pc); 141 return PTR_ERR(pc);
142 ret = ACT_P_CREATED; 142 ret = ACT_P_CREATED;
143 } else { 143 } else {
144 if (!ovr) { 144 if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
162 if (unlikely(!t)) 162 if (unlikely(!t))
163 goto err2; 163 goto err2;
164 164
165 if ((err = ipt_init_target(t, tname, hook)) < 0) 165 err = ipt_init_target(t, tname, hook);
166 if (err < 0)
166 goto err3; 167 goto err3;
167 168
168 spin_lock_bh(&ipt->tcf_lock); 169 spin_lock_bh(&ipt->tcf_lock);
@@ -209,12 +210,12 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
209 spin_lock(&ipt->tcf_lock); 210 spin_lock(&ipt->tcf_lock);
210 211
211 ipt->tcf_tm.lastuse = jiffies; 212 ipt->tcf_tm.lastuse = jiffies;
212 ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); 213 bstats_update(&ipt->tcf_bstats, skb);
213 ipt->tcf_bstats.packets++;
214 214
215 /* yes, we have to worry about both in and out dev 215 /* yes, we have to worry about both in and out dev
216 worry later - danger - this API seems to have changed 216 * worry later - danger - this API seems to have changed
217 from earlier kernels */ 217 * from earlier kernels
218 */
218 par.in = skb->dev; 219 par.in = skb->dev;
219 par.out = NULL; 220 par.out = NULL;
220 par.hooknum = ipt->tcfi_hook; 221 par.hooknum = ipt->tcfi_hook;
@@ -230,7 +231,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
230 result = TC_ACT_SHOT; 231 result = TC_ACT_SHOT;
231 ipt->tcf_qstats.drops++; 232 ipt->tcf_qstats.drops++;
232 break; 233 break;
233 case IPT_CONTINUE: 234 case XT_CONTINUE:
234 result = TC_ACT_PIPE; 235 result = TC_ACT_PIPE;
235 break; 236 break;
236 default: 237 default:
@@ -249,14 +250,14 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
249{ 250{
250 unsigned char *b = skb_tail_pointer(skb); 251 unsigned char *b = skb_tail_pointer(skb);
251 struct tcf_ipt *ipt = a->priv; 252 struct tcf_ipt *ipt = a->priv;
252 struct ipt_entry_target *t; 253 struct xt_entry_target *t;
253 struct tcf_t tm; 254 struct tcf_t tm;
254 struct tc_cnt c; 255 struct tc_cnt c;
255 256
256 /* for simple targets kernel size == user size 257 /* for simple targets kernel size == user size
257 ** user name = target name 258 * user name = target name
258 ** for foolproof you need to not assume this 259 * for foolproof you need to not assume this
259 */ 260 */
260 261
261 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); 262 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
262 if (unlikely(!t)) 263 if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0c311be92827..961386e2f2c0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
41 .lock = &mirred_lock, 41 .lock = &mirred_lock,
42}; 42};
43 43
44static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) 44static int tcf_mirred_release(struct tcf_mirred *m, int bind)
45{ 45{
46 if (m) { 46 if (m) {
47 if (bind) 47 if (bind)
48 m->tcf_bindcnt--; 48 m->tcf_bindcnt--;
49 m->tcf_refcnt--; 49 m->tcf_refcnt--;
50 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { 50 if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
51 list_del(&m->tcfm_list); 51 list_del(&m->tcfm_list);
52 if (m->tcfm_dev) 52 if (m->tcfm_dev)
53 dev_put(m->tcfm_dev); 53 dev_put(m->tcfm_dev);
@@ -165,8 +165,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
165 165
166 spin_lock(&m->tcf_lock); 166 spin_lock(&m->tcf_lock);
167 m->tcf_tm.lastuse = jiffies; 167 m->tcf_tm.lastuse = jiffies;
168 m->tcf_bstats.bytes += qdisc_pkt_len(skb); 168 bstats_update(&m->tcf_bstats, skb);
169 m->tcf_bstats.packets++;
170 169
171 dev = m->tcfm_dev; 170 dev = m->tcfm_dev;
172 if (!dev) { 171 if (!dev) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 186eb837e600..762b027650a9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70 &nat_idx_gen, &nat_hash_info); 70 &nat_idx_gen, &nat_hash_info);
71 if (IS_ERR(pc)) 71 if (IS_ERR(pc))
72 return PTR_ERR(pc); 72 return PTR_ERR(pc);
73 p = to_tcf_nat(pc); 73 p = to_tcf_nat(pc);
74 ret = ACT_P_CREATED; 74 ret = ACT_P_CREATED;
75 } else { 75 } else {
@@ -125,8 +125,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
125 egress = p->flags & TCA_NAT_FLAG_EGRESS; 125 egress = p->flags & TCA_NAT_FLAG_EGRESS;
126 action = p->tcf_action; 126 action = p->tcf_action;
127 127
128 p->tcf_bstats.bytes += qdisc_pkt_len(skb); 128 bstats_update(&p->tcf_bstats, skb);
129 p->tcf_bstats.packets++;
130 129
131 spin_unlock(&p->tcf_lock); 130 spin_unlock(&p->tcf_lock);
132 131
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0593c9640db..7affe9a92757 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
71 &pedit_idx_gen, &pedit_hash_info); 71 &pedit_idx_gen, &pedit_hash_info);
72 if (IS_ERR(pc)) 72 if (IS_ERR(pc))
73 return PTR_ERR(pc); 73 return PTR_ERR(pc);
74 p = to_pedit(pc); 74 p = to_pedit(pc);
75 keys = kmalloc(ksize, GFP_KERNEL); 75 keys = kmalloc(ksize, GFP_KERNEL);
76 if (keys == NULL) { 76 if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
127 int i, munged = 0; 127 int i, munged = 0;
128 unsigned int off; 128 unsigned int off;
129 129
130 if (skb_cloned(skb)) { 130 if (skb_cloned(skb) &&
131 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 131 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
132 return p->tcf_action; 132 return p->tcf_action;
133 }
134 }
135 133
136 off = skb_network_offset(skb); 134 off = skb_network_offset(skb);
137 135
@@ -163,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
163 } 161 }
164 if (offset > 0 && offset > skb->len) { 162 if (offset > 0 && offset > skb->len) {
165 pr_info("tc filter pedit" 163 pr_info("tc filter pedit"
166 " offset %d cant exceed pkt length %d\n", 164 " offset %d can't exceed pkt length %d\n",
167 offset, skb->len); 165 offset, skb->len);
168 goto bad; 166 goto bad;
169 } 167 }
@@ -187,8 +185,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
187bad: 185bad:
188 p->tcf_qstats.overlimits++; 186 p->tcf_qstats.overlimits++;
189done: 187done:
190 p->tcf_bstats.bytes += qdisc_pkt_len(skb); 188 bstats_update(&p->tcf_bstats, skb);
191 p->tcf_bstats.packets++;
192 spin_unlock(&p->tcf_lock); 189 spin_unlock(&p->tcf_lock);
193 return p->tcf_action; 190 return p->tcf_action;
194} 191}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 7ebf7439b478..b3b9b32f4e00 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
22#include <net/act_api.h> 22#include <net/act_api.h>
23#include <net/netlink.h> 23#include <net/netlink.h>
24 24
25#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) 25#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L)
26#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) 26#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
27 27
28#define POL_TAB_MASK 15 28#define POL_TAB_MASK 15
29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
37}; 37};
38 38
39/* old policer structure from before tc actions */ 39/* old policer structure from before tc actions */
40struct tc_police_compat 40struct tc_police_compat {
41{
42 u32 index; 41 u32 index;
43 int action; 42 int action;
44 u32 limit; 43 u32 limit;
@@ -97,11 +96,6 @@ nla_put_failure:
97 goto done; 96 goto done;
98} 97}
99 98
100static void tcf_police_free_rcu(struct rcu_head *head)
101{
102 kfree(container_of(head, struct tcf_police, tcf_rcu));
103}
104
105static void tcf_police_destroy(struct tcf_police *p) 99static void tcf_police_destroy(struct tcf_police *p)
106{ 100{
107 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 101 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -122,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
122 * gen_estimator est_timer() might access p->tcf_lock 116 * gen_estimator est_timer() might access p->tcf_lock
123 * or bstats, wait a RCU grace period before freeing p 117 * or bstats, wait a RCU grace period before freeing p
124 */ 118 */
125 call_rcu(&p->tcf_rcu, tcf_police_free_rcu); 119 kfree_rcu(p, tcf_rcu);
126 return; 120 return;
127 } 121 }
128 } 122 }
@@ -139,7 +133,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
139static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, 133static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
140 struct tc_action *a, int ovr, int bind) 134 struct tc_action *a, int ovr, int bind)
141{ 135{
142 unsigned h; 136 unsigned int h;
143 int ret = 0, err; 137 int ret = 0, err;
144 struct nlattr *tb[TCA_POLICE_MAX + 1]; 138 struct nlattr *tb[TCA_POLICE_MAX + 1];
145 struct tc_police *parm; 139 struct tc_police *parm;
@@ -298,8 +292,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
298 292
299 spin_lock(&police->tcf_lock); 293 spin_lock(&police->tcf_lock);
300 294
301 police->tcf_bstats.bytes += qdisc_pkt_len(skb); 295 bstats_update(&police->tcf_bstats, skb);
302 police->tcf_bstats.packets++;
303 296
304 if (police->tcfp_ewma_rate && 297 if (police->tcfp_ewma_rate &&
305 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 298 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
@@ -403,7 +396,6 @@ static void __exit
403police_cleanup_module(void) 396police_cleanup_module(void)
404{ 397{
405 tcf_unregister_action(&act_police_ops); 398 tcf_unregister_action(&act_police_ops);
406 rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
407} 399}
408 400
409module_init(police_init_module); 401module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 97e84f3ee775..a34a22de60b3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -42,13 +42,12 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
42 42
43 spin_lock(&d->tcf_lock); 43 spin_lock(&d->tcf_lock);
44 d->tcf_tm.lastuse = jiffies; 44 d->tcf_tm.lastuse = jiffies;
45 d->tcf_bstats.bytes += qdisc_pkt_len(skb); 45 bstats_update(&d->tcf_bstats, skb);
46 d->tcf_bstats.packets++;
47 46
48 /* print policy string followed by _ then packet count 47 /* print policy string followed by _ then packet count
49 * Example if this was the 3rd packet and the string was "hello" 48 * Example if this was the 3rd packet and the string was "hello"
50 * then it would look like "hello_3" (without quotes) 49 * then it would look like "hello_3" (without quotes)
51 **/ 50 */
52 pr_info("simple: %s_%d\n", 51 pr_info("simple: %s_%d\n",
53 (char *)d->tcfd_defdata, d->tcf_bstats.packets); 52 (char *)d->tcfd_defdata, d->tcf_bstats.packets);
54 spin_unlock(&d->tcf_lock); 53 spin_unlock(&d->tcf_lock);
@@ -126,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
126 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
127 &simp_idx_gen, &simp_hash_info); 126 &simp_idx_gen, &simp_hash_info);
128 if (IS_ERR(pc)) 127 if (IS_ERR(pc))
129 return PTR_ERR(pc); 128 return PTR_ERR(pc);
130 129
131 d = to_defact(pc); 130 d = to_defact(pc);
132 ret = alloc_defdata(d, defdata); 131 ret = alloc_defdata(d, defdata);
@@ -150,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
150 return ret; 149 return ret;
151} 150}
152 151
153static inline int tcf_simp_cleanup(struct tc_action *a, int bind) 152static int tcf_simp_cleanup(struct tc_action *a, int bind)
154{ 153{
155 struct tcf_defact *d = a->priv; 154 struct tcf_defact *d = a->priv;
156 155
@@ -159,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
159 return 0; 158 return 0;
160} 159}
161 160
162static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, 161static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
163 int bind, int ref) 162 int bind, int ref)
164{ 163{
165 unsigned char *b = skb_tail_pointer(skb); 164 unsigned char *b = skb_tail_pointer(skb);
166 struct tcf_defact *d = a->priv; 165 struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 66cbf4eb8855..5f6f0c7c3905 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -46,8 +46,7 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
46 46
47 spin_lock(&d->tcf_lock); 47 spin_lock(&d->tcf_lock);
48 d->tcf_tm.lastuse = jiffies; 48 d->tcf_tm.lastuse = jiffies;
49 d->tcf_bstats.bytes += qdisc_pkt_len(skb); 49 bstats_update(&d->tcf_bstats, skb);
50 d->tcf_bstats.packets++;
51 50
52 if (d->flags & SKBEDIT_F_PRIORITY) 51 if (d->flags & SKBEDIT_F_PRIORITY)
53 skb->priority = d->priority; 52 skb->priority = d->priority;
@@ -114,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
114 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 113 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
115 &skbedit_idx_gen, &skbedit_hash_info); 114 &skbedit_idx_gen, &skbedit_hash_info);
116 if (IS_ERR(pc)) 115 if (IS_ERR(pc))
117 return PTR_ERR(pc); 116 return PTR_ERR(pc);
118 117
119 d = to_skbedit(pc); 118 d = to_skbedit(pc);
120 ret = ACT_P_CREATED; 119 ret = ACT_P_CREATED;
@@ -145,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
145 return ret; 144 return ret;
146} 145}
147 146
148static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) 147static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
149{ 148{
150 struct tcf_skbedit *d = a->priv; 149 struct tcf_skbedit *d = a->priv;
151 150
@@ -154,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
154 return 0; 153 return 0;
155} 154}
156 155
157static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, 156static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
158 int bind, int ref) 157 int bind, int ref)
159{ 158{
160 unsigned char *b = skb_tail_pointer(skb); 159 unsigned char *b = skb_tail_pointer(skb);
161 struct tcf_skbedit *d = a->priv; 160 struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28ef79a..bb2c523f8158 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
85 int rc = -ENOENT; 85 int rc = -ENOENT;
86 86
87 write_lock(&cls_mod_lock); 87 write_lock(&cls_mod_lock);
88 for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) 88 for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
89 if (t == ops) 89 if (t == ops)
90 break; 90 break;
91 91
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
111 u32 first = TC_H_MAKE(0xC0000000U, 0U); 111 u32 first = TC_H_MAKE(0xC0000000U, 0U);
112 112
113 if (tp) 113 if (tp)
114 first = tp->prio-1; 114 first = tp->prio - 1;
115 115
116 return first; 116 return first;
117} 117}
@@ -149,7 +149,8 @@ replay:
149 149
150 if (prio == 0) { 150 if (prio == 0) {
151 /* If no priority is given, user wants we allocated it. */ 151 /* If no priority is given, user wants we allocated it. */
152 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 152 if (n->nlmsg_type != RTM_NEWTFILTER ||
153 !(n->nlmsg_flags & NLM_F_CREATE))
153 return -ENOENT; 154 return -ENOENT;
154 prio = TC_H_MAKE(0x80000000U, 0U); 155 prio = TC_H_MAKE(0x80000000U, 0U);
155 } 156 }
@@ -176,7 +177,8 @@ replay:
176 } 177 }
177 178
178 /* Is it classful? */ 179 /* Is it classful? */
179 if ((cops = q->ops->cl_ops) == NULL) 180 cops = q->ops->cl_ops;
181 if (!cops)
180 return -EINVAL; 182 return -EINVAL;
181 183
182 if (cops->tcf_chain == NULL) 184 if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
196 goto errout; 198 goto errout;
197 199
198 /* Check the chain for existence of proto-tcf with this priority */ 200 /* Check the chain for existence of proto-tcf with this priority */
199 for (back = chain; (tp=*back) != NULL; back = &tp->next) { 201 for (back = chain; (tp = *back) != NULL; back = &tp->next) {
200 if (tp->prio >= prio) { 202 if (tp->prio >= prio) {
201 if (tp->prio == prio) { 203 if (tp->prio == prio) {
202 if (!nprio || (tp->protocol != protocol && protocol)) 204 if (!nprio ||
205 (tp->protocol != protocol && protocol))
203 goto errout; 206 goto errout;
204 } else 207 } else
205 tp = NULL; 208 tp = NULL;
@@ -216,7 +219,8 @@ replay:
216 goto errout; 219 goto errout;
217 220
218 err = -ENOENT; 221 err = -ENOENT;
219 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 222 if (n->nlmsg_type != RTM_NEWTFILTER ||
223 !(n->nlmsg_flags & NLM_F_CREATE))
220 goto errout; 224 goto errout;
221 225
222 226
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
420 424
421 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 425 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
422 return skb->len; 426 return skb->len;
423 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 427 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
428 if (!dev)
424 return skb->len; 429 return skb->len;
425 430
426 if (!tcm->tcm_parent) 431 if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
429 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 434 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
430 if (!q) 435 if (!q)
431 goto out; 436 goto out;
432 if ((cops = q->ops->cl_ops) == NULL) 437 cops = q->ops->cl_ops;
438 if (!cops)
433 goto errout; 439 goto errout;
434 if (cops->tcf_chain == NULL) 440 if (cops->tcf_chain == NULL)
435 goto errout; 441 goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
444 450
445 s_t = cb->args[0]; 451 s_t = cb->args[0];
446 452
447 for (tp=*chain, t=0; tp; tp = tp->next, t++) { 453 for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
448 if (t < s_t) continue; 454 if (t < s_t)
455 continue;
449 if (TC_H_MAJ(tcm->tcm_info) && 456 if (TC_H_MAJ(tcm->tcm_info) &&
450 TC_H_MAJ(tcm->tcm_info) != tp->prio) 457 TC_H_MAJ(tcm->tcm_info) != tp->prio)
451 continue; 458 continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
468 arg.skb = skb; 475 arg.skb = skb;
469 arg.cb = cb; 476 arg.cb = cb;
470 arg.w.stop = 0; 477 arg.w.stop = 0;
471 arg.w.skip = cb->args[1]-1; 478 arg.w.skip = cb->args[1] - 1;
472 arg.w.count = 0; 479 arg.w.count = 0;
473 tp->ops->walk(tp, &arg.w); 480 tp->ops->walk(tp, &arg.w);
474 cb->args[1] = arg.w.count+1; 481 cb->args[1] = arg.w.count + 1;
475 if (arg.w.stop) 482 if (arg.w.stop)
476 break; 483 break;
477 } 484 }
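The cls_api hunks above are mostly checkpatch-style cleanups (assignments pulled out of conditions, operator spacing), but the tc_dump_tfilter lines also show the resumable-walk pattern netlink dumps rely on: the walker skips the entries already sent, counts what it skips or emits, and raises a stop flag when the message fills so the next dump call can resume. A minimal userspace C sketch of that pattern follows; the names (struct item, walk_state) are invented for the example, and the kernel's +1/-1 offset on the stored count is dropped. This is an illustration, not the kernel code.

    /* Resumable walk over a list: skip what earlier calls already emitted. */
    #include <stdio.h>

    struct item { int id; struct item *next; };

    struct walk_state {
        int skip;    /* entries to pass over before emitting again */
        int count;   /* entries skipped or emitted in this call    */
        int stop;    /* set when the output "message" filled up    */
    };

    static void walk(struct item *head, struct walk_state *w, int budget)
    {
        for (struct item *p = head; p; p = p->next) {
            if (w->count < w->skip) {
                w->count++;
                continue;
            }
            if (budget-- == 0) {     /* pretend the skb ran out of room */
                w->stop = 1;
                return;
            }
            printf("dumped item %d\n", p->id);
            w->count++;
        }
    }

    int main(void)
    {
        struct item c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        int resume = 0;              /* plays the role of cb->args[1] */

        for (;;) {
            struct walk_state w = { .skip = resume, .count = 0, .stop = 0 };

            walk(&a, &w, 2);         /* at most two items per "message" */
            resume = w.count;        /* next call resumes here */
            if (!w.stop)
                break;
        }
        return 0;
    }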
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..8be8872dd571 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
21#include <net/act_api.h> 21#include <net/act_api.h>
22#include <net/pkt_cls.h> 22#include <net/pkt_cls.h>
23 23
24struct basic_head 24struct basic_head {
25{
26 u32 hgenerator; 25 u32 hgenerator;
27 struct list_head flist; 26 struct list_head flist;
28}; 27};
29 28
30struct basic_filter 29struct basic_filter {
31{
32 u32 handle; 30 u32 handle;
33 struct tcf_exts exts; 31 struct tcf_exts exts;
34 struct tcf_ematch_tree ematches; 32 struct tcf_ematch_tree ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
92 return 0; 90 return 0;
93} 91}
94 92
95static inline void basic_delete_filter(struct tcf_proto *tp, 93static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
96 struct basic_filter *f)
97{ 94{
98 tcf_unbind_filter(tp, &f->res); 95 tcf_unbind_filter(tp, &f->res);
99 tcf_exts_destroy(tp, &f->exts); 96 tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
135 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, 132 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
136}; 133};
137 134
138static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, 135static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
139 unsigned long base, struct nlattr **tb, 136 unsigned long base, struct nlattr **tb,
140 struct nlattr *est) 137 struct nlattr *est)
141{ 138{
142 int err = -EINVAL; 139 int err = -EINVAL;
143 struct tcf_exts e; 140 struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
203 } while (--i > 0 && basic_get(tp, head->hgenerator)); 200 } while (--i > 0 && basic_get(tp, head->hgenerator));
204 201
205 if (i <= 0) { 202 if (i <= 0) {
206 printk(KERN_ERR "Insufficient number of handles\n"); 203 pr_err("Insufficient number of handles\n");
207 goto errout; 204 goto errout;
208 } 205 }
209 206
@@ -268,6 +265,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
268 goto nla_put_failure; 265 goto nla_put_failure;
269 266
270 nla_nest_end(skb, nest); 267 nla_nest_end(skb, nest);
268
269 if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
270 goto nla_put_failure;
271
271 return skb->len; 272 return skb->len;
272 273
273nla_put_failure: 274nla_put_failure:
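basic_change, only partially visible above, allocates an unused filter handle by bumping a per-head generator and retrying a bounded number of times, printing "Insufficient number of handles" when the budget runs out. A small sketch of that retry loop, assuming an illustrative handle_in_use() predicate in place of basic_get() and an arbitrary retry bound (the full loop body is not shown in this hunk):

    /* Bounded search for an unused handle, like basic_change's generator. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool handle_in_use(unsigned int h)
    {
        return h >= 1 && h <= 3;     /* stand-in for basic_get(): 1..3 taken */
    }

    static unsigned int alloc_handle(unsigned int *generator)
    {
        int attempts = 0x1000;       /* arbitrary bound for the sketch */

        do {
            if (++*generator == 0)   /* never hand out 0: it means "auto" */
                *generator = 1;
        } while (--attempts > 0 && handle_in_use(*generator));

        return attempts > 0 ? *generator : 0;   /* 0 == allocation failed */
    }

    int main(void)
    {
        unsigned int gen = 0;

        printf("allocated handle %u\n", alloc_handle(&gen));   /* prints 4 */
        return 0;
    }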
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 78ef2c5e130b..32a335194ca5 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
34 .populate = cgrp_populate, 34 .populate = cgrp_populate,
35#ifdef CONFIG_NET_CLS_CGROUP 35#ifdef CONFIG_NET_CLS_CGROUP
36 .subsys_id = net_cls_subsys_id, 36 .subsys_id = net_cls_subsys_id,
37#else
38#define net_cls_subsys_id net_cls_subsys.subsys_id
39#endif 37#endif
40 .module = THIS_MODULE, 38 .module = THIS_MODULE,
41}; 39};
@@ -58,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
58{ 56{
59 struct cgroup_cls_state *cs; 57 struct cgroup_cls_state *cs;
60 58
61 if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) 59 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
60 if (!cs)
62 return ERR_PTR(-ENOMEM); 61 return ERR_PTR(-ENOMEM);
63 62
64 if (cgrp->parent) 63 if (cgrp->parent)
@@ -96,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
96 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 95 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
97} 96}
98 97
99struct cls_cgroup_head 98struct cls_cgroup_head {
100{
101 u32 handle; 99 u32 handle;
102 struct tcf_exts exts; 100 struct tcf_exts exts;
103 struct tcf_ematch_tree ematches; 101 struct tcf_ematch_tree ematches;
@@ -123,7 +121,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
123 * calls by looking at the number of nested bh disable calls because 121 * calls by looking at the number of nested bh disable calls because
124 * softirqs always disables bh. 122 * softirqs always disables bh.
125 */ 123 */
126 if (softirq_count() != SOFTIRQ_OFFSET) { 124 if (in_serving_softirq()) {
127 /* If there is an sk_classid we'll use that. */ 125 /* If there is an sk_classid we'll use that. */
128 if (!skb->sk) 126 if (!skb->sk)
129 return -1; 127 return -1;
@@ -168,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
168 u32 handle, struct nlattr **tca, 166 u32 handle, struct nlattr **tca,
169 unsigned long *arg) 167 unsigned long *arg)
170{ 168{
171 struct nlattr *tb[TCA_CGROUP_MAX+1]; 169 struct nlattr *tb[TCA_CGROUP_MAX + 1];
172 struct cls_cgroup_head *head = tp->root; 170 struct cls_cgroup_head *head = tp->root;
173 struct tcf_ematch_tree t; 171 struct tcf_ematch_tree t;
174 struct tcf_exts e; 172 struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..8ec01391d988 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
@@ -267,7 +276,7 @@ fallback:
267 276
268static u32 flow_get_rtclassid(const struct sk_buff *skb) 277static u32 flow_get_rtclassid(const struct sk_buff *skb)
269{ 278{
270#ifdef CONFIG_NET_CLS_ROUTE 279#ifdef CONFIG_IP_ROUTE_CLASSID
271 if (skb_dst(skb)) 280 if (skb_dst(skb))
272 return skb_dst(skb)->tclassid; 281 return skb_dst(skb)->tclassid;
273#endif 282#endif
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
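The cls_flow change above drops the local has_ports() switch and instead asks a helper for the offset of the port (or SPI) field inside the transport header, re-pulling the header before reading two bytes at network header + ihl*4 + offset and skipping fragments. A userspace sketch of that lookup on a raw IPv4 buffer; the protocol-to-offset table mirrors the common header layouts but only illustrates what proto_ports_offset() provides, and every name here is made up for the example:

    /* Read the source (which = 0) or destination (which = 1) port of an
     * unfragmented IPv4 packet at a protocol-dependent offset. */
    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int port_field_offset(int proto)
    {
        switch (proto) {
        case 6:    /* TCP       */
        case 17:   /* UDP       */
        case 136:  /* UDPLITE   */
        case 132:  /* SCTP      */
        case 33:   /* DCCP      */
        case 50:   /* ESP (SPI) */
            return 0;
        case 51:   /* AH (SPI)  */
            return 4;
        default:
            return -1;           /* no port-like field */
        }
    }

    static unsigned int flow_port(const uint8_t *pkt, size_t len, int which)
    {
        if (len < 20)
            return 0;

        int ihl = (pkt[0] & 0x0F) * 4;                 /* IPv4 header length */
        int frag = ((pkt[6] << 8) | pkt[7]) & 0x3FFF;  /* MF flag or offset  */
        int poff = port_field_offset(pkt[9]);          /* pkt[9] = protocol  */
        uint16_t v;

        if (frag || poff < 0 || len < (size_t)(ihl + poff + which * 2 + 2))
            return 0;
        memcpy(&v, pkt + ihl + poff + which * 2, 2);
        return ntohs(v);
    }

    int main(void)
    {
        /* 20-byte IPv4 header (protocol 17 = UDP) followed by a UDP header */
        uint8_t pkt[28] = { 0x45, 0, 0, 28, 0, 0, 0, 0, 64, 17, 0, 0,
                            10, 0, 0, 1, 10, 0, 0, 2,
                            0x30, 0x39,      /* source port 12345    */
                            0x01, 0xBB,      /* destination port 443 */
                            0, 8, 0, 0 };

        printf("sport=%u dport=%u\n", flow_port(pkt, sizeof(pkt), 0),
               flow_port(pkt, sizeof(pkt), 1));
        return 0;
    }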
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b6f9b4..26e7bc4ffb79 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
31 31
32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) 32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
33 33
34struct fw_head 34struct fw_head {
35{
36 struct fw_filter *ht[HTSIZE]; 35 struct fw_filter *ht[HTSIZE];
37 u32 mask; 36 u32 mask;
38}; 37};
39 38
40struct fw_filter 39struct fw_filter {
41{
42 struct fw_filter *next; 40 struct fw_filter *next;
43 u32 id; 41 u32 id;
44 struct tcf_result res; 42 struct tcf_result res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
53 .police = TCA_FW_POLICE 51 .police = TCA_FW_POLICE
54}; 52};
55 53
56static __inline__ int fw_hash(u32 handle) 54static inline int fw_hash(u32 handle)
57{ 55{
58 if (HTSIZE == 4096) 56 if (HTSIZE == 4096)
59 return ((handle >> 24) & 0xFFF) ^ 57 return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
82static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, 80static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
83 struct tcf_result *res) 81 struct tcf_result *res)
84{ 82{
85 struct fw_head *head = (struct fw_head*)tp->root; 83 struct fw_head *head = (struct fw_head *)tp->root;
86 struct fw_filter *f; 84 struct fw_filter *f;
87 int r; 85 int r;
88 u32 id = skb->mark; 86 u32 id = skb->mark;
89 87
90 if (head != NULL) { 88 if (head != NULL) {
91 id &= head->mask; 89 id &= head->mask;
92 for (f=head->ht[fw_hash(id)]; f; f=f->next) { 90 for (f = head->ht[fw_hash(id)]; f; f = f->next) {
93 if (f->id == id) { 91 if (f->id == id) {
94 *res = f->res; 92 *res = f->res;
95#ifdef CONFIG_NET_CLS_IND 93#ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
105 } 103 }
106 } else { 104 } else {
107 /* old method */ 105 /* old method */
108 if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { 106 if (id && (TC_H_MAJ(id) == 0 ||
107 !(TC_H_MAJ(id ^ tp->q->handle)))) {
109 res->classid = id; 108 res->classid = id;
110 res->class = 0; 109 res->class = 0;
111 return 0; 110 return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
117 116
118static unsigned long fw_get(struct tcf_proto *tp, u32 handle) 117static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
119{ 118{
120 struct fw_head *head = (struct fw_head*)tp->root; 119 struct fw_head *head = (struct fw_head *)tp->root;
121 struct fw_filter *f; 120 struct fw_filter *f;
122 121
123 if (head == NULL) 122 if (head == NULL)
124 return 0; 123 return 0;
125 124
126 for (f=head->ht[fw_hash(handle)]; f; f=f->next) { 125 for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
127 if (f->id == handle) 126 if (f->id == handle)
128 return (unsigned long)f; 127 return (unsigned long)f;
129 } 128 }
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
139 return 0; 138 return 0;
140} 139}
141 140
142static inline void 141static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
143fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
144{ 142{
145 tcf_unbind_filter(tp, &f->res); 143 tcf_unbind_filter(tp, &f->res);
146 tcf_exts_destroy(tp, &f->exts); 144 tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
156 if (head == NULL) 154 if (head == NULL)
157 return; 155 return;
158 156
159 for (h=0; h<HTSIZE; h++) { 157 for (h = 0; h < HTSIZE; h++) {
160 while ((f=head->ht[h]) != NULL) { 158 while ((f = head->ht[h]) != NULL) {
161 head->ht[h] = f->next; 159 head->ht[h] = f->next;
162 fw_delete_filter(tp, f); 160 fw_delete_filter(tp, f);
163 } 161 }
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
167 165
168static int fw_delete(struct tcf_proto *tp, unsigned long arg) 166static int fw_delete(struct tcf_proto *tp, unsigned long arg)
169{ 167{
170 struct fw_head *head = (struct fw_head*)tp->root; 168 struct fw_head *head = (struct fw_head *)tp->root;
171 struct fw_filter *f = (struct fw_filter*)arg; 169 struct fw_filter *f = (struct fw_filter *)arg;
172 struct fw_filter **fp; 170 struct fw_filter **fp;
173 171
174 if (head == NULL || f == NULL) 172 if (head == NULL || f == NULL)
175 goto out; 173 goto out;
176 174
177 for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { 175 for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
178 if (*fp == f) { 176 if (*fp == f) {
179 tcf_tree_lock(tp); 177 tcf_tree_lock(tp);
180 *fp = f->next; 178 *fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
240 struct nlattr **tca, 238 struct nlattr **tca,
241 unsigned long *arg) 239 unsigned long *arg)
242{ 240{
243 struct fw_head *head = (struct fw_head*)tp->root; 241 struct fw_head *head = (struct fw_head *)tp->root;
244 struct fw_filter *f = (struct fw_filter *) *arg; 242 struct fw_filter *f = (struct fw_filter *) *arg;
245 struct nlattr *opt = tca[TCA_OPTIONS]; 243 struct nlattr *opt = tca[TCA_OPTIONS];
246 struct nlattr *tb[TCA_FW_MAX + 1]; 244 struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
302 300
303static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) 301static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
304{ 302{
305 struct fw_head *head = (struct fw_head*)tp->root; 303 struct fw_head *head = (struct fw_head *)tp->root;
306 int h; 304 int h;
307 305
308 if (head == NULL) 306 if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
332 struct sk_buff *skb, struct tcmsg *t) 330 struct sk_buff *skb, struct tcmsg *t)
333{ 331{
334 struct fw_head *head = (struct fw_head *)tp->root; 332 struct fw_head *head = (struct fw_head *)tp->root;
335 struct fw_filter *f = (struct fw_filter*)fh; 333 struct fw_filter *f = (struct fw_filter *)fh;
336 unsigned char *b = skb_tail_pointer(skb); 334 unsigned char *b = skb_tail_pointer(skb);
337 struct nlattr *nest; 335 struct nlattr *nest;
338 336
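cls_fw, touched above mostly for style, keys filters off the (masked) skb mark through a small chained hash table, with fw_hash() folding the 32-bit mark down to a bucket index so every mark bit contributes. A toy userspace sketch of that structure, using an invented 16-bucket table and a nibble-XOR fold in place of the kernel's page-sized table and its fold:

    /* Firewall-mark classification via a small chained hash table. */
    #include <stdint.h>
    #include <stdio.h>

    #define FW_HTSIZE 16     /* toy size; the kernel sizes its table to a page */

    struct fw_flt {
        struct fw_flt *next;
        uint32_t id;         /* masked mark this filter matches */
        int classid;
    };

    static unsigned int fw_hash_fold(uint32_t id)
    {
        /* XOR all nibbles together so every mark bit influences the bucket */
        return (id ^ (id >> 4) ^ (id >> 8) ^ (id >> 12) ^
                (id >> 16) ^ (id >> 20) ^ (id >> 24) ^ (id >> 28)) &
               (FW_HTSIZE - 1);
    }

    static int fw_classify(struct fw_flt **ht, uint32_t mark, uint32_t mask)
    {
        uint32_t id = mark & mask;

        for (struct fw_flt *f = ht[fw_hash_fold(id)]; f; f = f->next)
            if (f->id == id)
                return f->classid;
        return -1;           /* no matching filter */
    }

    int main(void)
    {
        struct fw_flt *ht[FW_HTSIZE] = { 0 };
        struct fw_flt f1 = { NULL, 0x10, 100 };

        ht[fw_hash_fold(f1.id)] = &f1;
        printf("mark 0x10 -> %d\n", fw_classify(ht, 0x10, 0xFFFFFFFFu));
        printf("mark 0x11 -> %d\n", fw_classify(ht, 0x11, 0xFFFFFFFFu));
        return 0;
    }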
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd85dec8..a907905376df 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
23#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
24 24
25/* 25/*
26 1. For now we assume that route tags < 256. 26 * 1. For now we assume that route tags < 256.
27 It allows to use direct table lookups, instead of hash tables. 27 * It allows to use direct table lookups, instead of hash tables.
28 2. For now we assume that "from TAG" and "fromdev DEV" statements 28 * 2. For now we assume that "from TAG" and "fromdev DEV" statements
29 are mutually exclusive. 29 * are mutually exclusive.
30 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" 30 * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
31 */ 31 */
32 32
33struct route4_fastmap 33struct route4_fastmap {
34{
35 struct route4_filter *filter; 34 struct route4_filter *filter;
36 u32 id; 35 u32 id;
37 int iif; 36 int iif;
38}; 37};
39 38
40struct route4_head 39struct route4_head {
41{
42 struct route4_fastmap fastmap[16]; 40 struct route4_fastmap fastmap[16];
43 struct route4_bucket *table[256+1]; 41 struct route4_bucket *table[256 + 1];
44}; 42};
45 43
46struct route4_bucket 44struct route4_bucket {
47{
48 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ 45 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
49 struct route4_filter *ht[16+16+1]; 46 struct route4_filter *ht[16 + 16 + 1];
50}; 47};
51 48
52struct route4_filter 49struct route4_filter {
53{
54 struct route4_filter *next; 50 struct route4_filter *next;
55 u32 id; 51 u32 id;
56 int iif; 52 int iif;
@@ -61,20 +57,20 @@ struct route4_filter
61 struct route4_bucket *bkt; 57 struct route4_bucket *bkt;
62}; 58};
63 59
64#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) 60#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
65 61
66static const struct tcf_ext_map route_ext_map = { 62static const struct tcf_ext_map route_ext_map = {
67 .police = TCA_ROUTE4_POLICE, 63 .police = TCA_ROUTE4_POLICE,
68 .action = TCA_ROUTE4_ACT 64 .action = TCA_ROUTE4_ACT
69}; 65};
70 66
71static __inline__ int route4_fastmap_hash(u32 id, int iif) 67static inline int route4_fastmap_hash(u32 id, int iif)
72{ 68{
73 return id&0xF; 69 return id & 0xF;
74} 70}
75 71
76static inline 72static void
77void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) 73route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
78{ 74{
79 spinlock_t *root_lock = qdisc_root_sleeping_lock(q); 75 spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
80 76
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
83 spin_unlock_bh(root_lock); 79 spin_unlock_bh(root_lock);
84} 80}
85 81
86static inline void 82static void
87route4_set_fastmap(struct route4_head *head, u32 id, int iif, 83route4_set_fastmap(struct route4_head *head, u32 id, int iif,
88 struct route4_filter *f) 84 struct route4_filter *f)
89{ 85{
90 int h = route4_fastmap_hash(id, iif); 86 int h = route4_fastmap_hash(id, iif);
87
91 head->fastmap[h].id = id; 88 head->fastmap[h].id = id;
92 head->fastmap[h].iif = iif; 89 head->fastmap[h].iif = iif;
93 head->fastmap[h].filter = f; 90 head->fastmap[h].filter = f;
94} 91}
95 92
96static __inline__ int route4_hash_to(u32 id) 93static inline int route4_hash_to(u32 id)
97{ 94{
98 return id&0xFF; 95 return id & 0xFF;
99} 96}
100 97
101static __inline__ int route4_hash_from(u32 id) 98static inline int route4_hash_from(u32 id)
102{ 99{
103 return (id>>16)&0xF; 100 return (id >> 16) & 0xF;
104} 101}
105 102
106static __inline__ int route4_hash_iif(int iif) 103static inline int route4_hash_iif(int iif)
107{ 104{
108 return 16 + ((iif>>16)&0xF); 105 return 16 + ((iif >> 16) & 0xF);
109} 106}
110 107
111static __inline__ int route4_hash_wild(void) 108static inline int route4_hash_wild(void)
112{ 109{
113 return 32; 110 return 32;
114} 111}
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
131static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, 128static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
132 struct tcf_result *res) 129 struct tcf_result *res)
133{ 130{
134 struct route4_head *head = (struct route4_head*)tp->root; 131 struct route4_head *head = (struct route4_head *)tp->root;
135 struct dst_entry *dst; 132 struct dst_entry *dst;
136 struct route4_bucket *b; 133 struct route4_bucket *b;
137 struct route4_filter *f; 134 struct route4_filter *f;
138 u32 id, h; 135 u32 id, h;
139 int iif, dont_cache = 0; 136 int iif, dont_cache = 0;
140 137
141 if ((dst = skb_dst(skb)) == NULL) 138 dst = skb_dst(skb);
139 if (!dst)
142 goto failure; 140 goto failure;
143 141
144 id = dst->tclassid; 142 id = dst->tclassid;
145 if (head == NULL) 143 if (head == NULL)
146 goto old_method; 144 goto old_method;
147 145
148 iif = ((struct rtable*)dst)->fl.iif; 146 iif = ((struct rtable *)dst)->rt_iif;
149 147
150 h = route4_fastmap_hash(id, iif); 148 h = route4_fastmap_hash(id, iif);
151 if (id == head->fastmap[h].id && 149 if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
161 h = route4_hash_to(id); 159 h = route4_hash_to(id);
162 160
163restart: 161restart:
164 if ((b = head->table[h]) != NULL) { 162 b = head->table[h];
163 if (b) {
165 for (f = b->ht[route4_hash_from(id)]; f; f = f->next) 164 for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
166 if (f->id == id) 165 if (f->id == id)
167 ROUTE4_APPLY_RESULT(); 166 ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
197 196
198static inline u32 to_hash(u32 id) 197static inline u32 to_hash(u32 id)
199{ 198{
200 u32 h = id&0xFF; 199 u32 h = id & 0xFF;
201 if (id&0x8000) 200
201 if (id & 0x8000)
202 h += 256; 202 h += 256;
203 return h; 203 return h;
204} 204}
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
211 if (!(id & 0x8000)) { 211 if (!(id & 0x8000)) {
212 if (id > 255) 212 if (id > 255)
213 return 256; 213 return 256;
214 return id&0xF; 214 return id & 0xF;
215 } 215 }
216 return 16 + (id&0xF); 216 return 16 + (id & 0xF);
217} 217}
218 218
219static unsigned long route4_get(struct tcf_proto *tp, u32 handle) 219static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
220{ 220{
221 struct route4_head *head = (struct route4_head*)tp->root; 221 struct route4_head *head = (struct route4_head *)tp->root;
222 struct route4_bucket *b; 222 struct route4_bucket *b;
223 struct route4_filter *f; 223 struct route4_filter *f;
224 unsigned h1, h2; 224 unsigned int h1, h2;
225 225
226 if (!head) 226 if (!head)
227 return 0; 227 return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
230 if (h1 > 256) 230 if (h1 > 256)
231 return 0; 231 return 0;
232 232
233 h2 = from_hash(handle>>16); 233 h2 = from_hash(handle >> 16);
234 if (h2 > 32) 234 if (h2 > 32)
235 return 0; 235 return 0;
236 236
237 if ((b = head->table[h1]) != NULL) { 237 b = head->table[h1];
238 if (b) {
238 for (f = b->ht[h2]; f; f = f->next) 239 for (f = b->ht[h2]; f; f = f->next)
239 if (f->handle == handle) 240 if (f->handle == handle)
240 return (unsigned long)f; 241 return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
251 return 0; 252 return 0;
252} 253}
253 254
254static inline void 255static void
255route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) 256route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
256{ 257{
257 tcf_unbind_filter(tp, &f->res); 258 tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
267 if (head == NULL) 268 if (head == NULL)
268 return; 269 return;
269 270
270 for (h1=0; h1<=256; h1++) { 271 for (h1 = 0; h1 <= 256; h1++) {
271 struct route4_bucket *b; 272 struct route4_bucket *b;
272 273
273 if ((b = head->table[h1]) != NULL) { 274 b = head->table[h1];
274 for (h2=0; h2<=32; h2++) { 275 if (b) {
276 for (h2 = 0; h2 <= 32; h2++) {
275 struct route4_filter *f; 277 struct route4_filter *f;
276 278
277 while ((f = b->ht[h2]) != NULL) { 279 while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
287 289
288static int route4_delete(struct tcf_proto *tp, unsigned long arg) 290static int route4_delete(struct tcf_proto *tp, unsigned long arg)
289{ 291{
290 struct route4_head *head = (struct route4_head*)tp->root; 292 struct route4_head *head = (struct route4_head *)tp->root;
291 struct route4_filter **fp, *f = (struct route4_filter*)arg; 293 struct route4_filter **fp, *f = (struct route4_filter *)arg;
292 unsigned h = 0; 294 unsigned int h = 0;
293 struct route4_bucket *b; 295 struct route4_bucket *b;
294 int i; 296 int i;
295 297
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
299 h = f->handle; 301 h = f->handle;
300 b = f->bkt; 302 b = f->bkt;
301 303
302 for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { 304 for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
303 if (*fp == f) { 305 if (*fp == f) {
304 tcf_tree_lock(tp); 306 tcf_tree_lock(tp);
305 *fp = f->next; 307 *fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
310 312
311 /* Strip tree */ 313 /* Strip tree */
312 314
313 for (i=0; i<=32; i++) 315 for (i = 0; i <= 32; i++)
314 if (b->ht[i]) 316 if (b->ht[i])
315 return 0; 317 return 0;
316 318
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
380 } 382 }
381 383
382 h1 = to_hash(nhandle); 384 h1 = to_hash(nhandle);
383 if ((b = head->table[h1]) == NULL) { 385 b = head->table[h1];
386 if (!b) {
384 err = -ENOBUFS; 387 err = -ENOBUFS;
385 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); 388 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
386 if (b == NULL) 389 if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
391 tcf_tree_unlock(tp); 394 tcf_tree_unlock(tp);
392 } else { 395 } else {
393 unsigned int h2 = from_hash(nhandle >> 16); 396 unsigned int h2 = from_hash(nhandle >> 16);
397
394 err = -EEXIST; 398 err = -EEXIST;
395 for (fp = b->ht[h2]; fp; fp = fp->next) 399 for (fp = b->ht[h2]; fp; fp = fp->next)
396 if (fp->handle == f->handle) 400 if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
444 if (err < 0) 448 if (err < 0)
445 return err; 449 return err;
446 450
447 if ((f = (struct route4_filter*)*arg) != NULL) { 451 f = (struct route4_filter *)*arg;
452 if (f) {
448 if (f->handle != handle && handle) 453 if (f->handle != handle && handle)
449 return -EINVAL; 454 return -EINVAL;
450 455
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
481 486
482reinsert: 487reinsert:
483 h = from_hash(f->handle >> 16); 488 h = from_hash(f->handle >> 16);
484 for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) 489 for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
485 if (f->handle < f1->handle) 490 if (f->handle < f1->handle)
486 break; 491 break;
487 492
@@ -492,7 +497,8 @@ reinsert:
492 if (old_handle && f->handle != old_handle) { 497 if (old_handle && f->handle != old_handle) {
493 th = to_hash(old_handle); 498 th = to_hash(old_handle);
494 h = from_hash(old_handle >> 16); 499 h = from_hash(old_handle >> 16);
495 if ((b = head->table[th]) != NULL) { 500 b = head->table[th];
501 if (b) {
496 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { 502 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
497 if (*fp == f) { 503 if (*fp == f) {
498 *fp = f->next; 504 *fp = f->next;
@@ -515,7 +521,7 @@ errout:
515static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) 521static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
516{ 522{
517 struct route4_head *head = tp->root; 523 struct route4_head *head = tp->root;
518 unsigned h, h1; 524 unsigned int h, h1;
519 525
520 if (head == NULL) 526 if (head == NULL)
521 arg->stop = 1; 527 arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
549static int route4_dump(struct tcf_proto *tp, unsigned long fh, 555static int route4_dump(struct tcf_proto *tp, unsigned long fh,
550 struct sk_buff *skb, struct tcmsg *t) 556 struct sk_buff *skb, struct tcmsg *t)
551{ 557{
552 struct route4_filter *f = (struct route4_filter*)fh; 558 struct route4_filter *f = (struct route4_filter *)fh;
553 unsigned char *b = skb_tail_pointer(skb); 559 unsigned char *b = skb_tail_pointer(skb);
554 struct nlattr *nest; 560 struct nlattr *nest;
555 u32 id; 561 u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
563 if (nest == NULL) 569 if (nest == NULL)
564 goto nla_put_failure; 570 goto nla_put_failure;
565 571
566 if (!(f->handle&0x8000)) { 572 if (!(f->handle & 0x8000)) {
567 id = f->id&0xFF; 573 id = f->id & 0xFF;
568 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); 574 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
569 } 575 }
570 if (f->handle&0x80000000) { 576 if (f->handle & 0x80000000) {
571 if ((f->handle>>16) != 0xFFFF) 577 if ((f->handle >> 16) != 0xFFFF)
572 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); 578 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
573 } else { 579 } else {
574 id = f->id>>16; 580 id = f->id >> 16;
575 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); 581 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
576 } 582 }
577 if (f->res.classid) 583 if (f->res.classid)
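The route classifier above keeps a two-level table: to_hash() maps the low handle bits (plus the 0x8000 "fromdev" flag) to one of 256 + 1 buckets, and from_hash() maps the upper bits to one of 16 FROM slots, 16 IIF slots or the wildcard slot. A short sketch of that handle decoding, mirroring the helpers in the hunk above; it is an illustration only:

    /* Decode a route4 handle into its bucket and slot. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int to_bucket(uint32_t handle)
    {
        unsigned int h = handle & 0xFF;       /* "to" route tag */

        if (handle & 0x8000)
            h += 256;        /* fromdev/interface variants use the extra range */
        return h;            /* 0..256: the table holds 256 + 1 buckets */
    }

    static unsigned int from_slot(uint32_t handle)
    {
        uint32_t id = handle >> 16;

        if (!(id & 0x8000))
            return id > 255 ? 256 /* invalid */ : (id & 0xF);  /* FROM slots 0..15 */
        return 16 + (id & 0xF);                                /* IIF slots 16..31 */
    }

    int main(void)
    {
        uint32_t handle = 0x00120034;         /* from tag 0x12, to tag 0x34 */

        printf("bucket %u, slot %u\n", to_bucket(handle), from_slot(handle));
        return 0;
    }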
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a1790b048..402c44b241a3 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
66 powerful classification engine. */ 66 powerful classification engine. */
67 67
68 68
69struct rsvp_head 69struct rsvp_head {
70{
71 u32 tmap[256/32]; 70 u32 tmap[256/32];
72 u32 hgenerator; 71 u32 hgenerator;
73 u8 tgenerator; 72 u8 tgenerator;
74 struct rsvp_session *ht[256]; 73 struct rsvp_session *ht[256];
75}; 74};
76 75
77struct rsvp_session 76struct rsvp_session {
78{
79 struct rsvp_session *next; 77 struct rsvp_session *next;
80 __be32 dst[RSVP_DST_LEN]; 78 __be32 dst[RSVP_DST_LEN];
81 struct tc_rsvp_gpi dpi; 79 struct tc_rsvp_gpi dpi;
82 u8 protocol; 80 u8 protocol;
83 u8 tunnelid; 81 u8 tunnelid;
84 /* 16 (src,sport) hash slots, and one wildcard source slot */ 82 /* 16 (src,sport) hash slots, and one wildcard source slot */
85 struct rsvp_filter *ht[16+1]; 83 struct rsvp_filter *ht[16 + 1];
86}; 84};
87 85
88 86
89struct rsvp_filter 87struct rsvp_filter {
90{
91 struct rsvp_filter *next; 88 struct rsvp_filter *next;
92 __be32 src[RSVP_DST_LEN]; 89 __be32 src[RSVP_DST_LEN];
93 struct tc_rsvp_gpi spi; 90 struct tc_rsvp_gpi spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
100 struct rsvp_session *sess; 97 struct rsvp_session *sess;
101}; 98};
102 99
103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) 100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{ 101{
105 unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; 102 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103
106 h ^= h>>16; 104 h ^= h>>16;
107 h ^= h>>8; 105 h ^= h>>8;
108 return (h ^ protocol ^ tunnelid) & 0xFF; 106 return (h ^ protocol ^ tunnelid) & 0xFF;
109} 107}
110 108
111static __inline__ unsigned hash_src(__be32 *src) 109static inline unsigned int hash_src(__be32 *src)
112{ 110{
113 unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; 111 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112
114 h ^= h>>16; 113 h ^= h>>16;
115 h ^= h>>8; 114 h ^= h>>8;
116 h ^= h>>4; 115 h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, 133static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135 struct tcf_result *res) 134 struct tcf_result *res)
136{ 135{
137 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 136 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
138 struct rsvp_session *s; 137 struct rsvp_session *s;
139 struct rsvp_filter *f; 138 struct rsvp_filter *f;
140 unsigned h1, h2; 139 unsigned int h1, h2;
141 __be32 *dst, *src; 140 __be32 *dst, *src;
142 u8 protocol; 141 u8 protocol;
143 u8 tunnelid = 0; 142 u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
162 src = &nhptr->saddr.s6_addr32[0]; 161 src = &nhptr->saddr.s6_addr32[0];
163 dst = &nhptr->daddr.s6_addr32[0]; 162 dst = &nhptr->daddr.s6_addr32[0];
164 protocol = nhptr->nexthdr; 163 protocol = nhptr->nexthdr;
165 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); 164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
166#else 165#else
167 src = &nhptr->saddr; 166 src = &nhptr->saddr;
168 dst = &nhptr->daddr; 167 dst = &nhptr->daddr;
169 protocol = nhptr->protocol; 168 protocol = nhptr->protocol;
170 xprt = ((u8*)nhptr) + (nhptr->ihl<<2); 169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
171 if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) 170 if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
172 return -1; 171 return -1;
173#endif 172#endif
174 173
@@ -176,10 +175,10 @@ restart:
176 h2 = hash_src(src); 175 h2 = hash_src(src);
177 176
178 for (s = sht[h1]; s; s = s->next) { 177 for (s = sht[h1]; s; s = s->next) {
179 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
180 protocol == s->protocol && 179 protocol == s->protocol &&
181 !(s->dpi.mask & 180 !(s->dpi.mask &
182 (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && 181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
183#if RSVP_DST_LEN == 4 182#if RSVP_DST_LEN == 4
184 dst[0] == s->dst[0] && 183 dst[0] == s->dst[0] &&
185 dst[1] == s->dst[1] && 184 dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
188 tunnelid == s->tunnelid) { 187 tunnelid == s->tunnelid) {
189 188
190 for (f = s->ht[h2]; f; f = f->next) { 189 for (f = s->ht[h2]; f; f = f->next) {
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && 190 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) 191 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193#if RSVP_DST_LEN == 4 192#if RSVP_DST_LEN == 4
194 && 193 &&
195 src[0] == f->src[0] && 194 src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
205 return 0; 204 return 0;
206 205
207 tunnelid = f->res.classid; 206 tunnelid = f->res.classid;
208 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); 207 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
209 goto restart; 208 goto restart;
210 } 209 }
211 } 210 }
@@ -224,11 +223,11 @@ matched:
224 223
225static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) 224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
226{ 225{
227 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 226 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
228 struct rsvp_session *s; 227 struct rsvp_session *s;
229 struct rsvp_filter *f; 228 struct rsvp_filter *f;
230 unsigned h1 = handle&0xFF; 229 unsigned int h1 = handle & 0xFF;
231 unsigned h2 = (handle>>8)&0xFF; 230 unsigned int h2 = (handle >> 8) & 0xFF;
232 231
233 if (h2 > 16) 232 if (h2 > 16)
234 return 0; 233 return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
258 return -ENOBUFS; 257 return -ENOBUFS;
259} 258}
260 259
261static inline void 260static void
262rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) 261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
263{ 262{
264 tcf_unbind_filter(tp, &f->res); 263 tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
277 276
278 sht = data->ht; 277 sht = data->ht;
279 278
280 for (h1=0; h1<256; h1++) { 279 for (h1 = 0; h1 < 256; h1++) {
281 struct rsvp_session *s; 280 struct rsvp_session *s;
282 281
283 while ((s = sht[h1]) != NULL) { 282 while ((s = sht[h1]) != NULL) {
284 sht[h1] = s->next; 283 sht[h1] = s->next;
285 284
286 for (h2=0; h2<=16; h2++) { 285 for (h2 = 0; h2 <= 16; h2++) {
287 struct rsvp_filter *f; 286 struct rsvp_filter *f;
288 287
289 while ((f = s->ht[h2]) != NULL) { 288 while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
299 298
300static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) 299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
301{ 300{
302 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; 301 struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
303 unsigned h = f->handle; 302 unsigned int h = f->handle;
304 struct rsvp_session **sp; 303 struct rsvp_session **sp;
305 struct rsvp_session *s = f->sess; 304 struct rsvp_session *s = f->sess;
306 int i; 305 int i;
307 306
308 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { 307 for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
309 if (*fp == f) { 308 if (*fp == f) {
310 tcf_tree_lock(tp); 309 tcf_tree_lock(tp);
311 *fp = f->next; 310 *fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
314 313
315 /* Strip tree */ 314 /* Strip tree */
316 315
317 for (i=0; i<=16; i++) 316 for (i = 0; i <= 16; i++)
318 if (s->ht[i]) 317 if (s->ht[i])
319 return 0; 318 return 0;
320 319
321 /* OK, session has no flows */ 320 /* OK, session has no flows */
322 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; 321 for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
323 *sp; sp = &(*sp)->next) { 322 *sp; sp = &(*sp)->next) {
324 if (*sp == s) { 323 if (*sp == s) {
325 tcf_tree_lock(tp); 324 tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
337 return 0; 336 return 0;
338} 337}
339 338
340static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) 339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
341{ 340{
342 struct rsvp_head *data = tp->root; 341 struct rsvp_head *data = tp->root;
343 int i = 0xFFFF; 342 int i = 0xFFFF;
344 343
345 while (i-- > 0) { 344 while (i-- > 0) {
346 u32 h; 345 u32 h;
346
347 if ((data->hgenerator += 0x10000) == 0) 347 if ((data->hgenerator += 0x10000) == 0)
348 data->hgenerator = 0x10000; 348 data->hgenerator = 0x10000;
349 h = data->hgenerator|salt; 349 h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
355 355
356static int tunnel_bts(struct rsvp_head *data) 356static int tunnel_bts(struct rsvp_head *data)
357{ 357{
358 int n = data->tgenerator>>5; 358 int n = data->tgenerator >> 5;
359 u32 b = 1<<(data->tgenerator&0x1F); 359 u32 b = 1 << (data->tgenerator & 0x1F);
360 360
361 if (data->tmap[n]&b) 361 if (data->tmap[n] & b)
362 return 0; 362 return 0;
363 data->tmap[n] |= b; 363 data->tmap[n] |= b;
364 return 1; 364 return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
372 372
373 memset(tmap, 0, sizeof(tmap)); 373 memset(tmap, 0, sizeof(tmap));
374 374
375 for (h1=0; h1<256; h1++) { 375 for (h1 = 0; h1 < 256; h1++) {
376 struct rsvp_session *s; 376 struct rsvp_session *s;
377 for (s = sht[h1]; s; s = s->next) { 377 for (s = sht[h1]; s; s = s->next) {
378 for (h2=0; h2<=16; h2++) { 378 for (h2 = 0; h2 <= 16; h2++) {
379 struct rsvp_filter *f; 379 struct rsvp_filter *f;
380 380
381 for (f = s->ht[h2]; f; f = f->next) { 381 for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
395{ 395{
396 int i, k; 396 int i, k;
397 397
398 for (k=0; k<2; k++) { 398 for (k = 0; k < 2; k++) {
399 for (i=255; i>0; i--) { 399 for (i = 255; i > 0; i--) {
400 if (++data->tgenerator == 0) 400 if (++data->tgenerator == 0)
401 data->tgenerator = 1; 401 data->tgenerator = 1;
402 if (tunnel_bts(data)) 402 if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
428 struct nlattr *opt = tca[TCA_OPTIONS-1]; 428 struct nlattr *opt = tca[TCA_OPTIONS-1];
429 struct nlattr *tb[TCA_RSVP_MAX + 1]; 429 struct nlattr *tb[TCA_RSVP_MAX + 1];
430 struct tcf_exts e; 430 struct tcf_exts e;
431 unsigned h1, h2; 431 unsigned int h1, h2;
432 __be32 *dst; 432 __be32 *dst;
433 int err; 433 int err;
434 434
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
443 if (err < 0) 443 if (err < 0)
444 return err; 444 return err;
445 445
446 if ((f = (struct rsvp_filter*)*arg) != NULL) { 446 f = (struct rsvp_filter *)*arg;
447 if (f) {
447 /* Node exists: adjust only classid */ 448 /* Node exists: adjust only classid */
448 449
449 if (f->handle != handle && handle) 450 if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
500 goto errout; 501 goto errout;
501 } 502 }
502 503
503 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { 504 for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
504 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 505 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
505 pinfo && pinfo->protocol == s->protocol && 506 pinfo && pinfo->protocol == s->protocol &&
506 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && 507 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
523 tcf_exts_change(tp, &f->exts, &e); 524 tcf_exts_change(tp, &f->exts, &e);
524 525
525 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) 526 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
526 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) 527 if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
527 break; 528 break;
528 f->next = *fp; 529 f->next = *fp;
529 wmb(); 530 wmb();
@@ -567,7 +568,7 @@ errout2:
567static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) 568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
568{ 569{
569 struct rsvp_head *head = tp->root; 570 struct rsvp_head *head = tp->root;
570 unsigned h, h1; 571 unsigned int h, h1;
571 572
572 if (arg->stop) 573 if (arg->stop)
573 return; 574 return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
598static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, 599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
599 struct sk_buff *skb, struct tcmsg *t) 600 struct sk_buff *skb, struct tcmsg *t)
600{ 601{
601 struct rsvp_filter *f = (struct rsvp_filter*)fh; 602 struct rsvp_filter *f = (struct rsvp_filter *)fh;
602 struct rsvp_session *s; 603 struct rsvp_session *s;
603 unsigned char *b = skb_tail_pointer(skb); 604 unsigned char *b = skb_tail_pointer(skb);
604 struct nlattr *nest; 605 struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
624 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); 625 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
625 if (f->res.classid) 626 if (f->res.classid)
626 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); 627 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
627 if (((f->handle>>8)&0xFF) != 16) 628 if (((f->handle >> 8) & 0xFF) != 16)
628 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); 629 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
629 630
630 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 631 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
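cls_rsvp's hash_dst()/hash_src(), reformatted above, fold the last 32 bits of an address down to an 8-bit session bucket or a 4-bit source slot by XORing the value with right-shifted copies of itself, mixing protocol and tunnel id into the destination side. A plain C restatement of that fold for illustration (IPPROTO_RSVP, protocol 46, used as the sample protocol):

    /* XOR-shift folds used for RSVP session and source hashing. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int fold_dst(uint32_t dst, uint8_t protocol, uint8_t tunnelid)
    {
        uint32_t h = dst;

        h ^= h >> 16;
        h ^= h >> 8;
        return (h ^ protocol ^ tunnelid) & 0xFF;     /* 256 session buckets */
    }

    static unsigned int fold_src(uint32_t src)
    {
        uint32_t h = src;

        h ^= h >> 16;
        h ^= h >> 8;
        h ^= h >> 4;
        return h & 0xF;                              /* 16 source slots */
    }

    int main(void)
    {
        uint32_t dst = 0x0A000001, src = 0x0A000002; /* 10.0.0.1 and 10.0.0.2 */

        printf("session bucket %u, source slot %u\n",
               fold_dst(dst, 46, 0), fold_src(src));
        return 0;
    }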
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330bb918..36667fa64237 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
249 * of the hashing index is below the threshold. 249 * of the hashing index is below the threshold.
250 */ 250 */
251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) 251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
252 cp.hash = (cp.mask >> cp.shift)+1; 252 cp.hash = (cp.mask >> cp.shift) + 1;
253 else 253 else
254 cp.hash = DEFAULT_HASH_SIZE; 254 cp.hash = DEFAULT_HASH_SIZE;
255 } 255 }
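The tcindex hunk above shows the sizing rule: when the range of possible (tc_index & mask) >> shift values is small, the classifier allocates a perfect table with one slot per value, otherwise it falls back to a fixed hash size. A sketch of that decision; the two constant values below are assumptions for the example, not necessarily the kernel's PERFECT_HASH_THRESHOLD/DEFAULT_HASH_SIZE:

    /* Choose between a perfect table and a fixed-size hash. */
    #include <stdio.h>

    #define PERFECT_HASH_THRESHOLD 64     /* assumed value for the sketch */
    #define DEFAULT_HASH_SIZE      64     /* assumed value for the sketch */

    static unsigned int tcindex_table_size(unsigned int mask, unsigned int shift)
    {
        unsigned int range = mask >> shift;

        return range < PERFECT_HASH_THRESHOLD ? range + 1 : DEFAULT_HASH_SIZE;
    }

    int main(void)
    {
        printf("mask 0x0f, shift 0 -> %u slots (perfect)\n",
               tcindex_table_size(0x0f, 0));
        printf("mask 0xffff, shift 0 -> %u slots (hashed)\n",
               tcindex_table_size(0xffff, 0));
        return 0;
    }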
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82178af..3b93fc0c8955 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
42#include <net/act_api.h> 42#include <net/act_api.h>
43#include <net/pkt_cls.h> 43#include <net/pkt_cls.h>
44 44
45struct tc_u_knode 45struct tc_u_knode {
46{
47 struct tc_u_knode *next; 46 struct tc_u_knode *next;
48 u32 handle; 47 u32 handle;
49 struct tc_u_hnode *ht_up; 48 struct tc_u_hnode *ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
63 struct tc_u32_sel sel; 62 struct tc_u32_sel sel;
64}; 63};
65 64
66struct tc_u_hnode 65struct tc_u_hnode {
67{
68 struct tc_u_hnode *next; 66 struct tc_u_hnode *next;
69 u32 handle; 67 u32 handle;
70 u32 prio; 68 u32 prio;
71 struct tc_u_common *tp_c; 69 struct tc_u_common *tp_c;
72 int refcnt; 70 int refcnt;
73 unsigned divisor; 71 unsigned int divisor;
74 struct tc_u_knode *ht[1]; 72 struct tc_u_knode *ht[1];
75}; 73};
76 74
77struct tc_u_common 75struct tc_u_common {
78{
79 struct tc_u_hnode *hlist; 76 struct tc_u_hnode *hlist;
80 struct Qdisc *q; 77 struct Qdisc *q;
81 int refcnt; 78 int refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
87 .police = TCA_U32_POLICE 84 .police = TCA_U32_POLICE
88}; 85};
89 86
90static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) 87static inline unsigned int u32_hash_fold(__be32 key,
88 const struct tc_u32_sel *sel,
89 u8 fshift)
91{ 90{
92 unsigned h = ntohl(key & sel->hmask)>>fshift; 91 unsigned int h = ntohl(key & sel->hmask) >> fshift;
93 92
94 return h; 93 return h;
95} 94}
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
101 unsigned int off; 100 unsigned int off;
102 } stack[TC_U32_MAXDEPTH]; 101 } stack[TC_U32_MAXDEPTH];
103 102
104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 103 struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
105 unsigned int off = skb_network_offset(skb); 104 unsigned int off = skb_network_offset(skb);
106 struct tc_u_knode *n; 105 struct tc_u_knode *n;
107 int sdepth = 0; 106 int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
120 struct tc_u32_key *key = n->sel.keys; 119 struct tc_u32_key *key = n->sel.keys;
121 120
122#ifdef CONFIG_CLS_U32_PERF 121#ifdef CONFIG_CLS_U32_PERF
123 n->pf->rcnt +=1; 122 n->pf->rcnt += 1;
124 j = 0; 123 j = 0;
125#endif 124#endif
126 125
@@ -133,14 +132,14 @@ next_knode:
133 } 132 }
134#endif 133#endif
135 134
136 for (i = n->sel.nkeys; i>0; i--, key++) { 135 for (i = n->sel.nkeys; i > 0; i--, key++) {
137 int toff = off + key->off + (off2 & key->offmask); 136 int toff = off + key->off + (off2 & key->offmask);
138 __be32 *data, _data; 137 __be32 *data, hdata;
139 138
140 if (skb_headroom(skb) + toff > INT_MAX) 139 if (skb_headroom(skb) + toff > INT_MAX)
141 goto out; 140 goto out;
142 141
143 data = skb_header_pointer(skb, toff, 4, &_data); 142 data = skb_header_pointer(skb, toff, 4, &hdata);
144 if (!data) 143 if (!data)
145 goto out; 144 goto out;
146 if ((*data ^ key->val) & key->mask) { 145 if ((*data ^ key->val) & key->mask) {
@@ -148,13 +147,13 @@ next_knode:
148 goto next_knode; 147 goto next_knode;
149 } 148 }
150#ifdef CONFIG_CLS_U32_PERF 149#ifdef CONFIG_CLS_U32_PERF
151 n->pf->kcnts[j] +=1; 150 n->pf->kcnts[j] += 1;
152 j++; 151 j++;
153#endif 152#endif
154 } 153 }
155 if (n->ht_down == NULL) { 154 if (n->ht_down == NULL) {
156check_terminal: 155check_terminal:
157 if (n->sel.flags&TC_U32_TERMINAL) { 156 if (n->sel.flags & TC_U32_TERMINAL) {
158 157
159 *res = n->res; 158 *res = n->res;
160#ifdef CONFIG_NET_CLS_IND 159#ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
164 } 163 }
165#endif 164#endif
166#ifdef CONFIG_CLS_U32_PERF 165#ifdef CONFIG_CLS_U32_PERF
167 n->pf->rhit +=1; 166 n->pf->rhit += 1;
168#endif 167#endif
169 r = tcf_exts_exec(skb, &n->exts, res); 168 r = tcf_exts_exec(skb, &n->exts, res);
170 if (r < 0) { 169 if (r < 0) {
@@ -188,26 +187,26 @@ check_terminal:
188 ht = n->ht_down; 187 ht = n->ht_down;
189 sel = 0; 188 sel = 0;
190 if (ht->divisor) { 189 if (ht->divisor) {
191 __be32 *data, _data; 190 __be32 *data, hdata;
192 191
193 data = skb_header_pointer(skb, off + n->sel.hoff, 4, 192 data = skb_header_pointer(skb, off + n->sel.hoff, 4,
194 &_data); 193 &hdata);
195 if (!data) 194 if (!data)
196 goto out; 195 goto out;
197 sel = ht->divisor & u32_hash_fold(*data, &n->sel, 196 sel = ht->divisor & u32_hash_fold(*data, &n->sel,
198 n->fshift); 197 n->fshift);
199 } 198 }
200 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) 199 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
201 goto next_ht; 200 goto next_ht;
202 201
203 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { 202 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
204 off2 = n->sel.off + 3; 203 off2 = n->sel.off + 3;
205 if (n->sel.flags & TC_U32_VAROFFSET) { 204 if (n->sel.flags & TC_U32_VAROFFSET) {
206 __be16 *data, _data; 205 __be16 *data, hdata;
207 206
208 data = skb_header_pointer(skb, 207 data = skb_header_pointer(skb,
209 off + n->sel.offoff, 208 off + n->sel.offoff,
210 2, &_data); 209 2, &hdata);
211 if (!data) 210 if (!data)
212 goto out; 211 goto out;
213 off2 += ntohs(n->sel.offmask & *data) >> 212 off2 += ntohs(n->sel.offmask & *data) >>
@@ -215,7 +214,7 @@ check_terminal:
215 } 214 }
216 off2 &= ~3; 215 off2 &= ~3;
217 } 216 }
218 if (n->sel.flags&TC_U32_EAT) { 217 if (n->sel.flags & TC_U32_EAT) {
219 off += off2; 218 off += off2;
220 off2 = 0; 219 off2 = 0;
221 } 220 }
@@ -236,11 +235,11 @@ out:
236 235
237deadloop: 236deadloop:
238 if (net_ratelimit()) 237 if (net_ratelimit())
239 printk(KERN_WARNING "cls_u32: dead loop\n"); 238 pr_warning("cls_u32: dead loop\n");
240 return -1; 239 return -1;
241} 240}
242 241
243static __inline__ struct tc_u_hnode * 242static struct tc_u_hnode *
244u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) 243u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
245{ 244{
246 struct tc_u_hnode *ht; 245 struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
252 return ht; 251 return ht;
253} 252}
254 253
255static __inline__ struct tc_u_knode * 254static struct tc_u_knode *
256u32_lookup_key(struct tc_u_hnode *ht, u32 handle) 255u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
257{ 256{
258 unsigned sel; 257 unsigned int sel;
259 struct tc_u_knode *n = NULL; 258 struct tc_u_knode *n = NULL;
260 259
261 sel = TC_U32_HASH(handle); 260 sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
300 do { 299 do {
301 if (++tp_c->hgenerator == 0x7FF) 300 if (++tp_c->hgenerator == 0x7FF)
302 tp_c->hgenerator = 1; 301 tp_c->hgenerator = 1;
303 } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); 302 } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
304 303
305 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; 304 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
306} 305}
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
378static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 377static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
379{ 378{
380 struct tc_u_knode *n; 379 struct tc_u_knode *n;
381 unsigned h; 380 unsigned int h;
382 381
383 for (h=0; h<=ht->divisor; h++) { 382 for (h = 0; h <= ht->divisor; h++) {
384 while ((n = ht->ht[h]) != NULL) { 383 while ((n = ht->ht[h]) != NULL) {
385 ht->ht[h] = n->next; 384 ht->ht[h] = n->next;
386 385
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
446 445
447static int u32_delete(struct tcf_proto *tp, unsigned long arg) 446static int u32_delete(struct tcf_proto *tp, unsigned long arg)
448{ 447{
449 struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; 448 struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
450 449
451 if (ht == NULL) 450 if (ht == NULL)
452 return 0; 451 return 0;
453 452
454 if (TC_U32_KEY(ht->handle)) 453 if (TC_U32_KEY(ht->handle))
455 return u32_delete_key(tp, (struct tc_u_knode*)ht); 454 return u32_delete_key(tp, (struct tc_u_knode *)ht);
456 455
457 if (tp->root == ht) 456 if (tp->root == ht)
458 return -EINVAL; 457 return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
470static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) 469static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
471{ 470{
472 struct tc_u_knode *n; 471 struct tc_u_knode *n;
473 unsigned i = 0x7FF; 472 unsigned int i = 0x7FF;
474 473
475 for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) 474 for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
476 if (i < TC_U32_NODE(n->handle)) 475 if (i < TC_U32_NODE(n->handle))
477 i = TC_U32_NODE(n->handle); 476 i = TC_U32_NODE(n->handle);
478 i++; 477 i++;
479 478
480 return handle|(i>0xFFF ? 0xFFF : i); 479 return handle | (i > 0xFFF ? 0xFFF : i);
481} 480}
482 481
483static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 482static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
566 if (err < 0) 565 if (err < 0)
567 return err; 566 return err;
568 567
569 if ((n = (struct tc_u_knode*)*arg) != NULL) { 568 n = (struct tc_u_knode *)*arg;
569 if (n) {
570 if (TC_U32_KEY(n->handle) == 0) 570 if (TC_U32_KEY(n->handle) == 0)
571 return -EINVAL; 571 return -EINVAL;
572 572
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
574 } 574 }
575 575
576 if (tb[TCA_U32_DIVISOR]) { 576 if (tb[TCA_U32_DIVISOR]) {
577 unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 577 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
578 578
579 if (--divisor > 0x100) 579 if (--divisor > 0x100)
580 return -EINVAL; 580 return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
585 if (handle == 0) 585 if (handle == 0)
586 return -ENOMEM; 586 return -ENOMEM;
587 } 587 }
588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); 588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
589 if (ht == NULL) 589 if (ht == NULL)
590 return -ENOBUFS; 590 return -ENOBUFS;
591 ht->tp_c = tp_c; 591 ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
683 struct tc_u_common *tp_c = tp->data; 683 struct tc_u_common *tp_c = tp->data;
684 struct tc_u_hnode *ht; 684 struct tc_u_hnode *ht;
685 struct tc_u_knode *n; 685 struct tc_u_knode *n;
686 unsigned h; 686 unsigned int h;
687 687
688 if (arg->stop) 688 if (arg->stop)
689 return; 689 return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
717static int u32_dump(struct tcf_proto *tp, unsigned long fh, 717static int u32_dump(struct tcf_proto *tp, unsigned long fh,
718 struct sk_buff *skb, struct tcmsg *t) 718 struct sk_buff *skb, struct tcmsg *t)
719{ 719{
720 struct tc_u_knode *n = (struct tc_u_knode*)fh; 720 struct tc_u_knode *n = (struct tc_u_knode *)fh;
721 struct nlattr *nest; 721 struct nlattr *nest;
722 722
723 if (n == NULL) 723 if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
730 goto nla_put_failure; 730 goto nla_put_failure;
731 731
732 if (TC_U32_KEY(n->handle) == 0) { 732 if (TC_U32_KEY(n->handle) == 0) {
733 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; 733 struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
734 u32 divisor = ht->divisor+1; 734 u32 divisor = ht->divisor + 1;
735
735 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); 736 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
736 } else { 737 } else {
737 NLA_PUT(skb, TCA_U32_SEL, 738 NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
755 goto nla_put_failure; 756 goto nla_put_failure;
756 757
757#ifdef CONFIG_NET_CLS_IND 758#ifdef CONFIG_NET_CLS_IND
758 if(strlen(n->indev)) 759 if (strlen(n->indev))
759 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); 760 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
760#endif 761#endif
761#ifdef CONFIG_CLS_U32_PERF 762#ifdef CONFIG_CLS_U32_PERF
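The cls_u32.c hunks above are pure checkpatch-style cleanups: "unsigned" is spelled out as "unsigned int", pointer casts gain a space before the "*", and assignments are pulled out of if () conditions. A minimal sketch of that last pattern, with struct node, lookup() and use() as made-up placeholders rather than anything from cls_u32.c:

    struct node *n;

    /* old form folded the assignment into the test:
     *     if ((n = lookup(arg)) != NULL)
     * the cleaned-up form separates the two steps and tests the
     * plain pointer:
     */
    n = lookup(arg);
    if (n)
        use(n);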
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc450397487a..1c8360a2752a 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
33 return 0; 33 return 0;
34 34
35 switch (cmp->align) { 35 switch (cmp->align) {
36 case TCF_EM_ALIGN_U8: 36 case TCF_EM_ALIGN_U8:
37 val = *ptr; 37 val = *ptr;
38 break; 38 break;
39 39
40 case TCF_EM_ALIGN_U16: 40 case TCF_EM_ALIGN_U16:
41 val = get_unaligned_be16(ptr); 41 val = get_unaligned_be16(ptr);
42 42
43 if (cmp_needs_transformation(cmp)) 43 if (cmp_needs_transformation(cmp))
44 val = be16_to_cpu(val); 44 val = be16_to_cpu(val);
45 break; 45 break;
46 46
47 case TCF_EM_ALIGN_U32: 47 case TCF_EM_ALIGN_U32:
48 /* Worth checking boundries? The branching seems 48 /* Worth checking boundries? The branching seems
49 * to get worse. Visit again. */ 49 * to get worse. Visit again.
50 val = get_unaligned_be32(ptr); 50 */
51 val = get_unaligned_be32(ptr);
51 52
52 if (cmp_needs_transformation(cmp)) 53 if (cmp_needs_transformation(cmp))
53 val = be32_to_cpu(val); 54 val = be32_to_cpu(val);
54 break; 55 break;
55 56
56 default: 57 default:
57 return 0; 58 return 0;
58 } 59 }
59 60
60 if (cmp->mask) 61 if (cmp->mask)
61 val &= cmp->mask; 62 val &= cmp->mask;
62 63
63 switch (cmp->opnd) { 64 switch (cmp->opnd) {
64 case TCF_EM_OPND_EQ: 65 case TCF_EM_OPND_EQ:
65 return val == cmp->val; 66 return val == cmp->val;
66 case TCF_EM_OPND_LT: 67 case TCF_EM_OPND_LT:
67 return val < cmp->val; 68 return val < cmp->val;
68 case TCF_EM_OPND_GT: 69 case TCF_EM_OPND_GT:
69 return val > cmp->val; 70 return val > cmp->val;
70 } 71 }
71 72
72 return 0; 73 return 0;
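The em_cmp.c hunk is an indentation and comment-style pass; the logic is untouched. One detail worth calling out from the code above: the matcher loads the field with get_unaligned_be16()/get_unaligned_be32() because the offset comes from the user's match configuration and may point anywhere in the packet, so the load must not assume natural alignment. A small sketch of the same idiom, with read_field() and its arguments invented for illustration:

    #include <asm/unaligned.h>

    /* Load a 32-bit big-endian field at an arbitrary packet offset.
     * get_unaligned_be32() is safe on unaligned addresses and returns
     * the value converted to host byte order.
     */
    static u32 read_field(const u8 *pkt, unsigned int offset)
    {
        return get_unaligned_be32(pkt + offset);
    }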
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333c..49130e8abff0 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -47,7 +47,7 @@
47 * on the meta type. Obviously, the length of the data must also 47 * on the meta type. Obviously, the length of the data must also
48 * be provided for non-numeric types. 48 * be provided for non-numeric types.
49 * 49 *
50 * Additionaly, type dependant modifiers such as shift operators 50 * Additionally, type dependent modifiers such as shift operators
51 * or mask may be applied to extend the functionaliy. As of now, 51 * or mask may be applied to extend the functionaliy. As of now,
52 * the variable length type supports shifting the byte string to 52 * the variable length type supports shifting the byte string to
53 * the right, eating up any number of octets and thus supporting 53 * the right, eating up any number of octets and thus supporting
@@ -73,21 +73,18 @@
73#include <net/pkt_cls.h> 73#include <net/pkt_cls.h>
74#include <net/sock.h> 74#include <net/sock.h>
75 75
76struct meta_obj 76struct meta_obj {
77{
78 unsigned long value; 77 unsigned long value;
79 unsigned int len; 78 unsigned int len;
80}; 79};
81 80
82struct meta_value 81struct meta_value {
83{
84 struct tcf_meta_val hdr; 82 struct tcf_meta_val hdr;
85 unsigned long val; 83 unsigned long val;
86 unsigned int len; 84 unsigned int len;
87}; 85};
88 86
89struct meta_match 87struct meta_match {
90{
91 struct meta_value lvalue; 88 struct meta_value lvalue;
92 struct meta_value rvalue; 89 struct meta_value rvalue;
93}; 90};
@@ -223,6 +220,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 220 dst->value = skb->mac_len;
224} 221}
225 222
223META_COLLECTOR(int_rxhash)
224{
225 dst->value = skb_get_rxhash(skb);
226}
227
226/************************************************************************** 228/**************************************************************************
227 * Netfilter 229 * Netfilter
228 **************************************************************************/ 230 **************************************************************************/
@@ -250,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
250 if (unlikely(skb_dst(skb) == NULL)) 252 if (unlikely(skb_dst(skb) == NULL))
251 *err = -1; 253 *err = -1;
252 else 254 else
253#ifdef CONFIG_NET_CLS_ROUTE 255#ifdef CONFIG_IP_ROUTE_CLASSID
254 dst->value = skb_dst(skb)->tclassid; 256 dst->value = skb_dst(skb)->tclassid;
255#else 257#else
256 dst->value = 0; 258 dst->value = 0;
@@ -262,7 +264,7 @@ META_COLLECTOR(int_rtiif)
262 if (unlikely(skb_rtable(skb) == NULL)) 264 if (unlikely(skb_rtable(skb) == NULL))
263 *err = -1; 265 *err = -1;
264 else 266 else
265 dst->value = skb_rtable(skb)->fl.iif; 267 dst->value = skb_rtable(skb)->rt_iif;
266} 268}
267 269
268/************************************************************************** 270/**************************************************************************
@@ -399,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf)
399META_COLLECTOR(int_sk_alloc) 401META_COLLECTOR(int_sk_alloc)
400{ 402{
401 SKIP_NONLOCAL(skb); 403 SKIP_NONLOCAL(skb);
402 dst->value = skb->sk->sk_allocation; 404 dst->value = (__force int) skb->sk->sk_allocation;
403} 405}
404 406
405META_COLLECTOR(int_sk_route_caps) 407META_COLLECTOR(int_sk_route_caps)
@@ -478,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
478 * Meta value collectors assignment table 480 * Meta value collectors assignment table
479 **************************************************************************/ 481 **************************************************************************/
480 482
481struct meta_ops 483struct meta_ops {
482{
483 void (*get)(struct sk_buff *, struct tcf_pkt_info *, 484 void (*get)(struct sk_buff *, struct tcf_pkt_info *,
484 struct meta_value *, struct meta_obj *, int *); 485 struct meta_value *, struct meta_obj *, int *);
485}; 486};
@@ -489,7 +490,7 @@ struct meta_ops
489 490
490/* Meta value operations table listing all meta value collectors and 491/* Meta value operations table listing all meta value collectors and
491 * assigns them to a type and meta id. */ 492 * assigns them to a type and meta id. */
492static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 493static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
493 [TCF_META_TYPE_VAR] = { 494 [TCF_META_TYPE_VAR] = {
494 [META_ID(DEV)] = META_FUNC(var_dev), 495 [META_ID(DEV)] = META_FUNC(var_dev),
495 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), 496 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
@@ -541,10 +542,11 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 542 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 543 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 544 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
545 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 546 }
545}; 547};
546 548
547static inline struct meta_ops * meta_ops(struct meta_value *val) 549static inline struct meta_ops *meta_ops(struct meta_value *val)
548{ 550{
549 return &__meta_ops[meta_type(val)][meta_id(val)]; 551 return &__meta_ops[meta_type(val)][meta_id(val)];
550} 552}
@@ -643,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
643{ 645{
644 if (v->len == sizeof(unsigned long)) 646 if (v->len == sizeof(unsigned long))
645 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); 647 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
646 else if (v->len == sizeof(u32)) { 648 else if (v->len == sizeof(u32))
647 NLA_PUT_U32(skb, tlv, v->val); 649 NLA_PUT_U32(skb, tlv, v->val);
648 }
649 650
650 return 0; 651 return 0;
651 652
@@ -657,8 +658,7 @@ nla_put_failure:
657 * Type specific operations table 658 * Type specific operations table
658 **************************************************************************/ 659 **************************************************************************/
659 660
660struct meta_type_ops 661struct meta_type_ops {
661{
662 void (*destroy)(struct meta_value *); 662 void (*destroy)(struct meta_value *);
663 int (*compare)(struct meta_obj *, struct meta_obj *); 663 int (*compare)(struct meta_obj *, struct meta_obj *);
664 int (*change)(struct meta_value *, struct nlattr *); 664 int (*change)(struct meta_value *, struct nlattr *);
@@ -666,7 +666,7 @@ struct meta_type_ops
666 int (*dump)(struct sk_buff *, struct meta_value *, int); 666 int (*dump)(struct sk_buff *, struct meta_value *, int);
667}; 667};
668 668
669static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { 669static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
670 [TCF_META_TYPE_VAR] = { 670 [TCF_META_TYPE_VAR] = {
671 .destroy = meta_var_destroy, 671 .destroy = meta_var_destroy,
672 .compare = meta_var_compare, 672 .compare = meta_var_compare,
@@ -682,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
682 } 682 }
683}; 683};
684 684
685static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) 685static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
686{ 686{
687 return &__meta_type_ops[meta_type(v)]; 687 return &__meta_type_ops[meta_type(v)];
688} 688}
@@ -707,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
707 return err; 707 return err;
708 708
709 if (meta_type_ops(v)->apply_extras) 709 if (meta_type_ops(v)->apply_extras)
710 meta_type_ops(v)->apply_extras(v, dst); 710 meta_type_ops(v)->apply_extras(v, dst);
711 711
712 return 0; 712 return 0;
713} 713}
@@ -726,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
726 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); 726 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
727 727
728 switch (meta->lvalue.hdr.op) { 728 switch (meta->lvalue.hdr.op) {
729 case TCF_EM_OPND_EQ: 729 case TCF_EM_OPND_EQ:
730 return !r; 730 return !r;
731 case TCF_EM_OPND_LT: 731 case TCF_EM_OPND_LT:
732 return r < 0; 732 return r < 0;
733 case TCF_EM_OPND_GT: 733 case TCF_EM_OPND_GT:
734 return r > 0; 734 return r > 0;
735 } 735 }
736 736
737 return 0; 737 return 0;
@@ -765,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
765 765
766static inline int meta_is_supported(struct meta_value *val) 766static inline int meta_is_supported(struct meta_value *val)
767{ 767{
768 return (!meta_id(val) || meta_ops(val)->get); 768 return !meta_id(val) || meta_ops(val)->get;
769} 769}
770 770
771static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { 771static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
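Beyond the brace and spelling cleanups, the em_meta.c hunks add one new meta id: an integer collector exposing the skb's receive hash. Pulling the two halves of that addition together (META_COLLECTOR, META_ID and META_FUNC are helper macros defined earlier in em_meta.c, outside the quoted context), a new integer collector needs exactly these two pieces:

    META_COLLECTOR(int_rxhash)
    {
        /* skb_get_rxhash() returns the flow hash, computing it on
         * demand when the driver did not supply one
         */
        dst->value = skb_get_rxhash(skb);
    }

    /* ... plus the matching slot in the integer section of __meta_ops[]: */
    [META_ID(RXHASH)]    = META_FUNC(int_rxhash),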
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176aee6e5..a3bed07a008b 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
18#include <linux/tc_ematch/tc_em_nbyte.h> 18#include <linux/tc_ematch/tc_em_nbyte.h>
19#include <net/pkt_cls.h> 19#include <net/pkt_cls.h>
20 20
21struct nbyte_data 21struct nbyte_data {
22{
23 struct tcf_em_nbyte hdr; 22 struct tcf_em_nbyte hdr;
24 char pattern[0]; 23 char pattern[0];
25}; 24};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..15d353d2e4be 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
19#include <linux/tc_ematch/tc_em_text.h> 19#include <linux/tc_ematch/tc_em_text.h>
20#include <net/pkt_cls.h> 20#include <net/pkt_cls.h>
21 21
22struct text_match 22struct text_match {
23{
24 u16 from_offset; 23 u16 from_offset;
25 u16 to_offset; 24 u16 to_offset;
26 u8 from_layer; 25 u8 from_layer;
@@ -103,7 +102,8 @@ retry:
103 102
104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) 103static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
105{ 104{
106 textsearch_destroy(EM_TEXT_PRIV(m)->config); 105 if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
106 textsearch_destroy(EM_TEXT_PRIV(m)->config);
107} 107}
108 108
109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) 109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
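The em_text.c change hardens the destructor: textsearch_destroy() is now called only when the match's private data and its textsearch config actually exist, so tearing down a match that never completed its setup no longer dereferences a NULL pointer. The same defensive-destroy shape in isolation, with struct foo and release_config() as placeholder names:

    static void foo_destroy(struct foo *f)
    {
        /* destroy paths can see a half-initialized object, so check
         * every pointer a failed or partial setup may have left NULL
         * before handing it to the real release routine
         */
        if (f && f->config)
            release_config(f->config);
    }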
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f1479f7da..797bdb88c010 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
35 if (!tcf_valid_offset(skb, ptr, sizeof(u32))) 35 if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
36 return 0; 36 return 0;
37 37
38 return !(((*(__be32*) ptr) ^ key->val) & key->mask); 38 return !(((*(__be32 *) ptr) ^ key->val) & key->mask);
39} 39}
40 40
41static struct tcf_ematch_ops em_u32_ops = { 41static struct tcf_ematch_ops em_u32_ops = {
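Only the cast spacing changes in em_u32.c, but the one-liner above is a compact value/mask match worth spelling out: XOR the packet word with the expected value, mask off the don't-care bits, and the field matches exactly when the result is zero. Restated as a stand-alone helper (the function name is invented; the expression is the same as in em_u32_match()):

    /* true iff (word & mask) == (val & mask) */
    static int u32_key_matches(__be32 word, __be32 val, __be32 mask)
    {
        return !((word ^ val) & mask);
    }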
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da961f80..88d93eb92507 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
93static LIST_HEAD(ematch_ops); 93static LIST_HEAD(ematch_ops);
94static DEFINE_RWLOCK(ematch_mod_lock); 94static DEFINE_RWLOCK(ematch_mod_lock);
95 95
96static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) 96static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
97{ 97{
98 struct tcf_ematch_ops *e = NULL; 98 struct tcf_ematch_ops *e = NULL;
99 99
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
163} 163}
164EXPORT_SYMBOL(tcf_em_unregister); 164EXPORT_SYMBOL(tcf_em_unregister);
165 165
166static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, 166static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
167 int index) 167 int index)
168{ 168{
169 return &tree->matches[index]; 169 return &tree->matches[index];
170} 170}
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
184 184
185 if (em_hdr->kind == TCF_EM_CONTAINER) { 185 if (em_hdr->kind == TCF_EM_CONTAINER) {
186 /* Special ematch called "container", carries an index 186 /* Special ematch called "container", carries an index
187 * referencing an external ematch sequence. */ 187 * referencing an external ematch sequence.
188 */
188 u32 ref; 189 u32 ref;
189 190
190 if (data_len < sizeof(ref)) 191 if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
195 goto errout; 196 goto errout;
196 197
197 /* We do not allow backward jumps to avoid loops and jumps 198 /* We do not allow backward jumps to avoid loops and jumps
198 * to our own position are of course illegal. */ 199 * to our own position are of course illegal.
200 */
199 if (ref <= idx) 201 if (ref <= idx)
200 goto errout; 202 goto errout;
201 203
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
208 * which automatically releases the reference again, therefore 210 * which automatically releases the reference again, therefore
209 * the module MUST not be given back under any circumstances 211 * the module MUST not be given back under any circumstances
210 * here. Be aware, the destroy function assumes that the 212 * here. Be aware, the destroy function assumes that the
211 * module is held if the ops field is non zero. */ 213 * module is held if the ops field is non zero.
214 */
212 em->ops = tcf_em_lookup(em_hdr->kind); 215 em->ops = tcf_em_lookup(em_hdr->kind);
213 216
214 if (em->ops == NULL) { 217 if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
221 if (em->ops) { 224 if (em->ops) {
222 /* We dropped the RTNL mutex in order to 225 /* We dropped the RTNL mutex in order to
223 * perform the module load. Tell the caller 226 * perform the module load. Tell the caller
224 * to replay the request. */ 227 * to replay the request.
228 */
225 module_put(em->ops->owner); 229 module_put(em->ops->owner);
226 err = -EAGAIN; 230 err = -EAGAIN;
227 } 231 }
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
230 } 234 }
231 235
232 /* ematch module provides expected length of data, so we 236 /* ematch module provides expected length of data, so we
233 * can do a basic sanity check. */ 237 * can do a basic sanity check.
238 */
234 if (em->ops->datalen && data_len < em->ops->datalen) 239 if (em->ops->datalen && data_len < em->ops->datalen)
235 goto errout; 240 goto errout;
236 241
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
246 * TCF_EM_SIMPLE may be specified stating that the 251 * TCF_EM_SIMPLE may be specified stating that the
247 * data only consists of a u32 integer and the module 252 * data only consists of a u32 integer and the module
248 * does not expected a memory reference but rather 253 * does not expected a memory reference but rather
249 * the value carried. */ 254 * the value carried.
255 */
250 if (em_hdr->flags & TCF_EM_SIMPLE) { 256 if (em_hdr->flags & TCF_EM_SIMPLE) {
251 if (data_len < sizeof(u32)) 257 if (data_len < sizeof(u32))
252 goto errout; 258 goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
334 * The array of rt attributes is parsed in the order as they are 340 * The array of rt attributes is parsed in the order as they are
335 * provided, their type must be incremental from 1 to n. Even 341 * provided, their type must be incremental from 1 to n. Even
336 * if it does not serve any real purpose, a failure of sticking 342 * if it does not serve any real purpose, a failure of sticking
337 * to this policy will result in parsing failure. */ 343 * to this policy will result in parsing failure.
344 */
338 for (idx = 0; nla_ok(rt_match, list_len); idx++) { 345 for (idx = 0; nla_ok(rt_match, list_len); idx++) {
339 err = -EINVAL; 346 err = -EINVAL;
340 347
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
359 /* Check if the number of matches provided by userspace actually 366 /* Check if the number of matches provided by userspace actually
360 * complies with the array of matches. The number was used for 367 * complies with the array of matches. The number was used for
361 * the validation of references and a mismatch could lead to 368 * the validation of references and a mismatch could lead to
362 * undefined references during the matching process. */ 369 * undefined references during the matching process.
370 */
363 if (idx != tree_hdr->nmatches) { 371 if (idx != tree_hdr->nmatches) {
364 err = -EINVAL; 372 err = -EINVAL;
365 goto errout_abort; 373 goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
449 .flags = em->flags 457 .flags = em->flags
450 }; 458 };
451 459
452 NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); 460 NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
453 461
454 if (em->ops && em->ops->dump) { 462 if (em->ops && em->ops->dump) {
455 if (em->ops->dump(skb, em) < 0) 463 if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
478 struct tcf_pkt_info *info) 486 struct tcf_pkt_info *info)
479{ 487{
480 int r = em->ops->match(skb, em, info); 488 int r = em->ops->match(skb, em, info);
489
481 return tcf_em_is_inverted(em) ? !r : r; 490 return tcf_em_is_inverted(em) ? !r : r;
482} 491}
483 492
@@ -527,8 +536,8 @@ pop_stack:
527 536
528stack_overflow: 537stack_overflow:
529 if (net_ratelimit()) 538 if (net_ratelimit())
530 printk(KERN_WARNING "tc ematch: local stack overflow," 539 pr_warning("tc ematch: local stack overflow,"
531 " increase NET_EMATCH_STACK\n"); 540 " increase NET_EMATCH_STACK\n");
532 return -1; 541 return -1;
533} 542}
534EXPORT_SYMBOL(__tcf_em_tree_match); 543EXPORT_SYMBOL(__tcf_em_tree_match);
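ematch.c itself only picks up the newer multi-line comment style and swaps printk(KERN_WARNING ...) for pr_warning(). The surrounding pattern is the part to imitate: the message sits behind net_ratelimit() because __tcf_em_tree_match() runs per packet, and an unthrottled printk there could flood the log. A generic sketch of that idiom, with the condition and message text as placeholders:

    /* rate-limit diagnostics emitted from per-packet code */
    if (unlikely(error_condition)) {
        if (net_ratelimit())
            pr_warning("subsystem: recoverable fast-path error\n");
        return -1;
    }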
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..6b8627661c98 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
187 int err = -ENOENT; 187 int err = -ENOENT;
188 188
189 write_lock(&qdisc_mod_lock); 189 write_lock(&qdisc_mod_lock);
190 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) 190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
191 if (q == qops) 191 if (q == qops)
192 break; 192 break;
193 if (q) { 193 if (q) {
@@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
240 if (q) 240 if (q)
241 goto out; 241 goto out;
242 242
243 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); 243 if (dev_ingress_queue(dev))
244 q = qdisc_match_from_root(
245 dev_ingress_queue(dev)->qdisc_sleeping,
246 handle);
244out: 247out:
245 return q; 248 return q;
246} 249}
@@ -318,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
318 if (!tab || --tab->refcnt) 321 if (!tab || --tab->refcnt)
319 return; 322 return;
320 323
321 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { 324 for (rtabp = &qdisc_rtab_list;
325 (rtab = *rtabp) != NULL;
326 rtabp = &rtab->next) {
322 if (rtab == tab) { 327 if (rtab == tab) {
323 *rtabp = rtab->next; 328 *rtabp = rtab->next;
324 kfree(rtab); 329 kfree(rtab);
@@ -360,7 +365,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 365 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 366 }
362 367
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 368 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 369 return ERR_PTR(-EINVAL);
365 370
366 spin_lock(&qdisc_stab_lock); 371 spin_lock(&qdisc_stab_lock);
@@ -393,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
393 return stab; 398 return stab;
394} 399}
395 400
401static void stab_kfree_rcu(struct rcu_head *head)
402{
403 kfree(container_of(head, struct qdisc_size_table, rcu));
404}
405
396void qdisc_put_stab(struct qdisc_size_table *tab) 406void qdisc_put_stab(struct qdisc_size_table *tab)
397{ 407{
398 if (!tab) 408 if (!tab)
@@ -402,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
402 412
403 if (--tab->refcnt == 0) { 413 if (--tab->refcnt == 0) {
404 list_del(&tab->list); 414 list_del(&tab->list);
405 kfree(tab); 415 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
406 } 416 }
407 417
408 spin_unlock(&qdisc_stab_lock); 418 spin_unlock(&qdisc_stab_lock);
@@ -425,7 +435,7 @@ nla_put_failure:
425 return -1; 435 return -1;
426} 436}
427 437
428void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) 438void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
429{ 439{
430 int pkt_len, slot; 440 int pkt_len, slot;
431 441
@@ -451,14 +461,13 @@ out:
451 pkt_len = 1; 461 pkt_len = 1;
452 qdisc_skb_cb(skb)->pkt_len = pkt_len; 462 qdisc_skb_cb(skb)->pkt_len = pkt_len;
453} 463}
454EXPORT_SYMBOL(qdisc_calculate_pkt_len); 464EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
455 465
456void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) 466void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
457{ 467{
458 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { 468 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
459 printk(KERN_WARNING 469 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
460 "%s: %s qdisc %X: is non-work-conserving?\n", 470 txt, qdisc->ops->id, qdisc->handle >> 16);
461 txt, qdisc->ops->id, qdisc->handle >> 16);
462 qdisc->flags |= TCQ_F_WARN_NONWC; 471 qdisc->flags |= TCQ_F_WARN_NONWC;
463 } 472 }
464} 473}
@@ -469,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
469 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 478 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
470 timer); 479 timer);
471 480
472 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 481 qdisc_unthrottled(wd->qdisc);
473 __netif_schedule(qdisc_root(wd->qdisc)); 482 __netif_schedule(qdisc_root(wd->qdisc));
474 483
475 return HRTIMER_NORESTART; 484 return HRTIMER_NORESTART;
@@ -491,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
491 &qdisc_root_sleeping(wd->qdisc)->state)) 500 &qdisc_root_sleeping(wd->qdisc)->state))
492 return; 501 return;
493 502
494 wd->qdisc->flags |= TCQ_F_THROTTLED; 503 qdisc_throttled(wd->qdisc);
495 time = ktime_set(0, 0); 504 time = ktime_set(0, 0);
496 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); 505 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
497 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); 506 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -501,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
501void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) 510void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
502{ 511{
503 hrtimer_cancel(&wd->timer); 512 hrtimer_cancel(&wd->timer);
504 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 513 qdisc_unthrottled(wd->qdisc);
505} 514}
506EXPORT_SYMBOL(qdisc_watchdog_cancel); 515EXPORT_SYMBOL(qdisc_watchdog_cancel);
507 516
@@ -622,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
622 autohandle = TC_H_MAKE(0x80000000U, 0); 631 autohandle = TC_H_MAKE(0x80000000U, 0);
623 } while (qdisc_lookup(dev, autohandle) && --i > 0); 632 } while (qdisc_lookup(dev, autohandle) && --i > 0);
624 633
625 return i>0 ? autohandle : 0; 634 return i > 0 ? autohandle : 0;
626} 635}
627 636
628void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 637void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -690,6 +699,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
690 (new && new->flags & TCQ_F_INGRESS)) { 699 (new && new->flags & TCQ_F_INGRESS)) {
691 num_q = 1; 700 num_q = 1;
692 ingress = 1; 701 ingress = 1;
702 if (!dev_ingress_queue(dev))
703 return -ENOENT;
693 } 704 }
694 705
695 if (dev->flags & IFF_UP) 706 if (dev->flags & IFF_UP)
@@ -701,7 +712,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
701 } 712 }
702 713
703 for (i = 0; i < num_q; i++) { 714 for (i = 0; i < num_q; i++) {
704 struct netdev_queue *dev_queue = &dev->rx_queue; 715 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
705 716
706 if (!ingress) 717 if (!ingress)
707 dev_queue = netdev_get_tx_queue(dev, i); 718 dev_queue = netdev_get_tx_queue(dev, i);
@@ -829,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
829 err = PTR_ERR(stab); 840 err = PTR_ERR(stab);
830 goto err_out4; 841 goto err_out4;
831 } 842 }
832 sch->stab = stab; 843 rcu_assign_pointer(sch->stab, stab);
833 } 844 }
834 if (tca[TCA_RATE]) { 845 if (tca[TCA_RATE]) {
835 spinlock_t *root_lock; 846 spinlock_t *root_lock;
@@ -869,7 +880,7 @@ err_out4:
869 * Any broken qdiscs that would require a ops->reset() here? 880 * Any broken qdiscs that would require a ops->reset() here?
870 * The qdisc was never in action so it shouldn't be necessary. 881 * The qdisc was never in action so it shouldn't be necessary.
871 */ 882 */
872 qdisc_put_stab(sch->stab); 883 qdisc_put_stab(rtnl_dereference(sch->stab));
873 if (ops->destroy) 884 if (ops->destroy)
874 ops->destroy(sch); 885 ops->destroy(sch);
875 goto err_out3; 886 goto err_out3;
@@ -877,7 +888,7 @@ err_out4:
877 888
878static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) 889static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
879{ 890{
880 struct qdisc_size_table *stab = NULL; 891 struct qdisc_size_table *ostab, *stab = NULL;
881 int err = 0; 892 int err = 0;
882 893
883 if (tca[TCA_OPTIONS]) { 894 if (tca[TCA_OPTIONS]) {
@@ -894,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
894 return PTR_ERR(stab); 905 return PTR_ERR(stab);
895 } 906 }
896 907
897 qdisc_put_stab(sch->stab); 908 ostab = rtnl_dereference(sch->stab);
898 sch->stab = stab; 909 rcu_assign_pointer(sch->stab, stab);
910 qdisc_put_stab(ostab);
899 911
900 if (tca[TCA_RATE]) { 912 if (tca[TCA_RATE]) {
901 /* NB: ignores errors from replace_estimator 913 /* NB: ignores errors from replace_estimator
@@ -910,9 +922,8 @@ out:
910 return 0; 922 return 0;
911} 923}
912 924
913struct check_loop_arg 925struct check_loop_arg {
914{ 926 struct qdisc_walker w;
915 struct qdisc_walker w;
916 struct Qdisc *p; 927 struct Qdisc *p;
917 int depth; 928 int depth;
918}; 929};
@@ -965,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
965 struct Qdisc *p = NULL; 976 struct Qdisc *p = NULL;
966 int err; 977 int err;
967 978
968 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 979 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
980 if (!dev)
969 return -ENODEV; 981 return -ENODEV;
970 982
971 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 983 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -975,11 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
975 if (clid) { 987 if (clid) {
976 if (clid != TC_H_ROOT) { 988 if (clid != TC_H_ROOT) {
977 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 989 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
978 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 990 p = qdisc_lookup(dev, TC_H_MAJ(clid));
991 if (!p)
979 return -ENOENT; 992 return -ENOENT;
980 q = qdisc_leaf(p, clid); 993 q = qdisc_leaf(p, clid);
981 } else { /* ingress */ 994 } else if (dev_ingress_queue(dev)) {
982 q = dev->rx_queue.qdisc_sleeping; 995 q = dev_ingress_queue(dev)->qdisc_sleeping;
983 } 996 }
984 } else { 997 } else {
985 q = dev->qdisc; 998 q = dev->qdisc;
@@ -990,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
990 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) 1003 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
991 return -EINVAL; 1004 return -EINVAL;
992 } else { 1005 } else {
993 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) 1006 q = qdisc_lookup(dev, tcm->tcm_handle);
1007 if (!q)
994 return -ENOENT; 1008 return -ENOENT;
995 } 1009 }
996 1010
@@ -1002,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1002 return -EINVAL; 1016 return -EINVAL;
1003 if (q->handle == 0) 1017 if (q->handle == 0)
1004 return -ENOENT; 1018 return -ENOENT;
1005 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) 1019 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1020 if (err != 0)
1006 return err; 1021 return err;
1007 } else { 1022 } else {
1008 qdisc_notify(net, skb, n, clid, NULL, q); 1023 qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1011,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1011} 1026}
1012 1027
1013/* 1028/*
1014 Create/change qdisc. 1029 * Create/change qdisc.
1015 */ 1030 */
1016 1031
1017static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 1032static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1030,7 +1045,8 @@ replay:
1030 clid = tcm->tcm_parent; 1045 clid = tcm->tcm_parent;
1031 q = p = NULL; 1046 q = p = NULL;
1032 1047
1033 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1048 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1049 if (!dev)
1034 return -ENODEV; 1050 return -ENODEV;
1035 1051
1036 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1052 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1040,11 +1056,12 @@ replay:
1040 if (clid) { 1056 if (clid) {
1041 if (clid != TC_H_ROOT) { 1057 if (clid != TC_H_ROOT) {
1042 if (clid != TC_H_INGRESS) { 1058 if (clid != TC_H_INGRESS) {
1043 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 1059 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1060 if (!p)
1044 return -ENOENT; 1061 return -ENOENT;
1045 q = qdisc_leaf(p, clid); 1062 q = qdisc_leaf(p, clid);
1046 } else { /*ingress */ 1063 } else if (dev_ingress_queue_create(dev)) {
1047 q = dev->rx_queue.qdisc_sleeping; 1064 q = dev_ingress_queue(dev)->qdisc_sleeping;
1048 } 1065 }
1049 } else { 1066 } else {
1050 q = dev->qdisc; 1067 q = dev->qdisc;
@@ -1056,13 +1073,14 @@ replay:
1056 1073
1057 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1074 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1058 if (tcm->tcm_handle) { 1075 if (tcm->tcm_handle) {
1059 if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) 1076 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1060 return -EEXIST; 1077 return -EEXIST;
1061 if (TC_H_MIN(tcm->tcm_handle)) 1078 if (TC_H_MIN(tcm->tcm_handle))
1062 return -EINVAL; 1079 return -EINVAL;
1063 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) 1080 q = qdisc_lookup(dev, tcm->tcm_handle);
1081 if (!q)
1064 goto create_n_graft; 1082 goto create_n_graft;
1065 if (n->nlmsg_flags&NLM_F_EXCL) 1083 if (n->nlmsg_flags & NLM_F_EXCL)
1066 return -EEXIST; 1084 return -EEXIST;
1067 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1085 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1068 return -EINVAL; 1086 return -EINVAL;
@@ -1072,7 +1090,7 @@ replay:
1072 atomic_inc(&q->refcnt); 1090 atomic_inc(&q->refcnt);
1073 goto graft; 1091 goto graft;
1074 } else { 1092 } else {
1075 if (q == NULL) 1093 if (!q)
1076 goto create_n_graft; 1094 goto create_n_graft;
1077 1095
1078 /* This magic test requires explanation. 1096 /* This magic test requires explanation.
@@ -1094,9 +1112,9 @@ replay:
1094 * For now we select create/graft, if 1112 * For now we select create/graft, if
1095 * user gave KIND, which does not match existing. 1113 * user gave KIND, which does not match existing.
1096 */ 1114 */
1097 if ((n->nlmsg_flags&NLM_F_CREATE) && 1115 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1098 (n->nlmsg_flags&NLM_F_REPLACE) && 1116 (n->nlmsg_flags & NLM_F_REPLACE) &&
1099 ((n->nlmsg_flags&NLM_F_EXCL) || 1117 ((n->nlmsg_flags & NLM_F_EXCL) ||
1100 (tca[TCA_KIND] && 1118 (tca[TCA_KIND] &&
1101 nla_strcmp(tca[TCA_KIND], q->ops->id)))) 1119 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1102 goto create_n_graft; 1120 goto create_n_graft;
@@ -1111,7 +1129,7 @@ replay:
1111 /* Change qdisc parameters */ 1129 /* Change qdisc parameters */
1112 if (q == NULL) 1130 if (q == NULL)
1113 return -ENOENT; 1131 return -ENOENT;
1114 if (n->nlmsg_flags&NLM_F_EXCL) 1132 if (n->nlmsg_flags & NLM_F_EXCL)
1115 return -EEXIST; 1133 return -EEXIST;
1116 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1134 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1117 return -EINVAL; 1135 return -EINVAL;
@@ -1121,13 +1139,16 @@ replay:
1121 return err; 1139 return err;
1122 1140
1123create_n_graft: 1141create_n_graft:
1124 if (!(n->nlmsg_flags&NLM_F_CREATE)) 1142 if (!(n->nlmsg_flags & NLM_F_CREATE))
1125 return -ENOENT; 1143 return -ENOENT;
1126 if (clid == TC_H_INGRESS) 1144 if (clid == TC_H_INGRESS) {
1127 q = qdisc_create(dev, &dev->rx_queue, p, 1145 if (dev_ingress_queue(dev))
1128 tcm->tcm_parent, tcm->tcm_parent, 1146 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1129 tca, &err); 1147 tcm->tcm_parent, tcm->tcm_parent,
1130 else { 1148 tca, &err);
1149 else
1150 err = -ENOENT;
1151 } else {
1131 struct netdev_queue *dev_queue; 1152 struct netdev_queue *dev_queue;
1132 1153
1133 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1154 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
@@ -1165,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1165 struct nlmsghdr *nlh; 1186 struct nlmsghdr *nlh;
1166 unsigned char *b = skb_tail_pointer(skb); 1187 unsigned char *b = skb_tail_pointer(skb);
1167 struct gnet_dump d; 1188 struct gnet_dump d;
1189 struct qdisc_size_table *stab;
1168 1190
1169 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 1191 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1170 tcm = NLMSG_DATA(nlh); 1192 tcm = NLMSG_DATA(nlh);
@@ -1180,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1180 goto nla_put_failure; 1202 goto nla_put_failure;
1181 q->qstats.qlen = q->q.qlen; 1203 q->qstats.qlen = q->q.qlen;
1182 1204
1183 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) 1205 stab = rtnl_dereference(q->stab);
1206 if (stab && qdisc_dump_stab(skb, stab) < 0)
1184 goto nla_put_failure; 1207 goto nla_put_failure;
1185 1208
1186 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1209 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1224,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1224 return -ENOBUFS; 1247 return -ENOBUFS;
1225 1248
1226 if (old && !tc_qdisc_dump_ignore(old)) { 1249 if (old && !tc_qdisc_dump_ignore(old)) {
1227 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) 1250 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
1251 0, RTM_DELQDISC) < 0)
1228 goto err_out; 1252 goto err_out;
1229 } 1253 }
1230 if (new && !tc_qdisc_dump_ignore(new)) { 1254 if (new && !tc_qdisc_dump_ignore(new)) {
1231 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 1255 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
1256 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1232 goto err_out; 1257 goto err_out;
1233 } 1258 }
1234 1259
1235 if (skb->len) 1260 if (skb->len)
1236 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1261 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
1262 n->nlmsg_flags & NLM_F_ECHO);
1237 1263
1238err_out: 1264err_out:
1239 kfree_skb(skb); 1265 kfree_skb(skb);
@@ -1265,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1265 q_idx++; 1291 q_idx++;
1266 continue; 1292 continue;
1267 } 1293 }
1268 if (!tc_qdisc_dump_ignore(q) && 1294 if (!tc_qdisc_dump_ignore(q) &&
1269 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, 1295 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1270 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) 1296 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1271 goto done; 1297 goto done;
@@ -1304,8 +1330,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1304 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) 1330 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1305 goto done; 1331 goto done;
1306 1332
1307 dev_queue = &dev->rx_queue; 1333 dev_queue = dev_ingress_queue(dev);
1308 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) 1334 if (dev_queue &&
1335 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1336 &q_idx, s_q_idx) < 0)
1309 goto done; 1337 goto done;
1310 1338
1311cont: 1339cont:
@@ -1344,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1344 u32 qid = TC_H_MAJ(clid); 1372 u32 qid = TC_H_MAJ(clid);
1345 int err; 1373 int err;
1346 1374
1347 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1375 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1376 if (!dev)
1348 return -ENODEV; 1377 return -ENODEV;
1349 1378
1350 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1379 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1379,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1379 qid = dev->qdisc->handle; 1408 qid = dev->qdisc->handle;
1380 1409
1381 /* Now qid is genuine qdisc handle consistent 1410 /* Now qid is genuine qdisc handle consistent
1382 both with parent and child. 1411 * both with parent and child.
1383 1412 *
1384 TC_H_MAJ(pid) still may be unspecified, complete it now. 1413 * TC_H_MAJ(pid) still may be unspecified, complete it now.
1385 */ 1414 */
1386 if (pid) 1415 if (pid)
1387 pid = TC_H_MAKE(qid, pid); 1416 pid = TC_H_MAKE(qid, pid);
@@ -1391,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1391 } 1420 }
1392 1421
1393 /* OK. Locate qdisc */ 1422 /* OK. Locate qdisc */
1394 if ((q = qdisc_lookup(dev, qid)) == NULL) 1423 q = qdisc_lookup(dev, qid);
1424 if (!q)
1395 return -ENOENT; 1425 return -ENOENT;
1396 1426
1397 /* An check that it supports classes */ 1427 /* An check that it supports classes */
@@ -1411,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1411 1441
1412 if (cl == 0) { 1442 if (cl == 0) {
1413 err = -ENOENT; 1443 err = -ENOENT;
1414 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) 1444 if (n->nlmsg_type != RTM_NEWTCLASS ||
1445 !(n->nlmsg_flags & NLM_F_CREATE))
1415 goto out; 1446 goto out;
1416 } else { 1447 } else {
1417 switch (n->nlmsg_type) { 1448 switch (n->nlmsg_type) {
1418 case RTM_NEWTCLASS: 1449 case RTM_NEWTCLASS:
1419 err = -EEXIST; 1450 err = -EEXIST;
1420 if (n->nlmsg_flags&NLM_F_EXCL) 1451 if (n->nlmsg_flags & NLM_F_EXCL)
1421 goto out; 1452 goto out;
1422 break; 1453 break;
1423 case RTM_DELTCLASS: 1454 case RTM_DELTCLASS:
@@ -1509,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
1509 return -EINVAL; 1540 return -EINVAL;
1510 } 1541 }
1511 1542
1512 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1543 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
1544 n->nlmsg_flags & NLM_F_ECHO);
1513} 1545}
1514 1546
1515struct qdisc_dump_args 1547struct qdisc_dump_args {
1516{ 1548 struct qdisc_walker w;
1517 struct qdisc_walker w; 1549 struct sk_buff *skb;
1518 struct sk_buff *skb; 1550 struct netlink_callback *cb;
1519 struct netlink_callback *cb;
1520}; 1551};
1521 1552
1522static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) 1553static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1578,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1578 1609
1579static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 1610static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1580{ 1611{
1581 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 1612 struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
1582 struct net *net = sock_net(skb->sk); 1613 struct net *net = sock_net(skb->sk);
1583 struct netdev_queue *dev_queue; 1614 struct netdev_queue *dev_queue;
1584 struct net_device *dev; 1615 struct net_device *dev;
@@ -1586,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1586 1617
1587 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1618 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1588 return 0; 1619 return 0;
1589 if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1620 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1621 if (!dev)
1590 return 0; 1622 return 0;
1591 1623
1592 s_t = cb->args[0]; 1624 s_t = cb->args[0];
@@ -1595,8 +1627,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1595 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 1627 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1596 goto done; 1628 goto done;
1597 1629
1598 dev_queue = &dev->rx_queue; 1630 dev_queue = dev_ingress_queue(dev);
1599 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) 1631 if (dev_queue &&
1632 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1633 &t, s_t) < 0)
1600 goto done; 1634 goto done;
1601 1635
1602done: 1636done:
@@ -1607,19 +1641,22 @@ done:
1607} 1641}
1608 1642
1609/* Main classifier routine: scans classifier chain attached 1643/* Main classifier routine: scans classifier chain attached
1610 to this qdisc, (optionally) tests for protocol and asks 1644 * to this qdisc, (optionally) tests for protocol and asks
1611 specific classifiers. 1645 * specific classifiers.
1612 */ 1646 */
1613int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, 1647int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1614 struct tcf_result *res) 1648 struct tcf_result *res)
1615{ 1649{
1616 __be16 protocol = skb->protocol; 1650 __be16 protocol = skb->protocol;
1617 int err = 0; 1651 int err;
1618 1652
1619 for (; tp; tp = tp->next) { 1653 for (; tp; tp = tp->next) {
1620 if ((tp->protocol == protocol || 1654 if (tp->protocol != protocol &&
1621 tp->protocol == htons(ETH_P_ALL)) && 1655 tp->protocol != htons(ETH_P_ALL))
1622 (err = tp->classify(skb, tp, res)) >= 0) { 1656 continue;
1657 err = tp->classify(skb, tp, res);
1658
1659 if (err >= 0) {
1623#ifdef CONFIG_NET_CLS_ACT 1660#ifdef CONFIG_NET_CLS_ACT
1624 if (err != TC_ACT_RECLASSIFY && skb->tc_verd) 1661 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1625 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); 1662 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1635,12 +1672,10 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1635 struct tcf_result *res) 1672 struct tcf_result *res)
1636{ 1673{
1637 int err = 0; 1674 int err = 0;
1638 __be16 protocol;
1639#ifdef CONFIG_NET_CLS_ACT 1675#ifdef CONFIG_NET_CLS_ACT
1640 struct tcf_proto *otp = tp; 1676 struct tcf_proto *otp = tp;
1641reclassify: 1677reclassify:
1642#endif 1678#endif
1643 protocol = skb->protocol;
1644 1679
1645 err = tc_classify_compat(skb, tp, res); 1680 err = tc_classify_compat(skb, tp, res);
1646#ifdef CONFIG_NET_CLS_ACT 1681#ifdef CONFIG_NET_CLS_ACT
@@ -1650,11 +1685,11 @@ reclassify:
1650 1685
1651 if (verd++ >= MAX_REC_LOOP) { 1686 if (verd++ >= MAX_REC_LOOP) {
1652 if (net_ratelimit()) 1687 if (net_ratelimit())
1653 printk(KERN_NOTICE 1688 pr_notice("%s: packet reclassify loop"
1654 "%s: packet reclassify loop"
1655 " rule prio %u protocol %02x\n", 1689 " rule prio %u protocol %02x\n",
1656 tp->q->ops->id, 1690 tp->q->ops->id,
1657 tp->prio & 0xffff, ntohs(tp->protocol)); 1691 tp->prio & 0xffff,
1692 ntohs(tp->protocol));
1658 return TC_ACT_SHOT; 1693 return TC_ACT_SHOT;
1659 } 1694 }
1660 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); 1695 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1747,7 +1782,7 @@ static int __init pktsched_init(void)
1747 1782
1748 err = register_pernet_subsys(&psched_net_ops); 1783 err = register_pernet_subsys(&psched_net_ops);
1749 if (err) { 1784 if (err) {
1750 printk(KERN_ERR "pktsched_init: " 1785 pr_err("pktsched_init: "
1751 "cannot initialize per netns operations\n"); 1786 "cannot initialize per netns operations\n");
1752 return err; 1787 return err;
1753 } 1788 }
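The one real behavioural change in the sch_api.c hunks is that sch->stab becomes an RCU-managed pointer: the dump and change paths shown above read it with rtnl_dereference() (they hold RTNL), writers publish a replacement with rcu_assign_pointer(), and the old table is only kfree()d after a grace period via call_rcu_bh() and the new stab_kfree_rcu() callback. The replace step in qdisc_change() reduces to this sequence, restated from the hunk with comments added:

    static void stab_kfree_rcu(struct rcu_head *head)
    {
        kfree(container_of(head, struct qdisc_size_table, rcu));
    }

    /* in qdisc_change(), with RTNL held and "stab" the new table */
    ostab = rtnl_dereference(sch->stab);   /* current table         */
    rcu_assign_pointer(sch->stab, stab);   /* publish the new one   */
    qdisc_put_stab(ostab);                 /* drop the old reference;
                                            * the final put frees it
                                            * through call_rcu_bh() */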
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6318e1136b83..3f08158b8688 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -275,8 +275,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
275 goto err_out; 275 goto err_out;
276 } 276 }
277 flow->filter_list = NULL; 277 flow->filter_list = NULL;
278 flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 278 flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
279 &pfifo_qdisc_ops, classid);
280 if (!flow->q) 279 if (!flow->q)
281 flow->q = &noop_qdisc; 280 flow->q = &noop_qdisc;
282 pr_debug("atm_tc_change: qdisc %p\n", flow->q); 281 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -320,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
320 * creation), and one for the reference held when calling delete. 319 * creation), and one for the reference held when calling delete.
321 */ 320 */
322 if (flow->ref < 2) { 321 if (flow->ref < 2) {
323 printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); 322 pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
324 return -EINVAL; 323 return -EINVAL;
325 } 324 }
326 if (flow->ref > 2) 325 if (flow->ref > 2)
@@ -385,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
385 } 384 }
386 } 385 }
387 flow = NULL; 386 flow = NULL;
388 done: 387done:
389 ; 388 ;
390 } 389 }
391 if (!flow) 390 if (!flow) {
392 flow = &p->link; 391 flow = &p->link;
393 else { 392 } else {
394 if (flow->vcc) 393 if (flow->vcc)
395 ATM_SKB(skb)->atm_options = flow->vcc->atm_options; 394 ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
396 /*@@@ looks good ... but it's not supposed to work :-) */ 395 /*@@@ looks good ... but it's not supposed to work :-) */
@@ -423,10 +422,8 @@ drop: __maybe_unused
423 } 422 }
424 return ret; 423 return ret;
425 } 424 }
426 sch->bstats.bytes += qdisc_pkt_len(skb); 425 qdisc_bstats_update(sch, skb);
427 sch->bstats.packets++; 426 bstats_update(&flow->bstats, skb);
428 flow->bstats.bytes += qdisc_pkt_len(skb);
429 flow->bstats.packets++;
430 /* 427 /*
431 * Okay, this may seem weird. We pretend we've dropped the packet if 428 * Okay, this may seem weird. We pretend we've dropped the packet if
432 * it goes via ATM. The reason for this is that the outer qdisc 429 * it goes via ATM. The reason for this is that the outer qdisc
@@ -543,7 +540,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
543 INIT_LIST_HEAD(&p->flows); 540 INIT_LIST_HEAD(&p->flows);
544 INIT_LIST_HEAD(&p->link.list); 541 INIT_LIST_HEAD(&p->link.list);
545 list_add(&p->link.list, &p->flows); 542 list_add(&p->link.list, &p->flows);
546 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 543 p->link.q = qdisc_create_dflt(sch->dev_queue,
547 &pfifo_qdisc_ops, sch->handle); 544 &pfifo_qdisc_ops, sch->handle);
548 if (!p->link.q) 545 if (!p->link.q)
549 p->link.q = &noop_qdisc; 546 p->link.q = &noop_qdisc;
@@ -579,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
579 576
580 list_for_each_entry_safe(flow, tmp, &p->flows, list) { 577 list_for_each_entry_safe(flow, tmp, &p->flows, list) {
581 if (flow->ref > 1) 578 if (flow->ref > 1)
582 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, 579 pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
583 flow->ref);
584 atm_tc_put(sch, (unsigned long)flow); 580 atm_tc_put(sch, (unsigned long)flow);
585 } 581 }
586 tasklet_kill(&p->task); 582 tasklet_kill(&p->task);
@@ -619,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
619 } 615 }
620 if (flow->excess) 616 if (flow->excess)
621 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); 617 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
622 else { 618 else
623 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); 619 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
624 }
625 620
626 nla_nest_end(skb, nest); 621 nla_nest_end(skb, nest);
627 return skb->len; 622 return skb->len;
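sch_atm.c gets three mechanical conversions, all visible above: printk(KERN_ERR ...) becomes pr_err(), qdisc_create_dflt() drops its net_device argument (the queue already identifies the device), and the open-coded byte/packet accounting is replaced by the shared bstats helpers, which derive both counters from the skb. The four manual increments in atm_tc_enqueue() collapse to:

    /* was:
     *     sch->bstats.bytes   += qdisc_pkt_len(skb);
     *     sch->bstats.packets++;
     *     flow->bstats.bytes  += qdisc_pkt_len(skb);
     *     flow->bstats.packets++;
     */
    qdisc_bstats_update(sch, skb);         /* qdisc-level counters */
    bstats_update(&flow->bstats, skb);     /* per-class counters   */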
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 28c01ef5abc8..24d94c097b35 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
72struct cbq_sched_data; 72struct cbq_sched_data;
73 73
74 74
75struct cbq_class 75struct cbq_class {
76{
77 struct Qdisc_class_common common; 76 struct Qdisc_class_common common;
78 struct cbq_class *next_alive; /* next class with backlog in this priority band */ 77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
79 78
@@ -139,19 +138,18 @@ struct cbq_class
139 int refcnt; 138 int refcnt;
140 int filters; 139 int filters;
141 140
142 struct cbq_class *defaults[TC_PRIO_MAX+1]; 141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
143}; 142};
144 143
145struct cbq_sched_data 144struct cbq_sched_data {
146{
147 struct Qdisc_class_hash clhash; /* Hash table of all classes */ 145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1]; 146 int nclasses[TC_CBQ_MAXPRIO + 1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1]; 147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
150 148
151 struct cbq_class link; 149 struct cbq_class link;
152 150
153 unsigned activemask; 151 unsigned int activemask;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes 152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
155 with backlog */ 153 with backlog */
156 154
157#ifdef CONFIG_NET_CLS_ACT 155#ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
162 int tx_len; 160 int tx_len;
163 psched_time_t now; /* Cached timestamp */ 161 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */ 162 psched_time_t now_rt; /* Cached real time */
165 unsigned pmask; 163 unsigned int pmask;
166 164
167 struct hrtimer delay_timer; 165 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer, 166 struct qdisc_watchdog watchdog; /* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
175}; 173};
176 174
177 175
178#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) 176#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
179 177
180static __inline__ struct cbq_class * 178static inline struct cbq_class *
181cbq_class_lookup(struct cbq_sched_data *q, u32 classid) 179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182{ 180{
183 struct Qdisc_class_common *clc; 181 struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
193static struct cbq_class * 191static struct cbq_class *
194cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) 192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195{ 193{
196 struct cbq_class *cl, *new; 194 struct cbq_class *cl;
197 195
198 for (cl = this->tparent; cl; cl = cl->tparent) 196 for (cl = this->tparent; cl; cl = cl->tparent) {
199 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) 197 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
200 return new;
201 198
199 if (new != NULL && new != this)
200 return new;
201 }
202 return NULL; 202 return NULL;
203} 203}
204 204
205#endif 205#endif
206 206
207/* Classify packet. The procedure is pretty complicated, but 207/* Classify packet. The procedure is pretty complicated, but
208 it allows us to combine link sharing and priority scheduling 208 * it allows us to combine link sharing and priority scheduling
209 transparently. 209 * transparently.
210 210 *
211 Namely, you can put link sharing rules (f.e. route based) at root of CBQ, 211 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
212 so that it resolves to split nodes. Then packets are classified 212 * so that it resolves to split nodes. Then packets are classified
213 by logical priority, or a more specific classifier may be attached 213 * by logical priority, or a more specific classifier may be attached
214 to the split node. 214 * to the split node.
215 */ 215 */
216 216
217static struct cbq_class * 217static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227 /* 227 /*
228 * Step 1. If skb->priority points to one of our classes, use it. 228 * Step 1. If skb->priority points to one of our classes, use it.
229 */ 229 */
230 if (TC_H_MAJ(prio^sch->handle) == 0 && 230 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
231 (cl = cbq_class_lookup(q, prio)) != NULL) 231 (cl = cbq_class_lookup(q, prio)) != NULL)
232 return cl; 232 return cl;
233 233
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) 243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
244 goto fallback; 244 goto fallback;
245 245
246 if ((cl = (void*)res.class) == NULL) { 246 cl = (void *)res.class;
247 if (!cl) {
247 if (TC_H_MAJ(res.classid)) 248 if (TC_H_MAJ(res.classid))
248 cl = cbq_class_lookup(q, res.classid); 249 cl = cbq_class_lookup(q, res.classid);
249 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) 250 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
250 cl = defmap[TC_PRIO_BESTEFFORT]; 251 cl = defmap[TC_PRIO_BESTEFFORT];
251 252
252 if (cl == NULL || cl->level >= head->level) 253 if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
282 * Step 4. No success... 283 * Step 4. No success...
283 */ 284 */
284 if (TC_H_MAJ(prio) == 0 && 285 if (TC_H_MAJ(prio) == 0 &&
285 !(cl = head->defaults[prio&TC_PRIO_MAX]) && 286 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
286 !(cl = head->defaults[TC_PRIO_BESTEFFORT])) 287 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
287 return head; 288 return head;
288 289
@@ -290,12 +291,12 @@ fallback:
290} 291}
291 292
292/* 293/*
293 A packet has just been enqueued on the empty class. 294 * A packet has just been enqueued on the empty class.
294 cbq_activate_class adds it to the tail of active class list 295 * cbq_activate_class adds it to the tail of active class list
295 of its priority band. 296 * of its priority band.
296 */ 297 */
297 298
298static __inline__ void cbq_activate_class(struct cbq_class *cl) 299static inline void cbq_activate_class(struct cbq_class *cl)
299{ 300{
300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
301 int prio = cl->cpriority; 302 int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
314} 315}
315 316
316/* 317/*
317 Unlink class from active chain. 318 * Unlink class from active chain.
318 Note that this same procedure is done directly in cbq_dequeue* 319 * Note that this same procedure is done directly in cbq_dequeue*
319 during round-robin procedure. 320 * during round-robin procedure.
320 */ 321 */
321 322
322static void cbq_deactivate_class(struct cbq_class *this) 323static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350{ 351{
351 int toplevel = q->toplevel; 352 int toplevel = q->toplevel;
352 353
353 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { 354 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
354 psched_time_t now; 355 psched_time_t now;
355 psched_tdiff_t incr; 356 psched_tdiff_t incr;
356 357
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
363 q->toplevel = cl->level; 364 q->toplevel = cl->level;
364 return; 365 return;
365 } 366 }
366 } while ((cl=cl->borrow) != NULL && toplevel > cl->level); 367 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
367 } 368 }
368} 369}
369 370
@@ -390,8 +391,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
390 ret = qdisc_enqueue(skb, cl->q); 391 ret = qdisc_enqueue(skb, cl->q);
391 if (ret == NET_XMIT_SUCCESS) { 392 if (ret == NET_XMIT_SUCCESS) {
392 sch->q.qlen++; 393 sch->q.qlen++;
393 sch->bstats.packets++;
394 sch->bstats.bytes += qdisc_pkt_len(skb);
395 cbq_mark_toplevel(q, cl); 394 cbq_mark_toplevel(q, cl);
396 if (!cl->next_alive) 395 if (!cl->next_alive)
397 cbq_activate_class(cl); 396 cbq_activate_class(cl);
@@ -419,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
419 delay += cl->offtime; 418 delay += cl->offtime;
420 419
421 /* 420 /*
422 Class goes to sleep, so that it will have no 421 * Class goes to sleep, so that it will have no
423 chance to work avgidle. Let's forgive it 8) 422 * chance to work avgidle. Let's forgive it 8)
424 423 *
425 BTW cbq-2.0 has a crap in this 424 * BTW cbq-2.0 has a crap in this
426 place, apparently they forgot to shift it by cl->ewma_log. 425 * place, apparently they forgot to shift it by cl->ewma_log.
427 */ 426 */
428 if (cl->avgidle < 0) 427 if (cl->avgidle < 0)
429 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); 428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -440,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
440 q->wd_expires = delay; 439 q->wd_expires = delay;
441 440
442 /* Dirty work! We must schedule wakeups based on 441 /* Dirty work! We must schedule wakeups based on
443 real available rate, rather than leaf rate, 442 * real available rate, rather than leaf rate,
444 which may be tiny (even zero). 443 * which may be tiny (even zero).
445 */ 444 */
446 if (q->toplevel == TC_CBQ_MAXLEVEL) { 445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
447 struct cbq_class *b; 446 struct cbq_class *b;
@@ -461,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
461} 460}
462 461
463/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when 462/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
464 they go overlimit 463 * they go overlimit
465 */ 464 */
466 465
467static void cbq_ovl_rclassic(struct cbq_class *cl) 466static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -596,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
596 struct Qdisc *sch = q->watchdog.qdisc; 595 struct Qdisc *sch = q->watchdog.qdisc;
597 psched_time_t now; 596 psched_time_t now;
598 psched_tdiff_t delay = 0; 597 psched_tdiff_t delay = 0;
599 unsigned pmask; 598 unsigned int pmask;
600 599
601 now = psched_get_time(); 600 now = psched_get_time();
602 601
@@ -625,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
625 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); 624 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
626 } 625 }
627 626
628 sch->flags &= ~TCQ_F_THROTTLED; 627 qdisc_unthrottled(sch);
629 __netif_schedule(qdisc_root(sch)); 628 __netif_schedule(qdisc_root(sch));
630 return HRTIMER_NORESTART; 629 return HRTIMER_NORESTART;
631} 630}
@@ -650,8 +649,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
650 ret = qdisc_enqueue(skb, cl->q); 649 ret = qdisc_enqueue(skb, cl->q);
651 if (ret == NET_XMIT_SUCCESS) { 650 if (ret == NET_XMIT_SUCCESS) {
652 sch->q.qlen++; 651 sch->q.qlen++;
653 sch->bstats.packets++;
654 sch->bstats.bytes += qdisc_pkt_len(skb);
655 if (!cl->next_alive) 652 if (!cl->next_alive)
656 cbq_activate_class(cl); 653 cbq_activate_class(cl);
657 return 0; 654 return 0;
@@ -667,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
667#endif 664#endif
668 665
669/* 666/*
670 It is mission critical procedure. 667 * It is mission critical procedure.
671 668 *
672 We "regenerate" toplevel cutoff, if transmitting class 669 * We "regenerate" toplevel cutoff, if transmitting class
673 has backlog and it is not regulated. It is not part of 670 * has backlog and it is not regulated. It is not part of
674 original CBQ description, but looks more reasonable. 671 * original CBQ description, but looks more reasonable.
675 Probably, it is wrong. This question needs further investigation. 672 * Probably, it is wrong. This question needs further investigation.
676*/ 673 */
677 674
678static __inline__ void 675static inline void
679cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, 676cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
680 struct cbq_class *borrowed) 677 struct cbq_class *borrowed)
681{ 678{
@@ -686,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
686 q->toplevel = borrowed->level; 683 q->toplevel = borrowed->level;
687 return; 684 return;
688 } 685 }
689 } while ((borrowed=borrowed->borrow) != NULL); 686 } while ((borrowed = borrowed->borrow) != NULL);
690 } 687 }
691#if 0 688#if 0
692 /* It is not necessary now. Uncommenting it 689 /* It is not necessary now. Uncommenting it
@@ -714,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
714 cl->bstats.bytes += len; 711 cl->bstats.bytes += len;
715 712
716 /* 713 /*
717 (now - last) is total time between packet right edges. 714 * (now - last) is total time between packet right edges.
718 (last_pktlen/rate) is "virtual" busy time, so that 715 * (last_pktlen/rate) is "virtual" busy time, so that
719 716 *
720 idle = (now - last) - last_pktlen/rate 717 * idle = (now - last) - last_pktlen/rate
721 */ 718 */
722 719
723 idle = q->now - cl->last; 720 idle = q->now - cl->last;
@@ -727,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
727 idle -= L2T(cl, len); 724 idle -= L2T(cl, len);
728 725
729 /* true_avgidle := (1-W)*true_avgidle + W*idle, 726 /* true_avgidle := (1-W)*true_avgidle + W*idle,
730 where W=2^{-ewma_log}. But cl->avgidle is scaled: 727 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
731 cl->avgidle == true_avgidle/W, 728 * cl->avgidle == true_avgidle/W,
732 hence: 729 * hence:
733 */ 730 */
734 avgidle += idle - (avgidle>>cl->ewma_log); 731 avgidle += idle - (avgidle>>cl->ewma_log);
735 } 732 }
@@ -743,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
743 cl->avgidle = avgidle; 740 cl->avgidle = avgidle;
744 741
745 /* Calculate expected time, when this class 742 /* Calculate expected time, when this class
746 will be allowed to send. 743 * will be allowed to send.
747 It will occur, when: 744 * It will occur, when:
748 (1-W)*true_avgidle + W*delay = 0, i.e. 745 * (1-W)*true_avgidle + W*delay = 0, i.e.
749 idle = (1/W - 1)*(-true_avgidle) 746 * idle = (1/W - 1)*(-true_avgidle)
750 or 747 * or
751 idle = (1 - W)*(-cl->avgidle); 748 * idle = (1 - W)*(-cl->avgidle);
752 */ 749 */
753 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); 750 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
754 751
755 /* 752 /*
756 That is not all. 753 * That is not all.
757 To maintain the rate allocated to the class, 754 * To maintain the rate allocated to the class,
758 we add to undertime virtual clock, 755 * we add to undertime virtual clock,
759 necessary to complete transmitted packet. 756 * necessary to complete transmitted packet.
760 (len/phys_bandwidth has been already passed 757 * (len/phys_bandwidth has been already passed
761 to the moment of cbq_update) 758 * to the moment of cbq_update)
762 */ 759 */
763 760
764 idle -= L2T(&q->link, len); 761 idle -= L2T(&q->link, len);
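The two comment blocks above pack a lot into a few lines. With W = 2^-ewma_log and cl->avgidle stored as true_avgidle/W, the update avgidle += idle - (avgidle >> ewma_log) is the textbook EWMA true_avgidle = (1-W)*true_avgidle + W*idle divided through by W, and (-avgidle) - ((-avgidle) >> ewma_log) is the (1-W)*(-avgidle) expected-idle term used for undertime. A small stand-alone check of the first identity (plain C, values chosen arbitrarily; the shift truncates, so the two sides agree only up to rounding):

/* Verify that the scaled EWMA used in cbq_update() matches the textbook
 * form.  avgidle is stored as true_avgidle / W with W = 2^-ewma_log, so
 * one step is avgidle += idle - (avgidle >> ewma_log).
 */
#include <stdio.h>

int main(void)
{
	const int ewma_log = 5;                 /* W = 1/32 */
	long avgidle = 1000;                    /* scaled value, true_avgidle / W */
	long idle = -200;                       /* one (negative) idle sample */
	double true_avgidle = avgidle / 32.0;   /* unscaled shadow copy */

	avgidle += idle - (avgidle >> ewma_log);                  /* kernel-style */
	true_avgidle = (31.0 / 32) * true_avgidle + idle / 32.0;  /* textbook */

	printf("scaled: %ld, rescaled textbook: %.1f\n",
	       avgidle, true_avgidle * 32);
	return 0;
}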
@@ -780,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
780 cbq_update_toplevel(q, this, q->tx_borrowed); 777 cbq_update_toplevel(q, this, q->tx_borrowed);
781} 778}
782 779
783static __inline__ struct cbq_class * 780static inline struct cbq_class *
784cbq_under_limit(struct cbq_class *cl) 781cbq_under_limit(struct cbq_class *cl)
785{ 782{
786 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 783 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -796,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
796 793
797 do { 794 do {
798 /* It is very suspicious place. Now overlimit 795 /* It is very suspicious place. Now overlimit
799 action is generated for not bounded classes 796 * action is generated for not bounded classes
800 only if link is completely congested. 797 * only if link is completely congested.
801 Though it is in agree with ancestor-only paradigm, 798 * Though it is in agree with ancestor-only paradigm,
802 it looks very stupid. Particularly, 799 * it looks very stupid. Particularly,
803 it means that this chunk of code will either 800 * it means that this chunk of code will either
804 never be called or result in strong amplification 801 * never be called or result in strong amplification
805 of burstiness. Dangerous, silly, and, however, 802 * of burstiness. Dangerous, silly, and, however,
806 no another solution exists. 803 * no another solution exists.
807 */ 804 */
808 if ((cl = cl->borrow) == NULL) { 805 cl = cl->borrow;
806 if (!cl) {
809 this_cl->qstats.overlimits++; 807 this_cl->qstats.overlimits++;
810 this_cl->overlimit(this_cl); 808 this_cl->overlimit(this_cl);
811 return NULL; 809 return NULL;
@@ -818,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
818 return cl; 816 return cl;
819} 817}
820 818
821static __inline__ struct sk_buff * 819static inline struct sk_buff *
822cbq_dequeue_prio(struct Qdisc *sch, int prio) 820cbq_dequeue_prio(struct Qdisc *sch, int prio)
823{ 821{
824 struct cbq_sched_data *q = qdisc_priv(sch); 822 struct cbq_sched_data *q = qdisc_priv(sch);
@@ -842,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
842 840
843 if (cl->deficit <= 0) { 841 if (cl->deficit <= 0) {
844 /* Class exhausted its allotment per 842 /* Class exhausted its allotment per
845 this round. Switch to the next one. 843 * this round. Switch to the next one.
846 */ 844 */
847 deficit = 1; 845 deficit = 1;
848 cl->deficit += cl->quantum; 846 cl->deficit += cl->quantum;
@@ -852,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
852 skb = cl->q->dequeue(cl->q); 850 skb = cl->q->dequeue(cl->q);
853 851
854 /* Class did not give us any skb :-( 852 /* Class did not give us any skb :-(
855 It could occur even if cl->q->q.qlen != 0 853 * It could occur even if cl->q->q.qlen != 0
856 f.e. if cl->q == "tbf" 854 * f.e. if cl->q == "tbf"
857 */ 855 */
858 if (skb == NULL) 856 if (skb == NULL)
859 goto skip_class; 857 goto skip_class;
@@ -882,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
882skip_class: 880skip_class:
883 if (cl->q->q.qlen == 0 || prio != cl->cpriority) { 881 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
884 /* Class is empty or penalized. 882 /* Class is empty or penalized.
885 Unlink it from active chain. 883 * Unlink it from active chain.
886 */ 884 */
887 cl_prev->next_alive = cl->next_alive; 885 cl_prev->next_alive = cl->next_alive;
888 cl->next_alive = NULL; 886 cl->next_alive = NULL;
@@ -921,14 +919,14 @@ next_class:
921 return NULL; 919 return NULL;
922} 920}
923 921
924static __inline__ struct sk_buff * 922static inline struct sk_buff *
925cbq_dequeue_1(struct Qdisc *sch) 923cbq_dequeue_1(struct Qdisc *sch)
926{ 924{
927 struct cbq_sched_data *q = qdisc_priv(sch); 925 struct cbq_sched_data *q = qdisc_priv(sch);
928 struct sk_buff *skb; 926 struct sk_buff *skb;
929 unsigned activemask; 927 unsigned int activemask;
930 928
931 activemask = q->activemask&0xFF; 929 activemask = q->activemask & 0xFF;
932 while (activemask) { 930 while (activemask) {
933 int prio = ffz(~activemask); 931 int prio = ffz(~activemask);
934 activemask &= ~(1<<prio); 932 activemask &= ~(1<<prio);
@@ -953,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
953 if (q->tx_class) { 951 if (q->tx_class) {
954 psched_tdiff_t incr2; 952 psched_tdiff_t incr2;
955 /* Time integrator. We calculate EOS time 953 /* Time integrator. We calculate EOS time
956 by adding expected packet transmission time. 954 * by adding expected packet transmission time.
957 If real time is greater, we warp artificial clock, 955 * If real time is greater, we warp artificial clock,
958 so that: 956 * so that:
959 957 *
960 cbq_time = max(real_time, work); 958 * cbq_time = max(real_time, work);
961 */ 959 */
962 incr2 = L2T(&q->link, q->tx_len); 960 incr2 = L2T(&q->link, q->tx_len);
963 q->now += incr2; 961 q->now += incr2;
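The "time integrator" comment above states the invariant cbq_time = max(real_time, work): the virtual clock q->now advances by at least the virtual transmission time of the packet just sent, and is warped forward if real time has run further ahead. A distilled sketch of that rule (incr standing for the real elapsed time, incr2 for L2T(&q->link, q->tx_len)); types simplified to long long for illustration, the kernel works in psched ticks:

/* Distilled form of the clock advance described above: move the virtual
 * clock by max(real elapsed time, virtual transmission time).
 */
#include <stdio.h>

static long long cbq_advance(long long now, long long incr, long long incr2)
{
	now += incr2;                 /* charge the packet just transmitted */
	if (incr > incr2)
		now += incr - incr2;  /* real time ran ahead: warp forward */
	return now;                   /* == old now + max(incr, incr2) */
}

int main(void)
{
	printf("%lld\n", cbq_advance(1000, 30, 50));  /* 1050: work dominates */
	printf("%lld\n", cbq_advance(1000, 80, 50));  /* 1080: real time dominates */
	return 0;
}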
@@ -973,28 +971,29 @@ cbq_dequeue(struct Qdisc *sch)
973 971
974 skb = cbq_dequeue_1(sch); 972 skb = cbq_dequeue_1(sch);
975 if (skb) { 973 if (skb) {
974 qdisc_bstats_update(sch, skb);
976 sch->q.qlen--; 975 sch->q.qlen--;
977 sch->flags &= ~TCQ_F_THROTTLED; 976 qdisc_unthrottled(sch);
978 return skb; 977 return skb;
979 } 978 }
980 979
981 /* All the classes are overlimit. 980 /* All the classes are overlimit.
982 981 *
983 It is possible, if: 982 * It is possible, if:
984 983 *
985 1. Scheduler is empty. 984 * 1. Scheduler is empty.
986 2. Toplevel cutoff inhibited borrowing. 985 * 2. Toplevel cutoff inhibited borrowing.
987 3. Root class is overlimit. 986 * 3. Root class is overlimit.
988 987 *
989 Reset 2d and 3d conditions and retry. 988 * Reset 2d and 3d conditions and retry.
990 989 *
991 Note, that NS and cbq-2.0 are buggy, peeking 990 * Note, that NS and cbq-2.0 are buggy, peeking
992 an arbitrary class is appropriate for ancestor-only 991 * an arbitrary class is appropriate for ancestor-only
993 sharing, but not for toplevel algorithm. 992 * sharing, but not for toplevel algorithm.
994 993 *
995 Our version is better, but slower, because it requires 994 * Our version is better, but slower, because it requires
996 two passes, but it is unavoidable with top-level sharing. 995 * two passes, but it is unavoidable with top-level sharing.
997 */ 996 */
998 997
999 if (q->toplevel == TC_CBQ_MAXLEVEL && 998 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1000 q->link.undertime == PSCHED_PASTPERFECT) 999 q->link.undertime == PSCHED_PASTPERFECT)
@@ -1005,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
1005 } 1004 }
1006 1005
1007 /* No packets in scheduler or nobody wants to give them to us :-( 1006 /* No packets in scheduler or nobody wants to give them to us :-(
1008 Sigh... start watchdog timer in the last case. */ 1007 * Sigh... start watchdog timer in the last case.
1008 */
1009 1009
1010 if (sch->q.qlen) { 1010 if (sch->q.qlen) {
1011 sch->qstats.overlimits++; 1011 sch->qstats.overlimits++;
@@ -1027,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
1027 int level = 0; 1027 int level = 0;
1028 struct cbq_class *cl; 1028 struct cbq_class *cl;
1029 1029
1030 if ((cl = this->children) != NULL) { 1030 cl = this->children;
1031 if (cl) {
1031 do { 1032 do {
1032 if (cl->level > level) 1033 if (cl->level > level)
1033 level = cl->level; 1034 level = cl->level;
1034 } while ((cl = cl->sibling) != this->children); 1035 } while ((cl = cl->sibling) != this->children);
1035 } 1036 }
1036 this->level = level+1; 1037 this->level = level + 1;
1037 } while ((this = this->tparent) != NULL); 1038 } while ((this = this->tparent) != NULL);
1038} 1039}
1039 1040
@@ -1049,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1049 for (h = 0; h < q->clhash.hashsize; h++) { 1050 for (h = 0; h < q->clhash.hashsize; h++) {
1050 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { 1051 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1051 /* BUGGGG... Beware! This expression suffer of 1052 /* BUGGGG... Beware! This expression suffer of
1052 arithmetic overflows! 1053 * arithmetic overflows!
1053 */ 1054 */
1054 if (cl->priority == prio) { 1055 if (cl->priority == prio) {
1055 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ 1056 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1056 q->quanta[prio]; 1057 q->quanta[prio];
1057 } 1058 }
1058 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { 1059 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1059 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); 1060 pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1061 cl->common.classid, cl->quantum);
1060 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; 1062 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1061 } 1063 }
1062 } 1064 }
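The "BUGGGG... arithmetic overflows" warning above is easy to make concrete: cl->weight * cl->allot * q->nclasses[prio] is evaluated before the division by q->quanta[prio], so on a 32-bit build modest-looking operands already wrap well past 2^31. An illustration with invented values (none of these numbers come from a real configuration):

/* Concrete instance of the overflow the comment above warns about.
 * Operand values are invented for illustration; the kernel does the
 * multiplication in (signed) long, which is 32 bits on many platforms.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t weight   = 100000;   /* cl->weight */
	uint32_t allot    = 1514;     /* cl->allot, roughly one Ethernet MTU */
	uint32_t nclasses = 32;       /* classes in this priority band */
	uint32_t quanta   = 3200000;  /* sum of weights in the band */

	uint32_t wrapped = weight * allot * nclasses / quanta;          /* mod 2^32 */
	uint64_t exact   = (uint64_t)weight * allot * nclasses / quanta;

	printf("32-bit quantum: %u, exact quantum: %llu\n",
	       wrapped, (unsigned long long)exact);
	return 0;
}

Note that a wrapped result like this can still land inside the quantum <= 0 || quantum > 32*mtu sanity check just below, so the pr_warning/repair path does not catch every overflow; the comment's warning stands.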
@@ -1067,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
1067{ 1069{
1068 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 1070 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1069 struct cbq_class *split = cl->split; 1071 struct cbq_class *split = cl->split;
1070 unsigned h; 1072 unsigned int h;
1071 int i; 1073 int i;
1072 1074
1073 if (split == NULL) 1075 if (split == NULL)
1074 return; 1076 return;
1075 1077
1076 for (i=0; i<=TC_PRIO_MAX; i++) { 1078 for (i = 0; i <= TC_PRIO_MAX; i++) {
1077 if (split->defaults[i] == cl && !(cl->defmap&(1<<i))) 1079 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1078 split->defaults[i] = NULL; 1080 split->defaults[i] = NULL;
1079 } 1081 }
1080 1082
1081 for (i=0; i<=TC_PRIO_MAX; i++) { 1083 for (i = 0; i <= TC_PRIO_MAX; i++) {
1082 int level = split->level; 1084 int level = split->level;
1083 1085
1084 if (split->defaults[i]) 1086 if (split->defaults[i])
@@ -1091,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
1091 hlist_for_each_entry(c, n, &q->clhash.hash[h], 1093 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1092 common.hnode) { 1094 common.hnode) {
1093 if (c->split == split && c->level < level && 1095 if (c->split == split && c->level < level &&
1094 c->defmap&(1<<i)) { 1096 c->defmap & (1<<i)) {
1095 split->defaults[i] = c; 1097 split->defaults[i] = c;
1096 level = c->level; 1098 level = c->level;
1097 } 1099 }
@@ -1105,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
1105 struct cbq_class *split = NULL; 1107 struct cbq_class *split = NULL;
1106 1108
1107 if (splitid == 0) { 1109 if (splitid == 0) {
1108 if ((split = cl->split) == NULL) 1110 split = cl->split;
1111 if (!split)
1109 return; 1112 return;
1110 splitid = split->common.classid; 1113 splitid = split->common.classid;
1111 } 1114 }
@@ -1123,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
1123 cl->defmap = 0; 1126 cl->defmap = 0;
1124 cbq_sync_defmap(cl); 1127 cbq_sync_defmap(cl);
1125 cl->split = split; 1128 cl->split = split;
1126 cl->defmap = def&mask; 1129 cl->defmap = def & mask;
1127 } else 1130 } else
1128 cl->defmap = (cl->defmap&~mask)|(def&mask); 1131 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1129 1132
1130 cbq_sync_defmap(cl); 1133 cbq_sync_defmap(cl);
1131} 1134}
@@ -1138,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
1138 qdisc_class_hash_remove(&q->clhash, &this->common); 1141 qdisc_class_hash_remove(&q->clhash, &this->common);
1139 1142
1140 if (this->tparent) { 1143 if (this->tparent) {
1141 clp=&this->sibling; 1144 clp = &this->sibling;
1142 cl = *clp; 1145 cl = *clp;
1143 do { 1146 do {
1144 if (cl == this) { 1147 if (cl == this) {
@@ -1177,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
1177 } 1180 }
1178} 1181}
1179 1182
1180static unsigned int cbq_drop(struct Qdisc* sch) 1183static unsigned int cbq_drop(struct Qdisc *sch)
1181{ 1184{
1182 struct cbq_sched_data *q = qdisc_priv(sch); 1185 struct cbq_sched_data *q = qdisc_priv(sch);
1183 struct cbq_class *cl, *cl_head; 1186 struct cbq_class *cl, *cl_head;
@@ -1185,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
1185 unsigned int len; 1188 unsigned int len;
1186 1189
1187 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { 1190 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1188 if ((cl_head = q->active[prio]) == NULL) 1191 cl_head = q->active[prio];
1192 if (!cl_head)
1189 continue; 1193 continue;
1190 1194
1191 cl = cl_head; 1195 cl = cl_head;
@@ -1202,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
1202} 1206}
1203 1207
1204static void 1208static void
1205cbq_reset(struct Qdisc* sch) 1209cbq_reset(struct Qdisc *sch)
1206{ 1210{
1207 struct cbq_sched_data *q = qdisc_priv(sch); 1211 struct cbq_sched_data *q = qdisc_priv(sch);
1208 struct cbq_class *cl; 1212 struct cbq_class *cl;
1209 struct hlist_node *n; 1213 struct hlist_node *n;
1210 int prio; 1214 int prio;
1211 unsigned h; 1215 unsigned int h;
1212 1216
1213 q->activemask = 0; 1217 q->activemask = 0;
1214 q->pmask = 0; 1218 q->pmask = 0;
@@ -1240,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
1240 1244
1241static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) 1245static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1242{ 1246{
1243 if (lss->change&TCF_CBQ_LSS_FLAGS) { 1247 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1244 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; 1248 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1245 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; 1249 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1246 } 1250 }
1247 if (lss->change&TCF_CBQ_LSS_EWMA) 1251 if (lss->change & TCF_CBQ_LSS_EWMA)
1248 cl->ewma_log = lss->ewma_log; 1252 cl->ewma_log = lss->ewma_log;
1249 if (lss->change&TCF_CBQ_LSS_AVPKT) 1253 if (lss->change & TCF_CBQ_LSS_AVPKT)
1250 cl->avpkt = lss->avpkt; 1254 cl->avpkt = lss->avpkt;
1251 if (lss->change&TCF_CBQ_LSS_MINIDLE) 1255 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1252 cl->minidle = -(long)lss->minidle; 1256 cl->minidle = -(long)lss->minidle;
1253 if (lss->change&TCF_CBQ_LSS_MAXIDLE) { 1257 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1254 cl->maxidle = lss->maxidle; 1258 cl->maxidle = lss->maxidle;
1255 cl->avgidle = lss->maxidle; 1259 cl->avgidle = lss->maxidle;
1256 } 1260 }
1257 if (lss->change&TCF_CBQ_LSS_OFFTIME) 1261 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1258 cl->offtime = lss->offtime; 1262 cl->offtime = lss->offtime;
1259 return 0; 1263 return 0;
1260} 1264}
@@ -1282,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1282 if (wrr->weight) 1286 if (wrr->weight)
1283 cl->weight = wrr->weight; 1287 cl->weight = wrr->weight;
1284 if (wrr->priority) { 1288 if (wrr->priority) {
1285 cl->priority = wrr->priority-1; 1289 cl->priority = wrr->priority - 1;
1286 cl->cpriority = cl->priority; 1290 cl->cpriority = cl->priority;
1287 if (cl->priority >= cl->priority2) 1291 if (cl->priority >= cl->priority2)
1288 cl->priority2 = TC_CBQ_MAXPRIO-1; 1292 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1289 } 1293 }
1290 1294
1291 cbq_addprio(q, cl); 1295 cbq_addprio(q, cl);
@@ -1302,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1302 cl->overlimit = cbq_ovl_delay; 1306 cl->overlimit = cbq_ovl_delay;
1303 break; 1307 break;
1304 case TC_CBQ_OVL_LOWPRIO: 1308 case TC_CBQ_OVL_LOWPRIO:
1305 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || 1309 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1306 ovl->priority2-1 <= cl->priority) 1310 ovl->priority2 - 1 <= cl->priority)
1307 return -EINVAL; 1311 return -EINVAL;
1308 cl->priority2 = ovl->priority2-1; 1312 cl->priority2 = ovl->priority2 - 1;
1309 cl->overlimit = cbq_ovl_lowprio; 1313 cl->overlimit = cbq_ovl_lowprio;
1310 break; 1314 break;
1311 case TC_CBQ_OVL_DROP: 1315 case TC_CBQ_OVL_DROP:
@@ -1379,14 +1383,14 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1379 q->link.sibling = &q->link; 1383 q->link.sibling = &q->link;
1380 q->link.common.classid = sch->handle; 1384 q->link.common.classid = sch->handle;
1381 q->link.qdisc = sch; 1385 q->link.qdisc = sch;
1382 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1386 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1383 &pfifo_qdisc_ops, 1387 sch->handle);
1384 sch->handle))) 1388 if (!q->link.q)
1385 q->link.q = &noop_qdisc; 1389 q->link.q = &noop_qdisc;
1386 1390
1387 q->link.priority = TC_CBQ_MAXPRIO-1; 1391 q->link.priority = TC_CBQ_MAXPRIO - 1;
1388 q->link.priority2 = TC_CBQ_MAXPRIO-1; 1392 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1389 q->link.cpriority = TC_CBQ_MAXPRIO-1; 1393 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1390 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; 1394 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1391 q->link.overlimit = cbq_ovl_classic; 1395 q->link.overlimit = cbq_ovl_classic;
1392 q->link.allot = psched_mtu(qdisc_dev(sch)); 1396 q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1417,7 +1421,7 @@ put_rtab:
1417 return err; 1421 return err;
1418} 1422}
1419 1423
1420static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) 1424static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1421{ 1425{
1422 unsigned char *b = skb_tail_pointer(skb); 1426 unsigned char *b = skb_tail_pointer(skb);
1423 1427
@@ -1429,7 +1433,7 @@ nla_put_failure:
1429 return -1; 1433 return -1;
1430} 1434}
1431 1435
1432static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) 1436static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1433{ 1437{
1434 unsigned char *b = skb_tail_pointer(skb); 1438 unsigned char *b = skb_tail_pointer(skb);
1435 struct tc_cbq_lssopt opt; 1439 struct tc_cbq_lssopt opt;
@@ -1454,15 +1458,15 @@ nla_put_failure:
1454 return -1; 1458 return -1;
1455} 1459}
1456 1460
1457static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) 1461static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1458{ 1462{
1459 unsigned char *b = skb_tail_pointer(skb); 1463 unsigned char *b = skb_tail_pointer(skb);
1460 struct tc_cbq_wrropt opt; 1464 struct tc_cbq_wrropt opt;
1461 1465
1462 opt.flags = 0; 1466 opt.flags = 0;
1463 opt.allot = cl->allot; 1467 opt.allot = cl->allot;
1464 opt.priority = cl->priority+1; 1468 opt.priority = cl->priority + 1;
1465 opt.cpriority = cl->cpriority+1; 1469 opt.cpriority = cl->cpriority + 1;
1466 opt.weight = cl->weight; 1470 opt.weight = cl->weight;
1467 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); 1471 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1468 return skb->len; 1472 return skb->len;
@@ -1472,13 +1476,13 @@ nla_put_failure:
1472 return -1; 1476 return -1;
1473} 1477}
1474 1478
1475static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) 1479static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1476{ 1480{
1477 unsigned char *b = skb_tail_pointer(skb); 1481 unsigned char *b = skb_tail_pointer(skb);
1478 struct tc_cbq_ovl opt; 1482 struct tc_cbq_ovl opt;
1479 1483
1480 opt.strategy = cl->ovl_strategy; 1484 opt.strategy = cl->ovl_strategy;
1481 opt.priority2 = cl->priority2+1; 1485 opt.priority2 = cl->priority2 + 1;
1482 opt.pad = 0; 1486 opt.pad = 0;
1483 opt.penalty = cl->penalty; 1487 opt.penalty = cl->penalty;
1484 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); 1488 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1489,7 +1493,7 @@ nla_put_failure:
1489 return -1; 1493 return -1;
1490} 1494}
1491 1495
1492static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) 1496static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1493{ 1497{
1494 unsigned char *b = skb_tail_pointer(skb); 1498 unsigned char *b = skb_tail_pointer(skb);
1495 struct tc_cbq_fopt opt; 1499 struct tc_cbq_fopt opt;
@@ -1508,7 +1512,7 @@ nla_put_failure:
1508} 1512}
1509 1513
1510#ifdef CONFIG_NET_CLS_ACT 1514#ifdef CONFIG_NET_CLS_ACT
1511static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) 1515static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1512{ 1516{
1513 unsigned char *b = skb_tail_pointer(skb); 1517 unsigned char *b = skb_tail_pointer(skb);
1514 struct tc_cbq_police opt; 1518 struct tc_cbq_police opt;
@@ -1572,7 +1576,7 @@ static int
1572cbq_dump_class(struct Qdisc *sch, unsigned long arg, 1576cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1573 struct sk_buff *skb, struct tcmsg *tcm) 1577 struct sk_buff *skb, struct tcmsg *tcm)
1574{ 1578{
1575 struct cbq_class *cl = (struct cbq_class*)arg; 1579 struct cbq_class *cl = (struct cbq_class *)arg;
1576 struct nlattr *nest; 1580 struct nlattr *nest;
1577 1581
1578 if (cl->tparent) 1582 if (cl->tparent)
@@ -1600,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1600 struct gnet_dump *d) 1604 struct gnet_dump *d)
1601{ 1605{
1602 struct cbq_sched_data *q = qdisc_priv(sch); 1606 struct cbq_sched_data *q = qdisc_priv(sch);
1603 struct cbq_class *cl = (struct cbq_class*)arg; 1607 struct cbq_class *cl = (struct cbq_class *)arg;
1604 1608
1605 cl->qstats.qlen = cl->q->q.qlen; 1609 cl->qstats.qlen = cl->q->q.qlen;
1606 cl->xstats.avgidle = cl->avgidle; 1610 cl->xstats.avgidle = cl->avgidle;
@@ -1620,10 +1624,10 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1620static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1624static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1621 struct Qdisc **old) 1625 struct Qdisc **old)
1622{ 1626{
1623 struct cbq_class *cl = (struct cbq_class*)arg; 1627 struct cbq_class *cl = (struct cbq_class *)arg;
1624 1628
1625 if (new == NULL) { 1629 if (new == NULL) {
1626 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1630 new = qdisc_create_dflt(sch->dev_queue,
1627 &pfifo_qdisc_ops, cl->common.classid); 1631 &pfifo_qdisc_ops, cl->common.classid);
1628 if (new == NULL) 1632 if (new == NULL)
1629 return -ENOBUFS; 1633 return -ENOBUFS;
@@ -1643,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1643 return 0; 1647 return 0;
1644} 1648}
1645 1649
1646static struct Qdisc * 1650static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1647cbq_leaf(struct Qdisc *sch, unsigned long arg)
1648{ 1651{
1649 struct cbq_class *cl = (struct cbq_class*)arg; 1652 struct cbq_class *cl = (struct cbq_class *)arg;
1650 1653
1651 return cl->q; 1654 return cl->q;
1652} 1655}
@@ -1685,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1685 kfree(cl); 1688 kfree(cl);
1686} 1689}
1687 1690
1688static void 1691static void cbq_destroy(struct Qdisc *sch)
1689cbq_destroy(struct Qdisc* sch)
1690{ 1692{
1691 struct cbq_sched_data *q = qdisc_priv(sch); 1693 struct cbq_sched_data *q = qdisc_priv(sch);
1692 struct hlist_node *n, *next; 1694 struct hlist_node *n, *next;
1693 struct cbq_class *cl; 1695 struct cbq_class *cl;
1694 unsigned h; 1696 unsigned int h;
1695 1697
1696#ifdef CONFIG_NET_CLS_ACT 1698#ifdef CONFIG_NET_CLS_ACT
1697 q->rx_class = NULL; 1699 q->rx_class = NULL;
@@ -1715,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
1715 1717
1716static void cbq_put(struct Qdisc *sch, unsigned long arg) 1718static void cbq_put(struct Qdisc *sch, unsigned long arg)
1717{ 1719{
1718 struct cbq_class *cl = (struct cbq_class*)arg; 1720 struct cbq_class *cl = (struct cbq_class *)arg;
1719 1721
1720 if (--cl->refcnt == 0) { 1722 if (--cl->refcnt == 0) {
1721#ifdef CONFIG_NET_CLS_ACT 1723#ifdef CONFIG_NET_CLS_ACT
@@ -1738,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1738{ 1740{
1739 int err; 1741 int err;
1740 struct cbq_sched_data *q = qdisc_priv(sch); 1742 struct cbq_sched_data *q = qdisc_priv(sch);
1741 struct cbq_class *cl = (struct cbq_class*)*arg; 1743 struct cbq_class *cl = (struct cbq_class *)*arg;
1742 struct nlattr *opt = tca[TCA_OPTIONS]; 1744 struct nlattr *opt = tca[TCA_OPTIONS];
1743 struct nlattr *tb[TCA_CBQ_MAX + 1]; 1745 struct nlattr *tb[TCA_CBQ_MAX + 1];
1744 struct cbq_class *parent; 1746 struct cbq_class *parent;
@@ -1830,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1830 1832
1831 if (classid) { 1833 if (classid) {
1832 err = -EINVAL; 1834 err = -EINVAL;
1833 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) 1835 if (TC_H_MAJ(classid ^ sch->handle) ||
1836 cbq_class_lookup(q, classid))
1834 goto failure; 1837 goto failure;
1835 } else { 1838 } else {
1836 int i; 1839 int i;
1837 classid = TC_H_MAKE(sch->handle,0x8000); 1840 classid = TC_H_MAKE(sch->handle, 0x8000);
1838 1841
1839 for (i=0; i<0x8000; i++) { 1842 for (i = 0; i < 0x8000; i++) {
1840 if (++q->hgenerator >= 0x8000) 1843 if (++q->hgenerator >= 0x8000)
1841 q->hgenerator = 1; 1844 q->hgenerator = 1;
1842 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) 1845 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1874,8 +1877,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1874 cl->R_tab = rtab; 1877 cl->R_tab = rtab;
1875 rtab = NULL; 1878 rtab = NULL;
1876 cl->refcnt = 1; 1879 cl->refcnt = 1;
1877 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1880 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1878 &pfifo_qdisc_ops, classid))) 1881 if (!cl->q)
1879 cl->q = &noop_qdisc; 1882 cl->q = &noop_qdisc;
1880 cl->common.classid = classid; 1883 cl->common.classid = classid;
1881 cl->tparent = parent; 1884 cl->tparent = parent;
@@ -1893,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1893 cl->minidle = -0x7FFFFFFF; 1896 cl->minidle = -0x7FFFFFFF;
1894 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); 1897 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1895 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); 1898 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1896 if (cl->ewma_log==0) 1899 if (cl->ewma_log == 0)
1897 cl->ewma_log = q->link.ewma_log; 1900 cl->ewma_log = q->link.ewma_log;
1898 if (cl->maxidle==0) 1901 if (cl->maxidle == 0)
1899 cl->maxidle = q->link.maxidle; 1902 cl->maxidle = q->link.maxidle;
1900 if (cl->avpkt==0) 1903 if (cl->avpkt == 0)
1901 cl->avpkt = q->link.avpkt; 1904 cl->avpkt = q->link.avpkt;
1902 cl->overlimit = cbq_ovl_classic; 1905 cl->overlimit = cbq_ovl_classic;
1903 if (tb[TCA_CBQ_OVL_STRATEGY]) 1906 if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1923,7 +1926,7 @@ failure:
1923static int cbq_delete(struct Qdisc *sch, unsigned long arg) 1926static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1924{ 1927{
1925 struct cbq_sched_data *q = qdisc_priv(sch); 1928 struct cbq_sched_data *q = qdisc_priv(sch);
1926 struct cbq_class *cl = (struct cbq_class*)arg; 1929 struct cbq_class *cl = (struct cbq_class *)arg;
1927 unsigned int qlen; 1930 unsigned int qlen;
1928 1931
1929 if (cl->filters || cl->children || cl == &q->link) 1932 if (cl->filters || cl->children || cl == &q->link)
@@ -1981,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1981 u32 classid) 1984 u32 classid)
1982{ 1985{
1983 struct cbq_sched_data *q = qdisc_priv(sch); 1986 struct cbq_sched_data *q = qdisc_priv(sch);
1984 struct cbq_class *p = (struct cbq_class*)parent; 1987 struct cbq_class *p = (struct cbq_class *)parent;
1985 struct cbq_class *cl = cbq_class_lookup(q, classid); 1988 struct cbq_class *cl = cbq_class_lookup(q, classid);
1986 1989
1987 if (cl) { 1990 if (cl) {
@@ -1995,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1995 1998
1996static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) 1999static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1997{ 2000{
1998 struct cbq_class *cl = (struct cbq_class*)arg; 2001 struct cbq_class *cl = (struct cbq_class *)arg;
1999 2002
2000 cl->filters--; 2003 cl->filters--;
2001} 2004}
@@ -2005,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2005 struct cbq_sched_data *q = qdisc_priv(sch); 2008 struct cbq_sched_data *q = qdisc_priv(sch);
2006 struct cbq_class *cl; 2009 struct cbq_class *cl;
2007 struct hlist_node *n; 2010 struct hlist_node *n;
2008 unsigned h; 2011 unsigned int h;
2009 2012
2010 if (arg->stop) 2013 if (arg->stop)
2011 return; 2014 return;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
new file mode 100644
index 000000000000..06afbaeb4c88
--- /dev/null
+++ b/net/sched/sch_choke.c
@@ -0,0 +1,688 @@
1/*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
17#include <linux/reciprocal_div.h>
18#include <linux/vmalloc.h>
19#include <net/pkt_sched.h>
20#include <net/inet_ecn.h>
21#include <net/red.h>
22#include <linux/ip.h>
23#include <net/ip.h>
24#include <linux/ipv6.h>
25#include <net/ipv6.h>
26
27/*
28 CHOKe stateless AQM for fair bandwidth allocation
29 =================================================
30
31 CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
32 unresponsive flows) is a variant of RED that penalizes misbehaving flows but
33 maintains no flow state. The difference from RED is an additional step
34 during the enqueuing process. If average queue size is over the
35 low threshold (qmin), a packet is chosen at random from the queue.
36 If both the new and chosen packet are from the same flow, both
37 are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
38 needs to access packets in queue randomly. It has a minimal class
39 interface to allow overriding the builtin flow classifier with
40 filters.
41
42 Source:
43 R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
44 Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
45 IEEE INFOCOM, 2000.
46
47 A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
48 Characteristics", IEEE/ACM Transactions on Networking, 2004
49
50 */
51
52/* Upper bound on size of sk_buff table (packets) */
53#define CHOKE_MAX_QUEUE (128*1024 - 1)
54
55struct choke_sched_data {
56/* Parameters */
57 u32 limit;
58 unsigned char flags;
59
60 struct red_parms parms;
61
62/* Variables */
63 struct tcf_proto *filter_list;
64 struct {
65 u32 prob_drop; /* Early probability drops */
66 u32 prob_mark; /* Early probability marks */
67 u32 forced_drop; /* Forced drops, qavg > max_thresh */
68 u32 forced_mark; /* Forced marks, qavg > max_thresh */
69 u32 pdrop; /* Drops due to queue limits */
70 u32 other; /* Drops due to drop() calls */
71 u32 matched; /* Drops to flow match */
72 } stats;
73
74 unsigned int head;
75 unsigned int tail;
76
77 unsigned int tab_mask; /* size - 1 */
78
79 struct sk_buff **tab;
80};
81
82/* deliver a random number between 0 and N - 1 */
83static u32 random_N(unsigned int N)
84{
85 return reciprocal_divide(random32(), N);
86}
87
88/* number of elements in queue including holes */
89static unsigned int choke_len(const struct choke_sched_data *q)
90{
91 return (q->tail - q->head) & q->tab_mask;
92}
93
94/* Is ECN parameter configured */
95static int use_ecn(const struct choke_sched_data *q)
96{
97 return q->flags & TC_RED_ECN;
98}
99
100/* Should packets over max just be dropped (versus marked) */
101static int use_harddrop(const struct choke_sched_data *q)
102{
103 return q->flags & TC_RED_HARDDROP;
104}
105
106/* Move head pointer forward to skip over holes */
107static void choke_zap_head_holes(struct choke_sched_data *q)
108{
109 do {
110 q->head = (q->head + 1) & q->tab_mask;
111 if (q->head == q->tail)
112 break;
113 } while (q->tab[q->head] == NULL);
114}
115
116/* Move tail pointer backwards to reuse holes */
117static void choke_zap_tail_holes(struct choke_sched_data *q)
118{
119 do {
120 q->tail = (q->tail - 1) & q->tab_mask;
121 if (q->head == q->tail)
122 break;
123 } while (q->tab[q->tail] == NULL);
124}
125
126/* Drop packet from queue array by creating a "hole" */
127static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
128{
129 struct choke_sched_data *q = qdisc_priv(sch);
130 struct sk_buff *skb = q->tab[idx];
131
132 q->tab[idx] = NULL;
133
134 if (idx == q->head)
135 choke_zap_head_holes(q);
136 if (idx == q->tail)
137 choke_zap_tail_holes(q);
138
139 sch->qstats.backlog -= qdisc_pkt_len(skb);
140 qdisc_drop(skb, sch);
141 qdisc_tree_decrease_qlen(sch, 1);
142 --sch->q.qlen;
143}
144
145/*
146 * Compare flow of two packets
147 * Returns true only if source and destination address and port match.
148 * false for special cases
149 */
150static bool choke_match_flow(struct sk_buff *skb1,
151 struct sk_buff *skb2)
152{
153 int off1, off2, poff;
154 const u32 *ports1, *ports2;
155 u8 ip_proto;
156 __u32 hash1;
157
158 if (skb1->protocol != skb2->protocol)
159 return false;
160
161 /* Use hash value as quick check
162 * Assumes that __skb_get_rxhash makes IP header and ports linear
163 */
164 hash1 = skb_get_rxhash(skb1);
165 if (!hash1 || hash1 != skb_get_rxhash(skb2))
166 return false;
167
168 /* Probably match, but be sure to avoid hash collisions */
169 off1 = skb_network_offset(skb1);
170 off2 = skb_network_offset(skb2);
171
172 switch (skb1->protocol) {
173 case __constant_htons(ETH_P_IP): {
174 const struct iphdr *ip1, *ip2;
175
176 ip1 = (const struct iphdr *) (skb1->data + off1);
177 ip2 = (const struct iphdr *) (skb2->data + off2);
178
179 ip_proto = ip1->protocol;
180 if (ip_proto != ip2->protocol ||
181 ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
182 return false;
183
184 if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
185 ip_proto = 0;
186 off1 += ip1->ihl * 4;
187 off2 += ip2->ihl * 4;
188 break;
189 }
190
191 case __constant_htons(ETH_P_IPV6): {
192 const struct ipv6hdr *ip1, *ip2;
193
194 ip1 = (const struct ipv6hdr *) (skb1->data + off1);
195 ip2 = (const struct ipv6hdr *) (skb2->data + off2);
196
197 ip_proto = ip1->nexthdr;
198 if (ip_proto != ip2->nexthdr ||
199 ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
200 ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
201 return false;
202 off1 += 40;
203 off2 += 40;
204 }
205
206 default: /* Maybe compare MAC header here? */
207 return false;
208 }
209
210 poff = proto_ports_offset(ip_proto);
211 if (poff < 0)
212 return true;
213
214 off1 += poff;
215 off2 += poff;
216
217 ports1 = (__force u32 *)(skb1->data + off1);
218 ports2 = (__force u32 *)(skb2->data + off2);
219 return *ports1 == *ports2;
220}
221
222struct choke_skb_cb {
223 u16 classid;
224};
225
226static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
227{
228 BUILD_BUG_ON(sizeof(skb->cb) <
229 sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
230 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
231}
232
233static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
234{
235 choke_skb_cb(skb)->classid = classid;
236}
237
238static u16 choke_get_classid(const struct sk_buff *skb)
239{
240 return choke_skb_cb(skb)->classid;
241}
242
243/*
244 * Classify flow using either:
245 * 1. pre-existing classification result in skb
246 * 2. fast internal classification
247 * 3. use TC filter based classification
248 */
249static bool choke_classify(struct sk_buff *skb,
250 struct Qdisc *sch, int *qerr)
251
252{
253 struct choke_sched_data *q = qdisc_priv(sch);
254 struct tcf_result res;
255 int result;
256
257 result = tc_classify(skb, q->filter_list, &res);
258 if (result >= 0) {
259#ifdef CONFIG_NET_CLS_ACT
260 switch (result) {
261 case TC_ACT_STOLEN:
262 case TC_ACT_QUEUED:
263 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
264 case TC_ACT_SHOT:
265 return false;
266 }
267#endif
268 choke_set_classid(skb, TC_H_MIN(res.classid));
269 return true;
270 }
271
272 return false;
273}
274
275/*
276 * Select a packet at random from queue
277 * HACK: since queue can have holes from previous deletion; retry several
278 * times to find a random skb but then just give up and return the head
279 * Will return NULL if queue is empty (q->head == q->tail)
280 */
281static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
282 unsigned int *pidx)
283{
284 struct sk_buff *skb;
285 int retrys = 3;
286
287 do {
288 *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
289 skb = q->tab[*pidx];
290 if (skb)
291 return skb;
292 } while (--retrys > 0);
293
294 return q->tab[*pidx = q->head];
295}
296
297/*
298 * Compare new packet with random packet in queue
299 * returns true if matched and sets *pidx
300 */
301static bool choke_match_random(const struct choke_sched_data *q,
302 struct sk_buff *nskb,
303 unsigned int *pidx)
304{
305 struct sk_buff *oskb;
306
307 if (q->head == q->tail)
308 return false;
309
310 oskb = choke_peek_random(q, pidx);
311 if (q->filter_list)
312 return choke_get_classid(nskb) == choke_get_classid(oskb);
313
314 return choke_match_flow(oskb, nskb);
315}
316
317static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
318{
319 struct choke_sched_data *q = qdisc_priv(sch);
320 struct red_parms *p = &q->parms;
321 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
322
323 if (q->filter_list) {
324 /* If using external classifiers, get result and record it. */
325 if (!choke_classify(skb, sch, &ret))
326 goto other_drop; /* Packet was eaten by filter */
327 }
328
329 /* Compute average queue usage (see RED) */
330 p->qavg = red_calc_qavg(p, sch->q.qlen);
331 if (red_is_idling(p))
332 red_end_of_idle_period(p);
333
334 /* Is queue small? */
335 if (p->qavg <= p->qth_min)
336 p->qcount = -1;
337 else {
338 unsigned int idx;
339
340 /* Draw a packet at random from queue and compare flow */
341 if (choke_match_random(q, skb, &idx)) {
342 q->stats.matched++;
343 choke_drop_by_idx(sch, idx);
344 goto congestion_drop;
345 }
346
347 /* Queue is large, always mark/drop */
348 if (p->qavg > p->qth_max) {
349 p->qcount = -1;
350
351 sch->qstats.overlimits++;
352 if (use_harddrop(q) || !use_ecn(q) ||
353 !INET_ECN_set_ce(skb)) {
354 q->stats.forced_drop++;
355 goto congestion_drop;
356 }
357
358 q->stats.forced_mark++;
359 } else if (++p->qcount) {
360 if (red_mark_probability(p, p->qavg)) {
361 p->qcount = 0;
362 p->qR = red_random(p);
363
364 sch->qstats.overlimits++;
365 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
366 q->stats.prob_drop++;
367 goto congestion_drop;
368 }
369
370 q->stats.prob_mark++;
371 }
372 } else
373 p->qR = red_random(p);
374 }
375
376 /* Admit new packet */
377 if (sch->q.qlen < q->limit) {
378 q->tab[q->tail] = skb;
379 q->tail = (q->tail + 1) & q->tab_mask;
380 ++sch->q.qlen;
381 sch->qstats.backlog += qdisc_pkt_len(skb);
382 return NET_XMIT_SUCCESS;
383 }
384
385 q->stats.pdrop++;
386 sch->qstats.drops++;
387 kfree_skb(skb);
388 return NET_XMIT_DROP;
389
390 congestion_drop:
391 qdisc_drop(skb, sch);
392 return NET_XMIT_CN;
393
394 other_drop:
395 if (ret & __NET_XMIT_BYPASS)
396 sch->qstats.drops++;
397 kfree_skb(skb);
398 return ret;
399}
400
401static struct sk_buff *choke_dequeue(struct Qdisc *sch)
402{
403 struct choke_sched_data *q = qdisc_priv(sch);
404 struct sk_buff *skb;
405
406 if (q->head == q->tail) {
407 if (!red_is_idling(&q->parms))
408 red_start_of_idle_period(&q->parms);
409 return NULL;
410 }
411
412 skb = q->tab[q->head];
413 q->tab[q->head] = NULL;
414 choke_zap_head_holes(q);
415 --sch->q.qlen;
416 sch->qstats.backlog -= qdisc_pkt_len(skb);
417 qdisc_bstats_update(sch, skb);
418
419 return skb;
420}
421
422static unsigned int choke_drop(struct Qdisc *sch)
423{
424 struct choke_sched_data *q = qdisc_priv(sch);
425 unsigned int len;
426
427 len = qdisc_queue_drop(sch);
428 if (len > 0)
429 q->stats.other++;
430 else {
431 if (!red_is_idling(&q->parms))
432 red_start_of_idle_period(&q->parms);
433 }
434
435 return len;
436}
437
438static void choke_reset(struct Qdisc *sch)
439{
440 struct choke_sched_data *q = qdisc_priv(sch);
441
442 red_restart(&q->parms);
443}
444
445static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
446 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
447 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
448};
449
450
451static void choke_free(void *addr)
452{
453 if (addr) {
454 if (is_vmalloc_addr(addr))
455 vfree(addr);
456 else
457 kfree(addr);
458 }
459}
460
461static int choke_change(struct Qdisc *sch, struct nlattr *opt)
462{
463 struct choke_sched_data *q = qdisc_priv(sch);
464 struct nlattr *tb[TCA_CHOKE_MAX + 1];
465 const struct tc_red_qopt *ctl;
466 int err;
467 struct sk_buff **old = NULL;
468 unsigned int mask;
469
470 if (opt == NULL)
471 return -EINVAL;
472
473 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
474 if (err < 0)
475 return err;
476
477 if (tb[TCA_CHOKE_PARMS] == NULL ||
478 tb[TCA_CHOKE_STAB] == NULL)
479 return -EINVAL;
480
481 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
482
483 if (ctl->limit > CHOKE_MAX_QUEUE)
484 return -EINVAL;
485
486 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
487 if (mask != q->tab_mask) {
488 struct sk_buff **ntab;
489
490 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
491 if (!ntab)
492 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
493 if (!ntab)
494 return -ENOMEM;
495
496 sch_tree_lock(sch);
497 old = q->tab;
498 if (old) {
499 unsigned int oqlen = sch->q.qlen, tail = 0;
500
501 while (q->head != q->tail) {
502 struct sk_buff *skb = q->tab[q->head];
503
504 q->head = (q->head + 1) & q->tab_mask;
505 if (!skb)
506 continue;
507 if (tail < mask) {
508 ntab[tail++] = skb;
509 continue;
510 }
511 sch->qstats.backlog -= qdisc_pkt_len(skb);
512 --sch->q.qlen;
513 qdisc_drop(skb, sch);
514 }
515 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
516 q->head = 0;
517 q->tail = tail;
518 }
519
520 q->tab_mask = mask;
521 q->tab = ntab;
522 } else
523 sch_tree_lock(sch);
524
525 q->flags = ctl->flags;
526 q->limit = ctl->limit;
527
528 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
529 ctl->Plog, ctl->Scell_log,
530 nla_data(tb[TCA_CHOKE_STAB]));
531
532 if (q->head == q->tail)
533 red_end_of_idle_period(&q->parms);
534
535 sch_tree_unlock(sch);
536 choke_free(old);
537 return 0;
538}
539
540static int choke_init(struct Qdisc *sch, struct nlattr *opt)
541{
542 return choke_change(sch, opt);
543}
544
545static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
546{
547 struct choke_sched_data *q = qdisc_priv(sch);
548 struct nlattr *opts = NULL;
549 struct tc_red_qopt opt = {
550 .limit = q->limit,
551 .flags = q->flags,
552 .qth_min = q->parms.qth_min >> q->parms.Wlog,
553 .qth_max = q->parms.qth_max >> q->parms.Wlog,
554 .Wlog = q->parms.Wlog,
555 .Plog = q->parms.Plog,
556 .Scell_log = q->parms.Scell_log,
557 };
558
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 if (opts == NULL)
561 goto nla_put_failure;
562
563 NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
564 return nla_nest_end(skb, opts);
565
566nla_put_failure:
567 nla_nest_cancel(skb, opts);
568 return -EMSGSIZE;
569}
570
571static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
572{
573 struct choke_sched_data *q = qdisc_priv(sch);
574 struct tc_choke_xstats st = {
575 .early = q->stats.prob_drop + q->stats.forced_drop,
576 .marked = q->stats.prob_mark + q->stats.forced_mark,
577 .pdrop = q->stats.pdrop,
578 .other = q->stats.other,
579 .matched = q->stats.matched,
580 };
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static void choke_destroy(struct Qdisc *sch)
586{
587 struct choke_sched_data *q = qdisc_priv(sch);
588
589 tcf_destroy_chain(&q->filter_list);
590 choke_free(q->tab);
591}
592
593static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
594{
595 return NULL;
596}
597
598static unsigned long choke_get(struct Qdisc *sch, u32 classid)
599{
600 return 0;
601}
602
603static void choke_put(struct Qdisc *q, unsigned long cl)
604{
605}
606
607static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
608 u32 classid)
609{
610 return 0;
611}
612
613static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
614{
615 struct choke_sched_data *q = qdisc_priv(sch);
616
617 if (cl)
618 return NULL;
619 return &q->filter_list;
620}
621
622static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
623 struct sk_buff *skb, struct tcmsg *tcm)
624{
625 tcm->tcm_handle |= TC_H_MIN(cl);
626 return 0;
627}
628
629static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
630{
631 if (!arg->stop) {
632 if (arg->fn(sch, 1, arg) < 0) {
633 arg->stop = 1;
634 return;
635 }
636 arg->count++;
637 }
638}
639
640static const struct Qdisc_class_ops choke_class_ops = {
641 .leaf = choke_leaf,
642 .get = choke_get,
643 .put = choke_put,
644 .tcf_chain = choke_find_tcf,
645 .bind_tcf = choke_bind,
646 .unbind_tcf = choke_put,
647 .dump = choke_dump_class,
648 .walk = choke_walk,
649};
650
651static struct sk_buff *choke_peek_head(struct Qdisc *sch)
652{
653 struct choke_sched_data *q = qdisc_priv(sch);
654
655 return (q->head != q->tail) ? q->tab[q->head] : NULL;
656}
657
658static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
659 .id = "choke",
660 .priv_size = sizeof(struct choke_sched_data),
661
662 .enqueue = choke_enqueue,
663 .dequeue = choke_dequeue,
664 .peek = choke_peek_head,
665 .drop = choke_drop,
666 .init = choke_init,
667 .destroy = choke_destroy,
668 .reset = choke_reset,
669 .change = choke_change,
670 .dump = choke_dump,
671 .dump_stats = choke_dump_stats,
672 .owner = THIS_MODULE,
673};
674
675static int __init choke_module_init(void)
676{
677 return register_qdisc(&choke_qdisc_ops);
678}
679
680static void __exit choke_module_exit(void)
681{
682 unregister_qdisc(&choke_qdisc_ops);
683}
684
685module_init(choke_module_init)
686module_exit(choke_module_exit)
687
688MODULE_LICENSE("GPL");
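The header comment of sch_choke.c above reduces to one extra step on top of RED: once the average queue exceeds qth_min, peek at a random queued packet and, if it belongs to the same flow as the arriving one, drop both, leaving a hole in the table rather than compacting it. A toy user-space model of just that decision; the flow ids, queue contents and thresholds are invented, and the real code compares headers via choke_match_flow()/skb_get_rxhash() rather than an integer flow id:

/* Toy model of the CHOKe matching step layered on a RED-like check.
 * Flow ids, queue contents and thresholds are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

#define QLEN 8

static int queue[QLEN] = { 1, 3, 1, 7, 1, 2, 5, 1 };  /* flow id per slot */

/* returns 1 if the arriving packet (and its random victim) should be dropped */
static int choke_decision(int arriving_flow, unsigned avg_qlen, unsigned qth_min)
{
	if (avg_qlen <= qth_min)
		return 0;                          /* queue small: plain enqueue */

	unsigned victim = (unsigned)rand() % QLEN; /* random peek */
	if (queue[victim] == arriving_flow) {
		queue[victim] = -1;                /* drop the queued packet (hole) */
		return 1;                          /* ...and the arriving one */
	}
	return 0;                                  /* no match: fall through to RED */
}

int main(void)
{
	srand(1);
	/* flow 1 dominates the queue, so it is likely to be hit */
	printf("drop both: %d\n", choke_decision(1, 6, 2));
	return 0;
}

The hole left at the victim slot is why the kernel keeps the table as a power-of-two ring with head/tail indices and the choke_zap_head_holes()/choke_zap_tail_holes() helpers that skip NULL entries, instead of shifting the remaining packets.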
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index b74046a95397..6b7fe4a84f13 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -110,7 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
110 cl->refcnt = 1; 110 cl->refcnt = 1;
111 cl->common.classid = classid; 111 cl->common.classid = classid;
112 cl->quantum = quantum; 112 cl->quantum = quantum;
113 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 113 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
114 &pfifo_qdisc_ops, classid); 114 &pfifo_qdisc_ops, classid);
115 if (cl->qdisc == NULL) 115 if (cl->qdisc == NULL)
116 cl->qdisc = &noop_qdisc; 116 cl->qdisc = &noop_qdisc;
@@ -218,7 +218,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
218 struct drr_class *cl = (struct drr_class *)arg; 218 struct drr_class *cl = (struct drr_class *)arg;
219 219
220 if (new == NULL) { 220 if (new == NULL) {
221 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 221 new = qdisc_create_dflt(sch->dev_queue,
222 &pfifo_qdisc_ops, cl->common.classid); 222 &pfifo_qdisc_ops, cl->common.classid);
223 if (new == NULL) 223 if (new == NULL)
224 new = &noop_qdisc; 224 new = &noop_qdisc;
@@ -351,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
351{ 351{
352 struct drr_sched *q = qdisc_priv(sch); 352 struct drr_sched *q = qdisc_priv(sch);
353 struct drr_class *cl; 353 struct drr_class *cl;
354 unsigned int len;
355 int err; 354 int err;
356 355
357 cl = drr_classify(skb, sch, &err); 356 cl = drr_classify(skb, sch, &err);
@@ -362,7 +361,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
362 return err; 361 return err;
363 } 362 }
364 363
365 len = qdisc_pkt_len(skb);
366 err = qdisc_enqueue(skb, cl->qdisc); 364 err = qdisc_enqueue(skb, cl->qdisc);
367 if (unlikely(err != NET_XMIT_SUCCESS)) { 365 if (unlikely(err != NET_XMIT_SUCCESS)) {
368 if (net_xmit_drop_count(err)) { 366 if (net_xmit_drop_count(err)) {
@@ -377,10 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
377 cl->deficit = cl->quantum; 375 cl->deficit = cl->quantum;
378 } 376 }
379 377
380 cl->bstats.packets++; 378 bstats_update(&cl->bstats, skb);
381 cl->bstats.bytes += len;
382 sch->bstats.packets++;
383 sch->bstats.bytes += len;
384 379
385 sch->q.qlen++; 380 sch->q.qlen++;
386 return err; 381 return err;
@@ -407,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
407 skb = qdisc_dequeue_peeked(cl->qdisc); 402 skb = qdisc_dequeue_peeked(cl->qdisc);
408 if (cl->qdisc->q.qlen == 0) 403 if (cl->qdisc->q.qlen == 0)
409 list_del(&cl->alist); 404 list_del(&cl->alist);
405 qdisc_bstats_update(sch, skb);
410 sch->q.qlen--; 406 sch->q.qlen--;
411 return skb; 407 return skb;
412 } 408 }
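
The sch_drr.c hunks above are part of a tree-wide statistics cleanup visible throughout this merge: instead of open-coding bytes/packets increments at enqueue time, classes call bstats_update() and the qdisc itself is charged with qdisc_bstats_update() on the dequeue path, so the counters describe traffic that actually left the scheduler. A minimal sketch of the two styles, using hypothetical helper names and the 2.6.38-era inlines from include/net/sch_generic.h:

#include <net/sch_generic.h>

/* Sketch: old-style accounting, as removed from drr_enqueue() above. */
static void charge_old_style(struct Qdisc *sch, struct sk_buff *skb)
{
	sch->bstats.bytes   += qdisc_pkt_len(skb);
	sch->bstats.packets++;
}

/* Sketch: new-style accounting, as added to drr_dequeue() above.
 * bstats_update()/qdisc_bstats_update() also count GSO segments,
 * which the open-coded drr version did not. */
static void charge_new_style(struct Qdisc *sch, struct sk_buff *skb)
{
	qdisc_bstats_update(sch, skb);	/* bstats_update(&sch->bstats, skb) */
}
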
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 63d41f86679c..2c790204d042 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -61,8 +61,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
61 sch, p, new, old); 61 sch, p, new, old);
62 62
63 if (new == NULL) { 63 if (new == NULL) {
64 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 64 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
65 &pfifo_qdisc_ops,
66 sch->handle); 65 sch->handle);
67 if (new == NULL) 66 if (new == NULL)
68 new = &noop_qdisc; 67 new = &noop_qdisc;
@@ -138,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
138 mask = nla_get_u8(tb[TCA_DSMARK_MASK]); 137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
139 138
140 if (tb[TCA_DSMARK_VALUE]) 139 if (tb[TCA_DSMARK_VALUE])
141 p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); 140 p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
142 141
143 if (tb[TCA_DSMARK_MASK]) 142 if (tb[TCA_DSMARK_MASK])
144 p->mask[*arg-1] = mask; 143 p->mask[*arg - 1] = mask;
145 144
146 err = 0; 145 err = 0;
147 146
@@ -156,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
156 if (!dsmark_valid_index(p, arg)) 155 if (!dsmark_valid_index(p, arg))
157 return -EINVAL; 156 return -EINVAL;
158 157
159 p->mask[arg-1] = 0xff; 158 p->mask[arg - 1] = 0xff;
160 p->value[arg-1] = 0; 159 p->value[arg - 1] = 0;
161 160
162 return 0; 161 return 0;
163} 162}
@@ -176,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
176 if (p->mask[i] == 0xff && !p->value[i]) 175 if (p->mask[i] == 0xff && !p->value[i])
177 goto ignore; 176 goto ignore;
178 if (walker->count >= walker->skip) { 177 if (walker->count >= walker->skip) {
179 if (walker->fn(sch, i+1, walker) < 0) { 178 if (walker->fn(sch, i + 1, walker) < 0) {
180 walker->stop = 1; 179 walker->stop = 1;
181 break; 180 break;
182 } 181 }
@@ -261,8 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
261 return err; 260 return err;
262 } 261 }
263 262
264 sch->bstats.bytes += qdisc_pkt_len(skb);
265 sch->bstats.packets++;
266 sch->q.qlen++; 263 sch->q.qlen++;
267 264
268 return NET_XMIT_SUCCESS; 265 return NET_XMIT_SUCCESS;
@@ -285,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
285 if (skb == NULL) 282 if (skb == NULL)
286 return NULL; 283 return NULL;
287 284
285 qdisc_bstats_update(sch, skb);
288 sch->q.qlen--; 286 sch->q.qlen--;
289 287
290 index = skb->tc_index & (p->indices - 1); 288 index = skb->tc_index & (p->indices - 1);
@@ -306,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
306 * and don't need yet another qdisc as a bypass. 304 * and don't need yet another qdisc as a bypass.
307 */ 305 */
308 if (p->mask[index] != 0xff || p->value[index]) 306 if (p->mask[index] != 0xff || p->value[index])
309 printk(KERN_WARNING 307 pr_warning("dsmark_dequeue: unsupported protocol %d\n",
310 "dsmark_dequeue: unsupported protocol %d\n", 308 ntohs(skb->protocol));
311 ntohs(skb->protocol));
312 break; 309 break;
313 } 310 }
314 311
@@ -384,8 +381,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
384 p->default_index = default_index; 381 p->default_index = default_index;
385 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); 382 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
386 383
387 p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 384 p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
388 &pfifo_qdisc_ops, sch->handle);
389 if (p->q == NULL) 385 if (p->q == NULL)
390 p->q = &noop_qdisc; 386 p->q = &noop_qdisc;
391 387
@@ -427,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
427 if (!dsmark_valid_index(p, cl)) 423 if (!dsmark_valid_index(p, cl))
428 return -EINVAL; 424 return -EINVAL;
429 425
430 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); 426 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
431 tcm->tcm_info = p->q->handle; 427 tcm->tcm_info = p->q->handle;
432 428
433 opts = nla_nest_start(skb, TCA_OPTIONS); 429 opts = nla_nest_start(skb, TCA_OPTIONS);
434 if (opts == NULL) 430 if (opts == NULL)
435 goto nla_put_failure; 431 goto nla_put_failure;
436 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); 432 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
437 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); 433 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
438 434
439 return nla_nest_end(skb, opts); 435 return nla_nest_end(skb, opts);
440 436
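
Both the dsmark and drr hunks also show the other recurring change in this merge: qdisc_create_dflt() no longer takes a struct net_device argument, since the device is reachable from the netdev_queue that is still passed in. A sketch of the updated call pattern for attaching a default pfifo child (make_child() is a hypothetical wrapper, not a kernel function):

#include <net/pkt_sched.h>

/* Sketch of the post-change calling convention. */
static struct Qdisc *make_child(struct Qdisc *sch, u32 classid)
{
	struct Qdisc *q;

	/* old: qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, ops, classid) */
	q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (q == NULL)
		q = &noop_qdisc;	/* fall back to the no-op qdisc, as above */

	return q;
}
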
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 5948bafa8ce2..66effe2da8e0 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,46 +19,30 @@
19 19
20/* 1 band FIFO pseudo-"scheduler" */ 20/* 1 band FIFO pseudo-"scheduler" */
21 21
22struct fifo_sched_data 22static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
23{ 23{
24 u32 limit; 24 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
25};
26
27static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
28{
29 struct fifo_sched_data *q = qdisc_priv(sch);
30
31 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
32 return qdisc_enqueue_tail(skb, sch); 25 return qdisc_enqueue_tail(skb, sch);
33 26
34 return qdisc_reshape_fail(skb, sch); 27 return qdisc_reshape_fail(skb, sch);
35} 28}
36 29
37static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) 30static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
38{ 31{
39 struct fifo_sched_data *q = qdisc_priv(sch); 32 if (likely(skb_queue_len(&sch->q) < sch->limit))
40
41 if (likely(skb_queue_len(&sch->q) < q->limit))
42 return qdisc_enqueue_tail(skb, sch); 33 return qdisc_enqueue_tail(skb, sch);
43 34
44 return qdisc_reshape_fail(skb, sch); 35 return qdisc_reshape_fail(skb, sch);
45} 36}
46 37
47static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) 38static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
48{ 39{
49 struct sk_buff *skb_head; 40 if (likely(skb_queue_len(&sch->q) < sch->limit))
50 struct fifo_sched_data *q = qdisc_priv(sch);
51
52 if (likely(skb_queue_len(&sch->q) < q->limit))
53 return qdisc_enqueue_tail(skb, sch); 41 return qdisc_enqueue_tail(skb, sch);
54 42
55 /* queue full, remove one skb to fulfill the limit */ 43 /* queue full, remove one skb to fulfill the limit */
56 skb_head = qdisc_dequeue_head(sch); 44 __qdisc_queue_drop_head(sch, &sch->q);
57 sch->bstats.bytes -= qdisc_pkt_len(skb_head);
58 sch->bstats.packets--;
59 sch->qstats.drops++; 45 sch->qstats.drops++;
60 kfree_skb(skb_head);
61
62 qdisc_enqueue_tail(skb, sch); 46 qdisc_enqueue_tail(skb, sch);
63 47
64 return NET_XMIT_CN; 48 return NET_XMIT_CN;
@@ -66,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
66 50
67static int fifo_init(struct Qdisc *sch, struct nlattr *opt) 51static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
68{ 52{
69 struct fifo_sched_data *q = qdisc_priv(sch); 53 bool bypass;
54 bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
70 55
71 if (opt == NULL) { 56 if (opt == NULL) {
72 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; 57 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
73 58
74 if (sch->ops == &bfifo_qdisc_ops) 59 if (is_bfifo)
75 limit *= psched_mtu(qdisc_dev(sch)); 60 limit *= psched_mtu(qdisc_dev(sch));
76 61
77 q->limit = limit; 62 sch->limit = limit;
78 } else { 63 } else {
79 struct tc_fifo_qopt *ctl = nla_data(opt); 64 struct tc_fifo_qopt *ctl = nla_data(opt);
80 65
81 if (nla_len(opt) < sizeof(*ctl)) 66 if (nla_len(opt) < sizeof(*ctl))
82 return -EINVAL; 67 return -EINVAL;
83 68
84 q->limit = ctl->limit; 69 sch->limit = ctl->limit;
85 } 70 }
86 71
72 if (is_bfifo)
73 bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
74 else
75 bypass = sch->limit >= 1;
76
77 if (bypass)
78 sch->flags |= TCQ_F_CAN_BYPASS;
79 else
80 sch->flags &= ~TCQ_F_CAN_BYPASS;
87 return 0; 81 return 0;
88} 82}
89 83
90static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 84static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
91{ 85{
92 struct fifo_sched_data *q = qdisc_priv(sch); 86 struct tc_fifo_qopt opt = { .limit = sch->limit };
93 struct tc_fifo_qopt opt = { .limit = q->limit };
94 87
95 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 88 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
96 return skb->len; 89 return skb->len;
@@ -101,7 +94,7 @@ nla_put_failure:
101 94
102struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { 95struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
103 .id = "pfifo", 96 .id = "pfifo",
104 .priv_size = sizeof(struct fifo_sched_data), 97 .priv_size = 0,
105 .enqueue = pfifo_enqueue, 98 .enqueue = pfifo_enqueue,
106 .dequeue = qdisc_dequeue_head, 99 .dequeue = qdisc_dequeue_head,
107 .peek = qdisc_peek_head, 100 .peek = qdisc_peek_head,
@@ -116,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops);
116 109
117struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { 110struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
118 .id = "bfifo", 111 .id = "bfifo",
119 .priv_size = sizeof(struct fifo_sched_data), 112 .priv_size = 0,
120 .enqueue = bfifo_enqueue, 113 .enqueue = bfifo_enqueue,
121 .dequeue = qdisc_dequeue_head, 114 .dequeue = qdisc_dequeue_head,
122 .peek = qdisc_peek_head, 115 .peek = qdisc_peek_head,
@@ -131,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops);
131 124
132struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { 125struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
133 .id = "pfifo_head_drop", 126 .id = "pfifo_head_drop",
134 .priv_size = sizeof(struct fifo_sched_data), 127 .priv_size = 0,
135 .enqueue = pfifo_tail_enqueue, 128 .enqueue = pfifo_tail_enqueue,
136 .dequeue = qdisc_dequeue_head, 129 .dequeue = qdisc_dequeue_head,
137 .peek = qdisc_peek_head, 130 .peek = qdisc_peek_head,
@@ -172,8 +165,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
172 struct Qdisc *q; 165 struct Qdisc *q;
173 int err = -ENOMEM; 166 int err = -ENOMEM;
174 167
175 q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 168 q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1));
176 ops, TC_H_MAKE(sch->handle, 1));
177 if (q) { 169 if (q) {
178 err = fifo_set_limit(q, limit); 170 err = fifo_set_limit(q, limit);
179 if (err < 0) { 171 if (err < 0) {
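
With the private struct fifo_sched_data gone, the three fifo flavours keep their limit in sch->limit and can advertise TCQ_F_CAN_BYPASS whenever a single packet can never exceed that limit (at least one MTU's worth of bytes for bfifo, at least one slot for pfifo). The decision from fifo_init() above, pulled out into a hypothetical helper purely for illustration:

#include <net/pkt_sched.h>

/* Sketch: the bypass test from fifo_init(), in isolation. */
static void fifo_update_bypass(struct Qdisc *sch, bool is_bfifo)
{
	bool bypass;

	if (is_bfifo)		/* byte-limited fifo */
		bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
	else			/* packet-limited fifo */
		bypass = sch->limit >= 1;

	if (bypass)
		sch->flags |= TCQ_F_CAN_BYPASS;
	else
		sch->flags &= ~TCQ_F_CAN_BYPASS;
}
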
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4386a1..b4c680900d7a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
60 60
61 /* check the reason of requeuing without tx lock first */ 61 /* check the reason of requeuing without tx lock first */
62 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); 62 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
63 if (!netif_tx_queue_stopped(txq) && 63 if (!netif_tx_queue_frozen_or_stopped(txq)) {
64 !netif_tx_queue_frozen(txq)) {
65 q->gso_skb = NULL; 64 q->gso_skb = NULL;
66 q->q.qlen--; 65 q->q.qlen--;
67 } else 66 } else
@@ -88,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
88 */ 87 */
89 kfree_skb(skb); 88 kfree_skb(skb);
90 if (net_ratelimit()) 89 if (net_ratelimit())
91 printk(KERN_WARNING "Dead loop on netdevice %s, " 90 pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
92 "fix it urgently!\n", dev_queue->dev->name); 91 dev_queue->dev->name);
93 ret = qdisc_qlen(q); 92 ret = qdisc_qlen(q);
94 } else { 93 } else {
95 /* 94 /*
@@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
122 spin_unlock(root_lock); 121 spin_unlock(root_lock);
123 122
124 HARD_TX_LOCK(dev, txq, smp_processor_id()); 123 HARD_TX_LOCK(dev, txq, smp_processor_id());
125 if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) 124 if (!netif_tx_queue_frozen_or_stopped(txq))
126 ret = dev_hard_start_xmit(skb, dev, txq); 125 ret = dev_hard_start_xmit(skb, dev, txq);
127 126
128 HARD_TX_UNLOCK(dev, txq); 127 HARD_TX_UNLOCK(dev, txq);
@@ -138,14 +137,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
138 } else { 137 } else {
139 /* Driver returned NETDEV_TX_BUSY - requeue skb */ 138 /* Driver returned NETDEV_TX_BUSY - requeue skb */
140 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) 139 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
141 printk(KERN_WARNING "BUG %s code %d qlen %d\n", 140 pr_warning("BUG %s code %d qlen %d\n",
142 dev->name, ret, q->q.qlen); 141 dev->name, ret, q->q.qlen);
143 142
144 ret = dev_requeue_skb(skb, q); 143 ret = dev_requeue_skb(skb, q);
145 } 144 }
146 145
147 if (ret && (netif_tx_queue_stopped(txq) || 146 if (ret && netif_tx_queue_frozen_or_stopped(txq))
148 netif_tx_queue_frozen(txq)))
149 ret = 0; 147 ret = 0;
150 148
151 return ret; 149 return ret;
@@ -253,9 +251,8 @@ static void dev_watchdog(unsigned long arg)
253 } 251 }
254 252
255 if (some_queue_timedout) { 253 if (some_queue_timedout) {
256 char drivername[64];
257 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", 254 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
258 dev->name, netdev_drivername(dev, drivername, 64), i); 255 dev->name, netdev_drivername(dev), i);
259 dev->netdev_ops->ndo_tx_timeout(dev); 256 dev->netdev_ops->ndo_tx_timeout(dev);
260 } 257 }
261 if (!mod_timer(&dev->watchdog_timer, 258 if (!mod_timer(&dev->watchdog_timer,
@@ -383,6 +380,7 @@ struct Qdisc noop_qdisc = {
383 .list = LIST_HEAD_INIT(noop_qdisc.list), 380 .list = LIST_HEAD_INIT(noop_qdisc.list),
384 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), 381 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
385 .dev_queue = &noop_netdev_queue, 382 .dev_queue = &noop_netdev_queue,
383 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
386}; 384};
387EXPORT_SYMBOL(noop_qdisc); 385EXPORT_SYMBOL(noop_qdisc);
388 386
@@ -409,11 +407,13 @@ static struct Qdisc noqueue_qdisc = {
409 .list = LIST_HEAD_INIT(noqueue_qdisc.list), 407 .list = LIST_HEAD_INIT(noqueue_qdisc.list),
410 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), 408 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
411 .dev_queue = &noqueue_netdev_queue, 409 .dev_queue = &noqueue_netdev_queue,
410 .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
412}; 411};
413 412
414 413
415static const u8 prio2band[TC_PRIO_MAX+1] = 414static const u8 prio2band[TC_PRIO_MAX + 1] = {
416 { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; 415 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
416};
417 417
418/* 3-band FIFO queue: old style, but should be a bit faster than 418/* 3-band FIFO queue: old style, but should be a bit faster than
419 generic prio+fifo combination. 419 generic prio+fifo combination.
@@ -445,7 +445,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
445 return priv->q + band; 445 return priv->q + band;
446} 446}
447 447
448static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 448static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
449{ 449{
450 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { 450 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
451 int band = prio2band[skb->priority & TC_PRIO_MAX]; 451 int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +460,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
460 return qdisc_drop(skb, qdisc); 460 return qdisc_drop(skb, qdisc);
461} 461}
462 462
463static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) 463static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
464{ 464{
465 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 465 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
466 int band = bitmap2band[priv->bitmap]; 466 int band = bitmap2band[priv->bitmap];
@@ -479,7 +479,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
479 return NULL; 479 return NULL;
480} 480}
481 481
482static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) 482static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
483{ 483{
484 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 484 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
485 int band = bitmap2band[priv->bitmap]; 485 int band = bitmap2band[priv->bitmap];
@@ -493,7 +493,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
493 return NULL; 493 return NULL;
494} 494}
495 495
496static void pfifo_fast_reset(struct Qdisc* qdisc) 496static void pfifo_fast_reset(struct Qdisc *qdisc)
497{ 497{
498 int prio; 498 int prio;
499 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 499 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +510,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
510{ 510{
511 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; 511 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
512 512
513 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 513 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
514 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 514 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
515 return skb->len; 515 return skb->len;
516 516
@@ -526,6 +526,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
526 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) 526 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
527 skb_queue_head_init(band2list(priv, prio)); 527 skb_queue_head_init(band2list(priv, prio));
528 528
529 /* Can by-pass the queue discipline */
530 qdisc->flags |= TCQ_F_CAN_BYPASS;
529 return 0; 531 return 0;
530} 532}
531 533
@@ -540,25 +542,32 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
540 .dump = pfifo_fast_dump, 542 .dump = pfifo_fast_dump,
541 .owner = THIS_MODULE, 543 .owner = THIS_MODULE,
542}; 544};
545EXPORT_SYMBOL(pfifo_fast_ops);
543 546
544struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 547struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
545 struct Qdisc_ops *ops) 548 struct Qdisc_ops *ops)
546{ 549{
547 void *p; 550 void *p;
548 struct Qdisc *sch; 551 struct Qdisc *sch;
549 unsigned int size; 552 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
550 int err = -ENOBUFS; 553 int err = -ENOBUFS;
551 554
552 /* ensure that the Qdisc and the private data are 64-byte aligned */ 555 p = kzalloc_node(size, GFP_KERNEL,
553 size = QDISC_ALIGN(sizeof(*sch)); 556 netdev_queue_numa_node_read(dev_queue));
554 size += ops->priv_size + (QDISC_ALIGNTO - 1);
555 557
556 p = kzalloc(size, GFP_KERNEL);
557 if (!p) 558 if (!p)
558 goto errout; 559 goto errout;
559 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); 560 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
560 sch->padded = (char *) sch - (char *) p; 561 /* if we got non aligned memory, ask more and do alignment ourself */
561 562 if (sch != p) {
563 kfree(p);
564 p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
565 netdev_queue_numa_node_read(dev_queue));
566 if (!p)
567 goto errout;
568 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
569 sch->padded = (char *) sch - (char *) p;
570 }
562 INIT_LIST_HEAD(&sch->list); 571 INIT_LIST_HEAD(&sch->list);
563 skb_queue_head_init(&sch->q); 572 skb_queue_head_init(&sch->q);
564 spin_lock_init(&sch->busylock); 573 spin_lock_init(&sch->busylock);
@@ -574,10 +583,8 @@ errout:
574 return ERR_PTR(err); 583 return ERR_PTR(err);
575} 584}
576 585
577struct Qdisc * qdisc_create_dflt(struct net_device *dev, 586struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
578 struct netdev_queue *dev_queue, 587 struct Qdisc_ops *ops, unsigned int parentid)
579 struct Qdisc_ops *ops,
580 unsigned int parentid)
581{ 588{
582 struct Qdisc *sch; 589 struct Qdisc *sch;
583 590
@@ -630,7 +637,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
630#ifdef CONFIG_NET_SCHED 637#ifdef CONFIG_NET_SCHED
631 qdisc_list_del(qdisc); 638 qdisc_list_del(qdisc);
632 639
633 qdisc_put_stab(qdisc->stab); 640 qdisc_put_stab(rtnl_dereference(qdisc->stab));
634#endif 641#endif
635 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 642 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
636 if (ops->reset) 643 if (ops->reset)
@@ -674,25 +681,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
674 681
675 return oqdisc; 682 return oqdisc;
676} 683}
684EXPORT_SYMBOL(dev_graft_qdisc);
677 685
678static void attach_one_default_qdisc(struct net_device *dev, 686static void attach_one_default_qdisc(struct net_device *dev,
679 struct netdev_queue *dev_queue, 687 struct netdev_queue *dev_queue,
680 void *_unused) 688 void *_unused)
681{ 689{
682 struct Qdisc *qdisc; 690 struct Qdisc *qdisc = &noqueue_qdisc;
683 691
684 if (dev->tx_queue_len) { 692 if (dev->tx_queue_len) {
685 qdisc = qdisc_create_dflt(dev, dev_queue, 693 qdisc = qdisc_create_dflt(dev_queue,
686 &pfifo_fast_ops, TC_H_ROOT); 694 &pfifo_fast_ops, TC_H_ROOT);
687 if (!qdisc) { 695 if (!qdisc) {
688 printk(KERN_INFO "%s: activation failed\n", dev->name); 696 netdev_info(dev, "activation failed\n");
689 return; 697 return;
690 } 698 }
691
692 /* Can by-pass the queue discipline for default qdisc */
693 qdisc->flags |= TCQ_F_CAN_BYPASS;
694 } else {
695 qdisc = &noqueue_qdisc;
696 } 699 }
697 dev_queue->qdisc_sleeping = qdisc; 700 dev_queue->qdisc_sleeping = qdisc;
698} 701}
@@ -709,7 +712,7 @@ static void attach_default_qdiscs(struct net_device *dev)
709 dev->qdisc = txq->qdisc_sleeping; 712 dev->qdisc = txq->qdisc_sleeping;
710 atomic_inc(&dev->qdisc->refcnt); 713 atomic_inc(&dev->qdisc->refcnt);
711 } else { 714 } else {
712 qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); 715 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
713 if (qdisc) { 716 if (qdisc) {
714 qdisc->ops->attach(qdisc); 717 qdisc->ops->attach(qdisc);
715 dev->qdisc = qdisc; 718 dev->qdisc = qdisc;
@@ -753,13 +756,15 @@ void dev_activate(struct net_device *dev)
753 756
754 need_watchdog = 0; 757 need_watchdog = 0;
755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); 758 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
756 transition_one_qdisc(dev, &dev->rx_queue, NULL); 759 if (dev_ingress_queue(dev))
760 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
757 761
758 if (need_watchdog) { 762 if (need_watchdog) {
759 dev->trans_start = jiffies; 763 dev->trans_start = jiffies;
760 dev_watchdog_up(dev); 764 dev_watchdog_up(dev);
761 } 765 }
762} 766}
767EXPORT_SYMBOL(dev_activate);
763 768
764static void dev_deactivate_queue(struct net_device *dev, 769static void dev_deactivate_queue(struct net_device *dev,
765 struct netdev_queue *dev_queue, 770 struct netdev_queue *dev_queue,
@@ -809,20 +814,51 @@ static bool some_qdisc_is_busy(struct net_device *dev)
809 return false; 814 return false;
810} 815}
811 816
812void dev_deactivate(struct net_device *dev) 817/**
818 * dev_deactivate_many - deactivate transmissions on several devices
819 * @head: list of devices to deactivate
820 *
821 * This function returns only when all outstanding transmissions
822 * have completed, unless all devices are in dismantle phase.
823 */
824void dev_deactivate_many(struct list_head *head)
813{ 825{
814 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); 826 struct net_device *dev;
815 dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); 827 bool sync_needed = false;
816 828
817 dev_watchdog_down(dev); 829 list_for_each_entry(dev, head, unreg_list) {
830 netdev_for_each_tx_queue(dev, dev_deactivate_queue,
831 &noop_qdisc);
832 if (dev_ingress_queue(dev))
833 dev_deactivate_queue(dev, dev_ingress_queue(dev),
834 &noop_qdisc);
818 835
819 /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ 836 dev_watchdog_down(dev);
820 synchronize_rcu(); 837 sync_needed |= !dev->dismantle;
838 }
839
840 /* Wait for outstanding qdisc-less dev_queue_xmit calls.
841 * This is avoided if all devices are in dismantle phase :
842 * Caller will call synchronize_net() for us
843 */
844 if (sync_needed)
845 synchronize_net();
821 846
822 /* Wait for outstanding qdisc_run calls. */ 847 /* Wait for outstanding qdisc_run calls. */
823 while (some_qdisc_is_busy(dev)) 848 list_for_each_entry(dev, head, unreg_list)
824 yield(); 849 while (some_qdisc_is_busy(dev))
850 yield();
851}
852
853void dev_deactivate(struct net_device *dev)
854{
855 LIST_HEAD(single);
856
857 list_add(&dev->unreg_list, &single);
858 dev_deactivate_many(&single);
859 list_del(&single);
825} 860}
861EXPORT_SYMBOL(dev_deactivate);
826 862
827static void dev_init_scheduler_queue(struct net_device *dev, 863static void dev_init_scheduler_queue(struct net_device *dev,
828 struct netdev_queue *dev_queue, 864 struct netdev_queue *dev_queue,
@@ -838,7 +874,8 @@ void dev_init_scheduler(struct net_device *dev)
838{ 874{
839 dev->qdisc = &noop_qdisc; 875 dev->qdisc = &noop_qdisc;
840 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); 876 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
841 dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 877 if (dev_ingress_queue(dev))
878 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
842 879
843 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 880 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
844} 881}
@@ -861,7 +898,8 @@ static void shutdown_scheduler_queue(struct net_device *dev,
861void dev_shutdown(struct net_device *dev) 898void dev_shutdown(struct net_device *dev)
862{ 899{
863 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); 900 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
864 shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 901 if (dev_ingress_queue(dev))
902 shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
865 qdisc_destroy(dev->qdisc); 903 qdisc_destroy(dev->qdisc);
866 dev->qdisc = &noop_qdisc; 904 dev->qdisc = &noop_qdisc;
867 905
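
Two things stand out in the sch_generic.c diff: dev_deactivate() becomes a thin wrapper around dev_deactivate_many() so whole lists of devices can be torn down with a single synchronize_net(), and qdisc_alloc() becomes NUMA-aware. For the latter, the allocator first tries a plain kzalloc_node() on the queue's node and only falls back to over-allocating and aligning by hand when the result is not QDISC_ALIGNTO-aligned. A condensed sketch of that pattern (alloc_aligned_qdisc() is a hypothetical name; error handling is trimmed):

#include <linux/slab.h>
#include <net/pkt_sched.h>

/* Sketch of the allocation pattern used by qdisc_alloc() above. */
static struct Qdisc *alloc_aligned_qdisc(struct netdev_queue *dev_queue,
					 const struct Qdisc_ops *ops)
{
	unsigned int size = QDISC_ALIGN(sizeof(struct Qdisc)) + ops->priv_size;
	int node = netdev_queue_numa_node_read(dev_queue);
	struct Qdisc *sch;
	void *p;

	p = kzalloc_node(size, GFP_KERNEL, node);
	if (!p)
		return NULL;
	sch = (struct Qdisc *)QDISC_ALIGN((unsigned long)p);

	if (sch != p) {
		/* got unaligned memory: ask for slack and align ourselves */
		kfree(p);
		p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL, node);
		if (!p)
			return NULL;
		sch = (struct Qdisc *)QDISC_ALIGN((unsigned long)p);
		sch->padded = (char *)sch - (char *)p;
	}

	return sch;
}

The comment removed from qdisc_alloc() spelled out the goal (keep the Qdisc and its private data 64-byte aligned); the new code gets the memory on the right NUMA node first and only pays the padding cost when the allocator hands back something unaligned.
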
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2aa5c92..b9493a09a870 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
32struct gred_sched_data; 32struct gred_sched_data;
33struct gred_sched; 33struct gred_sched;
34 34
35struct gred_sched_data 35struct gred_sched_data {
36{
37 u32 limit; /* HARD maximal queue length */ 36 u32 limit; /* HARD maximal queue length */
38 u32 DP; /* the drop pramaters */ 37 u32 DP; /* the drop pramaters */
39 u32 bytesin; /* bytes seen on virtualQ so far*/ 38 u32 bytesin; /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
50 GRED_RIO_MODE, 49 GRED_RIO_MODE,
51}; 50};
52 51
53struct gred_sched 52struct gred_sched {
54{
55 struct gred_sched_data *tab[MAX_DPs]; 53 struct gred_sched_data *tab[MAX_DPs];
56 unsigned long flags; 54 unsigned long flags;
57 u32 red_flags; 55 u32 red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
150 return t->red_flags & TC_RED_HARDDROP; 148 return t->red_flags & TC_RED_HARDDROP;
151} 149}
152 150
153static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 151static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
154{ 152{
155 struct gred_sched_data *q=NULL; 153 struct gred_sched_data *q = NULL;
156 struct gred_sched *t= qdisc_priv(sch); 154 struct gred_sched *t = qdisc_priv(sch);
157 unsigned long qavg = 0; 155 unsigned long qavg = 0;
158 u16 dp = tc_index_to_dp(skb); 156 u16 dp = tc_index_to_dp(skb);
159 157
160 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 158 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
161 dp = t->def; 159 dp = t->def;
162 160
163 if ((q = t->tab[dp]) == NULL) { 161 q = t->tab[dp];
162 if (!q) {
164 /* Pass through packets not assigned to a DP 163 /* Pass through packets not assigned to a DP
165 * if no default DP has been configured. This 164 * if no default DP has been configured. This
166 * allows for DP flows to be left untouched. 165 * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
183 for (i = 0; i < t->DPs; i++) { 182 for (i = 0; i < t->DPs; i++) {
184 if (t->tab[i] && t->tab[i]->prio < q->prio && 183 if (t->tab[i] && t->tab[i]->prio < q->prio &&
185 !red_is_idling(&t->tab[i]->parms)) 184 !red_is_idling(&t->tab[i]->parms))
186 qavg +=t->tab[i]->parms.qavg; 185 qavg += t->tab[i]->parms.qavg;
187 } 186 }
188 187
189 } 188 }
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
203 gred_store_wred_set(t, q); 202 gred_store_wred_set(t, q);
204 203
205 switch (red_action(&q->parms, q->parms.qavg + qavg)) { 204 switch (red_action(&q->parms, q->parms.qavg + qavg)) {
206 case RED_DONT_MARK: 205 case RED_DONT_MARK:
207 break; 206 break;
208 207
209 case RED_PROB_MARK: 208 case RED_PROB_MARK:
210 sch->qstats.overlimits++; 209 sch->qstats.overlimits++;
211 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { 210 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
212 q->stats.prob_drop++; 211 q->stats.prob_drop++;
213 goto congestion_drop; 212 goto congestion_drop;
214 } 213 }
215 214
216 q->stats.prob_mark++; 215 q->stats.prob_mark++;
217 break; 216 break;
218 217
219 case RED_HARD_MARK: 218 case RED_HARD_MARK:
220 sch->qstats.overlimits++; 219 sch->qstats.overlimits++;
221 if (gred_use_harddrop(t) || !gred_use_ecn(t) || 220 if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
222 !INET_ECN_set_ce(skb)) { 221 !INET_ECN_set_ce(skb)) {
223 q->stats.forced_drop++; 222 q->stats.forced_drop++;
224 goto congestion_drop; 223 goto congestion_drop;
225 } 224 }
226 q->stats.forced_mark++; 225 q->stats.forced_mark++;
227 break; 226 break;
228 } 227 }
229 228
230 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { 229 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
241 return NET_XMIT_CN; 240 return NET_XMIT_CN;
242} 241}
243 242
244static struct sk_buff *gred_dequeue(struct Qdisc* sch) 243static struct sk_buff *gred_dequeue(struct Qdisc *sch)
245{ 244{
246 struct sk_buff *skb; 245 struct sk_buff *skb;
247 struct gred_sched *t = qdisc_priv(sch); 246 struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
254 253
255 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 254 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
256 if (net_ratelimit()) 255 if (net_ratelimit())
257 printk(KERN_WARNING "GRED: Unable to relocate " 256 pr_warning("GRED: Unable to relocate VQ 0x%x "
258 "VQ 0x%x after dequeue, screwing up " 257 "after dequeue, screwing up "
259 "backlog.\n", tc_index_to_dp(skb)); 258 "backlog.\n", tc_index_to_dp(skb));
260 } else { 259 } else {
261 q->backlog -= qdisc_pkt_len(skb); 260 q->backlog -= qdisc_pkt_len(skb);
262 261
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
273 return NULL; 272 return NULL;
274} 273}
275 274
276static unsigned int gred_drop(struct Qdisc* sch) 275static unsigned int gred_drop(struct Qdisc *sch)
277{ 276{
278 struct sk_buff *skb; 277 struct sk_buff *skb;
279 struct gred_sched *t = qdisc_priv(sch); 278 struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
286 285
287 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 286 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
288 if (net_ratelimit()) 287 if (net_ratelimit())
289 printk(KERN_WARNING "GRED: Unable to relocate " 288 pr_warning("GRED: Unable to relocate VQ 0x%x "
290 "VQ 0x%x while dropping, screwing up " 289 "while dropping, screwing up "
291 "backlog.\n", tc_index_to_dp(skb)); 290 "backlog.\n", tc_index_to_dp(skb));
292 } else { 291 } else {
293 q->backlog -= len; 292 q->backlog -= len;
294 q->stats.other++; 293 q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
308 307
309} 308}
310 309
311static void gred_reset(struct Qdisc* sch) 310static void gred_reset(struct Qdisc *sch)
312{ 311{
313 int i; 312 int i;
314 struct gred_sched *t = qdisc_priv(sch); 313 struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
369 368
370 for (i = table->DPs; i < MAX_DPs; i++) { 369 for (i = table->DPs; i < MAX_DPs; i++) {
371 if (table->tab[i]) { 370 if (table->tab[i]) {
372 printk(KERN_WARNING "GRED: Warning: Destroying " 371 pr_warning("GRED: Warning: Destroying "
373 "shadowed VQ 0x%x\n", i); 372 "shadowed VQ 0x%x\n", i);
374 gred_destroy_vq(table->tab[i]); 373 gred_destroy_vq(table->tab[i]);
375 table->tab[i] = NULL; 374 table->tab[i] = NULL;
376 } 375 }
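
Most of the sch_gred.c churn is mechanical checkpatch cleanup: opening braces move onto the struct line, assignments leave the if () conditions, and printk(KERN_WARNING ...) becomes pr_warning(). The logging conversion, side by side (warn_both() is just an illustrative stub):

#include <linux/kernel.h>

/* Sketch: equivalent messages before and after the conversion. */
static void warn_both(u16 dp)
{
	printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x\n", dp);	/* old */
	pr_warning("GRED: Unable to relocate VQ 0x%x\n", dp);		/* new */
}
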
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 47496098d35c..6488e6425652 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
81 * that are expensive on 32-bit architectures. 81 * that are expensive on 32-bit architectures.
82 */ 82 */
83 83
84struct internal_sc 84struct internal_sc {
85{
86 u64 sm1; /* scaled slope of the 1st segment */ 85 u64 sm1; /* scaled slope of the 1st segment */
87 u64 ism1; /* scaled inverse-slope of the 1st segment */ 86 u64 ism1; /* scaled inverse-slope of the 1st segment */
88 u64 dx; /* the x-projection of the 1st segment */ 87 u64 dx; /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
92}; 91};
93 92
94/* runtime service curve */ 93/* runtime service curve */
95struct runtime_sc 94struct runtime_sc {
96{
97 u64 x; /* current starting position on x-axis */ 95 u64 x; /* current starting position on x-axis */
98 u64 y; /* current starting position on y-axis */ 96 u64 y; /* current starting position on y-axis */
99 u64 sm1; /* scaled slope of the 1st segment */ 97 u64 sm1; /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
104 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 102 u64 ism2; /* scaled inverse-slope of the 2nd segment */
105}; 103};
106 104
107enum hfsc_class_flags 105enum hfsc_class_flags {
108{
109 HFSC_RSC = 0x1, 106 HFSC_RSC = 0x1,
110 HFSC_FSC = 0x2, 107 HFSC_FSC = 0x2,
111 HFSC_USC = 0x4 108 HFSC_USC = 0x4
112}; 109};
113 110
114struct hfsc_class 111struct hfsc_class {
115{
116 struct Qdisc_class_common cl_common; 112 struct Qdisc_class_common cl_common;
117 unsigned int refcnt; /* usage count */ 113 unsigned int refcnt; /* usage count */
118 114
@@ -140,8 +136,8 @@ struct hfsc_class
140 u64 cl_cumul; /* cumulative work in bytes done by 136 u64 cl_cumul; /* cumulative work in bytes done by
141 real-time criteria */ 137 real-time criteria */
142 138
143 u64 cl_d; /* deadline*/ 139 u64 cl_d; /* deadline*/
144 u64 cl_e; /* eligible time */ 140 u64 cl_e; /* eligible time */
145 u64 cl_vt; /* virtual time */ 141 u64 cl_vt; /* virtual time */
146 u64 cl_f; /* time when this class will fit for 142 u64 cl_f; /* time when this class will fit for
147 link-sharing, max(myf, cfmin) */ 143 link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
176 unsigned long cl_nactive; /* number of active children */ 172 unsigned long cl_nactive; /* number of active children */
177}; 173};
178 174
179struct hfsc_sched 175struct hfsc_sched {
180{
181 u16 defcls; /* default class id */ 176 u16 defcls; /* default class id */
182 struct hfsc_class root; /* root class */ 177 struct hfsc_class root; /* root class */
183 struct Qdisc_class_hash clhash; /* class hash */ 178 struct Qdisc_class_hash clhash; /* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
693 if (go_active) { 688 if (go_active) {
694 n = rb_last(&cl->cl_parent->vt_tree); 689 n = rb_last(&cl->cl_parent->vt_tree);
695 if (n != NULL) { 690 if (n != NULL) {
696 max_cl = rb_entry(n, struct hfsc_class,vt_node); 691 max_cl = rb_entry(n, struct hfsc_class, vt_node);
697 /* 692 /*
698 * set vt to the average of the min and max 693 * set vt to the average of the min and max
699 * classes. if the parent's period didn't 694 * classes. if the parent's period didn't
@@ -1088,7 +1083,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1088 cl->refcnt = 1; 1083 cl->refcnt = 1;
1089 cl->sched = q; 1084 cl->sched = q;
1090 cl->cl_parent = parent; 1085 cl->cl_parent = parent;
1091 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1086 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
1092 &pfifo_qdisc_ops, classid); 1087 &pfifo_qdisc_ops, classid);
1093 if (cl->qdisc == NULL) 1088 if (cl->qdisc == NULL)
1094 cl->qdisc = &noop_qdisc; 1089 cl->qdisc = &noop_qdisc;
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1177 return NULL; 1172 return NULL;
1178 } 1173 }
1179#endif 1174#endif
1180 if ((cl = (struct hfsc_class *)res.class) == NULL) { 1175 cl = (struct hfsc_class *)res.class;
1181 if ((cl = hfsc_find_class(res.classid, sch)) == NULL) 1176 if (!cl) {
1177 cl = hfsc_find_class(res.classid, sch);
1178 if (!cl)
1182 break; /* filter selected invalid classid */ 1179 break; /* filter selected invalid classid */
1183 if (cl->level >= head->level) 1180 if (cl->level >= head->level)
1184 break; /* filter may only point downwards */ 1181 break; /* filter may only point downwards */
@@ -1209,8 +1206,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1209 if (cl->level > 0) 1206 if (cl->level > 0)
1210 return -EINVAL; 1207 return -EINVAL;
1211 if (new == NULL) { 1208 if (new == NULL) {
1212 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1209 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1213 &pfifo_qdisc_ops,
1214 cl->cl_common.classid); 1210 cl->cl_common.classid);
1215 if (new == NULL) 1211 if (new == NULL)
1216 new = &noop_qdisc; 1212 new = &noop_qdisc;
@@ -1317,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1317 return -1; 1313 return -1;
1318} 1314}
1319 1315
1320static inline int 1316static int
1321hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) 1317hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1322{ 1318{
1323 if ((cl->cl_flags & HFSC_RSC) && 1319 if ((cl->cl_flags & HFSC_RSC) &&
@@ -1421,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1421 struct hfsc_class *cl; 1417 struct hfsc_class *cl;
1422 u64 next_time = 0; 1418 u64 next_time = 0;
1423 1419
1424 if ((cl = eltree_get_minel(q)) != NULL) 1420 cl = eltree_get_minel(q);
1421 if (cl)
1425 next_time = cl->cl_e; 1422 next_time = cl->cl_e;
1426 if (q->root.cl_cfmin != 0) { 1423 if (q->root.cl_cfmin != 0) {
1427 if (next_time == 0 || next_time > q->root.cl_cfmin) 1424 if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1452,8 +1449,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1452 q->root.cl_common.classid = sch->handle; 1449 q->root.cl_common.classid = sch->handle;
1453 q->root.refcnt = 1; 1450 q->root.refcnt = 1;
1454 q->root.sched = q; 1451 q->root.sched = q;
1455 q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1452 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1456 &pfifo_qdisc_ops,
1457 sch->handle); 1453 sch->handle);
1458 if (q->root.qdisc == NULL) 1454 if (q->root.qdisc == NULL)
1459 q->root.qdisc = &noop_qdisc; 1455 q->root.qdisc = &noop_qdisc;
@@ -1601,10 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1601 if (cl->qdisc->q.qlen == 1) 1597 if (cl->qdisc->q.qlen == 1)
1602 set_active(cl, qdisc_pkt_len(skb)); 1598 set_active(cl, qdisc_pkt_len(skb));
1603 1599
1604 cl->bstats.packets++; 1600 bstats_update(&cl->bstats, skb);
1605 cl->bstats.bytes += qdisc_pkt_len(skb);
1606 sch->bstats.packets++;
1607 sch->bstats.bytes += qdisc_pkt_len(skb);
1608 sch->q.qlen++; 1601 sch->q.qlen++;
1609 1602
1610 return NET_XMIT_SUCCESS; 1603 return NET_XMIT_SUCCESS;
@@ -1630,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
1630 * find the class with the minimum deadline among 1623 * find the class with the minimum deadline among
1631 * the eligible classes. 1624 * the eligible classes.
1632 */ 1625 */
1633 if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { 1626 cl = eltree_get_mindl(q, cur_time);
1627 if (cl) {
1634 realtime = 1; 1628 realtime = 1;
1635 } else { 1629 } else {
1636 /* 1630 /*
@@ -1669,7 +1663,8 @@ hfsc_dequeue(struct Qdisc *sch)
1669 set_passive(cl); 1663 set_passive(cl);
1670 } 1664 }
1671 1665
1672 sch->flags &= ~TCQ_F_THROTTLED; 1666 qdisc_unthrottled(sch);
1667 qdisc_bstats_update(sch, skb);
1673 sch->q.qlen--; 1668 sch->q.qlen--;
1674 1669
1675 return skb; 1670 return skb;
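
On the dequeue side, hfsc (and htb just below) switch from clearing TCQ_F_THROTTLED by hand to the qdisc_unthrottled() helper, and charge the qdisc's own byte/packet counters at the moment a packet is handed up. The shared shape of that exit path, as a sketch (dequeue_epilogue() is a hypothetical helper, not kernel API):

#include <net/sch_generic.h>

/* Sketch: the common tail of a dequeue routine after this merge. */
static struct sk_buff *dequeue_epilogue(struct Qdisc *sch, struct sk_buff *skb)
{
	if (skb) {
		qdisc_unthrottled(sch);		/* was: sch->flags &= ~TCQ_F_THROTTLED */
		qdisc_bstats_update(sch, skb);
		sch->q.qlen--;
	}
	return skb;
}
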
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4be8d04b262d..29b942ce9e82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ 99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ 100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
101 /* When class changes from state 1->2 and disconnects from 101 /* When class changes from state 1->2 and disconnects from
102 parent's feed then we lost ptr value and start from the 102 * parent's feed then we lost ptr value and start from the
103 first child again. Here we store classid of the 103 * first child again. Here we store classid of the
104 last valid ptr (used when ptr is NULL). */ 104 * last valid ptr (used when ptr is NULL).
105 */
105 u32 last_ptr_id[TC_HTB_NUMPRIO]; 106 u32 last_ptr_id[TC_HTB_NUMPRIO];
106 } inner; 107 } inner;
107 } un; 108 } un;
@@ -182,10 +183,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
182 * filters in qdisc and in inner nodes (if higher filter points to the inner 183 * filters in qdisc and in inner nodes (if higher filter points to the inner
183 * node). If we end up with classid MAJOR:0 we enqueue the skb into special 184 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
184 * internal fifo (direct). These packets then go directly thru. If we still 185 * internal fifo (direct). These packets then go directly thru. If we still
185 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull 186 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
186 * then finish and return direct queue. 187 * then finish and return direct queue.
187 */ 188 */
188#define HTB_DIRECT (struct htb_class*)-1 189#define HTB_DIRECT ((struct htb_class *)-1L)
189 190
190static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 191static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
191 int *qerr) 192 int *qerr)
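
The HTB_DIRECT change above is tiny but worth a note: the expansion is now fully parenthesized and uses a long constant, which keeps the macro from regrouping when it lands inside a larger expression. One way the old form could misparse, purely as an illustration:

struct htb_class;				/* opaque here; only pointers are formed */

#define OLD_DIRECT  (struct htb_class *)-1	/* old: unparenthesized expansion */
#define NEW_DIRECT  ((struct htb_class *)-1L)	/* new: fully parenthesized */

/*
 * sizeof OLD_DIRECT  parses as  sizeof(struct htb_class *) - 1
 * sizeof NEW_DIRECT  parses as  sizeof((struct htb_class *)-1L)
 * i.e. the old macro silently yields "pointer size minus one" there.
 */
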
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
197 int result; 198 int result;
198 199
199 /* allow to select class by setting skb->priority to valid classid; 200 /* allow to select class by setting skb->priority to valid classid;
200 note that nfmark can be used too by attaching filter fw with no 201 * note that nfmark can be used too by attaching filter fw with no
201 rules in it */ 202 * rules in it
203 */
202 if (skb->priority == sch->handle) 204 if (skb->priority == sch->handle)
203 return HTB_DIRECT; /* X:0 (direct flow) selected */ 205 return HTB_DIRECT; /* X:0 (direct flow) selected */
204 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) 206 cl = htb_find(skb->priority, sch);
207 if (cl && cl->level == 0)
205 return cl; 208 return cl;
206 209
207 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 210 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 return NULL; 219 return NULL;
217 } 220 }
218#endif 221#endif
219 if ((cl = (void *)res.class) == NULL) { 222 cl = (void *)res.class;
223 if (!cl) {
220 if (res.classid == sch->handle) 224 if (res.classid == sch->handle)
221 return HTB_DIRECT; /* X:0 (direct flow) */ 225 return HTB_DIRECT; /* X:0 (direct flow) */
222 if ((cl = htb_find(res.classid, sch)) == NULL) 226 cl = htb_find(res.classid, sch);
227 if (!cl)
223 break; /* filter selected invalid classid */ 228 break; /* filter selected invalid classid */
224 } 229 }
225 if (!cl->level) 230 if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
378 383
379 if (p->un.inner.feed[prio].rb_node) 384 if (p->un.inner.feed[prio].rb_node)
380 /* parent already has its feed in use so that 385 /* parent already has its feed in use so that
381 reset bit in mask as parent is already ok */ 386 * reset bit in mask as parent is already ok
387 */
382 mask &= ~(1 << prio); 388 mask &= ~(1 << prio);
383 389
384 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); 390 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
413 419
414 if (p->un.inner.ptr[prio] == cl->node + prio) { 420 if (p->un.inner.ptr[prio] == cl->node + prio) {
415 /* we are removing child which is pointed to from 421 /* we are removing child which is pointed to from
416 parent feed - forget the pointer but remember 422 * parent feed - forget the pointer but remember
417 classid */ 423 * classid
424 */
418 p->un.inner.last_ptr_id[prio] = cl->common.classid; 425 p->un.inner.last_ptr_id[prio] = cl->common.classid;
419 p->un.inner.ptr[prio] = NULL; 426 p->un.inner.ptr[prio] = NULL;
420 } 427 }
@@ -569,15 +576,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
569 } 576 }
570 return ret; 577 return ret;
571 } else { 578 } else {
572 cl->bstats.packets += 579 bstats_update(&cl->bstats, skb);
573 skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
574 cl->bstats.bytes += qdisc_pkt_len(skb);
575 htb_activate(q, cl); 580 htb_activate(q, cl);
576 } 581 }
577 582
578 sch->q.qlen++; 583 sch->q.qlen++;
579 sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
580 sch->bstats.bytes += qdisc_pkt_len(skb);
581 return NET_XMIT_SUCCESS; 584 return NET_XMIT_SUCCESS;
582} 585}
583 586
@@ -648,12 +651,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
648 htb_add_to_wait_tree(q, cl, diff); 651 htb_add_to_wait_tree(q, cl, diff);
649 } 652 }
650 653
651 /* update byte stats except for leaves which are already updated */ 654 /* update basic stats except for leaves which are already updated */
652 if (cl->level) { 655 if (cl->level)
653 cl->bstats.bytes += bytes; 656 bstats_update(&cl->bstats, skb);
654 cl->bstats.packets += skb_is_gso(skb)? 657
655 skb_shinfo(skb)->gso_segs:1;
656 }
657 cl = cl->parent; 658 cl = cl->parent;
658 } 659 }
659} 660}
@@ -669,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
669 unsigned long start) 670 unsigned long start)
670{ 671{
671 /* don't run for longer than 2 jiffies; 2 is used instead of 672 /* don't run for longer than 2 jiffies; 2 is used instead of
672 1 to simplify things when jiffy is going to be incremented 673 * 1 to simplify things when jiffy is going to be incremented
673 too soon */ 674 * too soon
675 */
674 unsigned long stop_at = start + 2; 676 unsigned long stop_at = start + 2;
675 while (time_before(jiffies, stop_at)) { 677 while (time_before(jiffies, stop_at)) {
676 struct htb_class *cl; 678 struct htb_class *cl;
@@ -693,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
693 695
694 /* too much load - let's continue after a break for scheduling */ 696 /* too much load - let's continue after a break for scheduling */
695 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { 697 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
696 printk(KERN_WARNING "htb: too many events!\n"); 698 pr_warning("htb: too many events!\n");
697 q->warned |= HTB_WARN_TOOMANYEVENTS; 699 q->warned |= HTB_WARN_TOOMANYEVENTS;
698 } 700 }
699 701
@@ -701,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
701} 703}
702 704
703/* Returns class->node+prio from id-tree where classe's id is >= id. NULL 705/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
704 is no such one exists. */ 706 * is no such one exists.
707 */
705static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, 708static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
706 u32 id) 709 u32 id)
707{ 710{
@@ -745,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
745 for (i = 0; i < 65535; i++) { 748 for (i = 0; i < 65535; i++) {
746 if (!*sp->pptr && *sp->pid) { 749 if (!*sp->pptr && *sp->pid) {
747 /* ptr was invalidated but id is valid - try to recover 750 /* ptr was invalidated but id is valid - try to recover
748 the original or next ptr */ 751 * the original or next ptr
752 */
749 *sp->pptr = 753 *sp->pptr =
750 htb_id_find_next_upper(prio, sp->root, *sp->pid); 754 htb_id_find_next_upper(prio, sp->root, *sp->pid);
751 } 755 }
752 *sp->pid = 0; /* ptr is valid now so that remove this hint as it 756 *sp->pid = 0; /* ptr is valid now so that remove this hint as it
753 can become out of date quickly */ 757 * can become out of date quickly
758 */
754 if (!*sp->pptr) { /* we are at right end; rewind & go up */ 759 if (!*sp->pptr) { /* we are at right end; rewind & go up */
755 *sp->pptr = sp->root; 760 *sp->pptr = sp->root;
756 while ((*sp->pptr)->rb_left) 761 while ((*sp->pptr)->rb_left)
@@ -778,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
778} 783}
779 784
780/* dequeues packet at given priority and level; call only if 785/* dequeues packet at given priority and level; call only if
781 you are sure that there is active class at prio/level */ 786 * you are sure that there is active class at prio/level
787 */
782static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, 788static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
783 int level) 789 int level)
784{ 790{
@@ -795,9 +801,10 @@ next:
795 return NULL; 801 return NULL;
796 802
797 /* class can be empty - it is unlikely but can be true if leaf 803 /* class can be empty - it is unlikely but can be true if leaf
798 qdisc drops packets in enqueue routine or if someone used 804 * qdisc drops packets in enqueue routine or if someone used
799 graft operation on the leaf since last dequeue; 805 * graft operation on the leaf since last dequeue;
800 simply deactivate and skip such class */ 806 * simply deactivate and skip such class
807 */
801 if (unlikely(cl->un.leaf.q->q.qlen == 0)) { 808 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
802 struct htb_class *next; 809 struct htb_class *next;
803 htb_deactivate(q, cl); 810 htb_deactivate(q, cl);
@@ -837,7 +844,8 @@ next:
837 ptr[0]) + prio); 844 ptr[0]) + prio);
838 } 845 }
839 /* this used to be after charge_class but this constelation 846 /* this used to be after charge_class but this constelation
840 gives us slightly better performance */ 847 * gives us slightly better performance
848 */
841 if (!cl->un.leaf.q->q.qlen) 849 if (!cl->un.leaf.q->q.qlen)
842 htb_deactivate(q, cl); 850 htb_deactivate(q, cl);
843 htb_charge_class(q, cl, level, skb); 851 htb_charge_class(q, cl, level, skb);
@@ -847,7 +855,7 @@ next:
847 855
848static struct sk_buff *htb_dequeue(struct Qdisc *sch) 856static struct sk_buff *htb_dequeue(struct Qdisc *sch)
849{ 857{
850 struct sk_buff *skb = NULL; 858 struct sk_buff *skb;
851 struct htb_sched *q = qdisc_priv(sch); 859 struct htb_sched *q = qdisc_priv(sch);
852 int level; 860 int level;
853 psched_time_t next_event; 861 psched_time_t next_event;
@@ -856,7 +864,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
856 /* try to dequeue direct packets as high prio (!) to minimize cpu work */ 864 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
857 skb = __skb_dequeue(&q->direct_queue); 865 skb = __skb_dequeue(&q->direct_queue);
858 if (skb != NULL) { 866 if (skb != NULL) {
859 sch->flags &= ~TCQ_F_THROTTLED; 867ok:
868 qdisc_bstats_update(sch, skb);
869 qdisc_unthrottled(sch);
860 sch->q.qlen--; 870 sch->q.qlen--;
861 return skb; 871 return skb;
862 } 872 }
@@ -887,13 +897,11 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
887 m = ~q->row_mask[level]; 897 m = ~q->row_mask[level];
888 while (m != (int)(-1)) { 898 while (m != (int)(-1)) {
889 int prio = ffz(m); 899 int prio = ffz(m);
900
890 m |= 1 << prio; 901 m |= 1 << prio;
891 skb = htb_dequeue_tree(q, prio, level); 902 skb = htb_dequeue_tree(q, prio, level);
892 if (likely(skb != NULL)) { 903 if (likely(skb != NULL))
893 sch->q.qlen--; 904 goto ok;
894 sch->flags &= ~TCQ_F_THROTTLED;
895 goto fin;
896 }
897 } 905 }
898 } 906 }
899 sch->qstats.overlimits++; 907 sch->qstats.overlimits++;
@@ -994,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
994 return err; 1002 return err;
995 1003
996 if (tb[TCA_HTB_INIT] == NULL) { 1004 if (tb[TCA_HTB_INIT] == NULL) {
997 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1005 pr_err("HTB: hey probably you have bad tc tool ?\n");
998 return -EINVAL; 1006 return -EINVAL;
999 } 1007 }
1000 gopt = nla_data(tb[TCA_HTB_INIT]); 1008 gopt = nla_data(tb[TCA_HTB_INIT]);
1001 if (gopt->version != HTB_VER >> 16) { 1009 if (gopt->version != HTB_VER >> 16) {
1002 printk(KERN_ERR 1010 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
1003 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1004 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); 1011 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1005 return -EINVAL; 1012 return -EINVAL;
1006 } 1013 }
@@ -1121,8 +1128,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1121 if (cl->level) 1128 if (cl->level)
1122 return -EINVAL; 1129 return -EINVAL;
1123 if (new == NULL && 1130 if (new == NULL &&
1124 (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1131 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1125 &pfifo_qdisc_ops,
1126 cl->common.classid)) == NULL) 1132 cl->common.classid)) == NULL)
1127 return -ENOBUFS; 1133 return -ENOBUFS;
1128 1134
@@ -1214,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
1214 cancel_work_sync(&q->work); 1220 cancel_work_sync(&q->work);
1215 qdisc_watchdog_cancel(&q->watchdog); 1221 qdisc_watchdog_cancel(&q->watchdog);
1216 /* This line used to be after htb_destroy_class call below 1222 /* This line used to be after htb_destroy_class call below
1217 and surprisingly it worked in 2.4. But it must precede it 1223 * and surprisingly it worked in 2.4. But it must precede it
1218 because filter need its target class alive to be able to call 1224 * because filter need its target class alive to be able to call
1219 unbind_filter on it (without Oops). */ 1225 * unbind_filter on it (without Oops).
1226 */
1220 tcf_destroy_chain(&q->filter_list); 1227 tcf_destroy_chain(&q->filter_list);
1221 1228
1222 for (i = 0; i < q->clhash.hashsize; i++) { 1229 for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1247,8 +1254,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1247 return -EBUSY; 1254 return -EBUSY;
1248 1255
1249 if (!cl->level && htb_parent_last_child(cl)) { 1256 if (!cl->level && htb_parent_last_child(cl)) {
1250 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1257 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1251 &pfifo_qdisc_ops,
1252 cl->parent->common.classid); 1258 cl->parent->common.classid);
1253 last_child = 1; 1259 last_child = 1;
1254 } 1260 }
@@ -1302,14 +1308,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1308 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct nlattr *opt = tca[TCA_OPTIONS]; 1309 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1310 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1305 struct nlattr *tb[TCA_HTB_RTAB + 1]; 1311 struct nlattr *tb[__TCA_HTB_MAX];
1306 struct tc_htb_opt *hopt; 1312 struct tc_htb_opt *hopt;
1307 1313
1308 /* extract all subattrs from opt attr */ 1314 /* extract all subattrs from opt attr */
1309 if (!opt) 1315 if (!opt)
1310 goto failure; 1316 goto failure;
1311 1317
1312 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy); 1318 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1313 if (err < 0) 1319 if (err < 0)
1314 goto failure; 1320 goto failure;
1315 1321
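
The htb_change_class() hunk above also tightens the netlink attribute parsing: the tb[] table is now dimensioned __TCA_HTB_MAX (one slot for every type 0..TCA_HTB_MAX) and nla_parse_nested() is given TCA_HTB_MAX, so the table keeps tracking the attribute enum if new types are added. A sketch of that convention in isolation (parse_htb_opts() is a hypothetical wrapper):

#include <net/netlink.h>
#include <linux/pkt_sched.h>

/* Sketch: the nested-attribute parsing convention adopted above. */
static int parse_htb_opts(struct nlattr *opt, const struct nla_policy *policy,
			  struct nlattr **rtab)
{
	struct nlattr *tb[__TCA_HTB_MAX];	/* indices 0 .. TCA_HTB_MAX */
	int err;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, policy);
	if (err < 0)
		return err;

	*rtab = tb[TCA_HTB_RTAB];		/* NULL when the attribute is absent */
	return 0;
}
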
@@ -1351,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1351 1357
1352 /* check maximal depth */ 1358 /* check maximal depth */
1353 if (parent && parent->parent && parent->parent->level < 2) { 1359 if (parent && parent->parent && parent->parent->level < 2) {
1354 printk(KERN_ERR "htb: tree is too deep\n"); 1360 pr_err("htb: tree is too deep\n");
1355 goto failure; 1361 goto failure;
1356 } 1362 }
1357 err = -ENOBUFS; 1363 err = -ENOBUFS;
1358 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) 1364 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1365 if (!cl)
1359 goto failure; 1366 goto failure;
1360 1367
1361 err = gen_new_estimator(&cl->bstats, &cl->rate_est, 1368 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1375,9 +1382,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1375 RB_CLEAR_NODE(&cl->node[prio]); 1382 RB_CLEAR_NODE(&cl->node[prio]);
1376 1383
1377 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1384 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1378 so that can't be used inside of sch_tree_lock 1385 * so that can't be used inside of sch_tree_lock
1379 -- thanks to Karlis Peisenieks */ 1386 * -- thanks to Karlis Peisenieks
1380 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1387 */
1388 new_q = qdisc_create_dflt(sch->dev_queue,
1381 &pfifo_qdisc_ops, classid); 1389 &pfifo_qdisc_ops, classid);
1382 sch_tree_lock(sch); 1390 sch_tree_lock(sch);
1383 if (parent && !parent->level) { 1391 if (parent && !parent->level) {
@@ -1428,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1428 } 1436 }
1429 1437
1430 /* it used to be a nasty bug here, we have to check that node 1438 /* it used to be a nasty bug here, we have to check that node
1431 is really leaf before changing cl->un.leaf ! */ 1439 * is really leaf before changing cl->un.leaf !
1440 */
1432 if (!cl->level) { 1441 if (!cl->level) {
1433 cl->quantum = rtab->rate.rate / q->rate2quantum; 1442 cl->quantum = rtab->rate.rate / q->rate2quantum;
1434 if (!hopt->quantum && cl->quantum < 1000) { 1443 if (!hopt->quantum && cl->quantum < 1000) {
1435 printk(KERN_WARNING 1444 pr_warning(
1436 "HTB: quantum of class %X is small. Consider r2q change.\n", 1445 "HTB: quantum of class %X is small. Consider r2q change.\n",
1437 cl->common.classid); 1446 cl->common.classid);
1438 cl->quantum = 1000; 1447 cl->quantum = 1000;
1439 } 1448 }
1440 if (!hopt->quantum && cl->quantum > 200000) { 1449 if (!hopt->quantum && cl->quantum > 200000) {
1441 printk(KERN_WARNING 1450 pr_warning(
1442 "HTB: quantum of class %X is big. Consider r2q change.\n", 1451 "HTB: quantum of class %X is big. Consider r2q change.\n",
1443 cl->common.classid); 1452 cl->common.classid);
1444 cl->quantum = 200000; 1453 cl->quantum = 200000;
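For reference, the quantum heuristic that triggers the two warnings in the hunk above can be tried in isolation: when no explicit quantum is configured, HTB derives it as rate/r2q and clamps it to [1000, 200000] bytes. The standalone sketch below is not part of the patch; the rates and the r2q value of 10 (tc's usual default) are illustrative assumptions.

#include <stdio.h>

/* Standalone illustration of the HTB quantum heuristic shown above:
 * quantum = rate_in_bytes_per_sec / r2q, clamped to [1000, 200000]
 * when no explicit quantum was configured.  All values are made up.
 */
static unsigned int htb_quantum(unsigned int rate_Bps, unsigned int r2q)
{
	unsigned int quantum = rate_Bps / r2q;

	if (quantum < 1000)
		quantum = 1000;		/* "quantum ... is small" warning case */
	else if (quantum > 200000)
		quantum = 200000;	/* "quantum ... is big" warning case */
	return quantum;
}

int main(void)
{
	/* 1 Mbit/s is 125000 B/s; with r2q = 10 the quantum stays in range */
	printf("1 Mbit/s  -> quantum %u\n", htb_quantum(125000, 10));
	/* 10 kbit/s would give 125 bytes, below the 1000 byte floor */
	printf("10 kbit/s -> quantum %u\n", htb_quantum(1250, 10));
	/* 10 Gbit/s would give 125 MB, far above the 200000 byte ceiling */
	printf("10 Gbit/s -> quantum %u\n", htb_quantum(1250000000u, 10));
	return 0;
}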
@@ -1487,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1487 struct htb_class *cl = htb_find(classid, sch); 1496 struct htb_class *cl = htb_find(classid, sch);
1488 1497
1489 /*if (cl && !cl->level) return 0; 1498 /*if (cl && !cl->level) return 0;
1490 The line above used to be there to prevent attaching filters to 1499 * The line above used to be there to prevent attaching filters to
1491 leaves. But at least tc_index filter uses this just to get class 1500 * leaves. But at least tc_index filter uses this just to get class
1492 for other reasons so that we have to allow for it. 1501 * for other reasons so that we have to allow for it.
1493 ---- 1502 * ----
1494 19.6.2002 As Werner explained it is ok - bind filter is just 1503 * 19.6.2002 As Werner explained it is ok - bind filter is just
1495 another way to "lock" the class - unlike "get" this lock can 1504 * another way to "lock" the class - unlike "get" this lock can
1496 be broken by class during destroy IIUC. 1505 * be broken by class during destroy IIUC.
1497 */ 1506 */
1498 if (cl) 1507 if (cl)
1499 cl->filter_cnt++; 1508 cl->filter_cnt++;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f10e34a68445..bce1665239b8 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -63,8 +63,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
63 63
64 result = tc_classify(skb, p->filter_list, &res); 64 result = tc_classify(skb, p->filter_list, &res);
65 65
66 sch->bstats.packets++; 66 qdisc_bstats_update(sch, skb);
67 sch->bstats.bytes += qdisc_pkt_len(skb);
68 switch (result) { 67 switch (result) {
69 case TC_ACT_SHOT: 68 case TC_ACT_SHOT:
70 result = TC_ACT_SHOT; 69 result = TC_ACT_SHOT;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index fe91e50f9d98..ec5cbc848963 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -56,12 +56,11 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
56 56
57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
58 dev_queue = netdev_get_tx_queue(dev, ntx); 58 dev_queue = netdev_get_tx_queue(dev, ntx);
59 qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, 59 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
60 TC_H_MAKE(TC_H_MAJ(sch->handle), 60 TC_H_MAKE(TC_H_MAJ(sch->handle),
61 TC_H_MIN(ntx + 1))); 61 TC_H_MIN(ntx + 1)));
62 if (qdisc == NULL) 62 if (qdisc == NULL)
63 goto err; 63 goto err;
64 qdisc->flags |= TCQ_F_CAN_BYPASS;
65 priv->qdiscs[ntx] = qdisc; 64 priv->qdiscs[ntx] = qdisc;
66 } 65 }
67 66
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 000000000000..ea17cbed29ef
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,418 @@
1/*
2 * net/sched/sch_mqprio.c
3 *
4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/types.h>
12#include <linux/slab.h>
13#include <linux/kernel.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/skbuff.h>
17#include <net/netlink.h>
18#include <net/pkt_sched.h>
19#include <net/sch_generic.h>
20
21struct mqprio_sched {
22 struct Qdisc **qdiscs;
23 int hw_owned;
24};
25
26static void mqprio_destroy(struct Qdisc *sch)
27{
28 struct net_device *dev = qdisc_dev(sch);
29 struct mqprio_sched *priv = qdisc_priv(sch);
30 unsigned int ntx;
31
32 if (priv->qdiscs) {
33 for (ntx = 0;
34 ntx < dev->num_tx_queues && priv->qdiscs[ntx];
35 ntx++)
36 qdisc_destroy(priv->qdiscs[ntx]);
37 kfree(priv->qdiscs);
38 }
39
40 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
41 dev->netdev_ops->ndo_setup_tc(dev, 0);
42 else
43 netdev_set_num_tc(dev, 0);
44}
45
46static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
47{
48 int i, j;
49
50 /* Verify num_tc is not out of max range */
51 if (qopt->num_tc > TC_MAX_QUEUE)
52 return -EINVAL;
53
54 /* Verify priority mapping uses valid tcs */
55 for (i = 0; i < TC_BITMASK + 1; i++) {
56 if (qopt->prio_tc_map[i] >= qopt->num_tc)
57 return -EINVAL;
58 }
59
60 /* net_device does not support requested operation */
61 if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
62 return -EINVAL;
63
 64 /* if hw owned, qcount and qoffset are taken from the LLD, so
 65 * there is no reason to verify them here
 66 */
67 if (qopt->hw)
68 return 0;
69
70 for (i = 0; i < qopt->num_tc; i++) {
71 unsigned int last = qopt->offset[i] + qopt->count[i];
72
 73 /* Verify the queue count is in tx range; being equal to
 74 * real_num_tx_queues indicates the last queue is in use.
 75 */
76 if (qopt->offset[i] >= dev->real_num_tx_queues ||
77 !qopt->count[i] ||
78 last > dev->real_num_tx_queues)
79 return -EINVAL;
80
81 /* Verify that the offset and counts do not overlap */
82 for (j = i + 1; j < qopt->num_tc; j++) {
83 if (last > qopt->offset[j])
84 return -EINVAL;
85 }
86 }
87
88 return 0;
89}
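The range checks in mqprio_parse_opt() above are easiest to follow with concrete numbers. The userspace restatement below mirrors the same loop (including its assumption that offsets are listed in increasing order); the queue layout in main() is invented for demonstration.

#include <stdio.h>

/* Userspace restatement of the offset/count validation done by
 * mqprio_parse_opt() above: traffic class i owns the queue range
 * [offset[i], offset[i] + count[i]); every range must stay inside
 * real_num_tx_queues and must not overlap a later range.
 */
static int ranges_valid(const unsigned int *offset, const unsigned int *count,
			int num_tc, unsigned int real_num_tx_queues)
{
	int i, j;

	for (i = 0; i < num_tc; i++) {
		unsigned int last = offset[i] + count[i];

		if (offset[i] >= real_num_tx_queues || !count[i] ||
		    last > real_num_tx_queues)
			return 0;
		for (j = i + 1; j < num_tc; j++)
			if (last > offset[j])
				return 0;
	}
	return 1;
}

int main(void)
{
	/* 8 hardware queues split into 3 traffic classes: 4 + 2 + 2 */
	unsigned int offset[] = { 0, 4, 6 };
	unsigned int count[] = { 4, 2, 2 };
	unsigned int bad_count[] = { 5, 2, 2 };	/* tc0 spills into tc1 */

	printf("valid mapping:       %d\n", ranges_valid(offset, count, 3, 8));
	printf("overlapping mapping: %d\n", ranges_valid(offset, bad_count, 3, 8));
	return 0;
}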
90
91static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
92{
93 struct net_device *dev = qdisc_dev(sch);
94 struct mqprio_sched *priv = qdisc_priv(sch);
95 struct netdev_queue *dev_queue;
96 struct Qdisc *qdisc;
97 int i, err = -EOPNOTSUPP;
98 struct tc_mqprio_qopt *qopt = NULL;
99
100 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
101 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
102
103 if (sch->parent != TC_H_ROOT)
104 return -EOPNOTSUPP;
105
106 if (!netif_is_multiqueue(dev))
107 return -EOPNOTSUPP;
108
109 if (nla_len(opt) < sizeof(*qopt))
110 return -EINVAL;
111
112 qopt = nla_data(opt);
113 if (mqprio_parse_opt(dev, qopt))
114 return -EINVAL;
115
116 /* pre-allocate qdisc, attachment can't fail */
117 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
118 GFP_KERNEL);
119 if (priv->qdiscs == NULL) {
120 err = -ENOMEM;
121 goto err;
122 }
123
124 for (i = 0; i < dev->num_tx_queues; i++) {
125 dev_queue = netdev_get_tx_queue(dev, i);
126 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
127 TC_H_MAKE(TC_H_MAJ(sch->handle),
128 TC_H_MIN(i + 1)));
129 if (qdisc == NULL) {
130 err = -ENOMEM;
131 goto err;
132 }
133 priv->qdiscs[i] = qdisc;
134 }
135
136 /* If the mqprio options indicate that hardware should own
137 * the queue mapping then run ndo_setup_tc otherwise use the
138 * supplied and verified mapping
139 */
140 if (qopt->hw) {
141 priv->hw_owned = 1;
142 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
143 if (err)
144 goto err;
145 } else {
146 netdev_set_num_tc(dev, qopt->num_tc);
147 for (i = 0; i < qopt->num_tc; i++)
148 netdev_set_tc_queue(dev, i,
149 qopt->count[i], qopt->offset[i]);
150 }
151
152 /* Always use supplied priority mappings */
153 for (i = 0; i < TC_BITMASK + 1; i++)
154 netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
155
156 sch->flags |= TCQ_F_MQROOT;
157 return 0;
158
159err:
160 mqprio_destroy(sch);
161 return err;
162}
163
164static void mqprio_attach(struct Qdisc *sch)
165{
166 struct net_device *dev = qdisc_dev(sch);
167 struct mqprio_sched *priv = qdisc_priv(sch);
168 struct Qdisc *qdisc;
169 unsigned int ntx;
170
171 /* Attach underlying qdisc */
172 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
173 qdisc = priv->qdiscs[ntx];
174 qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
175 if (qdisc)
176 qdisc_destroy(qdisc);
177 }
178 kfree(priv->qdiscs);
179 priv->qdiscs = NULL;
180}
181
182static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
183 unsigned long cl)
184{
185 struct net_device *dev = qdisc_dev(sch);
186 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
187
188 if (ntx >= dev->num_tx_queues)
189 return NULL;
190 return netdev_get_tx_queue(dev, ntx);
191}
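The class numbering used by the mqprio class ops above is worth spelling out: class ids 1..num_tc stand for the traffic classes themselves, while ids above num_tc address individual tx queues, which is why mqprio_queue_get() computes cl - 1 - netdev_get_num_tc(dev). A small standalone illustration follows; num_tc and the queue count are invented.

#include <stdio.h>

/* Illustration of the virtual class numbering used by mqprio above:
 * ids 1..num_tc are traffic classes, ids num_tc+1..num_tc+num_tx_queues
 * are individual tx queues (hence the "cl - 1 - num_tc" conversion).
 */
int main(void)
{
	unsigned int num_tc = 3, num_tx_queues = 8;
	unsigned long cl;

	for (cl = 1; cl <= num_tc + num_tx_queues; cl++) {
		if (cl <= num_tc)
			printf("class %lu -> traffic class %lu\n", cl, cl - 1);
		else
			printf("class %lu -> tx queue %lu\n",
			       cl, cl - 1 - num_tc);
	}
	return 0;
}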
192
193static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
194 struct Qdisc **old)
195{
196 struct net_device *dev = qdisc_dev(sch);
197 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
198
199 if (!dev_queue)
200 return -EINVAL;
201
202 if (dev->flags & IFF_UP)
203 dev_deactivate(dev);
204
205 *old = dev_graft_qdisc(dev_queue, new);
206
207 if (dev->flags & IFF_UP)
208 dev_activate(dev);
209
210 return 0;
211}
212
213static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
214{
215 struct net_device *dev = qdisc_dev(sch);
216 struct mqprio_sched *priv = qdisc_priv(sch);
217 unsigned char *b = skb_tail_pointer(skb);
218 struct tc_mqprio_qopt opt = { 0 };
219 struct Qdisc *qdisc;
220 unsigned int i;
221
222 sch->q.qlen = 0;
223 memset(&sch->bstats, 0, sizeof(sch->bstats));
224 memset(&sch->qstats, 0, sizeof(sch->qstats));
225
226 for (i = 0; i < dev->num_tx_queues; i++) {
227 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
228 spin_lock_bh(qdisc_lock(qdisc));
229 sch->q.qlen += qdisc->q.qlen;
230 sch->bstats.bytes += qdisc->bstats.bytes;
231 sch->bstats.packets += qdisc->bstats.packets;
232 sch->qstats.qlen += qdisc->qstats.qlen;
233 sch->qstats.backlog += qdisc->qstats.backlog;
234 sch->qstats.drops += qdisc->qstats.drops;
235 sch->qstats.requeues += qdisc->qstats.requeues;
236 sch->qstats.overlimits += qdisc->qstats.overlimits;
237 spin_unlock_bh(qdisc_lock(qdisc));
238 }
239
240 opt.num_tc = netdev_get_num_tc(dev);
241 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
242 opt.hw = priv->hw_owned;
243
244 for (i = 0; i < netdev_get_num_tc(dev); i++) {
245 opt.count[i] = dev->tc_to_txq[i].count;
246 opt.offset[i] = dev->tc_to_txq[i].offset;
247 }
248
249 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
250
251 return skb->len;
252nla_put_failure:
253 nlmsg_trim(skb, b);
254 return -1;
255}
256
257static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
258{
259 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
260
261 if (!dev_queue)
262 return NULL;
263
264 return dev_queue->qdisc_sleeping;
265}
266
267static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
268{
269 struct net_device *dev = qdisc_dev(sch);
270 unsigned int ntx = TC_H_MIN(classid);
271
272 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
273 return 0;
274 return ntx;
275}
276
277static void mqprio_put(struct Qdisc *sch, unsigned long cl)
278{
279}
280
281static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
282 struct sk_buff *skb, struct tcmsg *tcm)
283{
284 struct net_device *dev = qdisc_dev(sch);
285
286 if (cl <= netdev_get_num_tc(dev)) {
287 tcm->tcm_parent = TC_H_ROOT;
288 tcm->tcm_info = 0;
289 } else {
290 int i;
291 struct netdev_queue *dev_queue;
292
293 dev_queue = mqprio_queue_get(sch, cl);
294 tcm->tcm_parent = 0;
295 for (i = 0; i < netdev_get_num_tc(dev); i++) {
296 struct netdev_tc_txq tc = dev->tc_to_txq[i];
297 int q_idx = cl - netdev_get_num_tc(dev);
298
299 if (q_idx > tc.offset &&
300 q_idx <= tc.offset + tc.count) {
301 tcm->tcm_parent =
302 TC_H_MAKE(TC_H_MAJ(sch->handle),
303 TC_H_MIN(i + 1));
304 break;
305 }
306 }
307 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
308 }
309 tcm->tcm_handle |= TC_H_MIN(cl);
310 return 0;
311}
312
313static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
314 struct gnet_dump *d)
315 __releases(d->lock)
316 __acquires(d->lock)
317{
318 struct net_device *dev = qdisc_dev(sch);
319
320 if (cl <= netdev_get_num_tc(dev)) {
321 int i;
322 struct Qdisc *qdisc;
323 struct gnet_stats_queue qstats = {0};
324 struct gnet_stats_basic_packed bstats = {0};
325 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
326
 327 /* Drop the lock here; it will be reclaimed before touching
 328 * statistics. This is required because the d->lock we
 329 * hold here is the lock on dev_queue->qdisc_sleeping,
 330 * which is also acquired below.
 331 */
332 spin_unlock_bh(d->lock);
333
334 for (i = tc.offset; i < tc.offset + tc.count; i++) {
335 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
336 spin_lock_bh(qdisc_lock(qdisc));
337 bstats.bytes += qdisc->bstats.bytes;
338 bstats.packets += qdisc->bstats.packets;
339 qstats.qlen += qdisc->qstats.qlen;
340 qstats.backlog += qdisc->qstats.backlog;
341 qstats.drops += qdisc->qstats.drops;
342 qstats.requeues += qdisc->qstats.requeues;
343 qstats.overlimits += qdisc->qstats.overlimits;
344 spin_unlock_bh(qdisc_lock(qdisc));
345 }
346 /* Reclaim root sleeping lock before completing stats */
347 spin_lock_bh(d->lock);
348 if (gnet_stats_copy_basic(d, &bstats) < 0 ||
349 gnet_stats_copy_queue(d, &qstats) < 0)
350 return -1;
351 } else {
352 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
353
354 sch = dev_queue->qdisc_sleeping;
355 sch->qstats.qlen = sch->q.qlen;
356 if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
357 gnet_stats_copy_queue(d, &sch->qstats) < 0)
358 return -1;
359 }
360 return 0;
361}
362
363static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
364{
365 struct net_device *dev = qdisc_dev(sch);
366 unsigned long ntx;
367
368 if (arg->stop)
369 return;
370
371 /* Walk hierarchy with a virtual class per tc */
372 arg->count = arg->skip;
373 for (ntx = arg->skip;
374 ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
375 ntx++) {
376 if (arg->fn(sch, ntx + 1, arg) < 0) {
377 arg->stop = 1;
378 break;
379 }
380 arg->count++;
381 }
382}
383
384static const struct Qdisc_class_ops mqprio_class_ops = {
385 .graft = mqprio_graft,
386 .leaf = mqprio_leaf,
387 .get = mqprio_get,
388 .put = mqprio_put,
389 .walk = mqprio_walk,
390 .dump = mqprio_dump_class,
391 .dump_stats = mqprio_dump_class_stats,
392};
393
394static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
395 .cl_ops = &mqprio_class_ops,
396 .id = "mqprio",
397 .priv_size = sizeof(struct mqprio_sched),
398 .init = mqprio_init,
399 .destroy = mqprio_destroy,
400 .attach = mqprio_attach,
401 .dump = mqprio_dump,
402 .owner = THIS_MODULE,
403};
404
405static int __init mqprio_module_init(void)
406{
407 return register_qdisc(&mqprio_qdisc_ops);
408}
409
410static void __exit mqprio_module_exit(void)
411{
412 unregister_qdisc(&mqprio_qdisc_ops);
413}
414
415module_init(mqprio_module_init);
416module_exit(mqprio_module_exit);
417
418MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 6ae251279fc2..edc1950e0e77 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -83,8 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
83 83
84 ret = qdisc_enqueue(skb, qdisc); 84 ret = qdisc_enqueue(skb, qdisc);
85 if (ret == NET_XMIT_SUCCESS) { 85 if (ret == NET_XMIT_SUCCESS) {
86 sch->bstats.bytes += qdisc_pkt_len(skb);
87 sch->bstats.packets++;
88 sch->q.qlen++; 86 sch->q.qlen++;
89 return NET_XMIT_SUCCESS; 87 return NET_XMIT_SUCCESS;
90 } 88 }
@@ -113,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
113 qdisc = q->queues[q->curband]; 111 qdisc = q->queues[q->curband];
114 skb = qdisc->dequeue(qdisc); 112 skb = qdisc->dequeue(qdisc);
115 if (skb) { 113 if (skb) {
114 qdisc_bstats_update(sch, skb);
116 sch->q.qlen--; 115 sch->q.qlen--;
117 return skb; 116 return skb;
118 } 117 }
@@ -157,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
157 unsigned int len; 156 unsigned int len;
158 struct Qdisc *qdisc; 157 struct Qdisc *qdisc;
159 158
160 for (band = q->bands-1; band >= 0; band--) { 159 for (band = q->bands - 1; band >= 0; band--) {
161 qdisc = q->queues[band]; 160 qdisc = q->queues[band];
162 if (qdisc->ops->drop) { 161 if (qdisc->ops->drop) {
163 len = qdisc->ops->drop(qdisc); 162 len = qdisc->ops->drop(qdisc);
@@ -227,8 +226,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
227 for (i = 0; i < q->bands; i++) { 226 for (i = 0; i < q->bands; i++) {
228 if (q->queues[i] == &noop_qdisc) { 227 if (q->queues[i] == &noop_qdisc) {
229 struct Qdisc *child, *old; 228 struct Qdisc *child, *old;
230 child = qdisc_create_dflt(qdisc_dev(sch), 229 child = qdisc_create_dflt(sch->dev_queue,
231 sch->dev_queue,
232 &pfifo_qdisc_ops, 230 &pfifo_qdisc_ops,
233 TC_H_MAKE(sch->handle, 231 TC_H_MAKE(sch->handle,
234 i + 1)); 232 i + 1));
@@ -267,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
267 for (i = 0; i < q->max_bands; i++) 265 for (i = 0; i < q->max_bands; i++)
268 q->queues[i] = &noop_qdisc; 266 q->queues[i] = &noop_qdisc;
269 267
270 err = multiq_tune(sch,opt); 268 err = multiq_tune(sch, opt);
271 269
272 if (err) 270 if (err)
273 kfree(q->queues); 271 kfree(q->queues);
@@ -348,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
348 struct multiq_sched_data *q = qdisc_priv(sch); 346 struct multiq_sched_data *q = qdisc_priv(sch);
349 347
350 tcm->tcm_handle |= TC_H_MIN(cl); 348 tcm->tcm_handle |= TC_H_MIN(cl);
351 tcm->tcm_info = q->queues[cl-1]->handle; 349 tcm->tcm_info = q->queues[cl - 1]->handle;
352 return 0; 350 return 0;
353} 351}
354 352
@@ -380,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
380 arg->count++; 378 arg->count++;
381 continue; 379 continue;
382 } 380 }
383 if (arg->fn(sch, band+1, arg) < 0) { 381 if (arg->fn(sch, band + 1, arg) < 0) {
384 arg->stop = 1; 382 arg->stop = 1;
385 break; 383 break;
386 } 384 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4714ff162bbd..69c35f6cd13f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -19,12 +19,13 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/vmalloc.h>
22#include <linux/rtnetlink.h> 23#include <linux/rtnetlink.h>
23 24
24#include <net/netlink.h> 25#include <net/netlink.h>
25#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
26 27
27#define VERSION "1.2" 28#define VERSION "1.3"
28 29
29/* Network Emulation Queuing algorithm. 30/* Network Emulation Queuing algorithm.
30 ==================================== 31 ====================================
@@ -47,6 +48,20 @@
47 layering other disciplines. It does not need to do bandwidth 48 layering other disciplines. It does not need to do bandwidth
48 control either since that can be handled by using token 49 control either since that can be handled by using token
49 bucket or other rate control. 50 bucket or other rate control.
51
52 Correlated Loss Generator models
53
54 Added generation of correlated loss according to the
55 "Gilbert-Elliot" model, a 4-state markov model.
56
57 References:
58 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
59 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
60 and intuitive loss model for packet networks and its implementation
61 in the Netem module in the Linux kernel", available in [1]
62
 63 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
64 Fabio Ludovici <fabio.ludovici at yahoo.it>
50*/ 65*/
51 66
52struct netem_sched_data { 67struct netem_sched_data {
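The Gilbert-Elliot model referenced in the comment above alternates between a good and a bad state, each with its own loss probability, which is what makes the generated losses bursty instead of independent. The userspace sketch below simulates that model; rand() stands in for the kernel's net_random(), and all probabilities are invented for demonstration.

#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the Gilbert-Elliot correlated-loss model added to
 * netem above.  p, r, h and k1 correspond to the a1..a4 parameters in
 * the patch; the values below are made up.
 */
static double frand(void)
{
	return rand() / (RAND_MAX + 1.0);
}

int main(void)
{
	double p = 0.05;	/* good -> bad transition probability */
	double r = 0.30;	/* bad -> good transition probability */
	double h = 0.50;	/* loss probability in the bad state */
	double k1 = 0.001;	/* loss probability in the good state (1-k) */
	int state = 1;		/* 1 = good, 2 = bad, as in the patch */
	int i, losses = 0, n = 100000;

	srand(1);
	for (i = 0; i < n; i++) {
		if (state == 1) {
			if (frand() < p)
				state = 2;
			if (frand() < k1)
				losses++;
		} else {
			if (frand() < r)
				state = 1;
			if (frand() < h)
				losses++;
		}
	}
	printf("overall loss rate: %.3f (losses arrive in bursts)\n",
	       (double)losses / n);
	return 0;
}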
@@ -73,6 +88,26 @@ struct netem_sched_data {
73 u32 size; 88 u32 size;
74 s16 table[0]; 89 s16 table[0];
75 } *delay_dist; 90 } *delay_dist;
91
92 enum {
93 CLG_RANDOM,
94 CLG_4_STATES,
95 CLG_GILB_ELL,
96 } loss_model;
97
98 /* Correlated Loss Generation models */
99 struct clgstate {
100 /* state of the Markov chain */
101 u8 state;
102
103 /* 4-states and Gilbert-Elliot models */
104 u32 a1; /* p13 for 4-states or p for GE */
105 u32 a2; /* p31 for 4-states or r for GE */
106 u32 a3; /* p32 for 4-states or h for GE */
107 u32 a4; /* p14 for 4-states or 1-k for GE */
108 u32 a5; /* p23 used only in 4-states */
109 } clg;
110
76}; 111};
77 112
78/* Time stamp put into socket buffer control block */ 113/* Time stamp put into socket buffer control block */
@@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state)
115 return answer; 150 return answer;
116} 151}
117 152
153/* loss_4state - 4-state model loss generator
154 * Generates losses according to the 4-state Markov chain adopted in
155 * the GI (General and Intuitive) loss model.
156 */
157static bool loss_4state(struct netem_sched_data *q)
158{
159 struct clgstate *clg = &q->clg;
160 u32 rnd = net_random();
161
162 /*
163 * Makes a comparison between rnd and the transition
164 * probabilities outgoing from the current state, then decides the
165 * next state and if the next packet has to be transmitted or lost.
166 * The four states correspond to:
167 * 1 => successfully transmitted packets within a gap period
168 * 4 => isolated losses within a gap period
169 * 3 => lost packets within a burst period
170 * 2 => successfully transmitted packets within a burst period
171 */
172 switch (clg->state) {
173 case 1:
174 if (rnd < clg->a4) {
175 clg->state = 4;
176 return true;
177 } else if (clg->a4 < rnd && rnd < clg->a1) {
178 clg->state = 3;
179 return true;
180 } else if (clg->a1 < rnd)
181 clg->state = 1;
182
183 break;
184 case 2:
185 if (rnd < clg->a5) {
186 clg->state = 3;
187 return true;
188 } else
189 clg->state = 2;
190
191 break;
192 case 3:
193 if (rnd < clg->a3)
194 clg->state = 2;
195 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
196 clg->state = 1;
197 return true;
198 } else if (clg->a2 + clg->a3 < rnd) {
199 clg->state = 3;
200 return true;
201 }
202 break;
203 case 4:
204 clg->state = 1;
205 break;
206 }
207
208 return false;
209}
210
211/* loss_gilb_ell - Gilbert-Elliot model loss generator
212 * Generates losses according to the Gilbert-Elliot loss model or
213 * its special cases (Gilbert or Simple Gilbert)
214 *
215 * Makes a comparison between random number and the transition
216 * probabilities outgoing from the current state, then decides the
217 * next state. A second random number is extracted and the comparison
218 * with the loss probability of the current state decides if the next
219 * packet will be transmitted or lost.
220 */
221static bool loss_gilb_ell(struct netem_sched_data *q)
222{
223 struct clgstate *clg = &q->clg;
224
225 switch (clg->state) {
226 case 1:
227 if (net_random() < clg->a1)
228 clg->state = 2;
229 if (net_random() < clg->a4)
230 return true;
231 case 2:
232 if (net_random() < clg->a2)
233 clg->state = 1;
234 if (clg->a3 > net_random())
235 return true;
236 }
237
238 return false;
239}
240
241static bool loss_event(struct netem_sched_data *q)
242{
243 switch (q->loss_model) {
244 case CLG_RANDOM:
245 /* Random packet drop 0 => none, ~0 => all */
246 return q->loss && q->loss >= get_crandom(&q->loss_cor);
247
248 case CLG_4_STATES:
249 /* 4state loss model algorithm (used also for GI model)
250 * Extracts a value from the markov 4 state loss generator,
251 * if it is 1 drops a packet and if needed writes the event in
252 * the kernel logs
253 */
254 return loss_4state(q);
255
256 case CLG_GILB_ELL:
257 /* Gilbert-Elliot loss model algorithm
258 * Extracts a value from the Gilbert-Elliot loss generator,
259 * if it is 1 drops a packet and if needed writes the event in
260 * the kernel logs
261 */
262 return loss_gilb_ell(q);
263 }
264
265 return false; /* not reached */
266}
267
268
118/* tabledist - return a pseudo-randomly distributed value with mean mu and 269/* tabledist - return a pseudo-randomly distributed value with mean mu and
119 * std deviation sigma. Uses table lookup to approximate the desired 270 * std deviation sigma. Uses table lookup to approximate the desired
120 * distribution, and a uniformly-distributed pseudo-random source. 271 * distribution, and a uniformly-distributed pseudo-random source.
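The 4-state chain documented in loss_4state() above can be simulated in user space as well. The sketch below follows the state description in the comment (states 1 and 2 transmit, states 3 and 4 lose) rather than the kernel function line by line: thresholds are applied cumulatively for clarity and the transition probabilities are invented.

#include <stdio.h>
#include <stdlib.h>

/* Conceptual userspace sketch of the 4-state (GI) loss model described
 * in the comment above.  a1..a5 play the roles of p13, p31, p32, p14
 * and p23; every value here is made up.
 */
static double frand(void)
{
	return rand() / (RAND_MAX + 1.0);
}

int main(void)
{
	double a1 = 0.05, a2 = 0.20, a3 = 0.30, a4 = 0.01, a5 = 0.10;
	int state = 1, i, losses = 0, n = 100000;

	srand(1);
	for (i = 0; i < n; i++) {
		double rnd = frand();

		switch (state) {
		case 1:				/* good packet, gap period */
			if (rnd < a4)		/* p14: isolated loss */
				state = 4;
			else if (rnd < a4 + a1)	/* p13: loss burst begins */
				state = 3;
			break;
		case 2:				/* good packet, burst period */
			if (rnd < a5)		/* p23: back into the burst */
				state = 3;
			break;
		case 3:				/* lost packet, burst period */
			if (rnd < a3)		/* p32: good packet in burst */
				state = 2;
			else if (rnd < a3 + a2)	/* p31: burst ends */
				state = 1;
			break;
		case 4:				/* isolated loss is over */
			state = 1;
			break;
		}
		if (state == 3 || state == 4)
			losses++;
	}
	printf("loss rate with made-up parameters: %.3f\n",
	       (double)losses / n);
	return 0;
}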
@@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
161 int ret; 312 int ret;
162 int count = 1; 313 int count = 1;
163 314
164 pr_debug("netem_enqueue skb=%p\n", skb);
165
166 /* Random duplication */ 315 /* Random duplication */
167 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 316 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
168 ++count; 317 ++count;
169 318
170 /* Random packet drop 0 => none, ~0 => all */ 319 /* Drop packet? */
171 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 320 if (loss_event(q))
172 --count; 321 --count;
173 322
174 if (count == 0) { 323 if (count == 0) {
@@ -211,8 +360,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
211 } 360 }
212 361
213 cb = netem_skb_cb(skb); 362 cb = netem_skb_cb(skb);
214 if (q->gap == 0 || /* not doing reordering */ 363 if (q->gap == 0 || /* not doing reordering */
215 q->counter < q->gap || /* inside last reordering gap */ 364 q->counter < q->gap || /* inside last reordering gap */
216 q->reorder < get_crandom(&q->reorder_cor)) { 365 q->reorder < get_crandom(&q->reorder_cor)) {
217 psched_time_t now; 366 psched_time_t now;
218 psched_tdiff_t delay; 367 psched_tdiff_t delay;
@@ -238,19 +387,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
238 ret = NET_XMIT_SUCCESS; 387 ret = NET_XMIT_SUCCESS;
239 } 388 }
240 389
241 if (likely(ret == NET_XMIT_SUCCESS)) { 390 if (ret != NET_XMIT_SUCCESS) {
242 sch->q.qlen++; 391 if (net_xmit_drop_count(ret)) {
243 sch->bstats.bytes += qdisc_pkt_len(skb); 392 sch->qstats.drops++;
244 sch->bstats.packets++; 393 return ret;
245 } else if (net_xmit_drop_count(ret)) { 394 }
246 sch->qstats.drops++;
247 } 395 }
248 396
249 pr_debug("netem: enqueue ret %d\n", ret); 397 sch->q.qlen++;
250 return ret; 398 return NET_XMIT_SUCCESS;
251} 399}
252 400
253static unsigned int netem_drop(struct Qdisc* sch) 401static unsigned int netem_drop(struct Qdisc *sch)
254{ 402{
255 struct netem_sched_data *q = qdisc_priv(sch); 403 struct netem_sched_data *q = qdisc_priv(sch);
256 unsigned int len = 0; 404 unsigned int len = 0;
@@ -267,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
267 struct netem_sched_data *q = qdisc_priv(sch); 415 struct netem_sched_data *q = qdisc_priv(sch);
268 struct sk_buff *skb; 416 struct sk_buff *skb;
269 417
270 if (sch->flags & TCQ_F_THROTTLED) 418 if (qdisc_is_throttled(sch))
271 return NULL; 419 return NULL;
272 420
273 skb = q->qdisc->ops->peek(q->qdisc); 421 skb = q->qdisc->ops->peek(q->qdisc);
@@ -289,8 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
289 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 437 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
290 skb->tstamp.tv64 = 0; 438 skb->tstamp.tv64 = 0;
291#endif 439#endif
292 pr_debug("netem_dequeue: return skb=%p\n", skb); 440
293 sch->q.qlen--; 441 sch->q.qlen--;
442 qdisc_unthrottled(sch);
443 qdisc_bstats_update(sch, skb);
294 return skb; 444 return skb;
295 } 445 }
296 446
@@ -309,6 +459,16 @@ static void netem_reset(struct Qdisc *sch)
309 qdisc_watchdog_cancel(&q->watchdog); 459 qdisc_watchdog_cancel(&q->watchdog);
310} 460}
311 461
462static void dist_free(struct disttable *d)
463{
464 if (d) {
465 if (is_vmalloc_addr(d))
466 vfree(d);
467 else
468 kfree(d);
469 }
470}
471
312/* 472/*
313 * Distribution data is a variable size payload containing 473 * Distribution data is a variable size payload containing
314 * signed 16 bit values. 474 * signed 16 bit values.
@@ -316,16 +476,20 @@ static void netem_reset(struct Qdisc *sch)
316static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 476static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
317{ 477{
318 struct netem_sched_data *q = qdisc_priv(sch); 478 struct netem_sched_data *q = qdisc_priv(sch);
319 unsigned long n = nla_len(attr)/sizeof(__s16); 479 size_t n = nla_len(attr)/sizeof(__s16);
320 const __s16 *data = nla_data(attr); 480 const __s16 *data = nla_data(attr);
321 spinlock_t *root_lock; 481 spinlock_t *root_lock;
322 struct disttable *d; 482 struct disttable *d;
323 int i; 483 int i;
484 size_t s;
324 485
325 if (n > 65536) 486 if (n > NETEM_DIST_MAX)
326 return -EINVAL; 487 return -EINVAL;
327 488
328 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 489 s = sizeof(struct disttable) + n * sizeof(s16);
490 d = kmalloc(s, GFP_KERNEL);
491 if (!d)
492 d = vmalloc(s);
329 if (!d) 493 if (!d)
330 return -ENOMEM; 494 return -ENOMEM;
331 495
@@ -336,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
336 root_lock = qdisc_root_sleeping_lock(sch); 500 root_lock = qdisc_root_sleeping_lock(sch);
337 501
338 spin_lock_bh(root_lock); 502 spin_lock_bh(root_lock);
339 kfree(q->delay_dist); 503 dist_free(q->delay_dist);
340 q->delay_dist = d; 504 q->delay_dist = d;
341 spin_unlock_bh(root_lock); 505 spin_unlock_bh(root_lock);
342 return 0; 506 return 0;
@@ -370,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
370 init_crandom(&q->corrupt_cor, r->correlation); 534 init_crandom(&q->corrupt_cor, r->correlation);
371} 535}
372 536
537static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
538{
539 struct netem_sched_data *q = qdisc_priv(sch);
540 const struct nlattr *la;
541 int rem;
542
543 nla_for_each_nested(la, attr, rem) {
544 u16 type = nla_type(la);
545
546 switch(type) {
547 case NETEM_LOSS_GI: {
548 const struct tc_netem_gimodel *gi = nla_data(la);
549
550 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
551 pr_info("netem: incorrect gi model size\n");
552 return -EINVAL;
553 }
554
555 q->loss_model = CLG_4_STATES;
556
557 q->clg.state = 1;
558 q->clg.a1 = gi->p13;
559 q->clg.a2 = gi->p31;
560 q->clg.a3 = gi->p32;
561 q->clg.a4 = gi->p14;
562 q->clg.a5 = gi->p23;
563 break;
564 }
565
566 case NETEM_LOSS_GE: {
567 const struct tc_netem_gemodel *ge = nla_data(la);
568
569 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
570 pr_info("netem: incorrect gi model size\n");
571 return -EINVAL;
572 }
573
574 q->loss_model = CLG_GILB_ELL;
575 q->clg.state = 1;
576 q->clg.a1 = ge->p;
577 q->clg.a2 = ge->r;
578 q->clg.a3 = ge->h;
579 q->clg.a4 = ge->k1;
580 break;
581 }
582
583 default:
584 pr_info("netem: unknown loss type %u\n", type);
585 return -EINVAL;
586 }
587 }
588
589 return 0;
590}
591
373static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 592static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
374 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 593 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
375 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 594 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
376 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 595 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
596 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
377}; 597};
378 598
379static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 599static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -381,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
381{ 601{
382 int nested_len = nla_len(nla) - NLA_ALIGN(len); 602 int nested_len = nla_len(nla) - NLA_ALIGN(len);
383 603
384 if (nested_len < 0) 604 if (nested_len < 0) {
605 pr_info("netem: invalid attributes len %d\n", nested_len);
385 return -EINVAL; 606 return -EINVAL;
607 }
608
386 if (nested_len >= nla_attr_size(0)) 609 if (nested_len >= nla_attr_size(0))
387 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 610 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
388 nested_len, policy); 611 nested_len, policy);
612
389 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 613 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
390 return 0; 614 return 0;
391} 615}
@@ -408,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
408 632
409 ret = fifo_set_limit(q->qdisc, qopt->limit); 633 ret = fifo_set_limit(q->qdisc, qopt->limit);
410 if (ret) { 634 if (ret) {
411 pr_debug("netem: can't set fifo limit\n"); 635 pr_info("netem: can't set fifo limit\n");
412 return ret; 636 return ret;
413 } 637 }
414 638
@@ -441,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
441 if (tb[TCA_NETEM_CORRUPT]) 665 if (tb[TCA_NETEM_CORRUPT])
442 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 666 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
443 667
444 return 0; 668 q->loss_model = CLG_RANDOM;
669 if (tb[TCA_NETEM_LOSS])
670 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
671
672 return ret;
445} 673}
446 674
447/* 675/*
@@ -477,8 +705,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
477 __skb_queue_after(list, skb, nskb); 705 __skb_queue_after(list, skb, nskb);
478 706
479 sch->qstats.backlog += qdisc_pkt_len(nskb); 707 sch->qstats.backlog += qdisc_pkt_len(nskb);
480 sch->bstats.bytes += qdisc_pkt_len(nskb);
481 sch->bstats.packets++;
482 708
483 return NET_XMIT_SUCCESS; 709 return NET_XMIT_SUCCESS;
484 } 710 }
@@ -538,17 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
538 764
539 qdisc_watchdog_init(&q->watchdog, sch); 765 qdisc_watchdog_init(&q->watchdog, sch);
540 766
541 q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 767 q->loss_model = CLG_RANDOM;
542 &tfifo_qdisc_ops, 768 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
543 TC_H_MAKE(sch->handle, 1)); 769 TC_H_MAKE(sch->handle, 1));
544 if (!q->qdisc) { 770 if (!q->qdisc) {
545 pr_debug("netem: qdisc create failed\n"); 771 pr_notice("netem: qdisc create tfifo qdisc failed\n");
546 return -ENOMEM; 772 return -ENOMEM;
547 } 773 }
548 774
549 ret = netem_change(sch, opt); 775 ret = netem_change(sch, opt);
550 if (ret) { 776 if (ret) {
551 pr_debug("netem: change failed\n"); 777 pr_info("netem: change failed\n");
552 qdisc_destroy(q->qdisc); 778 qdisc_destroy(q->qdisc);
553 } 779 }
554 return ret; 780 return ret;
@@ -560,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch)
560 786
561 qdisc_watchdog_cancel(&q->watchdog); 787 qdisc_watchdog_cancel(&q->watchdog);
562 qdisc_destroy(q->qdisc); 788 qdisc_destroy(q->qdisc);
563 kfree(q->delay_dist); 789 dist_free(q->delay_dist);
790}
791
792static int dump_loss_model(const struct netem_sched_data *q,
793 struct sk_buff *skb)
794{
795 struct nlattr *nest;
796
797 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
798 if (nest == NULL)
799 goto nla_put_failure;
800
801 switch (q->loss_model) {
802 case CLG_RANDOM:
803 /* legacy loss model */
804 nla_nest_cancel(skb, nest);
805 return 0; /* no data */
806
807 case CLG_4_STATES: {
808 struct tc_netem_gimodel gi = {
809 .p13 = q->clg.a1,
810 .p31 = q->clg.a2,
811 .p32 = q->clg.a3,
812 .p14 = q->clg.a4,
813 .p23 = q->clg.a5,
814 };
815
816 NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
817 break;
818 }
819 case CLG_GILB_ELL: {
820 struct tc_netem_gemodel ge = {
821 .p = q->clg.a1,
822 .r = q->clg.a2,
823 .h = q->clg.a3,
824 .k1 = q->clg.a4,
825 };
826
827 NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
828 break;
829 }
830 }
831
832 nla_nest_end(skb, nest);
833 return 0;
834
835nla_put_failure:
836 nla_nest_cancel(skb, nest);
837 return -1;
564} 838}
565 839
566static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 840static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
567{ 841{
568 const struct netem_sched_data *q = qdisc_priv(sch); 842 const struct netem_sched_data *q = qdisc_priv(sch);
569 unsigned char *b = skb_tail_pointer(skb); 843 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
570 struct nlattr *nla = (struct nlattr *) b;
571 struct tc_netem_qopt qopt; 844 struct tc_netem_qopt qopt;
572 struct tc_netem_corr cor; 845 struct tc_netem_corr cor;
573 struct tc_netem_reorder reorder; 846 struct tc_netem_reorder reorder;
@@ -594,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
594 corrupt.correlation = q->corrupt_cor.rho; 867 corrupt.correlation = q->corrupt_cor.rho;
595 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 868 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
596 869
597 nla->nla_len = skb_tail_pointer(skb) - b; 870 if (dump_loss_model(q, skb) != 0)
871 goto nla_put_failure;
598 872
599 return skb->len; 873 return nla_nest_end(skb, nla);
600 874
601nla_put_failure: 875nla_put_failure:
602 nlmsg_trim(skb, b); 876 nlmsg_trim(skb, nla);
603 return -1; 877 return -1;
604} 878}
605 879
880static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
881 struct sk_buff *skb, struct tcmsg *tcm)
882{
883 struct netem_sched_data *q = qdisc_priv(sch);
884
885 if (cl != 1) /* only one class */
886 return -ENOENT;
887
888 tcm->tcm_handle |= TC_H_MIN(1);
889 tcm->tcm_info = q->qdisc->handle;
890
891 return 0;
892}
893
894static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
895 struct Qdisc **old)
896{
897 struct netem_sched_data *q = qdisc_priv(sch);
898
899 if (new == NULL)
900 new = &noop_qdisc;
901
902 sch_tree_lock(sch);
903 *old = q->qdisc;
904 q->qdisc = new;
905 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
906 qdisc_reset(*old);
907 sch_tree_unlock(sch);
908
909 return 0;
910}
911
912static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
913{
914 struct netem_sched_data *q = qdisc_priv(sch);
915 return q->qdisc;
916}
917
918static unsigned long netem_get(struct Qdisc *sch, u32 classid)
919{
920 return 1;
921}
922
923static void netem_put(struct Qdisc *sch, unsigned long arg)
924{
925}
926
927static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
928{
929 if (!walker->stop) {
930 if (walker->count >= walker->skip)
931 if (walker->fn(sch, 1, walker) < 0) {
932 walker->stop = 1;
933 return;
934 }
935 walker->count++;
936 }
937}
938
939static const struct Qdisc_class_ops netem_class_ops = {
940 .graft = netem_graft,
941 .leaf = netem_leaf,
942 .get = netem_get,
943 .put = netem_put,
944 .walk = netem_walk,
945 .dump = netem_dump_class,
946};
947
606static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 948static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
607 .id = "netem", 949 .id = "netem",
950 .cl_ops = &netem_class_ops,
608 .priv_size = sizeof(struct netem_sched_data), 951 .priv_size = sizeof(struct netem_sched_data),
609 .enqueue = netem_enqueue, 952 .enqueue = netem_enqueue,
610 .dequeue = netem_dequeue, 953 .dequeue = netem_dequeue,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 0748fb1e3a49..2a318f2dc3e5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23 23
24 24
25struct prio_sched_data 25struct prio_sched_data {
26{
27 int bands; 26 int bands;
28 struct tcf_proto *filter_list; 27 struct tcf_proto *filter_list;
29 u8 prio2band[TC_PRIO_MAX+1]; 28 u8 prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 if (!q->filter_list || err < 0) { 53 if (!q->filter_list || err < 0) {
55 if (TC_H_MAJ(band)) 54 if (TC_H_MAJ(band))
56 band = 0; 55 band = 0;
57 return q->queues[q->prio2band[band&TC_PRIO_MAX]]; 56 return q->queues[q->prio2band[band & TC_PRIO_MAX]];
58 } 57 }
59 band = res.classid; 58 band = res.classid;
60 } 59 }
@@ -84,8 +83,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
84 83
85 ret = qdisc_enqueue(skb, qdisc); 84 ret = qdisc_enqueue(skb, qdisc);
86 if (ret == NET_XMIT_SUCCESS) { 85 if (ret == NET_XMIT_SUCCESS) {
87 sch->bstats.bytes += qdisc_pkt_len(skb);
88 sch->bstats.packets++;
89 sch->q.qlen++; 86 sch->q.qlen++;
90 return NET_XMIT_SUCCESS; 87 return NET_XMIT_SUCCESS;
91 } 88 }
@@ -108,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
108 return NULL; 105 return NULL;
109} 106}
110 107
111static struct sk_buff *prio_dequeue(struct Qdisc* sch) 108static struct sk_buff *prio_dequeue(struct Qdisc *sch)
112{ 109{
113 struct prio_sched_data *q = qdisc_priv(sch); 110 struct prio_sched_data *q = qdisc_priv(sch);
114 int prio; 111 int prio;
@@ -117,6 +114,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
117 struct Qdisc *qdisc = q->queues[prio]; 114 struct Qdisc *qdisc = q->queues[prio];
118 struct sk_buff *skb = qdisc->dequeue(qdisc); 115 struct sk_buff *skb = qdisc->dequeue(qdisc);
119 if (skb) { 116 if (skb) {
117 qdisc_bstats_update(sch, skb);
120 sch->q.qlen--; 118 sch->q.qlen--;
121 return skb; 119 return skb;
122 } 120 }
@@ -125,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
125 123
126} 124}
127 125
128static unsigned int prio_drop(struct Qdisc* sch) 126static unsigned int prio_drop(struct Qdisc *sch)
129{ 127{
130 struct prio_sched_data *q = qdisc_priv(sch); 128 struct prio_sched_data *q = qdisc_priv(sch);
131 int prio; 129 int prio;
@@ -144,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
144 142
145 143
146static void 144static void
147prio_reset(struct Qdisc* sch) 145prio_reset(struct Qdisc *sch)
148{ 146{
149 int prio; 147 int prio;
150 struct prio_sched_data *q = qdisc_priv(sch); 148 struct prio_sched_data *q = qdisc_priv(sch);
151 149
152 for (prio=0; prio<q->bands; prio++) 150 for (prio = 0; prio < q->bands; prio++)
153 qdisc_reset(q->queues[prio]); 151 qdisc_reset(q->queues[prio]);
154 sch->q.qlen = 0; 152 sch->q.qlen = 0;
155} 153}
156 154
157static void 155static void
158prio_destroy(struct Qdisc* sch) 156prio_destroy(struct Qdisc *sch)
159{ 157{
160 int prio; 158 int prio;
161 struct prio_sched_data *q = qdisc_priv(sch); 159 struct prio_sched_data *q = qdisc_priv(sch);
162 160
163 tcf_destroy_chain(&q->filter_list); 161 tcf_destroy_chain(&q->filter_list);
164 for (prio=0; prio<q->bands; prio++) 162 for (prio = 0; prio < q->bands; prio++)
165 qdisc_destroy(q->queues[prio]); 163 qdisc_destroy(q->queues[prio]);
166} 164}
167 165
@@ -178,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
178 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) 176 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
179 return -EINVAL; 177 return -EINVAL;
180 178
181 for (i=0; i<=TC_PRIO_MAX; i++) { 179 for (i = 0; i <= TC_PRIO_MAX; i++) {
182 if (qopt->priomap[i] >= qopt->bands) 180 if (qopt->priomap[i] >= qopt->bands)
183 return -EINVAL; 181 return -EINVAL;
184 } 182 }
@@ -187,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
187 q->bands = qopt->bands; 185 q->bands = qopt->bands;
188 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); 186 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
189 187
190 for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { 188 for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
191 struct Qdisc *child = q->queues[i]; 189 struct Qdisc *child = q->queues[i];
192 q->queues[i] = &noop_qdisc; 190 q->queues[i] = &noop_qdisc;
193 if (child != &noop_qdisc) { 191 if (child != &noop_qdisc) {
@@ -197,10 +195,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
197 } 195 }
198 sch_tree_unlock(sch); 196 sch_tree_unlock(sch);
199 197
200 for (i=0; i<q->bands; i++) { 198 for (i = 0; i < q->bands; i++) {
201 if (q->queues[i] == &noop_qdisc) { 199 if (q->queues[i] == &noop_qdisc) {
202 struct Qdisc *child, *old; 200 struct Qdisc *child, *old;
203 child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 201
202 child = qdisc_create_dflt(sch->dev_queue,
204 &pfifo_qdisc_ops, 203 &pfifo_qdisc_ops,
205 TC_H_MAKE(sch->handle, i + 1)); 204 TC_H_MAKE(sch->handle, i + 1));
206 if (child) { 205 if (child) {
@@ -225,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
225 struct prio_sched_data *q = qdisc_priv(sch); 224 struct prio_sched_data *q = qdisc_priv(sch);
226 int i; 225 int i;
227 226
228 for (i=0; i<TCQ_PRIO_BANDS; i++) 227 for (i = 0; i < TCQ_PRIO_BANDS; i++)
229 q->queues[i] = &noop_qdisc; 228 q->queues[i] = &noop_qdisc;
230 229
231 if (opt == NULL) { 230 if (opt == NULL) {
@@ -233,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
233 } else { 232 } else {
234 int err; 233 int err;
235 234
236 if ((err= prio_tune(sch, opt)) != 0) 235 if ((err = prio_tune(sch, opt)) != 0)
237 return err; 236 return err;
238 } 237 }
239 return 0; 238 return 0;
@@ -246,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
246 struct tc_prio_qopt opt; 245 struct tc_prio_qopt opt;
247 246
248 opt.bands = q->bands; 247 opt.bands = q->bands;
249 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); 248 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
250 249
251 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 250 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
252 251
@@ -343,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
343 arg->count++; 342 arg->count++;
344 continue; 343 continue;
345 } 344 }
346 if (arg->fn(sch, prio+1, arg) < 0) { 345 if (arg->fn(sch, prio + 1, arg) < 0) {
347 arg->stop = 1; 346 arg->stop = 1;
348 break; 347 break;
349 } 348 }
@@ -351,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
351 } 350 }
352} 351}
353 352
354static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) 353static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
355{ 354{
356 struct prio_sched_data *q = qdisc_priv(sch); 355 struct prio_sched_data *q = qdisc_priv(sch);
357 356
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
new file mode 100644
index 000000000000..103343408593
--- /dev/null
+++ b/net/sched/sch_qfq.c
@@ -0,0 +1,1137 @@
1/*
2 * net/sched/sch_qfq.c Quick Fair Queueing Scheduler.
3 *
4 * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/bitops.h>
14#include <linux/errno.h>
15#include <linux/netdevice.h>
16#include <linux/pkt_sched.h>
17#include <net/sch_generic.h>
18#include <net/pkt_sched.h>
19#include <net/pkt_cls.h>
20
21
22/* Quick Fair Queueing
23 ===================
24
25 Sources:
26
27 Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
28 Packet Scheduling with Tight Bandwidth Distribution Guarantees."
29
30 See also:
31 http://retis.sssup.it/~fabio/linux/qfq/
32 */
33
34/*
35
36 Virtual time computations.
37
38 S, F and V are all computed in fixed point arithmetic with
39 FRAC_BITS decimal bits.
40
41 QFQ_MAX_INDEX is the maximum index allowed for a group. We need
42 one bit per index.
43 QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
44
45 The layout of the bits is as below:
46
47 [ MTU_SHIFT ][ FRAC_BITS ]
48 [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
49 ^.__grp->index = 0
50 *.__grp->slot_shift
51
52 where MIN_SLOT_SHIFT is derived by difference from the others.
53
54 The max group index corresponds to Lmax/w_min, where
55 Lmax=1<<MTU_SHIFT, w_min = 1 .
56 From this, and knowing how many groups (MAX_INDEX) we want,
57 we can derive the shift corresponding to each group.
58
59 Because we often need to compute
60 F = S + len/w_i and V = V + len/wsum
61 instead of storing w_i store the value
62 inv_w = (1<<FRAC_BITS)/w_i
63 so we can do F = S + len * inv_w * wsum.
64 We use W_TOT in the formulas so we can easily move between
65 static and adaptive weight sum.
66
67 The per-scheduler-instance data contain all the data structures
68 for the scheduler: bitmaps and bucket lists.
69
70 */
71
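The fixed-point bookkeeping described in the comment above boils down to storing inv_w = ONE_FP / w per class, so that a flow's finish timestamp can advance by len * inv_w with no division in the fast path. The standalone sketch below shows the arithmetic with FRAC_BITS = 30; the weight and packet lengths are invented.

#include <stdio.h>
#include <stdint.h>

/* Standalone illustration of the fixed-point timestamps described in
 * the comment above: F = S + len/w is computed as S + len * inv_w,
 * where inv_w = ONE_FP / w.  Weight and packet lengths are made up.
 */
#define FRAC_BITS	30
#define ONE_FP		(1ULL << FRAC_BITS)

int main(void)
{
	uint32_t weight = 4;			/* class weight w */
	uint32_t inv_w = ONE_FP / weight;	/* what qfq stores per class */
	uint64_t S = 0;				/* start timestamp, fixed point */
	unsigned int lengths[] = { 1500, 64, 512 };
	int i;

	for (i = 0; i < 3; i++) {
		uint64_t F = S + (uint64_t)lengths[i] * inv_w;

		printf("len %4u: S=%llu F=%llu (F - S corresponds to %.1f)\n",
		       lengths[i], (unsigned long long)S,
		       (unsigned long long)F, (double)(F - S) / ONE_FP);
		S = F;	/* the next packet starts where this one finished */
	}
	return 0;
}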
72/*
73 * Maximum number of consecutive slots occupied by backlogged classes
74 * inside a group.
75 */
76#define QFQ_MAX_SLOTS 32
77
78/*
79 * Shifts used for class<->group mapping. We allow class weights that are
80 * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the
81 * group with the smallest index that can support the L_i / r_i configured
82 * for the class.
83 *
84 * grp->index is the index of the group; and grp->slot_shift
85 * is the shift for the corresponding (scaled) sigma_i.
86 */
87#define QFQ_MAX_INDEX 19
88#define QFQ_MAX_WSHIFT 16
89
90#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
91#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
92
93#define FRAC_BITS 30 /* fixed point arithmetic */
94#define ONE_FP (1UL << FRAC_BITS)
95#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
96
97#define QFQ_MTU_SHIFT 11
98#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
99
100/*
101 * Possible group states. These values are used as indexes for the bitmaps
102 * array of struct qfq_queue.
103 */
104enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
105
106struct qfq_group;
107
108struct qfq_class {
109 struct Qdisc_class_common common;
110
111 unsigned int refcnt;
112 unsigned int filter_cnt;
113
114 struct gnet_stats_basic_packed bstats;
115 struct gnet_stats_queue qstats;
116 struct gnet_stats_rate_est rate_est;
117 struct Qdisc *qdisc;
118
119 struct hlist_node next; /* Link for the slot list. */
120 u64 S, F; /* flow timestamps (exact) */
121
122 /* group we belong to. In principle we would need the index,
123 * which is log_2(lmax/weight), but we never reference it
124 * directly, only the group.
125 */
126 struct qfq_group *grp;
127
128 /* these are copied from the flowset. */
129 u32 inv_w; /* ONE_FP/weight */
130 u32 lmax; /* Max packet size for this flow. */
131};
132
133struct qfq_group {
134 u64 S, F; /* group timestamps (approx). */
135 unsigned int slot_shift; /* Slot shift. */
136 unsigned int index; /* Group index. */
137 unsigned int front; /* Index of the front slot. */
138 unsigned long full_slots; /* non-empty slots */
139
140 /* Array of RR lists of active classes. */
141 struct hlist_head slots[QFQ_MAX_SLOTS];
142};
143
144struct qfq_sched {
145 struct tcf_proto *filter_list;
146 struct Qdisc_class_hash clhash;
147
148 u64 V; /* Precise virtual time. */
149 u32 wsum; /* weight sum */
150
151 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
152 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
153};
154
155static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
156{
157 struct qfq_sched *q = qdisc_priv(sch);
158 struct Qdisc_class_common *clc;
159
160 clc = qdisc_class_find(&q->clhash, classid);
161 if (clc == NULL)
162 return NULL;
163 return container_of(clc, struct qfq_class, common);
164}
165
166static void qfq_purge_queue(struct qfq_class *cl)
167{
168 unsigned int len = cl->qdisc->q.qlen;
169
170 qdisc_reset(cl->qdisc);
171 qdisc_tree_decrease_qlen(cl->qdisc, len);
172}
173
174static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
175 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
176 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
177};
178
179/*
180 * Calculate a flow index, given its weight and maximum packet length.
181 * index = log_2(maxlen/weight) but we need to apply the scaling.
182 * This is used only once at flow creation.
183 */
184static int qfq_calc_index(u32 inv_w, unsigned int maxlen)
185{
186 u64 slot_size = (u64)maxlen * inv_w;
187 unsigned long size_map;
188 int index = 0;
189
190 size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
191 if (!size_map)
192 goto out;
193
194 index = __fls(size_map) + 1; /* basically a log_2 */
195 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
196
197 if (index < 0)
198 index = 0;
199out:
200 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
201 (unsigned long) ONE_FP/inv_w, maxlen, index);
202
203 return index;
204}
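qfq_calc_index() above maps a (weight, lmax) pair to a group index of roughly log2(lmax/weight) after the FRAC_BITS scaling. The userspace re-implementation below replaces __fls() with an explicit loop and prints the group index for a few arbitrary weights at lmax = 2048.

#include <stdio.h>
#include <stdint.h>

/* Userspace re-implementation of qfq_calc_index() above, with __fls()
 * replaced by a loop.  The weights used in main() are arbitrary.
 */
#define FRAC_BITS		30
#define ONE_FP			(1ULL << FRAC_BITS)
#define QFQ_MAX_INDEX		19
#define QFQ_MTU_SHIFT		11
#define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)

static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
{
	uint64_t slot_size = (uint64_t)maxlen * inv_w;
	uint64_t size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
	int index = 0;

	if (!size_map)
		return 0;

	while (size_map >> (index + 1))	/* position of the highest set bit */
		index++;
	index++;			/* i.e. __fls(size_map) + 1 */
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
	return index < 0 ? 0 : index;
}

int main(void)
{
	unsigned int weights[] = { 1, 16, 256 };
	int i;

	for (i = 0; i < 3; i++) {
		uint32_t inv_w = ONE_FP / weights[i];

		printf("weight %3u, lmax 2048 -> group %d\n",
		       weights[i], qfq_calc_index(inv_w, 2048));
	}
	return 0;
}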
205
206static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
207 struct nlattr **tca, unsigned long *arg)
208{
209 struct qfq_sched *q = qdisc_priv(sch);
210 struct qfq_class *cl = (struct qfq_class *)*arg;
211 struct nlattr *tb[TCA_QFQ_MAX + 1];
212 u32 weight, lmax, inv_w;
213 int i, err;
214
215 if (tca[TCA_OPTIONS] == NULL) {
216 pr_notice("qfq: no options\n");
217 return -EINVAL;
218 }
219
220 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
221 if (err < 0)
222 return err;
223
224 if (tb[TCA_QFQ_WEIGHT]) {
225 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
226 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
227 pr_notice("qfq: invalid weight %u\n", weight);
228 return -EINVAL;
229 }
230 } else
231 weight = 1;
232
233 inv_w = ONE_FP / weight;
234 weight = ONE_FP / inv_w;
235 if (q->wsum + weight > QFQ_MAX_WSUM) {
236 pr_notice("qfq: total weight out of range (%u + %u)\n",
237 weight, q->wsum);
238 return -EINVAL;
239 }
240
241 if (tb[TCA_QFQ_LMAX]) {
242 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
243 if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
244 pr_notice("qfq: invalid max length %u\n", lmax);
245 return -EINVAL;
246 }
247 } else
248 lmax = 1UL << QFQ_MTU_SHIFT;
249
250 if (cl != NULL) {
251 if (tca[TCA_RATE]) {
252 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
253 qdisc_root_sleeping_lock(sch),
254 tca[TCA_RATE]);
255 if (err)
256 return err;
257 }
258
259 sch_tree_lock(sch);
260 if (tb[TCA_QFQ_WEIGHT]) {
261 q->wsum = weight - ONE_FP / cl->inv_w;
262 cl->inv_w = inv_w;
263 }
264 sch_tree_unlock(sch);
265
266 return 0;
267 }
268
269 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
270 if (cl == NULL)
271 return -ENOBUFS;
272
273 cl->refcnt = 1;
274 cl->common.classid = classid;
275 cl->lmax = lmax;
276 cl->inv_w = inv_w;
277 i = qfq_calc_index(cl->inv_w, cl->lmax);
278
279 cl->grp = &q->groups[i];
280 q->wsum += weight;
281
282 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
283 &pfifo_qdisc_ops, classid);
284 if (cl->qdisc == NULL)
285 cl->qdisc = &noop_qdisc;
286
287 if (tca[TCA_RATE]) {
288 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
289 qdisc_root_sleeping_lock(sch),
290 tca[TCA_RATE]);
291 if (err) {
292 qdisc_destroy(cl->qdisc);
293 kfree(cl);
294 return err;
295 }
296 }
297
298 sch_tree_lock(sch);
299 qdisc_class_hash_insert(&q->clhash, &cl->common);
300 sch_tree_unlock(sch);
301
302 qdisc_class_hash_grow(sch, &q->clhash);
303
304 *arg = (unsigned long)cl;
305 return 0;
306}
307
308static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
309{
310 struct qfq_sched *q = qdisc_priv(sch);
311
312 if (cl->inv_w) {
313 q->wsum -= ONE_FP / cl->inv_w;
314 cl->inv_w = 0;
315 }
316
317 gen_kill_estimator(&cl->bstats, &cl->rate_est);
318 qdisc_destroy(cl->qdisc);
319 kfree(cl);
320}
321
322static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
323{
324 struct qfq_sched *q = qdisc_priv(sch);
325 struct qfq_class *cl = (struct qfq_class *)arg;
326
327 if (cl->filter_cnt > 0)
328 return -EBUSY;
329
330 sch_tree_lock(sch);
331
332 qfq_purge_queue(cl);
333 qdisc_class_hash_remove(&q->clhash, &cl->common);
334
335 BUG_ON(--cl->refcnt == 0);
336 /*
337 * This shouldn't happen: we "hold" one cops->get() when called
338 * from tc_ctl_tclass; the destroy method is done from cops->put().
339 */
340
341 sch_tree_unlock(sch);
342 return 0;
343}
344
345static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
346{
347 struct qfq_class *cl = qfq_find_class(sch, classid);
348
349 if (cl != NULL)
350 cl->refcnt++;
351
352 return (unsigned long)cl;
353}
354
355static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
356{
357 struct qfq_class *cl = (struct qfq_class *)arg;
358
359 if (--cl->refcnt == 0)
360 qfq_destroy_class(sch, cl);
361}
362
363static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
364{
365 struct qfq_sched *q = qdisc_priv(sch);
366
367 if (cl)
368 return NULL;
369
370 return &q->filter_list;
371}
372
373static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
374 u32 classid)
375{
376 struct qfq_class *cl = qfq_find_class(sch, classid);
377
378 if (cl != NULL)
379 cl->filter_cnt++;
380
381 return (unsigned long)cl;
382}
383
384static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
385{
386 struct qfq_class *cl = (struct qfq_class *)arg;
387
388 cl->filter_cnt--;
389}
390
391static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
392 struct Qdisc *new, struct Qdisc **old)
393{
394 struct qfq_class *cl = (struct qfq_class *)arg;
395
396 if (new == NULL) {
397 new = qdisc_create_dflt(sch->dev_queue,
398 &pfifo_qdisc_ops, cl->common.classid);
399 if (new == NULL)
400 new = &noop_qdisc;
401 }
402
403 sch_tree_lock(sch);
404 qfq_purge_queue(cl);
405 *old = cl->qdisc;
406 cl->qdisc = new;
407 sch_tree_unlock(sch);
408 return 0;
409}
410
411static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
412{
413 struct qfq_class *cl = (struct qfq_class *)arg;
414
415 return cl->qdisc;
416}
417
418static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
419 struct sk_buff *skb, struct tcmsg *tcm)
420{
421 struct qfq_class *cl = (struct qfq_class *)arg;
422 struct nlattr *nest;
423
424 tcm->tcm_parent = TC_H_ROOT;
425 tcm->tcm_handle = cl->common.classid;
426 tcm->tcm_info = cl->qdisc->handle;
427
428 nest = nla_nest_start(skb, TCA_OPTIONS);
429 if (nest == NULL)
430 goto nla_put_failure;
431 NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w);
432 NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax);
433 return nla_nest_end(skb, nest);
434
435nla_put_failure:
436 nla_nest_cancel(skb, nest);
437 return -EMSGSIZE;
438}
439
440static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
441 struct gnet_dump *d)
442{
443 struct qfq_class *cl = (struct qfq_class *)arg;
444 struct tc_qfq_stats xstats;
445
446 memset(&xstats, 0, sizeof(xstats));
447 cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
448
449 xstats.weight = ONE_FP/cl->inv_w;
450 xstats.lmax = cl->lmax;
451
452 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
453 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
454 gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
455 return -1;
456
457 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
458}
459
460static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
461{
462 struct qfq_sched *q = qdisc_priv(sch);
463 struct qfq_class *cl;
464 struct hlist_node *n;
465 unsigned int i;
466
467 if (arg->stop)
468 return;
469
470 for (i = 0; i < q->clhash.hashsize; i++) {
471 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
472 if (arg->count < arg->skip) {
473 arg->count++;
474 continue;
475 }
476 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
477 arg->stop = 1;
478 return;
479 }
480 arg->count++;
481 }
482 }
483}
484
485static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
486 int *qerr)
487{
488 struct qfq_sched *q = qdisc_priv(sch);
489 struct qfq_class *cl;
490 struct tcf_result res;
491 int result;
492
493 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
494 pr_debug("qfq_classify: found %d\n", skb->priority);
495 cl = qfq_find_class(sch, skb->priority);
496 if (cl != NULL)
497 return cl;
498 }
499
500 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
501 result = tc_classify(skb, q->filter_list, &res);
502 if (result >= 0) {
503#ifdef CONFIG_NET_CLS_ACT
504 switch (result) {
505 case TC_ACT_QUEUED:
506 case TC_ACT_STOLEN:
507 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
508 case TC_ACT_SHOT:
509 return NULL;
510 }
511#endif
512 cl = (struct qfq_class *)res.class;
513 if (cl == NULL)
514 cl = qfq_find_class(sch, res.classid);
515 return cl;
516 }
517
518 return NULL;
519}
520
521/* Generic comparison function, handling wraparound. */
522static inline int qfq_gt(u64 a, u64 b)
523{
524 return (s64)(a - b) > 0;
525}
526
527/* Round a precise timestamp to its slotted value. */
528static inline u64 qfq_round_down(u64 ts, unsigned int shift)
529{
530 return ts & ~((1ULL << shift) - 1);
531}
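A minimal user-space sketch of how these two helpers behave, reusing the same logic with made-up timestamp values:

	#include <stdio.h>
	#include <stdint.h>

	/* Same idea as qfq_gt(): wraparound-safe "a > b" via signed difference. */
	static int gt(uint64_t a, uint64_t b)
	{
		return (int64_t)(a - b) > 0;
	}

	/* Same idea as qfq_round_down(): clear the low 'shift' bits. */
	static uint64_t round_down(uint64_t ts, unsigned int shift)
	{
		return ts & ~((1ULL << shift) - 1);
	}

	int main(void)
	{
		/* 0 is "greater" than a timestamp that has almost wrapped. */
		printf("%d\n", gt(0, UINT64_MAX - 10));	/* 1 */
		printf("%d\n", gt(UINT64_MAX - 10, 0));	/* 0 */
		/* Round 1000 down to a 256-unit slot boundary. */
		printf("%llu\n", (unsigned long long)round_down(1000, 8)); /* 768 */
		return 0;
	}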
532
533/* return the pointer to the group with lowest index in the bitmap */
534static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
535 unsigned long bitmap)
536{
537 int index = __ffs(bitmap);
538 return &q->groups[index];
539}
540/* Calculate a mask to mimic what would be ffs_from(). */
541static inline unsigned long mask_from(unsigned long bitmap, int from)
542{
543 return bitmap & ~((1UL << from) - 1);
544}
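For illustration, mask_from() clears all bits below 'from', so a subsequent __ffs() on the result behaves like a "find first set bit at or above from". A quick stand-alone check with illustrative values:

	#include <stdio.h>

	static unsigned long mask_from(unsigned long bitmap, int from)
	{
		return bitmap & ~((1UL << from) - 1);
	}

	int main(void)
	{
		unsigned long bitmap = 0x35;	/* bits 0, 2, 4, 5 set */

		/* Only bits >= 3 survive: prints 0x30 (bits 4 and 5). */
		printf("%#lx\n", mask_from(bitmap, 3));
		return 0;
	}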
545
546/*
547 * The state computation relies on ER=0, IR=1, EB=2, IB=3
548 * First compute eligibility comparing grp->S, q->V,
549 * then check if someone is blocking us and possibly add EB
550 */
551static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
552{
553 /* if S > V we are not eligible */
554 unsigned int state = qfq_gt(grp->S, q->V);
555 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
556 struct qfq_group *next;
557
558 if (mask) {
559 next = qfq_ffs(q, mask);
560 if (qfq_gt(grp->F, next->F))
561 state |= EB;
562 }
563
564 return state;
565}
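The four group states fit in two bits, eligibility in bit 0 and blocking in bit 1, so the computation above is just an OR of the two conditions. A tiny illustrative reconstruction, using the enum values stated in the comment above rather than the kernel header (which is not shown in this hunk):

	#include <assert.h>

	enum { ER = 0, IR = 1, EB = 2, IB = 3 };	/* values from the comment above */

	/* Illustrative sketch: bit 0 = "ineligible", bit 1 = "blocked". */
	static int calc_state(int ineligible, int blocked)
	{
		return (ineligible ? 1 : 0) | (blocked ? EB : 0);
	}

	int main(void)
	{
		assert(calc_state(0, 0) == ER && calc_state(1, 0) == IR);
		assert(calc_state(0, 1) == EB && calc_state(1, 1) == IB);
		return 0;
	}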
566
567
568/*
569 * In principle
570 * q->bitmaps[dst] |= q->bitmaps[src] & mask;
571 * q->bitmaps[src] &= ~mask;
572 * but we should make sure that src != dst
573 */
574static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
575 int src, int dst)
576{
577 q->bitmaps[dst] |= q->bitmaps[src] & mask;
578 q->bitmaps[src] &= ~mask;
579}
580
581static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
582{
583 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
584 struct qfq_group *next;
585
586 if (mask) {
587 next = qfq_ffs(q, mask);
588 if (!qfq_gt(next->F, old_F))
589 return;
590 }
591
592 mask = (1UL << index) - 1;
593 qfq_move_groups(q, mask, EB, ER);
594 qfq_move_groups(q, mask, IB, IR);
595}
596
597/*
598 * perhaps
599 *
600 old_V ^= q->V;
601 old_V >>= QFQ_MIN_SLOT_SHIFT;
602 if (old_V) {
603 ...
604 }
605 *
606 */
607static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
608{
609 unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
610 unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
611
612 if (vslot != old_vslot) {
613 unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1;
614 qfq_move_groups(q, mask, IR, ER);
615 qfq_move_groups(q, mask, IB, EB);
616 }
617}
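A small sketch of the mask computation above: when the virtual-time slot changes, every group whose index lies below the highest differing bit becomes eligible. The fls() here is a user-space stand-in for the kernel helper, and the slot values are made up:

	#include <stdio.h>

	/* User-space stand-in for fls(): 1-based position of the highest set bit. */
	static int my_fls(unsigned long x)
	{
		int r = 0;

		while (x) {
			x >>= 1;
			r++;
		}
		return r;
	}

	int main(void)
	{
		unsigned long old_vslot = 5, vslot = 6;	/* illustrative values */
		unsigned long mask = (1UL << my_fls(vslot ^ old_vslot)) - 1;

		/* 5 ^ 6 = 3, fls(3) = 2, mask = 0x3: groups 0 and 1 become eligible. */
		printf("mask = %#lx\n", mask);
		return 0;
	}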
618
619
620/*
621 * XXX we should make sure that slot becomes less than 32.
622 * This is guaranteed by the input values.
623 * roundedS is always cl->S rounded on grp->slot_shift bits.
624 */
625static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
626 u64 roundedS)
627{
628 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
629 unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
630
631 hlist_add_head(&cl->next, &grp->slots[i]);
632 __set_bit(slot, &grp->full_slots);
633}
634
635/* Maybe introduce hlist_first_entry?? */
636static struct qfq_class *qfq_slot_head(struct qfq_group *grp)
637{
638 return hlist_entry(grp->slots[grp->front].first,
639 struct qfq_class, next);
640}
641
642/*
643 * remove the entry from the slot
644 */
645static void qfq_front_slot_remove(struct qfq_group *grp)
646{
647 struct qfq_class *cl = qfq_slot_head(grp);
648
649 BUG_ON(!cl);
650 hlist_del(&cl->next);
651 if (hlist_empty(&grp->slots[grp->front]))
652 __clear_bit(0, &grp->full_slots);
653}
654
655/*
656 * Returns the first full queue in a group. As a side effect,
 657 * adjusts the bucket list so that the first non-empty bucket is at
658 * position 0 in full_slots.
659 */
660static struct qfq_class *qfq_slot_scan(struct qfq_group *grp)
661{
662 unsigned int i;
663
664 pr_debug("qfq slot_scan: grp %u full %#lx\n",
665 grp->index, grp->full_slots);
666
667 if (grp->full_slots == 0)
668 return NULL;
669
670 i = __ffs(grp->full_slots); /* zero based */
671 if (i > 0) {
672 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
673 grp->full_slots >>= i;
674 }
675
676 return qfq_slot_head(grp);
677}
678
679/*
680 * adjust the bucket list. When the start time of a group decreases,
681 * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
682 * move the objects. The mask of occupied slots must be shifted
683 * because we use ffs() to find the first non-empty slot.
684 * This covers decreases in the group's start time, but what about
 685 * increases of the start time?
 686 * Here too we should make sure that i is less than 32.
687 */
688static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
689{
690 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
691
692 grp->full_slots <<= i;
693 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
694}
695
696static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
697{
698 struct qfq_group *grp;
699 unsigned long ineligible;
700
701 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
702 if (ineligible) {
703 if (!q->bitmaps[ER]) {
704 grp = qfq_ffs(q, ineligible);
705 if (qfq_gt(grp->S, q->V))
706 q->V = grp->S;
707 }
708 qfq_make_eligible(q, old_V);
709 }
710}
711
 712/* Return the length of the next packet in the queue (0 if the queue is empty). */
713static unsigned int qdisc_peek_len(struct Qdisc *sch)
714{
715 struct sk_buff *skb;
716
717 skb = sch->ops->peek(sch);
718 return skb ? qdisc_pkt_len(skb) : 0;
719}
720
721/*
722 * Updates the class, returns true if also the group needs to be updated.
723 */
724static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl)
725{
726 unsigned int len = qdisc_peek_len(cl->qdisc);
727
728 cl->S = cl->F;
729 if (!len)
730 qfq_front_slot_remove(grp); /* queue is empty */
731 else {
732 u64 roundedS;
733
734 cl->F = cl->S + (u64)len * cl->inv_w;
735 roundedS = qfq_round_down(cl->S, grp->slot_shift);
736 if (roundedS == grp->S)
737 return false;
738
739 qfq_front_slot_remove(grp);
740 qfq_slot_insert(grp, cl, roundedS);
741 }
742
743 return true;
744}
745
746static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
747{
748 struct qfq_sched *q = qdisc_priv(sch);
749 struct qfq_group *grp;
750 struct qfq_class *cl;
751 struct sk_buff *skb;
752 unsigned int len;
753 u64 old_V;
754
755 if (!q->bitmaps[ER])
756 return NULL;
757
758 grp = qfq_ffs(q, q->bitmaps[ER]);
759
760 cl = qfq_slot_head(grp);
761 skb = qdisc_dequeue_peeked(cl->qdisc);
762 if (!skb) {
763 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
764 return NULL;
765 }
766
767 sch->q.qlen--;
768 qdisc_bstats_update(sch, skb);
769
770 old_V = q->V;
771 len = qdisc_pkt_len(skb);
772 q->V += (u64)len * IWSUM;
773 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
774 len, (unsigned long long) cl->F, (unsigned long long) q->V);
775
776 if (qfq_update_class(grp, cl)) {
777 u64 old_F = grp->F;
778
779 cl = qfq_slot_scan(grp);
780 if (!cl)
781 __clear_bit(grp->index, &q->bitmaps[ER]);
782 else {
783 u64 roundedS = qfq_round_down(cl->S, grp->slot_shift);
784 unsigned int s;
785
786 if (grp->S == roundedS)
787 goto skip_unblock;
788 grp->S = roundedS;
789 grp->F = roundedS + (2ULL << grp->slot_shift);
790 __clear_bit(grp->index, &q->bitmaps[ER]);
791 s = qfq_calc_state(q, grp);
792 __set_bit(grp->index, &q->bitmaps[s]);
793 }
794
795 qfq_unblock_groups(q, grp->index, old_F);
796 }
797
798skip_unblock:
799 qfq_update_eligible(q, old_V);
800
801 return skb;
802}
803
804/*
805 * Assign a reasonable start time for a new flow k in group i.
 806 * Admissible values for \hat{F} are multiples of \sigma_i
 807 * no greater than V+\sigma_i. Larger values mean that
808 * we had a wraparound so we consider the timestamp to be stale.
809 *
810 * If F is not stale and F >= V then we set S = F.
811 * Otherwise we should assign S = V, but this may violate
812 * the ordering in ER. So, if we have groups in ER, set S to
813 * the F_j of the first group j which would be blocking us.
814 * We are guaranteed not to move S backward because
815 * otherwise our group i would still be blocked.
816 */
817static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
818{
819 unsigned long mask;
820 uint32_t limit, roundedF;
821 int slot_shift = cl->grp->slot_shift;
822
823 roundedF = qfq_round_down(cl->F, slot_shift);
824 limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
825
826 if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
827 /* timestamp was stale */
828 mask = mask_from(q->bitmaps[ER], cl->grp->index);
829 if (mask) {
830 struct qfq_group *next = qfq_ffs(q, mask);
831 if (qfq_gt(roundedF, next->F)) {
832 cl->S = next->F;
833 return;
834 }
835 }
836 cl->S = q->V;
837 } else /* timestamp is not stale */
838 cl->S = cl->F;
839}
840
841static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
842{
843 struct qfq_sched *q = qdisc_priv(sch);
844 struct qfq_group *grp;
845 struct qfq_class *cl;
846 int err;
847 u64 roundedS;
848 int s;
849
850 cl = qfq_classify(skb, sch, &err);
851 if (cl == NULL) {
852 if (err & __NET_XMIT_BYPASS)
853 sch->qstats.drops++;
854 kfree_skb(skb);
855 return err;
856 }
857 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
858
859 err = qdisc_enqueue(skb, cl->qdisc);
860 if (unlikely(err != NET_XMIT_SUCCESS)) {
861 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
862 if (net_xmit_drop_count(err)) {
863 cl->qstats.drops++;
864 sch->qstats.drops++;
865 }
866 return err;
867 }
868
869 bstats_update(&cl->bstats, skb);
870 ++sch->q.qlen;
871
 872	/* If the new skb is not the head of the queue, we are done here. */
873 if (cl->qdisc->q.qlen != 1)
874 return err;
875
 876	/* If we reach this point, the queue was idle */
877 grp = cl->grp;
878 qfq_update_start(q, cl);
879
880 /* compute new finish time and rounded start. */
881 cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
882 roundedS = qfq_round_down(cl->S, grp->slot_shift);
883
884 /*
885 * insert cl in the correct bucket.
886 * If cl->S >= grp->S we don't need to adjust the
887 * bucket list and simply go to the insertion phase.
888 * Otherwise grp->S is decreasing, we must make room
889 * in the bucket list, and also recompute the group state.
890 * Finally, if there were no flows in this group and nobody
891 * was in ER make sure to adjust V.
892 */
893 if (grp->full_slots) {
894 if (!qfq_gt(grp->S, cl->S))
895 goto skip_update;
896
897 /* create a slot for this cl->S */
898 qfq_slot_rotate(grp, roundedS);
899 /* group was surely ineligible, remove */
900 __clear_bit(grp->index, &q->bitmaps[IR]);
901 __clear_bit(grp->index, &q->bitmaps[IB]);
902 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
903 q->V = roundedS;
904
905 grp->S = roundedS;
906 grp->F = roundedS + (2ULL << grp->slot_shift);
907 s = qfq_calc_state(q, grp);
908 __set_bit(grp->index, &q->bitmaps[s]);
909
910 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
911 s, q->bitmaps[s],
912 (unsigned long long) cl->S,
913 (unsigned long long) cl->F,
914 (unsigned long long) q->V);
915
916skip_update:
917 qfq_slot_insert(grp, cl, roundedS);
918
919 return err;
920}
921
922
923static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
924 struct qfq_class *cl)
925{
926 unsigned int i, offset;
927 u64 roundedS;
928
929 roundedS = qfq_round_down(cl->S, grp->slot_shift);
930 offset = (roundedS - grp->S) >> grp->slot_shift;
931 i = (grp->front + offset) % QFQ_MAX_SLOTS;
932
933 hlist_del(&cl->next);
934 if (hlist_empty(&grp->slots[i]))
935 __clear_bit(offset, &grp->full_slots);
936}
937
938/*
939 * called to forcibly destroy a queue.
940 * If the queue is not in the front bucket, or if it has
941 * other queues in the front bucket, we can simply remove
942 * the queue with no other side effects.
943 * Otherwise we must propagate the event up.
944 */
945static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
946{
947 struct qfq_group *grp = cl->grp;
948 unsigned long mask;
949 u64 roundedS;
950 int s;
951
952 cl->F = cl->S;
953 qfq_slot_remove(q, grp, cl);
954
955 if (!grp->full_slots) {
956 __clear_bit(grp->index, &q->bitmaps[IR]);
957 __clear_bit(grp->index, &q->bitmaps[EB]);
958 __clear_bit(grp->index, &q->bitmaps[IB]);
959
960 if (test_bit(grp->index, &q->bitmaps[ER]) &&
961 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
962 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
963 if (mask)
964 mask = ~((1UL << __fls(mask)) - 1);
965 else
966 mask = ~0UL;
967 qfq_move_groups(q, mask, EB, ER);
968 qfq_move_groups(q, mask, IB, IR);
969 }
970 __clear_bit(grp->index, &q->bitmaps[ER]);
971 } else if (hlist_empty(&grp->slots[grp->front])) {
972 cl = qfq_slot_scan(grp);
973 roundedS = qfq_round_down(cl->S, grp->slot_shift);
974 if (grp->S != roundedS) {
975 __clear_bit(grp->index, &q->bitmaps[ER]);
976 __clear_bit(grp->index, &q->bitmaps[IR]);
977 __clear_bit(grp->index, &q->bitmaps[EB]);
978 __clear_bit(grp->index, &q->bitmaps[IB]);
979 grp->S = roundedS;
980 grp->F = roundedS + (2ULL << grp->slot_shift);
981 s = qfq_calc_state(q, grp);
982 __set_bit(grp->index, &q->bitmaps[s]);
983 }
984 }
985
986 qfq_update_eligible(q, q->V);
987}
988
989static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
990{
991 struct qfq_sched *q = qdisc_priv(sch);
992 struct qfq_class *cl = (struct qfq_class *)arg;
993
994 if (cl->qdisc->q.qlen == 0)
995 qfq_deactivate_class(q, cl);
996}
997
998static unsigned int qfq_drop(struct Qdisc *sch)
999{
1000 struct qfq_sched *q = qdisc_priv(sch);
1001 struct qfq_group *grp;
1002 unsigned int i, j, len;
1003
1004 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1005 grp = &q->groups[i];
1006 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1007 struct qfq_class *cl;
1008 struct hlist_node *n;
1009
1010 hlist_for_each_entry(cl, n, &grp->slots[j], next) {
1011
1012 if (!cl->qdisc->ops->drop)
1013 continue;
1014
1015 len = cl->qdisc->ops->drop(cl->qdisc);
1016 if (len > 0) {
1017 sch->q.qlen--;
1018 if (!cl->qdisc->q.qlen)
1019 qfq_deactivate_class(q, cl);
1020
1021 return len;
1022 }
1023 }
1024 }
1025 }
1026
1027 return 0;
1028}
1029
1030static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1031{
1032 struct qfq_sched *q = qdisc_priv(sch);
1033 struct qfq_group *grp;
1034 int i, j, err;
1035
1036 err = qdisc_class_hash_init(&q->clhash);
1037 if (err < 0)
1038 return err;
1039
1040 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1041 grp = &q->groups[i];
1042 grp->index = i;
1043 grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS
1044 - (QFQ_MAX_INDEX - i);
1045 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1046 INIT_HLIST_HEAD(&grp->slots[j]);
1047 }
1048
1049 return 0;
1050}
1051
1052static void qfq_reset_qdisc(struct Qdisc *sch)
1053{
1054 struct qfq_sched *q = qdisc_priv(sch);
1055 struct qfq_group *grp;
1056 struct qfq_class *cl;
1057 struct hlist_node *n, *tmp;
1058 unsigned int i, j;
1059
1060 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1061 grp = &q->groups[i];
1062 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1063 hlist_for_each_entry_safe(cl, n, tmp,
1064 &grp->slots[j], next) {
1065 qfq_deactivate_class(q, cl);
1066 }
1067 }
1068 }
1069
1070 for (i = 0; i < q->clhash.hashsize; i++) {
1071 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1072 qdisc_reset(cl->qdisc);
1073 }
1074 sch->q.qlen = 0;
1075}
1076
1077static void qfq_destroy_qdisc(struct Qdisc *sch)
1078{
1079 struct qfq_sched *q = qdisc_priv(sch);
1080 struct qfq_class *cl;
1081 struct hlist_node *n, *next;
1082 unsigned int i;
1083
1084 tcf_destroy_chain(&q->filter_list);
1085
1086 for (i = 0; i < q->clhash.hashsize; i++) {
1087 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1088 common.hnode) {
1089 qfq_destroy_class(sch, cl);
1090 }
1091 }
1092 qdisc_class_hash_destroy(&q->clhash);
1093}
1094
1095static const struct Qdisc_class_ops qfq_class_ops = {
1096 .change = qfq_change_class,
1097 .delete = qfq_delete_class,
1098 .get = qfq_get_class,
1099 .put = qfq_put_class,
1100 .tcf_chain = qfq_tcf_chain,
1101 .bind_tcf = qfq_bind_tcf,
1102 .unbind_tcf = qfq_unbind_tcf,
1103 .graft = qfq_graft_class,
1104 .leaf = qfq_class_leaf,
1105 .qlen_notify = qfq_qlen_notify,
1106 .dump = qfq_dump_class,
1107 .dump_stats = qfq_dump_class_stats,
1108 .walk = qfq_walk,
1109};
1110
1111static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1112 .cl_ops = &qfq_class_ops,
1113 .id = "qfq",
1114 .priv_size = sizeof(struct qfq_sched),
1115 .enqueue = qfq_enqueue,
1116 .dequeue = qfq_dequeue,
1117 .peek = qdisc_peek_dequeued,
1118 .drop = qfq_drop,
1119 .init = qfq_init_qdisc,
1120 .reset = qfq_reset_qdisc,
1121 .destroy = qfq_destroy_qdisc,
1122 .owner = THIS_MODULE,
1123};
1124
1125static int __init qfq_init(void)
1126{
1127 return register_qdisc(&qfq_qdisc_ops);
1128}
1129
1130static void __exit qfq_exit(void)
1131{
1132 unregister_qdisc(&qfq_qdisc_ops);
1133}
1134
1135module_init(qfq_init);
1136module_exit(qfq_exit);
1137MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 8d42bb3ba540..6649463da1b6 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
36 if RED works correctly. 36 if RED works correctly.
37 */ 37 */
38 38
39struct red_sched_data 39struct red_sched_data {
40{
41 u32 limit; /* HARD maximal queue length */ 40 u32 limit; /* HARD maximal queue length */
42 unsigned char flags; 41 unsigned char flags;
43 struct red_parms parms; 42 struct red_parms parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
55 return q->flags & TC_RED_HARDDROP; 54 return q->flags & TC_RED_HARDDROP;
56} 55}
57 56
58static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) 57static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
59{ 58{
60 struct red_sched_data *q = qdisc_priv(sch); 59 struct red_sched_data *q = qdisc_priv(sch);
61 struct Qdisc *child = q->qdisc; 60 struct Qdisc *child = q->qdisc;
@@ -67,35 +66,33 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
67 red_end_of_idle_period(&q->parms); 66 red_end_of_idle_period(&q->parms);
68 67
69 switch (red_action(&q->parms, q->parms.qavg)) { 68 switch (red_action(&q->parms, q->parms.qavg)) {
70 case RED_DONT_MARK: 69 case RED_DONT_MARK:
71 break; 70 break;
72 71
73 case RED_PROB_MARK: 72 case RED_PROB_MARK:
74 sch->qstats.overlimits++; 73 sch->qstats.overlimits++;
75 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { 74 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76 q->stats.prob_drop++; 75 q->stats.prob_drop++;
77 goto congestion_drop; 76 goto congestion_drop;
78 } 77 }
79 78
80 q->stats.prob_mark++; 79 q->stats.prob_mark++;
81 break; 80 break;
82 81
83 case RED_HARD_MARK: 82 case RED_HARD_MARK:
84 sch->qstats.overlimits++; 83 sch->qstats.overlimits++;
85 if (red_use_harddrop(q) || !red_use_ecn(q) || 84 if (red_use_harddrop(q) || !red_use_ecn(q) ||
86 !INET_ECN_set_ce(skb)) { 85 !INET_ECN_set_ce(skb)) {
87 q->stats.forced_drop++; 86 q->stats.forced_drop++;
88 goto congestion_drop; 87 goto congestion_drop;
89 } 88 }
90 89
91 q->stats.forced_mark++; 90 q->stats.forced_mark++;
92 break; 91 break;
93 } 92 }
94 93
95 ret = qdisc_enqueue(skb, child); 94 ret = qdisc_enqueue(skb, child);
96 if (likely(ret == NET_XMIT_SUCCESS)) { 95 if (likely(ret == NET_XMIT_SUCCESS)) {
97 sch->bstats.bytes += qdisc_pkt_len(skb);
98 sch->bstats.packets++;
99 sch->q.qlen++; 96 sch->q.qlen++;
100 } else if (net_xmit_drop_count(ret)) { 97 } else if (net_xmit_drop_count(ret)) {
101 q->stats.pdrop++; 98 q->stats.pdrop++;
@@ -108,22 +105,24 @@ congestion_drop:
108 return NET_XMIT_CN; 105 return NET_XMIT_CN;
109} 106}
110 107
111static struct sk_buff * red_dequeue(struct Qdisc* sch) 108static struct sk_buff *red_dequeue(struct Qdisc *sch)
112{ 109{
113 struct sk_buff *skb; 110 struct sk_buff *skb;
114 struct red_sched_data *q = qdisc_priv(sch); 111 struct red_sched_data *q = qdisc_priv(sch);
115 struct Qdisc *child = q->qdisc; 112 struct Qdisc *child = q->qdisc;
116 113
117 skb = child->dequeue(child); 114 skb = child->dequeue(child);
118 if (skb) 115 if (skb) {
116 qdisc_bstats_update(sch, skb);
119 sch->q.qlen--; 117 sch->q.qlen--;
120 else if (!red_is_idling(&q->parms)) 118 } else {
121 red_start_of_idle_period(&q->parms); 119 if (!red_is_idling(&q->parms))
122 120 red_start_of_idle_period(&q->parms);
121 }
123 return skb; 122 return skb;
124} 123}
125 124
126static struct sk_buff * red_peek(struct Qdisc* sch) 125static struct sk_buff *red_peek(struct Qdisc *sch)
127{ 126{
128 struct red_sched_data *q = qdisc_priv(sch); 127 struct red_sched_data *q = qdisc_priv(sch);
129 struct Qdisc *child = q->qdisc; 128 struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
131 return child->ops->peek(child); 130 return child->ops->peek(child);
132} 131}
133 132
134static unsigned int red_drop(struct Qdisc* sch) 133static unsigned int red_drop(struct Qdisc *sch)
135{ 134{
136 struct red_sched_data *q = qdisc_priv(sch); 135 struct red_sched_data *q = qdisc_priv(sch);
137 struct Qdisc *child = q->qdisc; 136 struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
150 return 0; 149 return 0;
151} 150}
152 151
153static void red_reset(struct Qdisc* sch) 152static void red_reset(struct Qdisc *sch)
154{ 153{
155 struct red_sched_data *q = qdisc_priv(sch); 154 struct red_sched_data *q = qdisc_priv(sch);
156 155
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
217 return 0; 216 return 0;
218} 217}
219 218
220static int red_init(struct Qdisc* sch, struct nlattr *opt) 219static int red_init(struct Qdisc *sch, struct nlattr *opt)
221{ 220{
222 struct red_sched_data *q = qdisc_priv(sch); 221 struct red_sched_data *q = qdisc_priv(sch);
223 222
@@ -239,6 +238,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
239 .Scell_log = q->parms.Scell_log, 238 .Scell_log = q->parms.Scell_log,
240 }; 239 };
241 240
241 sch->qstats.backlog = q->qdisc->qstats.backlog;
242 opts = nla_nest_start(skb, TCA_OPTIONS); 242 opts = nla_nest_start(skb, TCA_OPTIONS);
243 if (opts == NULL) 243 if (opts == NULL)
244 goto nla_put_failure; 244 goto nla_put_failure;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
new file mode 100644
index 000000000000..0a833d0c1f61
--- /dev/null
+++ b/net/sched/sch_sfb.c
@@ -0,0 +1,709 @@
1/*
2 * net/sched/sch_sfb.c Stochastic Fair Blue
3 *
4 * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
12 * A New Class of Active Queue Management Algorithms.
13 * U. Michigan CSE-TR-387-99, April 1999.
14 *
15 * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
16 *
17 */
18
19#include <linux/module.h>
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/skbuff.h>
24#include <linux/random.h>
25#include <linux/jhash.h>
26#include <net/ip.h>
27#include <net/pkt_sched.h>
28#include <net/inet_ecn.h>
29
30/*
 31 * SFB uses two B[l][n]: L x N arrays of bins (L levels, N bins per level).
 32 * This implementation uses L = 8 and N = 16.
33 * This permits us to split one 32bit hash (provided per packet by rxhash or
34 * external classifier) into 8 subhashes of 4 bits.
35 */
36#define SFB_BUCKET_SHIFT 4
37#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */
38#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1)
39#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */
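To illustrate the comment above, a 32-bit hash is consumed 4 bits at a time, one bucket index per level. A stand-alone sketch reusing the same constants (the hash value is made up):

	#include <stdio.h>
	#include <stdint.h>

	#define SFB_BUCKET_SHIFT 4
	#define SFB_NUMBUCKETS   (1 << SFB_BUCKET_SHIFT)
	#define SFB_BUCKET_MASK  (SFB_NUMBUCKETS - 1)
	#define SFB_LEVELS       (32 / SFB_BUCKET_SHIFT)

	int main(void)
	{
		uint32_t sfbhash = 0x12345678;	/* illustrative hash value */
		int level;

		for (level = 0; level < SFB_LEVELS; level++) {
			printf("level %d -> bin %u\n",
			       level, (unsigned)(sfbhash & SFB_BUCKET_MASK));
			sfbhash >>= SFB_BUCKET_SHIFT;
		}
		/* Prints bins 8, 7, 6, 5, 4, 3, 2, 1: one 4-bit index per level. */
		return 0;
	}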
40
41/* SFB algo uses a virtual queue, named "bin" */
42struct sfb_bucket {
43 u16 qlen; /* length of virtual queue */
44 u16 p_mark; /* marking probability */
45};
46
 47/* We use double buffering right before a hash change
 48 * (Section 4.4 of the SFB reference: moving hash functions)
49 */
50struct sfb_bins {
51 u32 perturbation; /* jhash perturbation */
52 struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
53};
54
55struct sfb_sched_data {
56 struct Qdisc *qdisc;
57 struct tcf_proto *filter_list;
58 unsigned long rehash_interval;
59 unsigned long warmup_time; /* double buffering warmup time in jiffies */
60 u32 max;
61 u32 bin_size; /* maximum queue length per bin */
62 u32 increment; /* d1 */
63 u32 decrement; /* d2 */
64 u32 limit; /* HARD maximal queue length */
65 u32 penalty_rate;
66 u32 penalty_burst;
67 u32 tokens_avail;
68 unsigned long rehash_time;
69 unsigned long token_time;
70
71 u8 slot; /* current active bins (0 or 1) */
72 bool double_buffering;
73 struct sfb_bins bins[2];
74
75 struct {
76 u32 earlydrop;
77 u32 penaltydrop;
78 u32 bucketdrop;
79 u32 queuedrop;
80 u32 childdrop; /* drops in child qdisc */
81 u32 marked; /* ECN mark */
82 } stats;
83};
84
85/*
 86 * Each queued skb might be hashed on one or two bins.
 87 * We store the two hash values in skb_cb.
88 * (A zero value means double buffering was not used)
89 */
90struct sfb_skb_cb {
91 u32 hashes[2];
92};
93
94static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
95{
96 BUILD_BUG_ON(sizeof(skb->cb) <
97 sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
98 return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
99}
100
101/*
 102 * If using the 'internal' SFB flow classifier, the hash comes from skb rxhash.
 103 * If using an external classifier, the hash comes from the classid.
104 */
105static u32 sfb_hash(const struct sk_buff *skb, u32 slot)
106{
107 return sfb_skb_cb(skb)->hashes[slot];
108}
109
110/* Probabilities are coded as Q0.16 fixed-point values,
111 * with 0xFFFF representing 65535/65536 (almost 1.0)
112 * Addition and subtraction are saturating in [0, 65535]
113 */
114static u32 prob_plus(u32 p1, u32 p2)
115{
116 u32 res = p1 + p2;
117
118 return min_t(u32, res, SFB_MAX_PROB);
119}
120
121static u32 prob_minus(u32 p1, u32 p2)
122{
123 return p1 > p2 ? p1 - p2 : 0;
124}
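A quick stand-alone check of the saturating Q0.16 arithmetic above. SFB_MAX_PROB is assumed to be 0xFFFF, as the comment implies; the real constant lives in a header not shown in this hunk:

	#include <stdio.h>
	#include <stdint.h>

	#define SFB_MAX_PROB 0xFFFF	/* assumed value, per the Q0.16 comment */

	static uint32_t prob_plus(uint32_t p1, uint32_t p2)
	{
		uint32_t res = p1 + p2;

		return res < SFB_MAX_PROB ? res : SFB_MAX_PROB;
	}

	static uint32_t prob_minus(uint32_t p1, uint32_t p2)
	{
		return p1 > p2 ? p1 - p2 : 0;
	}

	int main(void)
	{
		printf("%u\n", (unsigned)prob_plus(0xFFF0, 0x100));	/* saturates at 65535 */
		printf("%u\n", (unsigned)prob_minus(10, 20));		/* clamps at 0 */
		return 0;
	}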
125
126static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
127{
128 int i;
129 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
130
131 for (i = 0; i < SFB_LEVELS; i++) {
132 u32 hash = sfbhash & SFB_BUCKET_MASK;
133
134 sfbhash >>= SFB_BUCKET_SHIFT;
135 if (b[hash].qlen < 0xFFFF)
136 b[hash].qlen++;
137 b += SFB_NUMBUCKETS; /* next level */
138 }
139}
140
141static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
142{
143 u32 sfbhash;
144
145 sfbhash = sfb_hash(skb, 0);
146 if (sfbhash)
147 increment_one_qlen(sfbhash, 0, q);
148
149 sfbhash = sfb_hash(skb, 1);
150 if (sfbhash)
151 increment_one_qlen(sfbhash, 1, q);
152}
153
154static void decrement_one_qlen(u32 sfbhash, u32 slot,
155 struct sfb_sched_data *q)
156{
157 int i;
158 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
159
160 for (i = 0; i < SFB_LEVELS; i++) {
161 u32 hash = sfbhash & SFB_BUCKET_MASK;
162
163 sfbhash >>= SFB_BUCKET_SHIFT;
164 if (b[hash].qlen > 0)
165 b[hash].qlen--;
166 b += SFB_NUMBUCKETS; /* next level */
167 }
168}
169
170static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
171{
172 u32 sfbhash;
173
174 sfbhash = sfb_hash(skb, 0);
175 if (sfbhash)
176 decrement_one_qlen(sfbhash, 0, q);
177
178 sfbhash = sfb_hash(skb, 1);
179 if (sfbhash)
180 decrement_one_qlen(sfbhash, 1, q);
181}
182
183static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
184{
185 b->p_mark = prob_minus(b->p_mark, q->decrement);
186}
187
188static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
189{
190 b->p_mark = prob_plus(b->p_mark, q->increment);
191}
192
193static void sfb_zero_all_buckets(struct sfb_sched_data *q)
194{
195 memset(&q->bins, 0, sizeof(q->bins));
196}
197
198/*
199 * compute max qlen, max p_mark, and avg p_mark
200 */
201static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q)
202{
203 int i;
204 u32 qlen = 0, prob = 0, totalpm = 0;
205 const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0];
206
207 for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) {
208 if (qlen < b->qlen)
209 qlen = b->qlen;
210 totalpm += b->p_mark;
211 if (prob < b->p_mark)
212 prob = b->p_mark;
213 b++;
214 }
215 *prob_r = prob;
216 *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS);
217 return qlen;
218}
219
220
221static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
222{
223 q->bins[slot].perturbation = net_random();
224}
225
226static void sfb_swap_slot(struct sfb_sched_data *q)
227{
228 sfb_init_perturbation(q->slot, q);
229 q->slot ^= 1;
230 q->double_buffering = false;
231}
232
 233/* Non-elastic flows are allowed to use part of the bandwidth, expressed
 234 * in "penalty_rate" packets per second, with a burst of "penalty_burst" packets
235 */
236static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
237{
238 if (q->penalty_rate == 0 || q->penalty_burst == 0)
239 return true;
240
241 if (q->tokens_avail < 1) {
242 unsigned long age = min(10UL * HZ, jiffies - q->token_time);
243
244 q->tokens_avail = (age * q->penalty_rate) / HZ;
245 if (q->tokens_avail > q->penalty_burst)
246 q->tokens_avail = q->penalty_burst;
247 q->token_time = jiffies;
248 if (q->tokens_avail < 1)
249 return true;
250 }
251
252 q->tokens_avail--;
253 return false;
254}
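The refill above is a plain token bucket: after 'age' jiffies of inactivity the flow earns age * penalty_rate / HZ tokens, capped at penalty_burst. A worked sketch with assumed values (HZ and the timings are illustrative only):

	#include <stdio.h>

	#define HZ 1000				/* assumed jiffy rate for the example */

	int main(void)
	{
		unsigned long age = 500;	/* jiffies since last refill (0.5 s here) */
		unsigned int penalty_rate = 10;	/* packets per second */
		unsigned int penalty_burst = 20;
		unsigned int tokens = age * penalty_rate / HZ;

		if (tokens > penalty_burst)
			tokens = penalty_burst;
		printf("tokens_avail = %u\n", tokens);	/* 5 */
		return 0;
	}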
255
256static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
257 int *qerr, u32 *salt)
258{
259 struct tcf_result res;
260 int result;
261
262 result = tc_classify(skb, q->filter_list, &res);
263 if (result >= 0) {
264#ifdef CONFIG_NET_CLS_ACT
265 switch (result) {
266 case TC_ACT_STOLEN:
267 case TC_ACT_QUEUED:
268 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
269 case TC_ACT_SHOT:
270 return false;
271 }
272#endif
273 *salt = TC_H_MIN(res.classid);
274 return true;
275 }
276 return false;
277}
278
279static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
280{
281
282 struct sfb_sched_data *q = qdisc_priv(sch);
283 struct Qdisc *child = q->qdisc;
284 int i;
285 u32 p_min = ~0;
286 u32 minqlen = ~0;
287 u32 r, slot, salt, sfbhash;
288 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
289
290 if (q->rehash_interval > 0) {
291 unsigned long limit = q->rehash_time + q->rehash_interval;
292
293 if (unlikely(time_after(jiffies, limit))) {
294 sfb_swap_slot(q);
295 q->rehash_time = jiffies;
296 } else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
297 time_after(jiffies, limit - q->warmup_time))) {
298 q->double_buffering = true;
299 }
300 }
301
302 if (q->filter_list) {
303 /* If using external classifiers, get result and record it. */
304 if (!sfb_classify(skb, q, &ret, &salt))
305 goto other_drop;
306 } else {
307 salt = skb_get_rxhash(skb);
308 }
309
310 slot = q->slot;
311
312 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
313 if (!sfbhash)
314 sfbhash = 1;
315 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
316
317 for (i = 0; i < SFB_LEVELS; i++) {
318 u32 hash = sfbhash & SFB_BUCKET_MASK;
319 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
320
321 sfbhash >>= SFB_BUCKET_SHIFT;
322 if (b->qlen == 0)
323 decrement_prob(b, q);
324 else if (b->qlen >= q->bin_size)
325 increment_prob(b, q);
326 if (minqlen > b->qlen)
327 minqlen = b->qlen;
328 if (p_min > b->p_mark)
329 p_min = b->p_mark;
330 }
331
332 slot ^= 1;
333 sfb_skb_cb(skb)->hashes[slot] = 0;
334
335 if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) {
336 sch->qstats.overlimits++;
337 if (minqlen >= q->max)
338 q->stats.bucketdrop++;
339 else
340 q->stats.queuedrop++;
341 goto drop;
342 }
343
344 if (unlikely(p_min >= SFB_MAX_PROB)) {
345 /* Inelastic flow */
346 if (q->double_buffering) {
347 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
348 if (!sfbhash)
349 sfbhash = 1;
350 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
351
352 for (i = 0; i < SFB_LEVELS; i++) {
353 u32 hash = sfbhash & SFB_BUCKET_MASK;
354 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
355
356 sfbhash >>= SFB_BUCKET_SHIFT;
357 if (b->qlen == 0)
358 decrement_prob(b, q);
359 else if (b->qlen >= q->bin_size)
360 increment_prob(b, q);
361 }
362 }
363 if (sfb_rate_limit(skb, q)) {
364 sch->qstats.overlimits++;
365 q->stats.penaltydrop++;
366 goto drop;
367 }
368 goto enqueue;
369 }
370
371 r = net_random() & SFB_MAX_PROB;
372
373 if (unlikely(r < p_min)) {
374 if (unlikely(p_min > SFB_MAX_PROB / 2)) {
375 /* If we're marking that many packets, then either
376 * this flow is unresponsive, or we're badly congested.
377 * In either case, we want to start dropping packets.
378 */
379 if (r < (p_min - SFB_MAX_PROB / 2) * 2) {
380 q->stats.earlydrop++;
381 goto drop;
382 }
383 }
384 if (INET_ECN_set_ce(skb)) {
385 q->stats.marked++;
386 } else {
387 q->stats.earlydrop++;
388 goto drop;
389 }
390 }
391
392enqueue:
393 ret = qdisc_enqueue(skb, child);
394 if (likely(ret == NET_XMIT_SUCCESS)) {
395 sch->q.qlen++;
396 increment_qlen(skb, q);
397 } else if (net_xmit_drop_count(ret)) {
398 q->stats.childdrop++;
399 sch->qstats.drops++;
400 }
401 return ret;
402
403drop:
404 qdisc_drop(skb, sch);
405 return NET_XMIT_CN;
406other_drop:
407 if (ret & __NET_XMIT_BYPASS)
408 sch->qstats.drops++;
409 kfree_skb(skb);
410 return ret;
411}
412
413static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
414{
415 struct sfb_sched_data *q = qdisc_priv(sch);
416 struct Qdisc *child = q->qdisc;
417 struct sk_buff *skb;
418
419 skb = child->dequeue(q->qdisc);
420
421 if (skb) {
422 qdisc_bstats_update(sch, skb);
423 sch->q.qlen--;
424 decrement_qlen(skb, q);
425 }
426
427 return skb;
428}
429
430static struct sk_buff *sfb_peek(struct Qdisc *sch)
431{
432 struct sfb_sched_data *q = qdisc_priv(sch);
433 struct Qdisc *child = q->qdisc;
434
435 return child->ops->peek(child);
436}
437
438/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */
439
440static void sfb_reset(struct Qdisc *sch)
441{
442 struct sfb_sched_data *q = qdisc_priv(sch);
443
444 qdisc_reset(q->qdisc);
445 sch->q.qlen = 0;
446 q->slot = 0;
447 q->double_buffering = false;
448 sfb_zero_all_buckets(q);
449 sfb_init_perturbation(0, q);
450}
451
452static void sfb_destroy(struct Qdisc *sch)
453{
454 struct sfb_sched_data *q = qdisc_priv(sch);
455
456 tcf_destroy_chain(&q->filter_list);
457 qdisc_destroy(q->qdisc);
458}
459
460static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
461 [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) },
462};
463
464static const struct tc_sfb_qopt sfb_default_ops = {
465 .rehash_interval = 600 * MSEC_PER_SEC,
466 .warmup_time = 60 * MSEC_PER_SEC,
467 .limit = 0,
468 .max = 25,
469 .bin_size = 20,
470 .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */
471 .decrement = (SFB_MAX_PROB + 3000) / 6000,
472 .penalty_rate = 10,
473 .penalty_burst = 20,
474};
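Plugging in SFB_MAX_PROB = 0xFFFF (assumed, per the probability comment earlier in this file), the defaults above work out to an increment of 66, roughly 0.1% of full probability, and a decrement of 11, roughly 0.017%. A stand-alone check of that arithmetic:

	#include <stdio.h>

	#define SFB_MAX_PROB 0xFFFF	/* assumed value of the kernel constant */

	int main(void)
	{
		unsigned int increment = (SFB_MAX_PROB + 500) / 1000;
		unsigned int decrement = (SFB_MAX_PROB + 3000) / 6000;

		/* increment = 66 (~0.10% of 65536), decrement = 11 (~0.017%) */
		printf("increment = %u, decrement = %u\n", increment, decrement);
		return 0;
	}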
475
476static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
477{
478 struct sfb_sched_data *q = qdisc_priv(sch);
479 struct Qdisc *child;
480 struct nlattr *tb[TCA_SFB_MAX + 1];
481 const struct tc_sfb_qopt *ctl = &sfb_default_ops;
482 u32 limit;
483 int err;
484
485 if (opt) {
486 err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
487 if (err < 0)
488 return -EINVAL;
489
490 if (tb[TCA_SFB_PARMS] == NULL)
491 return -EINVAL;
492
493 ctl = nla_data(tb[TCA_SFB_PARMS]);
494 }
495
496 limit = ctl->limit;
497 if (limit == 0)
498 limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
499
500 child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
501 if (IS_ERR(child))
502 return PTR_ERR(child);
503
504 sch_tree_lock(sch);
505
506 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
507 qdisc_destroy(q->qdisc);
508 q->qdisc = child;
509
510 q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
511 q->warmup_time = msecs_to_jiffies(ctl->warmup_time);
512 q->rehash_time = jiffies;
513 q->limit = limit;
514 q->increment = ctl->increment;
515 q->decrement = ctl->decrement;
516 q->max = ctl->max;
517 q->bin_size = ctl->bin_size;
518 q->penalty_rate = ctl->penalty_rate;
519 q->penalty_burst = ctl->penalty_burst;
520 q->tokens_avail = ctl->penalty_burst;
521 q->token_time = jiffies;
522
523 q->slot = 0;
524 q->double_buffering = false;
525 sfb_zero_all_buckets(q);
526 sfb_init_perturbation(0, q);
527 sfb_init_perturbation(1, q);
528
529 sch_tree_unlock(sch);
530
531 return 0;
532}
533
534static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
535{
536 struct sfb_sched_data *q = qdisc_priv(sch);
537
538 q->qdisc = &noop_qdisc;
539 return sfb_change(sch, opt);
540}
541
542static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
543{
544 struct sfb_sched_data *q = qdisc_priv(sch);
545 struct nlattr *opts;
546 struct tc_sfb_qopt opt = {
547 .rehash_interval = jiffies_to_msecs(q->rehash_interval),
548 .warmup_time = jiffies_to_msecs(q->warmup_time),
549 .limit = q->limit,
550 .max = q->max,
551 .bin_size = q->bin_size,
552 .increment = q->increment,
553 .decrement = q->decrement,
554 .penalty_rate = q->penalty_rate,
555 .penalty_burst = q->penalty_burst,
556 };
557
558 sch->qstats.backlog = q->qdisc->qstats.backlog;
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt);
561 return nla_nest_end(skb, opts);
562
563nla_put_failure:
564 nla_nest_cancel(skb, opts);
565 return -EMSGSIZE;
566}
567
568static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
569{
570 struct sfb_sched_data *q = qdisc_priv(sch);
571 struct tc_sfb_xstats st = {
572 .earlydrop = q->stats.earlydrop,
573 .penaltydrop = q->stats.penaltydrop,
574 .bucketdrop = q->stats.bucketdrop,
575 .queuedrop = q->stats.queuedrop,
576 .childdrop = q->stats.childdrop,
577 .marked = q->stats.marked,
578 };
579
580 st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
586 struct sk_buff *skb, struct tcmsg *tcm)
587{
588 return -ENOSYS;
589}
590
591static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
592 struct Qdisc **old)
593{
594 struct sfb_sched_data *q = qdisc_priv(sch);
595
596 if (new == NULL)
597 new = &noop_qdisc;
598
599 sch_tree_lock(sch);
600 *old = q->qdisc;
601 q->qdisc = new;
602 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
603 qdisc_reset(*old);
604 sch_tree_unlock(sch);
605 return 0;
606}
607
608static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
609{
610 struct sfb_sched_data *q = qdisc_priv(sch);
611
612 return q->qdisc;
613}
614
615static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
616{
617 return 1;
618}
619
620static void sfb_put(struct Qdisc *sch, unsigned long arg)
621{
622}
623
624static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
625 struct nlattr **tca, unsigned long *arg)
626{
627 return -ENOSYS;
628}
629
630static int sfb_delete(struct Qdisc *sch, unsigned long cl)
631{
632 return -ENOSYS;
633}
634
635static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
636{
637 if (!walker->stop) {
638 if (walker->count >= walker->skip)
639 if (walker->fn(sch, 1, walker) < 0) {
640 walker->stop = 1;
641 return;
642 }
643 walker->count++;
644 }
645}
646
647static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
648{
649 struct sfb_sched_data *q = qdisc_priv(sch);
650
651 if (cl)
652 return NULL;
653 return &q->filter_list;
654}
655
656static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
657 u32 classid)
658{
659 return 0;
660}
661
662
663static const struct Qdisc_class_ops sfb_class_ops = {
664 .graft = sfb_graft,
665 .leaf = sfb_leaf,
666 .get = sfb_get,
667 .put = sfb_put,
668 .change = sfb_change_class,
669 .delete = sfb_delete,
670 .walk = sfb_walk,
671 .tcf_chain = sfb_find_tcf,
672 .bind_tcf = sfb_bind,
673 .unbind_tcf = sfb_put,
674 .dump = sfb_dump_class,
675};
676
677static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
678 .id = "sfb",
679 .priv_size = sizeof(struct sfb_sched_data),
680 .cl_ops = &sfb_class_ops,
681 .enqueue = sfb_enqueue,
682 .dequeue = sfb_dequeue,
683 .peek = sfb_peek,
684 .init = sfb_init,
685 .reset = sfb_reset,
686 .destroy = sfb_destroy,
687 .change = sfb_change,
688 .dump = sfb_dump,
689 .dump_stats = sfb_dump_stats,
690 .owner = THIS_MODULE,
691};
692
693static int __init sfb_module_init(void)
694{
695 return register_qdisc(&sfb_qdisc_ops);
696}
697
698static void __exit sfb_module_exit(void)
699{
700 unregister_qdisc(&sfb_qdisc_ops);
701}
702
703module_init(sfb_module_init)
704module_exit(sfb_module_exit)
705
706MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline");
707MODULE_AUTHOR("Juliusz Chroboczek");
708MODULE_AUTHOR("Eric Dumazet");
709MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..b6ea6afa55b0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/jhash.h> 22#include <linux/jhash.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/vmalloc.h>
24#include <net/ip.h> 25#include <net/ip.h>
25#include <net/netlink.h> 26#include <net/netlink.h>
26#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
@@ -67,55 +68,81 @@
67 68
68 IMPLEMENTATION: 69 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128; 70 This implementation limits maximal queue length to 128;
70 maximal mtu to 2^15-1; number of hash buckets to 1024. 71 max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
71 The only goal of this restrictions was that all data 72 The only goal of this restrictions was that all data
72 fit into one 4K page :-). Struct sfq_sched_data is 73 fit into one 4K page on 32bit arches.
73 organized in anti-cache manner: all the data for a bucket
74 are scattered over different locations. This is not good,
75 but it allowed me to put it into 4K.
76 74
77 It is easy to increase these values, but not in flight. */ 75 It is easy to increase these values, but not in flight. */
78 76
79#define SFQ_DEPTH 128 77#define SFQ_DEPTH 128 /* max number of packets per flow */
80#define SFQ_HASH_DIVISOR 1024 78#define SFQ_SLOTS 128 /* max number of flows */
79#define SFQ_EMPTY_SLOT 255
80#define SFQ_DEFAULT_HASH_DIVISOR 1024
81 81
82/* This type should contain at least SFQ_DEPTH*2 values */ 82/* We use 16 bits to store allot, and want to handle packets up to 64K
83 * Scale allot by 8 (1<<3) so that no overflow occurs.
84 */
85#define SFQ_ALLOT_SHIFT 3
86#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
87
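As a quick sanity check of the scaling introduced above (values picked for illustration): a 1514-byte Ethernet frame costs 190 scaled allot units, and even a 64KB packet costs 8192, comfortably inside a 16-bit field. A stand-alone sketch; DIV_ROUND_UP here is a user-space stand-in for the kernel macro:

	#include <stdio.h>

	#define SFQ_ALLOT_SHIFT		3
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))	/* user-space stand-in */
	#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)

	int main(void)
	{
		printf("%d\n", SFQ_ALLOT_SIZE(1514));	/* 190 */
		printf("%d\n", SFQ_ALLOT_SIZE(65536));	/* 8192 */
		return 0;
	}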
88/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
83typedef unsigned char sfq_index; 89typedef unsigned char sfq_index;
84 90
85struct sfq_head 91/*
86{ 92 * We dont use pointers to save space.
93 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
94 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
95 * are 'pointers' to dep[] array
96 */
97struct sfq_head {
87 sfq_index next; 98 sfq_index next;
88 sfq_index prev; 99 sfq_index prev;
89}; 100};
90 101
91struct sfq_sched_data 102struct sfq_slot {
92{ 103 struct sk_buff *skblist_next;
104 struct sk_buff *skblist_prev;
105 sfq_index qlen; /* number of skbs in skblist */
106 sfq_index next; /* next slot in sfq chain */
107 struct sfq_head dep; /* anchor in dep[] chains */
108 unsigned short hash; /* hash value (index in ht[]) */
109 short allot; /* credit for this slot */
110};
111
112struct sfq_sched_data {
93/* Parameters */ 113/* Parameters */
94 int perturb_period; 114 int perturb_period;
95 unsigned quantum; /* Allotment per round: MUST BE >= MTU */ 115 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
96 int limit; 116 int limit;
97 117 unsigned int divisor; /* number of slots in hash table */
98/* Variables */ 118/* Variables */
99 struct tcf_proto *filter_list; 119 struct tcf_proto *filter_list;
100 struct timer_list perturb_timer; 120 struct timer_list perturb_timer;
101 u32 perturbation; 121 u32 perturbation;
102 sfq_index tail; /* Index of current slot in round */ 122 sfq_index cur_depth; /* depth of longest slot */
103 sfq_index max_depth; /* Maximal depth */ 123 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
104 124 struct sfq_slot *tail; /* current slot in round */
105 sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ 125 sfq_index *ht; /* Hash table (divisor slots) */
106 sfq_index next[SFQ_DEPTH]; /* Active slots link */ 126 struct sfq_slot slots[SFQ_SLOTS];
107 short allot[SFQ_DEPTH]; /* Current allotment per slot */ 127 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
108 unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */
109 struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */
110 struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */
111}; 128};
112 129
113static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) 130/*
131 * sfq_head are either in a sfq_slot or in dep[] array
132 */
133static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
114{ 134{
115 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); 135 if (val < SFQ_SLOTS)
136 return &q->slots[val].dep;
137 return &q->dep[val - SFQ_SLOTS];
116} 138}
117 139
118static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) 140static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
141{
142 return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
143}
144
145static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
119{ 146{
120 u32 h, h2; 147 u32 h, h2;
121 148
@@ -123,40 +150,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 150 case htons(ETH_P_IP):
124 { 151 {
125 const struct iphdr *iph; 152 const struct iphdr *iph;
153 int poff;
126 154
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 155 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 156 goto err;
129 iph = ip_hdr(skb); 157 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 158 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 159 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 160 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 161 break;
134 iph->protocol == IPPROTO_UDP || 162 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 163 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 164 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 165 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 166 h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 167 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 168 break;
142 } 169 }
143 case htons(ETH_P_IPV6): 170 case htons(ETH_P_IPV6):
144 { 171 {
145 struct ipv6hdr *iph; 172 const struct ipv6hdr *iph;
173 int poff;
146 174
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 175 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 176 goto err;
149 iph = ipv6_hdr(skb); 177 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 178 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 179 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 180 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 181 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 182 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 183 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 184 h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 185 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 186 break;
161 } 187 }
162 default: 188 default:
@@ -177,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
177 203
178 if (TC_H_MAJ(skb->priority) == sch->handle && 204 if (TC_H_MAJ(skb->priority) == sch->handle &&
179 TC_H_MIN(skb->priority) > 0 && 205 TC_H_MIN(skb->priority) > 0 &&
180 TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) 206 TC_H_MIN(skb->priority) <= q->divisor)
181 return TC_H_MIN(skb->priority); 207 return TC_H_MIN(skb->priority);
182 208
183 if (!q->filter_list) 209 if (!q->filter_list)
@@ -195,36 +221,47 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
195 return 0; 221 return 0;
196 } 222 }
197#endif 223#endif
198 if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) 224 if (TC_H_MIN(res.classid) <= q->divisor)
199 return TC_H_MIN(res.classid); 225 return TC_H_MIN(res.classid);
200 } 226 }
201 return 0; 227 return 0;
202} 228}
203 229
230/*
231 * x : slot number [0 .. SFQ_SLOTS - 1]
232 */
204static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 233static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
205{ 234{
206 sfq_index p, n; 235 sfq_index p, n;
207 int d = q->qs[x].qlen + SFQ_DEPTH; 236 int qlen = q->slots[x].qlen;
208 237
209 p = d; 238 p = qlen + SFQ_SLOTS;
210 n = q->dep[d].next; 239 n = q->dep[qlen].next;
211 q->dep[x].next = n; 240
212 q->dep[x].prev = p; 241 q->slots[x].dep.next = n;
213 q->dep[p].next = q->dep[n].prev = x; 242 q->slots[x].dep.prev = p;
243
244 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
245 sfq_dep_head(q, n)->prev = x;
214} 246}
215 247
248#define sfq_unlink(q, x, n, p) \
249 n = q->slots[x].dep.next; \
250 p = q->slots[x].dep.prev; \
251 sfq_dep_head(q, p)->next = n; \
252 sfq_dep_head(q, n)->prev = p
253
254
216static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) 255static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
217{ 256{
218 sfq_index p, n; 257 sfq_index p, n;
258 int d;
219 259
220 n = q->dep[x].next; 260 sfq_unlink(q, x, n, p);
221 p = q->dep[x].prev;
222 q->dep[p].next = n;
223 q->dep[n].prev = p;
224
225 if (n == p && q->max_depth == q->qs[x].qlen + 1)
226 q->max_depth--;
227 261
262 d = q->slots[x].qlen--;
263 if (n == p && q->cur_depth == d)
264 q->cur_depth--;
228 sfq_link(q, x); 265 sfq_link(q, x);
229} 266}
230 267
@@ -233,34 +270,74 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
233 sfq_index p, n; 270 sfq_index p, n;
234 int d; 271 int d;
235 272
236 n = q->dep[x].next; 273 sfq_unlink(q, x, n, p);
237 p = q->dep[x].prev;
238 q->dep[p].next = n;
239 q->dep[n].prev = p;
240 d = q->qs[x].qlen;
241 if (q->max_depth < d)
242 q->max_depth = d;
243 274
275 d = ++q->slots[x].qlen;
276 if (q->cur_depth < d)
277 q->cur_depth = d;
244 sfq_link(q, x); 278 sfq_link(q, x);
245} 279}
246 280
281/* helper functions : might be changed when/if skb use a standard list_head */
282
283/* remove one skb from tail of slot queue */
284static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
285{
286 struct sk_buff *skb = slot->skblist_prev;
287
288 slot->skblist_prev = skb->prev;
289 skb->prev->next = (struct sk_buff *)slot;
290 skb->next = skb->prev = NULL;
291 return skb;
292}
293
294/* remove one skb from head of slot queue */
295static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
296{
297 struct sk_buff *skb = slot->skblist_next;
298
299 slot->skblist_next = skb->next;
300 skb->next->prev = (struct sk_buff *)slot;
301 skb->next = skb->prev = NULL;
302 return skb;
303}
304
305static inline void slot_queue_init(struct sfq_slot *slot)
306{
307 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
308}
309
310/* add skb to slot queue (tail add) */
311static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
312{
313 skb->prev = slot->skblist_prev;
314 skb->next = (struct sk_buff *)slot;
315 slot->skblist_prev->next = skb;
316 slot->skblist_prev = skb;
317}
318
319#define slot_queue_walk(slot, skb) \
320 for (skb = slot->skblist_next; \
321 skb != (struct sk_buff *)slot; \
322 skb = skb->next)
323
247static unsigned int sfq_drop(struct Qdisc *sch) 324static unsigned int sfq_drop(struct Qdisc *sch)
248{ 325{
249 struct sfq_sched_data *q = qdisc_priv(sch); 326 struct sfq_sched_data *q = qdisc_priv(sch);
250 sfq_index d = q->max_depth; 327 sfq_index x, d = q->cur_depth;
251 struct sk_buff *skb; 328 struct sk_buff *skb;
252 unsigned int len; 329 unsigned int len;
330 struct sfq_slot *slot;
253 331
254 /* Queue is full! Find the longest slot and 332 /* Queue is full! Find the longest slot and drop tail packet from it */
255 drop a packet from it */
256
257 if (d > 1) { 333 if (d > 1) {
258 sfq_index x = q->dep[d + SFQ_DEPTH].next; 334 x = q->dep[d].next;
259 skb = q->qs[x].prev; 335 slot = &q->slots[x];
336drop:
337 skb = slot_dequeue_tail(slot);
260 len = qdisc_pkt_len(skb); 338 len = qdisc_pkt_len(skb);
261 __skb_unlink(skb, &q->qs[x]);
262 kfree_skb(skb);
263 sfq_dec(q, x); 339 sfq_dec(q, x);
340 kfree_skb(skb);
264 sch->q.qlen--; 341 sch->q.qlen--;
265 sch->qstats.drops++; 342 sch->qstats.drops++;
266 sch->qstats.backlog -= len; 343 sch->qstats.backlog -= len;
@@ -269,19 +346,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
269 346
270 if (d == 1) { 347 if (d == 1) {
271 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ 348 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
272 d = q->next[q->tail]; 349 x = q->tail->next;
273 q->next[q->tail] = q->next[d]; 350 slot = &q->slots[x];
274 q->allot[q->next[d]] += q->quantum; 351 q->tail->next = slot->next;
275 skb = q->qs[d].prev; 352 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
276 len = qdisc_pkt_len(skb); 353 goto drop;
277 __skb_unlink(skb, &q->qs[d]);
278 kfree_skb(skb);
279 sfq_dec(q, d);
280 sch->q.qlen--;
281 q->ht[q->hash[d]] = SFQ_DEPTH;
282 sch->qstats.drops++;
283 sch->qstats.backlog -= len;
284 return len;
285 } 354 }
286 355
287 return 0; 356 return 0;
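
A small illustrative fragment (assumed names, not the kernel structures) of why the victim lookup above is constant time: flows are chained on per-depth lists and cur_depth tracks the deepest non-empty one, so a longest flow is simply the first entry of dep[cur_depth].

#define MAX_DEPTH	127

struct demo_flow {
	struct demo_flow *next_same_depth;	/* chain of flows with equal backlog */
	unsigned int qlen;
};

struct demo_sched {
	struct demo_flow *dep[MAX_DEPTH + 1];	/* dep[d]: flows whose qlen == d */
	unsigned int cur_depth;			/* deepest non-empty bucket */
};

struct demo_flow *pick_drop_victim(struct demo_sched *q)
{
	/* mirrors "x = q->dep[d].next" with d = q->cur_depth */
	return q->cur_depth ? q->dep[q->cur_depth] : NULL;
}
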
@@ -292,7 +361,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
292{ 361{
293 struct sfq_sched_data *q = qdisc_priv(sch); 362 struct sfq_sched_data *q = qdisc_priv(sch);
294 unsigned int hash; 363 unsigned int hash;
295 sfq_index x; 364 sfq_index x, qlen;
365 struct sfq_slot *slot;
296 int uninitialized_var(ret); 366 int uninitialized_var(ret);
297 367
298 hash = sfq_classify(skb, sch, &ret); 368 hash = sfq_classify(skb, sch, &ret);
@@ -305,54 +375,42 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
305 hash--; 375 hash--;
306 376
307 x = q->ht[hash]; 377 x = q->ht[hash];
308 if (x == SFQ_DEPTH) { 378 slot = &q->slots[x];
309 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 379 if (x == SFQ_EMPTY_SLOT) {
310 q->hash[x] = hash; 380 x = q->dep[0].next; /* get a free slot */
381 q->ht[hash] = x;
382 slot = &q->slots[x];
383 slot->hash = hash;
311 } 384 }
312 385
313 /* If selected queue has length q->limit, this means that 386 /* If selected queue has length q->limit, do simple tail drop,
314 * all another queues are empty and that we do simple tail drop,
315 * i.e. drop _this_ packet. 387 * i.e. drop _this_ packet.
316 */ 388 */
317 if (q->qs[x].qlen >= q->limit) 389 if (slot->qlen >= q->limit)
318 return qdisc_drop(skb, sch); 390 return qdisc_drop(skb, sch);
319 391
320 sch->qstats.backlog += qdisc_pkt_len(skb); 392 sch->qstats.backlog += qdisc_pkt_len(skb);
321 __skb_queue_tail(&q->qs[x], skb); 393 slot_queue_add(slot, skb);
322 sfq_inc(q, x); 394 sfq_inc(q, x);
323 if (q->qs[x].qlen == 1) { /* The flow is new */ 395 if (slot->qlen == 1) { /* The flow is new */
324 if (q->tail == SFQ_DEPTH) { /* It is the first flow */ 396 if (q->tail == NULL) { /* It is the first flow */
325 q->tail = x; 397 slot->next = x;
326 q->next[x] = x;
327 q->allot[x] = q->quantum;
328 } else { 398 } else {
329 q->next[x] = q->next[q->tail]; 399 slot->next = q->tail->next;
330 q->next[q->tail] = x; 400 q->tail->next = x;
331 q->tail = x;
332 } 401 }
402 q->tail = slot;
403 slot->allot = q->scaled_quantum;
333 } 404 }
334 if (++sch->q.qlen <= q->limit) { 405 if (++sch->q.qlen <= q->limit)
335 sch->bstats.bytes += qdisc_pkt_len(skb);
336 sch->bstats.packets++;
337 return NET_XMIT_SUCCESS; 406 return NET_XMIT_SUCCESS;
338 }
339 407
408 qlen = slot->qlen;
340 sfq_drop(sch); 409 sfq_drop(sch);
341 return NET_XMIT_CN; 410 /* Return Congestion Notification only if we dropped a packet
342} 411 * from this flow.
343 412 */
344static struct sk_buff * 413 return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS;
345sfq_peek(struct Qdisc *sch)
346{
347 struct sfq_sched_data *q = qdisc_priv(sch);
348 sfq_index a;
349
350 /* No active slots */
351 if (q->tail == SFQ_DEPTH)
352 return NULL;
353
354 a = q->next[q->tail];
355 return skb_peek(&q->qs[a]);
356} 414}
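
The enqueue path above also maintains a circular ring of active flows via slot->next and q->tail. A rough sketch of that splice, using simplified pointer-only types (the kernel mixes slot indices and pointers):

#include <stddef.h>

struct ring_flow {
	struct ring_flow *next;
	int id;
};

struct ring {
	struct ring_flow *tail;			/* NULL while no flow is active */
};

void ring_add(struct ring *r, struct ring_flow *f)
{
	if (r->tail == NULL) {
		f->next = f;			/* first active flow: a ring of one */
	} else {
		f->next = r->tail->next;	/* old head stays the next to be served */
		r->tail->next = f;
	}
	r->tail = f;				/* service starts from tail->next, the oldest flow */
}
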
357 415
358static struct sk_buff * 416static struct sk_buff *
@@ -360,34 +418,38 @@ sfq_dequeue(struct Qdisc *sch)
360{ 418{
361 struct sfq_sched_data *q = qdisc_priv(sch); 419 struct sfq_sched_data *q = qdisc_priv(sch);
362 struct sk_buff *skb; 420 struct sk_buff *skb;
363 sfq_index a, old_a; 421 sfq_index a, next_a;
422 struct sfq_slot *slot;
364 423
365 /* No active slots */ 424 /* No active slots */
366 if (q->tail == SFQ_DEPTH) 425 if (q->tail == NULL)
367 return NULL; 426 return NULL;
368 427
369 a = old_a = q->next[q->tail]; 428next_slot:
370 429 a = q->tail->next;
371 /* Grab packet */ 430 slot = &q->slots[a];
372 skb = __skb_dequeue(&q->qs[a]); 431 if (slot->allot <= 0) {
432 q->tail = slot;
433 slot->allot += q->scaled_quantum;
434 goto next_slot;
435 }
436 skb = slot_dequeue_head(slot);
373 sfq_dec(q, a); 437 sfq_dec(q, a);
438 qdisc_bstats_update(sch, skb);
374 sch->q.qlen--; 439 sch->q.qlen--;
375 sch->qstats.backlog -= qdisc_pkt_len(skb); 440 sch->qstats.backlog -= qdisc_pkt_len(skb);
376 441
377 /* Is the slot empty? */ 442 /* Is the slot empty? */
378 if (q->qs[a].qlen == 0) { 443 if (slot->qlen == 0) {
379 q->ht[q->hash[a]] = SFQ_DEPTH; 444 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
380 a = q->next[a]; 445 next_a = slot->next;
381 if (a == old_a) { 446 if (a == next_a) {
382 q->tail = SFQ_DEPTH; 447 q->tail = NULL; /* no more active slots */
383 return skb; 448 return skb;
384 } 449 }
385 q->next[q->tail] = a; 450 q->tail->next = next_a;
386 q->allot[a] += q->quantum; 451 } else {
387 } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { 452 slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
388 q->tail = a;
389 a = q->next[a];
390 q->allot[a] += q->quantum;
391 } 453 }
392 return skb; 454 return skb;
393} 455}
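
The dequeue loop above implements deficit round robin: a flow whose allot is exhausted is skipped, refilled by one quantum and revisited on a later round, so large packets borrow credit and repay it over time. A self-contained userspace model follows; the packet sizes and quantum are arbitrary demo values, and the kernel keeps the credit in scaled units (SFQ_ALLOT_SIZE) rather than plain bytes.

#include <stdio.h>

#define NFLOWS 2

struct flow {
	int allot;	/* remaining byte credit, may go negative */
	int pkts;	/* packets still queued */
	int pkt_len;	/* fixed packet size for the demo */
};

int main(void)
{
	struct flow flows[NFLOWS] = {
		{ .allot = 1500, .pkts = 3, .pkt_len = 1500 },
		{ .allot = 1500, .pkts = 6, .pkt_len = 300 },
	};
	int quantum = 1500, cur = 0, left = 9;

	while (left) {
		struct flow *f = &flows[cur];

		if (!f->pkts || f->allot <= 0) {
			if (f->pkts)
				f->allot += quantum;	/* skip and refill, like "goto next_slot" */
			cur = (cur + 1) % NFLOWS;
			continue;
		}
		f->allot -= f->pkt_len;		/* the kernel charges SFQ_ALLOT_SIZE(len) instead */
		f->pkts--;
		left--;
		printf("dequeued from flow %d, allot now %d\n", cur, f->allot);
	}
	return 0;
}
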
@@ -421,12 +483,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
421 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 483 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
422 return -EINVAL; 484 return -EINVAL;
423 485
486 if (ctl->divisor &&
487 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
488 return -EINVAL;
489
424 sch_tree_lock(sch); 490 sch_tree_lock(sch);
425 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); 491 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
492 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
426 q->perturb_period = ctl->perturb_period * HZ; 493 q->perturb_period = ctl->perturb_period * HZ;
427 if (ctl->limit) 494 if (ctl->limit)
428 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 495 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
429 496 if (ctl->divisor)
497 q->divisor = ctl->divisor;
430 qlen = sch->q.qlen; 498 qlen = sch->q.qlen;
431 while (sch->q.qlen > q->limit) 499 while (sch->q.qlen > q->limit)
432 sfq_drop(sch); 500 sfq_drop(sch);
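
The new divisor check above accepts only powers of two up to 65536 (zero means "keep the current value"); that shape allows a flow hash to be folded into a bucket index with a mask rather than a modulo. A hedged sketch with illustrative helper names:

#include <stdbool.h>

bool divisor_is_valid(unsigned int divisor)
{
	return divisor != 0 &&
	       (divisor & (divisor - 1)) == 0 &&	/* power of two */
	       divisor <= 65536;
}

unsigned int hash_to_bucket(unsigned int hashval, unsigned int divisor)
{
	return hashval & (divisor - 1);		/* cheap modulo, valid only for powers of two */
}
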
@@ -444,26 +512,25 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
444static int sfq_init(struct Qdisc *sch, struct nlattr *opt) 512static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
445{ 513{
446 struct sfq_sched_data *q = qdisc_priv(sch); 514 struct sfq_sched_data *q = qdisc_priv(sch);
515 size_t sz;
447 int i; 516 int i;
448 517
449 q->perturb_timer.function = sfq_perturbation; 518 q->perturb_timer.function = sfq_perturbation;
450 q->perturb_timer.data = (unsigned long)sch; 519 q->perturb_timer.data = (unsigned long)sch;
451 init_timer_deferrable(&q->perturb_timer); 520 init_timer_deferrable(&q->perturb_timer);
452 521
453 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
454 q->ht[i] = SFQ_DEPTH;
455
456 for (i = 0; i < SFQ_DEPTH; i++) { 522 for (i = 0; i < SFQ_DEPTH; i++) {
457 skb_queue_head_init(&q->qs[i]); 523 q->dep[i].next = i + SFQ_SLOTS;
458 q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; 524 q->dep[i].prev = i + SFQ_SLOTS;
459 q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
460 } 525 }
461 526
462 q->limit = SFQ_DEPTH - 1; 527 q->limit = SFQ_DEPTH - 1;
463 q->max_depth = 0; 528 q->cur_depth = 0;
464 q->tail = SFQ_DEPTH; 529 q->tail = NULL;
530 q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
465 if (opt == NULL) { 531 if (opt == NULL) {
466 q->quantum = psched_mtu(qdisc_dev(sch)); 532 q->quantum = psched_mtu(qdisc_dev(sch));
533 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
467 q->perturb_period = 0; 534 q->perturb_period = 0;
468 q->perturbation = net_random(); 535 q->perturbation = net_random();
469 } else { 536 } else {
@@ -472,8 +539,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
472 return err; 539 return err;
473 } 540 }
474 541
475 for (i = 0; i < SFQ_DEPTH; i++) 542 sz = sizeof(q->ht[0]) * q->divisor;
543 q->ht = kmalloc(sz, GFP_KERNEL);
544 if (!q->ht && sz > PAGE_SIZE)
545 q->ht = vmalloc(sz);
546 if (!q->ht)
547 return -ENOMEM;
548 for (i = 0; i < q->divisor; i++)
549 q->ht[i] = SFQ_EMPTY_SLOT;
550
551 for (i = 0; i < SFQ_SLOTS; i++) {
552 slot_queue_init(&q->slots[i]);
476 sfq_link(q, i); 553 sfq_link(q, i);
554 }
555 if (q->limit >= 1)
556 sch->flags |= TCQ_F_CAN_BYPASS;
557 else
558 sch->flags &= ~TCQ_F_CAN_BYPASS;
477 return 0; 559 return 0;
478} 560}
479 561
@@ -484,6 +566,10 @@ static void sfq_destroy(struct Qdisc *sch)
484 tcf_destroy_chain(&q->filter_list); 566 tcf_destroy_chain(&q->filter_list);
485 q->perturb_period = 0; 567 q->perturb_period = 0;
486 del_timer_sync(&q->perturb_timer); 568 del_timer_sync(&q->perturb_timer);
569 if (is_vmalloc_addr(q->ht))
570 vfree(q->ht);
571 else
572 kfree(q->ht);
487} 573}
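
sfq_init() and sfq_destroy() above pair a kmalloc-with-vmalloc-fallback allocation with an address-based free. A compact kernel-style sketch of that pairing (the wrapper names are illustrative):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

void *sfq_table_alloc(size_t sz)
{
	void *ptr = kmalloc(sz, GFP_KERNEL);

	if (!ptr && sz > PAGE_SIZE)
		ptr = vmalloc(sz);	/* large hash tables fall back to vmalloc */
	return ptr;
}

void sfq_table_free(void *ptr)
{
	if (!ptr)
		return;
	if (is_vmalloc_addr(ptr))	/* pick the free that matches the allocator */
		vfree(ptr);
	else
		kfree(ptr);
}
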
488 574
489static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 575static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -496,7 +582,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
496 opt.perturb_period = q->perturb_period / HZ; 582 opt.perturb_period = q->perturb_period / HZ;
497 583
498 opt.limit = q->limit; 584 opt.limit = q->limit;
499 opt.divisor = SFQ_HASH_DIVISOR; 585 opt.divisor = q->divisor;
500 opt.flows = q->limit; 586 opt.flows = q->limit;
501 587
502 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 588 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -521,6 +607,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
521static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, 607static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
522 u32 classid) 608 u32 classid)
523{ 609{
610 /* we cannot bypass queue discipline anymore */
611 sch->flags &= ~TCQ_F_CAN_BYPASS;
524 return 0; 612 return 0;
525} 613}
526 614
@@ -548,10 +636,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
548 struct gnet_dump *d) 636 struct gnet_dump *d)
549{ 637{
550 struct sfq_sched_data *q = qdisc_priv(sch); 638 struct sfq_sched_data *q = qdisc_priv(sch);
551 sfq_index idx = q->ht[cl-1]; 639 sfq_index idx = q->ht[cl - 1];
552 struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; 640 struct gnet_stats_queue qs = { 0 };
553 struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; 641 struct tc_sfq_xstats xstats = { 0 };
642 struct sk_buff *skb;
554 643
644 if (idx != SFQ_EMPTY_SLOT) {
645 const struct sfq_slot *slot = &q->slots[idx];
646
647 xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
648 qs.qlen = slot->qlen;
649 slot_queue_walk(slot, skb)
650 qs.backlog += qdisc_pkt_len(skb);
651 }
555 if (gnet_stats_copy_queue(d, &qs) < 0) 652 if (gnet_stats_copy_queue(d, &qs) < 0)
556 return -1; 653 return -1;
557 return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); 654 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -565,8 +662,8 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
565 if (arg->stop) 662 if (arg->stop)
566 return; 663 return;
567 664
568 for (i = 0; i < SFQ_HASH_DIVISOR; i++) { 665 for (i = 0; i < q->divisor; i++) {
569 if (q->ht[i] == SFQ_DEPTH || 666 if (q->ht[i] == SFQ_EMPTY_SLOT ||
570 arg->count < arg->skip) { 667 arg->count < arg->skip) {
571 arg->count++; 668 arg->count++;
572 continue; 669 continue;
@@ -597,7 +694,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
597 .priv_size = sizeof(struct sfq_sched_data), 694 .priv_size = sizeof(struct sfq_sched_data),
598 .enqueue = sfq_enqueue, 695 .enqueue = sfq_enqueue,
599 .dequeue = sfq_dequeue, 696 .dequeue = sfq_dequeue,
600 .peek = sfq_peek, 697 .peek = qdisc_peek_dequeued,
601 .drop = sfq_drop, 698 .drop = sfq_drop,
602 .init = sfq_init, 699 .init = sfq_init,
603 .reset = sfq_reset, 700 .reset = sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 641a30d64635..1dcfb5223a86 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
97 changed the limit is not effective anymore. 97 changed the limit is not effective anymore.
98*/ 98*/
99 99
100struct tbf_sched_data 100struct tbf_sched_data {
101{
102/* Parameters */ 101/* Parameters */
103 u32 limit; /* Maximal length of backlog: bytes */ 102 u32 limit; /* Maximal length of backlog: bytes */
104 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ 103 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
115 struct qdisc_watchdog watchdog; /* Watchdog timer */ 114 struct qdisc_watchdog watchdog; /* Watchdog timer */
116}; 115};
117 116
118#define L2T(q,L) qdisc_l2t((q)->R_tab,L) 117#define L2T(q, L) qdisc_l2t((q)->R_tab, L)
119#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) 118#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
120 119
121static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) 120static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
122{ 121{
123 struct tbf_sched_data *q = qdisc_priv(sch); 122 struct tbf_sched_data *q = qdisc_priv(sch);
124 int ret; 123 int ret;
@@ -134,12 +133,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
134 } 133 }
135 134
136 sch->q.qlen++; 135 sch->q.qlen++;
137 sch->bstats.bytes += qdisc_pkt_len(skb);
138 sch->bstats.packets++;
139 return NET_XMIT_SUCCESS; 136 return NET_XMIT_SUCCESS;
140} 137}
141 138
142static unsigned int tbf_drop(struct Qdisc* sch) 139static unsigned int tbf_drop(struct Qdisc *sch)
143{ 140{
144 struct tbf_sched_data *q = qdisc_priv(sch); 141 struct tbf_sched_data *q = qdisc_priv(sch);
145 unsigned int len = 0; 142 unsigned int len = 0;
@@ -151,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
151 return len; 148 return len;
152} 149}
153 150
154static struct sk_buff *tbf_dequeue(struct Qdisc* sch) 151static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
155{ 152{
156 struct tbf_sched_data *q = qdisc_priv(sch); 153 struct tbf_sched_data *q = qdisc_priv(sch);
157 struct sk_buff *skb; 154 struct sk_buff *skb;
@@ -187,7 +184,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
187 q->tokens = toks; 184 q->tokens = toks;
188 q->ptokens = ptoks; 185 q->ptokens = ptoks;
189 sch->q.qlen--; 186 sch->q.qlen--;
190 sch->flags &= ~TCQ_F_THROTTLED; 187 qdisc_unthrottled(sch);
188 qdisc_bstats_update(sch, skb);
191 return skb; 189 return skb;
192 } 190 }
193 191
@@ -210,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
210 return NULL; 208 return NULL;
211} 209}
212 210
213static void tbf_reset(struct Qdisc* sch) 211static void tbf_reset(struct Qdisc *sch)
214{ 212{
215 struct tbf_sched_data *q = qdisc_priv(sch); 213 struct tbf_sched_data *q = qdisc_priv(sch);
216 214
@@ -228,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
228 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 226 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
229}; 227};
230 228
231static int tbf_change(struct Qdisc* sch, struct nlattr *opt) 229static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
232{ 230{
233 int err; 231 int err;
234 struct tbf_sched_data *q = qdisc_priv(sch); 232 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -237,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
237 struct qdisc_rate_table *rtab = NULL; 235 struct qdisc_rate_table *rtab = NULL;
238 struct qdisc_rate_table *ptab = NULL; 236 struct qdisc_rate_table *ptab = NULL;
239 struct Qdisc *child = NULL; 237 struct Qdisc *child = NULL;
240 int max_size,n; 238 int max_size, n;
241 239
242 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); 240 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
243 if (err < 0) 241 if (err < 0)
@@ -260,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
260 } 258 }
261 259
262 for (n = 0; n < 256; n++) 260 for (n = 0; n < 256; n++)
263 if (rtab->data[n] > qopt->buffer) break; 261 if (rtab->data[n] > qopt->buffer)
264 max_size = (n << qopt->rate.cell_log)-1; 262 break;
263 max_size = (n << qopt->rate.cell_log) - 1;
265 if (ptab) { 264 if (ptab) {
266 int size; 265 int size;
267 266
268 for (n = 0; n < 256; n++) 267 for (n = 0; n < 256; n++)
269 if (ptab->data[n] > qopt->mtu) break; 268 if (ptab->data[n] > qopt->mtu)
270 size = (n << qopt->peakrate.cell_log)-1; 269 break;
271 if (size < max_size) max_size = size; 270 size = (n << qopt->peakrate.cell_log) - 1;
271 if (size < max_size)
272 max_size = size;
272 } 273 }
273 if (max_size < 0) 274 if (max_size < 0)
274 goto done; 275 goto done;
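
The reformatted loop above derives max_size from the rate table: data[n] holds the transmission cost of packets in size slot n (sizes around n << cell_log), so scanning for the first entry that exceeds the burst budget gives the largest admissible packet. A hedged standalone sketch (the helper name is illustrative):

#include <linux/types.h>

int tbf_largest_fitting(const u32 *data, int cell_log, u32 budget)
{
	int n;

	for (n = 0; n < 256; n++)
		if (data[n] > budget)
			break;
	return (n << cell_log) - 1;	/* negative when even the smallest slot costs too much */
}
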
@@ -311,7 +312,7 @@ done:
311 return err; 312 return err;
312} 313}
313 314
314static int tbf_init(struct Qdisc* sch, struct nlattr *opt) 315static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
315{ 316{
316 struct tbf_sched_data *q = qdisc_priv(sch); 317 struct tbf_sched_data *q = qdisc_priv(sch);
317 318
@@ -423,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
423 } 424 }
424} 425}
425 426
426static const struct Qdisc_class_ops tbf_class_ops = 427static const struct Qdisc_class_ops tbf_class_ops = {
427{
428 .graft = tbf_graft, 428 .graft = tbf_graft,
429 .leaf = tbf_leaf, 429 .leaf = tbf_leaf,
430 .get = tbf_get, 430 .get = tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index feaabc103ce6..45cd30098e34 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,38 +53,38 @@
53 which will not break load balancing, though native slave 53 which will not break load balancing, though native slave
54 traffic will have the highest priority. */ 54 traffic will have the highest priority. */
55 55
56struct teql_master 56struct teql_master {
57{
58 struct Qdisc_ops qops; 57 struct Qdisc_ops qops;
59 struct net_device *dev; 58 struct net_device *dev;
60 struct Qdisc *slaves; 59 struct Qdisc *slaves;
61 struct list_head master_list; 60 struct list_head master_list;
61 unsigned long tx_bytes;
62 unsigned long tx_packets;
63 unsigned long tx_errors;
64 unsigned long tx_dropped;
62}; 65};
63 66
64struct teql_sched_data 67struct teql_sched_data {
65{
66 struct Qdisc *next; 68 struct Qdisc *next;
67 struct teql_master *m; 69 struct teql_master *m;
68 struct neighbour *ncache; 70 struct neighbour *ncache;
69 struct sk_buff_head q; 71 struct sk_buff_head q;
70}; 72};
71 73
72#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) 74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
73 75
74#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) 76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
75 77
76/* "teql*" qdisc routines */ 78/* "teql*" qdisc routines */
77 79
78static int 80static int
79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) 81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
80{ 82{
81 struct net_device *dev = qdisc_dev(sch); 83 struct net_device *dev = qdisc_dev(sch);
82 struct teql_sched_data *q = qdisc_priv(sch); 84 struct teql_sched_data *q = qdisc_priv(sch);
83 85
84 if (q->q.qlen < dev->tx_queue_len) { 86 if (q->q.qlen < dev->tx_queue_len) {
85 __skb_queue_tail(&q->q, skb); 87 __skb_queue_tail(&q->q, skb);
86 sch->bstats.bytes += qdisc_pkt_len(skb);
87 sch->bstats.packets++;
88 return NET_XMIT_SUCCESS; 88 return NET_XMIT_SUCCESS;
89 } 89 }
90 90
@@ -94,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
94} 94}
95 95
96static struct sk_buff * 96static struct sk_buff *
97teql_dequeue(struct Qdisc* sch) 97teql_dequeue(struct Qdisc *sch)
98{ 98{
99 struct teql_sched_data *dat = qdisc_priv(sch); 99 struct teql_sched_data *dat = qdisc_priv(sch);
100 struct netdev_queue *dat_queue; 100 struct netdev_queue *dat_queue;
@@ -108,19 +108,21 @@ teql_dequeue(struct Qdisc* sch)
108 dat->m->slaves = sch; 108 dat->m->slaves = sch;
109 netif_wake_queue(m); 109 netif_wake_queue(m);
110 } 110 }
111 } else {
112 qdisc_bstats_update(sch, skb);
111 } 113 }
112 sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; 114 sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
113 return skb; 115 return skb;
114} 116}
115 117
116static struct sk_buff * 118static struct sk_buff *
117teql_peek(struct Qdisc* sch) 119teql_peek(struct Qdisc *sch)
118{ 120{
119 /* teql is meant to be used as root qdisc */ 121 /* teql is meant to be used as root qdisc */
120 return NULL; 122 return NULL;
121} 123}
122 124
123static __inline__ void 125static inline void
124teql_neigh_release(struct neighbour *n) 126teql_neigh_release(struct neighbour *n)
125{ 127{
126 if (n) 128 if (n)
@@ -128,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
128} 130}
129 131
130static void 132static void
131teql_reset(struct Qdisc* sch) 133teql_reset(struct Qdisc *sch)
132{ 134{
133 struct teql_sched_data *dat = qdisc_priv(sch); 135 struct teql_sched_data *dat = qdisc_priv(sch);
134 136
@@ -138,13 +140,14 @@ teql_reset(struct Qdisc* sch)
138} 140}
139 141
140static void 142static void
141teql_destroy(struct Qdisc* sch) 143teql_destroy(struct Qdisc *sch)
142{ 144{
143 struct Qdisc *q, *prev; 145 struct Qdisc *q, *prev;
144 struct teql_sched_data *dat = qdisc_priv(sch); 146 struct teql_sched_data *dat = qdisc_priv(sch);
145 struct teql_master *master = dat->m; 147 struct teql_master *master = dat->m;
146 148
147 if ((prev = master->slaves) != NULL) { 149 prev = master->slaves;
150 if (prev) {
148 do { 151 do {
149 q = NEXT_SLAVE(prev); 152 q = NEXT_SLAVE(prev);
150 if (q == sch) { 153 if (q == sch) {
@@ -176,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
176static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) 179static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
177{ 180{
178 struct net_device *dev = qdisc_dev(sch); 181 struct net_device *dev = qdisc_dev(sch);
179 struct teql_master *m = (struct teql_master*)sch->ops; 182 struct teql_master *m = (struct teql_master *)sch->ops;
180 struct teql_sched_data *q = qdisc_priv(sch); 183 struct teql_sched_data *q = qdisc_priv(sch);
181 184
182 if (dev->hard_header_len > m->dev->hard_header_len) 185 if (dev->hard_header_len > m->dev->hard_header_len)
@@ -241,11 +244,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
241 } 244 }
242 if (neigh_event_send(n, skb_res) == 0) { 245 if (neigh_event_send(n, skb_res) == 0) {
243 int err; 246 int err;
247 char haddr[MAX_ADDR_LEN];
244 248
245 read_lock(&n->lock); 249 neigh_ha_snapshot(haddr, n, dev);
246 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 250 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
247 n->ha, NULL, skb->len); 251 NULL, skb->len);
248 read_unlock(&n->lock);
249 252
250 if (err < 0) { 253 if (err < 0) {
251 neigh_release(n); 254 neigh_release(n);
@@ -275,7 +278,6 @@ static inline int teql_resolve(struct sk_buff *skb,
275static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) 278static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
276{ 279{
277 struct teql_master *master = netdev_priv(dev); 280 struct teql_master *master = netdev_priv(dev);
278 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
279 struct Qdisc *start, *q; 281 struct Qdisc *start, *q;
280 int busy; 282 int busy;
281 int nores; 283 int nores;
@@ -288,7 +290,8 @@ restart:
288 nores = 0; 290 nores = 0;
289 busy = 0; 291 busy = 0;
290 292
291 if ((q = start) == NULL) 293 q = start;
294 if (!q)
292 goto drop; 295 goto drop;
293 296
294 do { 297 do {
@@ -309,15 +312,14 @@ restart:
309 if (__netif_tx_trylock(slave_txq)) { 312 if (__netif_tx_trylock(slave_txq)) {
310 unsigned int length = qdisc_pkt_len(skb); 313 unsigned int length = qdisc_pkt_len(skb);
311 314
312 if (!netif_tx_queue_stopped(slave_txq) && 315 if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
313 !netif_tx_queue_frozen(slave_txq) &&
314 slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { 316 slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
315 txq_trans_update(slave_txq); 317 txq_trans_update(slave_txq);
316 __netif_tx_unlock(slave_txq); 318 __netif_tx_unlock(slave_txq);
317 master->slaves = NEXT_SLAVE(q); 319 master->slaves = NEXT_SLAVE(q);
318 netif_wake_queue(dev); 320 netif_wake_queue(dev);
319 txq->tx_packets++; 321 master->tx_packets++;
320 txq->tx_bytes += length; 322 master->tx_bytes += length;
321 return NETDEV_TX_OK; 323 return NETDEV_TX_OK;
322 } 324 }
323 __netif_tx_unlock(slave_txq); 325 __netif_tx_unlock(slave_txq);
@@ -344,20 +346,20 @@ restart:
344 netif_stop_queue(dev); 346 netif_stop_queue(dev);
345 return NETDEV_TX_BUSY; 347 return NETDEV_TX_BUSY;
346 } 348 }
347 dev->stats.tx_errors++; 349 master->tx_errors++;
348 350
349drop: 351drop:
350 txq->tx_dropped++; 352 master->tx_dropped++;
351 dev_kfree_skb(skb); 353 dev_kfree_skb(skb);
352 return NETDEV_TX_OK; 354 return NETDEV_TX_OK;
353} 355}
354 356
355static int teql_master_open(struct net_device *dev) 357static int teql_master_open(struct net_device *dev)
356{ 358{
357 struct Qdisc * q; 359 struct Qdisc *q;
358 struct teql_master *m = netdev_priv(dev); 360 struct teql_master *m = netdev_priv(dev);
359 int mtu = 0xFFFE; 361 int mtu = 0xFFFE;
360 unsigned flags = IFF_NOARP|IFF_MULTICAST; 362 unsigned int flags = IFF_NOARP | IFF_MULTICAST;
361 363
362 if (m->slaves == NULL) 364 if (m->slaves == NULL)
363 return -EUNATCH; 365 return -EUNATCH;
@@ -400,6 +402,18 @@ static int teql_master_close(struct net_device *dev)
400 return 0; 402 return 0;
401} 403}
402 404
405static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
406 struct rtnl_link_stats64 *stats)
407{
408 struct teql_master *m = netdev_priv(dev);
409
410 stats->tx_packets = m->tx_packets;
411 stats->tx_bytes = m->tx_bytes;
412 stats->tx_errors = m->tx_errors;
413 stats->tx_dropped = m->tx_dropped;
414 return stats;
415}
416
403static int teql_master_mtu(struct net_device *dev, int new_mtu) 417static int teql_master_mtu(struct net_device *dev, int new_mtu)
404{ 418{
405 struct teql_master *m = netdev_priv(dev); 419 struct teql_master *m = netdev_priv(dev);
@@ -413,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
413 do { 427 do {
414 if (new_mtu > qdisc_dev(q)->mtu) 428 if (new_mtu > qdisc_dev(q)->mtu)
415 return -EINVAL; 429 return -EINVAL;
416 } while ((q=NEXT_SLAVE(q)) != m->slaves); 430 } while ((q = NEXT_SLAVE(q)) != m->slaves);
417 } 431 }
418 432
419 dev->mtu = new_mtu; 433 dev->mtu = new_mtu;
@@ -424,6 +438,7 @@ static const struct net_device_ops teql_netdev_ops = {
424 .ndo_open = teql_master_open, 438 .ndo_open = teql_master_open,
425 .ndo_stop = teql_master_close, 439 .ndo_stop = teql_master_close,
426 .ndo_start_xmit = teql_master_xmit, 440 .ndo_start_xmit = teql_master_xmit,
441 .ndo_get_stats64 = teql_master_stats64,
427 .ndo_change_mtu = teql_master_mtu, 442 .ndo_change_mtu = teql_master_mtu,
428}; 443};
429 444