author     Glenn Elliott <gelliott@cs.unc.edu>   2012-03-04 19:47:13 -0500
committer  Glenn Elliott <gelliott@cs.unc.edu>   2012-03-04 19:47:13 -0500
commit     c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree       ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/sched
parent     ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent     6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'net/sched')
50 files changed, 5384 insertions, 1253 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..2590e91b3289 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -24,7 +24,7 @@ menuconfig NET_SCHED | |||
24 | To administer these schedulers, you'll need the user-level utilities | 24 | To administer these schedulers, you'll need the user-level utilities |
25 | from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. | 25 | from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. |
26 | That package also contains some documentation; for more, check out | 26 | That package also contains some documentation; for more, check out |
27 | <http://linux-net.osdl.org/index.php/Iproute2>. | 27 | <http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2>. |
28 | 28 | ||
29 | This Quality of Service (QoS) support will enable you to use | 29 | This Quality of Service (QoS) support will enable you to use |
30 | Differentiated Services (diffserv) and Resource Reservation Protocol | 30 | Differentiated Services (diffserv) and Resource Reservation Protocol |
@@ -126,6 +126,17 @@ config NET_SCH_RED | |||
126 | To compile this code as a module, choose M here: the | 126 | To compile this code as a module, choose M here: the |
127 | module will be called sch_red. | 127 | module will be called sch_red. |
128 | 128 | ||
129 | config NET_SCH_SFB | ||
130 | tristate "Stochastic Fair Blue (SFB)" | ||
131 | ---help--- | ||
132 | Say Y here if you want to use the Stochastic Fair Blue (SFB) | ||
133 | packet scheduling algorithm. | ||
134 | |||
135 | See the top of <file:net/sched/sch_sfb.c> for more details. | ||
136 | |||
137 | To compile this code as a module, choose M here: the | ||
138 | module will be called sch_sfb. | ||
139 | |||
129 | config NET_SCH_SFQ | 140 | config NET_SCH_SFQ |
130 | tristate "Stochastic Fairness Queueing (SFQ)" | 141 | tristate "Stochastic Fairness Queueing (SFQ)" |
131 | ---help--- | 142 | ---help--- |
@@ -205,6 +216,40 @@ config NET_SCH_DRR | |||
205 | 216 | ||
206 | If unsure, say N. | 217 | If unsure, say N. |
207 | 218 | ||
219 | config NET_SCH_MQPRIO | ||
220 | tristate "Multi-queue priority scheduler (MQPRIO)" | ||
221 | help | ||
222 | Say Y here if you want to use the Multi-queue Priority scheduler. | ||
223 | This scheduler allows QOS to be offloaded on NICs that have support | ||
224 | for offloading QOS schedulers. | ||
225 | |||
226 | To compile this driver as a module, choose M here: the module will | ||
227 | be called sch_mqprio. | ||
228 | |||
229 | If unsure, say N. | ||
230 | |||
231 | config NET_SCH_CHOKE | ||
232 | tristate "CHOose and Keep responsive flow scheduler (CHOKE)" | ||
233 | help | ||
234 | Say Y here if you want to use the CHOKe packet scheduler (CHOose | ||
235 | and Keep for responsive flows, CHOose and Kill for unresponsive | ||
236 | flows). This is a variation of RED which trys to penalize flows | ||
237 | that monopolize the queue. | ||
238 | |||
239 | To compile this code as a module, choose M here: the | ||
240 | module will be called sch_choke. | ||
241 | |||
242 | config NET_SCH_QFQ | ||
243 | tristate "Quick Fair Queueing scheduler (QFQ)" | ||
244 | help | ||
245 | Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ) | ||
246 | packet scheduling algorithm. | ||
247 | |||
248 | To compile this driver as a module, choose M here: the module | ||
249 | will be called sch_qfq. | ||
250 | |||
251 | If unsure, say N. | ||
252 | |||
208 | config NET_SCH_INGRESS | 253 | config NET_SCH_INGRESS |
209 | tristate "Ingress Qdisc" | 254 | tristate "Ingress Qdisc" |
210 | depends on NET_CLS_ACT | 255 | depends on NET_CLS_ACT |
@@ -243,7 +288,8 @@ config NET_CLS_TCINDEX | |||
243 | 288 | ||
244 | config NET_CLS_ROUTE4 | 289 | config NET_CLS_ROUTE4 |
245 | tristate "Routing decision (ROUTE)" | 290 | tristate "Routing decision (ROUTE)" |
246 | select NET_CLS_ROUTE | 291 | depends on INET |
292 | select IP_ROUTE_CLASSID | ||
247 | select NET_CLS | 293 | select NET_CLS |
248 | ---help--- | 294 | ---help--- |
249 | If you say Y here, you will be able to classify packets | 295 | If you say Y here, you will be able to classify packets |
@@ -252,9 +298,6 @@ config NET_CLS_ROUTE4 | |||
252 | To compile this code as a module, choose M here: the | 298 | To compile this code as a module, choose M here: the |
253 | module will be called cls_route. | 299 | module will be called cls_route. |
254 | 300 | ||
255 | config NET_CLS_ROUTE | ||
256 | bool | ||
257 | |||
258 | config NET_CLS_FW | 301 | config NET_CLS_FW |
259 | tristate "Netfilter mark (FW)" | 302 | tristate "Netfilter mark (FW)" |
260 | select NET_CLS | 303 | select NET_CLS |
@@ -518,6 +561,16 @@ config NET_ACT_SKBEDIT | |||
518 | To compile this code as a module, choose M here: the | 561 | To compile this code as a module, choose M here: the |
519 | module will be called act_skbedit. | 562 | module will be called act_skbedit. |
520 | 563 | ||
564 | config NET_ACT_CSUM | ||
565 | tristate "Checksum Updating" | ||
566 | depends on NET_CLS_ACT && INET | ||
567 | ---help--- | ||
568 | Say Y here to update some common checksum after some direct | ||
569 | packet alterations. | ||
570 | |||
571 | To compile this code as a module, choose M here: the | ||
572 | module will be called act_csum. | ||
573 | |||
521 | config NET_CLS_IND | 574 | config NET_CLS_IND |
522 | bool "Incoming device classification" | 575 | bool "Incoming device classification" |
523 | depends on NET_CLS_U32 || NET_CLS_FW | 576 | depends on NET_CLS_U32 || NET_CLS_FW |
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..dc5889c0a15a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o | |||
15 | obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o | 15 | obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o |
16 | obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o | 16 | obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o |
17 | obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o | 17 | obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o |
18 | obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o | ||
18 | obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o | 19 | obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o |
19 | obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o | 20 | obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o |
20 | obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o | 21 | obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o |
@@ -23,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o | |||
23 | obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o | 24 | obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o |
24 | obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o | 25 | obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o |
25 | obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o | 26 | obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o |
27 | obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o | ||
26 | obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o | 28 | obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o |
27 | obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o | 29 | obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o |
28 | obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o | 30 | obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o |
@@ -31,6 +33,10 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o | |||
31 | obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o | 33 | obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o |
32 | obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o | 34 | obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o |
33 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o | 35 | obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o |
36 | obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o | ||
37 | obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o | ||
38 | obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o | ||
39 | |||
34 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o | 40 | obj-$(CONFIG_NET_CLS_U32) += cls_u32.o |
35 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o | 41 | obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o |
36 | obj-$(CONFIG_NET_CLS_FW) += cls_fw.o | 42 | obj-$(CONFIG_NET_CLS_FW) += cls_fw.o |
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f89e7e0..a606025814a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,11 +26,6 @@ | |||
26 | #include <net/act_api.h> | 26 | #include <net/act_api.h> |
27 | #include <net/netlink.h> | 27 | #include <net/netlink.h> |
28 | 28 | ||
29 | static void tcf_common_free_rcu(struct rcu_head *head) | ||
30 | { | ||
31 | kfree(container_of(head, struct tcf_common, tcfc_rcu)); | ||
32 | } | ||
33 | |||
34 | void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) | 29 | void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) |
35 | { | 30 | { |
36 | unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); | 31 | unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); |
@@ -47,7 +42,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) | |||
47 | * gen_estimator est_timer() might access p->tcfc_lock | 42 | * gen_estimator est_timer() might access p->tcfc_lock |
48 | * or bstats, wait a RCU grace period before freeing p | 43 | * or bstats, wait a RCU grace period before freeing p |
49 | */ | 44 | */ |
50 | call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); | 45 | kfree_rcu(p, tcfc_rcu); |
51 | return; | 46 | return; |
52 | } | 47 | } |
53 | } | 48 | } |
@@ -78,7 +73,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, | |||
78 | struct tc_action *a, struct tcf_hashinfo *hinfo) | 73 | struct tc_action *a, struct tcf_hashinfo *hinfo) |
79 | { | 74 | { |
80 | struct tcf_common *p; | 75 | struct tcf_common *p; |
81 | int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; | 76 | int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; |
82 | struct nlattr *nest; | 77 | struct nlattr *nest; |
83 | 78 | ||
84 | read_lock_bh(hinfo->lock); | 79 | read_lock_bh(hinfo->lock); |
@@ -126,7 +121,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, | |||
126 | { | 121 | { |
127 | struct tcf_common *p, *s_p; | 122 | struct tcf_common *p, *s_p; |
128 | struct nlattr *nest; | 123 | struct nlattr *nest; |
129 | int i= 0, n_i = 0; | 124 | int i = 0, n_i = 0; |
130 | 125 | ||
131 | nest = nla_nest_start(skb, a->order); | 126 | nest = nla_nest_start(skb, a->order); |
132 | if (nest == NULL) | 127 | if (nest == NULL) |
@@ -138,7 +133,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, | |||
138 | while (p != NULL) { | 133 | while (p != NULL) { |
139 | s_p = p->tcfc_next; | 134 | s_p = p->tcfc_next; |
140 | if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) | 135 | if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) |
141 | module_put(a->ops->owner); | 136 | module_put(a->ops->owner); |
142 | n_i++; | 137 | n_i++; |
143 | p = s_p; | 138 | p = s_p; |
144 | } | 139 | } |
@@ -447,7 +442,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) | |||
447 | nest = nla_nest_start(skb, TCA_OPTIONS); | 442 | nest = nla_nest_start(skb, TCA_OPTIONS); |
448 | if (nest == NULL) | 443 | if (nest == NULL) |
449 | goto nla_put_failure; | 444 | goto nla_put_failure; |
450 | if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { | 445 | err = tcf_action_dump_old(skb, a, bind, ref); |
446 | if (err > 0) { | ||
451 | nla_nest_end(skb, nest); | 447 | nla_nest_end(skb, nest); |
452 | return err; | 448 | return err; |
453 | } | 449 | } |
@@ -491,7 +487,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, | |||
491 | struct tc_action *a; | 487 | struct tc_action *a; |
492 | struct tc_action_ops *a_o; | 488 | struct tc_action_ops *a_o; |
493 | char act_name[IFNAMSIZ]; | 489 | char act_name[IFNAMSIZ]; |
494 | struct nlattr *tb[TCA_ACT_MAX+1]; | 490 | struct nlattr *tb[TCA_ACT_MAX + 1]; |
495 | struct nlattr *kind; | 491 | struct nlattr *kind; |
496 | int err; | 492 | int err; |
497 | 493 | ||
@@ -549,9 +545,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, | |||
549 | goto err_free; | 545 | goto err_free; |
550 | 546 | ||
551 | /* module count goes up only when brand new policy is created | 547 | /* module count goes up only when brand new policy is created |
552 | if it exists and is only bound to in a_o->init() then | 548 | * if it exists and is only bound to in a_o->init() then |
553 | ACT_P_CREATED is not returned (a zero is). | 549 | * ACT_P_CREATED is not returned (a zero is). |
554 | */ | 550 | */ |
555 | if (err != ACT_P_CREATED) | 551 | if (err != ACT_P_CREATED) |
556 | module_put(a_o->owner); | 552 | module_put(a_o->owner); |
557 | a->ops = a_o; | 553 | a->ops = a_o; |
@@ -569,7 +565,7 @@ err_out: | |||
569 | struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, | 565 | struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, |
570 | char *name, int ovr, int bind) | 566 | char *name, int ovr, int bind) |
571 | { | 567 | { |
572 | struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; | 568 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; |
573 | struct tc_action *head = NULL, *act, *act_prev = NULL; | 569 | struct tc_action *head = NULL, *act, *act_prev = NULL; |
574 | int err; | 570 | int err; |
575 | int i; | 571 | int i; |
@@ -697,7 +693,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, | |||
697 | static struct tc_action * | 693 | static struct tc_action * |
698 | tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) | 694 | tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) |
699 | { | 695 | { |
700 | struct nlattr *tb[TCA_ACT_MAX+1]; | 696 | struct nlattr *tb[TCA_ACT_MAX + 1]; |
701 | struct tc_action *a; | 697 | struct tc_action *a; |
702 | int index; | 698 | int index; |
703 | int err; | 699 | int err; |
@@ -770,7 +766,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, | |||
770 | struct tcamsg *t; | 766 | struct tcamsg *t; |
771 | struct netlink_callback dcb; | 767 | struct netlink_callback dcb; |
772 | struct nlattr *nest; | 768 | struct nlattr *nest; |
773 | struct nlattr *tb[TCA_ACT_MAX+1]; | 769 | struct nlattr *tb[TCA_ACT_MAX + 1]; |
774 | struct nlattr *kind; | 770 | struct nlattr *kind; |
775 | struct tc_action *a = create_a(0); | 771 | struct tc_action *a = create_a(0); |
776 | int err = -ENOMEM; | 772 | int err = -ENOMEM; |
@@ -821,7 +817,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, | |||
821 | nlh->nlmsg_flags |= NLM_F_ROOT; | 817 | nlh->nlmsg_flags |= NLM_F_ROOT; |
822 | module_put(a->ops->owner); | 818 | module_put(a->ops->owner); |
823 | kfree(a); | 819 | kfree(a); |
824 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 820 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
821 | n->nlmsg_flags & NLM_F_ECHO); | ||
825 | if (err > 0) | 822 | if (err > 0) |
826 | return 0; | 823 | return 0; |
827 | 824 | ||
@@ -842,14 +839,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, | |||
842 | u32 pid, int event) | 839 | u32 pid, int event) |
843 | { | 840 | { |
844 | int i, ret; | 841 | int i, ret; |
845 | struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; | 842 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; |
846 | struct tc_action *head = NULL, *act, *act_prev = NULL; | 843 | struct tc_action *head = NULL, *act, *act_prev = NULL; |
847 | 844 | ||
848 | ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); | 845 | ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); |
849 | if (ret < 0) | 846 | if (ret < 0) |
850 | return ret; | 847 | return ret; |
851 | 848 | ||
852 | if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { | 849 | if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { |
853 | if (tb[1] != NULL) | 850 | if (tb[1] != NULL) |
854 | return tca_action_flush(net, tb[1], n, pid); | 851 | return tca_action_flush(net, tb[1], n, pid); |
855 | else | 852 | else |
@@ -892,7 +889,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, | |||
892 | /* now do the delete */ | 889 | /* now do the delete */ |
893 | tcf_action_destroy(head, 0); | 890 | tcf_action_destroy(head, 0); |
894 | ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, | 891 | ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
895 | n->nlmsg_flags&NLM_F_ECHO); | 892 | n->nlmsg_flags & NLM_F_ECHO); |
896 | if (ret > 0) | 893 | if (ret > 0) |
897 | return 0; | 894 | return 0; |
898 | return ret; | 895 | return ret; |
@@ -936,7 +933,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, | |||
936 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; | 933 | nlh->nlmsg_len = skb_tail_pointer(skb) - b; |
937 | NETLINK_CB(skb).dst_group = RTNLGRP_TC; | 934 | NETLINK_CB(skb).dst_group = RTNLGRP_TC; |
938 | 935 | ||
939 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); | 936 | err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); |
940 | if (err > 0) | 937 | if (err > 0) |
941 | err = 0; | 938 | err = 0; |
942 | return err; | 939 | return err; |
@@ -967,7 +964,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, | |||
967 | 964 | ||
968 | /* dump then free all the actions after update; inserted policy | 965 | /* dump then free all the actions after update; inserted policy |
969 | * stays intact | 966 | * stays intact |
970 | * */ | 967 | */ |
971 | ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); | 968 | ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); |
972 | for (a = act; a; a = act) { | 969 | for (a = act; a; a = act) { |
973 | act = a->next; | 970 | act = a->next; |
@@ -993,17 +990,16 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
993 | return -EINVAL; | 990 | return -EINVAL; |
994 | } | 991 | } |
995 | 992 | ||
996 | /* n->nlmsg_flags&NLM_F_CREATE | 993 | /* n->nlmsg_flags & NLM_F_CREATE */ |
997 | * */ | ||
998 | switch (n->nlmsg_type) { | 994 | switch (n->nlmsg_type) { |
999 | case RTM_NEWACTION: | 995 | case RTM_NEWACTION: |
1000 | /* we are going to assume all other flags | 996 | /* we are going to assume all other flags |
1001 | * imply create only if it doesnt exist | 997 | * imply create only if it doesn't exist |
1002 | * Note that CREATE | EXCL implies that | 998 | * Note that CREATE | EXCL implies that |
1003 | * but since we want avoid ambiguity (eg when flags | 999 | * but since we want avoid ambiguity (eg when flags |
1004 | * is zero) then just set this | 1000 | * is zero) then just set this |
1005 | */ | 1001 | */ |
1006 | if (n->nlmsg_flags&NLM_F_REPLACE) | 1002 | if (n->nlmsg_flags & NLM_F_REPLACE) |
1007 | ovr = 1; | 1003 | ovr = 1; |
1008 | replay: | 1004 | replay: |
1009 | ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); | 1005 | ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); |
@@ -1028,7 +1024,7 @@ replay: | |||
1028 | static struct nlattr * | 1024 | static struct nlattr * |
1029 | find_dump_kind(const struct nlmsghdr *n) | 1025 | find_dump_kind(const struct nlmsghdr *n) |
1030 | { | 1026 | { |
1031 | struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; | 1027 | struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; |
1032 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; | 1028 | struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; |
1033 | struct nlattr *nla[TCAA_MAX + 1]; | 1029 | struct nlattr *nla[TCAA_MAX + 1]; |
1034 | struct nlattr *kind; | 1030 | struct nlattr *kind; |
@@ -1071,9 +1067,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) | |||
1071 | } | 1067 | } |
1072 | 1068 | ||
1073 | a_o = tc_lookup_action(kind); | 1069 | a_o = tc_lookup_action(kind); |
1074 | if (a_o == NULL) { | 1070 | if (a_o == NULL) |
1075 | return 0; | 1071 | return 0; |
1076 | } | ||
1077 | 1072 | ||
1078 | memset(&a, 0, sizeof(struct tc_action)); | 1073 | memset(&a, 0, sizeof(struct tc_action)); |
1079 | a.ops = a_o; | 1074 | a.ops = a_o; |
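The first act_api.c hunk above replaces an open-coded RCU free callback with kfree_rcu(); the act_police.c diff further down makes the same conversion and drops its rcu_barrier() on unload. A minimal sketch of the before/after pattern, using a hypothetical struct foo in place of struct tcf_common and its tcfc_rcu member:

/*
 * Sketch only: "struct foo" and these helpers are hypothetical stand-ins
 * for struct tcf_common / struct tcf_police in the hunks above and below.
 */
#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	int data;
	struct rcu_head rcu;
};

/* Old pattern: a one-line callback exists only to kfree() the object. */
static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

static void foo_release_old(struct foo *p)
{
	call_rcu(&p->rcu, foo_free_rcu);	/* free after a grace period */
}

/*
 * New pattern: kfree_rcu() takes the rcu_head *member name* and frees the
 * enclosing object after a grace period, so the dedicated callback goes
 * away entirely.
 */
static void foo_release_new(struct foo *p)
{
	kfree_rcu(p, rcu);
}

Because no callback in module text remains pending, module unload no longer has to call rcu_barrier() just to wait for its own free callbacks, which is why the act_police.c hunk below can delete that line as well.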
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..6cdf9abe475f
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,594 @@ | |||
1 | /* | ||
2 | * Checksum updating actions | ||
3 | * | ||
4 | * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License as published by the Free | ||
8 | * Software Foundation; either version 2 of the License, or (at your option) | ||
9 | * any later version. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/types.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/spinlock.h> | ||
18 | |||
19 | #include <linux/netlink.h> | ||
20 | #include <net/netlink.h> | ||
21 | #include <linux/rtnetlink.h> | ||
22 | |||
23 | #include <linux/skbuff.h> | ||
24 | |||
25 | #include <net/ip.h> | ||
26 | #include <net/ipv6.h> | ||
27 | #include <net/icmp.h> | ||
28 | #include <linux/icmpv6.h> | ||
29 | #include <linux/igmp.h> | ||
30 | #include <net/tcp.h> | ||
31 | #include <net/udp.h> | ||
32 | #include <net/ip6_checksum.h> | ||
33 | |||
34 | #include <net/act_api.h> | ||
35 | |||
36 | #include <linux/tc_act/tc_csum.h> | ||
37 | #include <net/tc_act/tc_csum.h> | ||
38 | |||
39 | #define CSUM_TAB_MASK 15 | ||
40 | static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; | ||
41 | static u32 csum_idx_gen; | ||
42 | static DEFINE_RWLOCK(csum_lock); | ||
43 | |||
44 | static struct tcf_hashinfo csum_hash_info = { | ||
45 | .htab = tcf_csum_ht, | ||
46 | .hmask = CSUM_TAB_MASK, | ||
47 | .lock = &csum_lock, | ||
48 | }; | ||
49 | |||
50 | static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { | ||
51 | [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, | ||
52 | }; | ||
53 | |||
54 | static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, | ||
55 | struct tc_action *a, int ovr, int bind) | ||
56 | { | ||
57 | struct nlattr *tb[TCA_CSUM_MAX + 1]; | ||
58 | struct tc_csum *parm; | ||
59 | struct tcf_common *pc; | ||
60 | struct tcf_csum *p; | ||
61 | int ret = 0, err; | ||
62 | |||
63 | if (nla == NULL) | ||
64 | return -EINVAL; | ||
65 | |||
66 | err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy); | ||
67 | if (err < 0) | ||
68 | return err; | ||
69 | |||
70 | if (tb[TCA_CSUM_PARMS] == NULL) | ||
71 | return -EINVAL; | ||
72 | parm = nla_data(tb[TCA_CSUM_PARMS]); | ||
73 | |||
74 | pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); | ||
75 | if (!pc) { | ||
76 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, | ||
77 | &csum_idx_gen, &csum_hash_info); | ||
78 | if (IS_ERR(pc)) | ||
79 | return PTR_ERR(pc); | ||
80 | p = to_tcf_csum(pc); | ||
81 | ret = ACT_P_CREATED; | ||
82 | } else { | ||
83 | p = to_tcf_csum(pc); | ||
84 | if (!ovr) { | ||
85 | tcf_hash_release(pc, bind, &csum_hash_info); | ||
86 | return -EEXIST; | ||
87 | } | ||
88 | } | ||
89 | |||
90 | spin_lock_bh(&p->tcf_lock); | ||
91 | p->tcf_action = parm->action; | ||
92 | p->update_flags = parm->update_flags; | ||
93 | spin_unlock_bh(&p->tcf_lock); | ||
94 | |||
95 | if (ret == ACT_P_CREATED) | ||
96 | tcf_hash_insert(pc, &csum_hash_info); | ||
97 | |||
98 | return ret; | ||
99 | } | ||
100 | |||
101 | static int tcf_csum_cleanup(struct tc_action *a, int bind) | ||
102 | { | ||
103 | struct tcf_csum *p = a->priv; | ||
104 | return tcf_hash_release(&p->common, bind, &csum_hash_info); | ||
105 | } | ||
106 | |||
107 | /** | ||
108 | * tcf_csum_skb_nextlayer - Get next layer pointer | ||
109 | * @skb: sk_buff to use | ||
110 | * @ihl: previous summed headers length | ||
111 | * @ipl: complete packet length | ||
112 | * @jhl: next header length | ||
113 | * | ||
114 | * Check the expected next layer availability in the specified sk_buff. | ||
115 | * Return the next layer pointer if pass, NULL otherwise. | ||
116 | */ | ||
117 | static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, | ||
118 | unsigned int ihl, unsigned int ipl, | ||
119 | unsigned int jhl) | ||
120 | { | ||
121 | int ntkoff = skb_network_offset(skb); | ||
122 | int hl = ihl + jhl; | ||
123 | |||
124 | if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || | ||
125 | (skb_cloned(skb) && | ||
126 | !skb_clone_writable(skb, hl + ntkoff) && | ||
127 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | ||
128 | return NULL; | ||
129 | else | ||
130 | return (void *)(skb_network_header(skb) + ihl); | ||
131 | } | ||
132 | |||
133 | static int tcf_csum_ipv4_icmp(struct sk_buff *skb, | ||
134 | unsigned int ihl, unsigned int ipl) | ||
135 | { | ||
136 | struct icmphdr *icmph; | ||
137 | |||
138 | icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); | ||
139 | if (icmph == NULL) | ||
140 | return 0; | ||
141 | |||
142 | icmph->checksum = 0; | ||
143 | skb->csum = csum_partial(icmph, ipl - ihl, 0); | ||
144 | icmph->checksum = csum_fold(skb->csum); | ||
145 | |||
146 | skb->ip_summed = CHECKSUM_NONE; | ||
147 | |||
148 | return 1; | ||
149 | } | ||
150 | |||
151 | static int tcf_csum_ipv4_igmp(struct sk_buff *skb, | ||
152 | unsigned int ihl, unsigned int ipl) | ||
153 | { | ||
154 | struct igmphdr *igmph; | ||
155 | |||
156 | igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); | ||
157 | if (igmph == NULL) | ||
158 | return 0; | ||
159 | |||
160 | igmph->csum = 0; | ||
161 | skb->csum = csum_partial(igmph, ipl - ihl, 0); | ||
162 | igmph->csum = csum_fold(skb->csum); | ||
163 | |||
164 | skb->ip_summed = CHECKSUM_NONE; | ||
165 | |||
166 | return 1; | ||
167 | } | ||
168 | |||
169 | static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, | ||
170 | unsigned int ihl, unsigned int ipl) | ||
171 | { | ||
172 | struct icmp6hdr *icmp6h; | ||
173 | |||
174 | icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); | ||
175 | if (icmp6h == NULL) | ||
176 | return 0; | ||
177 | |||
178 | icmp6h->icmp6_cksum = 0; | ||
179 | skb->csum = csum_partial(icmp6h, ipl - ihl, 0); | ||
180 | icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, | ||
181 | ipl - ihl, IPPROTO_ICMPV6, | ||
182 | skb->csum); | ||
183 | |||
184 | skb->ip_summed = CHECKSUM_NONE; | ||
185 | |||
186 | return 1; | ||
187 | } | ||
188 | |||
189 | static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, | ||
190 | unsigned int ihl, unsigned int ipl) | ||
191 | { | ||
192 | struct tcphdr *tcph; | ||
193 | |||
194 | tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); | ||
195 | if (tcph == NULL) | ||
196 | return 0; | ||
197 | |||
198 | tcph->check = 0; | ||
199 | skb->csum = csum_partial(tcph, ipl - ihl, 0); | ||
200 | tcph->check = tcp_v4_check(ipl - ihl, | ||
201 | iph->saddr, iph->daddr, skb->csum); | ||
202 | |||
203 | skb->ip_summed = CHECKSUM_NONE; | ||
204 | |||
205 | return 1; | ||
206 | } | ||
207 | |||
208 | static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, | ||
209 | unsigned int ihl, unsigned int ipl) | ||
210 | { | ||
211 | struct tcphdr *tcph; | ||
212 | |||
213 | tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); | ||
214 | if (tcph == NULL) | ||
215 | return 0; | ||
216 | |||
217 | tcph->check = 0; | ||
218 | skb->csum = csum_partial(tcph, ipl - ihl, 0); | ||
219 | tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, | ||
220 | ipl - ihl, IPPROTO_TCP, | ||
221 | skb->csum); | ||
222 | |||
223 | skb->ip_summed = CHECKSUM_NONE; | ||
224 | |||
225 | return 1; | ||
226 | } | ||
227 | |||
228 | static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, | ||
229 | unsigned int ihl, unsigned int ipl, int udplite) | ||
230 | { | ||
231 | struct udphdr *udph; | ||
232 | u16 ul; | ||
233 | |||
234 | /* | ||
235 | * Support both UDP and UDPLITE checksum algorithms, Don't use | ||
236 | * udph->len to get the real length without any protocol check, | ||
237 | * UDPLITE uses udph->len for another thing, | ||
238 | * Use iph->tot_len, or just ipl. | ||
239 | */ | ||
240 | |||
241 | udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); | ||
242 | if (udph == NULL) | ||
243 | return 0; | ||
244 | |||
245 | ul = ntohs(udph->len); | ||
246 | |||
247 | if (udplite || udph->check) { | ||
248 | |||
249 | udph->check = 0; | ||
250 | |||
251 | if (udplite) { | ||
252 | if (ul == 0) | ||
253 | skb->csum = csum_partial(udph, ipl - ihl, 0); | ||
254 | else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) | ||
255 | skb->csum = csum_partial(udph, ul, 0); | ||
256 | else | ||
257 | goto ignore_obscure_skb; | ||
258 | } else { | ||
259 | if (ul != ipl - ihl) | ||
260 | goto ignore_obscure_skb; | ||
261 | |||
262 | skb->csum = csum_partial(udph, ul, 0); | ||
263 | } | ||
264 | |||
265 | udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, | ||
266 | ul, iph->protocol, | ||
267 | skb->csum); | ||
268 | |||
269 | if (!udph->check) | ||
270 | udph->check = CSUM_MANGLED_0; | ||
271 | } | ||
272 | |||
273 | skb->ip_summed = CHECKSUM_NONE; | ||
274 | |||
275 | ignore_obscure_skb: | ||
276 | return 1; | ||
277 | } | ||
278 | |||
279 | static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, | ||
280 | unsigned int ihl, unsigned int ipl, int udplite) | ||
281 | { | ||
282 | struct udphdr *udph; | ||
283 | u16 ul; | ||
284 | |||
285 | /* | ||
286 | * Support both UDP and UDPLITE checksum algorithms, Don't use | ||
287 | * udph->len to get the real length without any protocol check, | ||
288 | * UDPLITE uses udph->len for another thing, | ||
289 | * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. | ||
290 | */ | ||
291 | |||
292 | udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); | ||
293 | if (udph == NULL) | ||
294 | return 0; | ||
295 | |||
296 | ul = ntohs(udph->len); | ||
297 | |||
298 | udph->check = 0; | ||
299 | |||
300 | if (udplite) { | ||
301 | if (ul == 0) | ||
302 | skb->csum = csum_partial(udph, ipl - ihl, 0); | ||
303 | |||
304 | else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) | ||
305 | skb->csum = csum_partial(udph, ul, 0); | ||
306 | |||
307 | else | ||
308 | goto ignore_obscure_skb; | ||
309 | } else { | ||
310 | if (ul != ipl - ihl) | ||
311 | goto ignore_obscure_skb; | ||
312 | |||
313 | skb->csum = csum_partial(udph, ul, 0); | ||
314 | } | ||
315 | |||
316 | udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, | ||
317 | udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, | ||
318 | skb->csum); | ||
319 | |||
320 | if (!udph->check) | ||
321 | udph->check = CSUM_MANGLED_0; | ||
322 | |||
323 | skb->ip_summed = CHECKSUM_NONE; | ||
324 | |||
325 | ignore_obscure_skb: | ||
326 | return 1; | ||
327 | } | ||
328 | |||
329 | static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) | ||
330 | { | ||
331 | struct iphdr *iph; | ||
332 | int ntkoff; | ||
333 | |||
334 | ntkoff = skb_network_offset(skb); | ||
335 | |||
336 | if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) | ||
337 | goto fail; | ||
338 | |||
339 | iph = ip_hdr(skb); | ||
340 | |||
341 | switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { | ||
342 | case IPPROTO_ICMP: | ||
343 | if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) | ||
344 | if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4, | ||
345 | ntohs(iph->tot_len))) | ||
346 | goto fail; | ||
347 | break; | ||
348 | case IPPROTO_IGMP: | ||
349 | if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) | ||
350 | if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4, | ||
351 | ntohs(iph->tot_len))) | ||
352 | goto fail; | ||
353 | break; | ||
354 | case IPPROTO_TCP: | ||
355 | if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) | ||
356 | if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4, | ||
357 | ntohs(iph->tot_len))) | ||
358 | goto fail; | ||
359 | break; | ||
360 | case IPPROTO_UDP: | ||
361 | if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) | ||
362 | if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, | ||
363 | ntohs(iph->tot_len), 0)) | ||
364 | goto fail; | ||
365 | break; | ||
366 | case IPPROTO_UDPLITE: | ||
367 | if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) | ||
368 | if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4, | ||
369 | ntohs(iph->tot_len), 1)) | ||
370 | goto fail; | ||
371 | break; | ||
372 | } | ||
373 | |||
374 | if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { | ||
375 | if (skb_cloned(skb) && | ||
376 | !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && | ||
377 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) | ||
378 | goto fail; | ||
379 | |||
380 | ip_send_check(iph); | ||
381 | } | ||
382 | |||
383 | return 1; | ||
384 | |||
385 | fail: | ||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, | ||
390 | unsigned int ixhl, unsigned int *pl) | ||
391 | { | ||
392 | int off, len, optlen; | ||
393 | unsigned char *xh = (void *)ip6xh; | ||
394 | |||
395 | off = sizeof(*ip6xh); | ||
396 | len = ixhl - off; | ||
397 | |||
398 | while (len > 1) { | ||
399 | switch (xh[off]) { | ||
400 | case IPV6_TLV_PAD0: | ||
401 | optlen = 1; | ||
402 | break; | ||
403 | case IPV6_TLV_JUMBO: | ||
404 | optlen = xh[off + 1] + 2; | ||
405 | if (optlen != 6 || len < 6 || (off & 3) != 2) | ||
406 | /* wrong jumbo option length/alignment */ | ||
407 | return 0; | ||
408 | *pl = ntohl(*(__be32 *)(xh + off + 2)); | ||
409 | goto done; | ||
410 | default: | ||
411 | optlen = xh[off + 1] + 2; | ||
412 | if (optlen > len) | ||
413 | /* ignore obscure options */ | ||
414 | goto done; | ||
415 | break; | ||
416 | } | ||
417 | off += optlen; | ||
418 | len -= optlen; | ||
419 | } | ||
420 | |||
421 | done: | ||
422 | return 1; | ||
423 | } | ||
424 | |||
425 | static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) | ||
426 | { | ||
427 | struct ipv6hdr *ip6h; | ||
428 | struct ipv6_opt_hdr *ip6xh; | ||
429 | unsigned int hl, ixhl; | ||
430 | unsigned int pl; | ||
431 | int ntkoff; | ||
432 | u8 nexthdr; | ||
433 | |||
434 | ntkoff = skb_network_offset(skb); | ||
435 | |||
436 | hl = sizeof(*ip6h); | ||
437 | |||
438 | if (!pskb_may_pull(skb, hl + ntkoff)) | ||
439 | goto fail; | ||
440 | |||
441 | ip6h = ipv6_hdr(skb); | ||
442 | |||
443 | pl = ntohs(ip6h->payload_len); | ||
444 | nexthdr = ip6h->nexthdr; | ||
445 | |||
446 | do { | ||
447 | switch (nexthdr) { | ||
448 | case NEXTHDR_FRAGMENT: | ||
449 | goto ignore_skb; | ||
450 | case NEXTHDR_ROUTING: | ||
451 | case NEXTHDR_HOP: | ||
452 | case NEXTHDR_DEST: | ||
453 | if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) | ||
454 | goto fail; | ||
455 | ip6xh = (void *)(skb_network_header(skb) + hl); | ||
456 | ixhl = ipv6_optlen(ip6xh); | ||
457 | if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) | ||
458 | goto fail; | ||
459 | if ((nexthdr == NEXTHDR_HOP) && | ||
460 | !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) | ||
461 | goto fail; | ||
462 | nexthdr = ip6xh->nexthdr; | ||
463 | hl += ixhl; | ||
464 | break; | ||
465 | case IPPROTO_ICMPV6: | ||
466 | if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) | ||
467 | if (!tcf_csum_ipv6_icmp(skb, ip6h, | ||
468 | hl, pl + sizeof(*ip6h))) | ||
469 | goto fail; | ||
470 | goto done; | ||
471 | case IPPROTO_TCP: | ||
472 | if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) | ||
473 | if (!tcf_csum_ipv6_tcp(skb, ip6h, | ||
474 | hl, pl + sizeof(*ip6h))) | ||
475 | goto fail; | ||
476 | goto done; | ||
477 | case IPPROTO_UDP: | ||
478 | if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) | ||
479 | if (!tcf_csum_ipv6_udp(skb, ip6h, hl, | ||
480 | pl + sizeof(*ip6h), 0)) | ||
481 | goto fail; | ||
482 | goto done; | ||
483 | case IPPROTO_UDPLITE: | ||
484 | if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) | ||
485 | if (!tcf_csum_ipv6_udp(skb, ip6h, hl, | ||
486 | pl + sizeof(*ip6h), 1)) | ||
487 | goto fail; | ||
488 | goto done; | ||
489 | default: | ||
490 | goto ignore_skb; | ||
491 | } | ||
492 | } while (pskb_may_pull(skb, hl + 1 + ntkoff)); | ||
493 | |||
494 | done: | ||
495 | ignore_skb: | ||
496 | return 1; | ||
497 | |||
498 | fail: | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | static int tcf_csum(struct sk_buff *skb, | ||
503 | struct tc_action *a, struct tcf_result *res) | ||
504 | { | ||
505 | struct tcf_csum *p = a->priv; | ||
506 | int action; | ||
507 | u32 update_flags; | ||
508 | |||
509 | spin_lock(&p->tcf_lock); | ||
510 | p->tcf_tm.lastuse = jiffies; | ||
511 | bstats_update(&p->tcf_bstats, skb); | ||
512 | action = p->tcf_action; | ||
513 | update_flags = p->update_flags; | ||
514 | spin_unlock(&p->tcf_lock); | ||
515 | |||
516 | if (unlikely(action == TC_ACT_SHOT)) | ||
517 | goto drop; | ||
518 | |||
519 | switch (skb->protocol) { | ||
520 | case cpu_to_be16(ETH_P_IP): | ||
521 | if (!tcf_csum_ipv4(skb, update_flags)) | ||
522 | goto drop; | ||
523 | break; | ||
524 | case cpu_to_be16(ETH_P_IPV6): | ||
525 | if (!tcf_csum_ipv6(skb, update_flags)) | ||
526 | goto drop; | ||
527 | break; | ||
528 | } | ||
529 | |||
530 | return action; | ||
531 | |||
532 | drop: | ||
533 | spin_lock(&p->tcf_lock); | ||
534 | p->tcf_qstats.drops++; | ||
535 | spin_unlock(&p->tcf_lock); | ||
536 | return TC_ACT_SHOT; | ||
537 | } | ||
538 | |||
539 | static int tcf_csum_dump(struct sk_buff *skb, | ||
540 | struct tc_action *a, int bind, int ref) | ||
541 | { | ||
542 | unsigned char *b = skb_tail_pointer(skb); | ||
543 | struct tcf_csum *p = a->priv; | ||
544 | struct tc_csum opt = { | ||
545 | .update_flags = p->update_flags, | ||
546 | .index = p->tcf_index, | ||
547 | .action = p->tcf_action, | ||
548 | .refcnt = p->tcf_refcnt - ref, | ||
549 | .bindcnt = p->tcf_bindcnt - bind, | ||
550 | }; | ||
551 | struct tcf_t t; | ||
552 | |||
553 | NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt); | ||
554 | t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); | ||
555 | t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); | ||
556 | t.expires = jiffies_to_clock_t(p->tcf_tm.expires); | ||
557 | NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t); | ||
558 | |||
559 | return skb->len; | ||
560 | |||
561 | nla_put_failure: | ||
562 | nlmsg_trim(skb, b); | ||
563 | return -1; | ||
564 | } | ||
565 | |||
566 | static struct tc_action_ops act_csum_ops = { | ||
567 | .kind = "csum", | ||
568 | .hinfo = &csum_hash_info, | ||
569 | .type = TCA_ACT_CSUM, | ||
570 | .capab = TCA_CAP_NONE, | ||
571 | .owner = THIS_MODULE, | ||
572 | .act = tcf_csum, | ||
573 | .dump = tcf_csum_dump, | ||
574 | .cleanup = tcf_csum_cleanup, | ||
575 | .lookup = tcf_hash_search, | ||
576 | .init = tcf_csum_init, | ||
577 | .walk = tcf_generic_walker | ||
578 | }; | ||
579 | |||
580 | MODULE_DESCRIPTION("Checksum updating actions"); | ||
581 | MODULE_LICENSE("GPL"); | ||
582 | |||
583 | static int __init csum_init_module(void) | ||
584 | { | ||
585 | return tcf_register_action(&act_csum_ops); | ||
586 | } | ||
587 | |||
588 | static void __exit csum_cleanup_module(void) | ||
589 | { | ||
590 | tcf_unregister_action(&act_csum_ops); | ||
591 | } | ||
592 | |||
593 | module_init(csum_init_module); | ||
594 | module_exit(csum_cleanup_module); | ||
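Every per-protocol helper in the new act_csum.c above follows the same recipe: make the header writable via tcf_csum_skb_nextlayer(), zero the stored checksum, run csum_partial() over the covered bytes, then fold the result (ICMP/IGMP) or feed it to the pseudo-header helpers (TCP/UDP), and finally set skb->ip_summed to CHECKSUM_NONE so stale offload state is not trusted. A stripped-down sketch of just the fold step; the function name and bare-buffer framing are illustrative, not part of the patch:

#include <linux/types.h>
#include <net/checksum.h>

/*
 * Illustrative only: recompute a plain 16-bit Internet checksum over a
 * writable header of 'len' bytes (the ICMP/IGMP case above). TCP and UDP
 * additionally mix in the pseudo-header via csum_tcpudp_magic() or
 * csum_ipv6_magic(), as the file shows.
 */
static void recompute_inet_csum(void *hdr, __sum16 *csum_field, int len)
{
	*csum_field = 0;	/* the stored checksum must not be summed */
	*csum_field = csum_fold(csum_partial(hdr, len, 0));
}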
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a4c0b4..2b4ab4b05ce8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact) | |||
50 | } | 50 | } |
51 | 51 | ||
52 | typedef int (*g_rand)(struct tcf_gact *gact); | 52 | typedef int (*g_rand)(struct tcf_gact *gact); |
53 | static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; | 53 | static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ }; |
54 | #endif /* CONFIG_GACT_PROB */ | 54 | #endif /* CONFIG_GACT_PROB */ |
55 | 55 | ||
56 | static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { | 56 | static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { |
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est, | |||
89 | pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), | 89 | pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), |
90 | bind, &gact_idx_gen, &gact_hash_info); | 90 | bind, &gact_idx_gen, &gact_hash_info); |
91 | if (IS_ERR(pc)) | 91 | if (IS_ERR(pc)) |
92 | return PTR_ERR(pc); | 92 | return PTR_ERR(pc); |
93 | ret = ACT_P_CREATED; | 93 | ret = ACT_P_CREATED; |
94 | } else { | 94 | } else { |
95 | if (!ovr) { | 95 | if (!ovr) { |
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL"); | |||
205 | static int __init gact_init_module(void) | 205 | static int __init gact_init_module(void) |
206 | { | 206 | { |
207 | #ifdef CONFIG_GACT_PROB | 207 | #ifdef CONFIG_GACT_PROB |
208 | printk(KERN_INFO "GACT probability on\n"); | 208 | pr_info("GACT probability on\n"); |
209 | #else | 209 | #else |
210 | printk(KERN_INFO "GACT probability NOT on\n"); | 210 | pr_info("GACT probability NOT on\n"); |
211 | #endif | 211 | #endif |
212 | return tcf_register_action(&act_gact_ops); | 212 | return tcf_register_action(&act_gact_ops); |
213 | } | 213 | } |
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c7e59e6ec349..9fc211a1b20e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -39,7 +39,7 @@ static struct tcf_hashinfo ipt_hash_info = { | |||
39 | .lock = &ipt_lock, | 39 | .lock = &ipt_lock, |
40 | }; | 40 | }; |
41 | 41 | ||
42 | static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) | 42 | static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook) |
43 | { | 43 | { |
44 | struct xt_tgchk_param par; | 44 | struct xt_tgchk_param par; |
45 | struct xt_target *target; | 45 | struct xt_target *target; |
@@ -66,7 +66,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int | |||
66 | return 0; | 66 | return 0; |
67 | } | 67 | } |
68 | 68 | ||
69 | static void ipt_destroy_target(struct ipt_entry_target *t) | 69 | static void ipt_destroy_target(struct xt_entry_target *t) |
70 | { | 70 | { |
71 | struct xt_tgdtor_param par = { | 71 | struct xt_tgdtor_param par = { |
72 | .target = t->u.kernel.target, | 72 | .target = t->u.kernel.target, |
@@ -99,7 +99,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { | |||
99 | [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, | 99 | [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, |
100 | [TCA_IPT_HOOK] = { .type = NLA_U32 }, | 100 | [TCA_IPT_HOOK] = { .type = NLA_U32 }, |
101 | [TCA_IPT_INDEX] = { .type = NLA_U32 }, | 101 | [TCA_IPT_INDEX] = { .type = NLA_U32 }, |
102 | [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) }, | 102 | [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, |
103 | }; | 103 | }; |
104 | 104 | ||
105 | static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | 105 | static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, |
@@ -108,7 +108,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | |||
108 | struct nlattr *tb[TCA_IPT_MAX + 1]; | 108 | struct nlattr *tb[TCA_IPT_MAX + 1]; |
109 | struct tcf_ipt *ipt; | 109 | struct tcf_ipt *ipt; |
110 | struct tcf_common *pc; | 110 | struct tcf_common *pc; |
111 | struct ipt_entry_target *td, *t; | 111 | struct xt_entry_target *td, *t; |
112 | char *tname; | 112 | char *tname; |
113 | int ret = 0, err; | 113 | int ret = 0, err; |
114 | u32 hook = 0; | 114 | u32 hook = 0; |
@@ -126,7 +126,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | |||
126 | if (tb[TCA_IPT_TARG] == NULL) | 126 | if (tb[TCA_IPT_TARG] == NULL) |
127 | return -EINVAL; | 127 | return -EINVAL; |
128 | 128 | ||
129 | td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]); | 129 | td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); |
130 | if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) | 130 | if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) |
131 | return -EINVAL; | 131 | return -EINVAL; |
132 | 132 | ||
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | |||
138 | pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, | 138 | pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, |
139 | &ipt_idx_gen, &ipt_hash_info); | 139 | &ipt_idx_gen, &ipt_hash_info); |
140 | if (IS_ERR(pc)) | 140 | if (IS_ERR(pc)) |
141 | return PTR_ERR(pc); | 141 | return PTR_ERR(pc); |
142 | ret = ACT_P_CREATED; | 142 | ret = ACT_P_CREATED; |
143 | } else { | 143 | } else { |
144 | if (!ovr) { | 144 | if (!ovr) { |
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, | |||
162 | if (unlikely(!t)) | 162 | if (unlikely(!t)) |
163 | goto err2; | 163 | goto err2; |
164 | 164 | ||
165 | if ((err = ipt_init_target(t, tname, hook)) < 0) | 165 | err = ipt_init_target(t, tname, hook); |
166 | if (err < 0) | ||
166 | goto err3; | 167 | goto err3; |
167 | 168 | ||
168 | spin_lock_bh(&ipt->tcf_lock); | 169 | spin_lock_bh(&ipt->tcf_lock); |
@@ -209,12 +210,12 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, | |||
209 | spin_lock(&ipt->tcf_lock); | 210 | spin_lock(&ipt->tcf_lock); |
210 | 211 | ||
211 | ipt->tcf_tm.lastuse = jiffies; | 212 | ipt->tcf_tm.lastuse = jiffies; |
212 | ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); | 213 | bstats_update(&ipt->tcf_bstats, skb); |
213 | ipt->tcf_bstats.packets++; | ||
214 | 214 | ||
215 | /* yes, we have to worry about both in and out dev | 215 | /* yes, we have to worry about both in and out dev |
216 | worry later - danger - this API seems to have changed | 216 | * worry later - danger - this API seems to have changed |
217 | from earlier kernels */ | 217 | * from earlier kernels |
218 | */ | ||
218 | par.in = skb->dev; | 219 | par.in = skb->dev; |
219 | par.out = NULL; | 220 | par.out = NULL; |
220 | par.hooknum = ipt->tcfi_hook; | 221 | par.hooknum = ipt->tcfi_hook; |
@@ -230,7 +231,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, | |||
230 | result = TC_ACT_SHOT; | 231 | result = TC_ACT_SHOT; |
231 | ipt->tcf_qstats.drops++; | 232 | ipt->tcf_qstats.drops++; |
232 | break; | 233 | break; |
233 | case IPT_CONTINUE: | 234 | case XT_CONTINUE: |
234 | result = TC_ACT_PIPE; | 235 | result = TC_ACT_PIPE; |
235 | break; | 236 | break; |
236 | default: | 237 | default: |
@@ -249,14 +250,14 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int | |||
249 | { | 250 | { |
250 | unsigned char *b = skb_tail_pointer(skb); | 251 | unsigned char *b = skb_tail_pointer(skb); |
251 | struct tcf_ipt *ipt = a->priv; | 252 | struct tcf_ipt *ipt = a->priv; |
252 | struct ipt_entry_target *t; | 253 | struct xt_entry_target *t; |
253 | struct tcf_t tm; | 254 | struct tcf_t tm; |
254 | struct tc_cnt c; | 255 | struct tc_cnt c; |
255 | 256 | ||
256 | /* for simple targets kernel size == user size | 257 | /* for simple targets kernel size == user size |
257 | ** user name = target name | 258 | * user name = target name |
258 | ** for foolproof you need to not assume this | 259 | * for foolproof you need to not assume this |
259 | */ | 260 | */ |
260 | 261 | ||
261 | t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); | 262 | t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); |
262 | if (unlikely(!t)) | 263 | if (unlikely(!t)) |
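This and the following action modules (mirred, nat, pedit, police, simple, skbedit) all swap the open-coded bytes/packets accounting for a single bstats_update() call. A hypothetical my_bstats_update() is sketched below so as not to claim the exact in-tree definition (which lives in include/net/sch_generic.h and may also handle GSO segment counting); it only shows the bookkeeping these hunks stop open-coding:

#include <linux/skbuff.h>
#include <net/sch_generic.h>	/* qdisc_pkt_len(), struct gnet_stats_basic_packed */

/* Hypothetical stand-in for the real bstats_update() helper. */
static inline void my_bstats_update(struct gnet_stats_basic_packed *bstats,
				    struct sk_buff *skb)
{
	bstats->bytes += qdisc_pkt_len(skb);	/* length as seen by the qdisc */
	bstats->packets++;			/* one more packet accounted   */
}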
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0c311be92827..961386e2f2c0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = { | |||
41 | .lock = &mirred_lock, | 41 | .lock = &mirred_lock, |
42 | }; | 42 | }; |
43 | 43 | ||
44 | static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) | 44 | static int tcf_mirred_release(struct tcf_mirred *m, int bind) |
45 | { | 45 | { |
46 | if (m) { | 46 | if (m) { |
47 | if (bind) | 47 | if (bind) |
48 | m->tcf_bindcnt--; | 48 | m->tcf_bindcnt--; |
49 | m->tcf_refcnt--; | 49 | m->tcf_refcnt--; |
50 | if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { | 50 | if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) { |
51 | list_del(&m->tcfm_list); | 51 | list_del(&m->tcfm_list); |
52 | if (m->tcfm_dev) | 52 | if (m->tcfm_dev) |
53 | dev_put(m->tcfm_dev); | 53 | dev_put(m->tcfm_dev); |
@@ -165,8 +165,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, | |||
165 | 165 | ||
166 | spin_lock(&m->tcf_lock); | 166 | spin_lock(&m->tcf_lock); |
167 | m->tcf_tm.lastuse = jiffies; | 167 | m->tcf_tm.lastuse = jiffies; |
168 | m->tcf_bstats.bytes += qdisc_pkt_len(skb); | 168 | bstats_update(&m->tcf_bstats, skb); |
169 | m->tcf_bstats.packets++; | ||
170 | 169 | ||
171 | dev = m->tcfm_dev; | 170 | dev = m->tcfm_dev; |
172 | if (!dev) { | 171 | if (!dev) { |
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 186eb837e600..762b027650a9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est, | |||
69 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, | 69 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, |
70 | &nat_idx_gen, &nat_hash_info); | 70 | &nat_idx_gen, &nat_hash_info); |
71 | if (IS_ERR(pc)) | 71 | if (IS_ERR(pc)) |
72 | return PTR_ERR(pc); | 72 | return PTR_ERR(pc); |
73 | p = to_tcf_nat(pc); | 73 | p = to_tcf_nat(pc); |
74 | ret = ACT_P_CREATED; | 74 | ret = ACT_P_CREATED; |
75 | } else { | 75 | } else { |
@@ -125,8 +125,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a, | |||
125 | egress = p->flags & TCA_NAT_FLAG_EGRESS; | 125 | egress = p->flags & TCA_NAT_FLAG_EGRESS; |
126 | action = p->tcf_action; | 126 | action = p->tcf_action; |
127 | 127 | ||
128 | p->tcf_bstats.bytes += qdisc_pkt_len(skb); | 128 | bstats_update(&p->tcf_bstats, skb); |
129 | p->tcf_bstats.packets++; | ||
130 | 129 | ||
131 | spin_unlock(&p->tcf_lock); | 130 | spin_unlock(&p->tcf_lock); |
132 | 131 | ||
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0593c9640db..7affe9a92757 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est, | |||
70 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, | 70 | pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, |
71 | &pedit_idx_gen, &pedit_hash_info); | 71 | &pedit_idx_gen, &pedit_hash_info); |
72 | if (IS_ERR(pc)) | 72 | if (IS_ERR(pc)) |
73 | return PTR_ERR(pc); | 73 | return PTR_ERR(pc); |
74 | p = to_pedit(pc); | 74 | p = to_pedit(pc); |
75 | keys = kmalloc(ksize, GFP_KERNEL); | 75 | keys = kmalloc(ksize, GFP_KERNEL); |
76 | if (keys == NULL) { | 76 | if (keys == NULL) { |
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, | |||
127 | int i, munged = 0; | 127 | int i, munged = 0; |
128 | unsigned int off; | 128 | unsigned int off; |
129 | 129 | ||
130 | if (skb_cloned(skb)) { | 130 | if (skb_cloned(skb) && |
131 | if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { | 131 | pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) |
132 | return p->tcf_action; | 132 | return p->tcf_action; |
133 | } | ||
134 | } | ||
135 | 133 | ||
136 | off = skb_network_offset(skb); | 134 | off = skb_network_offset(skb); |
137 | 135 | ||
@@ -163,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, | |||
163 | } | 161 | } |
164 | if (offset > 0 && offset > skb->len) { | 162 | if (offset > 0 && offset > skb->len) { |
165 | pr_info("tc filter pedit" | 163 | pr_info("tc filter pedit" |
166 | " offset %d cant exceed pkt length %d\n", | 164 | " offset %d can't exceed pkt length %d\n", |
167 | offset, skb->len); | 165 | offset, skb->len); |
168 | goto bad; | 166 | goto bad; |
169 | } | 167 | } |
@@ -187,8 +185,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, | |||
187 | bad: | 185 | bad: |
188 | p->tcf_qstats.overlimits++; | 186 | p->tcf_qstats.overlimits++; |
189 | done: | 187 | done: |
190 | p->tcf_bstats.bytes += qdisc_pkt_len(skb); | 188 | bstats_update(&p->tcf_bstats, skb); |
191 | p->tcf_bstats.packets++; | ||
192 | spin_unlock(&p->tcf_lock); | 189 | spin_unlock(&p->tcf_lock); |
193 | return p->tcf_action; | 190 | return p->tcf_action; |
194 | } | 191 | } |
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 7ebf7439b478..b3b9b32f4e00 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@ | |||
22 | #include <net/act_api.h> | 22 | #include <net/act_api.h> |
23 | #include <net/netlink.h> | 23 | #include <net/netlink.h> |
24 | 24 | ||
25 | #define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) | 25 | #define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L) |
26 | #define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) | 26 | #define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L) |
27 | 27 | ||
28 | #define POL_TAB_MASK 15 | 28 | #define POL_TAB_MASK 15 |
29 | static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; | 29 | static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; |
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = { | |||
37 | }; | 37 | }; |
38 | 38 | ||
39 | /* old policer structure from before tc actions */ | 39 | /* old policer structure from before tc actions */ |
40 | struct tc_police_compat | 40 | struct tc_police_compat { |
41 | { | ||
42 | u32 index; | 41 | u32 index; |
43 | int action; | 42 | int action; |
44 | u32 limit; | 43 | u32 limit; |
@@ -97,11 +96,6 @@ nla_put_failure: | |||
97 | goto done; | 96 | goto done; |
98 | } | 97 | } |
99 | 98 | ||
100 | static void tcf_police_free_rcu(struct rcu_head *head) | ||
101 | { | ||
102 | kfree(container_of(head, struct tcf_police, tcf_rcu)); | ||
103 | } | ||
104 | |||
105 | static void tcf_police_destroy(struct tcf_police *p) | 99 | static void tcf_police_destroy(struct tcf_police *p) |
106 | { | 100 | { |
107 | unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); | 101 | unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); |
@@ -122,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p) | |||
122 | * gen_estimator est_timer() might access p->tcf_lock | 116 | * gen_estimator est_timer() might access p->tcf_lock |
123 | * or bstats, wait a RCU grace period before freeing p | 117 | * or bstats, wait a RCU grace period before freeing p |
124 | */ | 118 | */ |
125 | call_rcu(&p->tcf_rcu, tcf_police_free_rcu); | 119 | kfree_rcu(p, tcf_rcu); |
126 | return; | 120 | return; |
127 | } | 121 | } |
128 | } | 122 | } |
@@ -139,7 +133,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { | |||
139 | static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, | 133 | static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, |
140 | struct tc_action *a, int ovr, int bind) | 134 | struct tc_action *a, int ovr, int bind) |
141 | { | 135 | { |
142 | unsigned h; | 136 | unsigned int h; |
143 | int ret = 0, err; | 137 | int ret = 0, err; |
144 | struct nlattr *tb[TCA_POLICE_MAX + 1]; | 138 | struct nlattr *tb[TCA_POLICE_MAX + 1]; |
145 | struct tc_police *parm; | 139 | struct tc_police *parm; |
@@ -298,8 +292,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, | |||
298 | 292 | ||
299 | spin_lock(&police->tcf_lock); | 293 | spin_lock(&police->tcf_lock); |
300 | 294 | ||
301 | police->tcf_bstats.bytes += qdisc_pkt_len(skb); | 295 | bstats_update(&police->tcf_bstats, skb); |
302 | police->tcf_bstats.packets++; | ||
303 | 296 | ||
304 | if (police->tcfp_ewma_rate && | 297 | if (police->tcfp_ewma_rate && |
305 | police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { | 298 | police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { |
@@ -403,7 +396,6 @@ static void __exit | |||
403 | police_cleanup_module(void) | 396 | police_cleanup_module(void) |
404 | { | 397 | { |
405 | tcf_unregister_action(&act_police_ops); | 398 | tcf_unregister_action(&act_police_ops); |
406 | rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */ | ||
407 | } | 399 | } |
408 | 400 | ||
409 | module_init(police_init_module); | 401 | module_init(police_init_module); |
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 97e84f3ee775..a34a22de60b3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -42,13 +42,12 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result | |||
42 | 42 | ||
43 | spin_lock(&d->tcf_lock); | 43 | spin_lock(&d->tcf_lock); |
44 | d->tcf_tm.lastuse = jiffies; | 44 | d->tcf_tm.lastuse = jiffies; |
45 | d->tcf_bstats.bytes += qdisc_pkt_len(skb); | 45 | bstats_update(&d->tcf_bstats, skb); |
46 | d->tcf_bstats.packets++; | ||
47 | 46 | ||
48 | /* print policy string followed by _ then packet count | 47 | /* print policy string followed by _ then packet count |
49 | * Example if this was the 3rd packet and the string was "hello" | 48 | * Example if this was the 3rd packet and the string was "hello" |
50 | * then it would look like "hello_3" (without quotes) | 49 | * then it would look like "hello_3" (without quotes) |
51 | **/ | 50 | */ |
52 | pr_info("simple: %s_%d\n", | 51 | pr_info("simple: %s_%d\n", |
53 | (char *)d->tcfd_defdata, d->tcf_bstats.packets); | 52 | (char *)d->tcfd_defdata, d->tcf_bstats.packets); |
54 | spin_unlock(&d->tcf_lock); | 53 | spin_unlock(&d->tcf_lock); |
@@ -126,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, | |||
126 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, | 125 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, |
127 | &simp_idx_gen, &simp_hash_info); | 126 | &simp_idx_gen, &simp_hash_info); |
128 | if (IS_ERR(pc)) | 127 | if (IS_ERR(pc)) |
129 | return PTR_ERR(pc); | 128 | return PTR_ERR(pc); |
130 | 129 | ||
131 | d = to_defact(pc); | 130 | d = to_defact(pc); |
132 | ret = alloc_defdata(d, defdata); | 131 | ret = alloc_defdata(d, defdata); |
@@ -150,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est, | |||
150 | return ret; | 149 | return ret; |
151 | } | 150 | } |
152 | 151 | ||
153 | static inline int tcf_simp_cleanup(struct tc_action *a, int bind) | 152 | static int tcf_simp_cleanup(struct tc_action *a, int bind) |
154 | { | 153 | { |
155 | struct tcf_defact *d = a->priv; | 154 | struct tcf_defact *d = a->priv; |
156 | 155 | ||
@@ -159,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind) | |||
159 | return 0; | 158 | return 0; |
160 | } | 159 | } |
161 | 160 | ||
162 | static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, | 161 | static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, |
163 | int bind, int ref) | 162 | int bind, int ref) |
164 | { | 163 | { |
165 | unsigned char *b = skb_tail_pointer(skb); | 164 | unsigned char *b = skb_tail_pointer(skb); |
166 | struct tcf_defact *d = a->priv; | 165 | struct tcf_defact *d = a->priv; |
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 66cbf4eb8855..5f6f0c7c3905 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c | |||
@@ -46,8 +46,7 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, | |||
46 | 46 | ||
47 | spin_lock(&d->tcf_lock); | 47 | spin_lock(&d->tcf_lock); |
48 | d->tcf_tm.lastuse = jiffies; | 48 | d->tcf_tm.lastuse = jiffies; |
49 | d->tcf_bstats.bytes += qdisc_pkt_len(skb); | 49 | bstats_update(&d->tcf_bstats, skb); |
50 | d->tcf_bstats.packets++; | ||
51 | 50 | ||
52 | if (d->flags & SKBEDIT_F_PRIORITY) | 51 | if (d->flags & SKBEDIT_F_PRIORITY) |
53 | skb->priority = d->priority; | 52 | skb->priority = d->priority; |
@@ -114,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, | |||
114 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, | 113 | pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, |
115 | &skbedit_idx_gen, &skbedit_hash_info); | 114 | &skbedit_idx_gen, &skbedit_hash_info); |
116 | if (IS_ERR(pc)) | 115 | if (IS_ERR(pc)) |
117 | return PTR_ERR(pc); | 116 | return PTR_ERR(pc); |
118 | 117 | ||
119 | d = to_skbedit(pc); | 118 | d = to_skbedit(pc); |
120 | ret = ACT_P_CREATED; | 119 | ret = ACT_P_CREATED; |
@@ -145,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, | |||
145 | return ret; | 144 | return ret; |
146 | } | 145 | } |
147 | 146 | ||
148 | static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) | 147 | static int tcf_skbedit_cleanup(struct tc_action *a, int bind) |
149 | { | 148 | { |
150 | struct tcf_skbedit *d = a->priv; | 149 | struct tcf_skbedit *d = a->priv; |
151 | 150 | ||
@@ -154,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) | |||
154 | return 0; | 153 | return 0; |
155 | } | 154 | } |
156 | 155 | ||
157 | static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, | 156 | static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, |
158 | int bind, int ref) | 157 | int bind, int ref) |
159 | { | 158 | { |
160 | unsigned char *b = skb_tail_pointer(skb); | 159 | unsigned char *b = skb_tail_pointer(skb); |
161 | struct tcf_skbedit *d = a->priv; | 160 | struct tcf_skbedit *d = a->priv; |
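The same bstats_update() conversion appears in the act_police, act_simple and act_skbedit hunks above. For reference, the helper these hunks switch to amounts to roughly the following (a sketch of the 2.6.38-era include/net/sch_generic.h definition; the exact signature there may differ):

static inline void bstats_update(struct gnet_stats_basic_packed *bstats,
				 struct sk_buff *skb)
{
	bstats->bytes += qdisc_pkt_len(skb);
	bstats->packets++;
}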
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5fd0c28ef79a..bb2c523f8158 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c | |||
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) | |||
85 | int rc = -ENOENT; | 85 | int rc = -ENOENT; |
86 | 86 | ||
87 | write_lock(&cls_mod_lock); | 87 | write_lock(&cls_mod_lock); |
88 | for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) | 88 | for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next) |
89 | if (t == ops) | 89 | if (t == ops) |
90 | break; | 90 | break; |
91 | 91 | ||
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) | |||
111 | u32 first = TC_H_MAKE(0xC0000000U, 0U); | 111 | u32 first = TC_H_MAKE(0xC0000000U, 0U); |
112 | 112 | ||
113 | if (tp) | 113 | if (tp) |
114 | first = tp->prio-1; | 114 | first = tp->prio - 1; |
115 | 115 | ||
116 | return first; | 116 | return first; |
117 | } | 117 | } |
@@ -149,7 +149,8 @@ replay: | |||
149 | 149 | ||
150 | if (prio == 0) { | 150 | if (prio == 0) { |
151 | /* If no priority is given, user wants we allocated it. */ | 151 | /* If no priority is given, user wants we allocated it. */ |
152 | if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) | 152 | if (n->nlmsg_type != RTM_NEWTFILTER || |
153 | !(n->nlmsg_flags & NLM_F_CREATE)) | ||
153 | return -ENOENT; | 154 | return -ENOENT; |
154 | prio = TC_H_MAKE(0x80000000U, 0U); | 155 | prio = TC_H_MAKE(0x80000000U, 0U); |
155 | } | 156 | } |
@@ -176,7 +177,8 @@ replay: | |||
176 | } | 177 | } |
177 | 178 | ||
178 | /* Is it classful? */ | 179 | /* Is it classful? */ |
179 | if ((cops = q->ops->cl_ops) == NULL) | 180 | cops = q->ops->cl_ops; |
181 | if (!cops) | ||
180 | return -EINVAL; | 182 | return -EINVAL; |
181 | 183 | ||
182 | if (cops->tcf_chain == NULL) | 184 | if (cops->tcf_chain == NULL) |
@@ -196,10 +198,11 @@ replay: | |||
196 | goto errout; | 198 | goto errout; |
197 | 199 | ||
198 | /* Check the chain for existence of proto-tcf with this priority */ | 200 | /* Check the chain for existence of proto-tcf with this priority */ |
199 | for (back = chain; (tp=*back) != NULL; back = &tp->next) { | 201 | for (back = chain; (tp = *back) != NULL; back = &tp->next) { |
200 | if (tp->prio >= prio) { | 202 | if (tp->prio >= prio) { |
201 | if (tp->prio == prio) { | 203 | if (tp->prio == prio) { |
202 | if (!nprio || (tp->protocol != protocol && protocol)) | 204 | if (!nprio || |
205 | (tp->protocol != protocol && protocol)) | ||
203 | goto errout; | 206 | goto errout; |
204 | } else | 207 | } else |
205 | tp = NULL; | 208 | tp = NULL; |
@@ -216,7 +219,8 @@ replay: | |||
216 | goto errout; | 219 | goto errout; |
217 | 220 | ||
218 | err = -ENOENT; | 221 | err = -ENOENT; |
219 | if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) | 222 | if (n->nlmsg_type != RTM_NEWTFILTER || |
223 | !(n->nlmsg_flags & NLM_F_CREATE)) | ||
220 | goto errout; | 224 | goto errout; |
221 | 225 | ||
222 | 226 | ||
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
420 | 424 | ||
421 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) | 425 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) |
422 | return skb->len; | 426 | return skb->len; |
423 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 427 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
428 | if (!dev) | ||
424 | return skb->len; | 429 | return skb->len; |
425 | 430 | ||
426 | if (!tcm->tcm_parent) | 431 | if (!tcm->tcm_parent) |
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
429 | q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); | 434 | q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); |
430 | if (!q) | 435 | if (!q) |
431 | goto out; | 436 | goto out; |
432 | if ((cops = q->ops->cl_ops) == NULL) | 437 | cops = q->ops->cl_ops; |
438 | if (!cops) | ||
433 | goto errout; | 439 | goto errout; |
434 | if (cops->tcf_chain == NULL) | 440 | if (cops->tcf_chain == NULL) |
435 | goto errout; | 441 | goto errout; |
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
444 | 450 | ||
445 | s_t = cb->args[0]; | 451 | s_t = cb->args[0]; |
446 | 452 | ||
447 | for (tp=*chain, t=0; tp; tp = tp->next, t++) { | 453 | for (tp = *chain, t = 0; tp; tp = tp->next, t++) { |
448 | if (t < s_t) continue; | 454 | if (t < s_t) |
455 | continue; | ||
449 | if (TC_H_MAJ(tcm->tcm_info) && | 456 | if (TC_H_MAJ(tcm->tcm_info) && |
450 | TC_H_MAJ(tcm->tcm_info) != tp->prio) | 457 | TC_H_MAJ(tcm->tcm_info) != tp->prio) |
451 | continue; | 458 | continue; |
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) | |||
468 | arg.skb = skb; | 475 | arg.skb = skb; |
469 | arg.cb = cb; | 476 | arg.cb = cb; |
470 | arg.w.stop = 0; | 477 | arg.w.stop = 0; |
471 | arg.w.skip = cb->args[1]-1; | 478 | arg.w.skip = cb->args[1] - 1; |
472 | arg.w.count = 0; | 479 | arg.w.count = 0; |
473 | tp->ops->walk(tp, &arg.w); | 480 | tp->ops->walk(tp, &arg.w); |
474 | cb->args[1] = arg.w.count+1; | 481 | cb->args[1] = arg.w.count + 1; |
475 | if (arg.w.stop) | 482 | if (arg.w.stop) |
476 | break; | 483 | break; |
477 | } | 484 | } |
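The cls_api.c hunks are checkpatch-style cleanups; the recurring pattern, shown here only as a reading aid, splits an assignment-in-condition into two statements:

/* before */
if ((cops = q->ops->cl_ops) == NULL)
	return -EINVAL;

/* after */
cops = q->ops->cl_ops;
if (!cops)
	return -EINVAL;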
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index efd4f95fd050..8be8872dd571 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c | |||
@@ -21,14 +21,12 @@ | |||
21 | #include <net/act_api.h> | 21 | #include <net/act_api.h> |
22 | #include <net/pkt_cls.h> | 22 | #include <net/pkt_cls.h> |
23 | 23 | ||
24 | struct basic_head | 24 | struct basic_head { |
25 | { | ||
26 | u32 hgenerator; | 25 | u32 hgenerator; |
27 | struct list_head flist; | 26 | struct list_head flist; |
28 | }; | 27 | }; |
29 | 28 | ||
30 | struct basic_filter | 29 | struct basic_filter { |
31 | { | ||
32 | u32 handle; | 30 | u32 handle; |
33 | struct tcf_exts exts; | 31 | struct tcf_exts exts; |
34 | struct tcf_ematch_tree ematches; | 32 | struct tcf_ematch_tree ematches; |
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp) | |||
92 | return 0; | 90 | return 0; |
93 | } | 91 | } |
94 | 92 | ||
95 | static inline void basic_delete_filter(struct tcf_proto *tp, | 93 | static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f) |
96 | struct basic_filter *f) | ||
97 | { | 94 | { |
98 | tcf_unbind_filter(tp, &f->res); | 95 | tcf_unbind_filter(tp, &f->res); |
99 | tcf_exts_destroy(tp, &f->exts); | 96 | tcf_exts_destroy(tp, &f->exts); |
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { | |||
135 | [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, | 132 | [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, |
136 | }; | 133 | }; |
137 | 134 | ||
138 | static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, | 135 | static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, |
139 | unsigned long base, struct nlattr **tb, | 136 | unsigned long base, struct nlattr **tb, |
140 | struct nlattr *est) | 137 | struct nlattr *est) |
141 | { | 138 | { |
142 | int err = -EINVAL; | 139 | int err = -EINVAL; |
143 | struct tcf_exts e; | 140 | struct tcf_exts e; |
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
203 | } while (--i > 0 && basic_get(tp, head->hgenerator)); | 200 | } while (--i > 0 && basic_get(tp, head->hgenerator)); |
204 | 201 | ||
205 | if (i <= 0) { | 202 | if (i <= 0) { |
206 | printk(KERN_ERR "Insufficient number of handles\n"); | 203 | pr_err("Insufficient number of handles\n"); |
207 | goto errout; | 204 | goto errout; |
208 | } | 205 | } |
209 | 206 | ||
@@ -268,6 +265,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, | |||
268 | goto nla_put_failure; | 265 | goto nla_put_failure; |
269 | 266 | ||
270 | nla_nest_end(skb, nest); | 267 | nla_nest_end(skb, nest); |
268 | |||
269 | if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0) | ||
270 | goto nla_put_failure; | ||
271 | |||
271 | return skb->len; | 272 | return skb->len; |
272 | 273 | ||
273 | nla_put_failure: | 274 | nla_put_failure: |
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 78ef2c5e130b..32a335194ca5 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c | |||
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = { | |||
34 | .populate = cgrp_populate, | 34 | .populate = cgrp_populate, |
35 | #ifdef CONFIG_NET_CLS_CGROUP | 35 | #ifdef CONFIG_NET_CLS_CGROUP |
36 | .subsys_id = net_cls_subsys_id, | 36 | .subsys_id = net_cls_subsys_id, |
37 | #else | ||
38 | #define net_cls_subsys_id net_cls_subsys.subsys_id | ||
39 | #endif | 37 | #endif |
40 | .module = THIS_MODULE, | 38 | .module = THIS_MODULE, |
41 | }; | 39 | }; |
@@ -58,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss, | |||
58 | { | 56 | { |
59 | struct cgroup_cls_state *cs; | 57 | struct cgroup_cls_state *cs; |
60 | 58 | ||
61 | if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) | 59 | cs = kzalloc(sizeof(*cs), GFP_KERNEL); |
60 | if (!cs) | ||
62 | return ERR_PTR(-ENOMEM); | 61 | return ERR_PTR(-ENOMEM); |
63 | 62 | ||
64 | if (cgrp->parent) | 63 | if (cgrp->parent) |
@@ -96,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp) | |||
96 | return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); | 95 | return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); |
97 | } | 96 | } |
98 | 97 | ||
99 | struct cls_cgroup_head | 98 | struct cls_cgroup_head { |
100 | { | ||
101 | u32 handle; | 99 | u32 handle; |
102 | struct tcf_exts exts; | 100 | struct tcf_exts exts; |
103 | struct tcf_ematch_tree ematches; | 101 | struct tcf_ematch_tree ematches; |
@@ -123,7 +121,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
123 | * calls by looking at the number of nested bh disable calls because | 121 | * calls by looking at the number of nested bh disable calls because |
124 | * softirqs always disables bh. | 122 | * softirqs always disables bh. |
125 | */ | 123 | */ |
126 | if (softirq_count() != SOFTIRQ_OFFSET) { | 124 | if (in_serving_softirq()) { |
127 | /* If there is an sk_classid we'll use that. */ | 125 | /* If there is an sk_classid we'll use that. */ |
128 | if (!skb->sk) | 126 | if (!skb->sk) |
129 | return -1; | 127 | return -1; |
@@ -168,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base, | |||
168 | u32 handle, struct nlattr **tca, | 166 | u32 handle, struct nlattr **tca, |
169 | unsigned long *arg) | 167 | unsigned long *arg) |
170 | { | 168 | { |
171 | struct nlattr *tb[TCA_CGROUP_MAX+1]; | 169 | struct nlattr *tb[TCA_CGROUP_MAX + 1]; |
172 | struct cls_cgroup_head *head = tp->root; | 170 | struct cls_cgroup_head *head = tp->root; |
173 | struct tcf_ematch_tree t; | 171 | struct tcf_ematch_tree t; |
174 | struct tcf_exts e; | 172 | struct tcf_exts e; |
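The cls_cgroup_classify() hunk replaces the open-coded softirq_count() test with the in_serving_softirq() helper. As a sketch (assuming the include/linux/hardirq.h definition of that kernel generation), the helper reduces to:

#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)

which is true while a softirq handler is actually running, rather than merely while bottom halves are disabled.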
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index e17096e3913c..8ec01391d988 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c | |||
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb) | |||
111 | } | 111 | } |
112 | } | 112 | } |
113 | 113 | ||
114 | static int has_ports(u8 protocol) | ||
115 | { | ||
116 | switch (protocol) { | ||
117 | case IPPROTO_TCP: | ||
118 | case IPPROTO_UDP: | ||
119 | case IPPROTO_UDPLITE: | ||
120 | case IPPROTO_SCTP: | ||
121 | case IPPROTO_DCCP: | ||
122 | case IPPROTO_ESP: | ||
123 | return 1; | ||
124 | default: | ||
125 | return 0; | ||
126 | } | ||
127 | } | ||
128 | |||
129 | static u32 flow_get_proto_src(struct sk_buff *skb) | 114 | static u32 flow_get_proto_src(struct sk_buff *skb) |
130 | { | 115 | { |
131 | switch (skb->protocol) { | 116 | switch (skb->protocol) { |
132 | case htons(ETH_P_IP): { | 117 | case htons(ETH_P_IP): { |
133 | struct iphdr *iph; | 118 | struct iphdr *iph; |
119 | int poff; | ||
134 | 120 | ||
135 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | 121 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
136 | break; | 122 | break; |
137 | iph = ip_hdr(skb); | 123 | iph = ip_hdr(skb); |
138 | if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && | 124 | if (iph->frag_off & htons(IP_MF | IP_OFFSET)) |
139 | has_ports(iph->protocol) && | 125 | break; |
140 | pskb_network_may_pull(skb, iph->ihl * 4 + 2)) | 126 | poff = proto_ports_offset(iph->protocol); |
141 | return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); | 127 | if (poff >= 0 && |
128 | pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) { | ||
129 | iph = ip_hdr(skb); | ||
130 | return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + | ||
131 | poff)); | ||
132 | } | ||
142 | break; | 133 | break; |
143 | } | 134 | } |
144 | case htons(ETH_P_IPV6): { | 135 | case htons(ETH_P_IPV6): { |
145 | struct ipv6hdr *iph; | 136 | struct ipv6hdr *iph; |
137 | int poff; | ||
146 | 138 | ||
147 | if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) | 139 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
148 | break; | 140 | break; |
149 | iph = ipv6_hdr(skb); | 141 | iph = ipv6_hdr(skb); |
150 | if (has_ports(iph->nexthdr)) | 142 | poff = proto_ports_offset(iph->nexthdr); |
151 | return ntohs(*(__be16 *)&iph[1]); | 143 | if (poff >= 0 && |
144 | pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) { | ||
145 | iph = ipv6_hdr(skb); | ||
146 | return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) + | ||
147 | poff)); | ||
148 | } | ||
152 | break; | 149 | break; |
153 | } | 150 | } |
154 | } | 151 | } |
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb) | |||
161 | switch (skb->protocol) { | 158 | switch (skb->protocol) { |
162 | case htons(ETH_P_IP): { | 159 | case htons(ETH_P_IP): { |
163 | struct iphdr *iph; | 160 | struct iphdr *iph; |
161 | int poff; | ||
164 | 162 | ||
165 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | 163 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
166 | break; | 164 | break; |
167 | iph = ip_hdr(skb); | 165 | iph = ip_hdr(skb); |
168 | if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && | 166 | if (iph->frag_off & htons(IP_MF | IP_OFFSET)) |
169 | has_ports(iph->protocol) && | 167 | break; |
170 | pskb_network_may_pull(skb, iph->ihl * 4 + 4)) | 168 | poff = proto_ports_offset(iph->protocol); |
171 | return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); | 169 | if (poff >= 0 && |
170 | pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { | ||
171 | iph = ip_hdr(skb); | ||
172 | return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + | ||
173 | 2 + poff)); | ||
174 | } | ||
172 | break; | 175 | break; |
173 | } | 176 | } |
174 | case htons(ETH_P_IPV6): { | 177 | case htons(ETH_P_IPV6): { |
175 | struct ipv6hdr *iph; | 178 | struct ipv6hdr *iph; |
179 | int poff; | ||
176 | 180 | ||
177 | if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) | 181 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
178 | break; | 182 | break; |
179 | iph = ipv6_hdr(skb); | 183 | iph = ipv6_hdr(skb); |
180 | if (has_ports(iph->nexthdr)) | 184 | poff = proto_ports_offset(iph->nexthdr); |
181 | return ntohs(*(__be16 *)((void *)&iph[1] + 2)); | 185 | if (poff >= 0 && |
186 | pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) { | ||
187 | iph = ipv6_hdr(skb); | ||
188 | return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) + | ||
189 | poff + 2)); | ||
190 | } | ||
182 | break; | 191 | break; |
183 | } | 192 | } |
184 | } | 193 | } |
@@ -267,7 +276,7 @@ fallback: | |||
267 | 276 | ||
268 | static u32 flow_get_rtclassid(const struct sk_buff *skb) | 277 | static u32 flow_get_rtclassid(const struct sk_buff *skb) |
269 | { | 278 | { |
270 | #ifdef CONFIG_NET_CLS_ROUTE | 279 | #ifdef CONFIG_IP_ROUTE_CLASSID |
271 | if (skb_dst(skb)) | 280 | if (skb_dst(skb)) |
272 | return skb_dst(skb)->tclassid; | 281 | return skb_dst(skb)->tclassid; |
273 | #endif | 282 | #endif |
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb) | |||
297 | return tag & VLAN_VID_MASK; | 306 | return tag & VLAN_VID_MASK; |
298 | } | 307 | } |
299 | 308 | ||
309 | static u32 flow_get_rxhash(struct sk_buff *skb) | ||
310 | { | ||
311 | return skb_get_rxhash(skb); | ||
312 | } | ||
313 | |||
300 | static u32 flow_key_get(struct sk_buff *skb, int key) | 314 | static u32 flow_key_get(struct sk_buff *skb, int key) |
301 | { | 315 | { |
302 | switch (key) { | 316 | switch (key) { |
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key) | |||
334 | return flow_get_skgid(skb); | 348 | return flow_get_skgid(skb); |
335 | case FLOW_KEY_VLAN_TAG: | 349 | case FLOW_KEY_VLAN_TAG: |
336 | return flow_get_vlan_tag(skb); | 350 | return flow_get_vlan_tag(skb); |
351 | case FLOW_KEY_RXHASH: | ||
352 | return flow_get_rxhash(skb); | ||
337 | default: | 353 | default: |
338 | WARN_ON(1); | 354 | WARN_ON(1); |
339 | return 0; | 355 | return 0; |
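The flow_get_proto_src()/flow_get_proto_dst() hunks above drop the local has_ports() switch in favour of proto_ports_offset(), which also reports where the port (or SPI) words sit inside the transport header. Roughly, and only as an illustration (the authoritative definition of that era lives in include/net/flow.h):

static inline int proto_ports_offset(int proto)
{
	switch (proto) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
	case IPPROTO_DCCP:
	case IPPROTO_ESP:	/* SPI */
	case IPPROTO_SCTP:
	case IPPROTO_UDPLITE:
		return 0;	/* ports start right at the transport header */
	case IPPROTO_AH:	/* SPI */
		return 4;
	default:
		return -1;	/* nothing port-like to hash on */
	}
}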
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 93b0a7b6f9b4..26e7bc4ffb79 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c | |||
@@ -31,14 +31,12 @@ | |||
31 | 31 | ||
32 | #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) | 32 | #define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) |
33 | 33 | ||
34 | struct fw_head | 34 | struct fw_head { |
35 | { | ||
36 | struct fw_filter *ht[HTSIZE]; | 35 | struct fw_filter *ht[HTSIZE]; |
37 | u32 mask; | 36 | u32 mask; |
38 | }; | 37 | }; |
39 | 38 | ||
40 | struct fw_filter | 39 | struct fw_filter { |
41 | { | ||
42 | struct fw_filter *next; | 40 | struct fw_filter *next; |
43 | u32 id; | 41 | u32 id; |
44 | struct tcf_result res; | 42 | struct tcf_result res; |
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = { | |||
53 | .police = TCA_FW_POLICE | 51 | .police = TCA_FW_POLICE |
54 | }; | 52 | }; |
55 | 53 | ||
56 | static __inline__ int fw_hash(u32 handle) | 54 | static inline int fw_hash(u32 handle) |
57 | { | 55 | { |
58 | if (HTSIZE == 4096) | 56 | if (HTSIZE == 4096) |
59 | return ((handle >> 24) & 0xFFF) ^ | 57 | return ((handle >> 24) & 0xFFF) ^ |
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle) | |||
82 | static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | 80 | static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, |
83 | struct tcf_result *res) | 81 | struct tcf_result *res) |
84 | { | 82 | { |
85 | struct fw_head *head = (struct fw_head*)tp->root; | 83 | struct fw_head *head = (struct fw_head *)tp->root; |
86 | struct fw_filter *f; | 84 | struct fw_filter *f; |
87 | int r; | 85 | int r; |
88 | u32 id = skb->mark; | 86 | u32 id = skb->mark; |
89 | 87 | ||
90 | if (head != NULL) { | 88 | if (head != NULL) { |
91 | id &= head->mask; | 89 | id &= head->mask; |
92 | for (f=head->ht[fw_hash(id)]; f; f=f->next) { | 90 | for (f = head->ht[fw_hash(id)]; f; f = f->next) { |
93 | if (f->id == id) { | 91 | if (f->id == id) { |
94 | *res = f->res; | 92 | *res = f->res; |
95 | #ifdef CONFIG_NET_CLS_IND | 93 | #ifdef CONFIG_NET_CLS_IND |
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
105 | } | 103 | } |
106 | } else { | 104 | } else { |
107 | /* old method */ | 105 | /* old method */ |
108 | if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { | 106 | if (id && (TC_H_MAJ(id) == 0 || |
107 | !(TC_H_MAJ(id ^ tp->q->handle)))) { | ||
109 | res->classid = id; | 108 | res->classid = id; |
110 | res->class = 0; | 109 | res->class = 0; |
111 | return 0; | 110 | return 0; |
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
117 | 116 | ||
118 | static unsigned long fw_get(struct tcf_proto *tp, u32 handle) | 117 | static unsigned long fw_get(struct tcf_proto *tp, u32 handle) |
119 | { | 118 | { |
120 | struct fw_head *head = (struct fw_head*)tp->root; | 119 | struct fw_head *head = (struct fw_head *)tp->root; |
121 | struct fw_filter *f; | 120 | struct fw_filter *f; |
122 | 121 | ||
123 | if (head == NULL) | 122 | if (head == NULL) |
124 | return 0; | 123 | return 0; |
125 | 124 | ||
126 | for (f=head->ht[fw_hash(handle)]; f; f=f->next) { | 125 | for (f = head->ht[fw_hash(handle)]; f; f = f->next) { |
127 | if (f->id == handle) | 126 | if (f->id == handle) |
128 | return (unsigned long)f; | 127 | return (unsigned long)f; |
129 | } | 128 | } |
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp) | |||
139 | return 0; | 138 | return 0; |
140 | } | 139 | } |
141 | 140 | ||
142 | static inline void | 141 | static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) |
143 | fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f) | ||
144 | { | 142 | { |
145 | tcf_unbind_filter(tp, &f->res); | 143 | tcf_unbind_filter(tp, &f->res); |
146 | tcf_exts_destroy(tp, &f->exts); | 144 | tcf_exts_destroy(tp, &f->exts); |
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp) | |||
156 | if (head == NULL) | 154 | if (head == NULL) |
157 | return; | 155 | return; |
158 | 156 | ||
159 | for (h=0; h<HTSIZE; h++) { | 157 | for (h = 0; h < HTSIZE; h++) { |
160 | while ((f=head->ht[h]) != NULL) { | 158 | while ((f = head->ht[h]) != NULL) { |
161 | head->ht[h] = f->next; | 159 | head->ht[h] = f->next; |
162 | fw_delete_filter(tp, f); | 160 | fw_delete_filter(tp, f); |
163 | } | 161 | } |
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp) | |||
167 | 165 | ||
168 | static int fw_delete(struct tcf_proto *tp, unsigned long arg) | 166 | static int fw_delete(struct tcf_proto *tp, unsigned long arg) |
169 | { | 167 | { |
170 | struct fw_head *head = (struct fw_head*)tp->root; | 168 | struct fw_head *head = (struct fw_head *)tp->root; |
171 | struct fw_filter *f = (struct fw_filter*)arg; | 169 | struct fw_filter *f = (struct fw_filter *)arg; |
172 | struct fw_filter **fp; | 170 | struct fw_filter **fp; |
173 | 171 | ||
174 | if (head == NULL || f == NULL) | 172 | if (head == NULL || f == NULL) |
175 | goto out; | 173 | goto out; |
176 | 174 | ||
177 | for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { | 175 | for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { |
178 | if (*fp == f) { | 176 | if (*fp == f) { |
179 | tcf_tree_lock(tp); | 177 | tcf_tree_lock(tp); |
180 | *fp = f->next; | 178 | *fp = f->next; |
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, | |||
240 | struct nlattr **tca, | 238 | struct nlattr **tca, |
241 | unsigned long *arg) | 239 | unsigned long *arg) |
242 | { | 240 | { |
243 | struct fw_head *head = (struct fw_head*)tp->root; | 241 | struct fw_head *head = (struct fw_head *)tp->root; |
244 | struct fw_filter *f = (struct fw_filter *) *arg; | 242 | struct fw_filter *f = (struct fw_filter *) *arg; |
245 | struct nlattr *opt = tca[TCA_OPTIONS]; | 243 | struct nlattr *opt = tca[TCA_OPTIONS]; |
246 | struct nlattr *tb[TCA_FW_MAX + 1]; | 244 | struct nlattr *tb[TCA_FW_MAX + 1]; |
@@ -302,7 +300,7 @@ errout: | |||
302 | 300 | ||
303 | static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) | 301 | static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) |
304 | { | 302 | { |
305 | struct fw_head *head = (struct fw_head*)tp->root; | 303 | struct fw_head *head = (struct fw_head *)tp->root; |
306 | int h; | 304 | int h; |
307 | 305 | ||
308 | if (head == NULL) | 306 | if (head == NULL) |
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, | |||
332 | struct sk_buff *skb, struct tcmsg *t) | 330 | struct sk_buff *skb, struct tcmsg *t) |
333 | { | 331 | { |
334 | struct fw_head *head = (struct fw_head *)tp->root; | 332 | struct fw_head *head = (struct fw_head *)tp->root; |
335 | struct fw_filter *f = (struct fw_filter*)fh; | 333 | struct fw_filter *f = (struct fw_filter *)fh; |
336 | unsigned char *b = skb_tail_pointer(skb); | 334 | unsigned char *b = skb_tail_pointer(skb); |
337 | struct nlattr *nest; | 335 | struct nlattr *nest; |
338 | 336 | ||
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 694dcd85dec8..a907905376df 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c | |||
@@ -23,34 +23,30 @@ | |||
23 | #include <net/pkt_cls.h> | 23 | #include <net/pkt_cls.h> |
24 | 24 | ||
25 | /* | 25 | /* |
26 | 1. For now we assume that route tags < 256. | 26 | * 1. For now we assume that route tags < 256. |
27 | It allows to use direct table lookups, instead of hash tables. | 27 | * It allows to use direct table lookups, instead of hash tables. |
28 | 2. For now we assume that "from TAG" and "fromdev DEV" statements | 28 | * 2. For now we assume that "from TAG" and "fromdev DEV" statements |
29 | are mutually exclusive. | 29 | * are mutually exclusive. |
30 | 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" | 30 | * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" |
31 | */ | 31 | */ |
32 | 32 | ||
33 | struct route4_fastmap | 33 | struct route4_fastmap { |
34 | { | ||
35 | struct route4_filter *filter; | 34 | struct route4_filter *filter; |
36 | u32 id; | 35 | u32 id; |
37 | int iif; | 36 | int iif; |
38 | }; | 37 | }; |
39 | 38 | ||
40 | struct route4_head | 39 | struct route4_head { |
41 | { | ||
42 | struct route4_fastmap fastmap[16]; | 40 | struct route4_fastmap fastmap[16]; |
43 | struct route4_bucket *table[256+1]; | 41 | struct route4_bucket *table[256 + 1]; |
44 | }; | 42 | }; |
45 | 43 | ||
46 | struct route4_bucket | 44 | struct route4_bucket { |
47 | { | ||
48 | /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ | 45 | /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ |
49 | struct route4_filter *ht[16+16+1]; | 46 | struct route4_filter *ht[16 + 16 + 1]; |
50 | }; | 47 | }; |
51 | 48 | ||
52 | struct route4_filter | 49 | struct route4_filter { |
53 | { | ||
54 | struct route4_filter *next; | 50 | struct route4_filter *next; |
55 | u32 id; | 51 | u32 id; |
56 | int iif; | 52 | int iif; |
@@ -61,20 +57,20 @@ struct route4_filter | |||
61 | struct route4_bucket *bkt; | 57 | struct route4_bucket *bkt; |
62 | }; | 58 | }; |
63 | 59 | ||
64 | #define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) | 60 | #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) |
65 | 61 | ||
66 | static const struct tcf_ext_map route_ext_map = { | 62 | static const struct tcf_ext_map route_ext_map = { |
67 | .police = TCA_ROUTE4_POLICE, | 63 | .police = TCA_ROUTE4_POLICE, |
68 | .action = TCA_ROUTE4_ACT | 64 | .action = TCA_ROUTE4_ACT |
69 | }; | 65 | }; |
70 | 66 | ||
71 | static __inline__ int route4_fastmap_hash(u32 id, int iif) | 67 | static inline int route4_fastmap_hash(u32 id, int iif) |
72 | { | 68 | { |
73 | return id&0xF; | 69 | return id & 0xF; |
74 | } | 70 | } |
75 | 71 | ||
76 | static inline | 72 | static void |
77 | void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) | 73 | route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) |
78 | { | 74 | { |
79 | spinlock_t *root_lock = qdisc_root_sleeping_lock(q); | 75 | spinlock_t *root_lock = qdisc_root_sleeping_lock(q); |
80 | 76 | ||
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) | |||
83 | spin_unlock_bh(root_lock); | 79 | spin_unlock_bh(root_lock); |
84 | } | 80 | } |
85 | 81 | ||
86 | static inline void | 82 | static void |
87 | route4_set_fastmap(struct route4_head *head, u32 id, int iif, | 83 | route4_set_fastmap(struct route4_head *head, u32 id, int iif, |
88 | struct route4_filter *f) | 84 | struct route4_filter *f) |
89 | { | 85 | { |
90 | int h = route4_fastmap_hash(id, iif); | 86 | int h = route4_fastmap_hash(id, iif); |
87 | |||
91 | head->fastmap[h].id = id; | 88 | head->fastmap[h].id = id; |
92 | head->fastmap[h].iif = iif; | 89 | head->fastmap[h].iif = iif; |
93 | head->fastmap[h].filter = f; | 90 | head->fastmap[h].filter = f; |
94 | } | 91 | } |
95 | 92 | ||
96 | static __inline__ int route4_hash_to(u32 id) | 93 | static inline int route4_hash_to(u32 id) |
97 | { | 94 | { |
98 | return id&0xFF; | 95 | return id & 0xFF; |
99 | } | 96 | } |
100 | 97 | ||
101 | static __inline__ int route4_hash_from(u32 id) | 98 | static inline int route4_hash_from(u32 id) |
102 | { | 99 | { |
103 | return (id>>16)&0xF; | 100 | return (id >> 16) & 0xF; |
104 | } | 101 | } |
105 | 102 | ||
106 | static __inline__ int route4_hash_iif(int iif) | 103 | static inline int route4_hash_iif(int iif) |
107 | { | 104 | { |
108 | return 16 + ((iif>>16)&0xF); | 105 | return 16 + ((iif >> 16) & 0xF); |
109 | } | 106 | } |
110 | 107 | ||
111 | static __inline__ int route4_hash_wild(void) | 108 | static inline int route4_hash_wild(void) |
112 | { | 109 | { |
113 | return 32; | 110 | return 32; |
114 | } | 111 | } |
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void) | |||
131 | static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, | 128 | static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, |
132 | struct tcf_result *res) | 129 | struct tcf_result *res) |
133 | { | 130 | { |
134 | struct route4_head *head = (struct route4_head*)tp->root; | 131 | struct route4_head *head = (struct route4_head *)tp->root; |
135 | struct dst_entry *dst; | 132 | struct dst_entry *dst; |
136 | struct route4_bucket *b; | 133 | struct route4_bucket *b; |
137 | struct route4_filter *f; | 134 | struct route4_filter *f; |
138 | u32 id, h; | 135 | u32 id, h; |
139 | int iif, dont_cache = 0; | 136 | int iif, dont_cache = 0; |
140 | 137 | ||
141 | if ((dst = skb_dst(skb)) == NULL) | 138 | dst = skb_dst(skb); |
139 | if (!dst) | ||
142 | goto failure; | 140 | goto failure; |
143 | 141 | ||
144 | id = dst->tclassid; | 142 | id = dst->tclassid; |
145 | if (head == NULL) | 143 | if (head == NULL) |
146 | goto old_method; | 144 | goto old_method; |
147 | 145 | ||
148 | iif = ((struct rtable*)dst)->fl.iif; | 146 | iif = ((struct rtable *)dst)->rt_iif; |
149 | 147 | ||
150 | h = route4_fastmap_hash(id, iif); | 148 | h = route4_fastmap_hash(id, iif); |
151 | if (id == head->fastmap[h].id && | 149 | if (id == head->fastmap[h].id && |
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
161 | h = route4_hash_to(id); | 159 | h = route4_hash_to(id); |
162 | 160 | ||
163 | restart: | 161 | restart: |
164 | if ((b = head->table[h]) != NULL) { | 162 | b = head->table[h]; |
163 | if (b) { | ||
165 | for (f = b->ht[route4_hash_from(id)]; f; f = f->next) | 164 | for (f = b->ht[route4_hash_from(id)]; f; f = f->next) |
166 | if (f->id == id) | 165 | if (f->id == id) |
167 | ROUTE4_APPLY_RESULT(); | 166 | ROUTE4_APPLY_RESULT(); |
@@ -197,8 +196,9 @@ old_method: | |||
197 | 196 | ||
198 | static inline u32 to_hash(u32 id) | 197 | static inline u32 to_hash(u32 id) |
199 | { | 198 | { |
200 | u32 h = id&0xFF; | 199 | u32 h = id & 0xFF; |
201 | if (id&0x8000) | 200 | |
201 | if (id & 0x8000) | ||
202 | h += 256; | 202 | h += 256; |
203 | return h; | 203 | return h; |
204 | } | 204 | } |
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id) | |||
211 | if (!(id & 0x8000)) { | 211 | if (!(id & 0x8000)) { |
212 | if (id > 255) | 212 | if (id > 255) |
213 | return 256; | 213 | return 256; |
214 | return id&0xF; | 214 | return id & 0xF; |
215 | } | 215 | } |
216 | return 16 + (id&0xF); | 216 | return 16 + (id & 0xF); |
217 | } | 217 | } |
218 | 218 | ||
219 | static unsigned long route4_get(struct tcf_proto *tp, u32 handle) | 219 | static unsigned long route4_get(struct tcf_proto *tp, u32 handle) |
220 | { | 220 | { |
221 | struct route4_head *head = (struct route4_head*)tp->root; | 221 | struct route4_head *head = (struct route4_head *)tp->root; |
222 | struct route4_bucket *b; | 222 | struct route4_bucket *b; |
223 | struct route4_filter *f; | 223 | struct route4_filter *f; |
224 | unsigned h1, h2; | 224 | unsigned int h1, h2; |
225 | 225 | ||
226 | if (!head) | 226 | if (!head) |
227 | return 0; | 227 | return 0; |
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) | |||
230 | if (h1 > 256) | 230 | if (h1 > 256) |
231 | return 0; | 231 | return 0; |
232 | 232 | ||
233 | h2 = from_hash(handle>>16); | 233 | h2 = from_hash(handle >> 16); |
234 | if (h2 > 32) | 234 | if (h2 > 32) |
235 | return 0; | 235 | return 0; |
236 | 236 | ||
237 | if ((b = head->table[h1]) != NULL) { | 237 | b = head->table[h1]; |
238 | if (b) { | ||
238 | for (f = b->ht[h2]; f; f = f->next) | 239 | for (f = b->ht[h2]; f; f = f->next) |
239 | if (f->handle == handle) | 240 | if (f->handle == handle) |
240 | return (unsigned long)f; | 241 | return (unsigned long)f; |
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp) | |||
251 | return 0; | 252 | return 0; |
252 | } | 253 | } |
253 | 254 | ||
254 | static inline void | 255 | static void |
255 | route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) | 256 | route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) |
256 | { | 257 | { |
257 | tcf_unbind_filter(tp, &f->res); | 258 | tcf_unbind_filter(tp, &f->res); |
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp) | |||
267 | if (head == NULL) | 268 | if (head == NULL) |
268 | return; | 269 | return; |
269 | 270 | ||
270 | for (h1=0; h1<=256; h1++) { | 271 | for (h1 = 0; h1 <= 256; h1++) { |
271 | struct route4_bucket *b; | 272 | struct route4_bucket *b; |
272 | 273 | ||
273 | if ((b = head->table[h1]) != NULL) { | 274 | b = head->table[h1]; |
274 | for (h2=0; h2<=32; h2++) { | 275 | if (b) { |
276 | for (h2 = 0; h2 <= 32; h2++) { | ||
275 | struct route4_filter *f; | 277 | struct route4_filter *f; |
276 | 278 | ||
277 | while ((f = b->ht[h2]) != NULL) { | 279 | while ((f = b->ht[h2]) != NULL) { |
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp) | |||
287 | 289 | ||
288 | static int route4_delete(struct tcf_proto *tp, unsigned long arg) | 290 | static int route4_delete(struct tcf_proto *tp, unsigned long arg) |
289 | { | 291 | { |
290 | struct route4_head *head = (struct route4_head*)tp->root; | 292 | struct route4_head *head = (struct route4_head *)tp->root; |
291 | struct route4_filter **fp, *f = (struct route4_filter*)arg; | 293 | struct route4_filter **fp, *f = (struct route4_filter *)arg; |
292 | unsigned h = 0; | 294 | unsigned int h = 0; |
293 | struct route4_bucket *b; | 295 | struct route4_bucket *b; |
294 | int i; | 296 | int i; |
295 | 297 | ||
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) | |||
299 | h = f->handle; | 301 | h = f->handle; |
300 | b = f->bkt; | 302 | b = f->bkt; |
301 | 303 | ||
302 | for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { | 304 | for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) { |
303 | if (*fp == f) { | 305 | if (*fp == f) { |
304 | tcf_tree_lock(tp); | 306 | tcf_tree_lock(tp); |
305 | *fp = f->next; | 307 | *fp = f->next; |
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg) | |||
310 | 312 | ||
311 | /* Strip tree */ | 313 | /* Strip tree */ |
312 | 314 | ||
313 | for (i=0; i<=32; i++) | 315 | for (i = 0; i <= 32; i++) |
314 | if (b->ht[i]) | 316 | if (b->ht[i]) |
315 | return 0; | 317 | return 0; |
316 | 318 | ||
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, | |||
380 | } | 382 | } |
381 | 383 | ||
382 | h1 = to_hash(nhandle); | 384 | h1 = to_hash(nhandle); |
383 | if ((b = head->table[h1]) == NULL) { | 385 | b = head->table[h1]; |
386 | if (!b) { | ||
384 | err = -ENOBUFS; | 387 | err = -ENOBUFS; |
385 | b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); | 388 | b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); |
386 | if (b == NULL) | 389 | if (b == NULL) |
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, | |||
391 | tcf_tree_unlock(tp); | 394 | tcf_tree_unlock(tp); |
392 | } else { | 395 | } else { |
393 | unsigned int h2 = from_hash(nhandle >> 16); | 396 | unsigned int h2 = from_hash(nhandle >> 16); |
397 | |||
394 | err = -EEXIST; | 398 | err = -EEXIST; |
395 | for (fp = b->ht[h2]; fp; fp = fp->next) | 399 | for (fp = b->ht[h2]; fp; fp = fp->next) |
396 | if (fp->handle == f->handle) | 400 | if (fp->handle == f->handle) |
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, | |||
444 | if (err < 0) | 448 | if (err < 0) |
445 | return err; | 449 | return err; |
446 | 450 | ||
447 | if ((f = (struct route4_filter*)*arg) != NULL) { | 451 | f = (struct route4_filter *)*arg; |
452 | if (f) { | ||
448 | if (f->handle != handle && handle) | 453 | if (f->handle != handle && handle) |
449 | return -EINVAL; | 454 | return -EINVAL; |
450 | 455 | ||
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base, | |||
481 | 486 | ||
482 | reinsert: | 487 | reinsert: |
483 | h = from_hash(f->handle >> 16); | 488 | h = from_hash(f->handle >> 16); |
484 | for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) | 489 | for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next) |
485 | if (f->handle < f1->handle) | 490 | if (f->handle < f1->handle) |
486 | break; | 491 | break; |
487 | 492 | ||
@@ -492,7 +497,8 @@ reinsert: | |||
492 | if (old_handle && f->handle != old_handle) { | 497 | if (old_handle && f->handle != old_handle) { |
493 | th = to_hash(old_handle); | 498 | th = to_hash(old_handle); |
494 | h = from_hash(old_handle >> 16); | 499 | h = from_hash(old_handle >> 16); |
495 | if ((b = head->table[th]) != NULL) { | 500 | b = head->table[th]; |
501 | if (b) { | ||
496 | for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { | 502 | for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { |
497 | if (*fp == f) { | 503 | if (*fp == f) { |
498 | *fp = f->next; | 504 | *fp = f->next; |
@@ -515,7 +521,7 @@ errout: | |||
515 | static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) | 521 | static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) |
516 | { | 522 | { |
517 | struct route4_head *head = tp->root; | 523 | struct route4_head *head = tp->root; |
518 | unsigned h, h1; | 524 | unsigned int h, h1; |
519 | 525 | ||
520 | if (head == NULL) | 526 | if (head == NULL) |
521 | arg->stop = 1; | 527 | arg->stop = 1; |
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
549 | static int route4_dump(struct tcf_proto *tp, unsigned long fh, | 555 | static int route4_dump(struct tcf_proto *tp, unsigned long fh, |
550 | struct sk_buff *skb, struct tcmsg *t) | 556 | struct sk_buff *skb, struct tcmsg *t) |
551 | { | 557 | { |
552 | struct route4_filter *f = (struct route4_filter*)fh; | 558 | struct route4_filter *f = (struct route4_filter *)fh; |
553 | unsigned char *b = skb_tail_pointer(skb); | 559 | unsigned char *b = skb_tail_pointer(skb); |
554 | struct nlattr *nest; | 560 | struct nlattr *nest; |
555 | u32 id; | 561 | u32 id; |
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh, | |||
563 | if (nest == NULL) | 569 | if (nest == NULL) |
564 | goto nla_put_failure; | 570 | goto nla_put_failure; |
565 | 571 | ||
566 | if (!(f->handle&0x8000)) { | 572 | if (!(f->handle & 0x8000)) { |
567 | id = f->id&0xFF; | 573 | id = f->id & 0xFF; |
568 | NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); | 574 | NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); |
569 | } | 575 | } |
570 | if (f->handle&0x80000000) { | 576 | if (f->handle & 0x80000000) { |
571 | if ((f->handle>>16) != 0xFFFF) | 577 | if ((f->handle >> 16) != 0xFFFF) |
572 | NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); | 578 | NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); |
573 | } else { | 579 | } else { |
574 | id = f->id>>16; | 580 | id = f->id >> 16; |
575 | NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); | 581 | NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); |
576 | } | 582 | } |
577 | if (f->res.classid) | 583 | if (f->res.classid) |
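To make the route4 hunks easier to follow, here is a small stand-alone restatement of the handle-to-bucket mapping that to_hash() and from_hash() implement (a userspace sketch for illustration; the handle value is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Mirrors to_hash(): the low byte picks one of the 256 rows, and bit 15
 * selects the extra "+1" row of head->table[256 + 1]; route4_get()
 * rejects anything that folds above 256. */
static uint32_t to_hash(uint32_t id)
{
	uint32_t h = id & 0xFF;

	if (id & 0x8000)
		h += 256;
	return h;
}

/* Mirrors from_hash(), applied to handle >> 16: the low nibble picks one
 * of the 16 "from" slots, bit 15 shifts into the 16 "iif" slots, and
 * other out-of-range values map to 256 so route4_get()'s "> 32" check
 * rejects them. */
static uint32_t from_hash(uint32_t id)
{
	if (!(id & 0x8000)) {
		if (id > 255)
			return 256;
		return id & 0xF;
	}
	return 16 + (id & 0xF);
}

int main(void)
{
	uint32_t handle = 0x00107823;	/* hypothetical filter handle */

	printf("row %u, slot %u\n", to_hash(handle), from_hash(handle >> 16));
	return 0;
}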
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 425a1790b048..402c44b241a3 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h | |||
@@ -66,28 +66,25 @@ | |||
66 | powerful classification engine. */ | 66 | powerful classification engine. */ |
67 | 67 | ||
68 | 68 | ||
69 | struct rsvp_head | 69 | struct rsvp_head { |
70 | { | ||
71 | u32 tmap[256/32]; | 70 | u32 tmap[256/32]; |
72 | u32 hgenerator; | 71 | u32 hgenerator; |
73 | u8 tgenerator; | 72 | u8 tgenerator; |
74 | struct rsvp_session *ht[256]; | 73 | struct rsvp_session *ht[256]; |
75 | }; | 74 | }; |
76 | 75 | ||
77 | struct rsvp_session | 76 | struct rsvp_session { |
78 | { | ||
79 | struct rsvp_session *next; | 77 | struct rsvp_session *next; |
80 | __be32 dst[RSVP_DST_LEN]; | 78 | __be32 dst[RSVP_DST_LEN]; |
81 | struct tc_rsvp_gpi dpi; | 79 | struct tc_rsvp_gpi dpi; |
82 | u8 protocol; | 80 | u8 protocol; |
83 | u8 tunnelid; | 81 | u8 tunnelid; |
84 | /* 16 (src,sport) hash slots, and one wildcard source slot */ | 82 | /* 16 (src,sport) hash slots, and one wildcard source slot */ |
85 | struct rsvp_filter *ht[16+1]; | 83 | struct rsvp_filter *ht[16 + 1]; |
86 | }; | 84 | }; |
87 | 85 | ||
88 | 86 | ||
89 | struct rsvp_filter | 87 | struct rsvp_filter { |
90 | { | ||
91 | struct rsvp_filter *next; | 88 | struct rsvp_filter *next; |
92 | __be32 src[RSVP_DST_LEN]; | 89 | __be32 src[RSVP_DST_LEN]; |
93 | struct tc_rsvp_gpi spi; | 90 | struct tc_rsvp_gpi spi; |
@@ -100,17 +97,19 @@ struct rsvp_filter | |||
100 | struct rsvp_session *sess; | 97 | struct rsvp_session *sess; |
101 | }; | 98 | }; |
102 | 99 | ||
103 | static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) | 100 | static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) |
104 | { | 101 | { |
105 | unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; | 102 | unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1]; |
103 | |||
106 | h ^= h>>16; | 104 | h ^= h>>16; |
107 | h ^= h>>8; | 105 | h ^= h>>8; |
108 | return (h ^ protocol ^ tunnelid) & 0xFF; | 106 | return (h ^ protocol ^ tunnelid) & 0xFF; |
109 | } | 107 | } |
110 | 108 | ||
111 | static __inline__ unsigned hash_src(__be32 *src) | 109 | static inline unsigned int hash_src(__be32 *src) |
112 | { | 110 | { |
113 | unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; | 111 | unsigned int h = (__force __u32)src[RSVP_DST_LEN-1]; |
112 | |||
114 | h ^= h>>16; | 113 | h ^= h>>16; |
115 | h ^= h>>8; | 114 | h ^= h>>8; |
116 | h ^= h>>4; | 115 | h ^= h>>4; |
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = { | |||
134 | static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, | 133 | static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, |
135 | struct tcf_result *res) | 134 | struct tcf_result *res) |
136 | { | 135 | { |
137 | struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; | 136 | struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; |
138 | struct rsvp_session *s; | 137 | struct rsvp_session *s; |
139 | struct rsvp_filter *f; | 138 | struct rsvp_filter *f; |
140 | unsigned h1, h2; | 139 | unsigned int h1, h2; |
141 | __be32 *dst, *src; | 140 | __be32 *dst, *src; |
142 | u8 protocol; | 141 | u8 protocol; |
143 | u8 tunnelid = 0; | 142 | u8 tunnelid = 0; |
@@ -162,13 +161,13 @@ restart: | |||
162 | src = &nhptr->saddr.s6_addr32[0]; | 161 | src = &nhptr->saddr.s6_addr32[0]; |
163 | dst = &nhptr->daddr.s6_addr32[0]; | 162 | dst = &nhptr->daddr.s6_addr32[0]; |
164 | protocol = nhptr->nexthdr; | 163 | protocol = nhptr->nexthdr; |
165 | xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); | 164 | xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr); |
166 | #else | 165 | #else |
167 | src = &nhptr->saddr; | 166 | src = &nhptr->saddr; |
168 | dst = &nhptr->daddr; | 167 | dst = &nhptr->daddr; |
169 | protocol = nhptr->protocol; | 168 | protocol = nhptr->protocol; |
170 | xprt = ((u8*)nhptr) + (nhptr->ihl<<2); | 169 | xprt = ((u8 *)nhptr) + (nhptr->ihl<<2); |
171 | if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) | 170 | if (nhptr->frag_off & htons(IP_MF | IP_OFFSET)) |
172 | return -1; | 171 | return -1; |
173 | #endif | 172 | #endif |
174 | 173 | ||
@@ -176,10 +175,10 @@ restart: | |||
176 | h2 = hash_src(src); | 175 | h2 = hash_src(src); |
177 | 176 | ||
178 | for (s = sht[h1]; s; s = s->next) { | 177 | for (s = sht[h1]; s; s = s->next) { |
179 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && | 178 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] && |
180 | protocol == s->protocol && | 179 | protocol == s->protocol && |
181 | !(s->dpi.mask & | 180 | !(s->dpi.mask & |
182 | (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && | 181 | (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) && |
183 | #if RSVP_DST_LEN == 4 | 182 | #if RSVP_DST_LEN == 4 |
184 | dst[0] == s->dst[0] && | 183 | dst[0] == s->dst[0] && |
185 | dst[1] == s->dst[1] && | 184 | dst[1] == s->dst[1] && |
@@ -188,8 +187,8 @@ restart: | |||
188 | tunnelid == s->tunnelid) { | 187 | tunnelid == s->tunnelid) { |
189 | 188 | ||
190 | for (f = s->ht[h2]; f; f = f->next) { | 189 | for (f = s->ht[h2]; f; f = f->next) { |
191 | if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && | 190 | if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] && |
192 | !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) | 191 | !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key)) |
193 | #if RSVP_DST_LEN == 4 | 192 | #if RSVP_DST_LEN == 4 |
194 | && | 193 | && |
195 | src[0] == f->src[0] && | 194 | src[0] == f->src[0] && |
@@ -205,7 +204,7 @@ matched: | |||
205 | return 0; | 204 | return 0; |
206 | 205 | ||
207 | tunnelid = f->res.classid; | 206 | tunnelid = f->res.classid; |
208 | nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); | 207 | nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr)); |
209 | goto restart; | 208 | goto restart; |
210 | } | 209 | } |
211 | } | 210 | } |
@@ -224,11 +223,11 @@ matched: | |||
224 | 223 | ||
225 | static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) | 224 | static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) |
226 | { | 225 | { |
227 | struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; | 226 | struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht; |
228 | struct rsvp_session *s; | 227 | struct rsvp_session *s; |
229 | struct rsvp_filter *f; | 228 | struct rsvp_filter *f; |
230 | unsigned h1 = handle&0xFF; | 229 | unsigned int h1 = handle & 0xFF; |
231 | unsigned h2 = (handle>>8)&0xFF; | 230 | unsigned int h2 = (handle >> 8) & 0xFF; |
232 | 231 | ||
233 | if (h2 > 16) | 232 | if (h2 > 16) |
234 | return 0; | 233 | return 0; |
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp) | |||
258 | return -ENOBUFS; | 257 | return -ENOBUFS; |
259 | } | 258 | } |
260 | 259 | ||
261 | static inline void | 260 | static void |
262 | rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) | 261 | rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) |
263 | { | 262 | { |
264 | tcf_unbind_filter(tp, &f->res); | 263 | tcf_unbind_filter(tp, &f->res); |
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp) | |||
277 | 276 | ||
278 | sht = data->ht; | 277 | sht = data->ht; |
279 | 278 | ||
280 | for (h1=0; h1<256; h1++) { | 279 | for (h1 = 0; h1 < 256; h1++) { |
281 | struct rsvp_session *s; | 280 | struct rsvp_session *s; |
282 | 281 | ||
283 | while ((s = sht[h1]) != NULL) { | 282 | while ((s = sht[h1]) != NULL) { |
284 | sht[h1] = s->next; | 283 | sht[h1] = s->next; |
285 | 284 | ||
286 | for (h2=0; h2<=16; h2++) { | 285 | for (h2 = 0; h2 <= 16; h2++) { |
287 | struct rsvp_filter *f; | 286 | struct rsvp_filter *f; |
288 | 287 | ||
289 | while ((f = s->ht[h2]) != NULL) { | 288 | while ((f = s->ht[h2]) != NULL) { |
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp) | |||
299 | 298 | ||
300 | static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) | 299 | static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) |
301 | { | 300 | { |
302 | struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; | 301 | struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg; |
303 | unsigned h = f->handle; | 302 | unsigned int h = f->handle; |
304 | struct rsvp_session **sp; | 303 | struct rsvp_session **sp; |
305 | struct rsvp_session *s = f->sess; | 304 | struct rsvp_session *s = f->sess; |
306 | int i; | 305 | int i; |
307 | 306 | ||
308 | for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { | 307 | for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) { |
309 | if (*fp == f) { | 308 | if (*fp == f) { |
310 | tcf_tree_lock(tp); | 309 | tcf_tree_lock(tp); |
311 | *fp = f->next; | 310 | *fp = f->next; |
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) | |||
314 | 313 | ||
315 | /* Strip tree */ | 314 | /* Strip tree */ |
316 | 315 | ||
317 | for (i=0; i<=16; i++) | 316 | for (i = 0; i <= 16; i++) |
318 | if (s->ht[i]) | 317 | if (s->ht[i]) |
319 | return 0; | 318 | return 0; |
320 | 319 | ||
321 | /* OK, session has no flows */ | 320 | /* OK, session has no flows */ |
322 | for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; | 321 | for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF]; |
323 | *sp; sp = &(*sp)->next) { | 322 | *sp; sp = &(*sp)->next) { |
324 | if (*sp == s) { | 323 | if (*sp == s) { |
325 | tcf_tree_lock(tp); | 324 | tcf_tree_lock(tp); |
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) | |||
337 | return 0; | 336 | return 0; |
338 | } | 337 | } |
339 | 338 | ||
340 | static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) | 339 | static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) |
341 | { | 340 | { |
342 | struct rsvp_head *data = tp->root; | 341 | struct rsvp_head *data = tp->root; |
343 | int i = 0xFFFF; | 342 | int i = 0xFFFF; |
344 | 343 | ||
345 | while (i-- > 0) { | 344 | while (i-- > 0) { |
346 | u32 h; | 345 | u32 h; |
346 | |||
347 | if ((data->hgenerator += 0x10000) == 0) | 347 | if ((data->hgenerator += 0x10000) == 0) |
348 | data->hgenerator = 0x10000; | 348 | data->hgenerator = 0x10000; |
349 | h = data->hgenerator|salt; | 349 | h = data->hgenerator|salt; |
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) | |||
355 | 355 | ||
356 | static int tunnel_bts(struct rsvp_head *data) | 356 | static int tunnel_bts(struct rsvp_head *data) |
357 | { | 357 | { |
358 | int n = data->tgenerator>>5; | 358 | int n = data->tgenerator >> 5; |
359 | u32 b = 1<<(data->tgenerator&0x1F); | 359 | u32 b = 1 << (data->tgenerator & 0x1F); |
360 | 360 | ||
361 | if (data->tmap[n]&b) | 361 | if (data->tmap[n] & b) |
362 | return 0; | 362 | return 0; |
363 | data->tmap[n] |= b; | 363 | data->tmap[n] |= b; |
364 | return 1; | 364 | return 1; |
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data) | |||
372 | 372 | ||
373 | memset(tmap, 0, sizeof(tmap)); | 373 | memset(tmap, 0, sizeof(tmap)); |
374 | 374 | ||
375 | for (h1=0; h1<256; h1++) { | 375 | for (h1 = 0; h1 < 256; h1++) { |
376 | struct rsvp_session *s; | 376 | struct rsvp_session *s; |
377 | for (s = sht[h1]; s; s = s->next) { | 377 | for (s = sht[h1]; s; s = s->next) { |
378 | for (h2=0; h2<=16; h2++) { | 378 | for (h2 = 0; h2 <= 16; h2++) { |
379 | struct rsvp_filter *f; | 379 | struct rsvp_filter *f; |
380 | 380 | ||
381 | for (f = s->ht[h2]; f; f = f->next) { | 381 | for (f = s->ht[h2]; f; f = f->next) { |
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data) | |||
395 | { | 395 | { |
396 | int i, k; | 396 | int i, k; |
397 | 397 | ||
398 | for (k=0; k<2; k++) { | 398 | for (k = 0; k < 2; k++) { |
399 | for (i=255; i>0; i--) { | 399 | for (i = 255; i > 0; i--) { |
400 | if (++data->tgenerator == 0) | 400 | if (++data->tgenerator == 0) |
401 | data->tgenerator = 1; | 401 | data->tgenerator = 1; |
402 | if (tunnel_bts(data)) | 402 | if (tunnel_bts(data)) |
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, | |||
428 | struct nlattr *opt = tca[TCA_OPTIONS-1]; | 428 | struct nlattr *opt = tca[TCA_OPTIONS-1]; |
429 | struct nlattr *tb[TCA_RSVP_MAX + 1]; | 429 | struct nlattr *tb[TCA_RSVP_MAX + 1]; |
430 | struct tcf_exts e; | 430 | struct tcf_exts e; |
431 | unsigned h1, h2; | 431 | unsigned int h1, h2; |
432 | __be32 *dst; | 432 | __be32 *dst; |
433 | int err; | 433 | int err; |
434 | 434 | ||
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, | |||
443 | if (err < 0) | 443 | if (err < 0) |
444 | return err; | 444 | return err; |
445 | 445 | ||
446 | if ((f = (struct rsvp_filter*)*arg) != NULL) { | 446 | f = (struct rsvp_filter *)*arg; |
447 | if (f) { | ||
447 | /* Node exists: adjust only classid */ | 448 | /* Node exists: adjust only classid */ |
448 | 449 | ||
449 | if (f->handle != handle && handle) | 450 | if (f->handle != handle && handle) |
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base, | |||
500 | goto errout; | 501 | goto errout; |
501 | } | 502 | } |
502 | 503 | ||
503 | for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { | 504 | for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) { |
504 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && | 505 | if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && |
505 | pinfo && pinfo->protocol == s->protocol && | 506 | pinfo && pinfo->protocol == s->protocol && |
506 | memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && | 507 | memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && |
@@ -523,7 +524,7 @@ insert: | |||
523 | tcf_exts_change(tp, &f->exts, &e); | 524 | tcf_exts_change(tp, &f->exts, &e); |
524 | 525 | ||
525 | for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) | 526 | for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) |
526 | if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) | 527 | if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask) |
527 | break; | 528 | break; |
528 | f->next = *fp; | 529 | f->next = *fp; |
529 | wmb(); | 530 | wmb(); |
@@ -567,7 +568,7 @@ errout2: | |||
567 | static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) | 568 | static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) |
568 | { | 569 | { |
569 | struct rsvp_head *head = tp->root; | 570 | struct rsvp_head *head = tp->root; |
570 | unsigned h, h1; | 571 | unsigned int h, h1; |
571 | 572 | ||
572 | if (arg->stop) | 573 | if (arg->stop) |
573 | return; | 574 | return; |
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
598 | static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, | 599 | static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, |
599 | struct sk_buff *skb, struct tcmsg *t) | 600 | struct sk_buff *skb, struct tcmsg *t) |
600 | { | 601 | { |
601 | struct rsvp_filter *f = (struct rsvp_filter*)fh; | 602 | struct rsvp_filter *f = (struct rsvp_filter *)fh; |
602 | struct rsvp_session *s; | 603 | struct rsvp_session *s; |
603 | unsigned char *b = skb_tail_pointer(skb); | 604 | unsigned char *b = skb_tail_pointer(skb); |
604 | struct nlattr *nest; | 605 | struct nlattr *nest; |
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, | |||
624 | NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); | 625 | NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); |
625 | if (f->res.classid) | 626 | if (f->res.classid) |
626 | NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); | 627 | NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); |
627 | if (((f->handle>>8)&0xFF) != 16) | 628 | if (((f->handle >> 8) & 0xFF) != 16) |
628 | NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); | 629 | NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); |
629 | 630 | ||
630 | if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) | 631 | if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) |
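Aside (illustrative sketch, not part of the patch): the spacing fixes in tunnel_bts() above make its bitmap arithmetic easier to read — it is a plain test-and-set on the 256-entry tunnel-id map. A hypothetical standalone helper with the same logic, assuming the u32 tmap[256/32] layout used by struct rsvp_head:

	/* id >> 5 selects the 32-bit word, id & 0x1F the bit within it. */
	static int tunnel_id_test_and_set(u32 *tmap, int id)
	{
		int n = id >> 5;
		u32 b = 1u << (id & 0x1F);

		if (tmap[n] & b)
			return 0;	/* id already allocated */
		tmap[n] |= b;
		return 1;	/* id was free, now marked used */
	}
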
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 20ef330bb918..36667fa64237 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c | |||
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
249 | * of the hashing index is below the threshold. | 249 | * of the hashing index is below the threshold. |
250 | */ | 250 | */ |
251 | if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) | 251 | if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) |
252 | cp.hash = (cp.mask >> cp.shift)+1; | 252 | cp.hash = (cp.mask >> cp.shift) + 1; |
253 | else | 253 | else |
254 | cp.hash = DEFAULT_HASH_SIZE; | 254 | cp.hash = DEFAULT_HASH_SIZE; |
255 | } | 255 | } |
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b0c2a82178af..3b93fc0c8955 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c | |||
@@ -42,8 +42,7 @@ | |||
42 | #include <net/act_api.h> | 42 | #include <net/act_api.h> |
43 | #include <net/pkt_cls.h> | 43 | #include <net/pkt_cls.h> |
44 | 44 | ||
45 | struct tc_u_knode | 45 | struct tc_u_knode { |
46 | { | ||
47 | struct tc_u_knode *next; | 46 | struct tc_u_knode *next; |
48 | u32 handle; | 47 | u32 handle; |
49 | struct tc_u_hnode *ht_up; | 48 | struct tc_u_hnode *ht_up; |
@@ -63,19 +62,17 @@ struct tc_u_knode | |||
63 | struct tc_u32_sel sel; | 62 | struct tc_u32_sel sel; |
64 | }; | 63 | }; |
65 | 64 | ||
66 | struct tc_u_hnode | 65 | struct tc_u_hnode { |
67 | { | ||
68 | struct tc_u_hnode *next; | 66 | struct tc_u_hnode *next; |
69 | u32 handle; | 67 | u32 handle; |
70 | u32 prio; | 68 | u32 prio; |
71 | struct tc_u_common *tp_c; | 69 | struct tc_u_common *tp_c; |
72 | int refcnt; | 70 | int refcnt; |
73 | unsigned divisor; | 71 | unsigned int divisor; |
74 | struct tc_u_knode *ht[1]; | 72 | struct tc_u_knode *ht[1]; |
75 | }; | 73 | }; |
76 | 74 | ||
77 | struct tc_u_common | 75 | struct tc_u_common { |
78 | { | ||
79 | struct tc_u_hnode *hlist; | 76 | struct tc_u_hnode *hlist; |
80 | struct Qdisc *q; | 77 | struct Qdisc *q; |
81 | int refcnt; | 78 | int refcnt; |
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = { | |||
87 | .police = TCA_U32_POLICE | 84 | .police = TCA_U32_POLICE |
88 | }; | 85 | }; |
89 | 86 | ||
90 | static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) | 87 | static inline unsigned int u32_hash_fold(__be32 key, |
88 | const struct tc_u32_sel *sel, | ||
89 | u8 fshift) | ||
91 | { | 90 | { |
92 | unsigned h = ntohl(key & sel->hmask)>>fshift; | 91 | unsigned int h = ntohl(key & sel->hmask) >> fshift; |
93 | 92 | ||
94 | return h; | 93 | return h; |
95 | } | 94 | } |
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re | |||
101 | unsigned int off; | 100 | unsigned int off; |
102 | } stack[TC_U32_MAXDEPTH]; | 101 | } stack[TC_U32_MAXDEPTH]; |
103 | 102 | ||
104 | struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; | 103 | struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root; |
105 | unsigned int off = skb_network_offset(skb); | 104 | unsigned int off = skb_network_offset(skb); |
106 | struct tc_u_knode *n; | 105 | struct tc_u_knode *n; |
107 | int sdepth = 0; | 106 | int sdepth = 0; |
@@ -120,7 +119,7 @@ next_knode: | |||
120 | struct tc_u32_key *key = n->sel.keys; | 119 | struct tc_u32_key *key = n->sel.keys; |
121 | 120 | ||
122 | #ifdef CONFIG_CLS_U32_PERF | 121 | #ifdef CONFIG_CLS_U32_PERF |
123 | n->pf->rcnt +=1; | 122 | n->pf->rcnt += 1; |
124 | j = 0; | 123 | j = 0; |
125 | #endif | 124 | #endif |
126 | 125 | ||
@@ -133,14 +132,14 @@ next_knode: | |||
133 | } | 132 | } |
134 | #endif | 133 | #endif |
135 | 134 | ||
136 | for (i = n->sel.nkeys; i>0; i--, key++) { | 135 | for (i = n->sel.nkeys; i > 0; i--, key++) { |
137 | int toff = off + key->off + (off2 & key->offmask); | 136 | int toff = off + key->off + (off2 & key->offmask); |
138 | __be32 *data, _data; | 137 | __be32 *data, hdata; |
139 | 138 | ||
140 | if (skb_headroom(skb) + toff > INT_MAX) | 139 | if (skb_headroom(skb) + toff > INT_MAX) |
141 | goto out; | 140 | goto out; |
142 | 141 | ||
143 | data = skb_header_pointer(skb, toff, 4, &_data); | 142 | data = skb_header_pointer(skb, toff, 4, &hdata); |
144 | if (!data) | 143 | if (!data) |
145 | goto out; | 144 | goto out; |
146 | if ((*data ^ key->val) & key->mask) { | 145 | if ((*data ^ key->val) & key->mask) { |
@@ -148,13 +147,13 @@ next_knode: | |||
148 | goto next_knode; | 147 | goto next_knode; |
149 | } | 148 | } |
150 | #ifdef CONFIG_CLS_U32_PERF | 149 | #ifdef CONFIG_CLS_U32_PERF |
151 | n->pf->kcnts[j] +=1; | 150 | n->pf->kcnts[j] += 1; |
152 | j++; | 151 | j++; |
153 | #endif | 152 | #endif |
154 | } | 153 | } |
155 | if (n->ht_down == NULL) { | 154 | if (n->ht_down == NULL) { |
156 | check_terminal: | 155 | check_terminal: |
157 | if (n->sel.flags&TC_U32_TERMINAL) { | 156 | if (n->sel.flags & TC_U32_TERMINAL) { |
158 | 157 | ||
159 | *res = n->res; | 158 | *res = n->res; |
160 | #ifdef CONFIG_NET_CLS_IND | 159 | #ifdef CONFIG_NET_CLS_IND |
@@ -164,7 +163,7 @@ check_terminal: | |||
164 | } | 163 | } |
165 | #endif | 164 | #endif |
166 | #ifdef CONFIG_CLS_U32_PERF | 165 | #ifdef CONFIG_CLS_U32_PERF |
167 | n->pf->rhit +=1; | 166 | n->pf->rhit += 1; |
168 | #endif | 167 | #endif |
169 | r = tcf_exts_exec(skb, &n->exts, res); | 168 | r = tcf_exts_exec(skb, &n->exts, res); |
170 | if (r < 0) { | 169 | if (r < 0) { |
@@ -188,26 +187,26 @@ check_terminal: | |||
188 | ht = n->ht_down; | 187 | ht = n->ht_down; |
189 | sel = 0; | 188 | sel = 0; |
190 | if (ht->divisor) { | 189 | if (ht->divisor) { |
191 | __be32 *data, _data; | 190 | __be32 *data, hdata; |
192 | 191 | ||
193 | data = skb_header_pointer(skb, off + n->sel.hoff, 4, | 192 | data = skb_header_pointer(skb, off + n->sel.hoff, 4, |
194 | &_data); | 193 | &hdata); |
195 | if (!data) | 194 | if (!data) |
196 | goto out; | 195 | goto out; |
197 | sel = ht->divisor & u32_hash_fold(*data, &n->sel, | 196 | sel = ht->divisor & u32_hash_fold(*data, &n->sel, |
198 | n->fshift); | 197 | n->fshift); |
199 | } | 198 | } |
200 | if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) | 199 | if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT))) |
201 | goto next_ht; | 200 | goto next_ht; |
202 | 201 | ||
203 | if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { | 202 | if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) { |
204 | off2 = n->sel.off + 3; | 203 | off2 = n->sel.off + 3; |
205 | if (n->sel.flags & TC_U32_VAROFFSET) { | 204 | if (n->sel.flags & TC_U32_VAROFFSET) { |
206 | __be16 *data, _data; | 205 | __be16 *data, hdata; |
207 | 206 | ||
208 | data = skb_header_pointer(skb, | 207 | data = skb_header_pointer(skb, |
209 | off + n->sel.offoff, | 208 | off + n->sel.offoff, |
210 | 2, &_data); | 209 | 2, &hdata); |
211 | if (!data) | 210 | if (!data) |
212 | goto out; | 211 | goto out; |
213 | off2 += ntohs(n->sel.offmask & *data) >> | 212 | off2 += ntohs(n->sel.offmask & *data) >> |
@@ -215,7 +214,7 @@ check_terminal: | |||
215 | } | 214 | } |
216 | off2 &= ~3; | 215 | off2 &= ~3; |
217 | } | 216 | } |
218 | if (n->sel.flags&TC_U32_EAT) { | 217 | if (n->sel.flags & TC_U32_EAT) { |
219 | off += off2; | 218 | off += off2; |
220 | off2 = 0; | 219 | off2 = 0; |
221 | } | 220 | } |
@@ -236,11 +235,11 @@ out: | |||
236 | 235 | ||
237 | deadloop: | 236 | deadloop: |
238 | if (net_ratelimit()) | 237 | if (net_ratelimit()) |
239 | printk(KERN_WARNING "cls_u32: dead loop\n"); | 238 | pr_warning("cls_u32: dead loop\n"); |
240 | return -1; | 239 | return -1; |
241 | } | 240 | } |
242 | 241 | ||
243 | static __inline__ struct tc_u_hnode * | 242 | static struct tc_u_hnode * |
244 | u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) | 243 | u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) |
245 | { | 244 | { |
246 | struct tc_u_hnode *ht; | 245 | struct tc_u_hnode *ht; |
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) | |||
252 | return ht; | 251 | return ht; |
253 | } | 252 | } |
254 | 253 | ||
255 | static __inline__ struct tc_u_knode * | 254 | static struct tc_u_knode * |
256 | u32_lookup_key(struct tc_u_hnode *ht, u32 handle) | 255 | u32_lookup_key(struct tc_u_hnode *ht, u32 handle) |
257 | { | 256 | { |
258 | unsigned sel; | 257 | unsigned int sel; |
259 | struct tc_u_knode *n = NULL; | 258 | struct tc_u_knode *n = NULL; |
260 | 259 | ||
261 | sel = TC_U32_HASH(handle); | 260 | sel = TC_U32_HASH(handle); |
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c) | |||
300 | do { | 299 | do { |
301 | if (++tp_c->hgenerator == 0x7FF) | 300 | if (++tp_c->hgenerator == 0x7FF) |
302 | tp_c->hgenerator = 1; | 301 | tp_c->hgenerator = 1; |
303 | } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); | 302 | } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); |
304 | 303 | ||
305 | return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; | 304 | return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; |
306 | } | 305 | } |
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key) | |||
378 | static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) | 377 | static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) |
379 | { | 378 | { |
380 | struct tc_u_knode *n; | 379 | struct tc_u_knode *n; |
381 | unsigned h; | 380 | unsigned int h; |
382 | 381 | ||
383 | for (h=0; h<=ht->divisor; h++) { | 382 | for (h = 0; h <= ht->divisor; h++) { |
384 | while ((n = ht->ht[h]) != NULL) { | 383 | while ((n = ht->ht[h]) != NULL) { |
385 | ht->ht[h] = n->next; | 384 | ht->ht[h] = n->next; |
386 | 385 | ||
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp) | |||
446 | 445 | ||
447 | static int u32_delete(struct tcf_proto *tp, unsigned long arg) | 446 | static int u32_delete(struct tcf_proto *tp, unsigned long arg) |
448 | { | 447 | { |
449 | struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; | 448 | struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; |
450 | 449 | ||
451 | if (ht == NULL) | 450 | if (ht == NULL) |
452 | return 0; | 451 | return 0; |
453 | 452 | ||
454 | if (TC_U32_KEY(ht->handle)) | 453 | if (TC_U32_KEY(ht->handle)) |
455 | return u32_delete_key(tp, (struct tc_u_knode*)ht); | 454 | return u32_delete_key(tp, (struct tc_u_knode *)ht); |
456 | 455 | ||
457 | if (tp->root == ht) | 456 | if (tp->root == ht) |
458 | return -EINVAL; | 457 | return -EINVAL; |
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg) | |||
470 | static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) | 469 | static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) |
471 | { | 470 | { |
472 | struct tc_u_knode *n; | 471 | struct tc_u_knode *n; |
473 | unsigned i = 0x7FF; | 472 | unsigned int i = 0x7FF; |
474 | 473 | ||
475 | for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) | 474 | for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next) |
476 | if (i < TC_U32_NODE(n->handle)) | 475 | if (i < TC_U32_NODE(n->handle)) |
477 | i = TC_U32_NODE(n->handle); | 476 | i = TC_U32_NODE(n->handle); |
478 | i++; | 477 | i++; |
479 | 478 | ||
480 | return handle|(i>0xFFF ? 0xFFF : i); | 479 | return handle | (i > 0xFFF ? 0xFFF : i); |
481 | } | 480 | } |
482 | 481 | ||
483 | static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { | 482 | static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { |
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
566 | if (err < 0) | 565 | if (err < 0) |
567 | return err; | 566 | return err; |
568 | 567 | ||
569 | if ((n = (struct tc_u_knode*)*arg) != NULL) { | 568 | n = (struct tc_u_knode *)*arg; |
569 | if (n) { | ||
570 | if (TC_U32_KEY(n->handle) == 0) | 570 | if (TC_U32_KEY(n->handle) == 0) |
571 | return -EINVAL; | 571 | return -EINVAL; |
572 | 572 | ||
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
574 | } | 574 | } |
575 | 575 | ||
576 | if (tb[TCA_U32_DIVISOR]) { | 576 | if (tb[TCA_U32_DIVISOR]) { |
577 | unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); | 577 | unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); |
578 | 578 | ||
579 | if (--divisor > 0x100) | 579 | if (--divisor > 0x100) |
580 | return -EINVAL; | 580 | return -EINVAL; |
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, | |||
585 | if (handle == 0) | 585 | if (handle == 0) |
586 | return -ENOMEM; | 586 | return -ENOMEM; |
587 | } | 587 | } |
588 | ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); | 588 | ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); |
589 | if (ht == NULL) | 589 | if (ht == NULL) |
590 | return -ENOBUFS; | 590 | return -ENOBUFS; |
591 | ht->tp_c = tp_c; | 591 | ht->tp_c = tp_c; |
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
683 | struct tc_u_common *tp_c = tp->data; | 683 | struct tc_u_common *tp_c = tp->data; |
684 | struct tc_u_hnode *ht; | 684 | struct tc_u_hnode *ht; |
685 | struct tc_u_knode *n; | 685 | struct tc_u_knode *n; |
686 | unsigned h; | 686 | unsigned int h; |
687 | 687 | ||
688 | if (arg->stop) | 688 | if (arg->stop) |
689 | return; | 689 | return; |
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) | |||
717 | static int u32_dump(struct tcf_proto *tp, unsigned long fh, | 717 | static int u32_dump(struct tcf_proto *tp, unsigned long fh, |
718 | struct sk_buff *skb, struct tcmsg *t) | 718 | struct sk_buff *skb, struct tcmsg *t) |
719 | { | 719 | { |
720 | struct tc_u_knode *n = (struct tc_u_knode*)fh; | 720 | struct tc_u_knode *n = (struct tc_u_knode *)fh; |
721 | struct nlattr *nest; | 721 | struct nlattr *nest; |
722 | 722 | ||
723 | if (n == NULL) | 723 | if (n == NULL) |
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, | |||
730 | goto nla_put_failure; | 730 | goto nla_put_failure; |
731 | 731 | ||
732 | if (TC_U32_KEY(n->handle) == 0) { | 732 | if (TC_U32_KEY(n->handle) == 0) { |
733 | struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; | 733 | struct tc_u_hnode *ht = (struct tc_u_hnode *)fh; |
734 | u32 divisor = ht->divisor+1; | 734 | u32 divisor = ht->divisor + 1; |
735 | |||
735 | NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); | 736 | NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); |
736 | } else { | 737 | } else { |
737 | NLA_PUT(skb, TCA_U32_SEL, | 738 | NLA_PUT(skb, TCA_U32_SEL, |
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh, | |||
755 | goto nla_put_failure; | 756 | goto nla_put_failure; |
756 | 757 | ||
757 | #ifdef CONFIG_NET_CLS_IND | 758 | #ifdef CONFIG_NET_CLS_IND |
758 | if(strlen(n->indev)) | 759 | if (strlen(n->indev)) |
759 | NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); | 760 | NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); |
760 | #endif | 761 | #endif |
761 | #ifdef CONFIG_CLS_U32_PERF | 762 | #ifdef CONFIG_CLS_U32_PERF |
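Aside (illustrative sketch, not part of the patch): the _data -> hdata renames above all touch the same safe header-access pattern used throughout u32_classify(). A minimal, hypothetical helper showing that pattern with skb_header_pointer() as the hunks use it — the returned pointer is either into the skb or into the on-stack copy, and NULL means the offset fell outside the packet:

	static bool u32_key_matches(const struct sk_buff *skb, int toff,
				    const struct tc_u32_key *key)
	{
		__be32 *data, hdata;

		data = skb_header_pointer(skb, toff, 4, &hdata);
		if (!data)
			return false;	/* offset beyond packet data */
		/* Match iff the masked header bits equal the configured value. */
		return ((*data ^ key->val) & key->mask) == 0;
	}
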
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index bc450397487a..1c8360a2752a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c | |||
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
33 | return 0; | 33 | return 0; |
34 | 34 | ||
35 | switch (cmp->align) { | 35 | switch (cmp->align) { |
36 | case TCF_EM_ALIGN_U8: | 36 | case TCF_EM_ALIGN_U8: |
37 | val = *ptr; | 37 | val = *ptr; |
38 | break; | 38 | break; |
39 | 39 | ||
40 | case TCF_EM_ALIGN_U16: | 40 | case TCF_EM_ALIGN_U16: |
41 | val = get_unaligned_be16(ptr); | 41 | val = get_unaligned_be16(ptr); |
42 | 42 | ||
43 | if (cmp_needs_transformation(cmp)) | 43 | if (cmp_needs_transformation(cmp)) |
44 | val = be16_to_cpu(val); | 44 | val = be16_to_cpu(val); |
45 | break; | 45 | break; |
46 | 46 | ||
47 | case TCF_EM_ALIGN_U32: | 47 | case TCF_EM_ALIGN_U32: |
48 | /* Worth checking boundries? The branching seems | 48 | /* Worth checking boundries? The branching seems |
49 | * to get worse. Visit again. */ | 49 | * to get worse. Visit again. |
50 | val = get_unaligned_be32(ptr); | 50 | */ |
51 | val = get_unaligned_be32(ptr); | ||
51 | 52 | ||
52 | if (cmp_needs_transformation(cmp)) | 53 | if (cmp_needs_transformation(cmp)) |
53 | val = be32_to_cpu(val); | 54 | val = be32_to_cpu(val); |
54 | break; | 55 | break; |
55 | 56 | ||
56 | default: | 57 | default: |
57 | return 0; | 58 | return 0; |
58 | } | 59 | } |
59 | 60 | ||
60 | if (cmp->mask) | 61 | if (cmp->mask) |
61 | val &= cmp->mask; | 62 | val &= cmp->mask; |
62 | 63 | ||
63 | switch (cmp->opnd) { | 64 | switch (cmp->opnd) { |
64 | case TCF_EM_OPND_EQ: | 65 | case TCF_EM_OPND_EQ: |
65 | return val == cmp->val; | 66 | return val == cmp->val; |
66 | case TCF_EM_OPND_LT: | 67 | case TCF_EM_OPND_LT: |
67 | return val < cmp->val; | 68 | return val < cmp->val; |
68 | case TCF_EM_OPND_GT: | 69 | case TCF_EM_OPND_GT: |
69 | return val > cmp->val; | 70 | return val > cmp->val; |
70 | } | 71 | } |
71 | 72 | ||
72 | return 0; | 73 | return 0; |
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 3bcac8aa333c..49130e8abff0 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c | |||
@@ -47,7 +47,7 @@ | |||
47 | * on the meta type. Obviously, the length of the data must also | 47 | * on the meta type. Obviously, the length of the data must also |
48 | * be provided for non-numeric types. | 48 | * be provided for non-numeric types. |
49 | * | 49 | * |
50 | * Additionaly, type dependant modifiers such as shift operators | 50 | * Additionally, type dependent modifiers such as shift operators |
51 | * or mask may be applied to extend the functionaliy. As of now, | 51 | * or mask may be applied to extend the functionaliy. As of now, |
52 | * the variable length type supports shifting the byte string to | 52 | * the variable length type supports shifting the byte string to |
53 | * the right, eating up any number of octets and thus supporting | 53 | * the right, eating up any number of octets and thus supporting |
@@ -73,21 +73,18 @@ | |||
73 | #include <net/pkt_cls.h> | 73 | #include <net/pkt_cls.h> |
74 | #include <net/sock.h> | 74 | #include <net/sock.h> |
75 | 75 | ||
76 | struct meta_obj | 76 | struct meta_obj { |
77 | { | ||
78 | unsigned long value; | 77 | unsigned long value; |
79 | unsigned int len; | 78 | unsigned int len; |
80 | }; | 79 | }; |
81 | 80 | ||
82 | struct meta_value | 81 | struct meta_value { |
83 | { | ||
84 | struct tcf_meta_val hdr; | 82 | struct tcf_meta_val hdr; |
85 | unsigned long val; | 83 | unsigned long val; |
86 | unsigned int len; | 84 | unsigned int len; |
87 | }; | 85 | }; |
88 | 86 | ||
89 | struct meta_match | 87 | struct meta_match { |
90 | { | ||
91 | struct meta_value lvalue; | 88 | struct meta_value lvalue; |
92 | struct meta_value rvalue; | 89 | struct meta_value rvalue; |
93 | }; | 90 | }; |
@@ -223,6 +220,11 @@ META_COLLECTOR(int_maclen) | |||
223 | dst->value = skb->mac_len; | 220 | dst->value = skb->mac_len; |
224 | } | 221 | } |
225 | 222 | ||
223 | META_COLLECTOR(int_rxhash) | ||
224 | { | ||
225 | dst->value = skb_get_rxhash(skb); | ||
226 | } | ||
227 | |||
226 | /************************************************************************** | 228 | /************************************************************************** |
227 | * Netfilter | 229 | * Netfilter |
228 | **************************************************************************/ | 230 | **************************************************************************/ |
@@ -250,7 +252,7 @@ META_COLLECTOR(int_rtclassid) | |||
250 | if (unlikely(skb_dst(skb) == NULL)) | 252 | if (unlikely(skb_dst(skb) == NULL)) |
251 | *err = -1; | 253 | *err = -1; |
252 | else | 254 | else |
253 | #ifdef CONFIG_NET_CLS_ROUTE | 255 | #ifdef CONFIG_IP_ROUTE_CLASSID |
254 | dst->value = skb_dst(skb)->tclassid; | 256 | dst->value = skb_dst(skb)->tclassid; |
255 | #else | 257 | #else |
256 | dst->value = 0; | 258 | dst->value = 0; |
@@ -262,7 +264,7 @@ META_COLLECTOR(int_rtiif) | |||
262 | if (unlikely(skb_rtable(skb) == NULL)) | 264 | if (unlikely(skb_rtable(skb) == NULL)) |
263 | *err = -1; | 265 | *err = -1; |
264 | else | 266 | else |
265 | dst->value = skb_rtable(skb)->fl.iif; | 267 | dst->value = skb_rtable(skb)->rt_iif; |
266 | } | 268 | } |
267 | 269 | ||
268 | /************************************************************************** | 270 | /************************************************************************** |
@@ -399,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf) | |||
399 | META_COLLECTOR(int_sk_alloc) | 401 | META_COLLECTOR(int_sk_alloc) |
400 | { | 402 | { |
401 | SKIP_NONLOCAL(skb); | 403 | SKIP_NONLOCAL(skb); |
402 | dst->value = skb->sk->sk_allocation; | 404 | dst->value = (__force int) skb->sk->sk_allocation; |
403 | } | 405 | } |
404 | 406 | ||
405 | META_COLLECTOR(int_sk_route_caps) | 407 | META_COLLECTOR(int_sk_route_caps) |
@@ -478,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend) | |||
478 | * Meta value collectors assignment table | 480 | * Meta value collectors assignment table |
479 | **************************************************************************/ | 481 | **************************************************************************/ |
480 | 482 | ||
481 | struct meta_ops | 483 | struct meta_ops { |
482 | { | ||
483 | void (*get)(struct sk_buff *, struct tcf_pkt_info *, | 484 | void (*get)(struct sk_buff *, struct tcf_pkt_info *, |
484 | struct meta_value *, struct meta_obj *, int *); | 485 | struct meta_value *, struct meta_obj *, int *); |
485 | }; | 486 | }; |
@@ -489,7 +490,7 @@ struct meta_ops | |||
489 | 490 | ||
490 | /* Meta value operations table listing all meta value collectors and | 491 | /* Meta value operations table listing all meta value collectors and |
491 | * assigns them to a type and meta id. */ | 492 | * assigns them to a type and meta id. */ |
492 | static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { | 493 | static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = { |
493 | [TCF_META_TYPE_VAR] = { | 494 | [TCF_META_TYPE_VAR] = { |
494 | [META_ID(DEV)] = META_FUNC(var_dev), | 495 | [META_ID(DEV)] = META_FUNC(var_dev), |
495 | [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), | 496 | [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), |
@@ -541,10 +542,11 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { | |||
541 | [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), | 542 | [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), |
542 | [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), | 543 | [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), |
543 | [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), | 544 | [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), |
545 | [META_ID(RXHASH)] = META_FUNC(int_rxhash), | ||
544 | } | 546 | } |
545 | }; | 547 | }; |
546 | 548 | ||
547 | static inline struct meta_ops * meta_ops(struct meta_value *val) | 549 | static inline struct meta_ops *meta_ops(struct meta_value *val) |
548 | { | 550 | { |
549 | return &__meta_ops[meta_type(val)][meta_id(val)]; | 551 | return &__meta_ops[meta_type(val)][meta_id(val)]; |
550 | } | 552 | } |
@@ -643,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv) | |||
643 | { | 645 | { |
644 | if (v->len == sizeof(unsigned long)) | 646 | if (v->len == sizeof(unsigned long)) |
645 | NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); | 647 | NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); |
646 | else if (v->len == sizeof(u32)) { | 648 | else if (v->len == sizeof(u32)) |
647 | NLA_PUT_U32(skb, tlv, v->val); | 649 | NLA_PUT_U32(skb, tlv, v->val); |
648 | } | ||
649 | 650 | ||
650 | return 0; | 651 | return 0; |
651 | 652 | ||
@@ -657,8 +658,7 @@ nla_put_failure: | |||
657 | * Type specific operations table | 658 | * Type specific operations table |
658 | **************************************************************************/ | 659 | **************************************************************************/ |
659 | 660 | ||
660 | struct meta_type_ops | 661 | struct meta_type_ops { |
661 | { | ||
662 | void (*destroy)(struct meta_value *); | 662 | void (*destroy)(struct meta_value *); |
663 | int (*compare)(struct meta_obj *, struct meta_obj *); | 663 | int (*compare)(struct meta_obj *, struct meta_obj *); |
664 | int (*change)(struct meta_value *, struct nlattr *); | 664 | int (*change)(struct meta_value *, struct nlattr *); |
@@ -666,7 +666,7 @@ struct meta_type_ops | |||
666 | int (*dump)(struct sk_buff *, struct meta_value *, int); | 666 | int (*dump)(struct sk_buff *, struct meta_value *, int); |
667 | }; | 667 | }; |
668 | 668 | ||
669 | static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { | 669 | static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = { |
670 | [TCF_META_TYPE_VAR] = { | 670 | [TCF_META_TYPE_VAR] = { |
671 | .destroy = meta_var_destroy, | 671 | .destroy = meta_var_destroy, |
672 | .compare = meta_var_compare, | 672 | .compare = meta_var_compare, |
@@ -682,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { | |||
682 | } | 682 | } |
683 | }; | 683 | }; |
684 | 684 | ||
685 | static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) | 685 | static inline struct meta_type_ops *meta_type_ops(struct meta_value *v) |
686 | { | 686 | { |
687 | return &__meta_type_ops[meta_type(v)]; | 687 | return &__meta_type_ops[meta_type(v)]; |
688 | } | 688 | } |
@@ -707,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info, | |||
707 | return err; | 707 | return err; |
708 | 708 | ||
709 | if (meta_type_ops(v)->apply_extras) | 709 | if (meta_type_ops(v)->apply_extras) |
710 | meta_type_ops(v)->apply_extras(v, dst); | 710 | meta_type_ops(v)->apply_extras(v, dst); |
711 | 711 | ||
712 | return 0; | 712 | return 0; |
713 | } | 713 | } |
@@ -726,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m, | |||
726 | r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); | 726 | r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); |
727 | 727 | ||
728 | switch (meta->lvalue.hdr.op) { | 728 | switch (meta->lvalue.hdr.op) { |
729 | case TCF_EM_OPND_EQ: | 729 | case TCF_EM_OPND_EQ: |
730 | return !r; | 730 | return !r; |
731 | case TCF_EM_OPND_LT: | 731 | case TCF_EM_OPND_LT: |
732 | return r < 0; | 732 | return r < 0; |
733 | case TCF_EM_OPND_GT: | 733 | case TCF_EM_OPND_GT: |
734 | return r > 0; | 734 | return r > 0; |
735 | } | 735 | } |
736 | 736 | ||
737 | return 0; | 737 | return 0; |
@@ -765,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla) | |||
765 | 765 | ||
766 | static inline int meta_is_supported(struct meta_value *val) | 766 | static inline int meta_is_supported(struct meta_value *val) |
767 | { | 767 | { |
768 | return (!meta_id(val) || meta_ops(val)->get); | 768 | return !meta_id(val) || meta_ops(val)->get; |
769 | } | 769 | } |
770 | 770 | ||
771 | static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { | 771 | static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { |
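Aside (illustrative sketch, not part of the patch): the new RXHASH collector above shows the whole recipe for an integer meta value — a META_COLLECTOR() body that fills dst->value, plus one [META_ID(...)] = META_FUNC(...) entry under TCF_META_TYPE_INT in __meta_ops. A hypothetical collector following the same pattern (the name, the id constant and the table entry below are assumptions for illustration only, not part of this patch):

	META_COLLECTOR(int_example)
	{
		/* Collectors just fill dst->value; *err is set only on failure.
		 * skb->priority is used here purely as a placeholder integer. */
		dst->value = skb->priority;
	}

	/* Registration would mirror the RXHASH line added above:
	 *	[META_ID(EXAMPLE)] = META_FUNC(int_example),
	 * assuming a matching TCF_META_ID_EXAMPLE constant existed in the
	 * uapi header. */
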
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index 1a4176aee6e5..a3bed07a008b 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c | |||
@@ -18,8 +18,7 @@ | |||
18 | #include <linux/tc_ematch/tc_em_nbyte.h> | 18 | #include <linux/tc_ematch/tc_em_nbyte.h> |
19 | #include <net/pkt_cls.h> | 19 | #include <net/pkt_cls.h> |
20 | 20 | ||
21 | struct nbyte_data | 21 | struct nbyte_data { |
22 | { | ||
23 | struct tcf_em_nbyte hdr; | 22 | struct tcf_em_nbyte hdr; |
24 | char pattern[0]; | 23 | char pattern[0]; |
25 | }; | 24 | }; |
diff --git a/net/sched/em_text.c b/net/sched/em_text.c index 763253257411..15d353d2e4be 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c | |||
@@ -19,8 +19,7 @@ | |||
19 | #include <linux/tc_ematch/tc_em_text.h> | 19 | #include <linux/tc_ematch/tc_em_text.h> |
20 | #include <net/pkt_cls.h> | 20 | #include <net/pkt_cls.h> |
21 | 21 | ||
22 | struct text_match | 22 | struct text_match { |
23 | { | ||
24 | u16 from_offset; | 23 | u16 from_offset; |
25 | u16 to_offset; | 24 | u16 to_offset; |
26 | u8 from_layer; | 25 | u8 from_layer; |
@@ -103,7 +102,8 @@ retry: | |||
103 | 102 | ||
104 | static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) | 103 | static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) |
105 | { | 104 | { |
106 | textsearch_destroy(EM_TEXT_PRIV(m)->config); | 105 | if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config) |
106 | textsearch_destroy(EM_TEXT_PRIV(m)->config); | ||
107 | } | 107 | } |
108 | 108 | ||
109 | static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) | 109 | static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) |
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 953f1479f7da..797bdb88c010 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c | |||
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
35 | if (!tcf_valid_offset(skb, ptr, sizeof(u32))) | 35 | if (!tcf_valid_offset(skb, ptr, sizeof(u32))) |
36 | return 0; | 36 | return 0; |
37 | 37 | ||
38 | return !(((*(__be32*) ptr) ^ key->val) & key->mask); | 38 | return !(((*(__be32 *) ptr) ^ key->val) & key->mask); |
39 | } | 39 | } |
40 | 40 | ||
41 | static struct tcf_ematch_ops em_u32_ops = { | 41 | static struct tcf_ematch_ops em_u32_ops = { |
diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5e37da961f80..88d93eb92507 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c | |||
@@ -93,7 +93,7 @@ | |||
93 | static LIST_HEAD(ematch_ops); | 93 | static LIST_HEAD(ematch_ops); |
94 | static DEFINE_RWLOCK(ematch_mod_lock); | 94 | static DEFINE_RWLOCK(ematch_mod_lock); |
95 | 95 | ||
96 | static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) | 96 | static struct tcf_ematch_ops *tcf_em_lookup(u16 kind) |
97 | { | 97 | { |
98 | struct tcf_ematch_ops *e = NULL; | 98 | struct tcf_ematch_ops *e = NULL; |
99 | 99 | ||
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops) | |||
163 | } | 163 | } |
164 | EXPORT_SYMBOL(tcf_em_unregister); | 164 | EXPORT_SYMBOL(tcf_em_unregister); |
165 | 165 | ||
166 | static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, | 166 | static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree, |
167 | int index) | 167 | int index) |
168 | { | 168 | { |
169 | return &tree->matches[index]; | 169 | return &tree->matches[index]; |
170 | } | 170 | } |
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
184 | 184 | ||
185 | if (em_hdr->kind == TCF_EM_CONTAINER) { | 185 | if (em_hdr->kind == TCF_EM_CONTAINER) { |
186 | /* Special ematch called "container", carries an index | 186 | /* Special ematch called "container", carries an index |
187 | * referencing an external ematch sequence. */ | 187 | * referencing an external ematch sequence. |
188 | */ | ||
188 | u32 ref; | 189 | u32 ref; |
189 | 190 | ||
190 | if (data_len < sizeof(ref)) | 191 | if (data_len < sizeof(ref)) |
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
195 | goto errout; | 196 | goto errout; |
196 | 197 | ||
197 | /* We do not allow backward jumps to avoid loops and jumps | 198 | /* We do not allow backward jumps to avoid loops and jumps |
198 | * to our own position are of course illegal. */ | 199 | * to our own position are of course illegal. |
200 | */ | ||
199 | if (ref <= idx) | 201 | if (ref <= idx) |
200 | goto errout; | 202 | goto errout; |
201 | 203 | ||
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
208 | * which automatically releases the reference again, therefore | 210 | * which automatically releases the reference again, therefore |
209 | * the module MUST not be given back under any circumstances | 211 | * the module MUST not be given back under any circumstances |
210 | * here. Be aware, the destroy function assumes that the | 212 | * here. Be aware, the destroy function assumes that the |
211 | * module is held if the ops field is non zero. */ | 213 | * module is held if the ops field is non zero. |
214 | */ | ||
212 | em->ops = tcf_em_lookup(em_hdr->kind); | 215 | em->ops = tcf_em_lookup(em_hdr->kind); |
213 | 216 | ||
214 | if (em->ops == NULL) { | 217 | if (em->ops == NULL) { |
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
221 | if (em->ops) { | 224 | if (em->ops) { |
222 | /* We dropped the RTNL mutex in order to | 225 | /* We dropped the RTNL mutex in order to |
223 | * perform the module load. Tell the caller | 226 | * perform the module load. Tell the caller |
224 | * to replay the request. */ | 227 | * to replay the request. |
228 | */ | ||
225 | module_put(em->ops->owner); | 229 | module_put(em->ops->owner); |
226 | err = -EAGAIN; | 230 | err = -EAGAIN; |
227 | } | 231 | } |
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
230 | } | 234 | } |
231 | 235 | ||
232 | /* ematch module provides expected length of data, so we | 236 | /* ematch module provides expected length of data, so we |
233 | * can do a basic sanity check. */ | 237 | * can do a basic sanity check. |
238 | */ | ||
234 | if (em->ops->datalen && data_len < em->ops->datalen) | 239 | if (em->ops->datalen && data_len < em->ops->datalen) |
235 | goto errout; | 240 | goto errout; |
236 | 241 | ||
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp, | |||
246 | * TCF_EM_SIMPLE may be specified stating that the | 251 | * TCF_EM_SIMPLE may be specified stating that the |
247 | * data only consists of a u32 integer and the module | 252 | * data only consists of a u32 integer and the module |
248 | * does not expected a memory reference but rather | 253 | * does not expected a memory reference but rather |
249 | * the value carried. */ | 254 | * the value carried. |
255 | */ | ||
250 | if (em_hdr->flags & TCF_EM_SIMPLE) { | 256 | if (em_hdr->flags & TCF_EM_SIMPLE) { |
251 | if (data_len < sizeof(u32)) | 257 | if (data_len < sizeof(u32)) |
252 | goto errout; | 258 | goto errout; |
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, | |||
334 | * The array of rt attributes is parsed in the order as they are | 340 | * The array of rt attributes is parsed in the order as they are |
335 | * provided, their type must be incremental from 1 to n. Even | 341 | * provided, their type must be incremental from 1 to n. Even |
336 | * if it does not serve any real purpose, a failure of sticking | 342 | * if it does not serve any real purpose, a failure of sticking |
337 | * to this policy will result in parsing failure. */ | 343 | * to this policy will result in parsing failure. |
344 | */ | ||
338 | for (idx = 0; nla_ok(rt_match, list_len); idx++) { | 345 | for (idx = 0; nla_ok(rt_match, list_len); idx++) { |
339 | err = -EINVAL; | 346 | err = -EINVAL; |
340 | 347 | ||
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla, | |||
359 | /* Check if the number of matches provided by userspace actually | 366 | /* Check if the number of matches provided by userspace actually |
360 | * complies with the array of matches. The number was used for | 367 | * complies with the array of matches. The number was used for |
361 | * the validation of references and a mismatch could lead to | 368 | * the validation of references and a mismatch could lead to |
362 | * undefined references during the matching process. */ | 369 | * undefined references during the matching process. |
370 | */ | ||
363 | if (idx != tree_hdr->nmatches) { | 371 | if (idx != tree_hdr->nmatches) { |
364 | err = -EINVAL; | 372 | err = -EINVAL; |
365 | goto errout_abort; | 373 | goto errout_abort; |
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv) | |||
449 | .flags = em->flags | 457 | .flags = em->flags |
450 | }; | 458 | }; |
451 | 459 | ||
452 | NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); | 460 | NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr); |
453 | 461 | ||
454 | if (em->ops && em->ops->dump) { | 462 | if (em->ops && em->ops->dump) { |
455 | if (em->ops->dump(skb, em) < 0) | 463 | if (em->ops->dump(skb, em) < 0) |
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em, | |||
478 | struct tcf_pkt_info *info) | 486 | struct tcf_pkt_info *info) |
479 | { | 487 | { |
480 | int r = em->ops->match(skb, em, info); | 488 | int r = em->ops->match(skb, em, info); |
489 | |||
481 | return tcf_em_is_inverted(em) ? !r : r; | 490 | return tcf_em_is_inverted(em) ? !r : r; |
482 | } | 491 | } |
483 | 492 | ||
@@ -527,8 +536,8 @@ pop_stack: | |||
527 | 536 | ||
528 | stack_overflow: | 537 | stack_overflow: |
529 | if (net_ratelimit()) | 538 | if (net_ratelimit()) |
530 | printk(KERN_WARNING "tc ematch: local stack overflow," | 539 | pr_warning("tc ematch: local stack overflow," |
531 | " increase NET_EMATCH_STACK\n"); | 540 | " increase NET_EMATCH_STACK\n"); |
532 | return -1; | 541 | return -1; |
533 | } | 542 | } |
534 | EXPORT_SYMBOL(__tcf_em_tree_match); | 543 | EXPORT_SYMBOL(__tcf_em_tree_match); |
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 408eea7086aa..6b8627661c98 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c | |||
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops) | |||
187 | int err = -ENOENT; | 187 | int err = -ENOENT; |
188 | 188 | ||
189 | write_lock(&qdisc_mod_lock); | 189 | write_lock(&qdisc_mod_lock); |
190 | for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) | 190 | for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next) |
191 | if (q == qops) | 191 | if (q == qops) |
192 | break; | 192 | break; |
193 | if (q) { | 193 | if (q) { |
@@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) | |||
240 | if (q) | 240 | if (q) |
241 | goto out; | 241 | goto out; |
242 | 242 | ||
243 | q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); | 243 | if (dev_ingress_queue(dev)) |
244 | q = qdisc_match_from_root( | ||
245 | dev_ingress_queue(dev)->qdisc_sleeping, | ||
246 | handle); | ||
244 | out: | 247 | out: |
245 | return q; | 248 | return q; |
246 | } | 249 | } |
@@ -318,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab) | |||
318 | if (!tab || --tab->refcnt) | 321 | if (!tab || --tab->refcnt) |
319 | return; | 322 | return; |
320 | 323 | ||
321 | for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { | 324 | for (rtabp = &qdisc_rtab_list; |
325 | (rtab = *rtabp) != NULL; | ||
326 | rtabp = &rtab->next) { | ||
322 | if (rtab == tab) { | 327 | if (rtab == tab) { |
323 | *rtabp = rtab->next; | 328 | *rtabp = rtab->next; |
324 | kfree(rtab); | 329 | kfree(rtab); |
@@ -360,7 +365,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) | |||
360 | tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); | 365 | tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); |
361 | } | 366 | } |
362 | 367 | ||
363 | if (!s || tsize != s->tsize || (!tab && tsize > 0)) | 368 | if (tsize != s->tsize || (!tab && tsize > 0)) |
364 | return ERR_PTR(-EINVAL); | 369 | return ERR_PTR(-EINVAL); |
365 | 370 | ||
366 | spin_lock(&qdisc_stab_lock); | 371 | spin_lock(&qdisc_stab_lock); |
@@ -393,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt) | |||
393 | return stab; | 398 | return stab; |
394 | } | 399 | } |
395 | 400 | ||
401 | static void stab_kfree_rcu(struct rcu_head *head) | ||
402 | { | ||
403 | kfree(container_of(head, struct qdisc_size_table, rcu)); | ||
404 | } | ||
405 | |||
396 | void qdisc_put_stab(struct qdisc_size_table *tab) | 406 | void qdisc_put_stab(struct qdisc_size_table *tab) |
397 | { | 407 | { |
398 | if (!tab) | 408 | if (!tab) |
@@ -402,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) | |||
402 | 412 | ||
403 | if (--tab->refcnt == 0) { | 413 | if (--tab->refcnt == 0) { |
404 | list_del(&tab->list); | 414 | list_del(&tab->list); |
405 | kfree(tab); | 415 | call_rcu_bh(&tab->rcu, stab_kfree_rcu); |
406 | } | 416 | } |
407 | 417 | ||
408 | spin_unlock(&qdisc_stab_lock); | 418 | spin_unlock(&qdisc_stab_lock); |
@@ -425,7 +435,7 @@ nla_put_failure: | |||
425 | return -1; | 435 | return -1; |
426 | } | 436 | } |
427 | 437 | ||
428 | void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) | 438 | void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) |
429 | { | 439 | { |
430 | int pkt_len, slot; | 440 | int pkt_len, slot; |
431 | 441 | ||
@@ -451,14 +461,13 @@ out: | |||
451 | pkt_len = 1; | 461 | pkt_len = 1; |
452 | qdisc_skb_cb(skb)->pkt_len = pkt_len; | 462 | qdisc_skb_cb(skb)->pkt_len = pkt_len; |
453 | } | 463 | } |
454 | EXPORT_SYMBOL(qdisc_calculate_pkt_len); | 464 | EXPORT_SYMBOL(__qdisc_calculate_pkt_len); |
455 | 465 | ||
456 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) | 466 | void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) |
457 | { | 467 | { |
458 | if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { | 468 | if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { |
459 | printk(KERN_WARNING | 469 | pr_warn("%s: %s qdisc %X: is non-work-conserving?\n", |
460 | "%s: %s qdisc %X: is non-work-conserving?\n", | 470 | txt, qdisc->ops->id, qdisc->handle >> 16); |
461 | txt, qdisc->ops->id, qdisc->handle >> 16); | ||
462 | qdisc->flags |= TCQ_F_WARN_NONWC; | 471 | qdisc->flags |= TCQ_F_WARN_NONWC; |
463 | } | 472 | } |
464 | } | 473 | } |
@@ -469,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) | |||
469 | struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, | 478 | struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, |
470 | timer); | 479 | timer); |
471 | 480 | ||
472 | wd->qdisc->flags &= ~TCQ_F_THROTTLED; | 481 | qdisc_unthrottled(wd->qdisc); |
473 | __netif_schedule(qdisc_root(wd->qdisc)); | 482 | __netif_schedule(qdisc_root(wd->qdisc)); |
474 | 483 | ||
475 | return HRTIMER_NORESTART; | 484 | return HRTIMER_NORESTART; |
@@ -491,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires) | |||
491 | &qdisc_root_sleeping(wd->qdisc)->state)) | 500 | &qdisc_root_sleeping(wd->qdisc)->state)) |
492 | return; | 501 | return; |
493 | 502 | ||
494 | wd->qdisc->flags |= TCQ_F_THROTTLED; | 503 | qdisc_throttled(wd->qdisc); |
495 | time = ktime_set(0, 0); | 504 | time = ktime_set(0, 0); |
496 | time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); | 505 | time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); |
497 | hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); | 506 | hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); |
@@ -501,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule); | |||
501 | void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) | 510 | void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) |
502 | { | 511 | { |
503 | hrtimer_cancel(&wd->timer); | 512 | hrtimer_cancel(&wd->timer); |
504 | wd->qdisc->flags &= ~TCQ_F_THROTTLED; | 513 | qdisc_unthrottled(wd->qdisc); |
505 | } | 514 | } |
506 | EXPORT_SYMBOL(qdisc_watchdog_cancel); | 515 | EXPORT_SYMBOL(qdisc_watchdog_cancel); |
507 | 516 | ||
@@ -622,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev) | |||
622 | autohandle = TC_H_MAKE(0x80000000U, 0); | 631 | autohandle = TC_H_MAKE(0x80000000U, 0); |
623 | } while (qdisc_lookup(dev, autohandle) && --i > 0); | 632 | } while (qdisc_lookup(dev, autohandle) && --i > 0); |
624 | 633 | ||
625 | return i>0 ? autohandle : 0; | 634 | return i > 0 ? autohandle : 0; |
626 | } | 635 | } |
627 | 636 | ||
628 | void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) | 637 | void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) |
@@ -690,6 +699,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, | |||
690 | (new && new->flags & TCQ_F_INGRESS)) { | 699 | (new && new->flags & TCQ_F_INGRESS)) { |
691 | num_q = 1; | 700 | num_q = 1; |
692 | ingress = 1; | 701 | ingress = 1; |
702 | if (!dev_ingress_queue(dev)) | ||
703 | return -ENOENT; | ||
693 | } | 704 | } |
694 | 705 | ||
695 | if (dev->flags & IFF_UP) | 706 | if (dev->flags & IFF_UP) |
@@ -701,7 +712,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, | |||
701 | } | 712 | } |
702 | 713 | ||
703 | for (i = 0; i < num_q; i++) { | 714 | for (i = 0; i < num_q; i++) { |
704 | struct netdev_queue *dev_queue = &dev->rx_queue; | 715 | struct netdev_queue *dev_queue = dev_ingress_queue(dev); |
705 | 716 | ||
706 | if (!ingress) | 717 | if (!ingress) |
707 | dev_queue = netdev_get_tx_queue(dev, i); | 718 | dev_queue = netdev_get_tx_queue(dev, i); |
@@ -829,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, | |||
829 | err = PTR_ERR(stab); | 840 | err = PTR_ERR(stab); |
830 | goto err_out4; | 841 | goto err_out4; |
831 | } | 842 | } |
832 | sch->stab = stab; | 843 | rcu_assign_pointer(sch->stab, stab); |
833 | } | 844 | } |
834 | if (tca[TCA_RATE]) { | 845 | if (tca[TCA_RATE]) { |
835 | spinlock_t *root_lock; | 846 | spinlock_t *root_lock; |
@@ -869,7 +880,7 @@ err_out4: | |||
869 | * Any broken qdiscs that would require a ops->reset() here? | 880 | * Any broken qdiscs that would require a ops->reset() here? |
870 | * The qdisc was never in action so it shouldn't be necessary. | 881 | * The qdisc was never in action so it shouldn't be necessary. |
871 | */ | 882 | */ |
872 | qdisc_put_stab(sch->stab); | 883 | qdisc_put_stab(rtnl_dereference(sch->stab)); |
873 | if (ops->destroy) | 884 | if (ops->destroy) |
874 | ops->destroy(sch); | 885 | ops->destroy(sch); |
875 | goto err_out3; | 886 | goto err_out3; |
@@ -877,7 +888,7 @@ err_out4: | |||
877 | 888 | ||
878 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | 889 | static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) |
879 | { | 890 | { |
880 | struct qdisc_size_table *stab = NULL; | 891 | struct qdisc_size_table *ostab, *stab = NULL; |
881 | int err = 0; | 892 | int err = 0; |
882 | 893 | ||
883 | if (tca[TCA_OPTIONS]) { | 894 | if (tca[TCA_OPTIONS]) { |
@@ -894,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) | |||
894 | return PTR_ERR(stab); | 905 | return PTR_ERR(stab); |
895 | } | 906 | } |
896 | 907 | ||
897 | qdisc_put_stab(sch->stab); | 908 | ostab = rtnl_dereference(sch->stab); |
898 | sch->stab = stab; | 909 | rcu_assign_pointer(sch->stab, stab); |
910 | qdisc_put_stab(ostab); | ||
899 | 911 | ||
900 | if (tca[TCA_RATE]) { | 912 | if (tca[TCA_RATE]) { |
901 | /* NB: ignores errors from replace_estimator | 913 | /* NB: ignores errors from replace_estimator |
@@ -910,9 +922,8 @@ out: | |||
910 | return 0; | 922 | return 0; |
911 | } | 923 | } |
912 | 924 | ||
913 | struct check_loop_arg | 925 | struct check_loop_arg { |
914 | { | 926 | struct qdisc_walker w; |
915 | struct qdisc_walker w; | ||
916 | struct Qdisc *p; | 927 | struct Qdisc *p; |
917 | int depth; | 928 | int depth; |
918 | }; | 929 | }; |
@@ -965,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
965 | struct Qdisc *p = NULL; | 976 | struct Qdisc *p = NULL; |
966 | int err; | 977 | int err; |
967 | 978 | ||
968 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 979 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
980 | if (!dev) | ||
969 | return -ENODEV; | 981 | return -ENODEV; |
970 | 982 | ||
971 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); | 983 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); |
@@ -975,11 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
975 | if (clid) { | 987 | if (clid) { |
976 | if (clid != TC_H_ROOT) { | 988 | if (clid != TC_H_ROOT) { |
977 | if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { | 989 | if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { |
978 | if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) | 990 | p = qdisc_lookup(dev, TC_H_MAJ(clid)); |
991 | if (!p) | ||
979 | return -ENOENT; | 992 | return -ENOENT; |
980 | q = qdisc_leaf(p, clid); | 993 | q = qdisc_leaf(p, clid); |
981 | } else { /* ingress */ | 994 | } else if (dev_ingress_queue(dev)) { |
982 | q = dev->rx_queue.qdisc_sleeping; | 995 | q = dev_ingress_queue(dev)->qdisc_sleeping; |
983 | } | 996 | } |
984 | } else { | 997 | } else { |
985 | q = dev->qdisc; | 998 | q = dev->qdisc; |
@@ -990,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
990 | if (tcm->tcm_handle && q->handle != tcm->tcm_handle) | 1003 | if (tcm->tcm_handle && q->handle != tcm->tcm_handle) |
991 | return -EINVAL; | 1004 | return -EINVAL; |
992 | } else { | 1005 | } else { |
993 | if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) | 1006 | q = qdisc_lookup(dev, tcm->tcm_handle); |
1007 | if (!q) | ||
994 | return -ENOENT; | 1008 | return -ENOENT; |
995 | } | 1009 | } |
996 | 1010 | ||
@@ -1002,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1002 | return -EINVAL; | 1016 | return -EINVAL; |
1003 | if (q->handle == 0) | 1017 | if (q->handle == 0) |
1004 | return -ENOENT; | 1018 | return -ENOENT; |
1005 | if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) | 1019 | err = qdisc_graft(dev, p, skb, n, clid, NULL, q); |
1020 | if (err != 0) | ||
1006 | return err; | 1021 | return err; |
1007 | } else { | 1022 | } else { |
1008 | qdisc_notify(net, skb, n, clid, NULL, q); | 1023 | qdisc_notify(net, skb, n, clid, NULL, q); |
@@ -1011,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1011 | } | 1026 | } |
1012 | 1027 | ||
1013 | /* | 1028 | /* |
1014 | Create/change qdisc. | 1029 | * Create/change qdisc. |
1015 | */ | 1030 | */ |
1016 | 1031 | ||
1017 | static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | 1032 | static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) |
@@ -1030,7 +1045,8 @@ replay: | |||
1030 | clid = tcm->tcm_parent; | 1045 | clid = tcm->tcm_parent; |
1031 | q = p = NULL; | 1046 | q = p = NULL; |
1032 | 1047 | ||
1033 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 1048 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
1049 | if (!dev) | ||
1034 | return -ENODEV; | 1050 | return -ENODEV; |
1035 | 1051 | ||
1036 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); | 1052 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); |
@@ -1040,11 +1056,12 @@ replay: | |||
1040 | if (clid) { | 1056 | if (clid) { |
1041 | if (clid != TC_H_ROOT) { | 1057 | if (clid != TC_H_ROOT) { |
1042 | if (clid != TC_H_INGRESS) { | 1058 | if (clid != TC_H_INGRESS) { |
1043 | if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) | 1059 | p = qdisc_lookup(dev, TC_H_MAJ(clid)); |
1060 | if (!p) | ||
1044 | return -ENOENT; | 1061 | return -ENOENT; |
1045 | q = qdisc_leaf(p, clid); | 1062 | q = qdisc_leaf(p, clid); |
1046 | } else { /*ingress */ | 1063 | } else if (dev_ingress_queue_create(dev)) { |
1047 | q = dev->rx_queue.qdisc_sleeping; | 1064 | q = dev_ingress_queue(dev)->qdisc_sleeping; |
1048 | } | 1065 | } |
1049 | } else { | 1066 | } else { |
1050 | q = dev->qdisc; | 1067 | q = dev->qdisc; |
@@ -1056,13 +1073,14 @@ replay: | |||
1056 | 1073 | ||
1057 | if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { | 1074 | if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { |
1058 | if (tcm->tcm_handle) { | 1075 | if (tcm->tcm_handle) { |
1059 | if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) | 1076 | if (q && !(n->nlmsg_flags & NLM_F_REPLACE)) |
1060 | return -EEXIST; | 1077 | return -EEXIST; |
1061 | if (TC_H_MIN(tcm->tcm_handle)) | 1078 | if (TC_H_MIN(tcm->tcm_handle)) |
1062 | return -EINVAL; | 1079 | return -EINVAL; |
1063 | if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) | 1080 | q = qdisc_lookup(dev, tcm->tcm_handle); |
1081 | if (!q) | ||
1064 | goto create_n_graft; | 1082 | goto create_n_graft; |
1065 | if (n->nlmsg_flags&NLM_F_EXCL) | 1083 | if (n->nlmsg_flags & NLM_F_EXCL) |
1066 | return -EEXIST; | 1084 | return -EEXIST; |
1067 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) | 1085 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) |
1068 | return -EINVAL; | 1086 | return -EINVAL; |
@@ -1072,7 +1090,7 @@ replay: | |||
1072 | atomic_inc(&q->refcnt); | 1090 | atomic_inc(&q->refcnt); |
1073 | goto graft; | 1091 | goto graft; |
1074 | } else { | 1092 | } else { |
1075 | if (q == NULL) | 1093 | if (!q) |
1076 | goto create_n_graft; | 1094 | goto create_n_graft; |
1077 | 1095 | ||
1078 | /* This magic test requires explanation. | 1096 | /* This magic test requires explanation. |
@@ -1094,9 +1112,9 @@ replay: | |||
1094 | * For now we select create/graft, if | 1112 | * For now we select create/graft, if |
1095 | * user gave KIND, which does not match existing. | 1113 | * user gave KIND, which does not match existing. |
1096 | */ | 1114 | */ |
1097 | if ((n->nlmsg_flags&NLM_F_CREATE) && | 1115 | if ((n->nlmsg_flags & NLM_F_CREATE) && |
1098 | (n->nlmsg_flags&NLM_F_REPLACE) && | 1116 | (n->nlmsg_flags & NLM_F_REPLACE) && |
1099 | ((n->nlmsg_flags&NLM_F_EXCL) || | 1117 | ((n->nlmsg_flags & NLM_F_EXCL) || |
1100 | (tca[TCA_KIND] && | 1118 | (tca[TCA_KIND] && |
1101 | nla_strcmp(tca[TCA_KIND], q->ops->id)))) | 1119 | nla_strcmp(tca[TCA_KIND], q->ops->id)))) |
1102 | goto create_n_graft; | 1120 | goto create_n_graft; |
@@ -1111,7 +1129,7 @@ replay: | |||
1111 | /* Change qdisc parameters */ | 1129 | /* Change qdisc parameters */ |
1112 | if (q == NULL) | 1130 | if (q == NULL) |
1113 | return -ENOENT; | 1131 | return -ENOENT; |
1114 | if (n->nlmsg_flags&NLM_F_EXCL) | 1132 | if (n->nlmsg_flags & NLM_F_EXCL) |
1115 | return -EEXIST; | 1133 | return -EEXIST; |
1116 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) | 1134 | if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) |
1117 | return -EINVAL; | 1135 | return -EINVAL; |
@@ -1121,13 +1139,16 @@ replay: | |||
1121 | return err; | 1139 | return err; |
1122 | 1140 | ||
1123 | create_n_graft: | 1141 | create_n_graft: |
1124 | if (!(n->nlmsg_flags&NLM_F_CREATE)) | 1142 | if (!(n->nlmsg_flags & NLM_F_CREATE)) |
1125 | return -ENOENT; | 1143 | return -ENOENT; |
1126 | if (clid == TC_H_INGRESS) | 1144 | if (clid == TC_H_INGRESS) { |
1127 | q = qdisc_create(dev, &dev->rx_queue, p, | 1145 | if (dev_ingress_queue(dev)) |
1128 | tcm->tcm_parent, tcm->tcm_parent, | 1146 | q = qdisc_create(dev, dev_ingress_queue(dev), p, |
1129 | tca, &err); | 1147 | tcm->tcm_parent, tcm->tcm_parent, |
1130 | else { | 1148 | tca, &err); |
1149 | else | ||
1150 | err = -ENOENT; | ||
1151 | } else { | ||
1131 | struct netdev_queue *dev_queue; | 1152 | struct netdev_queue *dev_queue; |
1132 | 1153 | ||
1133 | if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) | 1154 | if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) |
@@ -1165,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
1165 | struct nlmsghdr *nlh; | 1186 | struct nlmsghdr *nlh; |
1166 | unsigned char *b = skb_tail_pointer(skb); | 1187 | unsigned char *b = skb_tail_pointer(skb); |
1167 | struct gnet_dump d; | 1188 | struct gnet_dump d; |
1189 | struct qdisc_size_table *stab; | ||
1168 | 1190 | ||
1169 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); | 1191 | nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); |
1170 | tcm = NLMSG_DATA(nlh); | 1192 | tcm = NLMSG_DATA(nlh); |
@@ -1180,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, | |||
1180 | goto nla_put_failure; | 1202 | goto nla_put_failure; |
1181 | q->qstats.qlen = q->q.qlen; | 1203 | q->qstats.qlen = q->q.qlen; |
1182 | 1204 | ||
1183 | if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) | 1205 | stab = rtnl_dereference(q->stab); |
1206 | if (stab && qdisc_dump_stab(skb, stab) < 0) | ||
1184 | goto nla_put_failure; | 1207 | goto nla_put_failure; |
1185 | 1208 | ||
1186 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, | 1209 | if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, |
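The stab change above turns two direct reads of q->stab into a single rtnl_dereference() snapshot that is tested and then reused. The RCU/RTNL annotation itself has no user-space analogue, but the read-once-into-a-local shape can be sketched in plain C; the types and names below are invented for the illustration:

#include <stdio.h>

struct size_table { int nitems; };
struct qdisc_model { struct size_table *stab; };

static int dump_stab(const struct size_table *stab)
{
	return printf("stab with %d items\n", stab->nitems) < 0 ? -1 : 0;
}

static int fill(struct qdisc_model *q)
{
	struct size_table *stab = q->stab;	/* one read, then reuse the local */

	if (stab && dump_stab(stab) < 0)
		return -1;
	return 0;
}

int main(void)
{
	struct size_table t = { .nitems = 3 };
	struct qdisc_model q = { .stab = &t };

	return fill(&q);
}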
@@ -1224,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, | |||
1224 | return -ENOBUFS; | 1247 | return -ENOBUFS; |
1225 | 1248 | ||
1226 | if (old && !tc_qdisc_dump_ignore(old)) { | 1249 | if (old && !tc_qdisc_dump_ignore(old)) { |
1227 | if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) | 1250 | if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, |
1251 | 0, RTM_DELQDISC) < 0) | ||
1228 | goto err_out; | 1252 | goto err_out; |
1229 | } | 1253 | } |
1230 | if (new && !tc_qdisc_dump_ignore(new)) { | 1254 | if (new && !tc_qdisc_dump_ignore(new)) { |
1231 | if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) | 1255 | if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, |
1256 | old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) | ||
1232 | goto err_out; | 1257 | goto err_out; |
1233 | } | 1258 | } |
1234 | 1259 | ||
1235 | if (skb->len) | 1260 | if (skb->len) |
1236 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 1261 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
1262 | n->nlmsg_flags & NLM_F_ECHO); | ||
1237 | 1263 | ||
1238 | err_out: | 1264 | err_out: |
1239 | kfree_skb(skb); | 1265 | kfree_skb(skb); |
@@ -1265,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, | |||
1265 | q_idx++; | 1291 | q_idx++; |
1266 | continue; | 1292 | continue; |
1267 | } | 1293 | } |
1268 | if (!tc_qdisc_dump_ignore(q) && | 1294 | if (!tc_qdisc_dump_ignore(q) && |
1269 | tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, | 1295 | tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, |
1270 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) | 1296 | cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) |
1271 | goto done; | 1297 | goto done; |
@@ -1304,8 +1330,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) | |||
1304 | if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) | 1330 | if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) |
1305 | goto done; | 1331 | goto done; |
1306 | 1332 | ||
1307 | dev_queue = &dev->rx_queue; | 1333 | dev_queue = dev_ingress_queue(dev); |
1308 | if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) | 1334 | if (dev_queue && |
1335 | tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, | ||
1336 | &q_idx, s_q_idx) < 0) | ||
1309 | goto done; | 1337 | goto done; |
1310 | 1338 | ||
1311 | cont: | 1339 | cont: |
@@ -1344,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1344 | u32 qid = TC_H_MAJ(clid); | 1372 | u32 qid = TC_H_MAJ(clid); |
1345 | int err; | 1373 | int err; |
1346 | 1374 | ||
1347 | if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 1375 | dev = __dev_get_by_index(net, tcm->tcm_ifindex); |
1376 | if (!dev) | ||
1348 | return -ENODEV; | 1377 | return -ENODEV; |
1349 | 1378 | ||
1350 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); | 1379 | err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); |
@@ -1379,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1379 | qid = dev->qdisc->handle; | 1408 | qid = dev->qdisc->handle; |
1380 | 1409 | ||
1381 | /* Now qid is genuine qdisc handle consistent | 1410 | /* Now qid is genuine qdisc handle consistent |
1382 | both with parent and child. | 1411 | * both with parent and child. |
1383 | 1412 | * | |
1384 | TC_H_MAJ(pid) still may be unspecified, complete it now. | 1413 | * TC_H_MAJ(pid) still may be unspecified, complete it now. |
1385 | */ | 1414 | */ |
1386 | if (pid) | 1415 | if (pid) |
1387 | pid = TC_H_MAKE(qid, pid); | 1416 | pid = TC_H_MAKE(qid, pid); |
@@ -1391,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1391 | } | 1420 | } |
1392 | 1421 | ||
1393 | /* OK. Locate qdisc */ | 1422 | /* OK. Locate qdisc */ |
1394 | if ((q = qdisc_lookup(dev, qid)) == NULL) | 1423 | q = qdisc_lookup(dev, qid); |
1424 | if (!q) | ||
1395 | return -ENOENT; | 1425 | return -ENOENT; |
1396 | 1426 | ||
1397 | /* An check that it supports classes */ | 1427 | /* An check that it supports classes */ |
@@ -1411,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) | |||
1411 | 1441 | ||
1412 | if (cl == 0) { | 1442 | if (cl == 0) { |
1413 | err = -ENOENT; | 1443 | err = -ENOENT; |
1414 | if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) | 1444 | if (n->nlmsg_type != RTM_NEWTCLASS || |
1445 | !(n->nlmsg_flags & NLM_F_CREATE)) | ||
1415 | goto out; | 1446 | goto out; |
1416 | } else { | 1447 | } else { |
1417 | switch (n->nlmsg_type) { | 1448 | switch (n->nlmsg_type) { |
1418 | case RTM_NEWTCLASS: | 1449 | case RTM_NEWTCLASS: |
1419 | err = -EEXIST; | 1450 | err = -EEXIST; |
1420 | if (n->nlmsg_flags&NLM_F_EXCL) | 1451 | if (n->nlmsg_flags & NLM_F_EXCL) |
1421 | goto out; | 1452 | goto out; |
1422 | break; | 1453 | break; |
1423 | case RTM_DELTCLASS: | 1454 | case RTM_DELTCLASS: |
@@ -1509,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, | |||
1509 | return -EINVAL; | 1540 | return -EINVAL; |
1510 | } | 1541 | } |
1511 | 1542 | ||
1512 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); | 1543 | return rtnetlink_send(skb, net, pid, RTNLGRP_TC, |
1544 | n->nlmsg_flags & NLM_F_ECHO); | ||
1513 | } | 1545 | } |
1514 | 1546 | ||
1515 | struct qdisc_dump_args | 1547 | struct qdisc_dump_args { |
1516 | { | 1548 | struct qdisc_walker w; |
1517 | struct qdisc_walker w; | 1549 | struct sk_buff *skb; |
1518 | struct sk_buff *skb; | 1550 | struct netlink_callback *cb; |
1519 | struct netlink_callback *cb; | ||
1520 | }; | 1551 | }; |
1521 | 1552 | ||
1522 | static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) | 1553 | static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) |
@@ -1578,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, | |||
1578 | 1609 | ||
1579 | static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | 1610 | static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) |
1580 | { | 1611 | { |
1581 | struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); | 1612 | struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh); |
1582 | struct net *net = sock_net(skb->sk); | 1613 | struct net *net = sock_net(skb->sk); |
1583 | struct netdev_queue *dev_queue; | 1614 | struct netdev_queue *dev_queue; |
1584 | struct net_device *dev; | 1615 | struct net_device *dev; |
@@ -1586,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | |||
1586 | 1617 | ||
1587 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) | 1618 | if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) |
1588 | return 0; | 1619 | return 0; |
1589 | if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) | 1620 | dev = dev_get_by_index(net, tcm->tcm_ifindex); |
1621 | if (!dev) | ||
1590 | return 0; | 1622 | return 0; |
1591 | 1623 | ||
1592 | s_t = cb->args[0]; | 1624 | s_t = cb->args[0]; |
@@ -1595,8 +1627,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) | |||
1595 | if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) | 1627 | if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) |
1596 | goto done; | 1628 | goto done; |
1597 | 1629 | ||
1598 | dev_queue = &dev->rx_queue; | 1630 | dev_queue = dev_ingress_queue(dev); |
1599 | if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) | 1631 | if (dev_queue && |
1632 | tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, | ||
1633 | &t, s_t) < 0) | ||
1600 | goto done; | 1634 | goto done; |
1601 | 1635 | ||
1602 | done: | 1636 | done: |
@@ -1607,19 +1641,22 @@ done: | |||
1607 | } | 1641 | } |
1608 | 1642 | ||
1609 | /* Main classifier routine: scans classifier chain attached | 1643 | /* Main classifier routine: scans classifier chain attached |
1610 | to this qdisc, (optionally) tests for protocol and asks | 1644 | * to this qdisc, (optionally) tests for protocol and asks |
1611 | specific classifiers. | 1645 | * specific classifiers. |
1612 | */ | 1646 | */ |
1613 | int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, | 1647 | int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, |
1614 | struct tcf_result *res) | 1648 | struct tcf_result *res) |
1615 | { | 1649 | { |
1616 | __be16 protocol = skb->protocol; | 1650 | __be16 protocol = skb->protocol; |
1617 | int err = 0; | 1651 | int err; |
1618 | 1652 | ||
1619 | for (; tp; tp = tp->next) { | 1653 | for (; tp; tp = tp->next) { |
1620 | if ((tp->protocol == protocol || | 1654 | if (tp->protocol != protocol && |
1621 | tp->protocol == htons(ETH_P_ALL)) && | 1655 | tp->protocol != htons(ETH_P_ALL)) |
1622 | (err = tp->classify(skb, tp, res)) >= 0) { | 1656 | continue; |
1657 | err = tp->classify(skb, tp, res); | ||
1658 | |||
1659 | if (err >= 0) { | ||
1623 | #ifdef CONFIG_NET_CLS_ACT | 1660 | #ifdef CONFIG_NET_CLS_ACT |
1624 | if (err != TC_ACT_RECLASSIFY && skb->tc_verd) | 1661 | if (err != TC_ACT_RECLASSIFY && skb->tc_verd) |
1625 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); | 1662 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); |
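The rewritten loop in tc_classify_compat() walks the classifier chain, skips entries whose protocol matches neither the packet nor ETH_P_ALL, and returns the first non-negative verdict. A stand-alone model of that control flow, with invented types and a protocol value of 0 playing the wildcard role:

#include <stdio.h>

struct classifier {
	unsigned short protocol;	/* 0 == wildcard, like ETH_P_ALL */
	int (*classify)(unsigned short pkt_proto);
	struct classifier *next;
};

static int classify_compat(unsigned short pkt_proto, struct classifier *tp)
{
	for (; tp; tp = tp->next) {
		if (tp->protocol != pkt_proto && tp->protocol != 0)
			continue;		/* wrong protocol: next rule */

		int err = tp->classify(pkt_proto);
		if (err >= 0)
			return err;		/* first non-negative verdict wins */
	}
	return -1;				/* no classifier matched */
}

static int match_ip(unsigned short proto)  { return proto == 0x0800 ? 1 : -1; }
static int match_any(unsigned short proto) { (void)proto; return 7; }

int main(void)
{
	struct classifier any = { 0, match_any, NULL };
	struct classifier ip  = { 0x0800, match_ip, &any };

	printf("%d\n", classify_compat(0x0800, &ip));	/* 1: IPv4 rule hits */
	printf("%d\n", classify_compat(0x86dd, &ip));	/* 7: falls to wildcard */
	return 0;
}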
@@ -1635,12 +1672,10 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, | |||
1635 | struct tcf_result *res) | 1672 | struct tcf_result *res) |
1636 | { | 1673 | { |
1637 | int err = 0; | 1674 | int err = 0; |
1638 | __be16 protocol; | ||
1639 | #ifdef CONFIG_NET_CLS_ACT | 1675 | #ifdef CONFIG_NET_CLS_ACT |
1640 | struct tcf_proto *otp = tp; | 1676 | struct tcf_proto *otp = tp; |
1641 | reclassify: | 1677 | reclassify: |
1642 | #endif | 1678 | #endif |
1643 | protocol = skb->protocol; | ||
1644 | 1679 | ||
1645 | err = tc_classify_compat(skb, tp, res); | 1680 | err = tc_classify_compat(skb, tp, res); |
1646 | #ifdef CONFIG_NET_CLS_ACT | 1681 | #ifdef CONFIG_NET_CLS_ACT |
@@ -1650,11 +1685,11 @@ reclassify: | |||
1650 | 1685 | ||
1651 | if (verd++ >= MAX_REC_LOOP) { | 1686 | if (verd++ >= MAX_REC_LOOP) { |
1652 | if (net_ratelimit()) | 1687 | if (net_ratelimit()) |
1653 | printk(KERN_NOTICE | 1688 | pr_notice("%s: packet reclassify loop" |
1654 | "%s: packet reclassify loop" | ||
1655 | " rule prio %u protocol %02x\n", | 1689 | " rule prio %u protocol %02x\n", |
1656 | tp->q->ops->id, | 1690 | tp->q->ops->id, |
1657 | tp->prio & 0xffff, ntohs(tp->protocol)); | 1691 | tp->prio & 0xffff, |
1692 | ntohs(tp->protocol)); | ||
1658 | return TC_ACT_SHOT; | 1693 | return TC_ACT_SHOT; |
1659 | } | 1694 | } |
1660 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); | 1695 | skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); |
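The pr_notice() hunk above sits inside the reclassification guard: each pass bumps a counter carried in tc_verd, and the packet is shot once the counter reaches MAX_REC_LOOP. A loose stand-alone model of that bound; the limit and the verdict values below are placeholders, not the kernel's MAX_REC_LOOP or TC_ACT_* codes:

#include <stdio.h>

#define MAX_LOOPS          4
#define VERDICT_RECLASSIFY 1
#define VERDICT_OK         0
#define VERDICT_SHOT      (-2)

static int classify_once(int attempt)
{
	/* pretend the rule set keeps asking for reclassification */
	return attempt < 6 ? VERDICT_RECLASSIFY : VERDICT_OK;
}

int main(void)
{
	int loops = 0, verdict;

	for (;;) {
		verdict = classify_once(loops);
		if (verdict != VERDICT_RECLASSIFY)
			break;
		if (++loops >= MAX_LOOPS) {	/* kernel also rate-limits a warning here */
			verdict = VERDICT_SHOT;
			break;
		}
	}
	printf("verdict=%d after %d reclassify passes\n", verdict, loops);
	return 0;
}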
@@ -1747,7 +1782,7 @@ static int __init pktsched_init(void) | |||
1747 | 1782 | ||
1748 | err = register_pernet_subsys(&psched_net_ops); | 1783 | err = register_pernet_subsys(&psched_net_ops); |
1749 | if (err) { | 1784 | if (err) { |
1750 | printk(KERN_ERR "pktsched_init: " | 1785 | pr_err("pktsched_init: " |
1751 | "cannot initialize per netns operations\n"); | 1786 | "cannot initialize per netns operations\n"); |
1752 | return err; | 1787 | return err; |
1753 | } | 1788 | } |
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 6318e1136b83..3f08158b8688 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c | |||
@@ -275,8 +275,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, | |||
275 | goto err_out; | 275 | goto err_out; |
276 | } | 276 | } |
277 | flow->filter_list = NULL; | 277 | flow->filter_list = NULL; |
278 | flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 278 | flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); |
279 | &pfifo_qdisc_ops, classid); | ||
280 | if (!flow->q) | 279 | if (!flow->q) |
281 | flow->q = &noop_qdisc; | 280 | flow->q = &noop_qdisc; |
282 | pr_debug("atm_tc_change: qdisc %p\n", flow->q); | 281 | pr_debug("atm_tc_change: qdisc %p\n", flow->q); |
@@ -320,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg) | |||
320 | * creation), and one for the reference held when calling delete. | 319 | * creation), and one for the reference held when calling delete. |
321 | */ | 320 | */ |
322 | if (flow->ref < 2) { | 321 | if (flow->ref < 2) { |
323 | printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); | 322 | pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref); |
324 | return -EINVAL; | 323 | return -EINVAL; |
325 | } | 324 | } |
326 | if (flow->ref > 2) | 325 | if (flow->ref > 2) |
@@ -385,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
385 | } | 384 | } |
386 | } | 385 | } |
387 | flow = NULL; | 386 | flow = NULL; |
388 | done: | 387 | done: |
389 | ; | 388 | ; |
390 | } | 389 | } |
391 | if (!flow) | 390 | if (!flow) { |
392 | flow = &p->link; | 391 | flow = &p->link; |
393 | else { | 392 | } else { |
394 | if (flow->vcc) | 393 | if (flow->vcc) |
395 | ATM_SKB(skb)->atm_options = flow->vcc->atm_options; | 394 | ATM_SKB(skb)->atm_options = flow->vcc->atm_options; |
396 | /*@@@ looks good ... but it's not supposed to work :-) */ | 395 | /*@@@ looks good ... but it's not supposed to work :-) */ |
@@ -423,10 +422,8 @@ drop: __maybe_unused | |||
423 | } | 422 | } |
424 | return ret; | 423 | return ret; |
425 | } | 424 | } |
426 | sch->bstats.bytes += qdisc_pkt_len(skb); | 425 | qdisc_bstats_update(sch, skb); |
427 | sch->bstats.packets++; | 426 | bstats_update(&flow->bstats, skb); |
428 | flow->bstats.bytes += qdisc_pkt_len(skb); | ||
429 | flow->bstats.packets++; | ||
430 | /* | 427 | /* |
431 | * Okay, this may seem weird. We pretend we've dropped the packet if | 428 | * Okay, this may seem weird. We pretend we've dropped the packet if |
432 | * it goes via ATM. The reason for this is that the outer qdisc | 429 | * it goes via ATM. The reason for this is that the outer qdisc |
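The enqueue hunk above replaces four open-coded counter increments with qdisc_bstats_update() and bstats_update(), which bump bytes and packets in one place. A minimal plain-C model of that consolidation, not the kernel's gnet_stats structures:

#include <stdio.h>

struct basic_stats {
	unsigned long long bytes;
	unsigned long packets;
};

static void bstats_bump(struct basic_stats *b, unsigned int pkt_len)
{
	b->bytes += pkt_len;
	b->packets++;
}

int main(void)
{
	struct basic_stats qdisc = { 0, 0 }, flow = { 0, 0 };

	bstats_bump(&qdisc, 1500);	/* was: bstats.bytes += len; bstats.packets++ */
	bstats_bump(&flow, 1500);	/* same update for the per-flow counters */

	printf("%llu bytes, %lu packets\n", qdisc.bytes, qdisc.packets);
	return 0;
}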
@@ -543,7 +540,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) | |||
543 | INIT_LIST_HEAD(&p->flows); | 540 | INIT_LIST_HEAD(&p->flows); |
544 | INIT_LIST_HEAD(&p->link.list); | 541 | INIT_LIST_HEAD(&p->link.list); |
545 | list_add(&p->link.list, &p->flows); | 542 | list_add(&p->link.list, &p->flows); |
546 | p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 543 | p->link.q = qdisc_create_dflt(sch->dev_queue, |
547 | &pfifo_qdisc_ops, sch->handle); | 544 | &pfifo_qdisc_ops, sch->handle); |
548 | if (!p->link.q) | 545 | if (!p->link.q) |
549 | p->link.q = &noop_qdisc; | 546 | p->link.q = &noop_qdisc; |
@@ -579,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch) | |||
579 | 576 | ||
580 | list_for_each_entry_safe(flow, tmp, &p->flows, list) { | 577 | list_for_each_entry_safe(flow, tmp, &p->flows, list) { |
581 | if (flow->ref > 1) | 578 | if (flow->ref > 1) |
582 | printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, | 579 | pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref); |
583 | flow->ref); | ||
584 | atm_tc_put(sch, (unsigned long)flow); | 580 | atm_tc_put(sch, (unsigned long)flow); |
585 | } | 581 | } |
586 | tasklet_kill(&p->task); | 582 | tasklet_kill(&p->task); |
@@ -619,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, | |||
619 | } | 615 | } |
620 | if (flow->excess) | 616 | if (flow->excess) |
621 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); | 617 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); |
622 | else { | 618 | else |
623 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); | 619 | NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); |
624 | } | ||
625 | 620 | ||
626 | nla_nest_end(skb, nest); | 621 | nla_nest_end(skb, nest); |
627 | return skb->len; | 622 | return skb->len; |
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 28c01ef5abc8..24d94c097b35 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c | |||
@@ -72,8 +72,7 @@ | |||
72 | struct cbq_sched_data; | 72 | struct cbq_sched_data; |
73 | 73 | ||
74 | 74 | ||
75 | struct cbq_class | 75 | struct cbq_class { |
76 | { | ||
77 | struct Qdisc_class_common common; | 76 | struct Qdisc_class_common common; |
78 | struct cbq_class *next_alive; /* next class with backlog in this priority band */ | 77 | struct cbq_class *next_alive; /* next class with backlog in this priority band */ |
79 | 78 | ||
@@ -139,19 +138,18 @@ struct cbq_class | |||
139 | int refcnt; | 138 | int refcnt; |
140 | int filters; | 139 | int filters; |
141 | 140 | ||
142 | struct cbq_class *defaults[TC_PRIO_MAX+1]; | 141 | struct cbq_class *defaults[TC_PRIO_MAX + 1]; |
143 | }; | 142 | }; |
144 | 143 | ||
145 | struct cbq_sched_data | 144 | struct cbq_sched_data { |
146 | { | ||
147 | struct Qdisc_class_hash clhash; /* Hash table of all classes */ | 145 | struct Qdisc_class_hash clhash; /* Hash table of all classes */ |
148 | int nclasses[TC_CBQ_MAXPRIO+1]; | 146 | int nclasses[TC_CBQ_MAXPRIO + 1]; |
149 | unsigned quanta[TC_CBQ_MAXPRIO+1]; | 147 | unsigned int quanta[TC_CBQ_MAXPRIO + 1]; |
150 | 148 | ||
151 | struct cbq_class link; | 149 | struct cbq_class link; |
152 | 150 | ||
153 | unsigned activemask; | 151 | unsigned int activemask; |
154 | struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes | 152 | struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes |
155 | with backlog */ | 153 | with backlog */ |
156 | 154 | ||
157 | #ifdef CONFIG_NET_CLS_ACT | 155 | #ifdef CONFIG_NET_CLS_ACT |
@@ -162,7 +160,7 @@ struct cbq_sched_data | |||
162 | int tx_len; | 160 | int tx_len; |
163 | psched_time_t now; /* Cached timestamp */ | 161 | psched_time_t now; /* Cached timestamp */ |
164 | psched_time_t now_rt; /* Cached real time */ | 162 | psched_time_t now_rt; /* Cached real time */ |
165 | unsigned pmask; | 163 | unsigned int pmask; |
166 | 164 | ||
167 | struct hrtimer delay_timer; | 165 | struct hrtimer delay_timer; |
168 | struct qdisc_watchdog watchdog; /* Watchdog timer, | 166 | struct qdisc_watchdog watchdog; /* Watchdog timer, |
@@ -175,9 +173,9 @@ struct cbq_sched_data | |||
175 | }; | 173 | }; |
176 | 174 | ||
177 | 175 | ||
178 | #define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) | 176 | #define L2T(cl, len) qdisc_l2t((cl)->R_tab, len) |
179 | 177 | ||
180 | static __inline__ struct cbq_class * | 178 | static inline struct cbq_class * |
181 | cbq_class_lookup(struct cbq_sched_data *q, u32 classid) | 179 | cbq_class_lookup(struct cbq_sched_data *q, u32 classid) |
182 | { | 180 | { |
183 | struct Qdisc_class_common *clc; | 181 | struct Qdisc_class_common *clc; |
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid) | |||
193 | static struct cbq_class * | 191 | static struct cbq_class * |
194 | cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) | 192 | cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) |
195 | { | 193 | { |
196 | struct cbq_class *cl, *new; | 194 | struct cbq_class *cl; |
197 | 195 | ||
198 | for (cl = this->tparent; cl; cl = cl->tparent) | 196 | for (cl = this->tparent; cl; cl = cl->tparent) { |
199 | if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) | 197 | struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT]; |
200 | return new; | ||
201 | 198 | ||
199 | if (new != NULL && new != this) | ||
200 | return new; | ||
201 | } | ||
202 | return NULL; | 202 | return NULL; |
203 | } | 203 | } |
204 | 204 | ||
205 | #endif | 205 | #endif |
206 | 206 | ||
207 | /* Classify packet. The procedure is pretty complicated, but | 207 | /* Classify packet. The procedure is pretty complicated, but |
208 | it allows us to combine link sharing and priority scheduling | 208 | * it allows us to combine link sharing and priority scheduling |
209 | transparently. | 209 | * transparently. |
210 | 210 | * | |
211 | Namely, you can put link sharing rules (f.e. route based) at root of CBQ, | 211 | * Namely, you can put link sharing rules (f.e. route based) at root of CBQ, |
212 | so that it resolves to split nodes. Then packets are classified | 212 | * so that it resolves to split nodes. Then packets are classified |
213 | by logical priority, or a more specific classifier may be attached | 213 | * by logical priority, or a more specific classifier may be attached |
214 | to the split node. | 214 | * to the split node. |
215 | */ | 215 | */ |
216 | 216 | ||
217 | static struct cbq_class * | 217 | static struct cbq_class * |
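Step 1 of the classification procedure described in the comment above treats skb->priority as a direct class handle when its major number equals the qdisc's own handle. A stand-alone illustration using the TC_H_MAJ()/TC_H_MIN() definitions from <linux/pkt_sched.h>; the handle values are made up:

#include <stdio.h>

#define TC_H_MAJ(h)  ((h) & 0xFFFF0000U)
#define TC_H_MIN(h)  ((h) & 0x0000FFFFU)

int main(void)
{
	unsigned int qdisc_handle = 0x00010000;	/* "1:" */
	unsigned int skb_priority = 0x00010003;	/* "1:3" */

	/* same test as TC_H_MAJ(prio ^ sch->handle) == 0 in cbq_classify() */
	if (TC_H_MAJ(skb_priority ^ qdisc_handle) == 0)
		printf("direct match, class minor = %u\n",
		       TC_H_MIN(skb_priority));
	else
		printf("fall through to the attached filters\n");
	return 0;
}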
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
227 | /* | 227 | /* |
228 | * Step 1. If skb->priority points to one of our classes, use it. | 228 | * Step 1. If skb->priority points to one of our classes, use it. |
229 | */ | 229 | */ |
230 | if (TC_H_MAJ(prio^sch->handle) == 0 && | 230 | if (TC_H_MAJ(prio ^ sch->handle) == 0 && |
231 | (cl = cbq_class_lookup(q, prio)) != NULL) | 231 | (cl = cbq_class_lookup(q, prio)) != NULL) |
232 | return cl; | 232 | return cl; |
233 | 233 | ||
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
243 | (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) | 243 | (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) |
244 | goto fallback; | 244 | goto fallback; |
245 | 245 | ||
246 | if ((cl = (void*)res.class) == NULL) { | 246 | cl = (void *)res.class; |
247 | if (!cl) { | ||
247 | if (TC_H_MAJ(res.classid)) | 248 | if (TC_H_MAJ(res.classid)) |
248 | cl = cbq_class_lookup(q, res.classid); | 249 | cl = cbq_class_lookup(q, res.classid); |
249 | else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) | 250 | else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL) |
250 | cl = defmap[TC_PRIO_BESTEFFORT]; | 251 | cl = defmap[TC_PRIO_BESTEFFORT]; |
251 | 252 | ||
252 | if (cl == NULL || cl->level >= head->level) | 253 | if (cl == NULL || cl->level >= head->level) |
@@ -282,7 +283,7 @@ fallback: | |||
282 | * Step 4. No success... | 283 | * Step 4. No success... |
283 | */ | 284 | */ |
284 | if (TC_H_MAJ(prio) == 0 && | 285 | if (TC_H_MAJ(prio) == 0 && |
285 | !(cl = head->defaults[prio&TC_PRIO_MAX]) && | 286 | !(cl = head->defaults[prio & TC_PRIO_MAX]) && |
286 | !(cl = head->defaults[TC_PRIO_BESTEFFORT])) | 287 | !(cl = head->defaults[TC_PRIO_BESTEFFORT])) |
287 | return head; | 288 | return head; |
288 | 289 | ||
@@ -290,12 +291,12 @@ fallback: | |||
290 | } | 291 | } |
291 | 292 | ||
292 | /* | 293 | /* |
293 | A packet has just been enqueued on the empty class. | 294 | * A packet has just been enqueued on the empty class. |
294 | cbq_activate_class adds it to the tail of active class list | 295 | * cbq_activate_class adds it to the tail of active class list |
295 | of its priority band. | 296 | * of its priority band. |
296 | */ | 297 | */ |
297 | 298 | ||
298 | static __inline__ void cbq_activate_class(struct cbq_class *cl) | 299 | static inline void cbq_activate_class(struct cbq_class *cl) |
299 | { | 300 | { |
300 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); | 301 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); |
301 | int prio = cl->cpriority; | 302 | int prio = cl->cpriority; |
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl) | |||
314 | } | 315 | } |
315 | 316 | ||
316 | /* | 317 | /* |
317 | Unlink class from active chain. | 318 | * Unlink class from active chain. |
318 | Note that this same procedure is done directly in cbq_dequeue* | 319 | * Note that this same procedure is done directly in cbq_dequeue* |
319 | during round-robin procedure. | 320 | * during round-robin procedure. |
320 | */ | 321 | */ |
321 | 322 | ||
322 | static void cbq_deactivate_class(struct cbq_class *this) | 323 | static void cbq_deactivate_class(struct cbq_class *this) |
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) | |||
350 | { | 351 | { |
351 | int toplevel = q->toplevel; | 352 | int toplevel = q->toplevel; |
352 | 353 | ||
353 | if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { | 354 | if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) { |
354 | psched_time_t now; | 355 | psched_time_t now; |
355 | psched_tdiff_t incr; | 356 | psched_tdiff_t incr; |
356 | 357 | ||
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl) | |||
363 | q->toplevel = cl->level; | 364 | q->toplevel = cl->level; |
364 | return; | 365 | return; |
365 | } | 366 | } |
366 | } while ((cl=cl->borrow) != NULL && toplevel > cl->level); | 367 | } while ((cl = cl->borrow) != NULL && toplevel > cl->level); |
367 | } | 368 | } |
368 | } | 369 | } |
369 | 370 | ||
@@ -390,8 +391,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
390 | ret = qdisc_enqueue(skb, cl->q); | 391 | ret = qdisc_enqueue(skb, cl->q); |
391 | if (ret == NET_XMIT_SUCCESS) { | 392 | if (ret == NET_XMIT_SUCCESS) { |
392 | sch->q.qlen++; | 393 | sch->q.qlen++; |
393 | sch->bstats.packets++; | ||
394 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
395 | cbq_mark_toplevel(q, cl); | 394 | cbq_mark_toplevel(q, cl); |
396 | if (!cl->next_alive) | 395 | if (!cl->next_alive) |
397 | cbq_activate_class(cl); | 396 | cbq_activate_class(cl); |
@@ -419,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl) | |||
419 | delay += cl->offtime; | 418 | delay += cl->offtime; |
420 | 419 | ||
421 | /* | 420 | /* |
422 | Class goes to sleep, so that it will have no | 421 | * Class goes to sleep, so that it will have no |
423 | chance to work avgidle. Let's forgive it 8) | 422 | * chance to work avgidle. Let's forgive it 8) |
424 | 423 | * | |
425 | BTW cbq-2.0 has a crap in this | 424 | * BTW cbq-2.0 has a crap in this |
426 | place, apparently they forgot to shift it by cl->ewma_log. | 425 | * place, apparently they forgot to shift it by cl->ewma_log. |
427 | */ | 426 | */ |
428 | if (cl->avgidle < 0) | 427 | if (cl->avgidle < 0) |
429 | delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); | 428 | delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); |
@@ -440,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl) | |||
440 | q->wd_expires = delay; | 439 | q->wd_expires = delay; |
441 | 440 | ||
442 | /* Dirty work! We must schedule wakeups based on | 441 | /* Dirty work! We must schedule wakeups based on |
443 | real available rate, rather than leaf rate, | 442 | * real available rate, rather than leaf rate, |
444 | which may be tiny (even zero). | 443 | * which may be tiny (even zero). |
445 | */ | 444 | */ |
446 | if (q->toplevel == TC_CBQ_MAXLEVEL) { | 445 | if (q->toplevel == TC_CBQ_MAXLEVEL) { |
447 | struct cbq_class *b; | 446 | struct cbq_class *b; |
@@ -461,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl) | |||
461 | } | 460 | } |
462 | 461 | ||
463 | /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when | 462 | /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when |
464 | they go overlimit | 463 | * they go overlimit |
465 | */ | 464 | */ |
466 | 465 | ||
467 | static void cbq_ovl_rclassic(struct cbq_class *cl) | 466 | static void cbq_ovl_rclassic(struct cbq_class *cl) |
@@ -596,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) | |||
596 | struct Qdisc *sch = q->watchdog.qdisc; | 595 | struct Qdisc *sch = q->watchdog.qdisc; |
597 | psched_time_t now; | 596 | psched_time_t now; |
598 | psched_tdiff_t delay = 0; | 597 | psched_tdiff_t delay = 0; |
599 | unsigned pmask; | 598 | unsigned int pmask; |
600 | 599 | ||
601 | now = psched_get_time(); | 600 | now = psched_get_time(); |
602 | 601 | ||
@@ -625,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer) | |||
625 | hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); | 624 | hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); |
626 | } | 625 | } |
627 | 626 | ||
628 | sch->flags &= ~TCQ_F_THROTTLED; | 627 | qdisc_unthrottled(sch); |
629 | __netif_schedule(qdisc_root(sch)); | 628 | __netif_schedule(qdisc_root(sch)); |
630 | return HRTIMER_NORESTART; | 629 | return HRTIMER_NORESTART; |
631 | } | 630 | } |
@@ -650,8 +649,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) | |||
650 | ret = qdisc_enqueue(skb, cl->q); | 649 | ret = qdisc_enqueue(skb, cl->q); |
651 | if (ret == NET_XMIT_SUCCESS) { | 650 | if (ret == NET_XMIT_SUCCESS) { |
652 | sch->q.qlen++; | 651 | sch->q.qlen++; |
653 | sch->bstats.packets++; | ||
654 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
655 | if (!cl->next_alive) | 652 | if (!cl->next_alive) |
656 | cbq_activate_class(cl); | 653 | cbq_activate_class(cl); |
657 | return 0; | 654 | return 0; |
@@ -667,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child) | |||
667 | #endif | 664 | #endif |
668 | 665 | ||
669 | /* | 666 | /* |
670 | It is mission critical procedure. | 667 | * It is mission critical procedure. |
671 | 668 | * | |
672 | We "regenerate" toplevel cutoff, if transmitting class | 669 | * We "regenerate" toplevel cutoff, if transmitting class |
673 | has backlog and it is not regulated. It is not part of | 670 | * has backlog and it is not regulated. It is not part of |
674 | original CBQ description, but looks more reasonable. | 671 | * original CBQ description, but looks more reasonable. |
675 | Probably, it is wrong. This question needs further investigation. | 672 | * Probably, it is wrong. This question needs further investigation. |
676 | */ | 673 | */ |
677 | 674 | ||
678 | static __inline__ void | 675 | static inline void |
679 | cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, | 676 | cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, |
680 | struct cbq_class *borrowed) | 677 | struct cbq_class *borrowed) |
681 | { | 678 | { |
@@ -686,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, | |||
686 | q->toplevel = borrowed->level; | 683 | q->toplevel = borrowed->level; |
687 | return; | 684 | return; |
688 | } | 685 | } |
689 | } while ((borrowed=borrowed->borrow) != NULL); | 686 | } while ((borrowed = borrowed->borrow) != NULL); |
690 | } | 687 | } |
691 | #if 0 | 688 | #if 0 |
692 | /* It is not necessary now. Uncommenting it | 689 | /* It is not necessary now. Uncommenting it |
@@ -714,10 +711,10 @@ cbq_update(struct cbq_sched_data *q) | |||
714 | cl->bstats.bytes += len; | 711 | cl->bstats.bytes += len; |
715 | 712 | ||
716 | /* | 713 | /* |
717 | (now - last) is total time between packet right edges. | 714 | * (now - last) is total time between packet right edges. |
718 | (last_pktlen/rate) is "virtual" busy time, so that | 715 | * (last_pktlen/rate) is "virtual" busy time, so that |
719 | 716 | * | |
720 | idle = (now - last) - last_pktlen/rate | 717 | * idle = (now - last) - last_pktlen/rate |
721 | */ | 718 | */ |
722 | 719 | ||
723 | idle = q->now - cl->last; | 720 | idle = q->now - cl->last; |
@@ -727,9 +724,9 @@ cbq_update(struct cbq_sched_data *q) | |||
727 | idle -= L2T(cl, len); | 724 | idle -= L2T(cl, len); |
728 | 725 | ||
729 | /* true_avgidle := (1-W)*true_avgidle + W*idle, | 726 | /* true_avgidle := (1-W)*true_avgidle + W*idle, |
730 | where W=2^{-ewma_log}. But cl->avgidle is scaled: | 727 | * where W=2^{-ewma_log}. But cl->avgidle is scaled: |
731 | cl->avgidle == true_avgidle/W, | 728 | * cl->avgidle == true_avgidle/W, |
732 | hence: | 729 | * hence: |
733 | */ | 730 | */ |
734 | avgidle += idle - (avgidle>>cl->ewma_log); | 731 | avgidle += idle - (avgidle>>cl->ewma_log); |
735 | } | 732 | } |
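The comment above explains why avgidle is stored pre-scaled by 1/W: with W = 2^-ewma_log, the update avgidle += idle - (avgidle >> ewma_log) tracks the true EWMA true_avgidle = (1-W)*true_avgidle + W*idle without any multiplication. A small numerical check of that equivalence, with sample values chosen so the shift never sees a negative operand:

#include <stdio.h>

int main(void)
{
	const int ewma_log = 3;			/* W = 1/8 */
	const double W = 1.0 / (1 << ewma_log);
	long avgidle = 0;			/* scaled: true_avgidle / W */
	double true_avgidle = 0.0;
	const long samples[] = { 800, -200, 400, 1600, -50 };

	for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		long idle = samples[i];

		avgidle += idle - (avgidle >> ewma_log);	/* scaled update */
		true_avgidle = (1.0 - W) * true_avgidle + W * idle;
		printf("scaled*W = %8.2f   true = %8.2f\n",
		       avgidle * W, true_avgidle);
	}
	return 0;
}

Up to integer rounding the two columns agree, which is the whole point of keeping avgidle scaled.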
@@ -743,22 +740,22 @@ cbq_update(struct cbq_sched_data *q) | |||
743 | cl->avgidle = avgidle; | 740 | cl->avgidle = avgidle; |
744 | 741 | ||
745 | /* Calculate expected time, when this class | 742 | /* Calculate expected time, when this class |
746 | will be allowed to send. | 743 | * will be allowed to send. |
747 | It will occur, when: | 744 | * It will occur, when: |
748 | (1-W)*true_avgidle + W*delay = 0, i.e. | 745 | * (1-W)*true_avgidle + W*delay = 0, i.e. |
749 | idle = (1/W - 1)*(-true_avgidle) | 746 | * idle = (1/W - 1)*(-true_avgidle) |
750 | or | 747 | * or |
751 | idle = (1 - W)*(-cl->avgidle); | 748 | * idle = (1 - W)*(-cl->avgidle); |
752 | */ | 749 | */ |
753 | idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); | 750 | idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); |
754 | 751 | ||
755 | /* | 752 | /* |
756 | That is not all. | 753 | * That is not all. |
757 | To maintain the rate allocated to the class, | 754 | * To maintain the rate allocated to the class, |
758 | we add to undertime virtual clock, | 755 | * we add to undertime virtual clock, |
759 | necessary to complete transmitted packet. | 756 | * necessary to complete transmitted packet. |
760 | (len/phys_bandwidth has been already passed | 757 | * (len/phys_bandwidth has been already passed |
761 | to the moment of cbq_update) | 758 | * to the moment of cbq_update) |
762 | */ | 759 | */ |
763 | 760 | ||
764 | idle -= L2T(&q->link, len); | 761 | idle -= L2T(&q->link, len); |
@@ -780,7 +777,7 @@ cbq_update(struct cbq_sched_data *q) | |||
780 | cbq_update_toplevel(q, this, q->tx_borrowed); | 777 | cbq_update_toplevel(q, this, q->tx_borrowed); |
781 | } | 778 | } |
782 | 779 | ||
783 | static __inline__ struct cbq_class * | 780 | static inline struct cbq_class * |
784 | cbq_under_limit(struct cbq_class *cl) | 781 | cbq_under_limit(struct cbq_class *cl) |
785 | { | 782 | { |
786 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); | 783 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); |
@@ -796,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl) | |||
796 | 793 | ||
797 | do { | 794 | do { |
798 | /* It is very suspicious place. Now overlimit | 795 | /* It is very suspicious place. Now overlimit |
799 | action is generated for not bounded classes | 796 | * action is generated for not bounded classes |
800 | only if link is completely congested. | 797 | * only if link is completely congested. |
801 | Though it is in agree with ancestor-only paradigm, | 798 | * Though it is in agree with ancestor-only paradigm, |
802 | it looks very stupid. Particularly, | 799 | * it looks very stupid. Particularly, |
803 | it means that this chunk of code will either | 800 | * it means that this chunk of code will either |
804 | never be called or result in strong amplification | 801 | * never be called or result in strong amplification |
805 | of burstiness. Dangerous, silly, and, however, | 802 | * of burstiness. Dangerous, silly, and, however, |
806 | no another solution exists. | 803 | * no another solution exists. |
807 | */ | 804 | */ |
808 | if ((cl = cl->borrow) == NULL) { | 805 | cl = cl->borrow; |
806 | if (!cl) { | ||
809 | this_cl->qstats.overlimits++; | 807 | this_cl->qstats.overlimits++; |
810 | this_cl->overlimit(this_cl); | 808 | this_cl->overlimit(this_cl); |
811 | return NULL; | 809 | return NULL; |
@@ -818,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl) | |||
818 | return cl; | 816 | return cl; |
819 | } | 817 | } |
820 | 818 | ||
821 | static __inline__ struct sk_buff * | 819 | static inline struct sk_buff * |
822 | cbq_dequeue_prio(struct Qdisc *sch, int prio) | 820 | cbq_dequeue_prio(struct Qdisc *sch, int prio) |
823 | { | 821 | { |
824 | struct cbq_sched_data *q = qdisc_priv(sch); | 822 | struct cbq_sched_data *q = qdisc_priv(sch); |
@@ -842,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) | |||
842 | 840 | ||
843 | if (cl->deficit <= 0) { | 841 | if (cl->deficit <= 0) { |
844 | /* Class exhausted its allotment per | 842 | /* Class exhausted its allotment per |
845 | this round. Switch to the next one. | 843 | * this round. Switch to the next one. |
846 | */ | 844 | */ |
847 | deficit = 1; | 845 | deficit = 1; |
848 | cl->deficit += cl->quantum; | 846 | cl->deficit += cl->quantum; |
@@ -852,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) | |||
852 | skb = cl->q->dequeue(cl->q); | 850 | skb = cl->q->dequeue(cl->q); |
853 | 851 | ||
854 | /* Class did not give us any skb :-( | 852 | /* Class did not give us any skb :-( |
855 | It could occur even if cl->q->q.qlen != 0 | 853 | * It could occur even if cl->q->q.qlen != 0 |
856 | f.e. if cl->q == "tbf" | 854 | * f.e. if cl->q == "tbf" |
857 | */ | 855 | */ |
858 | if (skb == NULL) | 856 | if (skb == NULL) |
859 | goto skip_class; | 857 | goto skip_class; |
@@ -882,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio) | |||
882 | skip_class: | 880 | skip_class: |
883 | if (cl->q->q.qlen == 0 || prio != cl->cpriority) { | 881 | if (cl->q->q.qlen == 0 || prio != cl->cpriority) { |
884 | /* Class is empty or penalized. | 882 | /* Class is empty or penalized. |
885 | Unlink it from active chain. | 883 | * Unlink it from active chain. |
886 | */ | 884 | */ |
887 | cl_prev->next_alive = cl->next_alive; | 885 | cl_prev->next_alive = cl->next_alive; |
888 | cl->next_alive = NULL; | 886 | cl->next_alive = NULL; |
@@ -921,14 +919,14 @@ next_class: | |||
921 | return NULL; | 919 | return NULL; |
922 | } | 920 | } |
923 | 921 | ||
924 | static __inline__ struct sk_buff * | 922 | static inline struct sk_buff * |
925 | cbq_dequeue_1(struct Qdisc *sch) | 923 | cbq_dequeue_1(struct Qdisc *sch) |
926 | { | 924 | { |
927 | struct cbq_sched_data *q = qdisc_priv(sch); | 925 | struct cbq_sched_data *q = qdisc_priv(sch); |
928 | struct sk_buff *skb; | 926 | struct sk_buff *skb; |
929 | unsigned activemask; | 927 | unsigned int activemask; |
930 | 928 | ||
931 | activemask = q->activemask&0xFF; | 929 | activemask = q->activemask & 0xFF; |
932 | while (activemask) { | 930 | while (activemask) { |
933 | int prio = ffz(~activemask); | 931 | int prio = ffz(~activemask); |
934 | activemask &= ~(1<<prio); | 932 | activemask &= ~(1<<prio); |
@@ -953,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch) | |||
953 | if (q->tx_class) { | 951 | if (q->tx_class) { |
954 | psched_tdiff_t incr2; | 952 | psched_tdiff_t incr2; |
955 | /* Time integrator. We calculate EOS time | 953 | /* Time integrator. We calculate EOS time |
956 | by adding expected packet transmission time. | 954 | * by adding expected packet transmission time. |
957 | If real time is greater, we warp artificial clock, | 955 | * If real time is greater, we warp artificial clock, |
958 | so that: | 956 | * so that: |
959 | 957 | * | |
960 | cbq_time = max(real_time, work); | 958 | * cbq_time = max(real_time, work); |
961 | */ | 959 | */ |
962 | incr2 = L2T(&q->link, q->tx_len); | 960 | incr2 = L2T(&q->link, q->tx_len); |
963 | q->now += incr2; | 961 | q->now += incr2; |
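The "time integrator" described above advances a virtual clock by the expected transmission time of the last packet and then warps it forward so it never falls behind real time, i.e. cbq_time = max(real_time, work). A tiny stand-alone model with arbitrary time units:

#include <stdio.h>

int main(void)
{
	long long now_real = 1000;
	long long now_virt = 1000;
	const long long tx_times[]   = { 12, 12, 12 };	/* expected tx time per packet */
	const long long real_steps[] = {  5, 40, 12 };	/* how much wall time actually passed */

	for (int i = 0; i < 3; i++) {
		now_virt += tx_times[i];	/* credit the work just done */
		now_real += real_steps[i];
		if (now_virt < now_real)	/* warp the artificial clock */
			now_virt = now_real;
		printf("real=%lld virt=%lld\n", now_real, now_virt);
	}
	return 0;
}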
@@ -973,28 +971,29 @@ cbq_dequeue(struct Qdisc *sch) | |||
973 | 971 | ||
974 | skb = cbq_dequeue_1(sch); | 972 | skb = cbq_dequeue_1(sch); |
975 | if (skb) { | 973 | if (skb) { |
974 | qdisc_bstats_update(sch, skb); | ||
976 | sch->q.qlen--; | 975 | sch->q.qlen--; |
977 | sch->flags &= ~TCQ_F_THROTTLED; | 976 | qdisc_unthrottled(sch); |
978 | return skb; | 977 | return skb; |
979 | } | 978 | } |
980 | 979 | ||
981 | /* All the classes are overlimit. | 980 | /* All the classes are overlimit. |
982 | 981 | * | |
983 | It is possible, if: | 982 | * It is possible, if: |
984 | 983 | * | |
985 | 1. Scheduler is empty. | 984 | * 1. Scheduler is empty. |
986 | 2. Toplevel cutoff inhibited borrowing. | 985 | * 2. Toplevel cutoff inhibited borrowing. |
987 | 3. Root class is overlimit. | 986 | * 3. Root class is overlimit. |
988 | 987 | * | |
989 | Reset 2d and 3d conditions and retry. | 988 | * Reset 2d and 3d conditions and retry. |
990 | 989 | * | |
991 | Note, that NS and cbq-2.0 are buggy, peeking | 990 | * Note, that NS and cbq-2.0 are buggy, peeking |
992 | an arbitrary class is appropriate for ancestor-only | 991 | * an arbitrary class is appropriate for ancestor-only |
993 | sharing, but not for toplevel algorithm. | 992 | * sharing, but not for toplevel algorithm. |
994 | 993 | * | |
995 | Our version is better, but slower, because it requires | 994 | * Our version is better, but slower, because it requires |
996 | two passes, but it is unavoidable with top-level sharing. | 995 | * two passes, but it is unavoidable with top-level sharing. |
997 | */ | 996 | */ |
998 | 997 | ||
999 | if (q->toplevel == TC_CBQ_MAXLEVEL && | 998 | if (q->toplevel == TC_CBQ_MAXLEVEL && |
1000 | q->link.undertime == PSCHED_PASTPERFECT) | 999 | q->link.undertime == PSCHED_PASTPERFECT) |
@@ -1005,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch) | |||
1005 | } | 1004 | } |
1006 | 1005 | ||
1007 | /* No packets in scheduler or nobody wants to give them to us :-( | 1006 | /* No packets in scheduler or nobody wants to give them to us :-( |
1008 | Sigh... start watchdog timer in the last case. */ | 1007 | * Sigh... start watchdog timer in the last case. |
1008 | */ | ||
1009 | 1009 | ||
1010 | if (sch->q.qlen) { | 1010 | if (sch->q.qlen) { |
1011 | sch->qstats.overlimits++; | 1011 | sch->qstats.overlimits++; |
@@ -1027,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this) | |||
1027 | int level = 0; | 1027 | int level = 0; |
1028 | struct cbq_class *cl; | 1028 | struct cbq_class *cl; |
1029 | 1029 | ||
1030 | if ((cl = this->children) != NULL) { | 1030 | cl = this->children; |
1031 | if (cl) { | ||
1031 | do { | 1032 | do { |
1032 | if (cl->level > level) | 1033 | if (cl->level > level) |
1033 | level = cl->level; | 1034 | level = cl->level; |
1034 | } while ((cl = cl->sibling) != this->children); | 1035 | } while ((cl = cl->sibling) != this->children); |
1035 | } | 1036 | } |
1036 | this->level = level+1; | 1037 | this->level = level + 1; |
1037 | } while ((this = this->tparent) != NULL); | 1038 | } while ((this = this->tparent) != NULL); |
1038 | } | 1039 | } |
1039 | 1040 | ||
@@ -1049,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio) | |||
1049 | for (h = 0; h < q->clhash.hashsize; h++) { | 1050 | for (h = 0; h < q->clhash.hashsize; h++) { |
1050 | hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { | 1051 | hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { |
1051 | /* BUGGGG... Beware! This expression suffer of | 1052 | /* BUGGGG... Beware! This expression suffer of |
1052 | arithmetic overflows! | 1053 | * arithmetic overflows! |
1053 | */ | 1054 | */ |
1054 | if (cl->priority == prio) { | 1055 | if (cl->priority == prio) { |
1055 | cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ | 1056 | cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ |
1056 | q->quanta[prio]; | 1057 | q->quanta[prio]; |
1057 | } | 1058 | } |
1058 | if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { | 1059 | if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { |
1059 | printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); | 1060 | pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n", |
1061 | cl->common.classid, cl->quantum); | ||
1060 | cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; | 1062 | cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; |
1061 | } | 1063 | } |
1062 | } | 1064 | } |
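The quantum normalisation above computes quantum = weight * allot * nclasses[prio] / quanta[prio], where quanta[prio] is the sum of the weights at that priority, and repairs the value to mtu/2 + 1 when it falls outside (0, 32*MTU]. A worked example with made-up numbers; the overflow caveat in the comment is why the kernel warns before repairing:

#include <stdio.h>

int main(void)
{
	const long mtu = 1500;
	const long weights[] = { 1, 2, 5 };	/* three classes at one priority */
	const long allot = 3000;
	long quanta = 0, nclasses = 3;

	for (int i = 0; i < 3; i++)
		quanta += weights[i];		/* q->quanta[prio] */

	for (int i = 0; i < 3; i++) {
		long quantum = weights[i] * allot * nclasses / quanta;

		if (quantum <= 0 || quantum > 32 * mtu)
			quantum = mtu / 2 + 1;	/* "repaired" fallback */
		printf("class %d: quantum = %ld\n", i, quantum);
	}
	return 0;
}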
@@ -1067,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl) | |||
1067 | { | 1069 | { |
1068 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); | 1070 | struct cbq_sched_data *q = qdisc_priv(cl->qdisc); |
1069 | struct cbq_class *split = cl->split; | 1071 | struct cbq_class *split = cl->split; |
1070 | unsigned h; | 1072 | unsigned int h; |
1071 | int i; | 1073 | int i; |
1072 | 1074 | ||
1073 | if (split == NULL) | 1075 | if (split == NULL) |
1074 | return; | 1076 | return; |
1075 | 1077 | ||
1076 | for (i=0; i<=TC_PRIO_MAX; i++) { | 1078 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
1077 | if (split->defaults[i] == cl && !(cl->defmap&(1<<i))) | 1079 | if (split->defaults[i] == cl && !(cl->defmap & (1<<i))) |
1078 | split->defaults[i] = NULL; | 1080 | split->defaults[i] = NULL; |
1079 | } | 1081 | } |
1080 | 1082 | ||
1081 | for (i=0; i<=TC_PRIO_MAX; i++) { | 1083 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
1082 | int level = split->level; | 1084 | int level = split->level; |
1083 | 1085 | ||
1084 | if (split->defaults[i]) | 1086 | if (split->defaults[i]) |
@@ -1091,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl) | |||
1091 | hlist_for_each_entry(c, n, &q->clhash.hash[h], | 1093 | hlist_for_each_entry(c, n, &q->clhash.hash[h], |
1092 | common.hnode) { | 1094 | common.hnode) { |
1093 | if (c->split == split && c->level < level && | 1095 | if (c->split == split && c->level < level && |
1094 | c->defmap&(1<<i)) { | 1096 | c->defmap & (1<<i)) { |
1095 | split->defaults[i] = c; | 1097 | split->defaults[i] = c; |
1096 | level = c->level; | 1098 | level = c->level; |
1097 | } | 1099 | } |
@@ -1105,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma | |||
1105 | struct cbq_class *split = NULL; | 1107 | struct cbq_class *split = NULL; |
1106 | 1108 | ||
1107 | if (splitid == 0) { | 1109 | if (splitid == 0) { |
1108 | if ((split = cl->split) == NULL) | 1110 | split = cl->split; |
1111 | if (!split) | ||
1109 | return; | 1112 | return; |
1110 | splitid = split->common.classid; | 1113 | splitid = split->common.classid; |
1111 | } | 1114 | } |
@@ -1123,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma | |||
1123 | cl->defmap = 0; | 1126 | cl->defmap = 0; |
1124 | cbq_sync_defmap(cl); | 1127 | cbq_sync_defmap(cl); |
1125 | cl->split = split; | 1128 | cl->split = split; |
1126 | cl->defmap = def&mask; | 1129 | cl->defmap = def & mask; |
1127 | } else | 1130 | } else |
1128 | cl->defmap = (cl->defmap&~mask)|(def&mask); | 1131 | cl->defmap = (cl->defmap & ~mask) | (def & mask); |
1129 | 1132 | ||
1130 | cbq_sync_defmap(cl); | 1133 | cbq_sync_defmap(cl); |
1131 | } | 1134 | } |
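The defmap update above is a masked read-modify-write: bits selected by mask come from def, every other bit keeps its previous value. A stand-alone demonstration with made-up bit patterns:

#include <stdio.h>

static unsigned int merge_under_mask(unsigned int old, unsigned int def,
				     unsigned int mask)
{
	return (old & ~mask) | (def & mask);	/* same expression as cl->defmap */
}

int main(void)
{
	unsigned int defmap = 0x0f;	/* priorities 0-3 currently mapped */
	unsigned int def    = 0x30;	/* caller wants bits 4 and 5 set ... */
	unsigned int mask   = 0x3c;	/* ... and has an opinion on bits 2-5 only */

	defmap = merge_under_mask(defmap, def, mask);
	printf("0x%02x\n", defmap);	/* 0x33: bits 0,1 kept, 2,3 cleared, 4,5 set */
	return 0;
}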
@@ -1138,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this) | |||
1138 | qdisc_class_hash_remove(&q->clhash, &this->common); | 1141 | qdisc_class_hash_remove(&q->clhash, &this->common); |
1139 | 1142 | ||
1140 | if (this->tparent) { | 1143 | if (this->tparent) { |
1141 | clp=&this->sibling; | 1144 | clp = &this->sibling; |
1142 | cl = *clp; | 1145 | cl = *clp; |
1143 | do { | 1146 | do { |
1144 | if (cl == this) { | 1147 | if (cl == this) { |
@@ -1177,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this) | |||
1177 | } | 1180 | } |
1178 | } | 1181 | } |
1179 | 1182 | ||
1180 | static unsigned int cbq_drop(struct Qdisc* sch) | 1183 | static unsigned int cbq_drop(struct Qdisc *sch) |
1181 | { | 1184 | { |
1182 | struct cbq_sched_data *q = qdisc_priv(sch); | 1185 | struct cbq_sched_data *q = qdisc_priv(sch); |
1183 | struct cbq_class *cl, *cl_head; | 1186 | struct cbq_class *cl, *cl_head; |
@@ -1185,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch) | |||
1185 | unsigned int len; | 1188 | unsigned int len; |
1186 | 1189 | ||
1187 | for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { | 1190 | for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { |
1188 | if ((cl_head = q->active[prio]) == NULL) | 1191 | cl_head = q->active[prio]; |
1192 | if (!cl_head) | ||
1189 | continue; | 1193 | continue; |
1190 | 1194 | ||
1191 | cl = cl_head; | 1195 | cl = cl_head; |
@@ -1202,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch) | |||
1202 | } | 1206 | } |
1203 | 1207 | ||
1204 | static void | 1208 | static void |
1205 | cbq_reset(struct Qdisc* sch) | 1209 | cbq_reset(struct Qdisc *sch) |
1206 | { | 1210 | { |
1207 | struct cbq_sched_data *q = qdisc_priv(sch); | 1211 | struct cbq_sched_data *q = qdisc_priv(sch); |
1208 | struct cbq_class *cl; | 1212 | struct cbq_class *cl; |
1209 | struct hlist_node *n; | 1213 | struct hlist_node *n; |
1210 | int prio; | 1214 | int prio; |
1211 | unsigned h; | 1215 | unsigned int h; |
1212 | 1216 | ||
1213 | q->activemask = 0; | 1217 | q->activemask = 0; |
1214 | q->pmask = 0; | 1218 | q->pmask = 0; |
@@ -1240,21 +1244,21 @@ cbq_reset(struct Qdisc* sch) | |||
1240 | 1244 | ||
1241 | static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) | 1245 | static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) |
1242 | { | 1246 | { |
1243 | if (lss->change&TCF_CBQ_LSS_FLAGS) { | 1247 | if (lss->change & TCF_CBQ_LSS_FLAGS) { |
1244 | cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; | 1248 | cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; |
1245 | cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; | 1249 | cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; |
1246 | } | 1250 | } |
1247 | if (lss->change&TCF_CBQ_LSS_EWMA) | 1251 | if (lss->change & TCF_CBQ_LSS_EWMA) |
1248 | cl->ewma_log = lss->ewma_log; | 1252 | cl->ewma_log = lss->ewma_log; |
1249 | if (lss->change&TCF_CBQ_LSS_AVPKT) | 1253 | if (lss->change & TCF_CBQ_LSS_AVPKT) |
1250 | cl->avpkt = lss->avpkt; | 1254 | cl->avpkt = lss->avpkt; |
1251 | if (lss->change&TCF_CBQ_LSS_MINIDLE) | 1255 | if (lss->change & TCF_CBQ_LSS_MINIDLE) |
1252 | cl->minidle = -(long)lss->minidle; | 1256 | cl->minidle = -(long)lss->minidle; |
1253 | if (lss->change&TCF_CBQ_LSS_MAXIDLE) { | 1257 | if (lss->change & TCF_CBQ_LSS_MAXIDLE) { |
1254 | cl->maxidle = lss->maxidle; | 1258 | cl->maxidle = lss->maxidle; |
1255 | cl->avgidle = lss->maxidle; | 1259 | cl->avgidle = lss->maxidle; |
1256 | } | 1260 | } |
1257 | if (lss->change&TCF_CBQ_LSS_OFFTIME) | 1261 | if (lss->change & TCF_CBQ_LSS_OFFTIME) |
1258 | cl->offtime = lss->offtime; | 1262 | cl->offtime = lss->offtime; |
1259 | return 0; | 1263 | return 0; |
1260 | } | 1264 | } |
@@ -1282,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr) | |||
1282 | if (wrr->weight) | 1286 | if (wrr->weight) |
1283 | cl->weight = wrr->weight; | 1287 | cl->weight = wrr->weight; |
1284 | if (wrr->priority) { | 1288 | if (wrr->priority) { |
1285 | cl->priority = wrr->priority-1; | 1289 | cl->priority = wrr->priority - 1; |
1286 | cl->cpriority = cl->priority; | 1290 | cl->cpriority = cl->priority; |
1287 | if (cl->priority >= cl->priority2) | 1291 | if (cl->priority >= cl->priority2) |
1288 | cl->priority2 = TC_CBQ_MAXPRIO-1; | 1292 | cl->priority2 = TC_CBQ_MAXPRIO - 1; |
1289 | } | 1293 | } |
1290 | 1294 | ||
1291 | cbq_addprio(q, cl); | 1295 | cbq_addprio(q, cl); |
@@ -1302,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl) | |||
1302 | cl->overlimit = cbq_ovl_delay; | 1306 | cl->overlimit = cbq_ovl_delay; |
1303 | break; | 1307 | break; |
1304 | case TC_CBQ_OVL_LOWPRIO: | 1308 | case TC_CBQ_OVL_LOWPRIO: |
1305 | if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || | 1309 | if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO || |
1306 | ovl->priority2-1 <= cl->priority) | 1310 | ovl->priority2 - 1 <= cl->priority) |
1307 | return -EINVAL; | 1311 | return -EINVAL; |
1308 | cl->priority2 = ovl->priority2-1; | 1312 | cl->priority2 = ovl->priority2 - 1; |
1309 | cl->overlimit = cbq_ovl_lowprio; | 1313 | cl->overlimit = cbq_ovl_lowprio; |
1310 | break; | 1314 | break; |
1311 | case TC_CBQ_OVL_DROP: | 1315 | case TC_CBQ_OVL_DROP: |
@@ -1379,14 +1383,14 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) | |||
1379 | q->link.sibling = &q->link; | 1383 | q->link.sibling = &q->link; |
1380 | q->link.common.classid = sch->handle; | 1384 | q->link.common.classid = sch->handle; |
1381 | q->link.qdisc = sch; | 1385 | q->link.qdisc = sch; |
1382 | if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1386 | q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
1383 | &pfifo_qdisc_ops, | 1387 | sch->handle); |
1384 | sch->handle))) | 1388 | if (!q->link.q) |
1385 | q->link.q = &noop_qdisc; | 1389 | q->link.q = &noop_qdisc; |
1386 | 1390 | ||
1387 | q->link.priority = TC_CBQ_MAXPRIO-1; | 1391 | q->link.priority = TC_CBQ_MAXPRIO - 1; |
1388 | q->link.priority2 = TC_CBQ_MAXPRIO-1; | 1392 | q->link.priority2 = TC_CBQ_MAXPRIO - 1; |
1389 | q->link.cpriority = TC_CBQ_MAXPRIO-1; | 1393 | q->link.cpriority = TC_CBQ_MAXPRIO - 1; |
1390 | q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; | 1394 | q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; |
1391 | q->link.overlimit = cbq_ovl_classic; | 1395 | q->link.overlimit = cbq_ovl_classic; |
1392 | q->link.allot = psched_mtu(qdisc_dev(sch)); | 1396 | q->link.allot = psched_mtu(qdisc_dev(sch)); |
@@ -1417,7 +1421,7 @@ put_rtab: | |||
1417 | return err; | 1421 | return err; |
1418 | } | 1422 | } |
1419 | 1423 | ||
1420 | static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) | 1424 | static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) |
1421 | { | 1425 | { |
1422 | unsigned char *b = skb_tail_pointer(skb); | 1426 | unsigned char *b = skb_tail_pointer(skb); |
1423 | 1427 | ||
@@ -1429,7 +1433,7 @@ nla_put_failure: | |||
1429 | return -1; | 1433 | return -1; |
1430 | } | 1434 | } |
1431 | 1435 | ||
1432 | static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) | 1436 | static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) |
1433 | { | 1437 | { |
1434 | unsigned char *b = skb_tail_pointer(skb); | 1438 | unsigned char *b = skb_tail_pointer(skb); |
1435 | struct tc_cbq_lssopt opt; | 1439 | struct tc_cbq_lssopt opt; |
@@ -1454,15 +1458,15 @@ nla_put_failure: | |||
1454 | return -1; | 1458 | return -1; |
1455 | } | 1459 | } |
1456 | 1460 | ||
1457 | static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) | 1461 | static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) |
1458 | { | 1462 | { |
1459 | unsigned char *b = skb_tail_pointer(skb); | 1463 | unsigned char *b = skb_tail_pointer(skb); |
1460 | struct tc_cbq_wrropt opt; | 1464 | struct tc_cbq_wrropt opt; |
1461 | 1465 | ||
1462 | opt.flags = 0; | 1466 | opt.flags = 0; |
1463 | opt.allot = cl->allot; | 1467 | opt.allot = cl->allot; |
1464 | opt.priority = cl->priority+1; | 1468 | opt.priority = cl->priority + 1; |
1465 | opt.cpriority = cl->cpriority+1; | 1469 | opt.cpriority = cl->cpriority + 1; |
1466 | opt.weight = cl->weight; | 1470 | opt.weight = cl->weight; |
1467 | NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); | 1471 | NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); |
1468 | return skb->len; | 1472 | return skb->len; |
@@ -1472,13 +1476,13 @@ nla_put_failure: | |||
1472 | return -1; | 1476 | return -1; |
1473 | } | 1477 | } |
1474 | 1478 | ||
1475 | static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) | 1479 | static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) |
1476 | { | 1480 | { |
1477 | unsigned char *b = skb_tail_pointer(skb); | 1481 | unsigned char *b = skb_tail_pointer(skb); |
1478 | struct tc_cbq_ovl opt; | 1482 | struct tc_cbq_ovl opt; |
1479 | 1483 | ||
1480 | opt.strategy = cl->ovl_strategy; | 1484 | opt.strategy = cl->ovl_strategy; |
1481 | opt.priority2 = cl->priority2+1; | 1485 | opt.priority2 = cl->priority2 + 1; |
1482 | opt.pad = 0; | 1486 | opt.pad = 0; |
1483 | opt.penalty = cl->penalty; | 1487 | opt.penalty = cl->penalty; |
1484 | NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); | 1488 | NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); |
@@ -1489,7 +1493,7 @@ nla_put_failure: | |||
1489 | return -1; | 1493 | return -1; |
1490 | } | 1494 | } |
1491 | 1495 | ||
1492 | static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) | 1496 | static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) |
1493 | { | 1497 | { |
1494 | unsigned char *b = skb_tail_pointer(skb); | 1498 | unsigned char *b = skb_tail_pointer(skb); |
1495 | struct tc_cbq_fopt opt; | 1499 | struct tc_cbq_fopt opt; |
@@ -1508,7 +1512,7 @@ nla_put_failure: | |||
1508 | } | 1512 | } |
1509 | 1513 | ||
1510 | #ifdef CONFIG_NET_CLS_ACT | 1514 | #ifdef CONFIG_NET_CLS_ACT |
1511 | static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) | 1515 | static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) |
1512 | { | 1516 | { |
1513 | unsigned char *b = skb_tail_pointer(skb); | 1517 | unsigned char *b = skb_tail_pointer(skb); |
1514 | struct tc_cbq_police opt; | 1518 | struct tc_cbq_police opt; |
@@ -1572,7 +1576,7 @@ static int | |||
1572 | cbq_dump_class(struct Qdisc *sch, unsigned long arg, | 1576 | cbq_dump_class(struct Qdisc *sch, unsigned long arg, |
1573 | struct sk_buff *skb, struct tcmsg *tcm) | 1577 | struct sk_buff *skb, struct tcmsg *tcm) |
1574 | { | 1578 | { |
1575 | struct cbq_class *cl = (struct cbq_class*)arg; | 1579 | struct cbq_class *cl = (struct cbq_class *)arg; |
1576 | struct nlattr *nest; | 1580 | struct nlattr *nest; |
1577 | 1581 | ||
1578 | if (cl->tparent) | 1582 | if (cl->tparent) |
@@ -1600,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, | |||
1600 | struct gnet_dump *d) | 1604 | struct gnet_dump *d) |
1601 | { | 1605 | { |
1602 | struct cbq_sched_data *q = qdisc_priv(sch); | 1606 | struct cbq_sched_data *q = qdisc_priv(sch); |
1603 | struct cbq_class *cl = (struct cbq_class*)arg; | 1607 | struct cbq_class *cl = (struct cbq_class *)arg; |
1604 | 1608 | ||
1605 | cl->qstats.qlen = cl->q->q.qlen; | 1609 | cl->qstats.qlen = cl->q->q.qlen; |
1606 | cl->xstats.avgidle = cl->avgidle; | 1610 | cl->xstats.avgidle = cl->avgidle; |
@@ -1620,10 +1624,10 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, | |||
1620 | static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | 1624 | static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, |
1621 | struct Qdisc **old) | 1625 | struct Qdisc **old) |
1622 | { | 1626 | { |
1623 | struct cbq_class *cl = (struct cbq_class*)arg; | 1627 | struct cbq_class *cl = (struct cbq_class *)arg; |
1624 | 1628 | ||
1625 | if (new == NULL) { | 1629 | if (new == NULL) { |
1626 | new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1630 | new = qdisc_create_dflt(sch->dev_queue, |
1627 | &pfifo_qdisc_ops, cl->common.classid); | 1631 | &pfifo_qdisc_ops, cl->common.classid); |
1628 | if (new == NULL) | 1632 | if (new == NULL) |
1629 | return -ENOBUFS; | 1633 | return -ENOBUFS; |
@@ -1643,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |||
1643 | return 0; | 1647 | return 0; |
1644 | } | 1648 | } |
1645 | 1649 | ||
1646 | static struct Qdisc * | 1650 | static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg) |
1647 | cbq_leaf(struct Qdisc *sch, unsigned long arg) | ||
1648 | { | 1651 | { |
1649 | struct cbq_class *cl = (struct cbq_class*)arg; | 1652 | struct cbq_class *cl = (struct cbq_class *)arg; |
1650 | 1653 | ||
1651 | return cl->q; | 1654 | return cl->q; |
1652 | } | 1655 | } |
@@ -1685,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) | |||
1685 | kfree(cl); | 1688 | kfree(cl); |
1686 | } | 1689 | } |
1687 | 1690 | ||
1688 | static void | 1691 | static void cbq_destroy(struct Qdisc *sch) |
1689 | cbq_destroy(struct Qdisc* sch) | ||
1690 | { | 1692 | { |
1691 | struct cbq_sched_data *q = qdisc_priv(sch); | 1693 | struct cbq_sched_data *q = qdisc_priv(sch); |
1692 | struct hlist_node *n, *next; | 1694 | struct hlist_node *n, *next; |
1693 | struct cbq_class *cl; | 1695 | struct cbq_class *cl; |
1694 | unsigned h; | 1696 | unsigned int h; |
1695 | 1697 | ||
1696 | #ifdef CONFIG_NET_CLS_ACT | 1698 | #ifdef CONFIG_NET_CLS_ACT |
1697 | q->rx_class = NULL; | 1699 | q->rx_class = NULL; |
@@ -1715,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch) | |||
1715 | 1717 | ||
1716 | static void cbq_put(struct Qdisc *sch, unsigned long arg) | 1718 | static void cbq_put(struct Qdisc *sch, unsigned long arg) |
1717 | { | 1719 | { |
1718 | struct cbq_class *cl = (struct cbq_class*)arg; | 1720 | struct cbq_class *cl = (struct cbq_class *)arg; |
1719 | 1721 | ||
1720 | if (--cl->refcnt == 0) { | 1722 | if (--cl->refcnt == 0) { |
1721 | #ifdef CONFIG_NET_CLS_ACT | 1723 | #ifdef CONFIG_NET_CLS_ACT |
@@ -1738,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1738 | { | 1740 | { |
1739 | int err; | 1741 | int err; |
1740 | struct cbq_sched_data *q = qdisc_priv(sch); | 1742 | struct cbq_sched_data *q = qdisc_priv(sch); |
1741 | struct cbq_class *cl = (struct cbq_class*)*arg; | 1743 | struct cbq_class *cl = (struct cbq_class *)*arg; |
1742 | struct nlattr *opt = tca[TCA_OPTIONS]; | 1744 | struct nlattr *opt = tca[TCA_OPTIONS]; |
1743 | struct nlattr *tb[TCA_CBQ_MAX + 1]; | 1745 | struct nlattr *tb[TCA_CBQ_MAX + 1]; |
1744 | struct cbq_class *parent; | 1746 | struct cbq_class *parent; |
@@ -1830,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1830 | 1832 | ||
1831 | if (classid) { | 1833 | if (classid) { |
1832 | err = -EINVAL; | 1834 | err = -EINVAL; |
1833 | if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) | 1835 | if (TC_H_MAJ(classid ^ sch->handle) || |
1836 | cbq_class_lookup(q, classid)) | ||
1834 | goto failure; | 1837 | goto failure; |
1835 | } else { | 1838 | } else { |
1836 | int i; | 1839 | int i; |
1837 | classid = TC_H_MAKE(sch->handle,0x8000); | 1840 | classid = TC_H_MAKE(sch->handle, 0x8000); |
1838 | 1841 | ||
1839 | for (i=0; i<0x8000; i++) { | 1842 | for (i = 0; i < 0x8000; i++) { |
1840 | if (++q->hgenerator >= 0x8000) | 1843 | if (++q->hgenerator >= 0x8000) |
1841 | q->hgenerator = 1; | 1844 | q->hgenerator = 1; |
1842 | if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) | 1845 | if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) |
@@ -1874,8 +1877,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1874 | cl->R_tab = rtab; | 1877 | cl->R_tab = rtab; |
1875 | rtab = NULL; | 1878 | rtab = NULL; |
1876 | cl->refcnt = 1; | 1879 | cl->refcnt = 1; |
1877 | if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1880 | cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); |
1878 | &pfifo_qdisc_ops, classid))) | 1881 | if (!cl->q) |
1879 | cl->q = &noop_qdisc; | 1882 | cl->q = &noop_qdisc; |
1880 | cl->common.classid = classid; | 1883 | cl->common.classid = classid; |
1881 | cl->tparent = parent; | 1884 | cl->tparent = parent; |
@@ -1893,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t | |||
1893 | cl->minidle = -0x7FFFFFFF; | 1896 | cl->minidle = -0x7FFFFFFF; |
1894 | cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); | 1897 | cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); |
1895 | cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); | 1898 | cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); |
1896 | if (cl->ewma_log==0) | 1899 | if (cl->ewma_log == 0) |
1897 | cl->ewma_log = q->link.ewma_log; | 1900 | cl->ewma_log = q->link.ewma_log; |
1898 | if (cl->maxidle==0) | 1901 | if (cl->maxidle == 0) |
1899 | cl->maxidle = q->link.maxidle; | 1902 | cl->maxidle = q->link.maxidle; |
1900 | if (cl->avpkt==0) | 1903 | if (cl->avpkt == 0) |
1901 | cl->avpkt = q->link.avpkt; | 1904 | cl->avpkt = q->link.avpkt; |
1902 | cl->overlimit = cbq_ovl_classic; | 1905 | cl->overlimit = cbq_ovl_classic; |
1903 | if (tb[TCA_CBQ_OVL_STRATEGY]) | 1906 | if (tb[TCA_CBQ_OVL_STRATEGY]) |
@@ -1923,7 +1926,7 @@ failure: | |||
1923 | static int cbq_delete(struct Qdisc *sch, unsigned long arg) | 1926 | static int cbq_delete(struct Qdisc *sch, unsigned long arg) |
1924 | { | 1927 | { |
1925 | struct cbq_sched_data *q = qdisc_priv(sch); | 1928 | struct cbq_sched_data *q = qdisc_priv(sch); |
1926 | struct cbq_class *cl = (struct cbq_class*)arg; | 1929 | struct cbq_class *cl = (struct cbq_class *)arg; |
1927 | unsigned int qlen; | 1930 | unsigned int qlen; |
1928 | 1931 | ||
1929 | if (cl->filters || cl->children || cl == &q->link) | 1932 | if (cl->filters || cl->children || cl == &q->link) |
@@ -1981,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, | |||
1981 | u32 classid) | 1984 | u32 classid) |
1982 | { | 1985 | { |
1983 | struct cbq_sched_data *q = qdisc_priv(sch); | 1986 | struct cbq_sched_data *q = qdisc_priv(sch); |
1984 | struct cbq_class *p = (struct cbq_class*)parent; | 1987 | struct cbq_class *p = (struct cbq_class *)parent; |
1985 | struct cbq_class *cl = cbq_class_lookup(q, classid); | 1988 | struct cbq_class *cl = cbq_class_lookup(q, classid); |
1986 | 1989 | ||
1987 | if (cl) { | 1990 | if (cl) { |
@@ -1995,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent, | |||
1995 | 1998 | ||
1996 | static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) | 1999 | static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) |
1997 | { | 2000 | { |
1998 | struct cbq_class *cl = (struct cbq_class*)arg; | 2001 | struct cbq_class *cl = (struct cbq_class *)arg; |
1999 | 2002 | ||
2000 | cl->filters--; | 2003 | cl->filters--; |
2001 | } | 2004 | } |
@@ -2005,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
2005 | struct cbq_sched_data *q = qdisc_priv(sch); | 2008 | struct cbq_sched_data *q = qdisc_priv(sch); |
2006 | struct cbq_class *cl; | 2009 | struct cbq_class *cl; |
2007 | struct hlist_node *n; | 2010 | struct hlist_node *n; |
2008 | unsigned h; | 2011 | unsigned int h; |
2009 | 2012 | ||
2010 | if (arg->stop) | 2013 | if (arg->stop) |
2011 | return; | 2014 | return; |
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c new file mode 100644 index 000000000000..06afbaeb4c88 --- /dev/null +++ b/net/sched/sch_choke.c | |||
@@ -0,0 +1,688 @@ | |||
1 | /* | ||
2 | * net/sched/sch_choke.c CHOKE scheduler | ||
3 | * | ||
4 | * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com> | ||
5 | * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation. | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <linux/reciprocal_div.h> | ||
18 | #include <linux/vmalloc.h> | ||
19 | #include <net/pkt_sched.h> | ||
20 | #include <net/inet_ecn.h> | ||
21 | #include <net/red.h> | ||
22 | #include <linux/ip.h> | ||
23 | #include <net/ip.h> | ||
24 | #include <linux/ipv6.h> | ||
25 | #include <net/ipv6.h> | ||
26 | |||
27 | /* | ||
28 | CHOKe stateless AQM for fair bandwidth allocation | ||
29 | ================================================= | ||
30 | |||
31 | CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for | ||
32 | unresponsive flows) is a variant of RED that penalizes misbehaving flows but | ||
33 | maintains no flow state. The difference from RED is an additional step | ||
34 | during the enqueuing process. If average queue size is over the | ||
35 | low threshold (qmin), a packet is chosen at random from the queue. | ||
36 | If both the new and chosen packet are from the same flow, both | ||
37 | are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it | ||
38 | needs random access to the packets in its queue. It has a minimal class | ||
39 | interface to allow overriding the builtin flow classifier with | ||
40 | filters. | ||
41 | |||
42 | Source: | ||
43 | R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless | ||
44 | Active Queue Management Scheme for Approximating Fair Bandwidth Allocation", | ||
45 | IEEE INFOCOM, 2000. | ||
46 | |||
47 | A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial | ||
48 | Characteristics", IEEE/ACM Transactions on Networking, 2004 | ||
49 | |||
50 | */ | ||
51 | |||
52 | /* Upper bound on size of sk_buff table (packets) */ | ||
53 | #define CHOKE_MAX_QUEUE (128*1024 - 1) | ||
54 | |||
55 | struct choke_sched_data { | ||
56 | /* Parameters */ | ||
57 | u32 limit; | ||
58 | unsigned char flags; | ||
59 | |||
60 | struct red_parms parms; | ||
61 | |||
62 | /* Variables */ | ||
63 | struct tcf_proto *filter_list; | ||
64 | struct { | ||
65 | u32 prob_drop; /* Early probability drops */ | ||
66 | u32 prob_mark; /* Early probability marks */ | ||
67 | u32 forced_drop; /* Forced drops, qavg > max_thresh */ | ||
68 | u32 forced_mark; /* Forced marks, qavg > max_thresh */ | ||
69 | u32 pdrop; /* Drops due to queue limits */ | ||
70 | u32 other; /* Drops due to drop() calls */ | ||
71 | u32 matched; /* Drops to flow match */ | ||
72 | } stats; | ||
73 | |||
74 | unsigned int head; | ||
75 | unsigned int tail; | ||
76 | |||
77 | unsigned int tab_mask; /* size - 1 */ | ||
78 | |||
79 | struct sk_buff **tab; | ||
80 | }; | ||
81 | |||
82 | /* deliver a random number between 0 and N - 1 */ | ||
83 | static u32 random_N(unsigned int N) | ||
84 | { | ||
85 | return reciprocal_divide(random32(), N); | ||
86 | } | ||
87 | |||
88 | /* number of elements in queue including holes */ | ||
89 | static unsigned int choke_len(const struct choke_sched_data *q) | ||
90 | { | ||
91 | return (q->tail - q->head) & q->tab_mask; | ||
92 | } | ||
93 | |||
94 | /* Is ECN parameter configured */ | ||
95 | static int use_ecn(const struct choke_sched_data *q) | ||
96 | { | ||
97 | return q->flags & TC_RED_ECN; | ||
98 | } | ||
99 | |||
100 | /* Should packets over max just be dropped (versus marked) */ | ||
101 | static int use_harddrop(const struct choke_sched_data *q) | ||
102 | { | ||
103 | return q->flags & TC_RED_HARDDROP; | ||
104 | } | ||
105 | |||
106 | /* Move head pointer forward to skip over holes */ | ||
107 | static void choke_zap_head_holes(struct choke_sched_data *q) | ||
108 | { | ||
109 | do { | ||
110 | q->head = (q->head + 1) & q->tab_mask; | ||
111 | if (q->head == q->tail) | ||
112 | break; | ||
113 | } while (q->tab[q->head] == NULL); | ||
114 | } | ||
115 | |||
116 | /* Move tail pointer backwards to reuse holes */ | ||
117 | static void choke_zap_tail_holes(struct choke_sched_data *q) | ||
118 | { | ||
119 | do { | ||
120 | q->tail = (q->tail - 1) & q->tab_mask; | ||
121 | if (q->head == q->tail) | ||
122 | break; | ||
123 | } while (q->tab[q->tail] == NULL); | ||
124 | } | ||
125 | |||
126 | /* Drop packet from queue array by creating a "hole" */ | ||
127 | static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx) | ||
128 | { | ||
129 | struct choke_sched_data *q = qdisc_priv(sch); | ||
130 | struct sk_buff *skb = q->tab[idx]; | ||
131 | |||
132 | q->tab[idx] = NULL; | ||
133 | |||
134 | if (idx == q->head) | ||
135 | choke_zap_head_holes(q); | ||
136 | if (idx == q->tail) | ||
137 | choke_zap_tail_holes(q); | ||
138 | |||
139 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
140 | qdisc_drop(skb, sch); | ||
141 | qdisc_tree_decrease_qlen(sch, 1); | ||
142 | --sch->q.qlen; | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * Compare the flow of two packets. | ||
147 | * Returns true only if source and destination address and port match; | ||
148 | * returns false for special cases. | ||
149 | */ | ||
150 | static bool choke_match_flow(struct sk_buff *skb1, | ||
151 | struct sk_buff *skb2) | ||
152 | { | ||
153 | int off1, off2, poff; | ||
154 | const u32 *ports1, *ports2; | ||
155 | u8 ip_proto; | ||
156 | __u32 hash1; | ||
157 | |||
158 | if (skb1->protocol != skb2->protocol) | ||
159 | return false; | ||
160 | |||
161 | /* Use hash value as quick check | ||
162 | * Assumes that __skb_get_rxhash makes IP header and ports linear | ||
163 | */ | ||
164 | hash1 = skb_get_rxhash(skb1); | ||
165 | if (!hash1 || hash1 != skb_get_rxhash(skb2)) | ||
166 | return false; | ||
167 | |||
168 | /* Probably match, but be sure to avoid hash collisions */ | ||
169 | off1 = skb_network_offset(skb1); | ||
170 | off2 = skb_network_offset(skb2); | ||
171 | |||
172 | switch (skb1->protocol) { | ||
173 | case __constant_htons(ETH_P_IP): { | ||
174 | const struct iphdr *ip1, *ip2; | ||
175 | |||
176 | ip1 = (const struct iphdr *) (skb1->data + off1); | ||
177 | ip2 = (const struct iphdr *) (skb2->data + off2); | ||
178 | |||
179 | ip_proto = ip1->protocol; | ||
180 | if (ip_proto != ip2->protocol || | ||
181 | ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr) | ||
182 | return false; | ||
183 | |||
184 | if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET)) | ||
185 | ip_proto = 0; | ||
186 | off1 += ip1->ihl * 4; | ||
187 | off2 += ip2->ihl * 4; | ||
188 | break; | ||
189 | } | ||
190 | |||
191 | case __constant_htons(ETH_P_IPV6): { | ||
192 | const struct ipv6hdr *ip1, *ip2; | ||
193 | |||
194 | ip1 = (const struct ipv6hdr *) (skb1->data + off1); | ||
195 | ip2 = (const struct ipv6hdr *) (skb2->data + off2); | ||
196 | |||
197 | ip_proto = ip1->nexthdr; | ||
198 | if (ip_proto != ip2->nexthdr || | ||
199 | ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) || | ||
200 | ipv6_addr_cmp(&ip1->daddr, &ip2->daddr)) | ||
201 | return false; | ||
202 | off1 += 40; | ||
203 | off2 += 40; | ||
204 | break; | ||
205 | } | ||
206 | default: /* Maybe compare MAC header here? */ | ||
207 | return false; | ||
208 | } | ||
209 | |||
210 | poff = proto_ports_offset(ip_proto); | ||
211 | if (poff < 0) | ||
212 | return true; | ||
213 | |||
214 | off1 += poff; | ||
215 | off2 += poff; | ||
216 | |||
217 | ports1 = (__force u32 *)(skb1->data + off1); | ||
218 | ports2 = (__force u32 *)(skb2->data + off2); | ||
219 | return *ports1 == *ports2; | ||
220 | } | ||
221 | |||
222 | struct choke_skb_cb { | ||
223 | u16 classid; | ||
224 | }; | ||
225 | |||
226 | static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb) | ||
227 | { | ||
228 | BUILD_BUG_ON(sizeof(skb->cb) < | ||
229 | sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb)); | ||
230 | return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data; | ||
231 | } | ||
232 | |||
233 | static inline void choke_set_classid(struct sk_buff *skb, u16 classid) | ||
234 | { | ||
235 | choke_skb_cb(skb)->classid = classid; | ||
236 | } | ||
237 | |||
238 | static u16 choke_get_classid(const struct sk_buff *skb) | ||
239 | { | ||
240 | return choke_skb_cb(skb)->classid; | ||
241 | } | ||
242 | |||
243 | /* | ||
244 | * Classify flow using either: | ||
245 | * 1. pre-existing classification result in skb | ||
246 | * 2. fast internal classification | ||
247 | * 3. use TC filter based classification | ||
248 | */ | ||
249 | static bool choke_classify(struct sk_buff *skb, | ||
250 | struct Qdisc *sch, int *qerr) | ||
251 | |||
252 | { | ||
253 | struct choke_sched_data *q = qdisc_priv(sch); | ||
254 | struct tcf_result res; | ||
255 | int result; | ||
256 | |||
257 | result = tc_classify(skb, q->filter_list, &res); | ||
258 | if (result >= 0) { | ||
259 | #ifdef CONFIG_NET_CLS_ACT | ||
260 | switch (result) { | ||
261 | case TC_ACT_STOLEN: | ||
262 | case TC_ACT_QUEUED: | ||
263 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | ||
264 | case TC_ACT_SHOT: | ||
265 | return false; | ||
266 | } | ||
267 | #endif | ||
268 | choke_set_classid(skb, TC_H_MIN(res.classid)); | ||
269 | return true; | ||
270 | } | ||
271 | |||
272 | return false; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Select a packet at random from the queue. | ||
277 | * HACK: since the queue can have holes from previous deletions, retry a few | ||
278 | * times to find a random skb; if that fails, give up and return the head. | ||
279 | * Returns NULL if the queue is empty (q->head == q->tail). | ||
280 | */ | ||
281 | static struct sk_buff *choke_peek_random(const struct choke_sched_data *q, | ||
282 | unsigned int *pidx) | ||
283 | { | ||
284 | struct sk_buff *skb; | ||
285 | int retrys = 3; | ||
286 | |||
287 | do { | ||
288 | *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask; | ||
289 | skb = q->tab[*pidx]; | ||
290 | if (skb) | ||
291 | return skb; | ||
292 | } while (--retrys > 0); | ||
293 | |||
294 | return q->tab[*pidx = q->head]; | ||
295 | } | ||
296 | |||
297 | /* | ||
298 | * Compare the new packet with a randomly chosen packet in the queue; | ||
299 | * returns true if they match and sets *pidx to the chosen index. | ||
300 | */ | ||
301 | static bool choke_match_random(const struct choke_sched_data *q, | ||
302 | struct sk_buff *nskb, | ||
303 | unsigned int *pidx) | ||
304 | { | ||
305 | struct sk_buff *oskb; | ||
306 | |||
307 | if (q->head == q->tail) | ||
308 | return false; | ||
309 | |||
310 | oskb = choke_peek_random(q, pidx); | ||
311 | if (q->filter_list) | ||
312 | return choke_get_classid(nskb) == choke_get_classid(oskb); | ||
313 | |||
314 | return choke_match_flow(oskb, nskb); | ||
315 | } | ||
316 | |||
317 | static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
318 | { | ||
319 | struct choke_sched_data *q = qdisc_priv(sch); | ||
320 | struct red_parms *p = &q->parms; | ||
321 | int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | ||
322 | |||
323 | if (q->filter_list) { | ||
324 | /* If using external classifiers, get result and record it. */ | ||
325 | if (!choke_classify(skb, sch, &ret)) | ||
326 | goto other_drop; /* Packet was eaten by filter */ | ||
327 | } | ||
328 | |||
329 | /* Compute average queue usage (see RED) */ | ||
330 | p->qavg = red_calc_qavg(p, sch->q.qlen); | ||
331 | if (red_is_idling(p)) | ||
332 | red_end_of_idle_period(p); | ||
333 | |||
334 | /* Is queue small? */ | ||
335 | if (p->qavg <= p->qth_min) | ||
336 | p->qcount = -1; | ||
337 | else { | ||
338 | unsigned int idx; | ||
339 | |||
340 | /* Draw a packet at random from queue and compare flow */ | ||
341 | if (choke_match_random(q, skb, &idx)) { | ||
342 | q->stats.matched++; | ||
343 | choke_drop_by_idx(sch, idx); | ||
344 | goto congestion_drop; | ||
345 | } | ||
346 | |||
347 | /* Queue is large, always mark/drop */ | ||
348 | if (p->qavg > p->qth_max) { | ||
349 | p->qcount = -1; | ||
350 | |||
351 | sch->qstats.overlimits++; | ||
352 | if (use_harddrop(q) || !use_ecn(q) || | ||
353 | !INET_ECN_set_ce(skb)) { | ||
354 | q->stats.forced_drop++; | ||
355 | goto congestion_drop; | ||
356 | } | ||
357 | |||
358 | q->stats.forced_mark++; | ||
359 | } else if (++p->qcount) { | ||
360 | if (red_mark_probability(p, p->qavg)) { | ||
361 | p->qcount = 0; | ||
362 | p->qR = red_random(p); | ||
363 | |||
364 | sch->qstats.overlimits++; | ||
365 | if (!use_ecn(q) || !INET_ECN_set_ce(skb)) { | ||
366 | q->stats.prob_drop++; | ||
367 | goto congestion_drop; | ||
368 | } | ||
369 | |||
370 | q->stats.prob_mark++; | ||
371 | } | ||
372 | } else | ||
373 | p->qR = red_random(p); | ||
374 | } | ||
375 | |||
376 | /* Admit new packet */ | ||
377 | if (sch->q.qlen < q->limit) { | ||
378 | q->tab[q->tail] = skb; | ||
379 | q->tail = (q->tail + 1) & q->tab_mask; | ||
380 | ++sch->q.qlen; | ||
381 | sch->qstats.backlog += qdisc_pkt_len(skb); | ||
382 | return NET_XMIT_SUCCESS; | ||
383 | } | ||
384 | |||
385 | q->stats.pdrop++; | ||
386 | sch->qstats.drops++; | ||
387 | kfree_skb(skb); | ||
388 | return NET_XMIT_DROP; | ||
389 | |||
390 | congestion_drop: | ||
391 | qdisc_drop(skb, sch); | ||
392 | return NET_XMIT_CN; | ||
393 | |||
394 | other_drop: | ||
395 | if (ret & __NET_XMIT_BYPASS) | ||
396 | sch->qstats.drops++; | ||
397 | kfree_skb(skb); | ||
398 | return ret; | ||
399 | } | ||
400 | |||
401 | static struct sk_buff *choke_dequeue(struct Qdisc *sch) | ||
402 | { | ||
403 | struct choke_sched_data *q = qdisc_priv(sch); | ||
404 | struct sk_buff *skb; | ||
405 | |||
406 | if (q->head == q->tail) { | ||
407 | if (!red_is_idling(&q->parms)) | ||
408 | red_start_of_idle_period(&q->parms); | ||
409 | return NULL; | ||
410 | } | ||
411 | |||
412 | skb = q->tab[q->head]; | ||
413 | q->tab[q->head] = NULL; | ||
414 | choke_zap_head_holes(q); | ||
415 | --sch->q.qlen; | ||
416 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
417 | qdisc_bstats_update(sch, skb); | ||
418 | |||
419 | return skb; | ||
420 | } | ||
421 | |||
422 | static unsigned int choke_drop(struct Qdisc *sch) | ||
423 | { | ||
424 | struct choke_sched_data *q = qdisc_priv(sch); | ||
425 | unsigned int len; | ||
426 | |||
427 | len = qdisc_queue_drop(sch); | ||
428 | if (len > 0) | ||
429 | q->stats.other++; | ||
430 | else { | ||
431 | if (!red_is_idling(&q->parms)) | ||
432 | red_start_of_idle_period(&q->parms); | ||
433 | } | ||
434 | |||
435 | return len; | ||
436 | } | ||
437 | |||
438 | static void choke_reset(struct Qdisc *sch) | ||
439 | { | ||
440 | struct choke_sched_data *q = qdisc_priv(sch); | ||
441 | |||
442 | red_restart(&q->parms); | ||
443 | } | ||
444 | |||
445 | static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = { | ||
446 | [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) }, | ||
447 | [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE }, | ||
448 | }; | ||
449 | |||
450 | |||
451 | static void choke_free(void *addr) | ||
452 | { | ||
453 | if (addr) { | ||
454 | if (is_vmalloc_addr(addr)) | ||
455 | vfree(addr); | ||
456 | else | ||
457 | kfree(addr); | ||
458 | } | ||
459 | } | ||
460 | |||
461 | static int choke_change(struct Qdisc *sch, struct nlattr *opt) | ||
462 | { | ||
463 | struct choke_sched_data *q = qdisc_priv(sch); | ||
464 | struct nlattr *tb[TCA_CHOKE_MAX + 1]; | ||
465 | const struct tc_red_qopt *ctl; | ||
466 | int err; | ||
467 | struct sk_buff **old = NULL; | ||
468 | unsigned int mask; | ||
469 | |||
470 | if (opt == NULL) | ||
471 | return -EINVAL; | ||
472 | |||
473 | err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy); | ||
474 | if (err < 0) | ||
475 | return err; | ||
476 | |||
477 | if (tb[TCA_CHOKE_PARMS] == NULL || | ||
478 | tb[TCA_CHOKE_STAB] == NULL) | ||
479 | return -EINVAL; | ||
480 | |||
481 | ctl = nla_data(tb[TCA_CHOKE_PARMS]); | ||
482 | |||
483 | if (ctl->limit > CHOKE_MAX_QUEUE) | ||
484 | return -EINVAL; | ||
485 | |||
486 | mask = roundup_pow_of_two(ctl->limit + 1) - 1; | ||
487 | if (mask != q->tab_mask) { | ||
488 | struct sk_buff **ntab; | ||
489 | |||
490 | ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL); | ||
491 | if (!ntab) | ||
492 | ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *)); | ||
493 | if (!ntab) | ||
494 | return -ENOMEM; | ||
495 | |||
496 | sch_tree_lock(sch); | ||
497 | old = q->tab; | ||
498 | if (old) { | ||
499 | unsigned int oqlen = sch->q.qlen, tail = 0; | ||
500 | |||
501 | while (q->head != q->tail) { | ||
502 | struct sk_buff *skb = q->tab[q->head]; | ||
503 | |||
504 | q->head = (q->head + 1) & q->tab_mask; | ||
505 | if (!skb) | ||
506 | continue; | ||
507 | if (tail < mask) { | ||
508 | ntab[tail++] = skb; | ||
509 | continue; | ||
510 | } | ||
511 | sch->qstats.backlog -= qdisc_pkt_len(skb); | ||
512 | --sch->q.qlen; | ||
513 | qdisc_drop(skb, sch); | ||
514 | } | ||
515 | qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen); | ||
516 | q->head = 0; | ||
517 | q->tail = tail; | ||
518 | } | ||
519 | |||
520 | q->tab_mask = mask; | ||
521 | q->tab = ntab; | ||
522 | } else | ||
523 | sch_tree_lock(sch); | ||
524 | |||
525 | q->flags = ctl->flags; | ||
526 | q->limit = ctl->limit; | ||
527 | |||
528 | red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, | ||
529 | ctl->Plog, ctl->Scell_log, | ||
530 | nla_data(tb[TCA_CHOKE_STAB])); | ||
531 | |||
532 | if (q->head == q->tail) | ||
533 | red_end_of_idle_period(&q->parms); | ||
534 | |||
535 | sch_tree_unlock(sch); | ||
536 | choke_free(old); | ||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | static int choke_init(struct Qdisc *sch, struct nlattr *opt) | ||
541 | { | ||
542 | return choke_change(sch, opt); | ||
543 | } | ||
544 | |||
545 | static int choke_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
546 | { | ||
547 | struct choke_sched_data *q = qdisc_priv(sch); | ||
548 | struct nlattr *opts = NULL; | ||
549 | struct tc_red_qopt opt = { | ||
550 | .limit = q->limit, | ||
551 | .flags = q->flags, | ||
552 | .qth_min = q->parms.qth_min >> q->parms.Wlog, | ||
553 | .qth_max = q->parms.qth_max >> q->parms.Wlog, | ||
554 | .Wlog = q->parms.Wlog, | ||
555 | .Plog = q->parms.Plog, | ||
556 | .Scell_log = q->parms.Scell_log, | ||
557 | }; | ||
558 | |||
559 | opts = nla_nest_start(skb, TCA_OPTIONS); | ||
560 | if (opts == NULL) | ||
561 | goto nla_put_failure; | ||
562 | |||
563 | NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt); | ||
564 | return nla_nest_end(skb, opts); | ||
565 | |||
566 | nla_put_failure: | ||
567 | nla_nest_cancel(skb, opts); | ||
568 | return -EMSGSIZE; | ||
569 | } | ||
570 | |||
571 | static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | ||
572 | { | ||
573 | struct choke_sched_data *q = qdisc_priv(sch); | ||
574 | struct tc_choke_xstats st = { | ||
575 | .early = q->stats.prob_drop + q->stats.forced_drop, | ||
576 | .marked = q->stats.prob_mark + q->stats.forced_mark, | ||
577 | .pdrop = q->stats.pdrop, | ||
578 | .other = q->stats.other, | ||
579 | .matched = q->stats.matched, | ||
580 | }; | ||
581 | |||
582 | return gnet_stats_copy_app(d, &st, sizeof(st)); | ||
583 | } | ||
584 | |||
585 | static void choke_destroy(struct Qdisc *sch) | ||
586 | { | ||
587 | struct choke_sched_data *q = qdisc_priv(sch); | ||
588 | |||
589 | tcf_destroy_chain(&q->filter_list); | ||
590 | choke_free(q->tab); | ||
591 | } | ||
592 | |||
593 | static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg) | ||
594 | { | ||
595 | return NULL; | ||
596 | } | ||
597 | |||
598 | static unsigned long choke_get(struct Qdisc *sch, u32 classid) | ||
599 | { | ||
600 | return 0; | ||
601 | } | ||
602 | |||
603 | static void choke_put(struct Qdisc *q, unsigned long cl) | ||
604 | { | ||
605 | } | ||
606 | |||
607 | static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent, | ||
608 | u32 classid) | ||
609 | { | ||
610 | return 0; | ||
611 | } | ||
612 | |||
613 | static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl) | ||
614 | { | ||
615 | struct choke_sched_data *q = qdisc_priv(sch); | ||
616 | |||
617 | if (cl) | ||
618 | return NULL; | ||
619 | return &q->filter_list; | ||
620 | } | ||
621 | |||
622 | static int choke_dump_class(struct Qdisc *sch, unsigned long cl, | ||
623 | struct sk_buff *skb, struct tcmsg *tcm) | ||
624 | { | ||
625 | tcm->tcm_handle |= TC_H_MIN(cl); | ||
626 | return 0; | ||
627 | } | ||
628 | |||
629 | static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg) | ||
630 | { | ||
631 | if (!arg->stop) { | ||
632 | if (arg->fn(sch, 1, arg) < 0) { | ||
633 | arg->stop = 1; | ||
634 | return; | ||
635 | } | ||
636 | arg->count++; | ||
637 | } | ||
638 | } | ||
639 | |||
640 | static const struct Qdisc_class_ops choke_class_ops = { | ||
641 | .leaf = choke_leaf, | ||
642 | .get = choke_get, | ||
643 | .put = choke_put, | ||
644 | .tcf_chain = choke_find_tcf, | ||
645 | .bind_tcf = choke_bind, | ||
646 | .unbind_tcf = choke_put, | ||
647 | .dump = choke_dump_class, | ||
648 | .walk = choke_walk, | ||
649 | }; | ||
650 | |||
651 | static struct sk_buff *choke_peek_head(struct Qdisc *sch) | ||
652 | { | ||
653 | struct choke_sched_data *q = qdisc_priv(sch); | ||
654 | |||
655 | return (q->head != q->tail) ? q->tab[q->head] : NULL; | ||
656 | } | ||
657 | |||
658 | static struct Qdisc_ops choke_qdisc_ops __read_mostly = { | ||
659 | .id = "choke", | ||
660 | .priv_size = sizeof(struct choke_sched_data), | ||
661 | |||
662 | .enqueue = choke_enqueue, | ||
663 | .dequeue = choke_dequeue, | ||
664 | .peek = choke_peek_head, | ||
665 | .drop = choke_drop, | ||
666 | .init = choke_init, | ||
667 | .destroy = choke_destroy, | ||
668 | .reset = choke_reset, | ||
669 | .change = choke_change, | ||
670 | .dump = choke_dump, | ||
671 | .dump_stats = choke_dump_stats, | ||
672 | .owner = THIS_MODULE, | ||
673 | }; | ||
674 | |||
675 | static int __init choke_module_init(void) | ||
676 | { | ||
677 | return register_qdisc(&choke_qdisc_ops); | ||
678 | } | ||
679 | |||
680 | static void __exit choke_module_exit(void) | ||
681 | { | ||
682 | unregister_qdisc(&choke_qdisc_ops); | ||
683 | } | ||
684 | |||
685 | module_init(choke_module_init) | ||
686 | module_exit(choke_module_exit) | ||
687 | |||
688 | MODULE_LICENSE("GPL"); | ||
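The header comment at the top of sch_choke.c above spells out the core decision: once the RED average queue length is over qth_min, an arriving packet is compared against a randomly chosen queued packet, and if both belong to the same flow, both are dropped. The following is a minimal userspace sketch of that decision loop, not kernel code: the ring buffer, the QMIN/QMAX thresholds and the integer flow ids are illustrative stand-ins, and none of the names are kernel APIs.

/* Toy model of the CHOKe enqueue decision (userspace, illustrative only). */
#include <stdio.h>
#include <stdlib.h>

#define QSIZE 8u                 /* table size, a power of two (tab_mask + 1) */
#define QMIN  2u                 /* stand-in for RED qth_min */
#define QMAX  6u                 /* stand-in for RED qth_max */

static int tab[QSIZE];           /* queued flow ids; -1 marks a hole */
static unsigned int head, tail;

static unsigned int qlen(void)
{
	return (tail - head) & (QSIZE - 1);
}

/* Like choke_peek_random(): try a few random slots, fall back to the head. */
static int peek_random(unsigned int *pidx)
{
	for (int retry = 0; retry < 3; retry++) {
		*pidx = (head + rand() % qlen()) & (QSIZE - 1);
		if (tab[*pidx] != -1)
			return tab[*pidx];
	}
	*pidx = head;
	return tab[head];
}

/* Returns 1 if the packet was queued, 0 if it was dropped. */
static int choke_like_enqueue(int flow)
{
	unsigned int idx;

	if (qlen() > QMIN) {
		/* CHOKe step: draw a random victim and compare flows. */
		if (peek_random(&idx) == flow) {
			tab[idx] = -1;                    /* drop the victim, leaving a hole */
			while (qlen() && tab[head] == -1) /* zap holes at the head */
				head = (head + 1) & (QSIZE - 1);
			printf("flow %d: matched random victim, dropping both\n", flow);
			return 0;                         /* drop the new packet too */
		}
		if (qlen() >= QMAX) {                     /* queue too long: plain drop */
			printf("flow %d: over limit, dropped\n", flow);
			return 0;
		}
	}
	tab[tail] = flow;                                 /* admit the new packet */
	tail = (tail + 1) & (QSIZE - 1);
	return 1;
}

int main(void)
{
	for (int i = 0; i < 20; i++)                      /* flow 7 floods, flow 1 trickles */
		choke_like_enqueue(i % 5 ? 7 : 1);
	printf("final queue length: %u\n", qlen());
	return 0;
}

With this arrival pattern most of flow 7's packets are typically dropped by the matched-victim rule once the queue grows past QMIN, while flow 1's occasional packets usually get through, which is the fairness bias CHOKe aims for.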
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index b74046a95397..6b7fe4a84f13 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c | |||
@@ -110,7 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
110 | cl->refcnt = 1; | 110 | cl->refcnt = 1; |
111 | cl->common.classid = classid; | 111 | cl->common.classid = classid; |
112 | cl->quantum = quantum; | 112 | cl->quantum = quantum; |
113 | cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 113 | cl->qdisc = qdisc_create_dflt(sch->dev_queue, |
114 | &pfifo_qdisc_ops, classid); | 114 | &pfifo_qdisc_ops, classid); |
115 | if (cl->qdisc == NULL) | 115 | if (cl->qdisc == NULL) |
116 | cl->qdisc = &noop_qdisc; | 116 | cl->qdisc = &noop_qdisc; |
@@ -218,7 +218,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg, | |||
218 | struct drr_class *cl = (struct drr_class *)arg; | 218 | struct drr_class *cl = (struct drr_class *)arg; |
219 | 219 | ||
220 | if (new == NULL) { | 220 | if (new == NULL) { |
221 | new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 221 | new = qdisc_create_dflt(sch->dev_queue, |
222 | &pfifo_qdisc_ops, cl->common.classid); | 222 | &pfifo_qdisc_ops, cl->common.classid); |
223 | if (new == NULL) | 223 | if (new == NULL) |
224 | new = &noop_qdisc; | 224 | new = &noop_qdisc; |
@@ -351,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
351 | { | 351 | { |
352 | struct drr_sched *q = qdisc_priv(sch); | 352 | struct drr_sched *q = qdisc_priv(sch); |
353 | struct drr_class *cl; | 353 | struct drr_class *cl; |
354 | unsigned int len; | ||
355 | int err; | 354 | int err; |
356 | 355 | ||
357 | cl = drr_classify(skb, sch, &err); | 356 | cl = drr_classify(skb, sch, &err); |
@@ -362,7 +361,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
362 | return err; | 361 | return err; |
363 | } | 362 | } |
364 | 363 | ||
365 | len = qdisc_pkt_len(skb); | ||
366 | err = qdisc_enqueue(skb, cl->qdisc); | 364 | err = qdisc_enqueue(skb, cl->qdisc); |
367 | if (unlikely(err != NET_XMIT_SUCCESS)) { | 365 | if (unlikely(err != NET_XMIT_SUCCESS)) { |
368 | if (net_xmit_drop_count(err)) { | 366 | if (net_xmit_drop_count(err)) { |
@@ -377,10 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
377 | cl->deficit = cl->quantum; | 375 | cl->deficit = cl->quantum; |
378 | } | 376 | } |
379 | 377 | ||
380 | cl->bstats.packets++; | 378 | bstats_update(&cl->bstats, skb); |
381 | cl->bstats.bytes += len; | ||
382 | sch->bstats.packets++; | ||
383 | sch->bstats.bytes += len; | ||
384 | 379 | ||
385 | sch->q.qlen++; | 380 | sch->q.qlen++; |
386 | return err; | 381 | return err; |
@@ -407,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) | |||
407 | skb = qdisc_dequeue_peeked(cl->qdisc); | 402 | skb = qdisc_dequeue_peeked(cl->qdisc); |
408 | if (cl->qdisc->q.qlen == 0) | 403 | if (cl->qdisc->q.qlen == 0) |
409 | list_del(&cl->alist); | 404 | list_del(&cl->alist); |
405 | qdisc_bstats_update(sch, skb); | ||
410 | sch->q.qlen--; | 406 | sch->q.qlen--; |
411 | return skb; | 407 | return skb; |
412 | } | 408 | } |
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 63d41f86679c..2c790204d042 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c | |||
@@ -61,8 +61,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, | |||
61 | sch, p, new, old); | 61 | sch, p, new, old); |
62 | 62 | ||
63 | if (new == NULL) { | 63 | if (new == NULL) { |
64 | new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 64 | new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
65 | &pfifo_qdisc_ops, | ||
66 | sch->handle); | 65 | sch->handle); |
67 | if (new == NULL) | 66 | if (new == NULL) |
68 | new = &noop_qdisc; | 67 | new = &noop_qdisc; |
@@ -138,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent, | |||
138 | mask = nla_get_u8(tb[TCA_DSMARK_MASK]); | 137 | mask = nla_get_u8(tb[TCA_DSMARK_MASK]); |
139 | 138 | ||
140 | if (tb[TCA_DSMARK_VALUE]) | 139 | if (tb[TCA_DSMARK_VALUE]) |
141 | p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); | 140 | p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); |
142 | 141 | ||
143 | if (tb[TCA_DSMARK_MASK]) | 142 | if (tb[TCA_DSMARK_MASK]) |
144 | p->mask[*arg-1] = mask; | 143 | p->mask[*arg - 1] = mask; |
145 | 144 | ||
146 | err = 0; | 145 | err = 0; |
147 | 146 | ||
@@ -156,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg) | |||
156 | if (!dsmark_valid_index(p, arg)) | 155 | if (!dsmark_valid_index(p, arg)) |
157 | return -EINVAL; | 156 | return -EINVAL; |
158 | 157 | ||
159 | p->mask[arg-1] = 0xff; | 158 | p->mask[arg - 1] = 0xff; |
160 | p->value[arg-1] = 0; | 159 | p->value[arg - 1] = 0; |
161 | 160 | ||
162 | return 0; | 161 | return 0; |
163 | } | 162 | } |
@@ -176,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) | |||
176 | if (p->mask[i] == 0xff && !p->value[i]) | 175 | if (p->mask[i] == 0xff && !p->value[i]) |
177 | goto ignore; | 176 | goto ignore; |
178 | if (walker->count >= walker->skip) { | 177 | if (walker->count >= walker->skip) { |
179 | if (walker->fn(sch, i+1, walker) < 0) { | 178 | if (walker->fn(sch, i + 1, walker) < 0) { |
180 | walker->stop = 1; | 179 | walker->stop = 1; |
181 | break; | 180 | break; |
182 | } | 181 | } |
@@ -261,8 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
261 | return err; | 260 | return err; |
262 | } | 261 | } |
263 | 262 | ||
264 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
265 | sch->bstats.packets++; | ||
266 | sch->q.qlen++; | 263 | sch->q.qlen++; |
267 | 264 | ||
268 | return NET_XMIT_SUCCESS; | 265 | return NET_XMIT_SUCCESS; |
@@ -285,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) | |||
285 | if (skb == NULL) | 282 | if (skb == NULL) |
286 | return NULL; | 283 | return NULL; |
287 | 284 | ||
285 | qdisc_bstats_update(sch, skb); | ||
288 | sch->q.qlen--; | 286 | sch->q.qlen--; |
289 | 287 | ||
290 | index = skb->tc_index & (p->indices - 1); | 288 | index = skb->tc_index & (p->indices - 1); |
@@ -306,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) | |||
306 | * and don't need yet another qdisc as a bypass. | 304 | * and don't need yet another qdisc as a bypass. |
307 | */ | 305 | */ |
308 | if (p->mask[index] != 0xff || p->value[index]) | 306 | if (p->mask[index] != 0xff || p->value[index]) |
309 | printk(KERN_WARNING | 307 | pr_warning("dsmark_dequeue: unsupported protocol %d\n", |
310 | "dsmark_dequeue: unsupported protocol %d\n", | 308 | ntohs(skb->protocol)); |
311 | ntohs(skb->protocol)); | ||
312 | break; | 309 | break; |
313 | } | 310 | } |
314 | 311 | ||
@@ -384,8 +381,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) | |||
384 | p->default_index = default_index; | 381 | p->default_index = default_index; |
385 | p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); | 382 | p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); |
386 | 383 | ||
387 | p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 384 | p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); |
388 | &pfifo_qdisc_ops, sch->handle); | ||
389 | if (p->q == NULL) | 385 | if (p->q == NULL) |
390 | p->q = &noop_qdisc; | 386 | p->q = &noop_qdisc; |
391 | 387 | ||
@@ -427,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, | |||
427 | if (!dsmark_valid_index(p, cl)) | 423 | if (!dsmark_valid_index(p, cl)) |
428 | return -EINVAL; | 424 | return -EINVAL; |
429 | 425 | ||
430 | tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); | 426 | tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1); |
431 | tcm->tcm_info = p->q->handle; | 427 | tcm->tcm_info = p->q->handle; |
432 | 428 | ||
433 | opts = nla_nest_start(skb, TCA_OPTIONS); | 429 | opts = nla_nest_start(skb, TCA_OPTIONS); |
434 | if (opts == NULL) | 430 | if (opts == NULL) |
435 | goto nla_put_failure; | 431 | goto nla_put_failure; |
436 | NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); | 432 | NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]); |
437 | NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); | 433 | NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]); |
438 | 434 | ||
439 | return nla_nest_end(skb, opts); | 435 | return nla_nest_end(skb, opts); |
440 | 436 | ||
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 5948bafa8ce2..66effe2da8e0 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c | |||
@@ -19,46 +19,30 @@ | |||
19 | 19 | ||
20 | /* 1 band FIFO pseudo-"scheduler" */ | 20 | /* 1 band FIFO pseudo-"scheduler" */ |
21 | 21 | ||
22 | struct fifo_sched_data | 22 | static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
23 | { | 23 | { |
24 | u32 limit; | 24 | if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit)) |
25 | }; | ||
26 | |||
27 | static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) | ||
28 | { | ||
29 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
30 | |||
31 | if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit)) | ||
32 | return qdisc_enqueue_tail(skb, sch); | 25 | return qdisc_enqueue_tail(skb, sch); |
33 | 26 | ||
34 | return qdisc_reshape_fail(skb, sch); | 27 | return qdisc_reshape_fail(skb, sch); |
35 | } | 28 | } |
36 | 29 | ||
37 | static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 30 | static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
38 | { | 31 | { |
39 | struct fifo_sched_data *q = qdisc_priv(sch); | 32 | if (likely(skb_queue_len(&sch->q) < sch->limit)) |
40 | |||
41 | if (likely(skb_queue_len(&sch->q) < q->limit)) | ||
42 | return qdisc_enqueue_tail(skb, sch); | 33 | return qdisc_enqueue_tail(skb, sch); |
43 | 34 | ||
44 | return qdisc_reshape_fail(skb, sch); | 35 | return qdisc_reshape_fail(skb, sch); |
45 | } | 36 | } |
46 | 37 | ||
47 | static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 38 | static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
48 | { | 39 | { |
49 | struct sk_buff *skb_head; | 40 | if (likely(skb_queue_len(&sch->q) < sch->limit)) |
50 | struct fifo_sched_data *q = qdisc_priv(sch); | ||
51 | |||
52 | if (likely(skb_queue_len(&sch->q) < q->limit)) | ||
53 | return qdisc_enqueue_tail(skb, sch); | 41 | return qdisc_enqueue_tail(skb, sch); |
54 | 42 | ||
55 | /* queue full, remove one skb to fulfill the limit */ | 43 | /* queue full, remove one skb to fulfill the limit */ |
56 | skb_head = qdisc_dequeue_head(sch); | 44 | __qdisc_queue_drop_head(sch, &sch->q); |
57 | sch->bstats.bytes -= qdisc_pkt_len(skb_head); | ||
58 | sch->bstats.packets--; | ||
59 | sch->qstats.drops++; | 45 | sch->qstats.drops++; |
60 | kfree_skb(skb_head); | ||
61 | |||
62 | qdisc_enqueue_tail(skb, sch); | 46 | qdisc_enqueue_tail(skb, sch); |
63 | 47 | ||
64 | return NET_XMIT_CN; | 48 | return NET_XMIT_CN; |
@@ -66,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
66 | 50 | ||
67 | static int fifo_init(struct Qdisc *sch, struct nlattr *opt) | 51 | static int fifo_init(struct Qdisc *sch, struct nlattr *opt) |
68 | { | 52 | { |
69 | struct fifo_sched_data *q = qdisc_priv(sch); | 53 | bool bypass; |
54 | bool is_bfifo = sch->ops == &bfifo_qdisc_ops; | ||
70 | 55 | ||
71 | if (opt == NULL) { | 56 | if (opt == NULL) { |
72 | u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; | 57 | u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; |
73 | 58 | ||
74 | if (sch->ops == &bfifo_qdisc_ops) | 59 | if (is_bfifo) |
75 | limit *= psched_mtu(qdisc_dev(sch)); | 60 | limit *= psched_mtu(qdisc_dev(sch)); |
76 | 61 | ||
77 | q->limit = limit; | 62 | sch->limit = limit; |
78 | } else { | 63 | } else { |
79 | struct tc_fifo_qopt *ctl = nla_data(opt); | 64 | struct tc_fifo_qopt *ctl = nla_data(opt); |
80 | 65 | ||
81 | if (nla_len(opt) < sizeof(*ctl)) | 66 | if (nla_len(opt) < sizeof(*ctl)) |
82 | return -EINVAL; | 67 | return -EINVAL; |
83 | 68 | ||
84 | q->limit = ctl->limit; | 69 | sch->limit = ctl->limit; |
85 | } | 70 | } |
86 | 71 | ||
72 | if (is_bfifo) | ||
73 | bypass = sch->limit >= psched_mtu(qdisc_dev(sch)); | ||
74 | else | ||
75 | bypass = sch->limit >= 1; | ||
76 | |||
77 | if (bypass) | ||
78 | sch->flags |= TCQ_F_CAN_BYPASS; | ||
79 | else | ||
80 | sch->flags &= ~TCQ_F_CAN_BYPASS; | ||
87 | return 0; | 81 | return 0; |
88 | } | 82 | } |
89 | 83 | ||
90 | static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) | 84 | static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) |
91 | { | 85 | { |
92 | struct fifo_sched_data *q = qdisc_priv(sch); | 86 | struct tc_fifo_qopt opt = { .limit = sch->limit }; |
93 | struct tc_fifo_qopt opt = { .limit = q->limit }; | ||
94 | 87 | ||
95 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 88 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
96 | return skb->len; | 89 | return skb->len; |
@@ -101,7 +94,7 @@ nla_put_failure: | |||
101 | 94 | ||
102 | struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { | 95 | struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { |
103 | .id = "pfifo", | 96 | .id = "pfifo", |
104 | .priv_size = sizeof(struct fifo_sched_data), | 97 | .priv_size = 0, |
105 | .enqueue = pfifo_enqueue, | 98 | .enqueue = pfifo_enqueue, |
106 | .dequeue = qdisc_dequeue_head, | 99 | .dequeue = qdisc_dequeue_head, |
107 | .peek = qdisc_peek_head, | 100 | .peek = qdisc_peek_head, |
@@ -116,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops); | |||
116 | 109 | ||
117 | struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { | 110 | struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { |
118 | .id = "bfifo", | 111 | .id = "bfifo", |
119 | .priv_size = sizeof(struct fifo_sched_data), | 112 | .priv_size = 0, |
120 | .enqueue = bfifo_enqueue, | 113 | .enqueue = bfifo_enqueue, |
121 | .dequeue = qdisc_dequeue_head, | 114 | .dequeue = qdisc_dequeue_head, |
122 | .peek = qdisc_peek_head, | 115 | .peek = qdisc_peek_head, |
@@ -131,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops); | |||
131 | 124 | ||
132 | struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { | 125 | struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { |
133 | .id = "pfifo_head_drop", | 126 | .id = "pfifo_head_drop", |
134 | .priv_size = sizeof(struct fifo_sched_data), | 127 | .priv_size = 0, |
135 | .enqueue = pfifo_tail_enqueue, | 128 | .enqueue = pfifo_tail_enqueue, |
136 | .dequeue = qdisc_dequeue_head, | 129 | .dequeue = qdisc_dequeue_head, |
137 | .peek = qdisc_peek_head, | 130 | .peek = qdisc_peek_head, |
@@ -172,8 +165,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, | |||
172 | struct Qdisc *q; | 165 | struct Qdisc *q; |
173 | int err = -ENOMEM; | 166 | int err = -ENOMEM; |
174 | 167 | ||
175 | q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 168 | q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1)); |
176 | ops, TC_H_MAKE(sch->handle, 1)); | ||
177 | if (q) { | 169 | if (q) { |
178 | err = fifo_set_limit(q, limit); | 170 | err = fifo_set_limit(q, limit); |
179 | if (err < 0) { | 171 | if (err < 0) { |
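The fifo_init() change above derives the new TCQ_F_CAN_BYPASS flag from the configured limit: a byte-limited bfifo may be bypassed only when a full-MTU packet always fits, and a packet-limited pfifo only when the limit admits at least one packet. Below is a small standalone sketch of that rule; the enum and function names are simplified stand-ins, not the kernel structures.

#include <stdbool.h>
#include <stdio.h>

enum fifo_kind { BFIFO, PFIFO };         /* byte-limited vs packet-limited */

/* Mirrors the bypass test added to fifo_init(): an empty qdisc may be
 * skipped only when a single packet is always within the limit. */
static bool can_bypass(enum fifo_kind kind, unsigned int limit, unsigned int mtu)
{
	if (kind == BFIFO)
		return limit >= mtu;     /* limit counts bytes */
	return limit >= 1;               /* limit counts packets */
}

int main(void)
{
	printf("bfifo, limit 1000B,  mtu 1500 -> bypass=%d\n", can_bypass(BFIFO, 1000, 1500));
	printf("bfifo, limit 15000B, mtu 1500 -> bypass=%d\n", can_bypass(BFIFO, 15000, 1500));
	printf("pfifo, limit 0 packets        -> bypass=%d\n", can_bypass(PFIFO, 0, 1500));
	return 0;
}

The flag lets the transmit path skip an empty qdisc entirely when doing so is safe, instead of re-checking the limit for every packet.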
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 2aeb3a4386a1..b4c680900d7a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c | |||
@@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) | |||
60 | 60 | ||
61 | /* check the reason of requeuing without tx lock first */ | 61 | /* check the reason of requeuing without tx lock first */ |
62 | txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); | 62 | txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); |
63 | if (!netif_tx_queue_stopped(txq) && | 63 | if (!netif_tx_queue_frozen_or_stopped(txq)) { |
64 | !netif_tx_queue_frozen(txq)) { | ||
65 | q->gso_skb = NULL; | 64 | q->gso_skb = NULL; |
66 | q->q.qlen--; | 65 | q->q.qlen--; |
67 | } else | 66 | } else |
@@ -88,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb, | |||
88 | */ | 87 | */ |
89 | kfree_skb(skb); | 88 | kfree_skb(skb); |
90 | if (net_ratelimit()) | 89 | if (net_ratelimit()) |
91 | printk(KERN_WARNING "Dead loop on netdevice %s, " | 90 | pr_warning("Dead loop on netdevice %s, fix it urgently!\n", |
92 | "fix it urgently!\n", dev_queue->dev->name); | 91 | dev_queue->dev->name); |
93 | ret = qdisc_qlen(q); | 92 | ret = qdisc_qlen(q); |
94 | } else { | 93 | } else { |
95 | /* | 94 | /* |
@@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, | |||
122 | spin_unlock(root_lock); | 121 | spin_unlock(root_lock); |
123 | 122 | ||
124 | HARD_TX_LOCK(dev, txq, smp_processor_id()); | 123 | HARD_TX_LOCK(dev, txq, smp_processor_id()); |
125 | if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) | 124 | if (!netif_tx_queue_frozen_or_stopped(txq)) |
126 | ret = dev_hard_start_xmit(skb, dev, txq); | 125 | ret = dev_hard_start_xmit(skb, dev, txq); |
127 | 126 | ||
128 | HARD_TX_UNLOCK(dev, txq); | 127 | HARD_TX_UNLOCK(dev, txq); |
@@ -138,14 +137,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, | |||
138 | } else { | 137 | } else { |
139 | /* Driver returned NETDEV_TX_BUSY - requeue skb */ | 138 | /* Driver returned NETDEV_TX_BUSY - requeue skb */ |
140 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) | 139 | if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) |
141 | printk(KERN_WARNING "BUG %s code %d qlen %d\n", | 140 | pr_warning("BUG %s code %d qlen %d\n", |
142 | dev->name, ret, q->q.qlen); | 141 | dev->name, ret, q->q.qlen); |
143 | 142 | ||
144 | ret = dev_requeue_skb(skb, q); | 143 | ret = dev_requeue_skb(skb, q); |
145 | } | 144 | } |
146 | 145 | ||
147 | if (ret && (netif_tx_queue_stopped(txq) || | 146 | if (ret && netif_tx_queue_frozen_or_stopped(txq)) |
148 | netif_tx_queue_frozen(txq))) | ||
149 | ret = 0; | 147 | ret = 0; |
150 | 148 | ||
151 | return ret; | 149 | return ret; |
@@ -253,9 +251,8 @@ static void dev_watchdog(unsigned long arg) | |||
253 | } | 251 | } |
254 | 252 | ||
255 | if (some_queue_timedout) { | 253 | if (some_queue_timedout) { |
256 | char drivername[64]; | ||
257 | WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", | 254 | WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", |
258 | dev->name, netdev_drivername(dev, drivername, 64), i); | 255 | dev->name, netdev_drivername(dev), i); |
259 | dev->netdev_ops->ndo_tx_timeout(dev); | 256 | dev->netdev_ops->ndo_tx_timeout(dev); |
260 | } | 257 | } |
261 | if (!mod_timer(&dev->watchdog_timer, | 258 | if (!mod_timer(&dev->watchdog_timer, |
@@ -383,6 +380,7 @@ struct Qdisc noop_qdisc = { | |||
383 | .list = LIST_HEAD_INIT(noop_qdisc.list), | 380 | .list = LIST_HEAD_INIT(noop_qdisc.list), |
384 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), | 381 | .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), |
385 | .dev_queue = &noop_netdev_queue, | 382 | .dev_queue = &noop_netdev_queue, |
383 | .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock), | ||
386 | }; | 384 | }; |
387 | EXPORT_SYMBOL(noop_qdisc); | 385 | EXPORT_SYMBOL(noop_qdisc); |
388 | 386 | ||
@@ -409,11 +407,13 @@ static struct Qdisc noqueue_qdisc = { | |||
409 | .list = LIST_HEAD_INIT(noqueue_qdisc.list), | 407 | .list = LIST_HEAD_INIT(noqueue_qdisc.list), |
410 | .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), | 408 | .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), |
411 | .dev_queue = &noqueue_netdev_queue, | 409 | .dev_queue = &noqueue_netdev_queue, |
410 | .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock), | ||
412 | }; | 411 | }; |
413 | 412 | ||
414 | 413 | ||
415 | static const u8 prio2band[TC_PRIO_MAX+1] = | 414 | static const u8 prio2band[TC_PRIO_MAX + 1] = { |
416 | { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; | 415 | 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 |
416 | }; | ||
417 | 417 | ||
418 | /* 3-band FIFO queue: old style, but should be a bit faster than | 418 | /* 3-band FIFO queue: old style, but should be a bit faster than |
419 | generic prio+fifo combination. | 419 | generic prio+fifo combination. |
@@ -445,7 +445,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, | |||
445 | return priv->q + band; | 445 | return priv->q + band; |
446 | } | 446 | } |
447 | 447 | ||
448 | static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) | 448 | static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc) |
449 | { | 449 | { |
450 | if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { | 450 | if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { |
451 | int band = prio2band[skb->priority & TC_PRIO_MAX]; | 451 | int band = prio2band[skb->priority & TC_PRIO_MAX]; |
@@ -460,7 +460,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) | |||
460 | return qdisc_drop(skb, qdisc); | 460 | return qdisc_drop(skb, qdisc); |
461 | } | 461 | } |
462 | 462 | ||
463 | static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) | 463 | static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) |
464 | { | 464 | { |
465 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); | 465 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); |
466 | int band = bitmap2band[priv->bitmap]; | 466 | int band = bitmap2band[priv->bitmap]; |
@@ -479,7 +479,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) | |||
479 | return NULL; | 479 | return NULL; |
480 | } | 480 | } |
481 | 481 | ||
482 | static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) | 482 | static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) |
483 | { | 483 | { |
484 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); | 484 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); |
485 | int band = bitmap2band[priv->bitmap]; | 485 | int band = bitmap2band[priv->bitmap]; |
@@ -493,7 +493,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) | |||
493 | return NULL; | 493 | return NULL; |
494 | } | 494 | } |
495 | 495 | ||
496 | static void pfifo_fast_reset(struct Qdisc* qdisc) | 496 | static void pfifo_fast_reset(struct Qdisc *qdisc) |
497 | { | 497 | { |
498 | int prio; | 498 | int prio; |
499 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); | 499 | struct pfifo_fast_priv *priv = qdisc_priv(qdisc); |
@@ -510,7 +510,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb) | |||
510 | { | 510 | { |
511 | struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; | 511 | struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; |
512 | 512 | ||
513 | memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); | 513 | memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1); |
514 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 514 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
515 | return skb->len; | 515 | return skb->len; |
516 | 516 | ||
@@ -526,6 +526,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) | |||
526 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) | 526 | for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) |
527 | skb_queue_head_init(band2list(priv, prio)); | 527 | skb_queue_head_init(band2list(priv, prio)); |
528 | 528 | ||
529 | /* Can by-pass the queue discipline */ | ||
530 | qdisc->flags |= TCQ_F_CAN_BYPASS; | ||
529 | return 0; | 531 | return 0; |
530 | } | 532 | } |
531 | 533 | ||
@@ -540,25 +542,32 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = { | |||
540 | .dump = pfifo_fast_dump, | 542 | .dump = pfifo_fast_dump, |
541 | .owner = THIS_MODULE, | 543 | .owner = THIS_MODULE, |
542 | }; | 544 | }; |
545 | EXPORT_SYMBOL(pfifo_fast_ops); | ||
543 | 546 | ||
544 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, | 547 | struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, |
545 | struct Qdisc_ops *ops) | 548 | struct Qdisc_ops *ops) |
546 | { | 549 | { |
547 | void *p; | 550 | void *p; |
548 | struct Qdisc *sch; | 551 | struct Qdisc *sch; |
549 | unsigned int size; | 552 | unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; |
550 | int err = -ENOBUFS; | 553 | int err = -ENOBUFS; |
551 | 554 | ||
552 | /* ensure that the Qdisc and the private data are 64-byte aligned */ | 555 | p = kzalloc_node(size, GFP_KERNEL, |
553 | size = QDISC_ALIGN(sizeof(*sch)); | 556 | netdev_queue_numa_node_read(dev_queue)); |
554 | size += ops->priv_size + (QDISC_ALIGNTO - 1); | ||
555 | 557 | ||
556 | p = kzalloc(size, GFP_KERNEL); | ||
557 | if (!p) | 558 | if (!p) |
558 | goto errout; | 559 | goto errout; |
559 | sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); | 560 | sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); |
560 | sch->padded = (char *) sch - (char *) p; | 561 | /* if we got non aligned memory, ask more and do alignment ourself */ |
561 | 562 | if (sch != p) { | |
563 | kfree(p); | ||
564 | p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL, | ||
565 | netdev_queue_numa_node_read(dev_queue)); | ||
566 | if (!p) | ||
567 | goto errout; | ||
568 | sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); | ||
569 | sch->padded = (char *) sch - (char *) p; | ||
570 | } | ||
562 | INIT_LIST_HEAD(&sch->list); | 571 | INIT_LIST_HEAD(&sch->list); |
563 | skb_queue_head_init(&sch->q); | 572 | skb_queue_head_init(&sch->q); |
564 | spin_lock_init(&sch->busylock); | 573 | spin_lock_init(&sch->busylock); |
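
The rewritten qdisc_alloc first asks kzalloc_node for exactly the aligned size on the queue's NUMA node and only falls back to over-allocating (size + QDISC_ALIGNTO - 1) when the returned pointer happens to be unaligned, recording the offset in sch->padded so the allocation can be freed later. A userspace sketch of the same two-step pattern, with invented names and calloc standing in for kzalloc_node:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define ALIGNTO     32UL
    #define ALIGN_UP(x) (((x) + ALIGNTO - 1) & ~(ALIGNTO - 1))

    struct obj { size_t padded; char payload[100]; };

    static struct obj *alloc_aligned(size_t priv_size)
    {
        size_t size = ALIGN_UP(sizeof(struct obj)) + priv_size;
        void *p = calloc(1, size);      /* optimistic: often already aligned */
        struct obj *o;

        if (!p)
            return NULL;
        o = (struct obj *)ALIGN_UP((uintptr_t)p);
        if ((void *)o != p) {           /* unlucky: retry with slack and align by hand */
            free(p);
            p = calloc(1, size + ALIGNTO - 1);
            if (!p)
                return NULL;
            o = (struct obj *)ALIGN_UP((uintptr_t)p);
            o->padded = (char *)o - (char *)p;  /* remember offset for free() */
        }
        /* in the aligned case calloc zeroed ->padded, so it stays 0 */
        return o;
    }

    int main(void)
    {
        struct obj *o = alloc_aligned(64);

        printf("padded by %zu bytes\n", o ? o->padded : 0);
        free(o ? (char *)o - o->padded : NULL);
        return 0;
    }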
@@ -574,10 +583,8 @@ errout: | |||
574 | return ERR_PTR(err); | 583 | return ERR_PTR(err); |
575 | } | 584 | } |
576 | 585 | ||
577 | struct Qdisc * qdisc_create_dflt(struct net_device *dev, | 586 | struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, |
578 | struct netdev_queue *dev_queue, | 587 | struct Qdisc_ops *ops, unsigned int parentid) |
579 | struct Qdisc_ops *ops, | ||
580 | unsigned int parentid) | ||
581 | { | 588 | { |
582 | struct Qdisc *sch; | 589 | struct Qdisc *sch; |
583 | 590 | ||
@@ -630,7 +637,7 @@ void qdisc_destroy(struct Qdisc *qdisc) | |||
630 | #ifdef CONFIG_NET_SCHED | 637 | #ifdef CONFIG_NET_SCHED |
631 | qdisc_list_del(qdisc); | 638 | qdisc_list_del(qdisc); |
632 | 639 | ||
633 | qdisc_put_stab(qdisc->stab); | 640 | qdisc_put_stab(rtnl_dereference(qdisc->stab)); |
634 | #endif | 641 | #endif |
635 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); | 642 | gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); |
636 | if (ops->reset) | 643 | if (ops->reset) |
@@ -674,25 +681,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, | |||
674 | 681 | ||
675 | return oqdisc; | 682 | return oqdisc; |
676 | } | 683 | } |
684 | EXPORT_SYMBOL(dev_graft_qdisc); | ||
677 | 685 | ||
678 | static void attach_one_default_qdisc(struct net_device *dev, | 686 | static void attach_one_default_qdisc(struct net_device *dev, |
679 | struct netdev_queue *dev_queue, | 687 | struct netdev_queue *dev_queue, |
680 | void *_unused) | 688 | void *_unused) |
681 | { | 689 | { |
682 | struct Qdisc *qdisc; | 690 | struct Qdisc *qdisc = &noqueue_qdisc; |
683 | 691 | ||
684 | if (dev->tx_queue_len) { | 692 | if (dev->tx_queue_len) { |
685 | qdisc = qdisc_create_dflt(dev, dev_queue, | 693 | qdisc = qdisc_create_dflt(dev_queue, |
686 | &pfifo_fast_ops, TC_H_ROOT); | 694 | &pfifo_fast_ops, TC_H_ROOT); |
687 | if (!qdisc) { | 695 | if (!qdisc) { |
688 | printk(KERN_INFO "%s: activation failed\n", dev->name); | 696 | netdev_info(dev, "activation failed\n"); |
689 | return; | 697 | return; |
690 | } | 698 | } |
691 | |||
692 | /* Can by-pass the queue discipline for default qdisc */ | ||
693 | qdisc->flags |= TCQ_F_CAN_BYPASS; | ||
694 | } else { | ||
695 | qdisc = &noqueue_qdisc; | ||
696 | } | 699 | } |
697 | dev_queue->qdisc_sleeping = qdisc; | 700 | dev_queue->qdisc_sleeping = qdisc; |
698 | } | 701 | } |
@@ -709,7 +712,7 @@ static void attach_default_qdiscs(struct net_device *dev) | |||
709 | dev->qdisc = txq->qdisc_sleeping; | 712 | dev->qdisc = txq->qdisc_sleeping; |
710 | atomic_inc(&dev->qdisc->refcnt); | 713 | atomic_inc(&dev->qdisc->refcnt); |
711 | } else { | 714 | } else { |
712 | qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); | 715 | qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); |
713 | if (qdisc) { | 716 | if (qdisc) { |
714 | qdisc->ops->attach(qdisc); | 717 | qdisc->ops->attach(qdisc); |
715 | dev->qdisc = qdisc; | 718 | dev->qdisc = qdisc; |
@@ -753,13 +756,15 @@ void dev_activate(struct net_device *dev) | |||
753 | 756 | ||
754 | need_watchdog = 0; | 757 | need_watchdog = 0; |
755 | netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); | 758 | netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); |
756 | transition_one_qdisc(dev, &dev->rx_queue, NULL); | 759 | if (dev_ingress_queue(dev)) |
760 | transition_one_qdisc(dev, dev_ingress_queue(dev), NULL); | ||
757 | 761 | ||
758 | if (need_watchdog) { | 762 | if (need_watchdog) { |
759 | dev->trans_start = jiffies; | 763 | dev->trans_start = jiffies; |
760 | dev_watchdog_up(dev); | 764 | dev_watchdog_up(dev); |
761 | } | 765 | } |
762 | } | 766 | } |
767 | EXPORT_SYMBOL(dev_activate); | ||
763 | 768 | ||
764 | static void dev_deactivate_queue(struct net_device *dev, | 769 | static void dev_deactivate_queue(struct net_device *dev, |
765 | struct netdev_queue *dev_queue, | 770 | struct netdev_queue *dev_queue, |
@@ -809,20 +814,51 @@ static bool some_qdisc_is_busy(struct net_device *dev) | |||
809 | return false; | 814 | return false; |
810 | } | 815 | } |
811 | 816 | ||
812 | void dev_deactivate(struct net_device *dev) | 817 | /** |
818 | * dev_deactivate_many - deactivate transmissions on several devices | ||
819 | * @head: list of devices to deactivate | ||
820 | * | ||
821 | * This function returns only when all outstanding transmissions | ||
822 | * have completed, unless all devices are in dismantle phase. | ||
823 | */ | ||
824 | void dev_deactivate_many(struct list_head *head) | ||
813 | { | 825 | { |
814 | netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); | 826 | struct net_device *dev; |
815 | dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); | 827 | bool sync_needed = false; |
816 | 828 | ||
817 | dev_watchdog_down(dev); | 829 | list_for_each_entry(dev, head, unreg_list) { |
830 | netdev_for_each_tx_queue(dev, dev_deactivate_queue, | ||
831 | &noop_qdisc); | ||
832 | if (dev_ingress_queue(dev)) | ||
833 | dev_deactivate_queue(dev, dev_ingress_queue(dev), | ||
834 | &noop_qdisc); | ||
818 | 835 | ||
819 | /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ | 836 | dev_watchdog_down(dev); |
820 | synchronize_rcu(); | 837 | sync_needed |= !dev->dismantle; |
838 | } | ||
839 | |||
840 | /* Wait for outstanding qdisc-less dev_queue_xmit calls. | ||
841 | * This is avoided if all devices are in dismantle phase : | ||
842 | * Caller will call synchronize_net() for us | ||
843 | */ | ||
844 | if (sync_needed) | ||
845 | synchronize_net(); | ||
821 | 846 | ||
822 | /* Wait for outstanding qdisc_run calls. */ | 847 | /* Wait for outstanding qdisc_run calls. */ |
823 | while (some_qdisc_is_busy(dev)) | 848 | list_for_each_entry(dev, head, unreg_list) |
824 | yield(); | 849 | while (some_qdisc_is_busy(dev)) |
850 | yield(); | ||
851 | } | ||
852 | |||
853 | void dev_deactivate(struct net_device *dev) | ||
854 | { | ||
855 | LIST_HEAD(single); | ||
856 | |||
857 | list_add(&dev->unreg_list, &single); | ||
858 | dev_deactivate_many(&single); | ||
859 | list_del(&single); | ||
825 | } | 860 | } |
861 | EXPORT_SYMBOL(dev_deactivate); | ||
826 | 862 | ||
827 | static void dev_init_scheduler_queue(struct net_device *dev, | 863 | static void dev_init_scheduler_queue(struct net_device *dev, |
828 | struct netdev_queue *dev_queue, | 864 | struct netdev_queue *dev_queue, |
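
dev_deactivate_many, added above, is the batched form: every device on the list gets its queues swapped to noop_qdisc and its watchdog stopped first, then a single synchronize_net() covers the whole batch (and is skipped entirely when every device is in the dismantle phase, since the caller will synchronize anyway), and only then does it wait for still-busy qdiscs. dev_deactivate just wraps one device in a one-entry list. A hedged, userspace-only model of the "cheap per-item work, then one shared barrier" structure, with invented names:

    #include <stdbool.h>
    #include <stdio.h>

    struct dev { const char *name; bool dismantle; };

    static void quiesce_queues(struct dev *d) { printf("quiesce %s\n", d->name); }
    static void expensive_barrier(void)       { puts("one barrier for the whole batch"); }

    static void deactivate_many(struct dev *devs, int n)
    {
        bool sync_needed = false;
        int i;

        for (i = 0; i < n; i++) {          /* phase 1: per-device, cheap */
            quiesce_queues(&devs[i]);
            sync_needed |= !devs[i].dismantle;
        }
        if (sync_needed)                   /* phase 2: one barrier, amortized */
            expensive_barrier();
        /* phase 3 would wait here for any per-device work still in flight */
    }

    int main(void)
    {
        struct dev devs[] = { { "eth0", false }, { "eth1", true } };

        deactivate_many(devs, 2);
        return 0;
    }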
@@ -838,7 +874,8 @@ void dev_init_scheduler(struct net_device *dev) | |||
838 | { | 874 | { |
839 | dev->qdisc = &noop_qdisc; | 875 | dev->qdisc = &noop_qdisc; |
840 | netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); | 876 | netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); |
841 | dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); | 877 | if (dev_ingress_queue(dev)) |
878 | dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); | ||
842 | 879 | ||
843 | setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); | 880 | setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); |
844 | } | 881 | } |
@@ -861,7 +898,8 @@ static void shutdown_scheduler_queue(struct net_device *dev, | |||
861 | void dev_shutdown(struct net_device *dev) | 898 | void dev_shutdown(struct net_device *dev) |
862 | { | 899 | { |
863 | netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); | 900 | netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); |
864 | shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); | 901 | if (dev_ingress_queue(dev)) |
902 | shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); | ||
865 | qdisc_destroy(dev->qdisc); | 903 | qdisc_destroy(dev->qdisc); |
866 | dev->qdisc = &noop_qdisc; | 904 | dev->qdisc = &noop_qdisc; |
867 | 905 | ||
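
Throughout sch_generic.c the fixed &dev->rx_queue reference is replaced by the dev_ingress_queue() accessor, which may return NULL when no ingress queue has been allocated, so dev_activate, dev_deactivate_many, dev_init_scheduler and dev_shutdown all gain an explicit check before touching it. A small model of that optional-sub-object guard together with the callback-per-queue iteration used by netdev_for_each_tx_queue; the names here are invented for the sketch:

    #include <stdio.h>
    #include <stddef.h>

    struct queue { int id; };
    struct device {
        struct queue tx[4];
        int num_tx;
        struct queue *ingress;   /* may be NULL when no ingress queue exists */
    };

    typedef void (*queue_cb)(struct device *dev, struct queue *q, void *arg);

    static void for_each_tx_queue(struct device *dev, queue_cb cb, void *arg)
    {
        for (int i = 0; i < dev->num_tx; i++)
            cb(dev, &dev->tx[i], arg);
    }

    static void reset_queue(struct device *dev, struct queue *q, void *arg)
    {
        (void)dev; (void)arg;
        printf("reset queue %d\n", q->id);
    }

    int main(void)
    {
        struct device dev = { .tx = { {0}, {1}, {2}, {3} }, .num_tx = 4,
                              .ingress = NULL };

        for_each_tx_queue(&dev, reset_queue, NULL);
        if (dev.ingress)                 /* guard: the ingress queue is optional */
            reset_queue(&dev, dev.ingress, NULL);
        return 0;
    }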
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 51dcc2aa5c92..b9493a09a870 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c | |||
@@ -32,8 +32,7 @@ | |||
32 | struct gred_sched_data; | 32 | struct gred_sched_data; |
33 | struct gred_sched; | 33 | struct gred_sched; |
34 | 34 | ||
35 | struct gred_sched_data | 35 | struct gred_sched_data { |
36 | { | ||
37 | u32 limit; /* HARD maximal queue length */ | 36 | u32 limit; /* HARD maximal queue length */ |
38 | u32 DP; /* the drop pramaters */ | 37 | u32 DP; /* the drop pramaters */ |
39 | u32 bytesin; /* bytes seen on virtualQ so far*/ | 38 | u32 bytesin; /* bytes seen on virtualQ so far*/ |
@@ -50,8 +49,7 @@ enum { | |||
50 | GRED_RIO_MODE, | 49 | GRED_RIO_MODE, |
51 | }; | 50 | }; |
52 | 51 | ||
53 | struct gred_sched | 52 | struct gred_sched { |
54 | { | ||
55 | struct gred_sched_data *tab[MAX_DPs]; | 53 | struct gred_sched_data *tab[MAX_DPs]; |
56 | unsigned long flags; | 54 | unsigned long flags; |
57 | u32 red_flags; | 55 | u32 red_flags; |
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t) | |||
150 | return t->red_flags & TC_RED_HARDDROP; | 148 | return t->red_flags & TC_RED_HARDDROP; |
151 | } | 149 | } |
152 | 150 | ||
153 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 151 | static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
154 | { | 152 | { |
155 | struct gred_sched_data *q=NULL; | 153 | struct gred_sched_data *q = NULL; |
156 | struct gred_sched *t= qdisc_priv(sch); | 154 | struct gred_sched *t = qdisc_priv(sch); |
157 | unsigned long qavg = 0; | 155 | unsigned long qavg = 0; |
158 | u16 dp = tc_index_to_dp(skb); | 156 | u16 dp = tc_index_to_dp(skb); |
159 | 157 | ||
160 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 158 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
161 | dp = t->def; | 159 | dp = t->def; |
162 | 160 | ||
163 | if ((q = t->tab[dp]) == NULL) { | 161 | q = t->tab[dp]; |
162 | if (!q) { | ||
164 | /* Pass through packets not assigned to a DP | 163 | /* Pass through packets not assigned to a DP |
165 | * if no default DP has been configured. This | 164 | * if no default DP has been configured. This |
166 | * allows for DP flows to be left untouched. | 165 | * allows for DP flows to be left untouched. |
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
183 | for (i = 0; i < t->DPs; i++) { | 182 | for (i = 0; i < t->DPs; i++) { |
184 | if (t->tab[i] && t->tab[i]->prio < q->prio && | 183 | if (t->tab[i] && t->tab[i]->prio < q->prio && |
185 | !red_is_idling(&t->tab[i]->parms)) | 184 | !red_is_idling(&t->tab[i]->parms)) |
186 | qavg +=t->tab[i]->parms.qavg; | 185 | qavg += t->tab[i]->parms.qavg; |
187 | } | 186 | } |
188 | 187 | ||
189 | } | 188 | } |
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
203 | gred_store_wred_set(t, q); | 202 | gred_store_wred_set(t, q); |
204 | 203 | ||
205 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { | 204 | switch (red_action(&q->parms, q->parms.qavg + qavg)) { |
206 | case RED_DONT_MARK: | 205 | case RED_DONT_MARK: |
207 | break; | 206 | break; |
208 | 207 | ||
209 | case RED_PROB_MARK: | 208 | case RED_PROB_MARK: |
210 | sch->qstats.overlimits++; | 209 | sch->qstats.overlimits++; |
211 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { | 210 | if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { |
212 | q->stats.prob_drop++; | 211 | q->stats.prob_drop++; |
213 | goto congestion_drop; | 212 | goto congestion_drop; |
214 | } | 213 | } |
215 | 214 | ||
216 | q->stats.prob_mark++; | 215 | q->stats.prob_mark++; |
217 | break; | 216 | break; |
218 | 217 | ||
219 | case RED_HARD_MARK: | 218 | case RED_HARD_MARK: |
220 | sch->qstats.overlimits++; | 219 | sch->qstats.overlimits++; |
221 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || | 220 | if (gred_use_harddrop(t) || !gred_use_ecn(t) || |
222 | !INET_ECN_set_ce(skb)) { | 221 | !INET_ECN_set_ce(skb)) { |
223 | q->stats.forced_drop++; | 222 | q->stats.forced_drop++; |
224 | goto congestion_drop; | 223 | goto congestion_drop; |
225 | } | 224 | } |
226 | q->stats.forced_mark++; | 225 | q->stats.forced_mark++; |
227 | break; | 226 | break; |
228 | } | 227 | } |
229 | 228 | ||
230 | if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { | 229 | if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { |
@@ -241,7 +240,7 @@ congestion_drop: | |||
241 | return NET_XMIT_CN; | 240 | return NET_XMIT_CN; |
242 | } | 241 | } |
243 | 242 | ||
244 | static struct sk_buff *gred_dequeue(struct Qdisc* sch) | 243 | static struct sk_buff *gred_dequeue(struct Qdisc *sch) |
245 | { | 244 | { |
246 | struct sk_buff *skb; | 245 | struct sk_buff *skb; |
247 | struct gred_sched *t = qdisc_priv(sch); | 246 | struct gred_sched *t = qdisc_priv(sch); |
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) | |||
254 | 253 | ||
255 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 254 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
256 | if (net_ratelimit()) | 255 | if (net_ratelimit()) |
257 | printk(KERN_WARNING "GRED: Unable to relocate " | 256 | pr_warning("GRED: Unable to relocate VQ 0x%x " |
258 | "VQ 0x%x after dequeue, screwing up " | 257 | "after dequeue, screwing up " |
259 | "backlog.\n", tc_index_to_dp(skb)); | 258 | "backlog.\n", tc_index_to_dp(skb)); |
260 | } else { | 259 | } else { |
261 | q->backlog -= qdisc_pkt_len(skb); | 260 | q->backlog -= qdisc_pkt_len(skb); |
262 | 261 | ||
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch) | |||
273 | return NULL; | 272 | return NULL; |
274 | } | 273 | } |
275 | 274 | ||
276 | static unsigned int gred_drop(struct Qdisc* sch) | 275 | static unsigned int gred_drop(struct Qdisc *sch) |
277 | { | 276 | { |
278 | struct sk_buff *skb; | 277 | struct sk_buff *skb; |
279 | struct gred_sched *t = qdisc_priv(sch); | 278 | struct gred_sched *t = qdisc_priv(sch); |
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
286 | 285 | ||
287 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { | 286 | if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { |
288 | if (net_ratelimit()) | 287 | if (net_ratelimit()) |
289 | printk(KERN_WARNING "GRED: Unable to relocate " | 288 | pr_warning("GRED: Unable to relocate VQ 0x%x " |
290 | "VQ 0x%x while dropping, screwing up " | 289 | "while dropping, screwing up " |
291 | "backlog.\n", tc_index_to_dp(skb)); | 290 | "backlog.\n", tc_index_to_dp(skb)); |
292 | } else { | 291 | } else { |
293 | q->backlog -= len; | 292 | q->backlog -= len; |
294 | q->stats.other++; | 293 | q->stats.other++; |
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch) | |||
308 | 307 | ||
309 | } | 308 | } |
310 | 309 | ||
311 | static void gred_reset(struct Qdisc* sch) | 310 | static void gred_reset(struct Qdisc *sch) |
312 | { | 311 | { |
313 | int i; | 312 | int i; |
314 | struct gred_sched *t = qdisc_priv(sch); | 313 | struct gred_sched *t = qdisc_priv(sch); |
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps) | |||
369 | 368 | ||
370 | for (i = table->DPs; i < MAX_DPs; i++) { | 369 | for (i = table->DPs; i < MAX_DPs; i++) { |
371 | if (table->tab[i]) { | 370 | if (table->tab[i]) { |
372 | printk(KERN_WARNING "GRED: Warning: Destroying " | 371 | pr_warning("GRED: Warning: Destroying " |
373 | "shadowed VQ 0x%x\n", i); | 372 | "shadowed VQ 0x%x\n", i); |
374 | gred_destroy_vq(table->tab[i]); | 373 | gred_destroy_vq(table->tab[i]); |
375 | table->tab[i] = NULL; | 374 | table->tab[i] = NULL; |
376 | } | 375 | } |
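
The gred_enqueue hunk above mostly re-indents the RED action switch, but the decision it encodes is worth spelling out: on RED_PROB_MARK and RED_HARD_MARK the packet is either ECN-marked (then enqueued) or dropped, depending on whether the scheduler runs in ECN mode, whether the packet can actually carry the CE mark, and (for hard marks) whether harddrop is forced. A compact, userspace-only restatement of that branch logic with simplified names:

    #include <stdbool.h>
    #include <stdio.h>

    enum red_action { DONT_MARK, PROB_MARK, HARD_MARK };
    enum verdict    { ENQUEUE, ECN_MARK, DROP };   /* ECN_MARK = set CE, then enqueue */

    /* use_ecn:    qdisc configured for ECN marking
     * harddrop:   drop on hard marks even when ECN is available
     * can_set_ce: the packet is ECN-capable (INET_ECN_set_ce would succeed) */
    static enum verdict decide(enum red_action a, bool use_ecn, bool harddrop,
                               bool can_set_ce)
    {
        switch (a) {
        case DONT_MARK:
            return ENQUEUE;
        case PROB_MARK:
            return (use_ecn && can_set_ce) ? ECN_MARK : DROP;
        case HARD_MARK:
            if (harddrop || !use_ecn || !can_set_ce)
                return DROP;
            return ECN_MARK;
        }
        return DROP;
    }

    int main(void)
    {
        printf("%d\n", decide(PROB_MARK, true, false, true));  /* ECN_MARK */
        printf("%d\n", decide(HARD_MARK, true, true, true));   /* DROP */
        return 0;
    }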
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 47496098d35c..6488e6425652 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c | |||
@@ -81,8 +81,7 @@ | |||
81 | * that are expensive on 32-bit architectures. | 81 | * that are expensive on 32-bit architectures. |
82 | */ | 82 | */ |
83 | 83 | ||
84 | struct internal_sc | 84 | struct internal_sc { |
85 | { | ||
86 | u64 sm1; /* scaled slope of the 1st segment */ | 85 | u64 sm1; /* scaled slope of the 1st segment */ |
87 | u64 ism1; /* scaled inverse-slope of the 1st segment */ | 86 | u64 ism1; /* scaled inverse-slope of the 1st segment */ |
88 | u64 dx; /* the x-projection of the 1st segment */ | 87 | u64 dx; /* the x-projection of the 1st segment */ |
@@ -92,8 +91,7 @@ struct internal_sc | |||
92 | }; | 91 | }; |
93 | 92 | ||
94 | /* runtime service curve */ | 93 | /* runtime service curve */ |
95 | struct runtime_sc | 94 | struct runtime_sc { |
96 | { | ||
97 | u64 x; /* current starting position on x-axis */ | 95 | u64 x; /* current starting position on x-axis */ |
98 | u64 y; /* current starting position on y-axis */ | 96 | u64 y; /* current starting position on y-axis */ |
99 | u64 sm1; /* scaled slope of the 1st segment */ | 97 | u64 sm1; /* scaled slope of the 1st segment */ |
@@ -104,15 +102,13 @@ struct runtime_sc | |||
104 | u64 ism2; /* scaled inverse-slope of the 2nd segment */ | 102 | u64 ism2; /* scaled inverse-slope of the 2nd segment */ |
105 | }; | 103 | }; |
106 | 104 | ||
107 | enum hfsc_class_flags | 105 | enum hfsc_class_flags { |
108 | { | ||
109 | HFSC_RSC = 0x1, | 106 | HFSC_RSC = 0x1, |
110 | HFSC_FSC = 0x2, | 107 | HFSC_FSC = 0x2, |
111 | HFSC_USC = 0x4 | 108 | HFSC_USC = 0x4 |
112 | }; | 109 | }; |
113 | 110 | ||
114 | struct hfsc_class | 111 | struct hfsc_class { |
115 | { | ||
116 | struct Qdisc_class_common cl_common; | 112 | struct Qdisc_class_common cl_common; |
117 | unsigned int refcnt; /* usage count */ | 113 | unsigned int refcnt; /* usage count */ |
118 | 114 | ||
@@ -140,8 +136,8 @@ struct hfsc_class | |||
140 | u64 cl_cumul; /* cumulative work in bytes done by | 136 | u64 cl_cumul; /* cumulative work in bytes done by |
141 | real-time criteria */ | 137 | real-time criteria */ |
142 | 138 | ||
143 | u64 cl_d; /* deadline*/ | 139 | u64 cl_d; /* deadline*/ |
144 | u64 cl_e; /* eligible time */ | 140 | u64 cl_e; /* eligible time */ |
145 | u64 cl_vt; /* virtual time */ | 141 | u64 cl_vt; /* virtual time */ |
146 | u64 cl_f; /* time when this class will fit for | 142 | u64 cl_f; /* time when this class will fit for |
147 | link-sharing, max(myf, cfmin) */ | 143 | link-sharing, max(myf, cfmin) */ |
@@ -176,8 +172,7 @@ struct hfsc_class | |||
176 | unsigned long cl_nactive; /* number of active children */ | 172 | unsigned long cl_nactive; /* number of active children */ |
177 | }; | 173 | }; |
178 | 174 | ||
179 | struct hfsc_sched | 175 | struct hfsc_sched { |
180 | { | ||
181 | u16 defcls; /* default class id */ | 176 | u16 defcls; /* default class id */ |
182 | struct hfsc_class root; /* root class */ | 177 | struct hfsc_class root; /* root class */ |
183 | struct Qdisc_class_hash clhash; /* class hash */ | 178 | struct Qdisc_class_hash clhash; /* class hash */ |
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len) | |||
693 | if (go_active) { | 688 | if (go_active) { |
694 | n = rb_last(&cl->cl_parent->vt_tree); | 689 | n = rb_last(&cl->cl_parent->vt_tree); |
695 | if (n != NULL) { | 690 | if (n != NULL) { |
696 | max_cl = rb_entry(n, struct hfsc_class,vt_node); | 691 | max_cl = rb_entry(n, struct hfsc_class, vt_node); |
697 | /* | 692 | /* |
698 | * set vt to the average of the min and max | 693 | * set vt to the average of the min and max |
699 | * classes. if the parent's period didn't | 694 | * classes. if the parent's period didn't |
@@ -1088,7 +1083,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
1088 | cl->refcnt = 1; | 1083 | cl->refcnt = 1; |
1089 | cl->sched = q; | 1084 | cl->sched = q; |
1090 | cl->cl_parent = parent; | 1085 | cl->cl_parent = parent; |
1091 | cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1086 | cl->qdisc = qdisc_create_dflt(sch->dev_queue, |
1092 | &pfifo_qdisc_ops, classid); | 1087 | &pfifo_qdisc_ops, classid); |
1093 | if (cl->qdisc == NULL) | 1088 | if (cl->qdisc == NULL) |
1094 | cl->qdisc = &noop_qdisc; | 1089 | cl->qdisc = &noop_qdisc; |
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
1177 | return NULL; | 1172 | return NULL; |
1178 | } | 1173 | } |
1179 | #endif | 1174 | #endif |
1180 | if ((cl = (struct hfsc_class *)res.class) == NULL) { | 1175 | cl = (struct hfsc_class *)res.class; |
1181 | if ((cl = hfsc_find_class(res.classid, sch)) == NULL) | 1176 | if (!cl) { |
1177 | cl = hfsc_find_class(res.classid, sch); | ||
1178 | if (!cl) | ||
1182 | break; /* filter selected invalid classid */ | 1179 | break; /* filter selected invalid classid */ |
1183 | if (cl->level >= head->level) | 1180 | if (cl->level >= head->level) |
1184 | break; /* filter may only point downwards */ | 1181 | break; /* filter may only point downwards */ |
@@ -1209,8 +1206,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |||
1209 | if (cl->level > 0) | 1206 | if (cl->level > 0) |
1210 | return -EINVAL; | 1207 | return -EINVAL; |
1211 | if (new == NULL) { | 1208 | if (new == NULL) { |
1212 | new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1209 | new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
1213 | &pfifo_qdisc_ops, | ||
1214 | cl->cl_common.classid); | 1210 | cl->cl_common.classid); |
1215 | if (new == NULL) | 1211 | if (new == NULL) |
1216 | new = &noop_qdisc; | 1212 | new = &noop_qdisc; |
@@ -1317,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc) | |||
1317 | return -1; | 1313 | return -1; |
1318 | } | 1314 | } |
1319 | 1315 | ||
1320 | static inline int | 1316 | static int |
1321 | hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) | 1317 | hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) |
1322 | { | 1318 | { |
1323 | if ((cl->cl_flags & HFSC_RSC) && | 1319 | if ((cl->cl_flags & HFSC_RSC) && |
@@ -1421,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch) | |||
1421 | struct hfsc_class *cl; | 1417 | struct hfsc_class *cl; |
1422 | u64 next_time = 0; | 1418 | u64 next_time = 0; |
1423 | 1419 | ||
1424 | if ((cl = eltree_get_minel(q)) != NULL) | 1420 | cl = eltree_get_minel(q); |
1421 | if (cl) | ||
1425 | next_time = cl->cl_e; | 1422 | next_time = cl->cl_e; |
1426 | if (q->root.cl_cfmin != 0) { | 1423 | if (q->root.cl_cfmin != 0) { |
1427 | if (next_time == 0 || next_time > q->root.cl_cfmin) | 1424 | if (next_time == 0 || next_time > q->root.cl_cfmin) |
@@ -1452,8 +1449,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) | |||
1452 | q->root.cl_common.classid = sch->handle; | 1449 | q->root.cl_common.classid = sch->handle; |
1453 | q->root.refcnt = 1; | 1450 | q->root.refcnt = 1; |
1454 | q->root.sched = q; | 1451 | q->root.sched = q; |
1455 | q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1452 | q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
1456 | &pfifo_qdisc_ops, | ||
1457 | sch->handle); | 1453 | sch->handle); |
1458 | if (q->root.qdisc == NULL) | 1454 | if (q->root.qdisc == NULL) |
1459 | q->root.qdisc = &noop_qdisc; | 1455 | q->root.qdisc = &noop_qdisc; |
@@ -1601,10 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
1601 | if (cl->qdisc->q.qlen == 1) | 1597 | if (cl->qdisc->q.qlen == 1) |
1602 | set_active(cl, qdisc_pkt_len(skb)); | 1598 | set_active(cl, qdisc_pkt_len(skb)); |
1603 | 1599 | ||
1604 | cl->bstats.packets++; | 1600 | bstats_update(&cl->bstats, skb); |
1605 | cl->bstats.bytes += qdisc_pkt_len(skb); | ||
1606 | sch->bstats.packets++; | ||
1607 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
1608 | sch->q.qlen++; | 1601 | sch->q.qlen++; |
1609 | 1602 | ||
1610 | return NET_XMIT_SUCCESS; | 1603 | return NET_XMIT_SUCCESS; |
@@ -1630,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch) | |||
1630 | * find the class with the minimum deadline among | 1623 | * find the class with the minimum deadline among |
1631 | * the eligible classes. | 1624 | * the eligible classes. |
1632 | */ | 1625 | */ |
1633 | if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { | 1626 | cl = eltree_get_mindl(q, cur_time); |
1627 | if (cl) { | ||
1634 | realtime = 1; | 1628 | realtime = 1; |
1635 | } else { | 1629 | } else { |
1636 | /* | 1630 | /* |
@@ -1669,7 +1663,8 @@ hfsc_dequeue(struct Qdisc *sch) | |||
1669 | set_passive(cl); | 1663 | set_passive(cl); |
1670 | } | 1664 | } |
1671 | 1665 | ||
1672 | sch->flags &= ~TCQ_F_THROTTLED; | 1666 | qdisc_unthrottled(sch); |
1667 | qdisc_bstats_update(sch, skb); | ||
1673 | sch->q.qlen--; | 1668 | sch->q.qlen--; |
1674 | 1669 | ||
1675 | return skb; | 1670 | return skb; |
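
Both the HFSC hunks above and the HTB hunks below replace hand-rolled bytes/packets bookkeeping with bstats_update()/qdisc_bstats_update(); judging by the removed skb_is_gso(skb) ? gso_segs : 1 expressions in HTB, the helper also counts a GSO super-packet as its segment count rather than as one packet. A minimal, self-contained model of that counter update — the struct and field names are invented for the sketch:

    #include <stdio.h>

    struct basic_stats { unsigned long long bytes; unsigned int packets; };

    struct toy_skb {
        unsigned int len;
        unsigned int gso_segs;   /* 0 or 1 when not a GSO super-packet */
    };

    static void stats_update(struct basic_stats *b, const struct toy_skb *skb)
    {
        b->bytes   += skb->len;
        b->packets += skb->gso_segs ? skb->gso_segs : 1;   /* count each segment */
    }

    int main(void)
    {
        struct basic_stats st = { 0, 0 };
        struct toy_skb tso   = { .len = 64000, .gso_segs = 45 };
        struct toy_skb small = { .len = 80,    .gso_segs = 0 };

        stats_update(&st, &tso);
        stats_update(&st, &small);
        printf("%u packets, %llu bytes\n", st.packets, st.bytes);  /* 46 packets */
        return 0;
    }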
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 4be8d04b262d..29b942ce9e82 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c | |||
@@ -99,9 +99,10 @@ struct htb_class { | |||
99 | struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ | 99 | struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ |
100 | struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ | 100 | struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ |
101 | /* When class changes from state 1->2 and disconnects from | 101 | /* When class changes from state 1->2 and disconnects from |
102 | parent's feed then we lost ptr value and start from the | 102 | * parent's feed then we lost ptr value and start from the |
103 | first child again. Here we store classid of the | 103 | * first child again. Here we store classid of the |
104 | last valid ptr (used when ptr is NULL). */ | 104 | * last valid ptr (used when ptr is NULL). |
105 | */ | ||
105 | u32 last_ptr_id[TC_HTB_NUMPRIO]; | 106 | u32 last_ptr_id[TC_HTB_NUMPRIO]; |
106 | } inner; | 107 | } inner; |
107 | } un; | 108 | } un; |
@@ -182,10 +183,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) | |||
182 | * filters in qdisc and in inner nodes (if higher filter points to the inner | 183 | * filters in qdisc and in inner nodes (if higher filter points to the inner |
183 | * node). If we end up with classid MAJOR:0 we enqueue the skb into special | 184 | * node). If we end up with classid MAJOR:0 we enqueue the skb into special |
184 | * internal fifo (direct). These packets then go directly thru. If we still | 185 | * internal fifo (direct). These packets then go directly thru. If we still |
185 | * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull | 186 | * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful |
186 | * then finish and return direct queue. | 187 | * then finish and return direct queue. |
187 | */ | 188 | */ |
188 | #define HTB_DIRECT (struct htb_class*)-1 | 189 | #define HTB_DIRECT ((struct htb_class *)-1L) |
189 | 190 | ||
190 | static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, | 191 | static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, |
191 | int *qerr) | 192 | int *qerr) |
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
197 | int result; | 198 | int result; |
198 | 199 | ||
199 | /* allow to select class by setting skb->priority to valid classid; | 200 | /* allow to select class by setting skb->priority to valid classid; |
200 | note that nfmark can be used too by attaching filter fw with no | 201 | * note that nfmark can be used too by attaching filter fw with no |
201 | rules in it */ | 202 | * rules in it |
203 | */ | ||
202 | if (skb->priority == sch->handle) | 204 | if (skb->priority == sch->handle) |
203 | return HTB_DIRECT; /* X:0 (direct flow) selected */ | 205 | return HTB_DIRECT; /* X:0 (direct flow) selected */ |
204 | if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) | 206 | cl = htb_find(skb->priority, sch); |
207 | if (cl && cl->level == 0) | ||
205 | return cl; | 208 | return cl; |
206 | 209 | ||
207 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | 210 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; |
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
216 | return NULL; | 219 | return NULL; |
217 | } | 220 | } |
218 | #endif | 221 | #endif |
219 | if ((cl = (void *)res.class) == NULL) { | 222 | cl = (void *)res.class; |
223 | if (!cl) { | ||
220 | if (res.classid == sch->handle) | 224 | if (res.classid == sch->handle) |
221 | return HTB_DIRECT; /* X:0 (direct flow) */ | 225 | return HTB_DIRECT; /* X:0 (direct flow) */ |
222 | if ((cl = htb_find(res.classid, sch)) == NULL) | 226 | cl = htb_find(res.classid, sch); |
227 | if (!cl) | ||
223 | break; /* filter selected invalid classid */ | 228 | break; /* filter selected invalid classid */ |
224 | } | 229 | } |
225 | if (!cl->level) | 230 | if (!cl->level) |
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) | |||
378 | 383 | ||
379 | if (p->un.inner.feed[prio].rb_node) | 384 | if (p->un.inner.feed[prio].rb_node) |
380 | /* parent already has its feed in use so that | 385 | /* parent already has its feed in use so that |
381 | reset bit in mask as parent is already ok */ | 386 | * reset bit in mask as parent is already ok |
387 | */ | ||
382 | mask &= ~(1 << prio); | 388 | mask &= ~(1 << prio); |
383 | 389 | ||
384 | htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); | 390 | htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); |
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) | |||
413 | 419 | ||
414 | if (p->un.inner.ptr[prio] == cl->node + prio) { | 420 | if (p->un.inner.ptr[prio] == cl->node + prio) { |
415 | /* we are removing child which is pointed to from | 421 | /* we are removing child which is pointed to from |
416 | parent feed - forget the pointer but remember | 422 | * parent feed - forget the pointer but remember |
417 | classid */ | 423 | * classid |
424 | */ | ||
418 | p->un.inner.last_ptr_id[prio] = cl->common.classid; | 425 | p->un.inner.last_ptr_id[prio] = cl->common.classid; |
419 | p->un.inner.ptr[prio] = NULL; | 426 | p->un.inner.ptr[prio] = NULL; |
420 | } | 427 | } |
@@ -569,15 +576,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
569 | } | 576 | } |
570 | return ret; | 577 | return ret; |
571 | } else { | 578 | } else { |
572 | cl->bstats.packets += | 579 | bstats_update(&cl->bstats, skb); |
573 | skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; | ||
574 | cl->bstats.bytes += qdisc_pkt_len(skb); | ||
575 | htb_activate(q, cl); | 580 | htb_activate(q, cl); |
576 | } | 581 | } |
577 | 582 | ||
578 | sch->q.qlen++; | 583 | sch->q.qlen++; |
579 | sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; | ||
580 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
581 | return NET_XMIT_SUCCESS; | 584 | return NET_XMIT_SUCCESS; |
582 | } | 585 | } |
583 | 586 | ||
@@ -648,12 +651,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, | |||
648 | htb_add_to_wait_tree(q, cl, diff); | 651 | htb_add_to_wait_tree(q, cl, diff); |
649 | } | 652 | } |
650 | 653 | ||
651 | /* update byte stats except for leaves which are already updated */ | 654 | /* update basic stats except for leaves which are already updated */ |
652 | if (cl->level) { | 655 | if (cl->level) |
653 | cl->bstats.bytes += bytes; | 656 | bstats_update(&cl->bstats, skb); |
654 | cl->bstats.packets += skb_is_gso(skb)? | 657 | |
655 | skb_shinfo(skb)->gso_segs:1; | ||
656 | } | ||
657 | cl = cl->parent; | 658 | cl = cl->parent; |
658 | } | 659 | } |
659 | } | 660 | } |
@@ -669,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, | |||
669 | unsigned long start) | 670 | unsigned long start) |
670 | { | 671 | { |
671 | /* don't run for longer than 2 jiffies; 2 is used instead of | 672 | /* don't run for longer than 2 jiffies; 2 is used instead of |
672 | 1 to simplify things when jiffy is going to be incremented | 673 | * 1 to simplify things when jiffy is going to be incremented |
673 | too soon */ | 674 | * too soon |
675 | */ | ||
674 | unsigned long stop_at = start + 2; | 676 | unsigned long stop_at = start + 2; |
675 | while (time_before(jiffies, stop_at)) { | 677 | while (time_before(jiffies, stop_at)) { |
676 | struct htb_class *cl; | 678 | struct htb_class *cl; |
@@ -693,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, | |||
693 | 695 | ||
694 | /* too much load - let's continue after a break for scheduling */ | 696 | /* too much load - let's continue after a break for scheduling */ |
695 | if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { | 697 | if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { |
696 | printk(KERN_WARNING "htb: too many events!\n"); | 698 | pr_warning("htb: too many events!\n"); |
697 | q->warned |= HTB_WARN_TOOMANYEVENTS; | 699 | q->warned |= HTB_WARN_TOOMANYEVENTS; |
698 | } | 700 | } |
699 | 701 | ||
@@ -701,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, | |||
701 | } | 703 | } |
702 | 704 | ||
703 | /* Returns class->node+prio from id-tree where classe's id is >= id. NULL | 705 | /* Returns class->node+prio from id-tree where classe's id is >= id. NULL |
704 | is no such one exists. */ | 706 | * is no such one exists. |
707 | */ | ||
705 | static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, | 708 | static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, |
706 | u32 id) | 709 | u32 id) |
707 | { | 710 | { |
@@ -745,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, | |||
745 | for (i = 0; i < 65535; i++) { | 748 | for (i = 0; i < 65535; i++) { |
746 | if (!*sp->pptr && *sp->pid) { | 749 | if (!*sp->pptr && *sp->pid) { |
747 | /* ptr was invalidated but id is valid - try to recover | 750 | /* ptr was invalidated but id is valid - try to recover |
748 | the original or next ptr */ | 751 | * the original or next ptr |
752 | */ | ||
749 | *sp->pptr = | 753 | *sp->pptr = |
750 | htb_id_find_next_upper(prio, sp->root, *sp->pid); | 754 | htb_id_find_next_upper(prio, sp->root, *sp->pid); |
751 | } | 755 | } |
752 | *sp->pid = 0; /* ptr is valid now so that remove this hint as it | 756 | *sp->pid = 0; /* ptr is valid now so that remove this hint as it |
753 | can become out of date quickly */ | 757 | * can become out of date quickly |
758 | */ | ||
754 | if (!*sp->pptr) { /* we are at right end; rewind & go up */ | 759 | if (!*sp->pptr) { /* we are at right end; rewind & go up */ |
755 | *sp->pptr = sp->root; | 760 | *sp->pptr = sp->root; |
756 | while ((*sp->pptr)->rb_left) | 761 | while ((*sp->pptr)->rb_left) |
@@ -778,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, | |||
778 | } | 783 | } |
779 | 784 | ||
780 | /* dequeues packet at given priority and level; call only if | 785 | /* dequeues packet at given priority and level; call only if |
781 | you are sure that there is active class at prio/level */ | 786 | * you are sure that there is active class at prio/level |
787 | */ | ||
782 | static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, | 788 | static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, |
783 | int level) | 789 | int level) |
784 | { | 790 | { |
@@ -795,9 +801,10 @@ next: | |||
795 | return NULL; | 801 | return NULL; |
796 | 802 | ||
797 | /* class can be empty - it is unlikely but can be true if leaf | 803 | /* class can be empty - it is unlikely but can be true if leaf |
798 | qdisc drops packets in enqueue routine or if someone used | 804 | * qdisc drops packets in enqueue routine or if someone used |
799 | graft operation on the leaf since last dequeue; | 805 | * graft operation on the leaf since last dequeue; |
800 | simply deactivate and skip such class */ | 806 | * simply deactivate and skip such class |
807 | */ | ||
801 | if (unlikely(cl->un.leaf.q->q.qlen == 0)) { | 808 | if (unlikely(cl->un.leaf.q->q.qlen == 0)) { |
802 | struct htb_class *next; | 809 | struct htb_class *next; |
803 | htb_deactivate(q, cl); | 810 | htb_deactivate(q, cl); |
@@ -837,7 +844,8 @@ next: | |||
837 | ptr[0]) + prio); | 844 | ptr[0]) + prio); |
838 | } | 845 | } |
839 | /* this used to be after charge_class but this constelation | 846 | /* this used to be after charge_class but this constelation |
840 | gives us slightly better performance */ | 847 | * gives us slightly better performance |
848 | */ | ||
841 | if (!cl->un.leaf.q->q.qlen) | 849 | if (!cl->un.leaf.q->q.qlen) |
842 | htb_deactivate(q, cl); | 850 | htb_deactivate(q, cl); |
843 | htb_charge_class(q, cl, level, skb); | 851 | htb_charge_class(q, cl, level, skb); |
@@ -847,7 +855,7 @@ next: | |||
847 | 855 | ||
848 | static struct sk_buff *htb_dequeue(struct Qdisc *sch) | 856 | static struct sk_buff *htb_dequeue(struct Qdisc *sch) |
849 | { | 857 | { |
850 | struct sk_buff *skb = NULL; | 858 | struct sk_buff *skb; |
851 | struct htb_sched *q = qdisc_priv(sch); | 859 | struct htb_sched *q = qdisc_priv(sch); |
852 | int level; | 860 | int level; |
853 | psched_time_t next_event; | 861 | psched_time_t next_event; |
@@ -856,7 +864,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) | |||
856 | /* try to dequeue direct packets as high prio (!) to minimize cpu work */ | 864 | /* try to dequeue direct packets as high prio (!) to minimize cpu work */ |
857 | skb = __skb_dequeue(&q->direct_queue); | 865 | skb = __skb_dequeue(&q->direct_queue); |
858 | if (skb != NULL) { | 866 | if (skb != NULL) { |
859 | sch->flags &= ~TCQ_F_THROTTLED; | 867 | ok: |
868 | qdisc_bstats_update(sch, skb); | ||
869 | qdisc_unthrottled(sch); | ||
860 | sch->q.qlen--; | 870 | sch->q.qlen--; |
861 | return skb; | 871 | return skb; |
862 | } | 872 | } |
@@ -887,13 +897,11 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) | |||
887 | m = ~q->row_mask[level]; | 897 | m = ~q->row_mask[level]; |
888 | while (m != (int)(-1)) { | 898 | while (m != (int)(-1)) { |
889 | int prio = ffz(m); | 899 | int prio = ffz(m); |
900 | |||
890 | m |= 1 << prio; | 901 | m |= 1 << prio; |
891 | skb = htb_dequeue_tree(q, prio, level); | 902 | skb = htb_dequeue_tree(q, prio, level); |
892 | if (likely(skb != NULL)) { | 903 | if (likely(skb != NULL)) |
893 | sch->q.qlen--; | 904 | goto ok; |
894 | sch->flags &= ~TCQ_F_THROTTLED; | ||
895 | goto fin; | ||
896 | } | ||
897 | } | 905 | } |
898 | } | 906 | } |
899 | sch->qstats.overlimits++; | 907 | sch->qstats.overlimits++; |
@@ -994,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) | |||
994 | return err; | 1002 | return err; |
995 | 1003 | ||
996 | if (tb[TCA_HTB_INIT] == NULL) { | 1004 | if (tb[TCA_HTB_INIT] == NULL) { |
997 | printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); | 1005 | pr_err("HTB: hey probably you have bad tc tool ?\n"); |
998 | return -EINVAL; | 1006 | return -EINVAL; |
999 | } | 1007 | } |
1000 | gopt = nla_data(tb[TCA_HTB_INIT]); | 1008 | gopt = nla_data(tb[TCA_HTB_INIT]); |
1001 | if (gopt->version != HTB_VER >> 16) { | 1009 | if (gopt->version != HTB_VER >> 16) { |
1002 | printk(KERN_ERR | 1010 | pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n", |
1003 | "HTB: need tc/htb version %d (minor is %d), you have %d\n", | ||
1004 | HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); | 1011 | HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); |
1005 | return -EINVAL; | 1012 | return -EINVAL; |
1006 | } | 1013 | } |
@@ -1121,8 +1128,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |||
1121 | if (cl->level) | 1128 | if (cl->level) |
1122 | return -EINVAL; | 1129 | return -EINVAL; |
1123 | if (new == NULL && | 1130 | if (new == NULL && |
1124 | (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1131 | (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
1125 | &pfifo_qdisc_ops, | ||
1126 | cl->common.classid)) == NULL) | 1132 | cl->common.classid)) == NULL) |
1127 | return -ENOBUFS; | 1133 | return -ENOBUFS; |
1128 | 1134 | ||
@@ -1214,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch) | |||
1214 | cancel_work_sync(&q->work); | 1220 | cancel_work_sync(&q->work); |
1215 | qdisc_watchdog_cancel(&q->watchdog); | 1221 | qdisc_watchdog_cancel(&q->watchdog); |
1216 | /* This line used to be after htb_destroy_class call below | 1222 | /* This line used to be after htb_destroy_class call below |
1217 | and surprisingly it worked in 2.4. But it must precede it | 1223 | * and surprisingly it worked in 2.4. But it must precede it |
1218 | because filter need its target class alive to be able to call | 1224 | * because filter need its target class alive to be able to call |
1219 | unbind_filter on it (without Oops). */ | 1225 | * unbind_filter on it (without Oops). |
1226 | */ | ||
1220 | tcf_destroy_chain(&q->filter_list); | 1227 | tcf_destroy_chain(&q->filter_list); |
1221 | 1228 | ||
1222 | for (i = 0; i < q->clhash.hashsize; i++) { | 1229 | for (i = 0; i < q->clhash.hashsize; i++) { |
@@ -1247,8 +1254,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) | |||
1247 | return -EBUSY; | 1254 | return -EBUSY; |
1248 | 1255 | ||
1249 | if (!cl->level && htb_parent_last_child(cl)) { | 1256 | if (!cl->level && htb_parent_last_child(cl)) { |
1250 | new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1257 | new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, |
1251 | &pfifo_qdisc_ops, | ||
1252 | cl->parent->common.classid); | 1258 | cl->parent->common.classid); |
1253 | last_child = 1; | 1259 | last_child = 1; |
1254 | } | 1260 | } |
@@ -1302,14 +1308,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1302 | struct htb_class *cl = (struct htb_class *)*arg, *parent; | 1308 | struct htb_class *cl = (struct htb_class *)*arg, *parent; |
1303 | struct nlattr *opt = tca[TCA_OPTIONS]; | 1309 | struct nlattr *opt = tca[TCA_OPTIONS]; |
1304 | struct qdisc_rate_table *rtab = NULL, *ctab = NULL; | 1310 | struct qdisc_rate_table *rtab = NULL, *ctab = NULL; |
1305 | struct nlattr *tb[TCA_HTB_RTAB + 1]; | 1311 | struct nlattr *tb[__TCA_HTB_MAX]; |
1306 | struct tc_htb_opt *hopt; | 1312 | struct tc_htb_opt *hopt; |
1307 | 1313 | ||
1308 | /* extract all subattrs from opt attr */ | 1314 | /* extract all subattrs from opt attr */ |
1309 | if (!opt) | 1315 | if (!opt) |
1310 | goto failure; | 1316 | goto failure; |
1311 | 1317 | ||
1312 | err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy); | 1318 | err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy); |
1313 | if (err < 0) | 1319 | if (err < 0) |
1314 | goto failure; | 1320 | goto failure; |
1315 | 1321 | ||
@@ -1351,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1351 | 1357 | ||
1352 | /* check maximal depth */ | 1358 | /* check maximal depth */ |
1353 | if (parent && parent->parent && parent->parent->level < 2) { | 1359 | if (parent && parent->parent && parent->parent->level < 2) { |
1354 | printk(KERN_ERR "htb: tree is too deep\n"); | 1360 | pr_err("htb: tree is too deep\n"); |
1355 | goto failure; | 1361 | goto failure; |
1356 | } | 1362 | } |
1357 | err = -ENOBUFS; | 1363 | err = -ENOBUFS; |
1358 | if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) | 1364 | cl = kzalloc(sizeof(*cl), GFP_KERNEL); |
1365 | if (!cl) | ||
1359 | goto failure; | 1366 | goto failure; |
1360 | 1367 | ||
1361 | err = gen_new_estimator(&cl->bstats, &cl->rate_est, | 1368 | err = gen_new_estimator(&cl->bstats, &cl->rate_est, |
@@ -1375,9 +1382,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1375 | RB_CLEAR_NODE(&cl->node[prio]); | 1382 | RB_CLEAR_NODE(&cl->node[prio]); |
1376 | 1383 | ||
1377 | /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) | 1384 | /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) |
1378 | so that can't be used inside of sch_tree_lock | 1385 | * so that can't be used inside of sch_tree_lock |
1379 | -- thanks to Karlis Peisenieks */ | 1386 | * -- thanks to Karlis Peisenieks |
1380 | new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 1387 | */ |
1388 | new_q = qdisc_create_dflt(sch->dev_queue, | ||
1381 | &pfifo_qdisc_ops, classid); | 1389 | &pfifo_qdisc_ops, classid); |
1382 | sch_tree_lock(sch); | 1390 | sch_tree_lock(sch); |
1383 | if (parent && !parent->level) { | 1391 | if (parent && !parent->level) { |
@@ -1428,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, | |||
1428 | } | 1436 | } |
1429 | 1437 | ||
1430 | /* it used to be a nasty bug here, we have to check that node | 1438 | /* it used to be a nasty bug here, we have to check that node |
1431 | is really leaf before changing cl->un.leaf ! */ | 1439 | * is really leaf before changing cl->un.leaf ! |
1440 | */ | ||
1432 | if (!cl->level) { | 1441 | if (!cl->level) { |
1433 | cl->quantum = rtab->rate.rate / q->rate2quantum; | 1442 | cl->quantum = rtab->rate.rate / q->rate2quantum; |
1434 | if (!hopt->quantum && cl->quantum < 1000) { | 1443 | if (!hopt->quantum && cl->quantum < 1000) { |
1435 | printk(KERN_WARNING | 1444 | pr_warning( |
1436 | "HTB: quantum of class %X is small. Consider r2q change.\n", | 1445 | "HTB: quantum of class %X is small. Consider r2q change.\n", |
1437 | cl->common.classid); | 1446 | cl->common.classid); |
1438 | cl->quantum = 1000; | 1447 | cl->quantum = 1000; |
1439 | } | 1448 | } |
1440 | if (!hopt->quantum && cl->quantum > 200000) { | 1449 | if (!hopt->quantum && cl->quantum > 200000) { |
1441 | printk(KERN_WARNING | 1450 | pr_warning( |
1442 | "HTB: quantum of class %X is big. Consider r2q change.\n", | 1451 | "HTB: quantum of class %X is big. Consider r2q change.\n", |
1443 | cl->common.classid); | 1452 | cl->common.classid); |
1444 | cl->quantum = 200000; | 1453 | cl->quantum = 200000; |
@@ -1487,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, | |||
1487 | struct htb_class *cl = htb_find(classid, sch); | 1496 | struct htb_class *cl = htb_find(classid, sch); |
1488 | 1497 | ||
1489 | /*if (cl && !cl->level) return 0; | 1498 | /*if (cl && !cl->level) return 0; |
1490 | The line above used to be there to prevent attaching filters to | 1499 | * The line above used to be there to prevent attaching filters to |
1491 | leaves. But at least tc_index filter uses this just to get class | 1500 | * leaves. But at least tc_index filter uses this just to get class |
1492 | for other reasons so that we have to allow for it. | 1501 | * for other reasons so that we have to allow for it. |
1493 | ---- | 1502 | * ---- |
1494 | 19.6.2002 As Werner explained it is ok - bind filter is just | 1503 | * 19.6.2002 As Werner explained it is ok - bind filter is just |
1495 | another way to "lock" the class - unlike "get" this lock can | 1504 | * another way to "lock" the class - unlike "get" this lock can |
1496 | be broken by class during destroy IIUC. | 1505 | * be broken by class during destroy IIUC. |
1497 | */ | 1506 | */ |
1498 | if (cl) | 1507 | if (cl) |
1499 | cl->filter_cnt++; | 1508 | cl->filter_cnt++; |
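
The htb_classify changes in this file are pure restructuring (assignments pulled out of if-conditions), but the flow they preserve is worth stating: skb->priority equal to the qdisc handle selects the internal direct queue (HTB_DIRECT), a priority that names an existing leaf class selects it outright, otherwise the filter chain runs and, failing that, the MAJOR:default leaf is used. A hedged sketch of that decision order with invented helper names; the filter step is elided:

    #include <stdio.h>
    #include <stddef.h>

    struct toy_class { unsigned int classid; int level; };

    #define DIRECT ((struct toy_class *)-1L)   /* sentinel, mirrors HTB_DIRECT */

    static struct toy_class classes[] = {
        { 0x10001, 0 },   /* 1:1 leaf */
        { 0x10002, 1 },   /* 1:2 inner class */
    };

    static struct toy_class *find_class(unsigned int classid)
    {
        for (size_t i = 0; i < sizeof(classes) / sizeof(classes[0]); i++)
            if (classes[i].classid == classid)
                return &classes[i];
        return NULL;
    }

    /* Decision order modelled on htb_classify: direct handle, then
     * priority-as-classid, then (elided) the filter chain, then the default. */
    static struct toy_class *classify(unsigned int skb_priority,
                                      unsigned int sch_handle,
                                      unsigned int default_classid)
    {
        struct toy_class *cl;

        if (skb_priority == sch_handle)
            return DIRECT;
        cl = find_class(skb_priority);
        if (cl && cl->level == 0)
            return cl;
        return find_class(default_classid);
    }

    int main(void)
    {
        struct toy_class *cl = classify(0x10001, 0x10000, 0x10002);

        printf("%s\n", cl == DIRECT ? "direct" : cl ? "leaf class" : "no class");
        return 0;
    }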
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f10e34a68445..bce1665239b8 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c | |||
@@ -63,8 +63,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
63 | 63 | ||
64 | result = tc_classify(skb, p->filter_list, &res); | 64 | result = tc_classify(skb, p->filter_list, &res); |
65 | 65 | ||
66 | sch->bstats.packets++; | 66 | qdisc_bstats_update(sch, skb); |
67 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
68 | switch (result) { | 67 | switch (result) { |
69 | case TC_ACT_SHOT: | 68 | case TC_ACT_SHOT: |
70 | result = TC_ACT_SHOT; | 69 | result = TC_ACT_SHOT; |
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index fe91e50f9d98..ec5cbc848963 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c | |||
@@ -56,12 +56,11 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) | |||
56 | 56 | ||
57 | for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { | 57 | for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { |
58 | dev_queue = netdev_get_tx_queue(dev, ntx); | 58 | dev_queue = netdev_get_tx_queue(dev, ntx); |
59 | qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, | 59 | qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, |
60 | TC_H_MAKE(TC_H_MAJ(sch->handle), | 60 | TC_H_MAKE(TC_H_MAJ(sch->handle), |
61 | TC_H_MIN(ntx + 1))); | 61 | TC_H_MIN(ntx + 1))); |
62 | if (qdisc == NULL) | 62 | if (qdisc == NULL) |
63 | goto err; | 63 | goto err; |
64 | qdisc->flags |= TCQ_F_CAN_BYPASS; | ||
65 | priv->qdiscs[ntx] = qdisc; | 64 | priv->qdiscs[ntx] = qdisc; |
66 | } | 65 | } |
67 | 66 | ||
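
mq_init (and the new mqprio_init below) gives every tx queue its own pfifo_fast child and derives the child's handle from the parent's major number plus queue index + 1 as the minor number — that is what TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1)) packs into one 32-bit id. The arithmetic as a tiny standalone model, with sketch-local macro names:

    #include <stdio.h>

    /* 32-bit qdisc/class handle: high 16 bits = major, low 16 bits = minor. */
    #define H_MAJ(h)         ((h) & 0xFFFF0000U)
    #define H_MIN(h)         ((h) & 0x0000FFFFU)
    #define H_MAKE(maj, min) (((maj) & 0xFFFF0000U) | ((min) & 0x0000FFFFU))

    int main(void)
    {
        unsigned int sch_handle = 0x80010000;   /* e.g. a root qdisc "8001:" */

        for (unsigned int ntx = 0; ntx < 4; ntx++) {
            unsigned int child = H_MAKE(H_MAJ(sch_handle), H_MIN(ntx + 1));

            printf("queue %u -> child handle %x:%x\n",
                   ntx, child >> 16, child & 0xFFFF);
        }
        return 0;
    }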
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c new file mode 100644 index 000000000000..ea17cbed29ef --- /dev/null +++ b/net/sched/sch_mqprio.c | |||
@@ -0,0 +1,418 @@ | |||
1 | /* | ||
2 | * net/sched/sch_mqprio.c | ||
3 | * | ||
4 | * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com> | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * version 2 as published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/types.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/skbuff.h> | ||
17 | #include <net/netlink.h> | ||
18 | #include <net/pkt_sched.h> | ||
19 | #include <net/sch_generic.h> | ||
20 | |||
21 | struct mqprio_sched { | ||
22 | struct Qdisc **qdiscs; | ||
23 | int hw_owned; | ||
24 | }; | ||
25 | |||
26 | static void mqprio_destroy(struct Qdisc *sch) | ||
27 | { | ||
28 | struct net_device *dev = qdisc_dev(sch); | ||
29 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
30 | unsigned int ntx; | ||
31 | |||
32 | if (priv->qdiscs) { | ||
33 | for (ntx = 0; | ||
34 | ntx < dev->num_tx_queues && priv->qdiscs[ntx]; | ||
35 | ntx++) | ||
36 | qdisc_destroy(priv->qdiscs[ntx]); | ||
37 | kfree(priv->qdiscs); | ||
38 | } | ||
39 | |||
40 | if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc) | ||
41 | dev->netdev_ops->ndo_setup_tc(dev, 0); | ||
42 | else | ||
43 | netdev_set_num_tc(dev, 0); | ||
44 | } | ||
45 | |||
46 | static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) | ||
47 | { | ||
48 | int i, j; | ||
49 | |||
50 | /* Verify num_tc is not out of max range */ | ||
51 | if (qopt->num_tc > TC_MAX_QUEUE) | ||
52 | return -EINVAL; | ||
53 | |||
54 | /* Verify priority mapping uses valid tcs */ | ||
55 | for (i = 0; i < TC_BITMASK + 1; i++) { | ||
56 | if (qopt->prio_tc_map[i] >= qopt->num_tc) | ||
57 | return -EINVAL; | ||
58 | } | ||
59 | |||
60 | /* net_device does not support requested operation */ | ||
61 | if (qopt->hw && !dev->netdev_ops->ndo_setup_tc) | ||
62 | return -EINVAL; | ||
63 | |||
64 | /* if hw owned qcount and qoffset are taken from LLD so | ||
65 | * no reason to verify them here | ||
66 | */ | ||
67 | if (qopt->hw) | ||
68 | return 0; | ||
69 | |||
70 | for (i = 0; i < qopt->num_tc; i++) { | ||
71 | unsigned int last = qopt->offset[i] + qopt->count[i]; | ||
72 | |||
73 | /* Verify the queue count is in tx range being equal to the | ||
74 | * real_num_tx_queues indicates the last queue is in use. | ||
75 | */ | ||
76 | if (qopt->offset[i] >= dev->real_num_tx_queues || | ||
77 | !qopt->count[i] || | ||
78 | last > dev->real_num_tx_queues) | ||
79 | return -EINVAL; | ||
80 | |||
81 | /* Verify that the offset and counts do not overlap */ | ||
82 | for (j = i + 1; j < qopt->num_tc; j++) { | ||
83 | if (last > qopt->offset[j]) | ||
84 | return -EINVAL; | ||
85 | } | ||
86 | } | ||
87 | |||
88 | return 0; | ||
89 | } | ||
90 | |||
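
mqprio_parse_opt above rejects a software mapping when any traffic class's [offset, offset + count) window falls outside the device's real tx queue range or overlaps a later class's offset. Restated as a standalone check; it mirrors the loop in the patch, including its assumption that later classes start at or after earlier ones:

    #include <stdbool.h>
    #include <stdio.h>

    static bool tc_map_valid(const unsigned int *offset, const unsigned int *count,
                             int num_tc, unsigned int real_num_tx_queues)
    {
        for (int i = 0; i < num_tc; i++) {
            unsigned int last = offset[i] + count[i];

            if (offset[i] >= real_num_tx_queues || count[i] == 0 ||
                last > real_num_tx_queues)
                return false;                  /* class window out of range */
            for (int j = i + 1; j < num_tc; j++)
                if (last > offset[j])
                    return false;              /* windows overlap */
        }
        return true;
    }

    int main(void)
    {
        unsigned int offset[] = { 0, 4 }, count[] = { 4, 4 };

        printf("%s\n", tc_map_valid(offset, count, 2, 8) ? "ok" : "invalid");
        return 0;
    }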
91 | static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) | ||
92 | { | ||
93 | struct net_device *dev = qdisc_dev(sch); | ||
94 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
95 | struct netdev_queue *dev_queue; | ||
96 | struct Qdisc *qdisc; | ||
97 | int i, err = -EOPNOTSUPP; | ||
98 | struct tc_mqprio_qopt *qopt = NULL; | ||
99 | |||
100 | BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); | ||
101 | BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); | ||
102 | |||
103 | if (sch->parent != TC_H_ROOT) | ||
104 | return -EOPNOTSUPP; | ||
105 | |||
106 | if (!netif_is_multiqueue(dev)) | ||
107 | return -EOPNOTSUPP; | ||
108 | |||
109 | if (nla_len(opt) < sizeof(*qopt)) | ||
110 | return -EINVAL; | ||
111 | |||
112 | qopt = nla_data(opt); | ||
113 | if (mqprio_parse_opt(dev, qopt)) | ||
114 | return -EINVAL; | ||
115 | |||
116 | /* pre-allocate qdisc, attachment can't fail */ | ||
117 | priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), | ||
118 | GFP_KERNEL); | ||
119 | if (priv->qdiscs == NULL) { | ||
120 | err = -ENOMEM; | ||
121 | goto err; | ||
122 | } | ||
123 | |||
124 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
125 | dev_queue = netdev_get_tx_queue(dev, i); | ||
126 | qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, | ||
127 | TC_H_MAKE(TC_H_MAJ(sch->handle), | ||
128 | TC_H_MIN(i + 1))); | ||
129 | if (qdisc == NULL) { | ||
130 | err = -ENOMEM; | ||
131 | goto err; | ||
132 | } | ||
133 | priv->qdiscs[i] = qdisc; | ||
134 | } | ||
135 | |||
136 | /* If the mqprio options indicate that hardware should own | ||
137 | * the queue mapping then run ndo_setup_tc otherwise use the | ||
138 | * supplied and verified mapping | ||
139 | */ | ||
140 | if (qopt->hw) { | ||
141 | priv->hw_owned = 1; | ||
142 | err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc); | ||
143 | if (err) | ||
144 | goto err; | ||
145 | } else { | ||
146 | netdev_set_num_tc(dev, qopt->num_tc); | ||
147 | for (i = 0; i < qopt->num_tc; i++) | ||
148 | netdev_set_tc_queue(dev, i, | ||
149 | qopt->count[i], qopt->offset[i]); | ||
150 | } | ||
151 | |||
152 | /* Always use supplied priority mappings */ | ||
153 | for (i = 0; i < TC_BITMASK + 1; i++) | ||
154 | netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]); | ||
155 | |||
156 | sch->flags |= TCQ_F_MQROOT; | ||
157 | return 0; | ||
158 | |||
159 | err: | ||
160 | mqprio_destroy(sch); | ||
161 | return err; | ||
162 | } | ||
163 | |||
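
When mqprio_init runs in software mode it records two tables on the device: prio_tc_map (via netdev_set_prio_tc_map) translating a packet priority to a traffic class, and tc_to_txq (via netdev_set_tc_queue) giving each class a contiguous block of tx queues. The transmit path can then resolve priority -> class -> a queue inside that class's block. The sketch below is a hedged userspace model of that two-step lookup; the flow-hash step is only a stand-in for whatever queue selection the stack actually applies:

    #include <stdio.h>

    #define TC_BITMASK 15

    struct tc_txq { unsigned int count, offset; };

    /* priority -> traffic class, then traffic class -> a queue in its block */
    static unsigned int pick_queue(unsigned int skb_priority, unsigned int flow_hash,
                                   const unsigned char *prio_tc_map,
                                   const struct tc_txq *tc_to_txq)
    {
        unsigned int tc = prio_tc_map[skb_priority & TC_BITMASK];
        const struct tc_txq *range = &tc_to_txq[tc];

        return range->offset + flow_hash % range->count;
    }

    int main(void)
    {
        unsigned char prio_tc_map[TC_BITMASK + 1] = { 0 };    /* default: tc 0 */
        struct tc_txq tc_to_txq[2] = { { 4, 0 }, { 4, 4 } };  /* tc0: q0-3, tc1: q4-7 */

        prio_tc_map[5] = 1;                                   /* priority 5 -> tc 1 */
        printf("queue %u\n", pick_queue(5, 123, prio_tc_map, tc_to_txq));  /* 7 */
        return 0;
    }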
164 | static void mqprio_attach(struct Qdisc *sch) | ||
165 | { | ||
166 | struct net_device *dev = qdisc_dev(sch); | ||
167 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
168 | struct Qdisc *qdisc; | ||
169 | unsigned int ntx; | ||
170 | |||
171 | /* Attach underlying qdisc */ | ||
172 | for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { | ||
173 | qdisc = priv->qdiscs[ntx]; | ||
174 | qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); | ||
175 | if (qdisc) | ||
176 | qdisc_destroy(qdisc); | ||
177 | } | ||
178 | kfree(priv->qdiscs); | ||
179 | priv->qdiscs = NULL; | ||
180 | } | ||
181 | |||
182 | static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, | ||
183 | unsigned long cl) | ||
184 | { | ||
185 | struct net_device *dev = qdisc_dev(sch); | ||
186 | unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); | ||
187 | |||
188 | if (ntx >= dev->num_tx_queues) | ||
189 | return NULL; | ||
190 | return netdev_get_tx_queue(dev, ntx); | ||
191 | } | ||
192 | |||
193 | static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, | ||
194 | struct Qdisc **old) | ||
195 | { | ||
196 | struct net_device *dev = qdisc_dev(sch); | ||
197 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | ||
198 | |||
199 | if (!dev_queue) | ||
200 | return -EINVAL; | ||
201 | |||
202 | if (dev->flags & IFF_UP) | ||
203 | dev_deactivate(dev); | ||
204 | |||
205 | *old = dev_graft_qdisc(dev_queue, new); | ||
206 | |||
207 | if (dev->flags & IFF_UP) | ||
208 | dev_activate(dev); | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
214 | { | ||
215 | struct net_device *dev = qdisc_dev(sch); | ||
216 | struct mqprio_sched *priv = qdisc_priv(sch); | ||
217 | unsigned char *b = skb_tail_pointer(skb); | ||
218 | struct tc_mqprio_qopt opt = { 0 }; | ||
219 | struct Qdisc *qdisc; | ||
220 | unsigned int i; | ||
221 | |||
222 | sch->q.qlen = 0; | ||
223 | memset(&sch->bstats, 0, sizeof(sch->bstats)); | ||
224 | memset(&sch->qstats, 0, sizeof(sch->qstats)); | ||
225 | |||
226 | for (i = 0; i < dev->num_tx_queues; i++) { | ||
227 | qdisc = netdev_get_tx_queue(dev, i)->qdisc; | ||
228 | spin_lock_bh(qdisc_lock(qdisc)); | ||
229 | sch->q.qlen += qdisc->q.qlen; | ||
230 | sch->bstats.bytes += qdisc->bstats.bytes; | ||
231 | sch->bstats.packets += qdisc->bstats.packets; | ||
232 | sch->qstats.qlen += qdisc->qstats.qlen; | ||
233 | sch->qstats.backlog += qdisc->qstats.backlog; | ||
234 | sch->qstats.drops += qdisc->qstats.drops; | ||
235 | sch->qstats.requeues += qdisc->qstats.requeues; | ||
236 | sch->qstats.overlimits += qdisc->qstats.overlimits; | ||
237 | spin_unlock_bh(qdisc_lock(qdisc)); | ||
238 | } | ||
239 | |||
240 | opt.num_tc = netdev_get_num_tc(dev); | ||
241 | memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); | ||
242 | opt.hw = priv->hw_owned; | ||
243 | |||
244 | for (i = 0; i < netdev_get_num_tc(dev); i++) { | ||
245 | opt.count[i] = dev->tc_to_txq[i].count; | ||
246 | opt.offset[i] = dev->tc_to_txq[i].offset; | ||
247 | } | ||
248 | |||
249 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | ||
250 | |||
251 | return skb->len; | ||
252 | nla_put_failure: | ||
253 | nlmsg_trim(skb, b); | ||
254 | return -1; | ||
255 | } | ||
256 | |||
257 | static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) | ||
258 | { | ||
259 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | ||
260 | |||
261 | if (!dev_queue) | ||
262 | return NULL; | ||
263 | |||
264 | return dev_queue->qdisc_sleeping; | ||
265 | } | ||
266 | |||
267 | static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) | ||
268 | { | ||
269 | struct net_device *dev = qdisc_dev(sch); | ||
270 | unsigned int ntx = TC_H_MIN(classid); | ||
271 | |||
272 | if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) | ||
273 | return 0; | ||
274 | return ntx; | ||
275 | } | ||
276 | |||
277 | static void mqprio_put(struct Qdisc *sch, unsigned long cl) | ||
278 | { | ||
279 | } | ||
280 | |||
281 | static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, | ||
282 | struct sk_buff *skb, struct tcmsg *tcm) | ||
283 | { | ||
284 | struct net_device *dev = qdisc_dev(sch); | ||
285 | |||
286 | if (cl <= netdev_get_num_tc(dev)) { | ||
287 | tcm->tcm_parent = TC_H_ROOT; | ||
288 | tcm->tcm_info = 0; | ||
289 | } else { | ||
290 | int i; | ||
291 | struct netdev_queue *dev_queue; | ||
292 | |||
293 | dev_queue = mqprio_queue_get(sch, cl); | ||
294 | tcm->tcm_parent = 0; | ||
295 | for (i = 0; i < netdev_get_num_tc(dev); i++) { | ||
296 | struct netdev_tc_txq tc = dev->tc_to_txq[i]; | ||
297 | int q_idx = cl - netdev_get_num_tc(dev); | ||
298 | |||
299 | if (q_idx > tc.offset && | ||
300 | q_idx <= tc.offset + tc.count) { | ||
301 | tcm->tcm_parent = | ||
302 | TC_H_MAKE(TC_H_MAJ(sch->handle), | ||
303 | TC_H_MIN(i + 1)); | ||
304 | break; | ||
305 | } | ||
306 | } | ||
307 | tcm->tcm_info = dev_queue->qdisc_sleeping->handle; | ||
308 | } | ||
309 | tcm->tcm_handle |= TC_H_MIN(cl); | ||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, | ||
314 | struct gnet_dump *d) | ||
315 | __releases(d->lock) | ||
316 | __acquires(d->lock) | ||
317 | { | ||
318 | struct net_device *dev = qdisc_dev(sch); | ||
319 | |||
320 | if (cl <= netdev_get_num_tc(dev)) { | ||
321 | int i; | ||
322 | struct Qdisc *qdisc; | ||
323 | struct gnet_stats_queue qstats = {0}; | ||
324 | struct gnet_stats_basic_packed bstats = {0}; | ||
325 | struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; | ||
326 | |||
327 | /* Drop the lock here; it will be reclaimed before touching | ||
328 | * statistics. This is required because the d->lock we | ||
329 | * hold here is the lock on dev_queue->qdisc_sleeping, | ||
330 | * which is also acquired below. | ||
331 | */ | ||
332 | spin_unlock_bh(d->lock); | ||
333 | |||
334 | for (i = tc.offset; i < tc.offset + tc.count; i++) { | ||
335 | qdisc = netdev_get_tx_queue(dev, i)->qdisc; | ||
336 | spin_lock_bh(qdisc_lock(qdisc)); | ||
337 | bstats.bytes += qdisc->bstats.bytes; | ||
338 | bstats.packets += qdisc->bstats.packets; | ||
339 | qstats.qlen += qdisc->qstats.qlen; | ||
340 | qstats.backlog += qdisc->qstats.backlog; | ||
341 | qstats.drops += qdisc->qstats.drops; | ||
342 | qstats.requeues += qdisc->qstats.requeues; | ||
343 | qstats.overlimits += qdisc->qstats.overlimits; | ||
344 | spin_unlock_bh(qdisc_lock(qdisc)); | ||
345 | } | ||
346 | /* Reclaim root sleeping lock before completing stats */ | ||
347 | spin_lock_bh(d->lock); | ||
348 | if (gnet_stats_copy_basic(d, &bstats) < 0 || | ||
349 | gnet_stats_copy_queue(d, &qstats) < 0) | ||
350 | return -1; | ||
351 | } else { | ||
352 | struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); | ||
353 | |||
354 | sch = dev_queue->qdisc_sleeping; | ||
355 | sch->qstats.qlen = sch->q.qlen; | ||
356 | if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || | ||
357 | gnet_stats_copy_queue(d, &sch->qstats) < 0) | ||
358 | return -1; | ||
359 | } | ||
360 | return 0; | ||
361 | } | ||
362 | |||
363 | static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | ||
364 | { | ||
365 | struct net_device *dev = qdisc_dev(sch); | ||
366 | unsigned long ntx; | ||
367 | |||
368 | if (arg->stop) | ||
369 | return; | ||
370 | |||
371 | /* Walk hierarchy with a virtual class per tc */ | ||
372 | arg->count = arg->skip; | ||
373 | for (ntx = arg->skip; | ||
374 | ntx < dev->num_tx_queues + netdev_get_num_tc(dev); | ||
375 | ntx++) { | ||
376 | if (arg->fn(sch, ntx + 1, arg) < 0) { | ||
377 | arg->stop = 1; | ||
378 | break; | ||
379 | } | ||
380 | arg->count++; | ||
381 | } | ||
382 | } | ||
383 | |||
384 | static const struct Qdisc_class_ops mqprio_class_ops = { | ||
385 | .graft = mqprio_graft, | ||
386 | .leaf = mqprio_leaf, | ||
387 | .get = mqprio_get, | ||
388 | .put = mqprio_put, | ||
389 | .walk = mqprio_walk, | ||
390 | .dump = mqprio_dump_class, | ||
391 | .dump_stats = mqprio_dump_class_stats, | ||
392 | }; | ||
393 | |||
394 | static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { | ||
395 | .cl_ops = &mqprio_class_ops, | ||
396 | .id = "mqprio", | ||
397 | .priv_size = sizeof(struct mqprio_sched), | ||
398 | .init = mqprio_init, | ||
399 | .destroy = mqprio_destroy, | ||
400 | .attach = mqprio_attach, | ||
401 | .dump = mqprio_dump, | ||
402 | .owner = THIS_MODULE, | ||
403 | }; | ||
404 | |||
405 | static int __init mqprio_module_init(void) | ||
406 | { | ||
407 | return register_qdisc(&mqprio_qdisc_ops); | ||
408 | } | ||
409 | |||
410 | static void __exit mqprio_module_exit(void) | ||
411 | { | ||
412 | unregister_qdisc(&mqprio_qdisc_ops); | ||
413 | } | ||
414 | |||
415 | module_init(mqprio_module_init); | ||
416 | module_exit(mqprio_module_exit); | ||
417 | |||
418 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 6ae251279fc2..edc1950e0e77 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c | |||
@@ -83,8 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
83 | 83 | ||
84 | ret = qdisc_enqueue(skb, qdisc); | 84 | ret = qdisc_enqueue(skb, qdisc); |
85 | if (ret == NET_XMIT_SUCCESS) { | 85 | if (ret == NET_XMIT_SUCCESS) { |
86 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
87 | sch->bstats.packets++; | ||
88 | sch->q.qlen++; | 86 | sch->q.qlen++; |
89 | return NET_XMIT_SUCCESS; | 87 | return NET_XMIT_SUCCESS; |
90 | } | 88 | } |
@@ -113,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch) | |||
113 | qdisc = q->queues[q->curband]; | 111 | qdisc = q->queues[q->curband]; |
114 | skb = qdisc->dequeue(qdisc); | 112 | skb = qdisc->dequeue(qdisc); |
115 | if (skb) { | 113 | if (skb) { |
114 | qdisc_bstats_update(sch, skb); | ||
116 | sch->q.qlen--; | 115 | sch->q.qlen--; |
117 | return skb; | 116 | return skb; |
118 | } | 117 | } |
@@ -157,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch) | |||
157 | unsigned int len; | 156 | unsigned int len; |
158 | struct Qdisc *qdisc; | 157 | struct Qdisc *qdisc; |
159 | 158 | ||
160 | for (band = q->bands-1; band >= 0; band--) { | 159 | for (band = q->bands - 1; band >= 0; band--) { |
161 | qdisc = q->queues[band]; | 160 | qdisc = q->queues[band]; |
162 | if (qdisc->ops->drop) { | 161 | if (qdisc->ops->drop) { |
163 | len = qdisc->ops->drop(qdisc); | 162 | len = qdisc->ops->drop(qdisc); |
@@ -227,8 +226,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) | |||
227 | for (i = 0; i < q->bands; i++) { | 226 | for (i = 0; i < q->bands; i++) { |
228 | if (q->queues[i] == &noop_qdisc) { | 227 | if (q->queues[i] == &noop_qdisc) { |
229 | struct Qdisc *child, *old; | 228 | struct Qdisc *child, *old; |
230 | child = qdisc_create_dflt(qdisc_dev(sch), | 229 | child = qdisc_create_dflt(sch->dev_queue, |
231 | sch->dev_queue, | ||
232 | &pfifo_qdisc_ops, | 230 | &pfifo_qdisc_ops, |
233 | TC_H_MAKE(sch->handle, | 231 | TC_H_MAKE(sch->handle, |
234 | i + 1)); | 232 | i + 1)); |
@@ -267,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) | |||
267 | for (i = 0; i < q->max_bands; i++) | 265 | for (i = 0; i < q->max_bands; i++) |
268 | q->queues[i] = &noop_qdisc; | 266 | q->queues[i] = &noop_qdisc; |
269 | 267 | ||
270 | err = multiq_tune(sch,opt); | 268 | err = multiq_tune(sch, opt); |
271 | 269 | ||
272 | if (err) | 270 | if (err) |
273 | kfree(q->queues); | 271 | kfree(q->queues); |
@@ -348,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, | |||
348 | struct multiq_sched_data *q = qdisc_priv(sch); | 346 | struct multiq_sched_data *q = qdisc_priv(sch); |
349 | 347 | ||
350 | tcm->tcm_handle |= TC_H_MIN(cl); | 348 | tcm->tcm_handle |= TC_H_MIN(cl); |
351 | tcm->tcm_info = q->queues[cl-1]->handle; | 349 | tcm->tcm_info = q->queues[cl - 1]->handle; |
352 | return 0; | 350 | return 0; |
353 | } | 351 | } |
354 | 352 | ||
@@ -380,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
380 | arg->count++; | 378 | arg->count++; |
381 | continue; | 379 | continue; |
382 | } | 380 | } |
383 | if (arg->fn(sch, band+1, arg) < 0) { | 381 | if (arg->fn(sch, band + 1, arg) < 0) { |
384 | arg->stop = 1; | 382 | arg->stop = 1; |
385 | break; | 383 | break; |
386 | } | 384 | } |
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 4714ff162bbd..69c35f6cd13f 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c | |||
@@ -19,12 +19,13 @@ | |||
19 | #include <linux/kernel.h> | 19 | #include <linux/kernel.h> |
20 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
21 | #include <linux/skbuff.h> | 21 | #include <linux/skbuff.h> |
22 | #include <linux/vmalloc.h> | ||
22 | #include <linux/rtnetlink.h> | 23 | #include <linux/rtnetlink.h> |
23 | 24 | ||
24 | #include <net/netlink.h> | 25 | #include <net/netlink.h> |
25 | #include <net/pkt_sched.h> | 26 | #include <net/pkt_sched.h> |
26 | 27 | ||
27 | #define VERSION "1.2" | 28 | #define VERSION "1.3" |
28 | 29 | ||
29 | /* Network Emulation Queuing algorithm. | 30 | /* Network Emulation Queuing algorithm. |
30 | ==================================== | 31 | ==================================== |
@@ -47,6 +48,20 @@ | |||
47 | layering other disciplines. It does not need to do bandwidth | 48 | layering other disciplines. It does not need to do bandwidth |
48 | control either since that can be handled by using token | 49 | control either since that can be handled by using token |
49 | bucket or other rate control. | 50 | bucket or other rate control. |
51 | |||
52 | Correlated Loss Generator models | ||
53 | |||
54 | Added generation of correlated loss according to the | ||
55 | "Gilbert-Elliot" model, a 4-state markov model. | ||
56 | |||
57 | References: | ||
58 | [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG | ||
59 | [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general | ||
60 | and intuitive loss model for packet networks and its implementation | ||
61 | in the Netem module in the Linux kernel", available in [1] | ||
62 | |||
63 | Authors: Stefano Salsano <stefano.salsano at uniroma2.it> | ||
64 | Fabio Ludovici <fabio.ludovici at yahoo.it> | ||
50 | */ | 65 | */ |
51 | 66 | ||
52 | struct netem_sched_data { | 67 | struct netem_sched_data { |
@@ -73,6 +88,26 @@ struct netem_sched_data { | |||
73 | u32 size; | 88 | u32 size; |
74 | s16 table[0]; | 89 | s16 table[0]; |
75 | } *delay_dist; | 90 | } *delay_dist; |
91 | |||
92 | enum { | ||
93 | CLG_RANDOM, | ||
94 | CLG_4_STATES, | ||
95 | CLG_GILB_ELL, | ||
96 | } loss_model; | ||
97 | |||
98 | /* Correlated Loss Generation models */ | ||
99 | struct clgstate { | ||
100 | /* state of the Markov chain */ | ||
101 | u8 state; | ||
102 | |||
103 | /* 4-states and Gilbert-Elliot models */ | ||
104 | u32 a1; /* p13 for 4-states or p for GE */ | ||
105 | u32 a2; /* p31 for 4-states or r for GE */ | ||
106 | u32 a3; /* p32 for 4-states or h for GE */ | ||
107 | u32 a4; /* p14 for 4-states or 1-k for GE */ | ||
108 | u32 a5; /* p23 used only in 4-states */ | ||
109 | } clg; | ||
110 | |||
76 | }; | 111 | }; |
77 | 112 | ||
78 | /* Time stamp put into socket buffer control block */ | 113 | /* Time stamp put into socket buffer control block */ |
@@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state) | |||
115 | return answer; | 150 | return answer; |
116 | } | 151 | } |
117 | 152 | ||
153 | /* loss_4state - 4-state model loss generator | ||
154 | * Generates losses according to the 4-state Markov chain adopted in | ||
155 | * the GI (General and Intuitive) loss model. | ||
156 | */ | ||
157 | static bool loss_4state(struct netem_sched_data *q) | ||
158 | { | ||
159 | struct clgstate *clg = &q->clg; | ||
160 | u32 rnd = net_random(); | ||
161 | |||
162 | /* | ||
163 | * Makes a comparison between rnd and the transition | ||
164 | * probabilities outgoing from the current state, then decides the | ||
165 | * next state and if the next packet has to be transmitted or lost. | ||
166 | * The four states correspond to: | ||
167 | * 1 => successfully transmitted packets within a gap period | ||
168 | * 4 => isolated losses within a gap period | ||
169 | * 3 => lost packets within a burst period | ||
170 | * 2 => successfully transmitted packets within a burst period | ||
171 | */ | ||
172 | switch (clg->state) { | ||
173 | case 1: | ||
174 | if (rnd < clg->a4) { | ||
175 | clg->state = 4; | ||
176 | return true; | ||
177 | } else if (clg->a4 < rnd && rnd < clg->a1) { | ||
178 | clg->state = 3; | ||
179 | return true; | ||
180 | } else if (clg->a1 < rnd) | ||
181 | clg->state = 1; | ||
182 | |||
183 | break; | ||
184 | case 2: | ||
185 | if (rnd < clg->a5) { | ||
186 | clg->state = 3; | ||
187 | return true; | ||
188 | } else | ||
189 | clg->state = 2; | ||
190 | |||
191 | break; | ||
192 | case 3: | ||
193 | if (rnd < clg->a3) | ||
194 | clg->state = 2; | ||
195 | else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) { | ||
196 | clg->state = 1; | ||
197 | return true; | ||
198 | } else if (clg->a2 + clg->a3 < rnd) { | ||
199 | clg->state = 3; | ||
200 | return true; | ||
201 | } | ||
202 | break; | ||
203 | case 4: | ||
204 | clg->state = 1; | ||
205 | break; | ||
206 | } | ||
207 | |||
208 | return false; | ||
209 | } | ||
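
As a rough illustration of how a single net_random() draw is partitioned by the state-1 thresholds above ([0, a4) gives an isolated loss, [a4, a1) starts a burst, larger values keep the gap period going), here is a user-space sketch; boundary values exactly equal to a threshold are glossed over, and the helper name and sample thresholds are made up.

#include <stdint.h>
#include <stdio.h>

enum outcome { STAY_GAP, ISOLATED_LOSS, BURST_LOSS };

/* Classify one uniform 32-bit draw according to the state-1 thresholds. */
static enum outcome state1_decision(uint32_t rnd, uint32_t a1, uint32_t a4)
{
	if (rnd < a4)
		return ISOLATED_LOSS;	/* move to state 4 */
	if (rnd < a1)
		return BURST_LOSS;	/* move to state 3 */
	return STAY_GAP;		/* remain in state 1 */
}

int main(void)
{
	/* sample thresholds: a4 ~ 0.1%, a1 ~ 1% of the u32 range */
	uint32_t a4 = (uint32_t)(0.001 * UINT32_MAX);
	uint32_t a1 = (uint32_t)(0.01 * UINT32_MAX);

	printf("%d %d %d\n",
	       state1_decision(1000, a1, a4),		 /* isolated loss */
	       state1_decision(a4 + 1000, a1, a4),	 /* burst loss */
	       state1_decision(UINT32_MAX / 2, a1, a4)); /* stay in gap */
	return 0;
}
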
210 | |||
211 | /* loss_gilb_ell - Gilbert-Elliot model loss generator | ||
212 | * Generates losses according to the Gilbert-Elliot loss model or | ||
213 | * its special cases (Gilbert or Simple Gilbert) | ||
214 | * | ||
215 | * Makes a comparison between a random number and the transition | ||
216 | * probabilities outgoing from the current state, then decides the | ||
217 | * next state. A second random number is extracted and the comparison | ||
218 | * with the loss probability of the current state decides if the next | ||
219 | * packet will be transmitted or lost. | ||
220 | */ | ||
221 | static bool loss_gilb_ell(struct netem_sched_data *q) | ||
222 | { | ||
223 | struct clgstate *clg = &q->clg; | ||
224 | |||
225 | switch (clg->state) { | ||
226 | case 1: | ||
227 | if (net_random() < clg->a1) | ||
228 | clg->state = 2; | ||
229 | if (net_random() < clg->a4) | ||
230 | return true; | ||
231 | case 2: | ||
232 | if (net_random() < clg->a2) | ||
233 | clg->state = 1; | ||
234 | if (clg->a3 > net_random()) | ||
235 | return true; | ||
236 | } | ||
237 | |||
238 | return false; | ||
239 | } | ||
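
Below is a user-space Monte Carlo sketch of the Gilbert-Elliot chain as parameterized here (a1 = p, a2 = r, a3 = h, a4 = 1-k), using floating-point probabilities instead of u32 thresholds. It is a simplified model of the chain described above, not a line-for-line port of loss_gilb_ell(), and the sample parameters are made up.

#include <stdio.h>
#include <stdlib.h>

static double frand(void) { return rand() / (double)RAND_MAX; }

int main(void)
{
	/* p: good->bad, r: bad->good, h: loss prob while bad,
	 * one_minus_k: loss prob while good (sample values)
	 */
	double p = 0.01, r = 0.30, h = 0.50, one_minus_k = 0.0;
	int state = 1;			/* 1 = good, 2 = bad */
	long losses = 0, n = 1000000;

	for (long i = 0; i < n; i++) {
		if (state == 1) {
			if (frand() < p)
				state = 2;
			if (frand() < one_minus_k)
				losses++;
		} else {
			if (frand() < r)
				state = 1;
			if (frand() < h)
				losses++;
		}
	}
	printf("simulated loss rate: %.4f\n", (double)losses / n);
	return 0;
}
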
240 | |||
241 | static bool loss_event(struct netem_sched_data *q) | ||
242 | { | ||
243 | switch (q->loss_model) { | ||
244 | case CLG_RANDOM: | ||
245 | /* Random packet drop 0 => none, ~0 => all */ | ||
246 | return q->loss && q->loss >= get_crandom(&q->loss_cor); | ||
247 | |||
248 | case CLG_4_STATES: | ||
249 | /* 4-state loss model algorithm (used also for the GI model). | ||
250 | * Extracts a value from the 4-state Markov loss generator; | ||
251 | * if it is 1, a packet is dropped and, if needed, the event | ||
252 | * is written to the kernel logs. | ||
253 | */ | ||
254 | return loss_4state(q); | ||
255 | |||
256 | case CLG_GILB_ELL: | ||
257 | /* Gilbert-Elliot loss model algorithm. | ||
258 | * Extracts a value from the Gilbert-Elliot loss generator; | ||
259 | * if it is 1, a packet is dropped and, if needed, the event | ||
260 | * is written to the kernel logs. | ||
261 | */ | ||
262 | return loss_gilb_ell(q); | ||
263 | } | ||
264 | |||
265 | return false; /* not reached */ | ||
266 | } | ||
267 | |||
268 | |||
118 | /* tabledist - return a pseudo-randomly distributed value with mean mu and | 269 | /* tabledist - return a pseudo-randomly distributed value with mean mu and |
119 | * std deviation sigma. Uses table lookup to approximate the desired | 270 | * std deviation sigma. Uses table lookup to approximate the desired |
120 | * distribution, and a uniformly-distributed pseudo-random source. | 271 | * distribution, and a uniformly-distributed pseudo-random source. |
@@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
161 | int ret; | 312 | int ret; |
162 | int count = 1; | 313 | int count = 1; |
163 | 314 | ||
164 | pr_debug("netem_enqueue skb=%p\n", skb); | ||
165 | |||
166 | /* Random duplication */ | 315 | /* Random duplication */ |
167 | if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) | 316 | if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) |
168 | ++count; | 317 | ++count; |
169 | 318 | ||
170 | /* Random packet drop 0 => none, ~0 => all */ | 319 | /* Drop packet? */ |
171 | if (q->loss && q->loss >= get_crandom(&q->loss_cor)) | 320 | if (loss_event(q)) |
172 | --count; | 321 | --count; |
173 | 322 | ||
174 | if (count == 0) { | 323 | if (count == 0) { |
@@ -211,8 +360,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
211 | } | 360 | } |
212 | 361 | ||
213 | cb = netem_skb_cb(skb); | 362 | cb = netem_skb_cb(skb); |
214 | if (q->gap == 0 || /* not doing reordering */ | 363 | if (q->gap == 0 || /* not doing reordering */ |
215 | q->counter < q->gap || /* inside last reordering gap */ | 364 | q->counter < q->gap || /* inside last reordering gap */ |
216 | q->reorder < get_crandom(&q->reorder_cor)) { | 365 | q->reorder < get_crandom(&q->reorder_cor)) { |
217 | psched_time_t now; | 366 | psched_time_t now; |
218 | psched_tdiff_t delay; | 367 | psched_tdiff_t delay; |
@@ -238,19 +387,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
238 | ret = NET_XMIT_SUCCESS; | 387 | ret = NET_XMIT_SUCCESS; |
239 | } | 388 | } |
240 | 389 | ||
241 | if (likely(ret == NET_XMIT_SUCCESS)) { | 390 | if (ret != NET_XMIT_SUCCESS) { |
242 | sch->q.qlen++; | 391 | if (net_xmit_drop_count(ret)) { |
243 | sch->bstats.bytes += qdisc_pkt_len(skb); | 392 | sch->qstats.drops++; |
244 | sch->bstats.packets++; | 393 | return ret; |
245 | } else if (net_xmit_drop_count(ret)) { | 394 | } |
246 | sch->qstats.drops++; | ||
247 | } | 395 | } |
248 | 396 | ||
249 | pr_debug("netem: enqueue ret %d\n", ret); | 397 | sch->q.qlen++; |
250 | return ret; | 398 | return NET_XMIT_SUCCESS; |
251 | } | 399 | } |
252 | 400 | ||
253 | static unsigned int netem_drop(struct Qdisc* sch) | 401 | static unsigned int netem_drop(struct Qdisc *sch) |
254 | { | 402 | { |
255 | struct netem_sched_data *q = qdisc_priv(sch); | 403 | struct netem_sched_data *q = qdisc_priv(sch); |
256 | unsigned int len = 0; | 404 | unsigned int len = 0; |
@@ -267,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
267 | struct netem_sched_data *q = qdisc_priv(sch); | 415 | struct netem_sched_data *q = qdisc_priv(sch); |
268 | struct sk_buff *skb; | 416 | struct sk_buff *skb; |
269 | 417 | ||
270 | if (sch->flags & TCQ_F_THROTTLED) | 418 | if (qdisc_is_throttled(sch)) |
271 | return NULL; | 419 | return NULL; |
272 | 420 | ||
273 | skb = q->qdisc->ops->peek(q->qdisc); | 421 | skb = q->qdisc->ops->peek(q->qdisc); |
@@ -289,8 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) | |||
289 | if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) | 437 | if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) |
290 | skb->tstamp.tv64 = 0; | 438 | skb->tstamp.tv64 = 0; |
291 | #endif | 439 | #endif |
292 | pr_debug("netem_dequeue: return skb=%p\n", skb); | 440 | |
293 | sch->q.qlen--; | 441 | sch->q.qlen--; |
442 | qdisc_unthrottled(sch); | ||
443 | qdisc_bstats_update(sch, skb); | ||
294 | return skb; | 444 | return skb; |
295 | } | 445 | } |
296 | 446 | ||
@@ -309,6 +459,16 @@ static void netem_reset(struct Qdisc *sch) | |||
309 | qdisc_watchdog_cancel(&q->watchdog); | 459 | qdisc_watchdog_cancel(&q->watchdog); |
310 | } | 460 | } |
311 | 461 | ||
462 | static void dist_free(struct disttable *d) | ||
463 | { | ||
464 | if (d) { | ||
465 | if (is_vmalloc_addr(d)) | ||
466 | vfree(d); | ||
467 | else | ||
468 | kfree(d); | ||
469 | } | ||
470 | } | ||
471 | |||
312 | /* | 472 | /* |
313 | * Distribution data is a variable size payload containing | 473 | * Distribution data is a variable size payload containing |
314 | * signed 16 bit values. | 474 | * signed 16 bit values. |
@@ -316,16 +476,20 @@ static void netem_reset(struct Qdisc *sch) | |||
316 | static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) | 476 | static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) |
317 | { | 477 | { |
318 | struct netem_sched_data *q = qdisc_priv(sch); | 478 | struct netem_sched_data *q = qdisc_priv(sch); |
319 | unsigned long n = nla_len(attr)/sizeof(__s16); | 479 | size_t n = nla_len(attr)/sizeof(__s16); |
320 | const __s16 *data = nla_data(attr); | 480 | const __s16 *data = nla_data(attr); |
321 | spinlock_t *root_lock; | 481 | spinlock_t *root_lock; |
322 | struct disttable *d; | 482 | struct disttable *d; |
323 | int i; | 483 | int i; |
484 | size_t s; | ||
324 | 485 | ||
325 | if (n > 65536) | 486 | if (n > NETEM_DIST_MAX) |
326 | return -EINVAL; | 487 | return -EINVAL; |
327 | 488 | ||
328 | d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); | 489 | s = sizeof(struct disttable) + n * sizeof(s16); |
490 | d = kmalloc(s, GFP_KERNEL); | ||
491 | if (!d) | ||
492 | d = vmalloc(s); | ||
329 | if (!d) | 493 | if (!d) |
330 | return -ENOMEM; | 494 | return -ENOMEM; |
331 | 495 | ||
@@ -336,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) | |||
336 | root_lock = qdisc_root_sleeping_lock(sch); | 500 | root_lock = qdisc_root_sleeping_lock(sch); |
337 | 501 | ||
338 | spin_lock_bh(root_lock); | 502 | spin_lock_bh(root_lock); |
339 | kfree(q->delay_dist); | 503 | dist_free(q->delay_dist); |
340 | q->delay_dist = d; | 504 | q->delay_dist = d; |
341 | spin_unlock_bh(root_lock); | 505 | spin_unlock_bh(root_lock); |
342 | return 0; | 506 | return 0; |
@@ -370,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr) | |||
370 | init_crandom(&q->corrupt_cor, r->correlation); | 534 | init_crandom(&q->corrupt_cor, r->correlation); |
371 | } | 535 | } |
372 | 536 | ||
537 | static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr) | ||
538 | { | ||
539 | struct netem_sched_data *q = qdisc_priv(sch); | ||
540 | const struct nlattr *la; | ||
541 | int rem; | ||
542 | |||
543 | nla_for_each_nested(la, attr, rem) { | ||
544 | u16 type = nla_type(la); | ||
545 | |||
546 | switch(type) { | ||
547 | case NETEM_LOSS_GI: { | ||
548 | const struct tc_netem_gimodel *gi = nla_data(la); | ||
549 | |||
550 | if (nla_len(la) != sizeof(struct tc_netem_gimodel)) { | ||
551 | pr_info("netem: incorrect gi model size\n"); | ||
552 | return -EINVAL; | ||
553 | } | ||
554 | |||
555 | q->loss_model = CLG_4_STATES; | ||
556 | |||
557 | q->clg.state = 1; | ||
558 | q->clg.a1 = gi->p13; | ||
559 | q->clg.a2 = gi->p31; | ||
560 | q->clg.a3 = gi->p32; | ||
561 | q->clg.a4 = gi->p14; | ||
562 | q->clg.a5 = gi->p23; | ||
563 | break; | ||
564 | } | ||
565 | |||
566 | case NETEM_LOSS_GE: { | ||
567 | const struct tc_netem_gemodel *ge = nla_data(la); | ||
568 | |||
569 | if (nla_len(la) != sizeof(struct tc_netem_gemodel)) { | ||
570 | pr_info("netem: incorrect gi model size\n"); | ||
571 | return -EINVAL; | ||
572 | } | ||
573 | |||
574 | q->loss_model = CLG_GILB_ELL; | ||
575 | q->clg.state = 1; | ||
576 | q->clg.a1 = ge->p; | ||
577 | q->clg.a2 = ge->r; | ||
578 | q->clg.a3 = ge->h; | ||
579 | q->clg.a4 = ge->k1; | ||
580 | break; | ||
581 | } | ||
582 | |||
583 | default: | ||
584 | pr_info("netem: unknown loss type %u\n", type); | ||
585 | return -EINVAL; | ||
586 | } | ||
587 | } | ||
588 | |||
589 | return 0; | ||
590 | } | ||
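
The GI/GE parameters copied into q->clg above are u32 values that the generators compare directly against net_random(), i.e. fixed-point probabilities spread over the full 32-bit range. A hypothetical user-space helper showing that scaling follows; prob_to_u32 is an illustrative name, and the real encoding is performed by the tc userspace tool.

#include <stdio.h>
#include <stdint.h>

/* Scale a probability in [0, 1] to the u32 threshold form used for the
 * GI/GE fields (p13, p31, ..., p, r, h, k1). Illustrative helper only.
 */
static uint32_t prob_to_u32(double p)
{
	if (p <= 0.0)
		return 0;
	if (p >= 1.0)
		return UINT32_MAX;
	return (uint32_t)(p * UINT32_MAX);
}

int main(void)
{
	printf("p = 0.01 -> %u\n", prob_to_u32(0.01));	/* ~1% of the u32 range */
	return 0;
}
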
591 | |||
373 | static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { | 592 | static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { |
374 | [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, | 593 | [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, |
375 | [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, | 594 | [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, |
376 | [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, | 595 | [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, |
596 | [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, | ||
377 | }; | 597 | }; |
378 | 598 | ||
379 | static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, | 599 | static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, |
@@ -381,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, | |||
381 | { | 601 | { |
382 | int nested_len = nla_len(nla) - NLA_ALIGN(len); | 602 | int nested_len = nla_len(nla) - NLA_ALIGN(len); |
383 | 603 | ||
384 | if (nested_len < 0) | 604 | if (nested_len < 0) { |
605 | pr_info("netem: invalid attributes len %d\n", nested_len); | ||
385 | return -EINVAL; | 606 | return -EINVAL; |
607 | } | ||
608 | |||
386 | if (nested_len >= nla_attr_size(0)) | 609 | if (nested_len >= nla_attr_size(0)) |
387 | return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), | 610 | return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), |
388 | nested_len, policy); | 611 | nested_len, policy); |
612 | |||
389 | memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); | 613 | memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); |
390 | return 0; | 614 | return 0; |
391 | } | 615 | } |
@@ -408,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) | |||
408 | 632 | ||
409 | ret = fifo_set_limit(q->qdisc, qopt->limit); | 633 | ret = fifo_set_limit(q->qdisc, qopt->limit); |
410 | if (ret) { | 634 | if (ret) { |
411 | pr_debug("netem: can't set fifo limit\n"); | 635 | pr_info("netem: can't set fifo limit\n"); |
412 | return ret; | 636 | return ret; |
413 | } | 637 | } |
414 | 638 | ||
@@ -441,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) | |||
441 | if (tb[TCA_NETEM_CORRUPT]) | 665 | if (tb[TCA_NETEM_CORRUPT]) |
442 | get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); | 666 | get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); |
443 | 667 | ||
444 | return 0; | 668 | q->loss_model = CLG_RANDOM; |
669 | if (tb[TCA_NETEM_LOSS]) | ||
670 | ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]); | ||
671 | |||
672 | return ret; | ||
445 | } | 673 | } |
446 | 674 | ||
447 | /* | 675 | /* |
@@ -477,8 +705,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) | |||
477 | __skb_queue_after(list, skb, nskb); | 705 | __skb_queue_after(list, skb, nskb); |
478 | 706 | ||
479 | sch->qstats.backlog += qdisc_pkt_len(nskb); | 707 | sch->qstats.backlog += qdisc_pkt_len(nskb); |
480 | sch->bstats.bytes += qdisc_pkt_len(nskb); | ||
481 | sch->bstats.packets++; | ||
482 | 708 | ||
483 | return NET_XMIT_SUCCESS; | 709 | return NET_XMIT_SUCCESS; |
484 | } | 710 | } |
@@ -538,17 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) | |||
538 | 764 | ||
539 | qdisc_watchdog_init(&q->watchdog, sch); | 765 | qdisc_watchdog_init(&q->watchdog, sch); |
540 | 766 | ||
541 | q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 767 | q->loss_model = CLG_RANDOM; |
542 | &tfifo_qdisc_ops, | 768 | q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, |
543 | TC_H_MAKE(sch->handle, 1)); | 769 | TC_H_MAKE(sch->handle, 1)); |
544 | if (!q->qdisc) { | 770 | if (!q->qdisc) { |
545 | pr_debug("netem: qdisc create failed\n"); | 771 | pr_notice("netem: qdisc create tfifo qdisc failed\n"); |
546 | return -ENOMEM; | 772 | return -ENOMEM; |
547 | } | 773 | } |
548 | 774 | ||
549 | ret = netem_change(sch, opt); | 775 | ret = netem_change(sch, opt); |
550 | if (ret) { | 776 | if (ret) { |
551 | pr_debug("netem: change failed\n"); | 777 | pr_info("netem: change failed\n"); |
552 | qdisc_destroy(q->qdisc); | 778 | qdisc_destroy(q->qdisc); |
553 | } | 779 | } |
554 | return ret; | 780 | return ret; |
@@ -560,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch) | |||
560 | 786 | ||
561 | qdisc_watchdog_cancel(&q->watchdog); | 787 | qdisc_watchdog_cancel(&q->watchdog); |
562 | qdisc_destroy(q->qdisc); | 788 | qdisc_destroy(q->qdisc); |
563 | kfree(q->delay_dist); | 789 | dist_free(q->delay_dist); |
790 | } | ||
791 | |||
792 | static int dump_loss_model(const struct netem_sched_data *q, | ||
793 | struct sk_buff *skb) | ||
794 | { | ||
795 | struct nlattr *nest; | ||
796 | |||
797 | nest = nla_nest_start(skb, TCA_NETEM_LOSS); | ||
798 | if (nest == NULL) | ||
799 | goto nla_put_failure; | ||
800 | |||
801 | switch (q->loss_model) { | ||
802 | case CLG_RANDOM: | ||
803 | /* legacy loss model */ | ||
804 | nla_nest_cancel(skb, nest); | ||
805 | return 0; /* no data */ | ||
806 | |||
807 | case CLG_4_STATES: { | ||
808 | struct tc_netem_gimodel gi = { | ||
809 | .p13 = q->clg.a1, | ||
810 | .p31 = q->clg.a2, | ||
811 | .p32 = q->clg.a3, | ||
812 | .p14 = q->clg.a4, | ||
813 | .p23 = q->clg.a5, | ||
814 | }; | ||
815 | |||
816 | NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi); | ||
817 | break; | ||
818 | } | ||
819 | case CLG_GILB_ELL: { | ||
820 | struct tc_netem_gemodel ge = { | ||
821 | .p = q->clg.a1, | ||
822 | .r = q->clg.a2, | ||
823 | .h = q->clg.a3, | ||
824 | .k1 = q->clg.a4, | ||
825 | }; | ||
826 | |||
827 | NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge); | ||
828 | break; | ||
829 | } | ||
830 | } | ||
831 | |||
832 | nla_nest_end(skb, nest); | ||
833 | return 0; | ||
834 | |||
835 | nla_put_failure: | ||
836 | nla_nest_cancel(skb, nest); | ||
837 | return -1; | ||
564 | } | 838 | } |
565 | 839 | ||
566 | static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) | 840 | static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) |
567 | { | 841 | { |
568 | const struct netem_sched_data *q = qdisc_priv(sch); | 842 | const struct netem_sched_data *q = qdisc_priv(sch); |
569 | unsigned char *b = skb_tail_pointer(skb); | 843 | struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb); |
570 | struct nlattr *nla = (struct nlattr *) b; | ||
571 | struct tc_netem_qopt qopt; | 844 | struct tc_netem_qopt qopt; |
572 | struct tc_netem_corr cor; | 845 | struct tc_netem_corr cor; |
573 | struct tc_netem_reorder reorder; | 846 | struct tc_netem_reorder reorder; |
@@ -594,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
594 | corrupt.correlation = q->corrupt_cor.rho; | 867 | corrupt.correlation = q->corrupt_cor.rho; |
595 | NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); | 868 | NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); |
596 | 869 | ||
597 | nla->nla_len = skb_tail_pointer(skb) - b; | 870 | if (dump_loss_model(q, skb) != 0) |
871 | goto nla_put_failure; | ||
598 | 872 | ||
599 | return skb->len; | 873 | return nla_nest_end(skb, nla); |
600 | 874 | ||
601 | nla_put_failure: | 875 | nla_put_failure: |
602 | nlmsg_trim(skb, b); | 876 | nlmsg_trim(skb, nla); |
603 | return -1; | 877 | return -1; |
604 | } | 878 | } |
605 | 879 | ||
880 | static int netem_dump_class(struct Qdisc *sch, unsigned long cl, | ||
881 | struct sk_buff *skb, struct tcmsg *tcm) | ||
882 | { | ||
883 | struct netem_sched_data *q = qdisc_priv(sch); | ||
884 | |||
885 | if (cl != 1) /* only one class */ | ||
886 | return -ENOENT; | ||
887 | |||
888 | tcm->tcm_handle |= TC_H_MIN(1); | ||
889 | tcm->tcm_info = q->qdisc->handle; | ||
890 | |||
891 | return 0; | ||
892 | } | ||
893 | |||
894 | static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | ||
895 | struct Qdisc **old) | ||
896 | { | ||
897 | struct netem_sched_data *q = qdisc_priv(sch); | ||
898 | |||
899 | if (new == NULL) | ||
900 | new = &noop_qdisc; | ||
901 | |||
902 | sch_tree_lock(sch); | ||
903 | *old = q->qdisc; | ||
904 | q->qdisc = new; | ||
905 | qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); | ||
906 | qdisc_reset(*old); | ||
907 | sch_tree_unlock(sch); | ||
908 | |||
909 | return 0; | ||
910 | } | ||
911 | |||
912 | static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) | ||
913 | { | ||
914 | struct netem_sched_data *q = qdisc_priv(sch); | ||
915 | return q->qdisc; | ||
916 | } | ||
917 | |||
918 | static unsigned long netem_get(struct Qdisc *sch, u32 classid) | ||
919 | { | ||
920 | return 1; | ||
921 | } | ||
922 | |||
923 | static void netem_put(struct Qdisc *sch, unsigned long arg) | ||
924 | { | ||
925 | } | ||
926 | |||
927 | static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) | ||
928 | { | ||
929 | if (!walker->stop) { | ||
930 | if (walker->count >= walker->skip) | ||
931 | if (walker->fn(sch, 1, walker) < 0) { | ||
932 | walker->stop = 1; | ||
933 | return; | ||
934 | } | ||
935 | walker->count++; | ||
936 | } | ||
937 | } | ||
938 | |||
939 | static const struct Qdisc_class_ops netem_class_ops = { | ||
940 | .graft = netem_graft, | ||
941 | .leaf = netem_leaf, | ||
942 | .get = netem_get, | ||
943 | .put = netem_put, | ||
944 | .walk = netem_walk, | ||
945 | .dump = netem_dump_class, | ||
946 | }; | ||
947 | |||
606 | static struct Qdisc_ops netem_qdisc_ops __read_mostly = { | 948 | static struct Qdisc_ops netem_qdisc_ops __read_mostly = { |
607 | .id = "netem", | 949 | .id = "netem", |
950 | .cl_ops = &netem_class_ops, | ||
608 | .priv_size = sizeof(struct netem_sched_data), | 951 | .priv_size = sizeof(struct netem_sched_data), |
609 | .enqueue = netem_enqueue, | 952 | .enqueue = netem_enqueue, |
610 | .dequeue = netem_dequeue, | 953 | .dequeue = netem_dequeue, |
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 0748fb1e3a49..2a318f2dc3e5 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c | |||
@@ -22,8 +22,7 @@ | |||
22 | #include <net/pkt_sched.h> | 22 | #include <net/pkt_sched.h> |
23 | 23 | ||
24 | 24 | ||
25 | struct prio_sched_data | 25 | struct prio_sched_data { |
26 | { | ||
27 | int bands; | 26 | int bands; |
28 | struct tcf_proto *filter_list; | 27 | struct tcf_proto *filter_list; |
29 | u8 prio2band[TC_PRIO_MAX+1]; | 28 | u8 prio2band[TC_PRIO_MAX+1]; |
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) | |||
54 | if (!q->filter_list || err < 0) { | 53 | if (!q->filter_list || err < 0) { |
55 | if (TC_H_MAJ(band)) | 54 | if (TC_H_MAJ(band)) |
56 | band = 0; | 55 | band = 0; |
57 | return q->queues[q->prio2band[band&TC_PRIO_MAX]]; | 56 | return q->queues[q->prio2band[band & TC_PRIO_MAX]]; |
58 | } | 57 | } |
59 | band = res.classid; | 58 | band = res.classid; |
60 | } | 59 | } |
@@ -84,8 +83,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
84 | 83 | ||
85 | ret = qdisc_enqueue(skb, qdisc); | 84 | ret = qdisc_enqueue(skb, qdisc); |
86 | if (ret == NET_XMIT_SUCCESS) { | 85 | if (ret == NET_XMIT_SUCCESS) { |
87 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
88 | sch->bstats.packets++; | ||
89 | sch->q.qlen++; | 86 | sch->q.qlen++; |
90 | return NET_XMIT_SUCCESS; | 87 | return NET_XMIT_SUCCESS; |
91 | } | 88 | } |
@@ -108,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch) | |||
108 | return NULL; | 105 | return NULL; |
109 | } | 106 | } |
110 | 107 | ||
111 | static struct sk_buff *prio_dequeue(struct Qdisc* sch) | 108 | static struct sk_buff *prio_dequeue(struct Qdisc *sch) |
112 | { | 109 | { |
113 | struct prio_sched_data *q = qdisc_priv(sch); | 110 | struct prio_sched_data *q = qdisc_priv(sch); |
114 | int prio; | 111 | int prio; |
@@ -117,6 +114,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch) | |||
117 | struct Qdisc *qdisc = q->queues[prio]; | 114 | struct Qdisc *qdisc = q->queues[prio]; |
118 | struct sk_buff *skb = qdisc->dequeue(qdisc); | 115 | struct sk_buff *skb = qdisc->dequeue(qdisc); |
119 | if (skb) { | 116 | if (skb) { |
117 | qdisc_bstats_update(sch, skb); | ||
120 | sch->q.qlen--; | 118 | sch->q.qlen--; |
121 | return skb; | 119 | return skb; |
122 | } | 120 | } |
@@ -125,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch) | |||
125 | 123 | ||
126 | } | 124 | } |
127 | 125 | ||
128 | static unsigned int prio_drop(struct Qdisc* sch) | 126 | static unsigned int prio_drop(struct Qdisc *sch) |
129 | { | 127 | { |
130 | struct prio_sched_data *q = qdisc_priv(sch); | 128 | struct prio_sched_data *q = qdisc_priv(sch); |
131 | int prio; | 129 | int prio; |
@@ -144,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch) | |||
144 | 142 | ||
145 | 143 | ||
146 | static void | 144 | static void |
147 | prio_reset(struct Qdisc* sch) | 145 | prio_reset(struct Qdisc *sch) |
148 | { | 146 | { |
149 | int prio; | 147 | int prio; |
150 | struct prio_sched_data *q = qdisc_priv(sch); | 148 | struct prio_sched_data *q = qdisc_priv(sch); |
151 | 149 | ||
152 | for (prio=0; prio<q->bands; prio++) | 150 | for (prio = 0; prio < q->bands; prio++) |
153 | qdisc_reset(q->queues[prio]); | 151 | qdisc_reset(q->queues[prio]); |
154 | sch->q.qlen = 0; | 152 | sch->q.qlen = 0; |
155 | } | 153 | } |
156 | 154 | ||
157 | static void | 155 | static void |
158 | prio_destroy(struct Qdisc* sch) | 156 | prio_destroy(struct Qdisc *sch) |
159 | { | 157 | { |
160 | int prio; | 158 | int prio; |
161 | struct prio_sched_data *q = qdisc_priv(sch); | 159 | struct prio_sched_data *q = qdisc_priv(sch); |
162 | 160 | ||
163 | tcf_destroy_chain(&q->filter_list); | 161 | tcf_destroy_chain(&q->filter_list); |
164 | for (prio=0; prio<q->bands; prio++) | 162 | for (prio = 0; prio < q->bands; prio++) |
165 | qdisc_destroy(q->queues[prio]); | 163 | qdisc_destroy(q->queues[prio]); |
166 | } | 164 | } |
167 | 165 | ||
@@ -178,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) | |||
178 | if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) | 176 | if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) |
179 | return -EINVAL; | 177 | return -EINVAL; |
180 | 178 | ||
181 | for (i=0; i<=TC_PRIO_MAX; i++) { | 179 | for (i = 0; i <= TC_PRIO_MAX; i++) { |
182 | if (qopt->priomap[i] >= qopt->bands) | 180 | if (qopt->priomap[i] >= qopt->bands) |
183 | return -EINVAL; | 181 | return -EINVAL; |
184 | } | 182 | } |
@@ -187,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) | |||
187 | q->bands = qopt->bands; | 185 | q->bands = qopt->bands; |
188 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); | 186 | memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); |
189 | 187 | ||
190 | for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { | 188 | for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { |
191 | struct Qdisc *child = q->queues[i]; | 189 | struct Qdisc *child = q->queues[i]; |
192 | q->queues[i] = &noop_qdisc; | 190 | q->queues[i] = &noop_qdisc; |
193 | if (child != &noop_qdisc) { | 191 | if (child != &noop_qdisc) { |
@@ -197,10 +195,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) | |||
197 | } | 195 | } |
198 | sch_tree_unlock(sch); | 196 | sch_tree_unlock(sch); |
199 | 197 | ||
200 | for (i=0; i<q->bands; i++) { | 198 | for (i = 0; i < q->bands; i++) { |
201 | if (q->queues[i] == &noop_qdisc) { | 199 | if (q->queues[i] == &noop_qdisc) { |
202 | struct Qdisc *child, *old; | 200 | struct Qdisc *child, *old; |
203 | child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, | 201 | |
202 | child = qdisc_create_dflt(sch->dev_queue, | ||
204 | &pfifo_qdisc_ops, | 203 | &pfifo_qdisc_ops, |
205 | TC_H_MAKE(sch->handle, i + 1)); | 204 | TC_H_MAKE(sch->handle, i + 1)); |
206 | if (child) { | 205 | if (child) { |
@@ -225,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) | |||
225 | struct prio_sched_data *q = qdisc_priv(sch); | 224 | struct prio_sched_data *q = qdisc_priv(sch); |
226 | int i; | 225 | int i; |
227 | 226 | ||
228 | for (i=0; i<TCQ_PRIO_BANDS; i++) | 227 | for (i = 0; i < TCQ_PRIO_BANDS; i++) |
229 | q->queues[i] = &noop_qdisc; | 228 | q->queues[i] = &noop_qdisc; |
230 | 229 | ||
231 | if (opt == NULL) { | 230 | if (opt == NULL) { |
@@ -233,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) | |||
233 | } else { | 232 | } else { |
234 | int err; | 233 | int err; |
235 | 234 | ||
236 | if ((err= prio_tune(sch, opt)) != 0) | 235 | if ((err = prio_tune(sch, opt)) != 0) |
237 | return err; | 236 | return err; |
238 | } | 237 | } |
239 | return 0; | 238 | return 0; |
@@ -246,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
246 | struct tc_prio_qopt opt; | 245 | struct tc_prio_qopt opt; |
247 | 246 | ||
248 | opt.bands = q->bands; | 247 | opt.bands = q->bands; |
249 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); | 248 | memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); |
250 | 249 | ||
251 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 250 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
252 | 251 | ||
@@ -343,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
343 | arg->count++; | 342 | arg->count++; |
344 | continue; | 343 | continue; |
345 | } | 344 | } |
346 | if (arg->fn(sch, prio+1, arg) < 0) { | 345 | if (arg->fn(sch, prio + 1, arg) < 0) { |
347 | arg->stop = 1; | 346 | arg->stop = 1; |
348 | break; | 347 | break; |
349 | } | 348 | } |
@@ -351,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
351 | } | 350 | } |
352 | } | 351 | } |
353 | 352 | ||
354 | static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) | 353 | static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl) |
355 | { | 354 | { |
356 | struct prio_sched_data *q = qdisc_priv(sch); | 355 | struct prio_sched_data *q = qdisc_priv(sch); |
357 | 356 | ||
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c new file mode 100644 index 000000000000..103343408593 --- /dev/null +++ b/net/sched/sch_qfq.c | |||
@@ -0,0 +1,1137 @@ | |||
1 | /* | ||
2 | * net/sched/sch_qfq.c Quick Fair Queueing Scheduler. | ||
3 | * | ||
4 | * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * version 2 as published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/bitops.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/netdevice.h> | ||
16 | #include <linux/pkt_sched.h> | ||
17 | #include <net/sch_generic.h> | ||
18 | #include <net/pkt_sched.h> | ||
19 | #include <net/pkt_cls.h> | ||
20 | |||
21 | |||
22 | /* Quick Fair Queueing | ||
23 | =================== | ||
24 | |||
25 | Sources: | ||
26 | |||
27 | Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient | ||
28 | Packet Scheduling with Tight Bandwidth Distribution Guarantees." | ||
29 | |||
30 | See also: | ||
31 | http://retis.sssup.it/~fabio/linux/qfq/ | ||
32 | */ | ||
33 | |||
34 | /* | ||
35 | |||
36 | Virtual time computations. | ||
37 | |||
38 | S, F and V are all computed in fixed point arithmetic with | ||
39 | FRAC_BITS decimal bits. | ||
40 | |||
41 | QFQ_MAX_INDEX is the maximum index allowed for a group. We need | ||
42 | one bit per index. | ||
43 | QFQ_MAX_WSHIFT is the maximum power of two supported as a weight. | ||
44 | |||
45 | The layout of the bits is as below: | ||
46 | |||
47 | [ MTU_SHIFT ][ FRAC_BITS ] | ||
48 | [ MAX_INDEX ][ MIN_SLOT_SHIFT ] | ||
49 | ^.__grp->index = 0 | ||
50 | *.__grp->slot_shift | ||
51 | |||
52 | where MIN_SLOT_SHIFT is derived by difference from the others. | ||
53 | |||
54 | The max group index corresponds to Lmax/w_min, where | ||
55 | Lmax=1<<MTU_SHIFT, w_min = 1. | ||
56 | From this, and knowing how many groups (MAX_INDEX) we want, | ||
57 | we can derive the shift corresponding to each group. | ||
58 | |||
59 | Because we often need to compute | ||
60 | F = S + len/w_i and V = V + len/wsum | ||
61 | instead of storing w_i store the value | ||
62 | inv_w = (1<<FRAC_BITS)/w_i | ||
63 | so we can do F = S + len * inv_w * wsum. | ||
64 | We use W_TOT in the formulas so we can easily move between | ||
65 | static and adaptive weight sum. | ||
66 | |||
67 | The per-scheduler-instance data contain all the data structures | ||
68 | for the scheduler: bitmaps and bucket lists. | ||
69 | |||
70 | */ | ||
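
A small user-space sketch of the fixed-point weight representation described above: a class stores inv_w = ONE_FP / w, and the weight the scheduler effectively accounts for is ONE_FP / inv_w (this is what qfq_change_class() does further down). The constants match the definitions below; the sample weight is illustrative.

#include <stdio.h>
#include <stdint.h>

#define FRAC_BITS 30
#define ONE_FP    (1UL << FRAC_BITS)

int main(void)
{
	uint32_t weight = 12;			/* requested class weight */
	uint32_t inv_w = ONE_FP / weight;	/* what the class stores */
	uint32_t eff_w = ONE_FP / inv_w;	/* what is added to wsum */

	printf("inv_w = %u, effective weight = %u\n", inv_w, eff_w);
	return 0;
}
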
71 | |||
72 | /* | ||
73 | * Maximum number of consecutive slots occupied by backlogged classes | ||
74 | * inside a group. | ||
75 | */ | ||
76 | #define QFQ_MAX_SLOTS 32 | ||
77 | |||
78 | /* | ||
79 | * Shifts used for class<->group mapping. We allow class weights that are | ||
80 | * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the | ||
81 | * group with the smallest index that can support the L_i / r_i configured | ||
82 | * for the class. | ||
83 | * | ||
84 | * grp->index is the index of the group; and grp->slot_shift | ||
85 | * is the shift for the corresponding (scaled) sigma_i. | ||
86 | */ | ||
87 | #define QFQ_MAX_INDEX 19 | ||
88 | #define QFQ_MAX_WSHIFT 16 | ||
89 | |||
90 | #define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT) | ||
91 | #define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT) | ||
92 | |||
93 | #define FRAC_BITS 30 /* fixed point arithmetic */ | ||
94 | #define ONE_FP (1UL << FRAC_BITS) | ||
95 | #define IWSUM (ONE_FP/QFQ_MAX_WSUM) | ||
96 | |||
97 | #define QFQ_MTU_SHIFT 11 | ||
98 | #define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX) | ||
99 | |||
100 | /* | ||
101 | * Possible group states. These values are used as indexes for the bitmaps | ||
102 | * array of struct qfq_queue. | ||
103 | */ | ||
104 | enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE }; | ||
105 | |||
106 | struct qfq_group; | ||
107 | |||
108 | struct qfq_class { | ||
109 | struct Qdisc_class_common common; | ||
110 | |||
111 | unsigned int refcnt; | ||
112 | unsigned int filter_cnt; | ||
113 | |||
114 | struct gnet_stats_basic_packed bstats; | ||
115 | struct gnet_stats_queue qstats; | ||
116 | struct gnet_stats_rate_est rate_est; | ||
117 | struct Qdisc *qdisc; | ||
118 | |||
119 | struct hlist_node next; /* Link for the slot list. */ | ||
120 | u64 S, F; /* flow timestamps (exact) */ | ||
121 | |||
122 | /* group we belong to. In principle we would need the index, | ||
123 | * which is log_2(lmax/weight), but we never reference it | ||
124 | * directly, only the group. | ||
125 | */ | ||
126 | struct qfq_group *grp; | ||
127 | |||
128 | /* these are copied from the flowset. */ | ||
129 | u32 inv_w; /* ONE_FP/weight */ | ||
130 | u32 lmax; /* Max packet size for this flow. */ | ||
131 | }; | ||
132 | |||
133 | struct qfq_group { | ||
134 | u64 S, F; /* group timestamps (approx). */ | ||
135 | unsigned int slot_shift; /* Slot shift. */ | ||
136 | unsigned int index; /* Group index. */ | ||
137 | unsigned int front; /* Index of the front slot. */ | ||
138 | unsigned long full_slots; /* non-empty slots */ | ||
139 | |||
140 | /* Array of RR lists of active classes. */ | ||
141 | struct hlist_head slots[QFQ_MAX_SLOTS]; | ||
142 | }; | ||
143 | |||
144 | struct qfq_sched { | ||
145 | struct tcf_proto *filter_list; | ||
146 | struct Qdisc_class_hash clhash; | ||
147 | |||
148 | u64 V; /* Precise virtual time. */ | ||
149 | u32 wsum; /* weight sum */ | ||
150 | |||
151 | unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ | ||
152 | struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ | ||
153 | }; | ||
154 | |||
155 | static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid) | ||
156 | { | ||
157 | struct qfq_sched *q = qdisc_priv(sch); | ||
158 | struct Qdisc_class_common *clc; | ||
159 | |||
160 | clc = qdisc_class_find(&q->clhash, classid); | ||
161 | if (clc == NULL) | ||
162 | return NULL; | ||
163 | return container_of(clc, struct qfq_class, common); | ||
164 | } | ||
165 | |||
166 | static void qfq_purge_queue(struct qfq_class *cl) | ||
167 | { | ||
168 | unsigned int len = cl->qdisc->q.qlen; | ||
169 | |||
170 | qdisc_reset(cl->qdisc); | ||
171 | qdisc_tree_decrease_qlen(cl->qdisc, len); | ||
172 | } | ||
173 | |||
174 | static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = { | ||
175 | [TCA_QFQ_WEIGHT] = { .type = NLA_U32 }, | ||
176 | [TCA_QFQ_LMAX] = { .type = NLA_U32 }, | ||
177 | }; | ||
178 | |||
179 | /* | ||
180 | * Calculate a flow index, given its weight and maximum packet length. | ||
181 | * index = log_2(maxlen/weight) but we need to apply the scaling. | ||
182 | * This is used only once at flow creation. | ||
183 | */ | ||
184 | static int qfq_calc_index(u32 inv_w, unsigned int maxlen) | ||
185 | { | ||
186 | u64 slot_size = (u64)maxlen * inv_w; | ||
187 | unsigned long size_map; | ||
188 | int index = 0; | ||
189 | |||
190 | size_map = slot_size >> QFQ_MIN_SLOT_SHIFT; | ||
191 | if (!size_map) | ||
192 | goto out; | ||
193 | |||
194 | index = __fls(size_map) + 1; /* basically a log_2 */ | ||
195 | index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1))); | ||
196 | |||
197 | if (index < 0) | ||
198 | index = 0; | ||
199 | out: | ||
200 | pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n", | ||
201 | (unsigned long) ONE_FP/inv_w, maxlen, index); | ||
202 | |||
203 | return index; | ||
204 | } | ||
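
Below is a user-space re-derivation of the group index computed by qfq_calc_index(), using the constants defined at the top of this file (FRAC_BITS = 30, QFQ_MTU_SHIFT = 11, QFQ_MAX_INDEX = 19, hence QFQ_MIN_SLOT_SHIFT = 22). The local fls helper and the sample (weight, lmax) pairs are illustrative.

#include <stdio.h>
#include <stdint.h>

#define FRAC_BITS		30
#define ONE_FP			(1ULL << FRAC_BITS)
#define QFQ_MTU_SHIFT		11
#define QFQ_MAX_INDEX		19
#define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)

static int fls64_local(uint64_t x)	/* index of highest set bit, -1 if 0 */
{
	int i = -1;

	while (x) {
		i++;
		x >>= 1;
	}
	return i;
}

/* Same computation as qfq_calc_index() above: group = ~log2(lmax/weight),
 * scaled into the fixed-point slot space.
 */
static int calc_index(uint32_t inv_w, unsigned int maxlen)
{
	uint64_t slot_size = (uint64_t)maxlen * inv_w;
	uint64_t size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
	int index;

	if (!size_map)
		return 0;
	index = fls64_local(size_map) + 1;
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
	return index < 0 ? 0 : index;
}

int main(void)
{
	unsigned int weights[] = { 1, 16, 256 };

	for (unsigned int i = 0; i < 3; i++) {
		uint32_t inv_w = (uint32_t)(ONE_FP / weights[i]);

		printf("weight %4u, lmax 1500 -> group %d\n",
		       weights[i], calc_index(inv_w, 1500));
	}
	return 0;
}
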
205 | |||
206 | static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | ||
207 | struct nlattr **tca, unsigned long *arg) | ||
208 | { | ||
209 | struct qfq_sched *q = qdisc_priv(sch); | ||
210 | struct qfq_class *cl = (struct qfq_class *)*arg; | ||
211 | struct nlattr *tb[TCA_QFQ_MAX + 1]; | ||
212 | u32 weight, lmax, inv_w; | ||
213 | int i, err; | ||
214 | |||
215 | if (tca[TCA_OPTIONS] == NULL) { | ||
216 | pr_notice("qfq: no options\n"); | ||
217 | return -EINVAL; | ||
218 | } | ||
219 | |||
220 | err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy); | ||
221 | if (err < 0) | ||
222 | return err; | ||
223 | |||
224 | if (tb[TCA_QFQ_WEIGHT]) { | ||
225 | weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]); | ||
226 | if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) { | ||
227 | pr_notice("qfq: invalid weight %u\n", weight); | ||
228 | return -EINVAL; | ||
229 | } | ||
230 | } else | ||
231 | weight = 1; | ||
232 | |||
233 | inv_w = ONE_FP / weight; | ||
234 | weight = ONE_FP / inv_w; | ||
235 | if (q->wsum + weight > QFQ_MAX_WSUM) { | ||
236 | pr_notice("qfq: total weight out of range (%u + %u)\n", | ||
237 | weight, q->wsum); | ||
238 | return -EINVAL; | ||
239 | } | ||
240 | |||
241 | if (tb[TCA_QFQ_LMAX]) { | ||
242 | lmax = nla_get_u32(tb[TCA_QFQ_LMAX]); | ||
243 | if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) { | ||
244 | pr_notice("qfq: invalid max length %u\n", lmax); | ||
245 | return -EINVAL; | ||
246 | } | ||
247 | } else | ||
248 | lmax = 1UL << QFQ_MTU_SHIFT; | ||
249 | |||
250 | if (cl != NULL) { | ||
251 | if (tca[TCA_RATE]) { | ||
252 | err = gen_replace_estimator(&cl->bstats, &cl->rate_est, | ||
253 | qdisc_root_sleeping_lock(sch), | ||
254 | tca[TCA_RATE]); | ||
255 | if (err) | ||
256 | return err; | ||
257 | } | ||
258 | |||
259 | sch_tree_lock(sch); | ||
260 | if (tb[TCA_QFQ_WEIGHT]) { | ||
261 | q->wsum = weight - ONE_FP / cl->inv_w; | ||
262 | cl->inv_w = inv_w; | ||
263 | } | ||
264 | sch_tree_unlock(sch); | ||
265 | |||
266 | return 0; | ||
267 | } | ||
268 | |||
269 | cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL); | ||
270 | if (cl == NULL) | ||
271 | return -ENOBUFS; | ||
272 | |||
273 | cl->refcnt = 1; | ||
274 | cl->common.classid = classid; | ||
275 | cl->lmax = lmax; | ||
276 | cl->inv_w = inv_w; | ||
277 | i = qfq_calc_index(cl->inv_w, cl->lmax); | ||
278 | |||
279 | cl->grp = &q->groups[i]; | ||
280 | q->wsum += weight; | ||
281 | |||
282 | cl->qdisc = qdisc_create_dflt(sch->dev_queue, | ||
283 | &pfifo_qdisc_ops, classid); | ||
284 | if (cl->qdisc == NULL) | ||
285 | cl->qdisc = &noop_qdisc; | ||
286 | |||
287 | if (tca[TCA_RATE]) { | ||
288 | err = gen_new_estimator(&cl->bstats, &cl->rate_est, | ||
289 | qdisc_root_sleeping_lock(sch), | ||
290 | tca[TCA_RATE]); | ||
291 | if (err) { | ||
292 | qdisc_destroy(cl->qdisc); | ||
293 | kfree(cl); | ||
294 | return err; | ||
295 | } | ||
296 | } | ||
297 | |||
298 | sch_tree_lock(sch); | ||
299 | qdisc_class_hash_insert(&q->clhash, &cl->common); | ||
300 | sch_tree_unlock(sch); | ||
301 | |||
302 | qdisc_class_hash_grow(sch, &q->clhash); | ||
303 | |||
304 | *arg = (unsigned long)cl; | ||
305 | return 0; | ||
306 | } | ||
307 | |||
308 | static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl) | ||
309 | { | ||
310 | struct qfq_sched *q = qdisc_priv(sch); | ||
311 | |||
312 | if (cl->inv_w) { | ||
313 | q->wsum -= ONE_FP / cl->inv_w; | ||
314 | cl->inv_w = 0; | ||
315 | } | ||
316 | |||
317 | gen_kill_estimator(&cl->bstats, &cl->rate_est); | ||
318 | qdisc_destroy(cl->qdisc); | ||
319 | kfree(cl); | ||
320 | } | ||
321 | |||
322 | static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) | ||
323 | { | ||
324 | struct qfq_sched *q = qdisc_priv(sch); | ||
325 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
326 | |||
327 | if (cl->filter_cnt > 0) | ||
328 | return -EBUSY; | ||
329 | |||
330 | sch_tree_lock(sch); | ||
331 | |||
332 | qfq_purge_queue(cl); | ||
333 | qdisc_class_hash_remove(&q->clhash, &cl->common); | ||
334 | |||
335 | BUG_ON(--cl->refcnt == 0); | ||
336 | /* | ||
337 | * This shouldn't happen: we "hold" one cops->get() when called | ||
338 | * from tc_ctl_tclass; the destroy method is done from cops->put(). | ||
339 | */ | ||
340 | |||
341 | sch_tree_unlock(sch); | ||
342 | return 0; | ||
343 | } | ||
344 | |||
345 | static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) | ||
346 | { | ||
347 | struct qfq_class *cl = qfq_find_class(sch, classid); | ||
348 | |||
349 | if (cl != NULL) | ||
350 | cl->refcnt++; | ||
351 | |||
352 | return (unsigned long)cl; | ||
353 | } | ||
354 | |||
355 | static void qfq_put_class(struct Qdisc *sch, unsigned long arg) | ||
356 | { | ||
357 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
358 | |||
359 | if (--cl->refcnt == 0) | ||
360 | qfq_destroy_class(sch, cl); | ||
361 | } | ||
362 | |||
363 | static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl) | ||
364 | { | ||
365 | struct qfq_sched *q = qdisc_priv(sch); | ||
366 | |||
367 | if (cl) | ||
368 | return NULL; | ||
369 | |||
370 | return &q->filter_list; | ||
371 | } | ||
372 | |||
373 | static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent, | ||
374 | u32 classid) | ||
375 | { | ||
376 | struct qfq_class *cl = qfq_find_class(sch, classid); | ||
377 | |||
378 | if (cl != NULL) | ||
379 | cl->filter_cnt++; | ||
380 | |||
381 | return (unsigned long)cl; | ||
382 | } | ||
383 | |||
384 | static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg) | ||
385 | { | ||
386 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
387 | |||
388 | cl->filter_cnt--; | ||
389 | } | ||
390 | |||
391 | static int qfq_graft_class(struct Qdisc *sch, unsigned long arg, | ||
392 | struct Qdisc *new, struct Qdisc **old) | ||
393 | { | ||
394 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
395 | |||
396 | if (new == NULL) { | ||
397 | new = qdisc_create_dflt(sch->dev_queue, | ||
398 | &pfifo_qdisc_ops, cl->common.classid); | ||
399 | if (new == NULL) | ||
400 | new = &noop_qdisc; | ||
401 | } | ||
402 | |||
403 | sch_tree_lock(sch); | ||
404 | qfq_purge_queue(cl); | ||
405 | *old = cl->qdisc; | ||
406 | cl->qdisc = new; | ||
407 | sch_tree_unlock(sch); | ||
408 | return 0; | ||
409 | } | ||
410 | |||
411 | static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg) | ||
412 | { | ||
413 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
414 | |||
415 | return cl->qdisc; | ||
416 | } | ||
417 | |||
418 | static int qfq_dump_class(struct Qdisc *sch, unsigned long arg, | ||
419 | struct sk_buff *skb, struct tcmsg *tcm) | ||
420 | { | ||
421 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
422 | struct nlattr *nest; | ||
423 | |||
424 | tcm->tcm_parent = TC_H_ROOT; | ||
425 | tcm->tcm_handle = cl->common.classid; | ||
426 | tcm->tcm_info = cl->qdisc->handle; | ||
427 | |||
428 | nest = nla_nest_start(skb, TCA_OPTIONS); | ||
429 | if (nest == NULL) | ||
430 | goto nla_put_failure; | ||
431 | NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w); | ||
432 | NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax); | ||
433 | return nla_nest_end(skb, nest); | ||
434 | |||
435 | nla_put_failure: | ||
436 | nla_nest_cancel(skb, nest); | ||
437 | return -EMSGSIZE; | ||
438 | } | ||
439 | |||
440 | static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg, | ||
441 | struct gnet_dump *d) | ||
442 | { | ||
443 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
444 | struct tc_qfq_stats xstats; | ||
445 | |||
446 | memset(&xstats, 0, sizeof(xstats)); | ||
447 | cl->qdisc->qstats.qlen = cl->qdisc->q.qlen; | ||
448 | |||
449 | xstats.weight = ONE_FP/cl->inv_w; | ||
450 | xstats.lmax = cl->lmax; | ||
451 | |||
452 | if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || | ||
453 | gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 || | ||
454 | gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0) | ||
455 | return -1; | ||
456 | |||
457 | return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); | ||
458 | } | ||
459 | |||
460 | static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | ||
461 | { | ||
462 | struct qfq_sched *q = qdisc_priv(sch); | ||
463 | struct qfq_class *cl; | ||
464 | struct hlist_node *n; | ||
465 | unsigned int i; | ||
466 | |||
467 | if (arg->stop) | ||
468 | return; | ||
469 | |||
470 | for (i = 0; i < q->clhash.hashsize; i++) { | ||
471 | hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) { | ||
472 | if (arg->count < arg->skip) { | ||
473 | arg->count++; | ||
474 | continue; | ||
475 | } | ||
476 | if (arg->fn(sch, (unsigned long)cl, arg) < 0) { | ||
477 | arg->stop = 1; | ||
478 | return; | ||
479 | } | ||
480 | arg->count++; | ||
481 | } | ||
482 | } | ||
483 | } | ||
484 | |||
485 | static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, | ||
486 | int *qerr) | ||
487 | { | ||
488 | struct qfq_sched *q = qdisc_priv(sch); | ||
489 | struct qfq_class *cl; | ||
490 | struct tcf_result res; | ||
491 | int result; | ||
492 | |||
493 | if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) { | ||
494 | pr_debug("qfq_classify: found %d\n", skb->priority); | ||
495 | cl = qfq_find_class(sch, skb->priority); | ||
496 | if (cl != NULL) | ||
497 | return cl; | ||
498 | } | ||
499 | |||
500 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | ||
501 | result = tc_classify(skb, q->filter_list, &res); | ||
502 | if (result >= 0) { | ||
503 | #ifdef CONFIG_NET_CLS_ACT | ||
504 | switch (result) { | ||
505 | case TC_ACT_QUEUED: | ||
506 | case TC_ACT_STOLEN: | ||
507 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | ||
508 | case TC_ACT_SHOT: | ||
509 | return NULL; | ||
510 | } | ||
511 | #endif | ||
512 | cl = (struct qfq_class *)res.class; | ||
513 | if (cl == NULL) | ||
514 | cl = qfq_find_class(sch, res.classid); | ||
515 | return cl; | ||
516 | } | ||
517 | |||
518 | return NULL; | ||
519 | } | ||
520 | |||
521 | /* Generic comparison function, handling wraparound. */ | ||
522 | static inline int qfq_gt(u64 a, u64 b) | ||
523 | { | ||
524 | return (s64)(a - b) > 0; | ||
525 | } | ||
526 | |||
527 | /* Round a precise timestamp to its slotted value. */ | ||
528 | static inline u64 qfq_round_down(u64 ts, unsigned int shift) | ||
529 | { | ||
530 | return ts & ~((1ULL << shift) - 1); | ||
531 | } | ||
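The two helpers above are the arithmetic core of QFQ's virtual-time bookkeeping. A minimal user-space sketch (illustrative only; uint64_t/int64_t stand in for the kernel's u64/s64, and the sample values are made up) shows why the signed cast survives timestamp wraparound and how rounding snaps a timestamp to its slot boundary:

    #include <stdint.h>
    #include <stdio.h>

    /* same idea as qfq_gt(): "a later than b", valid across wraparound */
    static int ts_gt(uint64_t a, uint64_t b)
    {
            return (int64_t)(a - b) > 0;
    }

    /* same idea as qfq_round_down(): clear the low 'shift' bits */
    static uint64_t round_down(uint64_t ts, unsigned int shift)
    {
            return ts & ~((1ULL << shift) - 1);
    }

    int main(void)
    {
            uint64_t near_wrap = ~0ULL - 5;    /* just before the counter wraps */
            uint64_t wrapped   = 10;           /* just after the wrap */

            /* "wrapped" is later even though it is numerically smaller */
            printf("gt: %d\n", ts_gt(wrapped, near_wrap));            /* 1 */

            /* 0x12345 snapped to an 8-bit slot boundary -> 0x12300 */
            printf("round: %#llx\n",
                   (unsigned long long)round_down(0x12345, 8));
            return 0;
    }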
532 | |||
533 | /* return the pointer to the group with lowest index in the bitmap */ | ||
534 | static inline struct qfq_group *qfq_ffs(struct qfq_sched *q, | ||
535 | unsigned long bitmap) | ||
536 | { | ||
537 | int index = __ffs(bitmap); | ||
538 | return &q->groups[index]; | ||
539 | } | ||
540 | /* Calculate a mask to mimic what would be ffs_from(). */ | ||
541 | static inline unsigned long mask_from(unsigned long bitmap, int from) | ||
542 | { | ||
543 | return bitmap & ~((1UL << from) - 1); | ||
544 | } | ||
545 | |||
546 | /* | ||
547 | * The state computation relies on ER=0, IR=1, EB=2, IB=3 | ||
548 | * First compute eligibility comparing grp->S, q->V, | ||
549 | * then check if someone is blocking us and possibly add EB | ||
550 | */ | ||
551 | static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp) | ||
552 | { | ||
553 | /* if S > V we are not eligible */ | ||
554 | unsigned int state = qfq_gt(grp->S, q->V); | ||
555 | unsigned long mask = mask_from(q->bitmaps[ER], grp->index); | ||
556 | struct qfq_group *next; | ||
557 | |||
558 | if (mask) { | ||
559 | next = qfq_ffs(q, mask); | ||
560 | if (qfq_gt(grp->F, next->F)) | ||
561 | state |= EB; | ||
562 | } | ||
563 | |||
564 | return state; | ||
565 | } | ||
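As a reading aid for the ER/IR/EB/IB encoding used by qfq_calc_state(), a tiny stand-alone sketch (an illustration of the bit layout only, not code from this patch): eligibility supplies the low bit and a blocking earlier group ORs in EB.

    #include <stdio.h>

    enum { ER = 0, IR = 1, EB = 2, IB = 3 };

    /* mirrors the shape of qfq_calc_state(): eligibility gives the low bit,
     * a blocking earlier group ORs in EB */
    static int calc_state(int ineligible, int blocked)
    {
            int state = ineligible;            /* 0 -> ER, 1 -> IR */

            if (blocked)
                    state |= EB;               /* -> EB or IB */
            return state;
    }

    int main(void)
    {
            printf("%d %d %d %d\n",
                   calc_state(0, 0),           /* ER */
                   calc_state(1, 0),           /* IR */
                   calc_state(0, 1),           /* EB */
                   calc_state(1, 1));          /* IB */
            return 0;
    }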
566 | |||
567 | |||
568 | /* | ||
569 | * In principle | ||
570 | * q->bitmaps[dst] |= q->bitmaps[src] & mask; | ||
571 | * q->bitmaps[src] &= ~mask; | ||
572 | * but we should make sure that src != dst | ||
573 | */ | ||
574 | static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask, | ||
575 | int src, int dst) | ||
576 | { | ||
577 | q->bitmaps[dst] |= q->bitmaps[src] & mask; | ||
578 | q->bitmaps[src] &= ~mask; | ||
579 | } | ||
580 | |||
581 | static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F) | ||
582 | { | ||
583 | unsigned long mask = mask_from(q->bitmaps[ER], index + 1); | ||
584 | struct qfq_group *next; | ||
585 | |||
586 | if (mask) { | ||
587 | next = qfq_ffs(q, mask); | ||
588 | if (!qfq_gt(next->F, old_F)) | ||
589 | return; | ||
590 | } | ||
591 | |||
592 | mask = (1UL << index) - 1; | ||
593 | qfq_move_groups(q, mask, EB, ER); | ||
594 | qfq_move_groups(q, mask, IB, IR); | ||
595 | } | ||
596 | |||
597 | /* | ||
598 | * perhaps | ||
599 | * | ||
600 | old_V ^= q->V; | ||
601 | old_V >>= QFQ_MIN_SLOT_SHIFT; | ||
602 | if (old_V) { | ||
603 | ... | ||
604 | } | ||
605 | * | ||
606 | */ | ||
607 | static void qfq_make_eligible(struct qfq_sched *q, u64 old_V) | ||
608 | { | ||
609 | unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT; | ||
610 | unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT; | ||
611 | |||
612 | if (vslot != old_vslot) { | ||
613 | unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1; | ||
614 | qfq_move_groups(q, mask, IR, ER); | ||
615 | qfq_move_groups(q, mask, IB, EB); | ||
616 | } | ||
617 | } | ||
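A rough user-space sketch of the mask computed in qfq_make_eligible() (fls_sim() is a hypothetical stand-in for the kernel's fls(); the slot values are made up): every group whose slot boundary was crossed while V advanced is selected by the mask and moved to the eligible sets.

    #include <stdio.h>

    /* hypothetical stand-in for the kernel's fls(): 1-based index of the
     * highest set bit, 0 for an all-zero word */
    static int fls_sim(unsigned long x)
    {
            int r = 0;

            while (x) {
                    x >>= 1;
                    r++;
            }
            return r;
    }

    int main(void)
    {
            unsigned long old_vslot = 0x13;    /* V slot before the dequeue */
            unsigned long vslot = 0x15;        /* V slot after the dequeue */
            unsigned long mask;

            /* the differing bits mark the largest slot boundary crossed */
            mask = (1UL << fls_sim(vslot ^ old_vslot)) - 1;

            /* 0x13 ^ 0x15 = 0x06, fls = 3, mask = 0x07: groups 0..2 move */
            printf("mask = %#lx\n", mask);
            return 0;
    }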
618 | |||
619 | |||
620 | /* | ||
621 | * XXX we should make sure that slot becomes less than 32. | ||
622 | * This is guaranteed by the input values. | ||
623 | * roundedS is always cl->S rounded on grp->slot_shift bits. | ||
624 | */ | ||
625 | static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl, | ||
626 | u64 roundedS) | ||
627 | { | ||
628 | u64 slot = (roundedS - grp->S) >> grp->slot_shift; | ||
629 | unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS; | ||
630 | |||
631 | hlist_add_head(&cl->next, &grp->slots[i]); | ||
632 | __set_bit(slot, &grp->full_slots); | ||
633 | } | ||
634 | |||
635 | /* Maybe introduce hlist_first_entry?? */ | ||
636 | static struct qfq_class *qfq_slot_head(struct qfq_group *grp) | ||
637 | { | ||
638 | return hlist_entry(grp->slots[grp->front].first, | ||
639 | struct qfq_class, next); | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * remove the entry from the slot | ||
644 | */ | ||
645 | static void qfq_front_slot_remove(struct qfq_group *grp) | ||
646 | { | ||
647 | struct qfq_class *cl = qfq_slot_head(grp); | ||
648 | |||
649 | BUG_ON(!cl); | ||
650 | hlist_del(&cl->next); | ||
651 | if (hlist_empty(&grp->slots[grp->front])) | ||
652 | __clear_bit(0, &grp->full_slots); | ||
653 | } | ||
654 | |||
655 | /* | ||
656 | * Returns the first full queue in a group. As a side effect, | ||
657 | * adjust the bucket list so the first non-empty bucket is at | ||
658 | * position 0 in full_slots. | ||
659 | */ | ||
660 | static struct qfq_class *qfq_slot_scan(struct qfq_group *grp) | ||
661 | { | ||
662 | unsigned int i; | ||
663 | |||
664 | pr_debug("qfq slot_scan: grp %u full %#lx\n", | ||
665 | grp->index, grp->full_slots); | ||
666 | |||
667 | if (grp->full_slots == 0) | ||
668 | return NULL; | ||
669 | |||
670 | i = __ffs(grp->full_slots); /* zero based */ | ||
671 | if (i > 0) { | ||
672 | grp->front = (grp->front + i) % QFQ_MAX_SLOTS; | ||
673 | grp->full_slots >>= i; | ||
674 | } | ||
675 | |||
676 | return qfq_slot_head(grp); | ||
677 | } | ||
678 | |||
679 | /* | ||
680 | * adjust the bucket list. When the start time of a group decreases, | ||
681 | * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to | ||
682 | * move the objects. The mask of occupied slots must be shifted | ||
683 | * because we use ffs() to find the first non-empty slot. | ||
684 | * This covers decreases in the group's start time, but what about | ||
685 | * increases of the start time ? | ||
686 | * Here too we should make sure that i is less than 32 | ||
687 | */ | ||
688 | static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS) | ||
689 | { | ||
690 | unsigned int i = (grp->S - roundedS) >> grp->slot_shift; | ||
691 | |||
692 | grp->full_slots <<= i; | ||
693 | grp->front = (grp->front - i) % QFQ_MAX_SLOTS; | ||
694 | } | ||
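To make the ring adjustment in qfq_slot_rotate() concrete, a small illustrative sketch (user-space C, made-up values): the occupancy bitmap shifts up while the logical front index moves back by the same amount, modulo the ring size, so the buckets themselves never move.

    #include <stdio.h>

    #define MAX_SLOTS 32                       /* ring size, as in QFQ_MAX_SLOTS */

    int main(void)
    {
            unsigned long full_slots = 0x5;    /* buckets 0 and 2 occupied */
            unsigned int front = 3;
            unsigned int i = 2;                /* start time moved 2 slots earlier */

            /* same adjustment as qfq_slot_rotate(): shift the occupancy map
             * and move the logical front back, modulo the ring size */
            full_slots <<= i;
            front = (front - i) % MAX_SLOTS;

            printf("full_slots=%#lx front=%u\n", full_slots, front);  /* 0x14, 1 */
            return 0;
    }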
695 | |||
696 | static void qfq_update_eligible(struct qfq_sched *q, u64 old_V) | ||
697 | { | ||
698 | struct qfq_group *grp; | ||
699 | unsigned long ineligible; | ||
700 | |||
701 | ineligible = q->bitmaps[IR] | q->bitmaps[IB]; | ||
702 | if (ineligible) { | ||
703 | if (!q->bitmaps[ER]) { | ||
704 | grp = qfq_ffs(q, ineligible); | ||
705 | if (qfq_gt(grp->S, q->V)) | ||
706 | q->V = grp->S; | ||
707 | } | ||
708 | qfq_make_eligible(q, old_V); | ||
709 | } | ||
710 | } | ||
711 | |||
712 | /* What is the length of the next packet in the queue? (0 if the queue is empty) */ | ||
713 | static unsigned int qdisc_peek_len(struct Qdisc *sch) | ||
714 | { | ||
715 | struct sk_buff *skb; | ||
716 | |||
717 | skb = sch->ops->peek(sch); | ||
718 | return skb ? qdisc_pkt_len(skb) : 0; | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * Updates the class, returns true if also the group needs to be updated. | ||
723 | */ | ||
724 | static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl) | ||
725 | { | ||
726 | unsigned int len = qdisc_peek_len(cl->qdisc); | ||
727 | |||
728 | cl->S = cl->F; | ||
729 | if (!len) | ||
730 | qfq_front_slot_remove(grp); /* queue is empty */ | ||
731 | else { | ||
732 | u64 roundedS; | ||
733 | |||
734 | cl->F = cl->S + (u64)len * cl->inv_w; | ||
735 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
736 | if (roundedS == grp->S) | ||
737 | return false; | ||
738 | |||
739 | qfq_front_slot_remove(grp); | ||
740 | qfq_slot_insert(grp, cl, roundedS); | ||
741 | } | ||
742 | |||
743 | return true; | ||
744 | } | ||
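The timestamp update above is plain fixed-point arithmetic: the finish time advances by len/weight, scaled up by ONE_FP. A rough stand-alone sketch (FRAC_BITS = 30 is assumed here purely for illustration; the real constants are defined earlier in this file):

    #include <stdio.h>
    #include <stdint.h>

    #define FRAC_BITS 30                       /* assumed fixed-point precision */
    #define ONE_FP    (1UL << FRAC_BITS)

    int main(void)
    {
            uint64_t S = 0;                    /* start timestamp */
            uint32_t weight = 10;
            uint32_t inv_w = ONE_FP / weight;  /* inverse weight, as in the qdisc */
            unsigned int len = 1500;           /* packet length, bytes */
            uint64_t F = S + (uint64_t)len * inv_w;

            /* F - S is approximately len/weight (= 150), scaled by ONE_FP */
            printf("inv_w=%u  (F-S)/ONE_FP=%.3f  (~ %u/%u)\n",
                   inv_w, (double)(F - S) / ONE_FP, len, weight);
            return 0;
    }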
745 | |||
746 | static struct sk_buff *qfq_dequeue(struct Qdisc *sch) | ||
747 | { | ||
748 | struct qfq_sched *q = qdisc_priv(sch); | ||
749 | struct qfq_group *grp; | ||
750 | struct qfq_class *cl; | ||
751 | struct sk_buff *skb; | ||
752 | unsigned int len; | ||
753 | u64 old_V; | ||
754 | |||
755 | if (!q->bitmaps[ER]) | ||
756 | return NULL; | ||
757 | |||
758 | grp = qfq_ffs(q, q->bitmaps[ER]); | ||
759 | |||
760 | cl = qfq_slot_head(grp); | ||
761 | skb = qdisc_dequeue_peeked(cl->qdisc); | ||
762 | if (!skb) { | ||
763 | WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n"); | ||
764 | return NULL; | ||
765 | } | ||
766 | |||
767 | sch->q.qlen--; | ||
768 | qdisc_bstats_update(sch, skb); | ||
769 | |||
770 | old_V = q->V; | ||
771 | len = qdisc_pkt_len(skb); | ||
772 | q->V += (u64)len * IWSUM; | ||
773 | pr_debug("qfq dequeue: len %u F %lld now %lld\n", | ||
774 | len, (unsigned long long) cl->F, (unsigned long long) q->V); | ||
775 | |||
776 | if (qfq_update_class(grp, cl)) { | ||
777 | u64 old_F = grp->F; | ||
778 | |||
779 | cl = qfq_slot_scan(grp); | ||
780 | if (!cl) | ||
781 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
782 | else { | ||
783 | u64 roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
784 | unsigned int s; | ||
785 | |||
786 | if (grp->S == roundedS) | ||
787 | goto skip_unblock; | ||
788 | grp->S = roundedS; | ||
789 | grp->F = roundedS + (2ULL << grp->slot_shift); | ||
790 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
791 | s = qfq_calc_state(q, grp); | ||
792 | __set_bit(grp->index, &q->bitmaps[s]); | ||
793 | } | ||
794 | |||
795 | qfq_unblock_groups(q, grp->index, old_F); | ||
796 | } | ||
797 | |||
798 | skip_unblock: | ||
799 | qfq_update_eligible(q, old_V); | ||
800 | |||
801 | return skb; | ||
802 | } | ||
803 | |||
804 | /* | ||
805 | * Assign a reasonable start time for a new flow k in group i. | ||
806 | * Admissible values for \hat(F) are multiples of \sigma_i | ||
807 | * no greater than V+\sigma_i . Larger values mean that | ||
808 | * we had a wraparound so we consider the timestamp to be stale. | ||
809 | * | ||
810 | * If F is not stale and F >= V then we set S = F. | ||
811 | * Otherwise we should assign S = V, but this may violate | ||
812 | * the ordering in ER. So, if we have groups in ER, set S to | ||
813 | * the F_j of the first group j which would be blocking us. | ||
814 | * We are guaranteed not to move S backward because | ||
815 | * otherwise our group i would still be blocked. | ||
816 | */ | ||
817 | static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl) | ||
818 | { | ||
819 | unsigned long mask; | ||
820 | uint32_t limit, roundedF; | ||
821 | int slot_shift = cl->grp->slot_shift; | ||
822 | |||
823 | roundedF = qfq_round_down(cl->F, slot_shift); | ||
824 | limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift); | ||
825 | |||
826 | if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) { | ||
827 | /* timestamp was stale */ | ||
828 | mask = mask_from(q->bitmaps[ER], cl->grp->index); | ||
829 | if (mask) { | ||
830 | struct qfq_group *next = qfq_ffs(q, mask); | ||
831 | if (qfq_gt(roundedF, next->F)) { | ||
832 | cl->S = next->F; | ||
833 | return; | ||
834 | } | ||
835 | } | ||
836 | cl->S = q->V; | ||
837 | } else /* timestamp is not stale */ | ||
838 | cl->S = cl->F; | ||
839 | } | ||
840 | |||
841 | static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
842 | { | ||
843 | struct qfq_sched *q = qdisc_priv(sch); | ||
844 | struct qfq_group *grp; | ||
845 | struct qfq_class *cl; | ||
846 | int err; | ||
847 | u64 roundedS; | ||
848 | int s; | ||
849 | |||
850 | cl = qfq_classify(skb, sch, &err); | ||
851 | if (cl == NULL) { | ||
852 | if (err & __NET_XMIT_BYPASS) | ||
853 | sch->qstats.drops++; | ||
854 | kfree_skb(skb); | ||
855 | return err; | ||
856 | } | ||
857 | pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); | ||
858 | |||
859 | err = qdisc_enqueue(skb, cl->qdisc); | ||
860 | if (unlikely(err != NET_XMIT_SUCCESS)) { | ||
861 | pr_debug("qfq_enqueue: enqueue failed %d\n", err); | ||
862 | if (net_xmit_drop_count(err)) { | ||
863 | cl->qstats.drops++; | ||
864 | sch->qstats.drops++; | ||
865 | } | ||
866 | return err; | ||
867 | } | ||
868 | |||
869 | bstats_update(&cl->bstats, skb); | ||
870 | ++sch->q.qlen; | ||
871 | |||
872 | /* If the new skb is not the head of queue, then done here. */ | ||
873 | if (cl->qdisc->q.qlen != 1) | ||
874 | return err; | ||
875 | |||
876 | /* If we reach this point, queue q was idle */ | ||
877 | grp = cl->grp; | ||
878 | qfq_update_start(q, cl); | ||
879 | |||
880 | /* compute new finish time and rounded start. */ | ||
881 | cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w; | ||
882 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
883 | |||
884 | /* | ||
885 | * insert cl in the correct bucket. | ||
886 | * If cl->S >= grp->S we don't need to adjust the | ||
887 | * bucket list and simply go to the insertion phase. | ||
888 | * Otherwise grp->S is decreasing, we must make room | ||
889 | * in the bucket list, and also recompute the group state. | ||
890 | * Finally, if there were no flows in this group and nobody | ||
891 | * was in ER make sure to adjust V. | ||
892 | */ | ||
893 | if (grp->full_slots) { | ||
894 | if (!qfq_gt(grp->S, cl->S)) | ||
895 | goto skip_update; | ||
896 | |||
897 | /* create a slot for this cl->S */ | ||
898 | qfq_slot_rotate(grp, roundedS); | ||
899 | /* group was surely ineligible, remove */ | ||
900 | __clear_bit(grp->index, &q->bitmaps[IR]); | ||
901 | __clear_bit(grp->index, &q->bitmaps[IB]); | ||
902 | } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V)) | ||
903 | q->V = roundedS; | ||
904 | |||
905 | grp->S = roundedS; | ||
906 | grp->F = roundedS + (2ULL << grp->slot_shift); | ||
907 | s = qfq_calc_state(q, grp); | ||
908 | __set_bit(grp->index, &q->bitmaps[s]); | ||
909 | |||
910 | pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n", | ||
911 | s, q->bitmaps[s], | ||
912 | (unsigned long long) cl->S, | ||
913 | (unsigned long long) cl->F, | ||
914 | (unsigned long long) q->V); | ||
915 | |||
916 | skip_update: | ||
917 | qfq_slot_insert(grp, cl, roundedS); | ||
918 | |||
919 | return err; | ||
920 | } | ||
921 | |||
922 | |||
923 | static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp, | ||
924 | struct qfq_class *cl) | ||
925 | { | ||
926 | unsigned int i, offset; | ||
927 | u64 roundedS; | ||
928 | |||
929 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
930 | offset = (roundedS - grp->S) >> grp->slot_shift; | ||
931 | i = (grp->front + offset) % QFQ_MAX_SLOTS; | ||
932 | |||
933 | hlist_del(&cl->next); | ||
934 | if (hlist_empty(&grp->slots[i])) | ||
935 | __clear_bit(offset, &grp->full_slots); | ||
936 | } | ||
937 | |||
938 | /* | ||
939 | * called to forcibly destroy a queue. | ||
940 | * If the queue is not in the front bucket, or if it has | ||
941 | * other queues in the front bucket, we can simply remove | ||
942 | * the queue with no other side effects. | ||
943 | * Otherwise we must propagate the event up. | ||
944 | */ | ||
945 | static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl) | ||
946 | { | ||
947 | struct qfq_group *grp = cl->grp; | ||
948 | unsigned long mask; | ||
949 | u64 roundedS; | ||
950 | int s; | ||
951 | |||
952 | cl->F = cl->S; | ||
953 | qfq_slot_remove(q, grp, cl); | ||
954 | |||
955 | if (!grp->full_slots) { | ||
956 | __clear_bit(grp->index, &q->bitmaps[IR]); | ||
957 | __clear_bit(grp->index, &q->bitmaps[EB]); | ||
958 | __clear_bit(grp->index, &q->bitmaps[IB]); | ||
959 | |||
960 | if (test_bit(grp->index, &q->bitmaps[ER]) && | ||
961 | !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) { | ||
962 | mask = q->bitmaps[ER] & ((1UL << grp->index) - 1); | ||
963 | if (mask) | ||
964 | mask = ~((1UL << __fls(mask)) - 1); | ||
965 | else | ||
966 | mask = ~0UL; | ||
967 | qfq_move_groups(q, mask, EB, ER); | ||
968 | qfq_move_groups(q, mask, IB, IR); | ||
969 | } | ||
970 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
971 | } else if (hlist_empty(&grp->slots[grp->front])) { | ||
972 | cl = qfq_slot_scan(grp); | ||
973 | roundedS = qfq_round_down(cl->S, grp->slot_shift); | ||
974 | if (grp->S != roundedS) { | ||
975 | __clear_bit(grp->index, &q->bitmaps[ER]); | ||
976 | __clear_bit(grp->index, &q->bitmaps[IR]); | ||
977 | __clear_bit(grp->index, &q->bitmaps[EB]); | ||
978 | __clear_bit(grp->index, &q->bitmaps[IB]); | ||
979 | grp->S = roundedS; | ||
980 | grp->F = roundedS + (2ULL << grp->slot_shift); | ||
981 | s = qfq_calc_state(q, grp); | ||
982 | __set_bit(grp->index, &q->bitmaps[s]); | ||
983 | } | ||
984 | } | ||
985 | |||
986 | qfq_update_eligible(q, q->V); | ||
987 | } | ||
988 | |||
989 | static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) | ||
990 | { | ||
991 | struct qfq_sched *q = qdisc_priv(sch); | ||
992 | struct qfq_class *cl = (struct qfq_class *)arg; | ||
993 | |||
994 | if (cl->qdisc->q.qlen == 0) | ||
995 | qfq_deactivate_class(q, cl); | ||
996 | } | ||
997 | |||
998 | static unsigned int qfq_drop(struct Qdisc *sch) | ||
999 | { | ||
1000 | struct qfq_sched *q = qdisc_priv(sch); | ||
1001 | struct qfq_group *grp; | ||
1002 | unsigned int i, j, len; | ||
1003 | |||
1004 | for (i = 0; i <= QFQ_MAX_INDEX; i++) { | ||
1005 | grp = &q->groups[i]; | ||
1006 | for (j = 0; j < QFQ_MAX_SLOTS; j++) { | ||
1007 | struct qfq_class *cl; | ||
1008 | struct hlist_node *n; | ||
1009 | |||
1010 | hlist_for_each_entry(cl, n, &grp->slots[j], next) { | ||
1011 | |||
1012 | if (!cl->qdisc->ops->drop) | ||
1013 | continue; | ||
1014 | |||
1015 | len = cl->qdisc->ops->drop(cl->qdisc); | ||
1016 | if (len > 0) { | ||
1017 | sch->q.qlen--; | ||
1018 | if (!cl->qdisc->q.qlen) | ||
1019 | qfq_deactivate_class(q, cl); | ||
1020 | |||
1021 | return len; | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | } | ||
1026 | |||
1027 | return 0; | ||
1028 | } | ||
1029 | |||
1030 | static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) | ||
1031 | { | ||
1032 | struct qfq_sched *q = qdisc_priv(sch); | ||
1033 | struct qfq_group *grp; | ||
1034 | int i, j, err; | ||
1035 | |||
1036 | err = qdisc_class_hash_init(&q->clhash); | ||
1037 | if (err < 0) | ||
1038 | return err; | ||
1039 | |||
1040 | for (i = 0; i <= QFQ_MAX_INDEX; i++) { | ||
1041 | grp = &q->groups[i]; | ||
1042 | grp->index = i; | ||
1043 | grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS | ||
1044 | - (QFQ_MAX_INDEX - i); | ||
1045 | for (j = 0; j < QFQ_MAX_SLOTS; j++) | ||
1046 | INIT_HLIST_HEAD(&grp->slots[j]); | ||
1047 | } | ||
1048 | |||
1049 | return 0; | ||
1050 | } | ||
1051 | |||
1052 | static void qfq_reset_qdisc(struct Qdisc *sch) | ||
1053 | { | ||
1054 | struct qfq_sched *q = qdisc_priv(sch); | ||
1055 | struct qfq_group *grp; | ||
1056 | struct qfq_class *cl; | ||
1057 | struct hlist_node *n, *tmp; | ||
1058 | unsigned int i, j; | ||
1059 | |||
1060 | for (i = 0; i <= QFQ_MAX_INDEX; i++) { | ||
1061 | grp = &q->groups[i]; | ||
1062 | for (j = 0; j < QFQ_MAX_SLOTS; j++) { | ||
1063 | hlist_for_each_entry_safe(cl, n, tmp, | ||
1064 | &grp->slots[j], next) { | ||
1065 | qfq_deactivate_class(q, cl); | ||
1066 | } | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | for (i = 0; i < q->clhash.hashsize; i++) { | ||
1071 | hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) | ||
1072 | qdisc_reset(cl->qdisc); | ||
1073 | } | ||
1074 | sch->q.qlen = 0; | ||
1075 | } | ||
1076 | |||
1077 | static void qfq_destroy_qdisc(struct Qdisc *sch) | ||
1078 | { | ||
1079 | struct qfq_sched *q = qdisc_priv(sch); | ||
1080 | struct qfq_class *cl; | ||
1081 | struct hlist_node *n, *next; | ||
1082 | unsigned int i; | ||
1083 | |||
1084 | tcf_destroy_chain(&q->filter_list); | ||
1085 | |||
1086 | for (i = 0; i < q->clhash.hashsize; i++) { | ||
1087 | hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i], | ||
1088 | common.hnode) { | ||
1089 | qfq_destroy_class(sch, cl); | ||
1090 | } | ||
1091 | } | ||
1092 | qdisc_class_hash_destroy(&q->clhash); | ||
1093 | } | ||
1094 | |||
1095 | static const struct Qdisc_class_ops qfq_class_ops = { | ||
1096 | .change = qfq_change_class, | ||
1097 | .delete = qfq_delete_class, | ||
1098 | .get = qfq_get_class, | ||
1099 | .put = qfq_put_class, | ||
1100 | .tcf_chain = qfq_tcf_chain, | ||
1101 | .bind_tcf = qfq_bind_tcf, | ||
1102 | .unbind_tcf = qfq_unbind_tcf, | ||
1103 | .graft = qfq_graft_class, | ||
1104 | .leaf = qfq_class_leaf, | ||
1105 | .qlen_notify = qfq_qlen_notify, | ||
1106 | .dump = qfq_dump_class, | ||
1107 | .dump_stats = qfq_dump_class_stats, | ||
1108 | .walk = qfq_walk, | ||
1109 | }; | ||
1110 | |||
1111 | static struct Qdisc_ops qfq_qdisc_ops __read_mostly = { | ||
1112 | .cl_ops = &qfq_class_ops, | ||
1113 | .id = "qfq", | ||
1114 | .priv_size = sizeof(struct qfq_sched), | ||
1115 | .enqueue = qfq_enqueue, | ||
1116 | .dequeue = qfq_dequeue, | ||
1117 | .peek = qdisc_peek_dequeued, | ||
1118 | .drop = qfq_drop, | ||
1119 | .init = qfq_init_qdisc, | ||
1120 | .reset = qfq_reset_qdisc, | ||
1121 | .destroy = qfq_destroy_qdisc, | ||
1122 | .owner = THIS_MODULE, | ||
1123 | }; | ||
1124 | |||
1125 | static int __init qfq_init(void) | ||
1126 | { | ||
1127 | return register_qdisc(&qfq_qdisc_ops); | ||
1128 | } | ||
1129 | |||
1130 | static void __exit qfq_exit(void) | ||
1131 | { | ||
1132 | unregister_qdisc(&qfq_qdisc_ops); | ||
1133 | } | ||
1134 | |||
1135 | module_init(qfq_init); | ||
1136 | module_exit(qfq_exit); | ||
1137 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8d42bb3ba540..6649463da1b6 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c | |||
@@ -36,8 +36,7 @@ | |||
36 | if RED works correctly. | 36 | if RED works correctly. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | struct red_sched_data | 39 | struct red_sched_data { |
40 | { | ||
41 | u32 limit; /* HARD maximal queue length */ | 40 | u32 limit; /* HARD maximal queue length */ |
42 | unsigned char flags; | 41 | unsigned char flags; |
43 | struct red_parms parms; | 42 | struct red_parms parms; |
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q) | |||
55 | return q->flags & TC_RED_HARDDROP; | 54 | return q->flags & TC_RED_HARDDROP; |
56 | } | 55 | } |
57 | 56 | ||
58 | static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 57 | static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
59 | { | 58 | { |
60 | struct red_sched_data *q = qdisc_priv(sch); | 59 | struct red_sched_data *q = qdisc_priv(sch); |
61 | struct Qdisc *child = q->qdisc; | 60 | struct Qdisc *child = q->qdisc; |
@@ -67,35 +66,33 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
67 | red_end_of_idle_period(&q->parms); | 66 | red_end_of_idle_period(&q->parms); |
68 | 67 | ||
69 | switch (red_action(&q->parms, q->parms.qavg)) { | 68 | switch (red_action(&q->parms, q->parms.qavg)) { |
70 | case RED_DONT_MARK: | 69 | case RED_DONT_MARK: |
71 | break; | 70 | break; |
72 | 71 | ||
73 | case RED_PROB_MARK: | 72 | case RED_PROB_MARK: |
74 | sch->qstats.overlimits++; | 73 | sch->qstats.overlimits++; |
75 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { | 74 | if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { |
76 | q->stats.prob_drop++; | 75 | q->stats.prob_drop++; |
77 | goto congestion_drop; | 76 | goto congestion_drop; |
78 | } | 77 | } |
79 | 78 | ||
80 | q->stats.prob_mark++; | 79 | q->stats.prob_mark++; |
81 | break; | 80 | break; |
82 | 81 | ||
83 | case RED_HARD_MARK: | 82 | case RED_HARD_MARK: |
84 | sch->qstats.overlimits++; | 83 | sch->qstats.overlimits++; |
85 | if (red_use_harddrop(q) || !red_use_ecn(q) || | 84 | if (red_use_harddrop(q) || !red_use_ecn(q) || |
86 | !INET_ECN_set_ce(skb)) { | 85 | !INET_ECN_set_ce(skb)) { |
87 | q->stats.forced_drop++; | 86 | q->stats.forced_drop++; |
88 | goto congestion_drop; | 87 | goto congestion_drop; |
89 | } | 88 | } |
90 | 89 | ||
91 | q->stats.forced_mark++; | 90 | q->stats.forced_mark++; |
92 | break; | 91 | break; |
93 | } | 92 | } |
94 | 93 | ||
95 | ret = qdisc_enqueue(skb, child); | 94 | ret = qdisc_enqueue(skb, child); |
96 | if (likely(ret == NET_XMIT_SUCCESS)) { | 95 | if (likely(ret == NET_XMIT_SUCCESS)) { |
97 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
98 | sch->bstats.packets++; | ||
99 | sch->q.qlen++; | 96 | sch->q.qlen++; |
100 | } else if (net_xmit_drop_count(ret)) { | 97 | } else if (net_xmit_drop_count(ret)) { |
101 | q->stats.pdrop++; | 98 | q->stats.pdrop++; |
@@ -108,22 +105,24 @@ congestion_drop: | |||
108 | return NET_XMIT_CN; | 105 | return NET_XMIT_CN; |
109 | } | 106 | } |
110 | 107 | ||
111 | static struct sk_buff * red_dequeue(struct Qdisc* sch) | 108 | static struct sk_buff *red_dequeue(struct Qdisc *sch) |
112 | { | 109 | { |
113 | struct sk_buff *skb; | 110 | struct sk_buff *skb; |
114 | struct red_sched_data *q = qdisc_priv(sch); | 111 | struct red_sched_data *q = qdisc_priv(sch); |
115 | struct Qdisc *child = q->qdisc; | 112 | struct Qdisc *child = q->qdisc; |
116 | 113 | ||
117 | skb = child->dequeue(child); | 114 | skb = child->dequeue(child); |
118 | if (skb) | 115 | if (skb) { |
116 | qdisc_bstats_update(sch, skb); | ||
119 | sch->q.qlen--; | 117 | sch->q.qlen--; |
120 | else if (!red_is_idling(&q->parms)) | 118 | } else { |
121 | red_start_of_idle_period(&q->parms); | 119 | if (!red_is_idling(&q->parms)) |
122 | 120 | red_start_of_idle_period(&q->parms); | |
121 | } | ||
123 | return skb; | 122 | return skb; |
124 | } | 123 | } |
125 | 124 | ||
126 | static struct sk_buff * red_peek(struct Qdisc* sch) | 125 | static struct sk_buff *red_peek(struct Qdisc *sch) |
127 | { | 126 | { |
128 | struct red_sched_data *q = qdisc_priv(sch); | 127 | struct red_sched_data *q = qdisc_priv(sch); |
129 | struct Qdisc *child = q->qdisc; | 128 | struct Qdisc *child = q->qdisc; |
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch) | |||
131 | return child->ops->peek(child); | 130 | return child->ops->peek(child); |
132 | } | 131 | } |
133 | 132 | ||
134 | static unsigned int red_drop(struct Qdisc* sch) | 133 | static unsigned int red_drop(struct Qdisc *sch) |
135 | { | 134 | { |
136 | struct red_sched_data *q = qdisc_priv(sch); | 135 | struct red_sched_data *q = qdisc_priv(sch); |
137 | struct Qdisc *child = q->qdisc; | 136 | struct Qdisc *child = q->qdisc; |
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch) | |||
150 | return 0; | 149 | return 0; |
151 | } | 150 | } |
152 | 151 | ||
153 | static void red_reset(struct Qdisc* sch) | 152 | static void red_reset(struct Qdisc *sch) |
154 | { | 153 | { |
155 | struct red_sched_data *q = qdisc_priv(sch); | 154 | struct red_sched_data *q = qdisc_priv(sch); |
156 | 155 | ||
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) | |||
217 | return 0; | 216 | return 0; |
218 | } | 217 | } |
219 | 218 | ||
220 | static int red_init(struct Qdisc* sch, struct nlattr *opt) | 219 | static int red_init(struct Qdisc *sch, struct nlattr *opt) |
221 | { | 220 | { |
222 | struct red_sched_data *q = qdisc_priv(sch); | 221 | struct red_sched_data *q = qdisc_priv(sch); |
223 | 222 | ||
@@ -239,6 +238,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
239 | .Scell_log = q->parms.Scell_log, | 238 | .Scell_log = q->parms.Scell_log, |
240 | }; | 239 | }; |
241 | 240 | ||
241 | sch->qstats.backlog = q->qdisc->qstats.backlog; | ||
242 | opts = nla_nest_start(skb, TCA_OPTIONS); | 242 | opts = nla_nest_start(skb, TCA_OPTIONS); |
243 | if (opts == NULL) | 243 | if (opts == NULL) |
244 | goto nla_put_failure; | 244 | goto nla_put_failure; |
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c new file mode 100644 index 000000000000..0a833d0c1f61 --- /dev/null +++ b/net/sched/sch_sfb.c | |||
@@ -0,0 +1,709 @@ | |||
1 | /* | ||
2 | * net/sched/sch_sfb.c Stochastic Fair Blue | ||
3 | * | ||
4 | * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr> | ||
5 | * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation. | ||
10 | * | ||
11 | * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue: | ||
12 | * A New Class of Active Queue Management Algorithms. | ||
13 | * U. Michigan CSE-TR-387-99, April 1999. | ||
14 | * | ||
15 | * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf | ||
16 | * | ||
17 | */ | ||
18 | |||
19 | #include <linux/module.h> | ||
20 | #include <linux/types.h> | ||
21 | #include <linux/kernel.h> | ||
22 | #include <linux/errno.h> | ||
23 | #include <linux/skbuff.h> | ||
24 | #include <linux/random.h> | ||
25 | #include <linux/jhash.h> | ||
26 | #include <net/ip.h> | ||
27 | #include <net/pkt_sched.h> | ||
28 | #include <net/inet_ecn.h> | ||
29 | |||
30 | /* | ||
31 | * SFB uses two B[l][n] : L x N arrays of bins (L levels, N bins per level) | ||
32 | * This implementation uses L = 8 and N = 16 | ||
33 | * This permits us to split one 32bit hash (provided per packet by rxhash or | ||
34 | * external classifier) into 8 subhashes of 4 bits. | ||
35 | */ | ||
36 | #define SFB_BUCKET_SHIFT 4 | ||
37 | #define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */ | ||
38 | #define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1) | ||
39 | #define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */ | ||
40 | |||
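To see how one 32-bit hash addresses all eight levels, here is a tiny stand-alone sketch (user-space C; the constants mirror SFB_BUCKET_SHIFT/SFB_BUCKET_MASK above) that peels off one 4-bit bucket index per level, exactly as the enqueue path below does:

    #include <stdio.h>
    #include <stdint.h>

    #define BUCKET_SHIFT 4
    #define BUCKET_MASK  ((1 << BUCKET_SHIFT) - 1)
    #define LEVELS       (32 / BUCKET_SHIFT)

    int main(void)
    {
            uint32_t hash = 0xdeadbeef;        /* made-up per-packet hash */
            int level;

            for (level = 0; level < LEVELS; level++) {
                    printf("level %d -> bucket %u\n", level, hash & BUCKET_MASK);
                    hash >>= BUCKET_SHIFT;     /* next 4 bits pick the next level's bin */
            }
            return 0;
    }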
41 | /* SFB algo uses a virtual queue, named "bin" */ | ||
42 | struct sfb_bucket { | ||
43 | u16 qlen; /* length of virtual queue */ | ||
44 | u16 p_mark; /* marking probability */ | ||
45 | }; | ||
46 | |||
47 | /* We use double buffering right before a hash change | ||
48 | * (Section 4.4 of the SFB reference: moving hash functions) | ||
49 | */ | ||
50 | struct sfb_bins { | ||
51 | u32 perturbation; /* jhash perturbation */ | ||
52 | struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS]; | ||
53 | }; | ||
54 | |||
55 | struct sfb_sched_data { | ||
56 | struct Qdisc *qdisc; | ||
57 | struct tcf_proto *filter_list; | ||
58 | unsigned long rehash_interval; | ||
59 | unsigned long warmup_time; /* double buffering warmup time in jiffies */ | ||
60 | u32 max; | ||
61 | u32 bin_size; /* maximum queue length per bin */ | ||
62 | u32 increment; /* d1 */ | ||
63 | u32 decrement; /* d2 */ | ||
64 | u32 limit; /* HARD maximal queue length */ | ||
65 | u32 penalty_rate; | ||
66 | u32 penalty_burst; | ||
67 | u32 tokens_avail; | ||
68 | unsigned long rehash_time; | ||
69 | unsigned long token_time; | ||
70 | |||
71 | u8 slot; /* current active bins (0 or 1) */ | ||
72 | bool double_buffering; | ||
73 | struct sfb_bins bins[2]; | ||
74 | |||
75 | struct { | ||
76 | u32 earlydrop; | ||
77 | u32 penaltydrop; | ||
78 | u32 bucketdrop; | ||
79 | u32 queuedrop; | ||
80 | u32 childdrop; /* drops in child qdisc */ | ||
81 | u32 marked; /* ECN mark */ | ||
82 | } stats; | ||
83 | }; | ||
84 | |||
85 | /* | ||
86 | * Each queued skb might be hashed on one or two bins | ||
87 | * We store in skb_cb the two hash values. | ||
88 | * (A zero value means double buffering was not used) | ||
89 | */ | ||
90 | struct sfb_skb_cb { | ||
91 | u32 hashes[2]; | ||
92 | }; | ||
93 | |||
94 | static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb) | ||
95 | { | ||
96 | BUILD_BUG_ON(sizeof(skb->cb) < | ||
97 | sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb)); | ||
98 | return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data; | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * If using the 'internal' SFB flow classifier, the hash comes from the skb rxhash. | ||
103 | * If using an external classifier, the hash comes from the classid. | ||
104 | */ | ||
105 | static u32 sfb_hash(const struct sk_buff *skb, u32 slot) | ||
106 | { | ||
107 | return sfb_skb_cb(skb)->hashes[slot]; | ||
108 | } | ||
109 | |||
110 | /* Probabilities are coded as Q0.16 fixed-point values, | ||
111 | * with 0xFFFF representing 65535/65536 (almost 1.0) | ||
112 | * Addition and subtraction are saturating in [0, 65535] | ||
113 | */ | ||
114 | static u32 prob_plus(u32 p1, u32 p2) | ||
115 | { | ||
116 | u32 res = p1 + p2; | ||
117 | |||
118 | return min_t(u32, res, SFB_MAX_PROB); | ||
119 | } | ||
120 | |||
121 | static u32 prob_minus(u32 p1, u32 p2) | ||
122 | { | ||
123 | return p1 > p2 ? p1 - p2 : 0; | ||
124 | } | ||
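A quick worked example of the saturating Q0.16 arithmetic described above (stand-alone sketch; 0xFFFF is taken as the "almost 1.0" ceiling per the comment):

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_PROB 0xFFFF                    /* 65535/65536, i.e. ~1.0 in Q0.16 */

    static uint32_t q16_plus(uint32_t p1, uint32_t p2)
    {
            uint32_t res = p1 + p2;

            return res < MAX_PROB ? res : MAX_PROB;    /* saturate at ~1.0 */
    }

    static uint32_t q16_minus(uint32_t p1, uint32_t p2)
    {
            return p1 > p2 ? p1 - p2 : 0;              /* saturate at 0 */
    }

    int main(void)
    {
            printf("0.5 + 0.75 -> %#x (clamped to ~1.0)\n",
                   q16_plus(0x8000, 0xC000));
            printf("0.25 - 0.5 -> %#x (clamped to 0)\n",
                   q16_minus(0x4000, 0x8000));
            return 0;
    }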
125 | |||
126 | static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) | ||
127 | { | ||
128 | int i; | ||
129 | struct sfb_bucket *b = &q->bins[slot].bins[0][0]; | ||
130 | |||
131 | for (i = 0; i < SFB_LEVELS; i++) { | ||
132 | u32 hash = sfbhash & SFB_BUCKET_MASK; | ||
133 | |||
134 | sfbhash >>= SFB_BUCKET_SHIFT; | ||
135 | if (b[hash].qlen < 0xFFFF) | ||
136 | b[hash].qlen++; | ||
137 | b += SFB_NUMBUCKETS; /* next level */ | ||
138 | } | ||
139 | } | ||
140 | |||
141 | static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) | ||
142 | { | ||
143 | u32 sfbhash; | ||
144 | |||
145 | sfbhash = sfb_hash(skb, 0); | ||
146 | if (sfbhash) | ||
147 | increment_one_qlen(sfbhash, 0, q); | ||
148 | |||
149 | sfbhash = sfb_hash(skb, 1); | ||
150 | if (sfbhash) | ||
151 | increment_one_qlen(sfbhash, 1, q); | ||
152 | } | ||
153 | |||
154 | static void decrement_one_qlen(u32 sfbhash, u32 slot, | ||
155 | struct sfb_sched_data *q) | ||
156 | { | ||
157 | int i; | ||
158 | struct sfb_bucket *b = &q->bins[slot].bins[0][0]; | ||
159 | |||
160 | for (i = 0; i < SFB_LEVELS; i++) { | ||
161 | u32 hash = sfbhash & SFB_BUCKET_MASK; | ||
162 | |||
163 | sfbhash >>= SFB_BUCKET_SHIFT; | ||
164 | if (b[hash].qlen > 0) | ||
165 | b[hash].qlen--; | ||
166 | b += SFB_NUMBUCKETS; /* next level */ | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) | ||
171 | { | ||
172 | u32 sfbhash; | ||
173 | |||
174 | sfbhash = sfb_hash(skb, 0); | ||
175 | if (sfbhash) | ||
176 | decrement_one_qlen(sfbhash, 0, q); | ||
177 | |||
178 | sfbhash = sfb_hash(skb, 1); | ||
179 | if (sfbhash) | ||
180 | decrement_one_qlen(sfbhash, 1, q); | ||
181 | } | ||
182 | |||
183 | static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q) | ||
184 | { | ||
185 | b->p_mark = prob_minus(b->p_mark, q->decrement); | ||
186 | } | ||
187 | |||
188 | static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q) | ||
189 | { | ||
190 | b->p_mark = prob_plus(b->p_mark, q->increment); | ||
191 | } | ||
192 | |||
193 | static void sfb_zero_all_buckets(struct sfb_sched_data *q) | ||
194 | { | ||
195 | memset(&q->bins, 0, sizeof(q->bins)); | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * compute max qlen, max p_mark, and avg p_mark | ||
200 | */ | ||
201 | static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q) | ||
202 | { | ||
203 | int i; | ||
204 | u32 qlen = 0, prob = 0, totalpm = 0; | ||
205 | const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0]; | ||
206 | |||
207 | for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) { | ||
208 | if (qlen < b->qlen) | ||
209 | qlen = b->qlen; | ||
210 | totalpm += b->p_mark; | ||
211 | if (prob < b->p_mark) | ||
212 | prob = b->p_mark; | ||
213 | b++; | ||
214 | } | ||
215 | *prob_r = prob; | ||
216 | *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS); | ||
217 | return qlen; | ||
218 | } | ||
219 | |||
220 | |||
221 | static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q) | ||
222 | { | ||
223 | q->bins[slot].perturbation = net_random(); | ||
224 | } | ||
225 | |||
226 | static void sfb_swap_slot(struct sfb_sched_data *q) | ||
227 | { | ||
228 | sfb_init_perturbation(q->slot, q); | ||
229 | q->slot ^= 1; | ||
230 | q->double_buffering = false; | ||
231 | } | ||
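The swap above is driven from the enqueue path rather than a timer: double buffering starts warmup_time before the next rehash point, and at the rehash point the active slot flips. A sketch of that time arithmetic only (jiffies and time_after() replaced by a plain counter; the numbers are made up):

    #include <stdio.h>

    int main(void)
    {
            unsigned long rehash_time = 0;             /* time of the last swap */
            unsigned long rehash_interval = 600;       /* e.g. seconds */
            unsigned long warmup_time = 60;
            unsigned long now;

            for (now = 0; now <= 1300; now += 100) {
                    unsigned long limit = rehash_time + rehash_interval;

                    if (now > limit) {
                            printf("t=%lu: swap slots, rehash\n", now);
                            rehash_time = now;
                    } else if (now > limit - warmup_time) {
                            printf("t=%lu: start double buffering\n", now);
                    }
            }
            return 0;
    }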
232 | |||
233 | /* Non-elastic flows are allowed to use part of the bandwidth, expressed | ||
234 | * in "penalty_rate" packets per second, with a "penalty_burst" burst | ||
235 | */ | ||
236 | static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q) | ||
237 | { | ||
238 | if (q->penalty_rate == 0 || q->penalty_burst == 0) | ||
239 | return true; | ||
240 | |||
241 | if (q->tokens_avail < 1) { | ||
242 | unsigned long age = min(10UL * HZ, jiffies - q->token_time); | ||
243 | |||
244 | q->tokens_avail = (age * q->penalty_rate) / HZ; | ||
245 | if (q->tokens_avail > q->penalty_burst) | ||
246 | q->tokens_avail = q->penalty_burst; | ||
247 | q->token_time = jiffies; | ||
248 | if (q->tokens_avail < 1) | ||
249 | return true; | ||
250 | } | ||
251 | |||
252 | q->tokens_avail--; | ||
253 | return false; | ||
254 | } | ||
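sfb_rate_limit() is a small token bucket: tokens accrue at penalty_rate per second, capped at penalty_burst, and each forwarded packet of an inelastic flow spends one. A rough user-space sketch of the same refill logic (names and the floating-point clock are illustrative; the kernel version works in jiffies and also caps the refill age at 10*HZ):

    #include <stdio.h>

    struct bucket {
            unsigned int tokens;
            unsigned int rate;                 /* tokens per second */
            unsigned int burst;                /* cap on accumulated tokens */
            double last;                       /* time of last refill, seconds */
    };

    /* returns 1 if the packet should be dropped (no token available) */
    static int rate_limit(struct bucket *b, double now)
    {
            if (b->tokens < 1) {
                    unsigned int refill = (unsigned int)((now - b->last) * b->rate);

                    b->tokens = refill > b->burst ? b->burst : refill;
                    b->last = now;
                    if (b->tokens < 1)
                            return 1;
            }
            b->tokens--;
            return 0;
    }

    int main(void)
    {
            struct bucket b = { .tokens = 0, .rate = 10, .burst = 20, .last = 0.0 };

            printf("t=0.05s: drop=%d\n", rate_limit(&b, 0.05)); /* 0 tokens -> 1 */
            printf("t=0.30s: drop=%d\n", rate_limit(&b, 0.30)); /* 2 tokens -> 0 */
            return 0;
    }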
255 | |||
256 | static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q, | ||
257 | int *qerr, u32 *salt) | ||
258 | { | ||
259 | struct tcf_result res; | ||
260 | int result; | ||
261 | |||
262 | result = tc_classify(skb, q->filter_list, &res); | ||
263 | if (result >= 0) { | ||
264 | #ifdef CONFIG_NET_CLS_ACT | ||
265 | switch (result) { | ||
266 | case TC_ACT_STOLEN: | ||
267 | case TC_ACT_QUEUED: | ||
268 | *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; | ||
269 | case TC_ACT_SHOT: | ||
270 | return false; | ||
271 | } | ||
272 | #endif | ||
273 | *salt = TC_H_MIN(res.classid); | ||
274 | return true; | ||
275 | } | ||
276 | return false; | ||
277 | } | ||
278 | |||
279 | static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch) | ||
280 | { | ||
281 | |||
282 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
283 | struct Qdisc *child = q->qdisc; | ||
284 | int i; | ||
285 | u32 p_min = ~0; | ||
286 | u32 minqlen = ~0; | ||
287 | u32 r, slot, salt, sfbhash; | ||
288 | int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; | ||
289 | |||
290 | if (q->rehash_interval > 0) { | ||
291 | unsigned long limit = q->rehash_time + q->rehash_interval; | ||
292 | |||
293 | if (unlikely(time_after(jiffies, limit))) { | ||
294 | sfb_swap_slot(q); | ||
295 | q->rehash_time = jiffies; | ||
296 | } else if (unlikely(!q->double_buffering && q->warmup_time > 0 && | ||
297 | time_after(jiffies, limit - q->warmup_time))) { | ||
298 | q->double_buffering = true; | ||
299 | } | ||
300 | } | ||
301 | |||
302 | if (q->filter_list) { | ||
303 | /* If using external classifiers, get result and record it. */ | ||
304 | if (!sfb_classify(skb, q, &ret, &salt)) | ||
305 | goto other_drop; | ||
306 | } else { | ||
307 | salt = skb_get_rxhash(skb); | ||
308 | } | ||
309 | |||
310 | slot = q->slot; | ||
311 | |||
312 | sfbhash = jhash_1word(salt, q->bins[slot].perturbation); | ||
313 | if (!sfbhash) | ||
314 | sfbhash = 1; | ||
315 | sfb_skb_cb(skb)->hashes[slot] = sfbhash; | ||
316 | |||
317 | for (i = 0; i < SFB_LEVELS; i++) { | ||
318 | u32 hash = sfbhash & SFB_BUCKET_MASK; | ||
319 | struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; | ||
320 | |||
321 | sfbhash >>= SFB_BUCKET_SHIFT; | ||
322 | if (b->qlen == 0) | ||
323 | decrement_prob(b, q); | ||
324 | else if (b->qlen >= q->bin_size) | ||
325 | increment_prob(b, q); | ||
326 | if (minqlen > b->qlen) | ||
327 | minqlen = b->qlen; | ||
328 | if (p_min > b->p_mark) | ||
329 | p_min = b->p_mark; | ||
330 | } | ||
331 | |||
332 | slot ^= 1; | ||
333 | sfb_skb_cb(skb)->hashes[slot] = 0; | ||
334 | |||
335 | if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) { | ||
336 | sch->qstats.overlimits++; | ||
337 | if (minqlen >= q->max) | ||
338 | q->stats.bucketdrop++; | ||
339 | else | ||
340 | q->stats.queuedrop++; | ||
341 | goto drop; | ||
342 | } | ||
343 | |||
344 | if (unlikely(p_min >= SFB_MAX_PROB)) { | ||
345 | /* Inelastic flow */ | ||
346 | if (q->double_buffering) { | ||
347 | sfbhash = jhash_1word(salt, q->bins[slot].perturbation); | ||
348 | if (!sfbhash) | ||
349 | sfbhash = 1; | ||
350 | sfb_skb_cb(skb)->hashes[slot] = sfbhash; | ||
351 | |||
352 | for (i = 0; i < SFB_LEVELS; i++) { | ||
353 | u32 hash = sfbhash & SFB_BUCKET_MASK; | ||
354 | struct sfb_bucket *b = &q->bins[slot].bins[i][hash]; | ||
355 | |||
356 | sfbhash >>= SFB_BUCKET_SHIFT; | ||
357 | if (b->qlen == 0) | ||
358 | decrement_prob(b, q); | ||
359 | else if (b->qlen >= q->bin_size) | ||
360 | increment_prob(b, q); | ||
361 | } | ||
362 | } | ||
363 | if (sfb_rate_limit(skb, q)) { | ||
364 | sch->qstats.overlimits++; | ||
365 | q->stats.penaltydrop++; | ||
366 | goto drop; | ||
367 | } | ||
368 | goto enqueue; | ||
369 | } | ||
370 | |||
371 | r = net_random() & SFB_MAX_PROB; | ||
372 | |||
373 | if (unlikely(r < p_min)) { | ||
374 | if (unlikely(p_min > SFB_MAX_PROB / 2)) { | ||
375 | /* If we're marking that many packets, then either | ||
376 | * this flow is unresponsive, or we're badly congested. | ||
377 | * In either case, we want to start dropping packets. | ||
378 | */ | ||
379 | if (r < (p_min - SFB_MAX_PROB / 2) * 2) { | ||
380 | q->stats.earlydrop++; | ||
381 | goto drop; | ||
382 | } | ||
383 | } | ||
384 | if (INET_ECN_set_ce(skb)) { | ||
385 | q->stats.marked++; | ||
386 | } else { | ||
387 | q->stats.earlydrop++; | ||
388 | goto drop; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | enqueue: | ||
393 | ret = qdisc_enqueue(skb, child); | ||
394 | if (likely(ret == NET_XMIT_SUCCESS)) { | ||
395 | sch->q.qlen++; | ||
396 | increment_qlen(skb, q); | ||
397 | } else if (net_xmit_drop_count(ret)) { | ||
398 | q->stats.childdrop++; | ||
399 | sch->qstats.drops++; | ||
400 | } | ||
401 | return ret; | ||
402 | |||
403 | drop: | ||
404 | qdisc_drop(skb, sch); | ||
405 | return NET_XMIT_CN; | ||
406 | other_drop: | ||
407 | if (ret & __NET_XMIT_BYPASS) | ||
408 | sch->qstats.drops++; | ||
409 | kfree_skb(skb); | ||
410 | return ret; | ||
411 | } | ||
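For the early-drop branch in sfb_enqueue(), a quick numeric check (stand-alone sketch; 0xFFFF taken as the probability ceiling): with p_min around 0.75, the excess over 1/2 is doubled, so roughly two thirds of the packets that would have been marked are dropped instead.

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_PROB 0xFFFF                    /* ~1.0 on the Q0.16 scale */

    int main(void)
    {
            uint32_t p_min = 0xC000;           /* ~0.75 marking probability */
            uint32_t r;
            unsigned int marked = 0, dropped = 0;

            /* sweep every possible random draw instead of calling net_random() */
            for (r = 0; r <= MAX_PROB; r++) {
                    if (r >= p_min)
                            continue;                          /* left untouched */
                    if (p_min > MAX_PROB / 2 &&
                        r < (p_min - MAX_PROB / 2) * 2)
                            dropped++;                         /* escalated to a drop */
                    else
                            marked++;                          /* ECN-marked */
            }
            printf("dropped=%u marked=%u\n", dropped, marked); /* 32770 16382 */
            return 0;
    }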
412 | |||
413 | static struct sk_buff *sfb_dequeue(struct Qdisc *sch) | ||
414 | { | ||
415 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
416 | struct Qdisc *child = q->qdisc; | ||
417 | struct sk_buff *skb; | ||
418 | |||
419 | skb = child->dequeue(q->qdisc); | ||
420 | |||
421 | if (skb) { | ||
422 | qdisc_bstats_update(sch, skb); | ||
423 | sch->q.qlen--; | ||
424 | decrement_qlen(skb, q); | ||
425 | } | ||
426 | |||
427 | return skb; | ||
428 | } | ||
429 | |||
430 | static struct sk_buff *sfb_peek(struct Qdisc *sch) | ||
431 | { | ||
432 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
433 | struct Qdisc *child = q->qdisc; | ||
434 | |||
435 | return child->ops->peek(child); | ||
436 | } | ||
437 | |||
438 | /* No sfb_drop -- impossible since the child doesn't return the dropped skb. */ | ||
439 | |||
440 | static void sfb_reset(struct Qdisc *sch) | ||
441 | { | ||
442 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
443 | |||
444 | qdisc_reset(q->qdisc); | ||
445 | sch->q.qlen = 0; | ||
446 | q->slot = 0; | ||
447 | q->double_buffering = false; | ||
448 | sfb_zero_all_buckets(q); | ||
449 | sfb_init_perturbation(0, q); | ||
450 | } | ||
451 | |||
452 | static void sfb_destroy(struct Qdisc *sch) | ||
453 | { | ||
454 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
455 | |||
456 | tcf_destroy_chain(&q->filter_list); | ||
457 | qdisc_destroy(q->qdisc); | ||
458 | } | ||
459 | |||
460 | static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = { | ||
461 | [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) }, | ||
462 | }; | ||
463 | |||
464 | static const struct tc_sfb_qopt sfb_default_ops = { | ||
465 | .rehash_interval = 600 * MSEC_PER_SEC, | ||
466 | .warmup_time = 60 * MSEC_PER_SEC, | ||
467 | .limit = 0, | ||
468 | .max = 25, | ||
469 | .bin_size = 20, | ||
470 | .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */ | ||
471 | .decrement = (SFB_MAX_PROB + 3000) / 6000, | ||
472 | .penalty_rate = 10, | ||
473 | .penalty_burst = 20, | ||
474 | }; | ||
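As a sanity check on the defaults above (assuming the same 0xFFFF Q0.16 ceiling): the increment comes to 66/65536, about 0.1% per congested round as the inline comment says, and the decrement to 11/65536, about 0.017% per empty round. A one-liner to reproduce the arithmetic:

    #include <stdio.h>

    #define MAX_PROB 0xFFFF

    int main(void)
    {
            unsigned int inc = (MAX_PROB + 500) / 1000;        /* 66 */
            unsigned int dec = (MAX_PROB + 3000) / 6000;       /* 11 */

            printf("increment %u (~%.2f%%), decrement %u (~%.3f%%)\n",
                   inc, 100.0 * inc / 65536, dec, 100.0 * dec / 65536);
            return 0;
    }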
475 | |||
476 | static int sfb_change(struct Qdisc *sch, struct nlattr *opt) | ||
477 | { | ||
478 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
479 | struct Qdisc *child; | ||
480 | struct nlattr *tb[TCA_SFB_MAX + 1]; | ||
481 | const struct tc_sfb_qopt *ctl = &sfb_default_ops; | ||
482 | u32 limit; | ||
483 | int err; | ||
484 | |||
485 | if (opt) { | ||
486 | err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy); | ||
487 | if (err < 0) | ||
488 | return -EINVAL; | ||
489 | |||
490 | if (tb[TCA_SFB_PARMS] == NULL) | ||
491 | return -EINVAL; | ||
492 | |||
493 | ctl = nla_data(tb[TCA_SFB_PARMS]); | ||
494 | } | ||
495 | |||
496 | limit = ctl->limit; | ||
497 | if (limit == 0) | ||
498 | limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1); | ||
499 | |||
500 | child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit); | ||
501 | if (IS_ERR(child)) | ||
502 | return PTR_ERR(child); | ||
503 | |||
504 | sch_tree_lock(sch); | ||
505 | |||
506 | qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen); | ||
507 | qdisc_destroy(q->qdisc); | ||
508 | q->qdisc = child; | ||
509 | |||
510 | q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval); | ||
511 | q->warmup_time = msecs_to_jiffies(ctl->warmup_time); | ||
512 | q->rehash_time = jiffies; | ||
513 | q->limit = limit; | ||
514 | q->increment = ctl->increment; | ||
515 | q->decrement = ctl->decrement; | ||
516 | q->max = ctl->max; | ||
517 | q->bin_size = ctl->bin_size; | ||
518 | q->penalty_rate = ctl->penalty_rate; | ||
519 | q->penalty_burst = ctl->penalty_burst; | ||
520 | q->tokens_avail = ctl->penalty_burst; | ||
521 | q->token_time = jiffies; | ||
522 | |||
523 | q->slot = 0; | ||
524 | q->double_buffering = false; | ||
525 | sfb_zero_all_buckets(q); | ||
526 | sfb_init_perturbation(0, q); | ||
527 | sfb_init_perturbation(1, q); | ||
528 | |||
529 | sch_tree_unlock(sch); | ||
530 | |||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static int sfb_init(struct Qdisc *sch, struct nlattr *opt) | ||
535 | { | ||
536 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
537 | |||
538 | q->qdisc = &noop_qdisc; | ||
539 | return sfb_change(sch, opt); | ||
540 | } | ||
541 | |||
542 | static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb) | ||
543 | { | ||
544 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
545 | struct nlattr *opts; | ||
546 | struct tc_sfb_qopt opt = { | ||
547 | .rehash_interval = jiffies_to_msecs(q->rehash_interval), | ||
548 | .warmup_time = jiffies_to_msecs(q->warmup_time), | ||
549 | .limit = q->limit, | ||
550 | .max = q->max, | ||
551 | .bin_size = q->bin_size, | ||
552 | .increment = q->increment, | ||
553 | .decrement = q->decrement, | ||
554 | .penalty_rate = q->penalty_rate, | ||
555 | .penalty_burst = q->penalty_burst, | ||
556 | }; | ||
557 | |||
558 | sch->qstats.backlog = q->qdisc->qstats.backlog; | ||
559 | opts = nla_nest_start(skb, TCA_OPTIONS); | ||
560 | NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt); | ||
561 | return nla_nest_end(skb, opts); | ||
562 | |||
563 | nla_put_failure: | ||
564 | nla_nest_cancel(skb, opts); | ||
565 | return -EMSGSIZE; | ||
566 | } | ||
567 | |||
568 | static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d) | ||
569 | { | ||
570 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
571 | struct tc_sfb_xstats st = { | ||
572 | .earlydrop = q->stats.earlydrop, | ||
573 | .penaltydrop = q->stats.penaltydrop, | ||
574 | .bucketdrop = q->stats.bucketdrop, | ||
575 | .queuedrop = q->stats.queuedrop, | ||
576 | .childdrop = q->stats.childdrop, | ||
577 | .marked = q->stats.marked, | ||
578 | }; | ||
579 | |||
580 | st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q); | ||
581 | |||
582 | return gnet_stats_copy_app(d, &st, sizeof(st)); | ||
583 | } | ||
584 | |||
585 | static int sfb_dump_class(struct Qdisc *sch, unsigned long cl, | ||
586 | struct sk_buff *skb, struct tcmsg *tcm) | ||
587 | { | ||
588 | return -ENOSYS; | ||
589 | } | ||
590 | |||
591 | static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | ||
592 | struct Qdisc **old) | ||
593 | { | ||
594 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
595 | |||
596 | if (new == NULL) | ||
597 | new = &noop_qdisc; | ||
598 | |||
599 | sch_tree_lock(sch); | ||
600 | *old = q->qdisc; | ||
601 | q->qdisc = new; | ||
602 | qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); | ||
603 | qdisc_reset(*old); | ||
604 | sch_tree_unlock(sch); | ||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) | ||
609 | { | ||
610 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
611 | |||
612 | return q->qdisc; | ||
613 | } | ||
614 | |||
615 | static unsigned long sfb_get(struct Qdisc *sch, u32 classid) | ||
616 | { | ||
617 | return 1; | ||
618 | } | ||
619 | |||
620 | static void sfb_put(struct Qdisc *sch, unsigned long arg) | ||
621 | { | ||
622 | } | ||
623 | |||
624 | static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | ||
625 | struct nlattr **tca, unsigned long *arg) | ||
626 | { | ||
627 | return -ENOSYS; | ||
628 | } | ||
629 | |||
630 | static int sfb_delete(struct Qdisc *sch, unsigned long cl) | ||
631 | { | ||
632 | return -ENOSYS; | ||
633 | } | ||
634 | |||
635 | static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) | ||
636 | { | ||
637 | if (!walker->stop) { | ||
638 | if (walker->count >= walker->skip) | ||
639 | if (walker->fn(sch, 1, walker) < 0) { | ||
640 | walker->stop = 1; | ||
641 | return; | ||
642 | } | ||
643 | walker->count++; | ||
644 | } | ||
645 | } | ||
646 | |||
647 | static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl) | ||
648 | { | ||
649 | struct sfb_sched_data *q = qdisc_priv(sch); | ||
650 | |||
651 | if (cl) | ||
652 | return NULL; | ||
653 | return &q->filter_list; | ||
654 | } | ||
655 | |||
656 | static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, | ||
657 | u32 classid) | ||
658 | { | ||
659 | return 0; | ||
660 | } | ||
661 | |||
662 | |||
663 | static const struct Qdisc_class_ops sfb_class_ops = { | ||
664 | .graft = sfb_graft, | ||
665 | .leaf = sfb_leaf, | ||
666 | .get = sfb_get, | ||
667 | .put = sfb_put, | ||
668 | .change = sfb_change_class, | ||
669 | .delete = sfb_delete, | ||
670 | .walk = sfb_walk, | ||
671 | .tcf_chain = sfb_find_tcf, | ||
672 | .bind_tcf = sfb_bind, | ||
673 | .unbind_tcf = sfb_put, | ||
674 | .dump = sfb_dump_class, | ||
675 | }; | ||
676 | |||
677 | static struct Qdisc_ops sfb_qdisc_ops __read_mostly = { | ||
678 | .id = "sfb", | ||
679 | .priv_size = sizeof(struct sfb_sched_data), | ||
680 | .cl_ops = &sfb_class_ops, | ||
681 | .enqueue = sfb_enqueue, | ||
682 | .dequeue = sfb_dequeue, | ||
683 | .peek = sfb_peek, | ||
684 | .init = sfb_init, | ||
685 | .reset = sfb_reset, | ||
686 | .destroy = sfb_destroy, | ||
687 | .change = sfb_change, | ||
688 | .dump = sfb_dump, | ||
689 | .dump_stats = sfb_dump_stats, | ||
690 | .owner = THIS_MODULE, | ||
691 | }; | ||
692 | |||
693 | static int __init sfb_module_init(void) | ||
694 | { | ||
695 | return register_qdisc(&sfb_qdisc_ops); | ||
696 | } | ||
697 | |||
698 | static void __exit sfb_module_exit(void) | ||
699 | { | ||
700 | unregister_qdisc(&sfb_qdisc_ops); | ||
701 | } | ||
702 | |||
703 | module_init(sfb_module_init) | ||
704 | module_exit(sfb_module_exit) | ||
705 | |||
706 | MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline"); | ||
707 | MODULE_AUTHOR("Juliusz Chroboczek"); | ||
708 | MODULE_AUTHOR("Eric Dumazet"); | ||
709 | MODULE_LICENSE("GPL"); | ||
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 201cbac2b32c..b6ea6afa55b0 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/skbuff.h> | 21 | #include <linux/skbuff.h> |
22 | #include <linux/jhash.h> | 22 | #include <linux/jhash.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/vmalloc.h> | ||
24 | #include <net/ip.h> | 25 | #include <net/ip.h> |
25 | #include <net/netlink.h> | 26 | #include <net/netlink.h> |
26 | #include <net/pkt_sched.h> | 27 | #include <net/pkt_sched.h> |
@@ -67,55 +68,81 @@ | |||
67 | 68 | ||
68 | IMPLEMENTATION: | 69 | IMPLEMENTATION: |
69 | This implementation limits maximal queue length to 128; | 70 | This implementation limits maximal queue length to 128; |
70 | maximal mtu to 2^15-1; number of hash buckets to 1024. | 71 | max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024. |
71 | The only goal of this restrictions was that all data | 72 | The only goal of this restrictions was that all data |
72 | fit into one 4K page :-). Struct sfq_sched_data is | 73 | fit into one 4K page on 32bit arches. |
73 | organized in anti-cache manner: all the data for a bucket | ||
74 | are scattered over different locations. This is not good, | ||
75 | but it allowed me to put it into 4K. | ||
76 | 74 | ||
77 | It is easy to increase these values, but not in flight. */ | 75 | It is easy to increase these values, but not in flight. */ |
78 | 76 | ||
79 | #define SFQ_DEPTH 128 | 77 | #define SFQ_DEPTH 128 /* max number of packets per flow */ |
80 | #define SFQ_HASH_DIVISOR 1024 | 78 | #define SFQ_SLOTS 128 /* max number of flows */ |
79 | #define SFQ_EMPTY_SLOT 255 | ||
80 | #define SFQ_DEFAULT_HASH_DIVISOR 1024 | ||
81 | 81 | ||
82 | /* This type should contain at least SFQ_DEPTH*2 values */ | 82 | /* We use 16 bits to store allot, and want to handle packets up to 64K |
83 | * Scale allot by 8 (1<<3) so that no overflow occurs. | ||
84 | */ | ||
85 | #define SFQ_ALLOT_SHIFT 3 | ||
86 | #define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) | ||
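The scaled-allot arithmetic introduced here is easy to sanity-check in isolation. The block below is a stand-alone user-space sketch (not taken from this commit) that mirrors SFQ_ALLOT_SHIFT/SFQ_ALLOT_SIZE under an assumed 1514-byte quantum, showing that both the per-round credit and the cost of a worst-case 64K packet fit comfortably in the 16-bit allot field:

    #include <stdio.h>
    #include <stdint.h>

    #define SFQ_ALLOT_SHIFT 3
    /* user-space stand-in for the kernel's DIV_ROUND_UP() */
    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
    #define SFQ_ALLOT_SIZE(x)   DIV_ROUND_UP((x), 1 << SFQ_ALLOT_SHIFT)

    int main(void)
    {
            unsigned int quantum = 1514;    /* assumed: one Ethernet MTU per round */
            unsigned int max_pkt = 65535;   /* largest packet the credit must cover */

            int16_t scaled_quantum = SFQ_ALLOT_SIZE(quantum);  /* 190 */
            int16_t scaled_max_pkt = SFQ_ALLOT_SIZE(max_pkt);  /* 8192 */

            /* both values are far below the 32767 a signed short can hold */
            printf("scaled quantum = %d, scaled 64K packet = %d\n",
                   scaled_quantum, scaled_max_pkt);
            return 0;
    }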
87 | |||
88 | /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */ | ||
83 | typedef unsigned char sfq_index; | 89 | typedef unsigned char sfq_index; |
84 | 90 | ||
85 | struct sfq_head | 91 | /* |
86 | { | 92 | * We dont use pointers to save space. |
93 | * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array | ||
94 | * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1] | ||
95 | * are 'pointers' to dep[] array | ||
96 | */ | ||
97 | struct sfq_head { | ||
87 | sfq_index next; | 98 | sfq_index next; |
88 | sfq_index prev; | 99 | sfq_index prev; |
89 | }; | 100 | }; |
90 | 101 | ||
91 | struct sfq_sched_data | 102 | struct sfq_slot { |
92 | { | 103 | struct sk_buff *skblist_next; |
104 | struct sk_buff *skblist_prev; | ||
105 | sfq_index qlen; /* number of skbs in skblist */ | ||
106 | sfq_index next; /* next slot in sfq chain */ | ||
107 | struct sfq_head dep; /* anchor in dep[] chains */ | ||
108 | unsigned short hash; /* hash value (index in ht[]) */ | ||
109 | short allot; /* credit for this slot */ | ||
110 | }; | ||
111 | |||
112 | struct sfq_sched_data { | ||
93 | /* Parameters */ | 113 | /* Parameters */ |
94 | int perturb_period; | 114 | int perturb_period; |
95 | unsigned quantum; /* Allotment per round: MUST BE >= MTU */ | 115 | unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ |
96 | int limit; | 116 | int limit; |
97 | 117 | unsigned int divisor; /* number of slots in hash table */ | |
98 | /* Variables */ | 118 | /* Variables */ |
99 | struct tcf_proto *filter_list; | 119 | struct tcf_proto *filter_list; |
100 | struct timer_list perturb_timer; | 120 | struct timer_list perturb_timer; |
101 | u32 perturbation; | 121 | u32 perturbation; |
102 | sfq_index tail; /* Index of current slot in round */ | 122 | sfq_index cur_depth; /* depth of longest slot */ |
103 | sfq_index max_depth; /* Maximal depth */ | 123 | unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ |
104 | 124 | struct sfq_slot *tail; /* current slot in round */ | |
105 | sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ | 125 | sfq_index *ht; /* Hash table (divisor slots) */ |
106 | sfq_index next[SFQ_DEPTH]; /* Active slots link */ | 126 | struct sfq_slot slots[SFQ_SLOTS]; |
107 | short allot[SFQ_DEPTH]; /* Current allotment per slot */ | 127 | struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */ |
108 | unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */ | ||
109 | struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */ | ||
110 | struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */ | ||
111 | }; | 128 | }; |
112 | 129 | ||
113 | static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) | 130 | /* |
131 | * sfq_head are either in a sfq_slot or in dep[] array | ||
132 | */ | ||
133 | static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val) | ||
114 | { | 134 | { |
115 | return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); | 135 | if (val < SFQ_SLOTS) |
136 | return &q->slots[val].dep; | ||
137 | return &q->dep[val - SFQ_SLOTS]; | ||
116 | } | 138 | } |
117 | 139 | ||
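sfq_dep_head() above is the heart of the pointer-free layout: an sfq_index below SFQ_SLOTS names a flow slot, while values from SFQ_SLOTS upward name entries of dep[]. A minimal user-space illustration of that decoding rule (toy code, not from the kernel tree):

    #include <stdio.h>

    #define SFQ_SLOTS 128
    #define SFQ_DEPTH 128

    struct sfq_head { unsigned char next, prev; };
    struct sfq_slot { struct sfq_head dep; /* other fields omitted */ };

    static struct sfq_slot slots[SFQ_SLOTS];
    static struct sfq_head dep[SFQ_DEPTH];

    /* same rule as the kernel helper: small indexes are slots, the rest are dep[] */
    static struct sfq_head *dep_head(unsigned char val)
    {
            if (val < SFQ_SLOTS)
                    return &slots[val].dep;
            return &dep[val - SFQ_SLOTS];
    }

    int main(void)
    {
            printf("index   5 -> %p (anchor inside slots[5])\n", (void *)dep_head(5));
            printf("index 130 -> %p (dep[2])\n", (void *)dep_head(130));
            return 0;
    }

With 128 + 128 = 256 possible values, an unsigned char is exactly wide enough, which is what the comment above the typedef is getting at.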
118 | static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | 140 | static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) |
141 | { | ||
142 | return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1); | ||
143 | } | ||
144 | |||
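The fold above masks the jhash result with (divisor - 1), which only behaves like a modulo when the divisor is a power of two; that is exactly what the new validation in sfq_change() further down enforces. A two-line check of the equivalence, in plain C:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint32_t divisor = 1024;        /* must be a power of two */
            uint32_t h = 0xdeadbeef;        /* pretend this came out of jhash_2words() */

            /* masking and modulo agree exactly because divisor is 2^k */
            assert((h & (divisor - 1)) == (h % divisor));
            return 0;
    }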
145 | static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | ||
119 | { | 146 | { |
120 | u32 h, h2; | 147 | u32 h, h2; |
121 | 148 | ||
@@ -123,40 +150,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) | |||
123 | case htons(ETH_P_IP): | 150 | case htons(ETH_P_IP): |
124 | { | 151 | { |
125 | const struct iphdr *iph; | 152 | const struct iphdr *iph; |
153 | int poff; | ||
126 | 154 | ||
127 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | 155 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
128 | goto err; | 156 | goto err; |
129 | iph = ip_hdr(skb); | 157 | iph = ip_hdr(skb); |
130 | h = (__force u32)iph->daddr; | 158 | h = (__force u32)iph->daddr; |
131 | h2 = (__force u32)iph->saddr ^ iph->protocol; | 159 | h2 = (__force u32)iph->saddr ^ iph->protocol; |
132 | if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && | 160 | if (iph->frag_off & htons(IP_MF | IP_OFFSET)) |
133 | (iph->protocol == IPPROTO_TCP || | 161 | break; |
134 | iph->protocol == IPPROTO_UDP || | 162 | poff = proto_ports_offset(iph->protocol); |
135 | iph->protocol == IPPROTO_UDPLITE || | 163 | if (poff >= 0 && |
136 | iph->protocol == IPPROTO_SCTP || | 164 | pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) { |
137 | iph->protocol == IPPROTO_DCCP || | 165 | iph = ip_hdr(skb); |
138 | iph->protocol == IPPROTO_ESP) && | 166 | h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff); |
139 | pskb_network_may_pull(skb, iph->ihl * 4 + 4)) | 167 | } |
140 | h2 ^= *(((u32*)iph) + iph->ihl); | ||
141 | break; | 168 | break; |
142 | } | 169 | } |
143 | case htons(ETH_P_IPV6): | 170 | case htons(ETH_P_IPV6): |
144 | { | 171 | { |
145 | struct ipv6hdr *iph; | 172 | const struct ipv6hdr *iph; |
173 | int poff; | ||
146 | 174 | ||
147 | if (!pskb_network_may_pull(skb, sizeof(*iph))) | 175 | if (!pskb_network_may_pull(skb, sizeof(*iph))) |
148 | goto err; | 176 | goto err; |
149 | iph = ipv6_hdr(skb); | 177 | iph = ipv6_hdr(skb); |
150 | h = (__force u32)iph->daddr.s6_addr32[3]; | 178 | h = (__force u32)iph->daddr.s6_addr32[3]; |
151 | h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; | 179 | h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; |
152 | if ((iph->nexthdr == IPPROTO_TCP || | 180 | poff = proto_ports_offset(iph->nexthdr); |
153 | iph->nexthdr == IPPROTO_UDP || | 181 | if (poff >= 0 && |
154 | iph->nexthdr == IPPROTO_UDPLITE || | 182 | pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) { |
155 | iph->nexthdr == IPPROTO_SCTP || | 183 | iph = ipv6_hdr(skb); |
156 | iph->nexthdr == IPPROTO_DCCP || | 184 | h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff); |
157 | iph->nexthdr == IPPROTO_ESP) && | 185 | } |
158 | pskb_network_may_pull(skb, sizeof(*iph) + 4)) | ||
159 | h2 ^= *(u32*)&iph[1]; | ||
160 | break; | 186 | break; |
161 | } | 187 | } |
162 | default: | 188 | default: |
@@ -177,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
177 | 203 | ||
178 | if (TC_H_MAJ(skb->priority) == sch->handle && | 204 | if (TC_H_MAJ(skb->priority) == sch->handle && |
179 | TC_H_MIN(skb->priority) > 0 && | 205 | TC_H_MIN(skb->priority) > 0 && |
180 | TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) | 206 | TC_H_MIN(skb->priority) <= q->divisor) |
181 | return TC_H_MIN(skb->priority); | 207 | return TC_H_MIN(skb->priority); |
182 | 208 | ||
183 | if (!q->filter_list) | 209 | if (!q->filter_list) |
@@ -195,36 +221,47 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, | |||
195 | return 0; | 221 | return 0; |
196 | } | 222 | } |
197 | #endif | 223 | #endif |
198 | if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) | 224 | if (TC_H_MIN(res.classid) <= q->divisor) |
199 | return TC_H_MIN(res.classid); | 225 | return TC_H_MIN(res.classid); |
200 | } | 226 | } |
201 | return 0; | 227 | return 0; |
202 | } | 228 | } |
203 | 229 | ||
230 | /* | ||
231 | * x : slot number [0 .. SFQ_SLOTS - 1] | ||
232 | */ | ||
204 | static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) | 233 | static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) |
205 | { | 234 | { |
206 | sfq_index p, n; | 235 | sfq_index p, n; |
207 | int d = q->qs[x].qlen + SFQ_DEPTH; | 236 | int qlen = q->slots[x].qlen; |
208 | 237 | ||
209 | p = d; | 238 | p = qlen + SFQ_SLOTS; |
210 | n = q->dep[d].next; | 239 | n = q->dep[qlen].next; |
211 | q->dep[x].next = n; | 240 | |
212 | q->dep[x].prev = p; | 241 | q->slots[x].dep.next = n; |
213 | q->dep[p].next = q->dep[n].prev = x; | 242 | q->slots[x].dep.prev = p; |
243 | |||
244 | q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */ | ||
245 | sfq_dep_head(q, n)->prev = x; | ||
214 | } | 246 | } |
215 | 247 | ||
248 | #define sfq_unlink(q, x, n, p) \ | ||
249 | n = q->slots[x].dep.next; \ | ||
250 | p = q->slots[x].dep.prev; \ | ||
251 | sfq_dep_head(q, p)->next = n; \ | ||
252 | sfq_dep_head(q, n)->prev = p | ||
253 | |||
254 | |||
216 | static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) | 255 | static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) |
217 | { | 256 | { |
218 | sfq_index p, n; | 257 | sfq_index p, n; |
258 | int d; | ||
219 | 259 | ||
220 | n = q->dep[x].next; | 260 | sfq_unlink(q, x, n, p); |
221 | p = q->dep[x].prev; | ||
222 | q->dep[p].next = n; | ||
223 | q->dep[n].prev = p; | ||
224 | |||
225 | if (n == p && q->max_depth == q->qs[x].qlen + 1) | ||
226 | q->max_depth--; | ||
227 | 261 | ||
262 | d = q->slots[x].qlen--; | ||
263 | if (n == p && q->cur_depth == d) | ||
264 | q->cur_depth--; | ||
228 | sfq_link(q, x); | 265 | sfq_link(q, x); |
229 | } | 266 | } |
230 | 267 | ||
@@ -233,34 +270,74 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x) | |||
233 | sfq_index p, n; | 270 | sfq_index p, n; |
234 | int d; | 271 | int d; |
235 | 272 | ||
236 | n = q->dep[x].next; | 273 | sfq_unlink(q, x, n, p); |
237 | p = q->dep[x].prev; | ||
238 | q->dep[p].next = n; | ||
239 | q->dep[n].prev = p; | ||
240 | d = q->qs[x].qlen; | ||
241 | if (q->max_depth < d) | ||
242 | q->max_depth = d; | ||
243 | 274 | ||
275 | d = ++q->slots[x].qlen; | ||
276 | if (q->cur_depth < d) | ||
277 | q->cur_depth = d; | ||
244 | sfq_link(q, x); | 278 | sfq_link(q, x); |
245 | } | 279 | } |
246 | 280 | ||
281 | /* helper functions : might be changed when/if skb use a standard list_head */ | ||
282 | |||
283 | /* remove one skb from tail of slot queue */ | ||
284 | static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot) | ||
285 | { | ||
286 | struct sk_buff *skb = slot->skblist_prev; | ||
287 | |||
288 | slot->skblist_prev = skb->prev; | ||
289 | skb->prev->next = (struct sk_buff *)slot; | ||
290 | skb->next = skb->prev = NULL; | ||
291 | return skb; | ||
292 | } | ||
293 | |||
294 | /* remove one skb from head of slot queue */ | ||
295 | static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot) | ||
296 | { | ||
297 | struct sk_buff *skb = slot->skblist_next; | ||
298 | |||
299 | slot->skblist_next = skb->next; | ||
300 | skb->next->prev = (struct sk_buff *)slot; | ||
301 | skb->next = skb->prev = NULL; | ||
302 | return skb; | ||
303 | } | ||
304 | |||
305 | static inline void slot_queue_init(struct sfq_slot *slot) | ||
306 | { | ||
307 | slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot; | ||
308 | } | ||
309 | |||
310 | /* add skb to slot queue (tail add) */ | ||
311 | static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) | ||
312 | { | ||
313 | skb->prev = slot->skblist_prev; | ||
314 | skb->next = (struct sk_buff *)slot; | ||
315 | slot->skblist_prev->next = skb; | ||
316 | slot->skblist_prev = skb; | ||
317 | } | ||
318 | |||
319 | #define slot_queue_walk(slot, skb) \ | ||
320 | for (skb = slot->skblist_next; \ | ||
321 | skb != (struct sk_buff *)slot; \ | ||
322 | skb = skb->next) | ||
323 | |||
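The slot queue helpers above keep packets on an intrusive circular list whose head is the slot itself, cast to struct sk_buff *; that only works because skblist_next/skblist_prev are positioned to overlay sk_buff's next/prev fields. A self-contained user-space analogue of the same sentinel trick (illustrative types and names, not kernel code):

    #include <stdio.h>

    /* 'pkt' plays the role of sk_buff; the slot's first two members overlay
     * a pkt's next/prev, so the slot can be cast to a pkt and used as sentinel */
    struct pkt {
            struct pkt *next;
            struct pkt *prev;
            int id;
    };

    struct slot {
            struct pkt *list_next;          /* must stay the first member  */
            struct pkt *list_prev;          /* must stay the second member */
    };

    static void slot_init(struct slot *s)
    {
            s->list_prev = s->list_next = (struct pkt *)s;
    }

    static void slot_add_tail(struct slot *s, struct pkt *p)
    {
            p->prev = s->list_prev;
            p->next = (struct pkt *)s;
            s->list_prev->next = p;
            s->list_prev = p;
    }

    static struct pkt *slot_del_head(struct slot *s)
    {
            struct pkt *p = s->list_next;

            if (p == (struct pkt *)s)
                    return NULL;            /* only the sentinel left: queue is empty */
            s->list_next = p->next;
            p->next->prev = (struct pkt *)s;
            p->next = p->prev = NULL;
            return p;
    }

    int main(void)
    {
            struct slot s;
            struct pkt a = { .id = 1 }, b = { .id = 2 };
            struct pkt *p;

            slot_init(&s);
            slot_add_tail(&s, &a);
            slot_add_tail(&s, &b);
            while ((p = slot_del_head(&s)) != NULL)
                    printf("dequeued pkt %d\n", p->id);
            return 0;
    }

The payoff is that emptiness tests, tail adds and head removals need no separate list-head object and no NULL checks beyond the sentinel comparison, which is why the helpers above are so short.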
247 | static unsigned int sfq_drop(struct Qdisc *sch) | 324 | static unsigned int sfq_drop(struct Qdisc *sch) |
248 | { | 325 | { |
249 | struct sfq_sched_data *q = qdisc_priv(sch); | 326 | struct sfq_sched_data *q = qdisc_priv(sch); |
250 | sfq_index d = q->max_depth; | 327 | sfq_index x, d = q->cur_depth; |
251 | struct sk_buff *skb; | 328 | struct sk_buff *skb; |
252 | unsigned int len; | 329 | unsigned int len; |
330 | struct sfq_slot *slot; | ||
253 | 331 | ||
254 | /* Queue is full! Find the longest slot and | 332 | /* Queue is full! Find the longest slot and drop tail packet from it */ |
255 | drop a packet from it */ | ||
256 | |||
257 | if (d > 1) { | 333 | if (d > 1) { |
258 | sfq_index x = q->dep[d + SFQ_DEPTH].next; | 334 | x = q->dep[d].next; |
259 | skb = q->qs[x].prev; | 335 | slot = &q->slots[x]; |
336 | drop: | ||
337 | skb = slot_dequeue_tail(slot); | ||
260 | len = qdisc_pkt_len(skb); | 338 | len = qdisc_pkt_len(skb); |
261 | __skb_unlink(skb, &q->qs[x]); | ||
262 | kfree_skb(skb); | ||
263 | sfq_dec(q, x); | 339 | sfq_dec(q, x); |
340 | kfree_skb(skb); | ||
264 | sch->q.qlen--; | 341 | sch->q.qlen--; |
265 | sch->qstats.drops++; | 342 | sch->qstats.drops++; |
266 | sch->qstats.backlog -= len; | 343 | sch->qstats.backlog -= len; |
@@ -269,19 +346,11 @@ static unsigned int sfq_drop(struct Qdisc *sch) | |||
269 | 346 | ||
270 | if (d == 1) { | 347 | if (d == 1) { |
271 | /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ | 348 | /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ |
272 | d = q->next[q->tail]; | 349 | x = q->tail->next; |
273 | q->next[q->tail] = q->next[d]; | 350 | slot = &q->slots[x]; |
274 | q->allot[q->next[d]] += q->quantum; | 351 | q->tail->next = slot->next; |
275 | skb = q->qs[d].prev; | 352 | q->ht[slot->hash] = SFQ_EMPTY_SLOT; |
276 | len = qdisc_pkt_len(skb); | 353 | goto drop; |
277 | __skb_unlink(skb, &q->qs[d]); | ||
278 | kfree_skb(skb); | ||
279 | sfq_dec(q, d); | ||
280 | sch->q.qlen--; | ||
281 | q->ht[q->hash[d]] = SFQ_DEPTH; | ||
282 | sch->qstats.drops++; | ||
283 | sch->qstats.backlog -= len; | ||
284 | return len; | ||
285 | } | 354 | } |
286 | 355 | ||
287 | return 0; | 356 | return 0; |
@@ -292,7 +361,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
292 | { | 361 | { |
293 | struct sfq_sched_data *q = qdisc_priv(sch); | 362 | struct sfq_sched_data *q = qdisc_priv(sch); |
294 | unsigned int hash; | 363 | unsigned int hash; |
295 | sfq_index x; | 364 | sfq_index x, qlen; |
365 | struct sfq_slot *slot; | ||
296 | int uninitialized_var(ret); | 366 | int uninitialized_var(ret); |
297 | 367 | ||
298 | hash = sfq_classify(skb, sch, &ret); | 368 | hash = sfq_classify(skb, sch, &ret); |
@@ -305,54 +375,42 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) | |||
305 | hash--; | 375 | hash--; |
306 | 376 | ||
307 | x = q->ht[hash]; | 377 | x = q->ht[hash]; |
308 | if (x == SFQ_DEPTH) { | 378 | slot = &q->slots[x]; |
309 | q->ht[hash] = x = q->dep[SFQ_DEPTH].next; | 379 | if (x == SFQ_EMPTY_SLOT) { |
310 | q->hash[x] = hash; | 380 | x = q->dep[0].next; /* get a free slot */ |
381 | q->ht[hash] = x; | ||
382 | slot = &q->slots[x]; | ||
383 | slot->hash = hash; | ||
311 | } | 384 | } |
312 | 385 | ||
313 | /* If selected queue has length q->limit, this means that | 386 | /* If selected queue has length q->limit, do simple tail drop, |
314 | * all another queues are empty and that we do simple tail drop, | ||
315 | * i.e. drop _this_ packet. | 387 | * i.e. drop _this_ packet. |
316 | */ | 388 | */ |
317 | if (q->qs[x].qlen >= q->limit) | 389 | if (slot->qlen >= q->limit) |
318 | return qdisc_drop(skb, sch); | 390 | return qdisc_drop(skb, sch); |
319 | 391 | ||
320 | sch->qstats.backlog += qdisc_pkt_len(skb); | 392 | sch->qstats.backlog += qdisc_pkt_len(skb); |
321 | __skb_queue_tail(&q->qs[x], skb); | 393 | slot_queue_add(slot, skb); |
322 | sfq_inc(q, x); | 394 | sfq_inc(q, x); |
323 | if (q->qs[x].qlen == 1) { /* The flow is new */ | 395 | if (slot->qlen == 1) { /* The flow is new */ |
324 | if (q->tail == SFQ_DEPTH) { /* It is the first flow */ | 396 | if (q->tail == NULL) { /* It is the first flow */ |
325 | q->tail = x; | 397 | slot->next = x; |
326 | q->next[x] = x; | ||
327 | q->allot[x] = q->quantum; | ||
328 | } else { | 398 | } else { |
329 | q->next[x] = q->next[q->tail]; | 399 | slot->next = q->tail->next; |
330 | q->next[q->tail] = x; | 400 | q->tail->next = x; |
331 | q->tail = x; | ||
332 | } | 401 | } |
402 | q->tail = slot; | ||
403 | slot->allot = q->scaled_quantum; | ||
333 | } | 404 | } |
334 | if (++sch->q.qlen <= q->limit) { | 405 | if (++sch->q.qlen <= q->limit) |
335 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
336 | sch->bstats.packets++; | ||
337 | return NET_XMIT_SUCCESS; | 406 | return NET_XMIT_SUCCESS; |
338 | } | ||
339 | 407 | ||
408 | qlen = slot->qlen; | ||
340 | sfq_drop(sch); | 409 | sfq_drop(sch); |
341 | return NET_XMIT_CN; | 410 | /* Return Congestion Notification only if we dropped a packet |
342 | } | 411 | * from this flow. |
343 | 412 | */ | |
344 | static struct sk_buff * | 413 | return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS; |
345 | sfq_peek(struct Qdisc *sch) | ||
346 | { | ||
347 | struct sfq_sched_data *q = qdisc_priv(sch); | ||
348 | sfq_index a; | ||
349 | |||
350 | /* No active slots */ | ||
351 | if (q->tail == SFQ_DEPTH) | ||
352 | return NULL; | ||
353 | |||
354 | a = q->next[q->tail]; | ||
355 | return skb_peek(&q->qs[a]); | ||
356 | } | 414 | } |
357 | 415 | ||
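The changed return value at the end of sfq_enqueue() above is easy to miss: sfq_drop() evicts from the longest flow, which is usually not the flow that just enqueued, so NET_XMIT_CN is now reported only when the drop really did hit this flow. A toy simulation of that rule (made-up queue lengths, plain C):

    #include <stdio.h>

    /* two toy flows; flow 0 is the one whose packet was just enqueued */
    static unsigned int qlen[2] = { 2, 5 };

    /* stand-in for sfq_drop(): shorten whichever flow is currently longest */
    static void drop_from_longest(void)
    {
            qlen[qlen[1] > qlen[0] ? 1 : 0]--;
    }

    int main(void)
    {
            unsigned int before = qlen[0];

            drop_from_longest();
            puts(qlen[0] != before
                 ? "NET_XMIT_CN: the enqueuing flow paid for the overflow"
                 : "NET_XMIT_SUCCESS: a longer flow absorbed the drop");
            return 0;
    }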
358 | static struct sk_buff * | 416 | static struct sk_buff * |
@@ -360,34 +418,38 @@ sfq_dequeue(struct Qdisc *sch) | |||
360 | { | 418 | { |
361 | struct sfq_sched_data *q = qdisc_priv(sch); | 419 | struct sfq_sched_data *q = qdisc_priv(sch); |
362 | struct sk_buff *skb; | 420 | struct sk_buff *skb; |
363 | sfq_index a, old_a; | 421 | sfq_index a, next_a; |
422 | struct sfq_slot *slot; | ||
364 | 423 | ||
365 | /* No active slots */ | 424 | /* No active slots */ |
366 | if (q->tail == SFQ_DEPTH) | 425 | if (q->tail == NULL) |
367 | return NULL; | 426 | return NULL; |
368 | 427 | ||
369 | a = old_a = q->next[q->tail]; | 428 | next_slot: |
370 | 429 | a = q->tail->next; | |
371 | /* Grab packet */ | 430 | slot = &q->slots[a]; |
372 | skb = __skb_dequeue(&q->qs[a]); | 431 | if (slot->allot <= 0) { |
432 | q->tail = slot; | ||
433 | slot->allot += q->scaled_quantum; | ||
434 | goto next_slot; | ||
435 | } | ||
436 | skb = slot_dequeue_head(slot); | ||
373 | sfq_dec(q, a); | 437 | sfq_dec(q, a); |
438 | qdisc_bstats_update(sch, skb); | ||
374 | sch->q.qlen--; | 439 | sch->q.qlen--; |
375 | sch->qstats.backlog -= qdisc_pkt_len(skb); | 440 | sch->qstats.backlog -= qdisc_pkt_len(skb); |
376 | 441 | ||
377 | /* Is the slot empty? */ | 442 | /* Is the slot empty? */ |
378 | if (q->qs[a].qlen == 0) { | 443 | if (slot->qlen == 0) { |
379 | q->ht[q->hash[a]] = SFQ_DEPTH; | 444 | q->ht[slot->hash] = SFQ_EMPTY_SLOT; |
380 | a = q->next[a]; | 445 | next_a = slot->next; |
381 | if (a == old_a) { | 446 | if (a == next_a) { |
382 | q->tail = SFQ_DEPTH; | 447 | q->tail = NULL; /* no more active slots */ |
383 | return skb; | 448 | return skb; |
384 | } | 449 | } |
385 | q->next[q->tail] = a; | 450 | q->tail->next = next_a; |
386 | q->allot[a] += q->quantum; | 451 | } else { |
387 | } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { | 452 | slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); |
388 | q->tail = a; | ||
389 | a = q->next[a]; | ||
390 | q->allot[a] += q->quantum; | ||
391 | } | 453 | } |
392 | return skb; | 454 | return skb; |
393 | } | 455 | } |
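The rewritten dequeue path above is essentially deficit round robin on scaled byte credits: a slot whose allot has gone non-positive is skipped for this round and topped up with scaled_quantum, otherwise it sends one packet and pays its scaled length. The stand-alone sketch below (two fabricated flows, user-space C, not kernel code) walks the same loop:

    #include <stdio.h>

    #define ALLOT_SHIFT 3
    #define ALLOT_SIZE(x)  (((x) + (1 << ALLOT_SHIFT) - 1) >> ALLOT_SHIFT)

    struct flow {
            int allot;              /* scaled byte credit, like sfq_slot.allot */
            int pkts;               /* packets still queued */
            unsigned int pkt_len;   /* pretend every packet in the flow has this size */
    };

    int main(void)
    {
            int scaled_quantum = ALLOT_SIZE(1514);
            struct flow flows[2] = {
                    { .allot = scaled_quantum, .pkts = 3, .pkt_len = 1500 },
                    { .allot = scaled_quantum, .pkts = 6, .pkt_len = 100 },
            };
            int cur = 0;

            while (flows[0].pkts || flows[1].pkts) {
                    struct flow *f = &flows[cur];

                    if (!f->pkts) {                 /* empty slot leaves the ring */
                            cur ^= 1;
                            continue;
                    }
                    if (f->allot <= 0) {            /* out of credit: refill, next slot */
                            f->allot += scaled_quantum;
                            cur ^= 1;
                            continue;
                    }
                    f->pkts--;
                    f->allot -= ALLOT_SIZE(f->pkt_len);
                    printf("sent %u-byte packet from flow %d (allot now %d)\n",
                           f->pkt_len, cur, f->allot);
            }
            return 0;
    }

The credit is tested before each send and refilled only once it is exhausted, mirroring how sfq_dequeue() keeps serving q->tail->next until that slot runs out of allot or empties.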
@@ -421,12 +483,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
421 | if (opt->nla_len < nla_attr_size(sizeof(*ctl))) | 483 | if (opt->nla_len < nla_attr_size(sizeof(*ctl))) |
422 | return -EINVAL; | 484 | return -EINVAL; |
423 | 485 | ||
486 | if (ctl->divisor && | ||
487 | (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) | ||
488 | return -EINVAL; | ||
489 | |||
424 | sch_tree_lock(sch); | 490 | sch_tree_lock(sch); |
425 | q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); | 491 | q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); |
492 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | ||
426 | q->perturb_period = ctl->perturb_period * HZ; | 493 | q->perturb_period = ctl->perturb_period * HZ; |
427 | if (ctl->limit) | 494 | if (ctl->limit) |
428 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); | 495 | q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); |
429 | 496 | if (ctl->divisor) | |
497 | q->divisor = ctl->divisor; | ||
430 | qlen = sch->q.qlen; | 498 | qlen = sch->q.qlen; |
431 | while (sch->q.qlen > q->limit) | 499 | while (sch->q.qlen > q->limit) |
432 | sfq_drop(sch); | 500 | sfq_drop(sch); |
@@ -444,26 +512,25 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) | |||
444 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | 512 | static int sfq_init(struct Qdisc *sch, struct nlattr *opt) |
445 | { | 513 | { |
446 | struct sfq_sched_data *q = qdisc_priv(sch); | 514 | struct sfq_sched_data *q = qdisc_priv(sch); |
515 | size_t sz; | ||
447 | int i; | 516 | int i; |
448 | 517 | ||
449 | q->perturb_timer.function = sfq_perturbation; | 518 | q->perturb_timer.function = sfq_perturbation; |
450 | q->perturb_timer.data = (unsigned long)sch; | 519 | q->perturb_timer.data = (unsigned long)sch; |
451 | init_timer_deferrable(&q->perturb_timer); | 520 | init_timer_deferrable(&q->perturb_timer); |
452 | 521 | ||
453 | for (i = 0; i < SFQ_HASH_DIVISOR; i++) | ||
454 | q->ht[i] = SFQ_DEPTH; | ||
455 | |||
456 | for (i = 0; i < SFQ_DEPTH; i++) { | 522 | for (i = 0; i < SFQ_DEPTH; i++) { |
457 | skb_queue_head_init(&q->qs[i]); | 523 | q->dep[i].next = i + SFQ_SLOTS; |
458 | q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; | 524 | q->dep[i].prev = i + SFQ_SLOTS; |
459 | q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH; | ||
460 | } | 525 | } |
461 | 526 | ||
462 | q->limit = SFQ_DEPTH - 1; | 527 | q->limit = SFQ_DEPTH - 1; |
463 | q->max_depth = 0; | 528 | q->cur_depth = 0; |
464 | q->tail = SFQ_DEPTH; | 529 | q->tail = NULL; |
530 | q->divisor = SFQ_DEFAULT_HASH_DIVISOR; | ||
465 | if (opt == NULL) { | 531 | if (opt == NULL) { |
466 | q->quantum = psched_mtu(qdisc_dev(sch)); | 532 | q->quantum = psched_mtu(qdisc_dev(sch)); |
533 | q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); | ||
467 | q->perturb_period = 0; | 534 | q->perturb_period = 0; |
468 | q->perturbation = net_random(); | 535 | q->perturbation = net_random(); |
469 | } else { | 536 | } else { |
@@ -472,8 +539,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) | |||
472 | return err; | 539 | return err; |
473 | } | 540 | } |
474 | 541 | ||
475 | for (i = 0; i < SFQ_DEPTH; i++) | 542 | sz = sizeof(q->ht[0]) * q->divisor; |
543 | q->ht = kmalloc(sz, GFP_KERNEL); | ||
544 | if (!q->ht && sz > PAGE_SIZE) | ||
545 | q->ht = vmalloc(sz); | ||
546 | if (!q->ht) | ||
547 | return -ENOMEM; | ||
548 | for (i = 0; i < q->divisor; i++) | ||
549 | q->ht[i] = SFQ_EMPTY_SLOT; | ||
550 | |||
551 | for (i = 0; i < SFQ_SLOTS; i++) { | ||
552 | slot_queue_init(&q->slots[i]); | ||
476 | sfq_link(q, i); | 553 | sfq_link(q, i); |
554 | } | ||
555 | if (q->limit >= 1) | ||
556 | sch->flags |= TCQ_F_CAN_BYPASS; | ||
557 | else | ||
558 | sch->flags &= ~TCQ_F_CAN_BYPASS; | ||
477 | return 0; | 559 | return 0; |
478 | } | 560 | } |
479 | 561 | ||
@@ -484,6 +566,10 @@ static void sfq_destroy(struct Qdisc *sch) | |||
484 | tcf_destroy_chain(&q->filter_list); | 566 | tcf_destroy_chain(&q->filter_list); |
485 | q->perturb_period = 0; | 567 | q->perturb_period = 0; |
486 | del_timer_sync(&q->perturb_timer); | 568 | del_timer_sync(&q->perturb_timer); |
569 | if (is_vmalloc_addr(q->ht)) | ||
570 | vfree(q->ht); | ||
571 | else | ||
572 | kfree(q->ht); | ||
487 | } | 573 | } |
488 | 574 | ||
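With the divisor now configurable up to 65536 entries, sfq_init() above allocates the hash table with kmalloc() and falls back to vmalloc() for requests larger than a page, and sfq_destroy() frees through whichever allocator actually provided the memory. Condensed into a hypothetical helper pair (kernel-style sketch; sfq_alloc_table/sfq_free_table are illustrative names, not functions in this file), the pattern is:

    #include <linux/slab.h>
    #include <linux/vmalloc.h>
    #include <linux/mm.h>           /* is_vmalloc_addr() */

    /* try physically contiguous kmalloc first; big tables fall back to vmalloc */
    static void *sfq_alloc_table(size_t sz)
    {
            void *p = kmalloc(sz, GFP_KERNEL);

            if (!p && sz > PAGE_SIZE)
                    p = vmalloc(sz);
            return p;
    }

    /* free with the routine that matches how the memory was obtained */
    static void sfq_free_table(void *p)
    {
            if (is_vmalloc_addr(p))
                    vfree(p);
            else
                    kfree(p);
    }

Later kernels wrap this exact idiom in kvmalloc()/kvfree(); at the time of this commit it still had to be spelled out per user.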
489 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | 575 | static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) |
@@ -496,7 +582,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) | |||
496 | opt.perturb_period = q->perturb_period / HZ; | 582 | opt.perturb_period = q->perturb_period / HZ; |
497 | 583 | ||
498 | opt.limit = q->limit; | 584 | opt.limit = q->limit; |
499 | opt.divisor = SFQ_HASH_DIVISOR; | 585 | opt.divisor = q->divisor; |
500 | opt.flows = q->limit; | 586 | opt.flows = q->limit; |
501 | 587 | ||
502 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | 588 | NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); |
@@ -521,6 +607,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid) | |||
521 | static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, | 607 | static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, |
522 | u32 classid) | 608 | u32 classid) |
523 | { | 609 | { |
610 | /* we cannot bypass queue discipline anymore */ | ||
611 | sch->flags &= ~TCQ_F_CAN_BYPASS; | ||
524 | return 0; | 612 | return 0; |
525 | } | 613 | } |
526 | 614 | ||
@@ -548,10 +636,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl, | |||
548 | struct gnet_dump *d) | 636 | struct gnet_dump *d) |
549 | { | 637 | { |
550 | struct sfq_sched_data *q = qdisc_priv(sch); | 638 | struct sfq_sched_data *q = qdisc_priv(sch); |
551 | sfq_index idx = q->ht[cl-1]; | 639 | sfq_index idx = q->ht[cl - 1]; |
552 | struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; | 640 | struct gnet_stats_queue qs = { 0 }; |
553 | struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; | 641 | struct tc_sfq_xstats xstats = { 0 }; |
642 | struct sk_buff *skb; | ||
554 | 643 | ||
644 | if (idx != SFQ_EMPTY_SLOT) { | ||
645 | const struct sfq_slot *slot = &q->slots[idx]; | ||
646 | |||
647 | xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; | ||
648 | qs.qlen = slot->qlen; | ||
649 | slot_queue_walk(slot, skb) | ||
650 | qs.backlog += qdisc_pkt_len(skb); | ||
651 | } | ||
555 | if (gnet_stats_copy_queue(d, &qs) < 0) | 652 | if (gnet_stats_copy_queue(d, &qs) < 0) |
556 | return -1; | 653 | return -1; |
557 | return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); | 654 | return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); |
@@ -565,8 +662,8 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |||
565 | if (arg->stop) | 662 | if (arg->stop) |
566 | return; | 663 | return; |
567 | 664 | ||
568 | for (i = 0; i < SFQ_HASH_DIVISOR; i++) { | 665 | for (i = 0; i < q->divisor; i++) { |
569 | if (q->ht[i] == SFQ_DEPTH || | 666 | if (q->ht[i] == SFQ_EMPTY_SLOT || |
570 | arg->count < arg->skip) { | 667 | arg->count < arg->skip) { |
571 | arg->count++; | 668 | arg->count++; |
572 | continue; | 669 | continue; |
@@ -597,7 +694,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = { | |||
597 | .priv_size = sizeof(struct sfq_sched_data), | 694 | .priv_size = sizeof(struct sfq_sched_data), |
598 | .enqueue = sfq_enqueue, | 695 | .enqueue = sfq_enqueue, |
599 | .dequeue = sfq_dequeue, | 696 | .dequeue = sfq_dequeue, |
600 | .peek = sfq_peek, | 697 | .peek = qdisc_peek_dequeued, |
601 | .drop = sfq_drop, | 698 | .drop = sfq_drop, |
602 | .init = sfq_init, | 699 | .init = sfq_init, |
603 | .reset = sfq_reset, | 700 | .reset = sfq_reset, |
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 641a30d64635..1dcfb5223a86 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c | |||
@@ -97,8 +97,7 @@ | |||
97 | changed the limit is not effective anymore. | 97 | changed the limit is not effective anymore. |
98 | */ | 98 | */ |
99 | 99 | ||
100 | struct tbf_sched_data | 100 | struct tbf_sched_data { |
101 | { | ||
102 | /* Parameters */ | 101 | /* Parameters */ |
103 | u32 limit; /* Maximal length of backlog: bytes */ | 102 | u32 limit; /* Maximal length of backlog: bytes */ |
104 | u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ | 103 | u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ |
@@ -115,10 +114,10 @@ struct tbf_sched_data | |||
115 | struct qdisc_watchdog watchdog; /* Watchdog timer */ | 114 | struct qdisc_watchdog watchdog; /* Watchdog timer */ |
116 | }; | 115 | }; |
117 | 116 | ||
118 | #define L2T(q,L) qdisc_l2t((q)->R_tab,L) | 117 | #define L2T(q, L) qdisc_l2t((q)->R_tab, L) |
119 | #define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) | 118 | #define L2T_P(q, L) qdisc_l2t((q)->P_tab, L) |
120 | 119 | ||
121 | static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 120 | static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
122 | { | 121 | { |
123 | struct tbf_sched_data *q = qdisc_priv(sch); | 122 | struct tbf_sched_data *q = qdisc_priv(sch); |
124 | int ret; | 123 | int ret; |
@@ -134,12 +133,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
134 | } | 133 | } |
135 | 134 | ||
136 | sch->q.qlen++; | 135 | sch->q.qlen++; |
137 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
138 | sch->bstats.packets++; | ||
139 | return NET_XMIT_SUCCESS; | 136 | return NET_XMIT_SUCCESS; |
140 | } | 137 | } |
141 | 138 | ||
142 | static unsigned int tbf_drop(struct Qdisc* sch) | 139 | static unsigned int tbf_drop(struct Qdisc *sch) |
143 | { | 140 | { |
144 | struct tbf_sched_data *q = qdisc_priv(sch); | 141 | struct tbf_sched_data *q = qdisc_priv(sch); |
145 | unsigned int len = 0; | 142 | unsigned int len = 0; |
@@ -151,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch) | |||
151 | return len; | 148 | return len; |
152 | } | 149 | } |
153 | 150 | ||
154 | static struct sk_buff *tbf_dequeue(struct Qdisc* sch) | 151 | static struct sk_buff *tbf_dequeue(struct Qdisc *sch) |
155 | { | 152 | { |
156 | struct tbf_sched_data *q = qdisc_priv(sch); | 153 | struct tbf_sched_data *q = qdisc_priv(sch); |
157 | struct sk_buff *skb; | 154 | struct sk_buff *skb; |
@@ -187,7 +184,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) | |||
187 | q->tokens = toks; | 184 | q->tokens = toks; |
188 | q->ptokens = ptoks; | 185 | q->ptokens = ptoks; |
189 | sch->q.qlen--; | 186 | sch->q.qlen--; |
190 | sch->flags &= ~TCQ_F_THROTTLED; | 187 | qdisc_unthrottled(sch); |
188 | qdisc_bstats_update(sch, skb); | ||
191 | return skb; | 189 | return skb; |
192 | } | 190 | } |
193 | 191 | ||
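For context, the token update above is the tail end of a classic token bucket: credit accrues while the link is idle, capped at 'buffer', each packet costs L2T(q, len) of it, and a second bucket (ptokens, capped by mtu) enforces the peak rate. A single-rate, user-space rendition of the admission test with made-up units (one token per microsecond of line time), purely illustrative:

    #include <stdio.h>

    struct tbucket {
            long tokens;            /* accumulated credit, usec of line time */
            long buffer;            /* cap on accumulation (burst allowance) */
            long usec_per_byte;     /* inverse of the configured rate */
            long last_usec;         /* timestamp of the previous admission */
    };

    /* admit the packet iff enough credit has accumulated since the last send */
    static int tb_admit(struct tbucket *tb, long now_usec, unsigned int len)
    {
            long toks = tb->tokens + (now_usec - tb->last_usec);
            long cost = (long)len * tb->usec_per_byte;

            if (toks > tb->buffer)
                    toks = tb->buffer;
            if (toks < cost)
                    return 0;               /* throttle until more credit accrues */
            tb->tokens = toks - cost;
            tb->last_usec = now_usec;
            return 1;
    }

    int main(void)
    {
            /* roughly 1 Mbyte/s (1 usec per byte) with 10 ms of burst allowance */
            struct tbucket tb = { .tokens = 10000, .buffer = 10000,
                                  .usec_per_byte = 1, .last_usec = 0 };

            printf("t=0us   1500B -> %s\n", tb_admit(&tb, 0, 1500) ? "send" : "wait");
            printf("t=100us 1500B -> %s\n", tb_admit(&tb, 100, 1500) ? "send" : "wait");
            printf("t=200us 9000B -> %s\n", tb_admit(&tb, 200, 9000) ? "send" : "wait");
            return 0;
    }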
@@ -210,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch) | |||
210 | return NULL; | 208 | return NULL; |
211 | } | 209 | } |
212 | 210 | ||
213 | static void tbf_reset(struct Qdisc* sch) | 211 | static void tbf_reset(struct Qdisc *sch) |
214 | { | 212 | { |
215 | struct tbf_sched_data *q = qdisc_priv(sch); | 213 | struct tbf_sched_data *q = qdisc_priv(sch); |
216 | 214 | ||
@@ -228,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = { | |||
228 | [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, | 226 | [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, |
229 | }; | 227 | }; |
230 | 228 | ||
231 | static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | 229 | static int tbf_change(struct Qdisc *sch, struct nlattr *opt) |
232 | { | 230 | { |
233 | int err; | 231 | int err; |
234 | struct tbf_sched_data *q = qdisc_priv(sch); | 232 | struct tbf_sched_data *q = qdisc_priv(sch); |
@@ -237,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | |||
237 | struct qdisc_rate_table *rtab = NULL; | 235 | struct qdisc_rate_table *rtab = NULL; |
238 | struct qdisc_rate_table *ptab = NULL; | 236 | struct qdisc_rate_table *ptab = NULL; |
239 | struct Qdisc *child = NULL; | 237 | struct Qdisc *child = NULL; |
240 | int max_size,n; | 238 | int max_size, n; |
241 | 239 | ||
242 | err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); | 240 | err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); |
243 | if (err < 0) | 241 | if (err < 0) |
@@ -260,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | |||
260 | } | 258 | } |
261 | 259 | ||
262 | for (n = 0; n < 256; n++) | 260 | for (n = 0; n < 256; n++) |
263 | if (rtab->data[n] > qopt->buffer) break; | 261 | if (rtab->data[n] > qopt->buffer) |
264 | max_size = (n << qopt->rate.cell_log)-1; | 262 | break; |
263 | max_size = (n << qopt->rate.cell_log) - 1; | ||
265 | if (ptab) { | 264 | if (ptab) { |
266 | int size; | 265 | int size; |
267 | 266 | ||
268 | for (n = 0; n < 256; n++) | 267 | for (n = 0; n < 256; n++) |
269 | if (ptab->data[n] > qopt->mtu) break; | 268 | if (ptab->data[n] > qopt->mtu) |
270 | size = (n << qopt->peakrate.cell_log)-1; | 269 | break; |
271 | if (size < max_size) max_size = size; | 270 | size = (n << qopt->peakrate.cell_log) - 1; |
271 | if (size < max_size) | ||
272 | max_size = size; | ||
272 | } | 273 | } |
273 | if (max_size < 0) | 274 | if (max_size < 0) |
274 | goto done; | 275 | goto done; |
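The reindented scan above answers one question: what is the largest packet the configured bucket can ever pay for? It walks the 256-entry rate table until a packet's transmit cost exceeds the buffer (and, when a peak rate table is present, repeats the scan against the mtu), then converts that bucket index back into bytes. The same search over a fabricated cost table, in stand-alone C:

    #include <stdio.h>

    #define CELL_LOG 3              /* assumed: 8-byte size buckets */

    int main(void)
    {
            unsigned int cost[256];         /* stand-in for rtab->data[]: time per bucket */
            unsigned int buffer = 1000;     /* bucket depth, in the same time units */
            int n, max_size;

            /* fabricate a linear cost table: bigger packets take longer to send */
            for (n = 0; n < 256; n++)
                    cost[n] = n * 10;

            /* the first bucket the buffer cannot pay for bounds the packet size */
            for (n = 0; n < 256; n++)
                    if (cost[n] > buffer)
                            break;
            max_size = (n << CELL_LOG) - 1;

            printf("largest affordable packet: %d bytes\n", max_size);
            return 0;
    }

If even the first bucket costs more than the buffer, max_size comes out negative, which is precisely the error case the "if (max_size < 0) goto done;" path above rejects.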
@@ -311,7 +312,7 @@ done: | |||
311 | return err; | 312 | return err; |
312 | } | 313 | } |
313 | 314 | ||
314 | static int tbf_init(struct Qdisc* sch, struct nlattr *opt) | 315 | static int tbf_init(struct Qdisc *sch, struct nlattr *opt) |
315 | { | 316 | { |
316 | struct tbf_sched_data *q = qdisc_priv(sch); | 317 | struct tbf_sched_data *q = qdisc_priv(sch); |
317 | 318 | ||
@@ -423,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) | |||
423 | } | 424 | } |
424 | } | 425 | } |
425 | 426 | ||
426 | static const struct Qdisc_class_ops tbf_class_ops = | 427 | static const struct Qdisc_class_ops tbf_class_ops = { |
427 | { | ||
428 | .graft = tbf_graft, | 428 | .graft = tbf_graft, |
429 | .leaf = tbf_leaf, | 429 | .leaf = tbf_leaf, |
430 | .get = tbf_get, | 430 | .get = tbf_get, |
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index feaabc103ce6..45cd30098e34 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c | |||
@@ -53,38 +53,38 @@ | |||
53 | which will not break load balancing, though native slave | 53 | which will not break load balancing, though native slave |
54 | traffic will have the highest priority. */ | 54 | traffic will have the highest priority. */ |
55 | 55 | ||
56 | struct teql_master | 56 | struct teql_master { |
57 | { | ||
58 | struct Qdisc_ops qops; | 57 | struct Qdisc_ops qops; |
59 | struct net_device *dev; | 58 | struct net_device *dev; |
60 | struct Qdisc *slaves; | 59 | struct Qdisc *slaves; |
61 | struct list_head master_list; | 60 | struct list_head master_list; |
61 | unsigned long tx_bytes; | ||
62 | unsigned long tx_packets; | ||
63 | unsigned long tx_errors; | ||
64 | unsigned long tx_dropped; | ||
62 | }; | 65 | }; |
63 | 66 | ||
64 | struct teql_sched_data | 67 | struct teql_sched_data { |
65 | { | ||
66 | struct Qdisc *next; | 68 | struct Qdisc *next; |
67 | struct teql_master *m; | 69 | struct teql_master *m; |
68 | struct neighbour *ncache; | 70 | struct neighbour *ncache; |
69 | struct sk_buff_head q; | 71 | struct sk_buff_head q; |
70 | }; | 72 | }; |
71 | 73 | ||
72 | #define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) | 74 | #define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next) |
73 | 75 | ||
74 | #define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) | 76 | #define FMASK (IFF_BROADCAST | IFF_POINTOPOINT) |
75 | 77 | ||
76 | /* "teql*" qdisc routines */ | 78 | /* "teql*" qdisc routines */ |
77 | 79 | ||
78 | static int | 80 | static int |
79 | teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) | 81 | teql_enqueue(struct sk_buff *skb, struct Qdisc *sch) |
80 | { | 82 | { |
81 | struct net_device *dev = qdisc_dev(sch); | 83 | struct net_device *dev = qdisc_dev(sch); |
82 | struct teql_sched_data *q = qdisc_priv(sch); | 84 | struct teql_sched_data *q = qdisc_priv(sch); |
83 | 85 | ||
84 | if (q->q.qlen < dev->tx_queue_len) { | 86 | if (q->q.qlen < dev->tx_queue_len) { |
85 | __skb_queue_tail(&q->q, skb); | 87 | __skb_queue_tail(&q->q, skb); |
86 | sch->bstats.bytes += qdisc_pkt_len(skb); | ||
87 | sch->bstats.packets++; | ||
88 | return NET_XMIT_SUCCESS; | 88 | return NET_XMIT_SUCCESS; |
89 | } | 89 | } |
90 | 90 | ||
@@ -94,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |||
94 | } | 94 | } |
95 | 95 | ||
96 | static struct sk_buff * | 96 | static struct sk_buff * |
97 | teql_dequeue(struct Qdisc* sch) | 97 | teql_dequeue(struct Qdisc *sch) |
98 | { | 98 | { |
99 | struct teql_sched_data *dat = qdisc_priv(sch); | 99 | struct teql_sched_data *dat = qdisc_priv(sch); |
100 | struct netdev_queue *dat_queue; | 100 | struct netdev_queue *dat_queue; |
@@ -108,19 +108,21 @@ teql_dequeue(struct Qdisc* sch) | |||
108 | dat->m->slaves = sch; | 108 | dat->m->slaves = sch; |
109 | netif_wake_queue(m); | 109 | netif_wake_queue(m); |
110 | } | 110 | } |
111 | } else { | ||
112 | qdisc_bstats_update(sch, skb); | ||
111 | } | 113 | } |
112 | sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; | 114 | sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; |
113 | return skb; | 115 | return skb; |
114 | } | 116 | } |
115 | 117 | ||
116 | static struct sk_buff * | 118 | static struct sk_buff * |
117 | teql_peek(struct Qdisc* sch) | 119 | teql_peek(struct Qdisc *sch) |
118 | { | 120 | { |
119 | /* teql is meant to be used as root qdisc */ | 121 | /* teql is meant to be used as root qdisc */ |
120 | return NULL; | 122 | return NULL; |
121 | } | 123 | } |
122 | 124 | ||
123 | static __inline__ void | 125 | static inline void |
124 | teql_neigh_release(struct neighbour *n) | 126 | teql_neigh_release(struct neighbour *n) |
125 | { | 127 | { |
126 | if (n) | 128 | if (n) |
@@ -128,7 +130,7 @@ teql_neigh_release(struct neighbour *n) | |||
128 | } | 130 | } |
129 | 131 | ||
130 | static void | 132 | static void |
131 | teql_reset(struct Qdisc* sch) | 133 | teql_reset(struct Qdisc *sch) |
132 | { | 134 | { |
133 | struct teql_sched_data *dat = qdisc_priv(sch); | 135 | struct teql_sched_data *dat = qdisc_priv(sch); |
134 | 136 | ||
@@ -138,13 +140,14 @@ teql_reset(struct Qdisc* sch) | |||
138 | } | 140 | } |
139 | 141 | ||
140 | static void | 142 | static void |
141 | teql_destroy(struct Qdisc* sch) | 143 | teql_destroy(struct Qdisc *sch) |
142 | { | 144 | { |
143 | struct Qdisc *q, *prev; | 145 | struct Qdisc *q, *prev; |
144 | struct teql_sched_data *dat = qdisc_priv(sch); | 146 | struct teql_sched_data *dat = qdisc_priv(sch); |
145 | struct teql_master *master = dat->m; | 147 | struct teql_master *master = dat->m; |
146 | 148 | ||
147 | if ((prev = master->slaves) != NULL) { | 149 | prev = master->slaves; |
150 | if (prev) { | ||
148 | do { | 151 | do { |
149 | q = NEXT_SLAVE(prev); | 152 | q = NEXT_SLAVE(prev); |
150 | if (q == sch) { | 153 | if (q == sch) { |
@@ -176,7 +179,7 @@ teql_destroy(struct Qdisc* sch) | |||
176 | static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) | 179 | static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) |
177 | { | 180 | { |
178 | struct net_device *dev = qdisc_dev(sch); | 181 | struct net_device *dev = qdisc_dev(sch); |
179 | struct teql_master *m = (struct teql_master*)sch->ops; | 182 | struct teql_master *m = (struct teql_master *)sch->ops; |
180 | struct teql_sched_data *q = qdisc_priv(sch); | 183 | struct teql_sched_data *q = qdisc_priv(sch); |
181 | 184 | ||
182 | if (dev->hard_header_len > m->dev->hard_header_len) | 185 | if (dev->hard_header_len > m->dev->hard_header_len) |
@@ -241,11 +244,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * | |||
241 | } | 244 | } |
242 | if (neigh_event_send(n, skb_res) == 0) { | 245 | if (neigh_event_send(n, skb_res) == 0) { |
243 | int err; | 246 | int err; |
247 | char haddr[MAX_ADDR_LEN]; | ||
244 | 248 | ||
245 | read_lock(&n->lock); | 249 | neigh_ha_snapshot(haddr, n, dev); |
246 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), | 250 | err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, |
247 | n->ha, NULL, skb->len); | 251 | NULL, skb->len); |
248 | read_unlock(&n->lock); | ||
249 | 252 | ||
250 | if (err < 0) { | 253 | if (err < 0) { |
251 | neigh_release(n); | 254 | neigh_release(n); |
@@ -275,7 +278,6 @@ static inline int teql_resolve(struct sk_buff *skb, | |||
275 | static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) | 278 | static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) |
276 | { | 279 | { |
277 | struct teql_master *master = netdev_priv(dev); | 280 | struct teql_master *master = netdev_priv(dev); |
278 | struct netdev_queue *txq = netdev_get_tx_queue(dev, 0); | ||
279 | struct Qdisc *start, *q; | 281 | struct Qdisc *start, *q; |
280 | int busy; | 282 | int busy; |
281 | int nores; | 283 | int nores; |
@@ -288,7 +290,8 @@ restart: | |||
288 | nores = 0; | 290 | nores = 0; |
289 | busy = 0; | 291 | busy = 0; |
290 | 292 | ||
291 | if ((q = start) == NULL) | 293 | q = start; |
294 | if (!q) | ||
292 | goto drop; | 295 | goto drop; |
293 | 296 | ||
294 | do { | 297 | do { |
@@ -309,15 +312,14 @@ restart: | |||
309 | if (__netif_tx_trylock(slave_txq)) { | 312 | if (__netif_tx_trylock(slave_txq)) { |
310 | unsigned int length = qdisc_pkt_len(skb); | 313 | unsigned int length = qdisc_pkt_len(skb); |
311 | 314 | ||
312 | if (!netif_tx_queue_stopped(slave_txq) && | 315 | if (!netif_tx_queue_frozen_or_stopped(slave_txq) && |
313 | !netif_tx_queue_frozen(slave_txq) && | ||
314 | slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { | 316 | slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { |
315 | txq_trans_update(slave_txq); | 317 | txq_trans_update(slave_txq); |
316 | __netif_tx_unlock(slave_txq); | 318 | __netif_tx_unlock(slave_txq); |
317 | master->slaves = NEXT_SLAVE(q); | 319 | master->slaves = NEXT_SLAVE(q); |
318 | netif_wake_queue(dev); | 320 | netif_wake_queue(dev); |
319 | txq->tx_packets++; | 321 | master->tx_packets++; |
320 | txq->tx_bytes += length; | 322 | master->tx_bytes += length; |
321 | return NETDEV_TX_OK; | 323 | return NETDEV_TX_OK; |
322 | } | 324 | } |
323 | __netif_tx_unlock(slave_txq); | 325 | __netif_tx_unlock(slave_txq); |
@@ -344,20 +346,20 @@ restart: | |||
344 | netif_stop_queue(dev); | 346 | netif_stop_queue(dev); |
345 | return NETDEV_TX_BUSY; | 347 | return NETDEV_TX_BUSY; |
346 | } | 348 | } |
347 | dev->stats.tx_errors++; | 349 | master->tx_errors++; |
348 | 350 | ||
349 | drop: | 351 | drop: |
350 | txq->tx_dropped++; | 352 | master->tx_dropped++; |
351 | dev_kfree_skb(skb); | 353 | dev_kfree_skb(skb); |
352 | return NETDEV_TX_OK; | 354 | return NETDEV_TX_OK; |
353 | } | 355 | } |
354 | 356 | ||
355 | static int teql_master_open(struct net_device *dev) | 357 | static int teql_master_open(struct net_device *dev) |
356 | { | 358 | { |
357 | struct Qdisc * q; | 359 | struct Qdisc *q; |
358 | struct teql_master *m = netdev_priv(dev); | 360 | struct teql_master *m = netdev_priv(dev); |
359 | int mtu = 0xFFFE; | 361 | int mtu = 0xFFFE; |
360 | unsigned flags = IFF_NOARP|IFF_MULTICAST; | 362 | unsigned int flags = IFF_NOARP | IFF_MULTICAST; |
361 | 363 | ||
362 | if (m->slaves == NULL) | 364 | if (m->slaves == NULL) |
363 | return -EUNATCH; | 365 | return -EUNATCH; |
@@ -400,6 +402,18 @@ static int teql_master_close(struct net_device *dev) | |||
400 | return 0; | 402 | return 0; |
401 | } | 403 | } |
402 | 404 | ||
405 | static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev, | ||
406 | struct rtnl_link_stats64 *stats) | ||
407 | { | ||
408 | struct teql_master *m = netdev_priv(dev); | ||
409 | |||
410 | stats->tx_packets = m->tx_packets; | ||
411 | stats->tx_bytes = m->tx_bytes; | ||
412 | stats->tx_errors = m->tx_errors; | ||
413 | stats->tx_dropped = m->tx_dropped; | ||
414 | return stats; | ||
415 | } | ||
416 | |||
403 | static int teql_master_mtu(struct net_device *dev, int new_mtu) | 417 | static int teql_master_mtu(struct net_device *dev, int new_mtu) |
404 | { | 418 | { |
405 | struct teql_master *m = netdev_priv(dev); | 419 | struct teql_master *m = netdev_priv(dev); |
@@ -413,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu) | |||
413 | do { | 427 | do { |
414 | if (new_mtu > qdisc_dev(q)->mtu) | 428 | if (new_mtu > qdisc_dev(q)->mtu) |
415 | return -EINVAL; | 429 | return -EINVAL; |
416 | } while ((q=NEXT_SLAVE(q)) != m->slaves); | 430 | } while ((q = NEXT_SLAVE(q)) != m->slaves); |
417 | } | 431 | } |
418 | 432 | ||
419 | dev->mtu = new_mtu; | 433 | dev->mtu = new_mtu; |
@@ -424,6 +438,7 @@ static const struct net_device_ops teql_netdev_ops = { | |||
424 | .ndo_open = teql_master_open, | 438 | .ndo_open = teql_master_open, |
425 | .ndo_stop = teql_master_close, | 439 | .ndo_stop = teql_master_close, |
426 | .ndo_start_xmit = teql_master_xmit, | 440 | .ndo_start_xmit = teql_master_xmit, |
441 | .ndo_get_stats64 = teql_master_stats64, | ||
427 | .ndo_change_mtu = teql_master_mtu, | 442 | .ndo_change_mtu = teql_master_mtu, |
428 | }; | 443 | }; |
429 | 444 | ||