aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
authorGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
committerGlenn Elliott <gelliott@cs.unc.edu>2012-03-04 19:47:13 -0500
commitc71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
treeecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /net/sched
parentea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlpwip-k-fmlp
Conflicts: litmus/sched_cedf.c
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig63
-rw-r--r--net/sched/Makefile6
-rw-r--r--net/sched/act_api.c55
-rw-r--r--net/sched/act_csum.c594
-rw-r--r--net/sched/act_gact.c8
-rw-r--r--net/sched/act_ipt.c33
-rw-r--r--net/sched/act_mirred.c7
-rw-r--r--net/sched/act_nat.c5
-rw-r--r--net/sched/act_pedit.c15
-rw-r--r--net/sched/act_police.c20
-rw-r--r--net/sched/act_simple.c13
-rw-r--r--net/sched/act_skbedit.c11
-rw-r--r--net/sched/cls_api.c33
-rw-r--r--net/sched/cls_basic.c21
-rw-r--r--net/sched/cls_cgroup.c12
-rw-r--r--net/sched/cls_flow.c76
-rw-r--r--net/sched/cls_fw.c38
-rw-r--r--net/sched/cls_route.c126
-rw-r--r--net/sched/cls_rsvp.h95
-rw-r--r--net/sched/cls_tcindex.c2
-rw-r--r--net/sched/cls_u32.c89
-rw-r--r--net/sched/em_cmp.c47
-rw-r--r--net/sched/em_meta.c56
-rw-r--r--net/sched/em_nbyte.c3
-rw-r--r--net/sched/em_text.c6
-rw-r--r--net/sched/em_u32.c2
-rw-r--r--net/sched/ematch.c37
-rw-r--r--net/sched/sch_api.c205
-rw-r--r--net/sched/sch_atm.c27
-rw-r--r--net/sched/sch_cbq.c379
-rw-r--r--net/sched/sch_choke.c688
-rw-r--r--net/sched/sch_drr.c12
-rw-r--r--net/sched/sch_dsmark.c30
-rw-r--r--net/sched/sch_fifo.c60
-rw-r--r--net/sched/sch_generic.c138
-rw-r--r--net/sched/sch_gred.c85
-rw-r--r--net/sched/sch_hfsc.c51
-rw-r--r--net/sched/sch_htb.c147
-rw-r--r--net/sched/sch_ingress.c3
-rw-r--r--net/sched/sch_mq.c3
-rw-r--r--net/sched/sch_mqprio.c418
-rw-r--r--net/sched/sch_multiq.c14
-rw-r--r--net/sched/sch_netem.c417
-rw-r--r--net/sched/sch_prio.c39
-rw-r--r--net/sched/sch_qfq.c1137
-rw-r--r--net/sched/sch_red.c74
-rw-r--r--net/sched/sch_sfb.c709
-rw-r--r--net/sched/sch_sfq.c409
-rw-r--r--net/sched/sch_tbf.c42
-rw-r--r--net/sched/sch_teql.c77
50 files changed, 5384 insertions, 1253 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..2590e91b3289 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -24,7 +24,7 @@ menuconfig NET_SCHED
24 To administer these schedulers, you'll need the user-level utilities 24 To administer these schedulers, you'll need the user-level utilities
25 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>. 25 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
26 That package also contains some documentation; for more, check out 26 That package also contains some documentation; for more, check out
27 <http://linux-net.osdl.org/index.php/Iproute2>. 27 <http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2>.
28 28
29 This Quality of Service (QoS) support will enable you to use 29 This Quality of Service (QoS) support will enable you to use
30 Differentiated Services (diffserv) and Resource Reservation Protocol 30 Differentiated Services (diffserv) and Resource Reservation Protocol
@@ -126,6 +126,17 @@ config NET_SCH_RED
126 To compile this code as a module, choose M here: the 126 To compile this code as a module, choose M here: the
127 module will be called sch_red. 127 module will be called sch_red.
128 128
129config NET_SCH_SFB
130 tristate "Stochastic Fair Blue (SFB)"
131 ---help---
132 Say Y here if you want to use the Stochastic Fair Blue (SFB)
133 packet scheduling algorithm.
134
135 See the top of <file:net/sched/sch_sfb.c> for more details.
136
137 To compile this code as a module, choose M here: the
138 module will be called sch_sfb.
139
129config NET_SCH_SFQ 140config NET_SCH_SFQ
130 tristate "Stochastic Fairness Queueing (SFQ)" 141 tristate "Stochastic Fairness Queueing (SFQ)"
131 ---help--- 142 ---help---
@@ -205,6 +216,40 @@ config NET_SCH_DRR
205 216
206 If unsure, say N. 217 If unsure, say N.
207 218
219config NET_SCH_MQPRIO
220 tristate "Multi-queue priority scheduler (MQPRIO)"
221 help
222 Say Y here if you want to use the Multi-queue Priority scheduler.
223 This scheduler allows QOS to be offloaded on NICs that have support
224 for offloading QOS schedulers.
225
226 To compile this driver as a module, choose M here: the module will
227 be called sch_mqprio.
228
229 If unsure, say N.
230
231config NET_SCH_CHOKE
232 tristate "CHOose and Keep responsive flow scheduler (CHOKE)"
233 help
234 Say Y here if you want to use the CHOKe packet scheduler (CHOose
235 and Keep for responsive flows, CHOose and Kill for unresponsive
236 flows). This is a variation of RED which trys to penalize flows
237 that monopolize the queue.
238
239 To compile this code as a module, choose M here: the
240 module will be called sch_choke.
241
242config NET_SCH_QFQ
243 tristate "Quick Fair Queueing scheduler (QFQ)"
244 help
245 Say Y here if you want to use the Quick Fair Queueing Scheduler (QFQ)
246 packet scheduling algorithm.
247
248 To compile this driver as a module, choose M here: the module
249 will be called sch_qfq.
250
251 If unsure, say N.
252
208config NET_SCH_INGRESS 253config NET_SCH_INGRESS
209 tristate "Ingress Qdisc" 254 tristate "Ingress Qdisc"
210 depends on NET_CLS_ACT 255 depends on NET_CLS_ACT
@@ -243,7 +288,8 @@ config NET_CLS_TCINDEX
243 288
244config NET_CLS_ROUTE4 289config NET_CLS_ROUTE4
245 tristate "Routing decision (ROUTE)" 290 tristate "Routing decision (ROUTE)"
246 select NET_CLS_ROUTE 291 depends on INET
292 select IP_ROUTE_CLASSID
247 select NET_CLS 293 select NET_CLS
248 ---help--- 294 ---help---
249 If you say Y here, you will be able to classify packets 295 If you say Y here, you will be able to classify packets
@@ -252,9 +298,6 @@ config NET_CLS_ROUTE4
252 To compile this code as a module, choose M here: the 298 To compile this code as a module, choose M here: the
253 module will be called cls_route. 299 module will be called cls_route.
254 300
255config NET_CLS_ROUTE
256 bool
257
258config NET_CLS_FW 301config NET_CLS_FW
259 tristate "Netfilter mark (FW)" 302 tristate "Netfilter mark (FW)"
260 select NET_CLS 303 select NET_CLS
@@ -518,6 +561,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 561 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 562 module will be called act_skbedit.
520 563
564config NET_ACT_CSUM
565 tristate "Checksum Updating"
566 depends on NET_CLS_ACT && INET
567 ---help---
568 Say Y here to update some common checksum after some direct
569 packet alterations.
570
571 To compile this code as a module, choose M here: the
572 module will be called act_csum.
573
521config NET_CLS_IND 574config NET_CLS_IND
522 bool "Incoming device classification" 575 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 576 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..dc5889c0a15a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
@@ -23,6 +24,7 @@ obj-$(CONFIG_NET_SCH_RED) += sch_red.o
23obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o 24obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
24obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o 25obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
25obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o 26obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
27obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
26obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o 28obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
27obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o 29obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
28obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o 30obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
@@ -31,6 +33,10 @@ obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
31obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o 33obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
32obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o 34obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
33obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o 35obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
36obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
37obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
38obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
39
34obj-$(CONFIG_NET_CLS_U32) += cls_u32.o 40obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
35obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o 41obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
36obj-$(CONFIG_NET_CLS_FW) += cls_fw.o 42obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 23b25f89e7e0..a606025814a1 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -26,11 +26,6 @@
26#include <net/act_api.h> 26#include <net/act_api.h>
27#include <net/netlink.h> 27#include <net/netlink.h>
28 28
29static void tcf_common_free_rcu(struct rcu_head *head)
30{
31 kfree(container_of(head, struct tcf_common, tcfc_rcu));
32}
33
34void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) 29void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
35{ 30{
36 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); 31 unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask);
@@ -47,7 +42,7 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo)
47 * gen_estimator est_timer() might access p->tcfc_lock 42 * gen_estimator est_timer() might access p->tcfc_lock
48 * or bstats, wait a RCU grace period before freeing p 43 * or bstats, wait a RCU grace period before freeing p
49 */ 44 */
50 call_rcu(&p->tcfc_rcu, tcf_common_free_rcu); 45 kfree_rcu(p, tcfc_rcu);
51 return; 46 return;
52 } 47 }
53 } 48 }
@@ -78,7 +73,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
78 struct tc_action *a, struct tcf_hashinfo *hinfo) 73 struct tc_action *a, struct tcf_hashinfo *hinfo)
79{ 74{
80 struct tcf_common *p; 75 struct tcf_common *p;
81 int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; 76 int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
82 struct nlattr *nest; 77 struct nlattr *nest;
83 78
84 read_lock_bh(hinfo->lock); 79 read_lock_bh(hinfo->lock);
@@ -126,7 +121,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
126{ 121{
127 struct tcf_common *p, *s_p; 122 struct tcf_common *p, *s_p;
128 struct nlattr *nest; 123 struct nlattr *nest;
129 int i= 0, n_i = 0; 124 int i = 0, n_i = 0;
130 125
131 nest = nla_nest_start(skb, a->order); 126 nest = nla_nest_start(skb, a->order);
132 if (nest == NULL) 127 if (nest == NULL)
@@ -138,7 +133,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a,
138 while (p != NULL) { 133 while (p != NULL) {
139 s_p = p->tcfc_next; 134 s_p = p->tcfc_next;
140 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) 135 if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo))
141 module_put(a->ops->owner); 136 module_put(a->ops->owner);
142 n_i++; 137 n_i++;
143 p = s_p; 138 p = s_p;
144 } 139 }
@@ -447,7 +442,8 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
447 nest = nla_nest_start(skb, TCA_OPTIONS); 442 nest = nla_nest_start(skb, TCA_OPTIONS);
448 if (nest == NULL) 443 if (nest == NULL)
449 goto nla_put_failure; 444 goto nla_put_failure;
450 if ((err = tcf_action_dump_old(skb, a, bind, ref)) > 0) { 445 err = tcf_action_dump_old(skb, a, bind, ref);
446 if (err > 0) {
451 nla_nest_end(skb, nest); 447 nla_nest_end(skb, nest);
452 return err; 448 return err;
453 } 449 }
@@ -491,7 +487,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
491 struct tc_action *a; 487 struct tc_action *a;
492 struct tc_action_ops *a_o; 488 struct tc_action_ops *a_o;
493 char act_name[IFNAMSIZ]; 489 char act_name[IFNAMSIZ];
494 struct nlattr *tb[TCA_ACT_MAX+1]; 490 struct nlattr *tb[TCA_ACT_MAX + 1];
495 struct nlattr *kind; 491 struct nlattr *kind;
496 int err; 492 int err;
497 493
@@ -549,9 +545,9 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est,
549 goto err_free; 545 goto err_free;
550 546
551 /* module count goes up only when brand new policy is created 547 /* module count goes up only when brand new policy is created
552 if it exists and is only bound to in a_o->init() then 548 * if it exists and is only bound to in a_o->init() then
553 ACT_P_CREATED is not returned (a zero is). 549 * ACT_P_CREATED is not returned (a zero is).
554 */ 550 */
555 if (err != ACT_P_CREATED) 551 if (err != ACT_P_CREATED)
556 module_put(a_o->owner); 552 module_put(a_o->owner);
557 a->ops = a_o; 553 a->ops = a_o;
@@ -569,7 +565,7 @@ err_out:
569struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est, 565struct tc_action *tcf_action_init(struct nlattr *nla, struct nlattr *est,
570 char *name, int ovr, int bind) 566 char *name, int ovr, int bind)
571{ 567{
572 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 568 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
573 struct tc_action *head = NULL, *act, *act_prev = NULL; 569 struct tc_action *head = NULL, *act, *act_prev = NULL;
574 int err; 570 int err;
575 int i; 571 int i;
@@ -697,7 +693,7 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
697static struct tc_action * 693static struct tc_action *
698tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) 694tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
699{ 695{
700 struct nlattr *tb[TCA_ACT_MAX+1]; 696 struct nlattr *tb[TCA_ACT_MAX + 1];
701 struct tc_action *a; 697 struct tc_action *a;
702 int index; 698 int index;
703 int err; 699 int err;
@@ -770,7 +766,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
770 struct tcamsg *t; 766 struct tcamsg *t;
771 struct netlink_callback dcb; 767 struct netlink_callback dcb;
772 struct nlattr *nest; 768 struct nlattr *nest;
773 struct nlattr *tb[TCA_ACT_MAX+1]; 769 struct nlattr *tb[TCA_ACT_MAX + 1];
774 struct nlattr *kind; 770 struct nlattr *kind;
775 struct tc_action *a = create_a(0); 771 struct tc_action *a = create_a(0);
776 int err = -ENOMEM; 772 int err = -ENOMEM;
@@ -821,7 +817,8 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
821 nlh->nlmsg_flags |= NLM_F_ROOT; 817 nlh->nlmsg_flags |= NLM_F_ROOT;
822 module_put(a->ops->owner); 818 module_put(a->ops->owner);
823 kfree(a); 819 kfree(a);
824 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 820 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
821 n->nlmsg_flags & NLM_F_ECHO);
825 if (err > 0) 822 if (err > 0)
826 return 0; 823 return 0;
827 824
@@ -842,14 +839,14 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
842 u32 pid, int event) 839 u32 pid, int event)
843{ 840{
844 int i, ret; 841 int i, ret;
845 struct nlattr *tb[TCA_ACT_MAX_PRIO+1]; 842 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
846 struct tc_action *head = NULL, *act, *act_prev = NULL; 843 struct tc_action *head = NULL, *act, *act_prev = NULL;
847 844
848 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL); 845 ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL);
849 if (ret < 0) 846 if (ret < 0)
850 return ret; 847 return ret;
851 848
852 if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) { 849 if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) {
853 if (tb[1] != NULL) 850 if (tb[1] != NULL)
854 return tca_action_flush(net, tb[1], n, pid); 851 return tca_action_flush(net, tb[1], n, pid);
855 else 852 else
@@ -892,7 +889,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
892 /* now do the delete */ 889 /* now do the delete */
893 tcf_action_destroy(head, 0); 890 tcf_action_destroy(head, 0);
894 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, 891 ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
895 n->nlmsg_flags&NLM_F_ECHO); 892 n->nlmsg_flags & NLM_F_ECHO);
896 if (ret > 0) 893 if (ret > 0)
897 return 0; 894 return 0;
898 return ret; 895 return ret;
@@ -936,7 +933,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a,
936 nlh->nlmsg_len = skb_tail_pointer(skb) - b; 933 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
937 NETLINK_CB(skb).dst_group = RTNLGRP_TC; 934 NETLINK_CB(skb).dst_group = RTNLGRP_TC;
938 935
939 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO); 936 err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO);
940 if (err > 0) 937 if (err > 0)
941 err = 0; 938 err = 0;
942 return err; 939 return err;
@@ -967,7 +964,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
967 964
968 /* dump then free all the actions after update; inserted policy 965 /* dump then free all the actions after update; inserted policy
969 * stays intact 966 * stays intact
970 * */ 967 */
971 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); 968 ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
972 for (a = act; a; a = act) { 969 for (a = act; a; a = act) {
973 act = a->next; 970 act = a->next;
@@ -993,17 +990,16 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
993 return -EINVAL; 990 return -EINVAL;
994 } 991 }
995 992
996 /* n->nlmsg_flags&NLM_F_CREATE 993 /* n->nlmsg_flags & NLM_F_CREATE */
997 * */
998 switch (n->nlmsg_type) { 994 switch (n->nlmsg_type) {
999 case RTM_NEWACTION: 995 case RTM_NEWACTION:
1000 /* we are going to assume all other flags 996 /* we are going to assume all other flags
1001 * imply create only if it doesnt exist 997 * imply create only if it doesn't exist
1002 * Note that CREATE | EXCL implies that 998 * Note that CREATE | EXCL implies that
1003 * but since we want avoid ambiguity (eg when flags 999 * but since we want avoid ambiguity (eg when flags
1004 * is zero) then just set this 1000 * is zero) then just set this
1005 */ 1001 */
1006 if (n->nlmsg_flags&NLM_F_REPLACE) 1002 if (n->nlmsg_flags & NLM_F_REPLACE)
1007 ovr = 1; 1003 ovr = 1;
1008replay: 1004replay:
1009 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); 1005 ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
@@ -1028,7 +1024,7 @@ replay:
1028static struct nlattr * 1024static struct nlattr *
1029find_dump_kind(const struct nlmsghdr *n) 1025find_dump_kind(const struct nlmsghdr *n)
1030{ 1026{
1031 struct nlattr *tb1, *tb2[TCA_ACT_MAX+1]; 1027 struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
1032 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; 1028 struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
1033 struct nlattr *nla[TCAA_MAX + 1]; 1029 struct nlattr *nla[TCAA_MAX + 1];
1034 struct nlattr *kind; 1030 struct nlattr *kind;
@@ -1071,9 +1067,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
1071 } 1067 }
1072 1068
1073 a_o = tc_lookup_action(kind); 1069 a_o = tc_lookup_action(kind);
1074 if (a_o == NULL) { 1070 if (a_o == NULL)
1075 return 0; 1071 return 0;
1076 }
1077 1072
1078 memset(&a, 0, sizeof(struct tc_action)); 1073 memset(&a, 0, sizeof(struct tc_action));
1079 a.ops = a_o; 1074 a.ops = a_o;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..6cdf9abe475f
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,594 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
66 err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
114 * Check the expected next layer availability in the specified sk_buff.
115 * Return the next layer pointer if pass, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
235 * Support both UDP and UDPLITE checksum algorithms, Don't use
236 * udph->len to get the real length without any protocol check,
237 * UDPLITE uses udph->len for another thing,
238 * Use iph->tot_len, or just ipl.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
286 * Support both UDP and UDPLITE checksum algorithms, Don't use
287 * udph->len to get the real length without any protocol check,
288 * UDPLITE uses udph->len for another thing,
289 * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 bstats_update(&p->tcf_bstats, skb);
512 action = p->tcf_action;
513 update_flags = p->update_flags;
514 spin_unlock(&p->tcf_lock);
515
516 if (unlikely(action == TC_ACT_SHOT))
517 goto drop;
518
519 switch (skb->protocol) {
520 case cpu_to_be16(ETH_P_IP):
521 if (!tcf_csum_ipv4(skb, update_flags))
522 goto drop;
523 break;
524 case cpu_to_be16(ETH_P_IPV6):
525 if (!tcf_csum_ipv6(skb, update_flags))
526 goto drop;
527 break;
528 }
529
530 return action;
531
532drop:
533 spin_lock(&p->tcf_lock);
534 p->tcf_qstats.drops++;
535 spin_unlock(&p->tcf_lock);
536 return TC_ACT_SHOT;
537}
538
539static int tcf_csum_dump(struct sk_buff *skb,
540 struct tc_action *a, int bind, int ref)
541{
542 unsigned char *b = skb_tail_pointer(skb);
543 struct tcf_csum *p = a->priv;
544 struct tc_csum opt = {
545 .update_flags = p->update_flags,
546 .index = p->tcf_index,
547 .action = p->tcf_action,
548 .refcnt = p->tcf_refcnt - ref,
549 .bindcnt = p->tcf_bindcnt - bind,
550 };
551 struct tcf_t t;
552
553 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
554 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
555 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
556 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
557 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
558
559 return skb->len;
560
561nla_put_failure:
562 nlmsg_trim(skb, b);
563 return -1;
564}
565
566static struct tc_action_ops act_csum_ops = {
567 .kind = "csum",
568 .hinfo = &csum_hash_info,
569 .type = TCA_ACT_CSUM,
570 .capab = TCA_CAP_NONE,
571 .owner = THIS_MODULE,
572 .act = tcf_csum,
573 .dump = tcf_csum_dump,
574 .cleanup = tcf_csum_cleanup,
575 .lookup = tcf_hash_search,
576 .init = tcf_csum_init,
577 .walk = tcf_generic_walker
578};
579
580MODULE_DESCRIPTION("Checksum updating actions");
581MODULE_LICENSE("GPL");
582
583static int __init csum_init_module(void)
584{
585 return tcf_register_action(&act_csum_ops);
586}
587
588static void __exit csum_cleanup_module(void)
589{
590 tcf_unregister_action(&act_csum_ops);
591}
592
593module_init(csum_init_module);
594module_exit(csum_cleanup_module);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index c2ed90a4c0b4..2b4ab4b05ce8 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -50,7 +50,7 @@ static int gact_determ(struct tcf_gact *gact)
50} 50}
51 51
52typedef int (*g_rand)(struct tcf_gact *gact); 52typedef int (*g_rand)(struct tcf_gact *gact);
53static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; 53static g_rand gact_rand[MAX_RAND] = { NULL, gact_net_rand, gact_determ };
54#endif /* CONFIG_GACT_PROB */ 54#endif /* CONFIG_GACT_PROB */
55 55
56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { 56static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
@@ -89,7 +89,7 @@ static int tcf_gact_init(struct nlattr *nla, struct nlattr *est,
89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), 89 pc = tcf_hash_create(parm->index, est, a, sizeof(*gact),
90 bind, &gact_idx_gen, &gact_hash_info); 90 bind, &gact_idx_gen, &gact_hash_info);
91 if (IS_ERR(pc)) 91 if (IS_ERR(pc))
92 return PTR_ERR(pc); 92 return PTR_ERR(pc);
93 ret = ACT_P_CREATED; 93 ret = ACT_P_CREATED;
94 } else { 94 } else {
95 if (!ovr) { 95 if (!ovr) {
@@ -205,9 +205,9 @@ MODULE_LICENSE("GPL");
205static int __init gact_init_module(void) 205static int __init gact_init_module(void)
206{ 206{
207#ifdef CONFIG_GACT_PROB 207#ifdef CONFIG_GACT_PROB
208 printk(KERN_INFO "GACT probability on\n"); 208 pr_info("GACT probability on\n");
209#else 209#else
210 printk(KERN_INFO "GACT probability NOT on\n"); 210 pr_info("GACT probability NOT on\n");
211#endif 211#endif
212 return tcf_register_action(&act_gact_ops); 212 return tcf_register_action(&act_gact_ops);
213} 213}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c7e59e6ec349..9fc211a1b20e 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -39,7 +39,7 @@ static struct tcf_hashinfo ipt_hash_info = {
39 .lock = &ipt_lock, 39 .lock = &ipt_lock,
40}; 40};
41 41
42static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) 42static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
43{ 43{
44 struct xt_tgchk_param par; 44 struct xt_tgchk_param par;
45 struct xt_target *target; 45 struct xt_target *target;
@@ -66,7 +66,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
66 return 0; 66 return 0;
67} 67}
68 68
69static void ipt_destroy_target(struct ipt_entry_target *t) 69static void ipt_destroy_target(struct xt_entry_target *t)
70{ 70{
71 struct xt_tgdtor_param par = { 71 struct xt_tgdtor_param par = {
72 .target = t->u.kernel.target, 72 .target = t->u.kernel.target,
@@ -99,7 +99,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, 99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
100 [TCA_IPT_HOOK] = { .type = NLA_U32 }, 100 [TCA_IPT_HOOK] = { .type = NLA_U32 },
101 [TCA_IPT_INDEX] = { .type = NLA_U32 }, 101 [TCA_IPT_INDEX] = { .type = NLA_U32 },
102 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) }, 102 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
103}; 103};
104 104
105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, 105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
@@ -108,7 +108,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
108 struct nlattr *tb[TCA_IPT_MAX + 1]; 108 struct nlattr *tb[TCA_IPT_MAX + 1];
109 struct tcf_ipt *ipt; 109 struct tcf_ipt *ipt;
110 struct tcf_common *pc; 110 struct tcf_common *pc;
111 struct ipt_entry_target *td, *t; 111 struct xt_entry_target *td, *t;
112 char *tname; 112 char *tname;
113 int ret = 0, err; 113 int ret = 0, err;
114 u32 hook = 0; 114 u32 hook = 0;
@@ -126,7 +126,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
126 if (tb[TCA_IPT_TARG] == NULL) 126 if (tb[TCA_IPT_TARG] == NULL)
127 return -EINVAL; 127 return -EINVAL;
128 128
129 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]); 129 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) 130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
131 return -EINVAL; 131 return -EINVAL;
132 132
@@ -138,7 +138,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, 138 pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind,
139 &ipt_idx_gen, &ipt_hash_info); 139 &ipt_idx_gen, &ipt_hash_info);
140 if (IS_ERR(pc)) 140 if (IS_ERR(pc))
141 return PTR_ERR(pc); 141 return PTR_ERR(pc);
142 ret = ACT_P_CREATED; 142 ret = ACT_P_CREATED;
143 } else { 143 } else {
144 if (!ovr) { 144 if (!ovr) {
@@ -162,7 +162,8 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
162 if (unlikely(!t)) 162 if (unlikely(!t))
163 goto err2; 163 goto err2;
164 164
165 if ((err = ipt_init_target(t, tname, hook)) < 0) 165 err = ipt_init_target(t, tname, hook);
166 if (err < 0)
166 goto err3; 167 goto err3;
167 168
168 spin_lock_bh(&ipt->tcf_lock); 169 spin_lock_bh(&ipt->tcf_lock);
@@ -209,12 +210,12 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
209 spin_lock(&ipt->tcf_lock); 210 spin_lock(&ipt->tcf_lock);
210 211
211 ipt->tcf_tm.lastuse = jiffies; 212 ipt->tcf_tm.lastuse = jiffies;
212 ipt->tcf_bstats.bytes += qdisc_pkt_len(skb); 213 bstats_update(&ipt->tcf_bstats, skb);
213 ipt->tcf_bstats.packets++;
214 214
215 /* yes, we have to worry about both in and out dev 215 /* yes, we have to worry about both in and out dev
216 worry later - danger - this API seems to have changed 216 * worry later - danger - this API seems to have changed
217 from earlier kernels */ 217 * from earlier kernels
218 */
218 par.in = skb->dev; 219 par.in = skb->dev;
219 par.out = NULL; 220 par.out = NULL;
220 par.hooknum = ipt->tcfi_hook; 221 par.hooknum = ipt->tcfi_hook;
@@ -230,7 +231,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
230 result = TC_ACT_SHOT; 231 result = TC_ACT_SHOT;
231 ipt->tcf_qstats.drops++; 232 ipt->tcf_qstats.drops++;
232 break; 233 break;
233 case IPT_CONTINUE: 234 case XT_CONTINUE:
234 result = TC_ACT_PIPE; 235 result = TC_ACT_PIPE;
235 break; 236 break;
236 default: 237 default:
@@ -249,14 +250,14 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
249{ 250{
250 unsigned char *b = skb_tail_pointer(skb); 251 unsigned char *b = skb_tail_pointer(skb);
251 struct tcf_ipt *ipt = a->priv; 252 struct tcf_ipt *ipt = a->priv;
252 struct ipt_entry_target *t; 253 struct xt_entry_target *t;
253 struct tcf_t tm; 254 struct tcf_t tm;
254 struct tc_cnt c; 255 struct tc_cnt c;
255 256
256 /* for simple targets kernel size == user size 257 /* for simple targets kernel size == user size
257 ** user name = target name 258 * user name = target name
258 ** for foolproof you need to not assume this 259 * for foolproof you need to not assume this
259 */ 260 */
260 261
261 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); 262 t = kmemdup(ipt->tcfi_t, ipt->tcfi_t->u.user.target_size, GFP_ATOMIC);
262 if (unlikely(!t)) 263 if (unlikely(!t))
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 0c311be92827..961386e2f2c0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -41,13 +41,13 @@ static struct tcf_hashinfo mirred_hash_info = {
41 .lock = &mirred_lock, 41 .lock = &mirred_lock,
42}; 42};
43 43
44static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) 44static int tcf_mirred_release(struct tcf_mirred *m, int bind)
45{ 45{
46 if (m) { 46 if (m) {
47 if (bind) 47 if (bind)
48 m->tcf_bindcnt--; 48 m->tcf_bindcnt--;
49 m->tcf_refcnt--; 49 m->tcf_refcnt--;
50 if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { 50 if (!m->tcf_bindcnt && m->tcf_refcnt <= 0) {
51 list_del(&m->tcfm_list); 51 list_del(&m->tcfm_list);
52 if (m->tcfm_dev) 52 if (m->tcfm_dev)
53 dev_put(m->tcfm_dev); 53 dev_put(m->tcfm_dev);
@@ -165,8 +165,7 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
165 165
166 spin_lock(&m->tcf_lock); 166 spin_lock(&m->tcf_lock);
167 m->tcf_tm.lastuse = jiffies; 167 m->tcf_tm.lastuse = jiffies;
168 m->tcf_bstats.bytes += qdisc_pkt_len(skb); 168 bstats_update(&m->tcf_bstats, skb);
169 m->tcf_bstats.packets++;
170 169
171 dev = m->tcfm_dev; 170 dev = m->tcfm_dev;
172 if (!dev) { 171 if (!dev) {
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 186eb837e600..762b027650a9 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -69,7 +69,7 @@ static int tcf_nat_init(struct nlattr *nla, struct nlattr *est,
69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 69 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
70 &nat_idx_gen, &nat_hash_info); 70 &nat_idx_gen, &nat_hash_info);
71 if (IS_ERR(pc)) 71 if (IS_ERR(pc))
72 return PTR_ERR(pc); 72 return PTR_ERR(pc);
73 p = to_tcf_nat(pc); 73 p = to_tcf_nat(pc);
74 ret = ACT_P_CREATED; 74 ret = ACT_P_CREATED;
75 } else { 75 } else {
@@ -125,8 +125,7 @@ static int tcf_nat(struct sk_buff *skb, struct tc_action *a,
125 egress = p->flags & TCA_NAT_FLAG_EGRESS; 125 egress = p->flags & TCA_NAT_FLAG_EGRESS;
126 action = p->tcf_action; 126 action = p->tcf_action;
127 127
128 p->tcf_bstats.bytes += qdisc_pkt_len(skb); 128 bstats_update(&p->tcf_bstats, skb);
129 p->tcf_bstats.packets++;
130 129
131 spin_unlock(&p->tcf_lock); 130 spin_unlock(&p->tcf_lock);
132 131
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0593c9640db..7affe9a92757 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -70,7 +70,7 @@ static int tcf_pedit_init(struct nlattr *nla, struct nlattr *est,
70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, 70 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
71 &pedit_idx_gen, &pedit_hash_info); 71 &pedit_idx_gen, &pedit_hash_info);
72 if (IS_ERR(pc)) 72 if (IS_ERR(pc))
73 return PTR_ERR(pc); 73 return PTR_ERR(pc);
74 p = to_pedit(pc); 74 p = to_pedit(pc);
75 keys = kmalloc(ksize, GFP_KERNEL); 75 keys = kmalloc(ksize, GFP_KERNEL);
76 if (keys == NULL) { 76 if (keys == NULL) {
@@ -127,11 +127,9 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
127 int i, munged = 0; 127 int i, munged = 0;
128 unsigned int off; 128 unsigned int off;
129 129
130 if (skb_cloned(skb)) { 130 if (skb_cloned(skb) &&
131 if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { 131 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
132 return p->tcf_action; 132 return p->tcf_action;
133 }
134 }
135 133
136 off = skb_network_offset(skb); 134 off = skb_network_offset(skb);
137 135
@@ -163,7 +161,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
163 } 161 }
164 if (offset > 0 && offset > skb->len) { 162 if (offset > 0 && offset > skb->len) {
165 pr_info("tc filter pedit" 163 pr_info("tc filter pedit"
166 " offset %d cant exceed pkt length %d\n", 164 " offset %d can't exceed pkt length %d\n",
167 offset, skb->len); 165 offset, skb->len);
168 goto bad; 166 goto bad;
169 } 167 }
@@ -187,8 +185,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
187bad: 185bad:
188 p->tcf_qstats.overlimits++; 186 p->tcf_qstats.overlimits++;
189done: 187done:
190 p->tcf_bstats.bytes += qdisc_pkt_len(skb); 188 bstats_update(&p->tcf_bstats, skb);
191 p->tcf_bstats.packets++;
192 spin_unlock(&p->tcf_lock); 189 spin_unlock(&p->tcf_lock);
193 return p->tcf_action; 190 return p->tcf_action;
194} 191}
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 7ebf7439b478..b3b9b32f4e00 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -22,8 +22,8 @@
22#include <net/act_api.h> 22#include <net/act_api.h>
23#include <net/netlink.h> 23#include <net/netlink.h>
24 24
25#define L2T(p,L) qdisc_l2t((p)->tcfp_R_tab, L) 25#define L2T(p, L) qdisc_l2t((p)->tcfp_R_tab, L)
26#define L2T_P(p,L) qdisc_l2t((p)->tcfp_P_tab, L) 26#define L2T_P(p, L) qdisc_l2t((p)->tcfp_P_tab, L)
27 27
28#define POL_TAB_MASK 15 28#define POL_TAB_MASK 15
29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; 29static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1];
@@ -37,8 +37,7 @@ static struct tcf_hashinfo police_hash_info = {
37}; 37};
38 38
39/* old policer structure from before tc actions */ 39/* old policer structure from before tc actions */
40struct tc_police_compat 40struct tc_police_compat {
41{
42 u32 index; 41 u32 index;
43 int action; 42 int action;
44 u32 limit; 43 u32 limit;
@@ -97,11 +96,6 @@ nla_put_failure:
97 goto done; 96 goto done;
98} 97}
99 98
100static void tcf_police_free_rcu(struct rcu_head *head)
101{
102 kfree(container_of(head, struct tcf_police, tcf_rcu));
103}
104
105static void tcf_police_destroy(struct tcf_police *p) 99static void tcf_police_destroy(struct tcf_police *p)
106{ 100{
107 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); 101 unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK);
@@ -122,7 +116,7 @@ static void tcf_police_destroy(struct tcf_police *p)
122 * gen_estimator est_timer() might access p->tcf_lock 116 * gen_estimator est_timer() might access p->tcf_lock
123 * or bstats, wait a RCU grace period before freeing p 117 * or bstats, wait a RCU grace period before freeing p
124 */ 118 */
125 call_rcu(&p->tcf_rcu, tcf_police_free_rcu); 119 kfree_rcu(p, tcf_rcu);
126 return; 120 return;
127 } 121 }
128 } 122 }
@@ -139,7 +133,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
139static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est, 133static int tcf_act_police_locate(struct nlattr *nla, struct nlattr *est,
140 struct tc_action *a, int ovr, int bind) 134 struct tc_action *a, int ovr, int bind)
141{ 135{
142 unsigned h; 136 unsigned int h;
143 int ret = 0, err; 137 int ret = 0, err;
144 struct nlattr *tb[TCA_POLICE_MAX + 1]; 138 struct nlattr *tb[TCA_POLICE_MAX + 1];
145 struct tc_police *parm; 139 struct tc_police *parm;
@@ -298,8 +292,7 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
298 292
299 spin_lock(&police->tcf_lock); 293 spin_lock(&police->tcf_lock);
300 294
301 police->tcf_bstats.bytes += qdisc_pkt_len(skb); 295 bstats_update(&police->tcf_bstats, skb);
302 police->tcf_bstats.packets++;
303 296
304 if (police->tcfp_ewma_rate && 297 if (police->tcfp_ewma_rate &&
305 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { 298 police->tcf_rate_est.bps >= police->tcfp_ewma_rate) {
@@ -403,7 +396,6 @@ static void __exit
403police_cleanup_module(void) 396police_cleanup_module(void)
404{ 397{
405 tcf_unregister_action(&act_police_ops); 398 tcf_unregister_action(&act_police_ops);
406 rcu_barrier(); /* Wait for completion of call_rcu()'s (tcf_police_free_rcu) */
407} 399}
408 400
409module_init(police_init_module); 401module_init(police_init_module);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 97e84f3ee775..a34a22de60b3 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -42,13 +42,12 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
42 42
43 spin_lock(&d->tcf_lock); 43 spin_lock(&d->tcf_lock);
44 d->tcf_tm.lastuse = jiffies; 44 d->tcf_tm.lastuse = jiffies;
45 d->tcf_bstats.bytes += qdisc_pkt_len(skb); 45 bstats_update(&d->tcf_bstats, skb);
46 d->tcf_bstats.packets++;
47 46
48 /* print policy string followed by _ then packet count 47 /* print policy string followed by _ then packet count
49 * Example if this was the 3rd packet and the string was "hello" 48 * Example if this was the 3rd packet and the string was "hello"
50 * then it would look like "hello_3" (without quotes) 49 * then it would look like "hello_3" (without quotes)
51 **/ 50 */
52 pr_info("simple: %s_%d\n", 51 pr_info("simple: %s_%d\n",
53 (char *)d->tcfd_defdata, d->tcf_bstats.packets); 52 (char *)d->tcfd_defdata, d->tcf_bstats.packets);
54 spin_unlock(&d->tcf_lock); 53 spin_unlock(&d->tcf_lock);
@@ -126,7 +125,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
126 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 125 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
127 &simp_idx_gen, &simp_hash_info); 126 &simp_idx_gen, &simp_hash_info);
128 if (IS_ERR(pc)) 127 if (IS_ERR(pc))
129 return PTR_ERR(pc); 128 return PTR_ERR(pc);
130 129
131 d = to_defact(pc); 130 d = to_defact(pc);
132 ret = alloc_defdata(d, defdata); 131 ret = alloc_defdata(d, defdata);
@@ -150,7 +149,7 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
150 return ret; 149 return ret;
151} 150}
152 151
153static inline int tcf_simp_cleanup(struct tc_action *a, int bind) 152static int tcf_simp_cleanup(struct tc_action *a, int bind)
154{ 153{
155 struct tcf_defact *d = a->priv; 154 struct tcf_defact *d = a->priv;
156 155
@@ -159,8 +158,8 @@ static inline int tcf_simp_cleanup(struct tc_action *a, int bind)
159 return 0; 158 return 0;
160} 159}
161 160
162static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, 161static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
163 int bind, int ref) 162 int bind, int ref)
164{ 163{
165 unsigned char *b = skb_tail_pointer(skb); 164 unsigned char *b = skb_tail_pointer(skb);
166 struct tcf_defact *d = a->priv; 165 struct tcf_defact *d = a->priv;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 66cbf4eb8855..5f6f0c7c3905 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -46,8 +46,7 @@ static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
46 46
47 spin_lock(&d->tcf_lock); 47 spin_lock(&d->tcf_lock);
48 d->tcf_tm.lastuse = jiffies; 48 d->tcf_tm.lastuse = jiffies;
49 d->tcf_bstats.bytes += qdisc_pkt_len(skb); 49 bstats_update(&d->tcf_bstats, skb);
50 d->tcf_bstats.packets++;
51 50
52 if (d->flags & SKBEDIT_F_PRIORITY) 51 if (d->flags & SKBEDIT_F_PRIORITY)
53 skb->priority = d->priority; 52 skb->priority = d->priority;
@@ -114,7 +113,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
114 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, 113 pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
115 &skbedit_idx_gen, &skbedit_hash_info); 114 &skbedit_idx_gen, &skbedit_hash_info);
116 if (IS_ERR(pc)) 115 if (IS_ERR(pc))
117 return PTR_ERR(pc); 116 return PTR_ERR(pc);
118 117
119 d = to_skbedit(pc); 118 d = to_skbedit(pc);
120 ret = ACT_P_CREATED; 119 ret = ACT_P_CREATED;
@@ -145,7 +144,7 @@ static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
145 return ret; 144 return ret;
146} 145}
147 146
148static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) 147static int tcf_skbedit_cleanup(struct tc_action *a, int bind)
149{ 148{
150 struct tcf_skbedit *d = a->priv; 149 struct tcf_skbedit *d = a->priv;
151 150
@@ -154,8 +153,8 @@ static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
154 return 0; 153 return 0;
155} 154}
156 155
157static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, 156static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
158 int bind, int ref) 157 int bind, int ref)
159{ 158{
160 unsigned char *b = skb_tail_pointer(skb); 159 unsigned char *b = skb_tail_pointer(skb);
161 struct tcf_skbedit *d = a->priv; 160 struct tcf_skbedit *d = a->priv;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 5fd0c28ef79a..bb2c523f8158 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -85,7 +85,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
85 int rc = -ENOENT; 85 int rc = -ENOENT;
86 86
87 write_lock(&cls_mod_lock); 87 write_lock(&cls_mod_lock);
88 for (tp = &tcf_proto_base; (t=*tp) != NULL; tp = &t->next) 88 for (tp = &tcf_proto_base; (t = *tp) != NULL; tp = &t->next)
89 if (t == ops) 89 if (t == ops)
90 break; 90 break;
91 91
@@ -111,7 +111,7 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
111 u32 first = TC_H_MAKE(0xC0000000U, 0U); 111 u32 first = TC_H_MAKE(0xC0000000U, 0U);
112 112
113 if (tp) 113 if (tp)
114 first = tp->prio-1; 114 first = tp->prio - 1;
115 115
116 return first; 116 return first;
117} 117}
@@ -149,7 +149,8 @@ replay:
149 149
150 if (prio == 0) { 150 if (prio == 0) {
151 /* If no priority is given, user wants we allocated it. */ 151 /* If no priority is given, user wants we allocated it. */
152 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 152 if (n->nlmsg_type != RTM_NEWTFILTER ||
153 !(n->nlmsg_flags & NLM_F_CREATE))
153 return -ENOENT; 154 return -ENOENT;
154 prio = TC_H_MAKE(0x80000000U, 0U); 155 prio = TC_H_MAKE(0x80000000U, 0U);
155 } 156 }
@@ -176,7 +177,8 @@ replay:
176 } 177 }
177 178
178 /* Is it classful? */ 179 /* Is it classful? */
179 if ((cops = q->ops->cl_ops) == NULL) 180 cops = q->ops->cl_ops;
181 if (!cops)
180 return -EINVAL; 182 return -EINVAL;
181 183
182 if (cops->tcf_chain == NULL) 184 if (cops->tcf_chain == NULL)
@@ -196,10 +198,11 @@ replay:
196 goto errout; 198 goto errout;
197 199
198 /* Check the chain for existence of proto-tcf with this priority */ 200 /* Check the chain for existence of proto-tcf with this priority */
199 for (back = chain; (tp=*back) != NULL; back = &tp->next) { 201 for (back = chain; (tp = *back) != NULL; back = &tp->next) {
200 if (tp->prio >= prio) { 202 if (tp->prio >= prio) {
201 if (tp->prio == prio) { 203 if (tp->prio == prio) {
202 if (!nprio || (tp->protocol != protocol && protocol)) 204 if (!nprio ||
205 (tp->protocol != protocol && protocol))
203 goto errout; 206 goto errout;
204 } else 207 } else
205 tp = NULL; 208 tp = NULL;
@@ -216,7 +219,8 @@ replay:
216 goto errout; 219 goto errout;
217 220
218 err = -ENOENT; 221 err = -ENOENT;
219 if (n->nlmsg_type != RTM_NEWTFILTER || !(n->nlmsg_flags&NLM_F_CREATE)) 222 if (n->nlmsg_type != RTM_NEWTFILTER ||
223 !(n->nlmsg_flags & NLM_F_CREATE))
220 goto errout; 224 goto errout;
221 225
222 226
@@ -420,7 +424,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
420 424
421 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 425 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
422 return skb->len; 426 return skb->len;
423 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 427 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
428 if (!dev)
424 return skb->len; 429 return skb->len;
425 430
426 if (!tcm->tcm_parent) 431 if (!tcm->tcm_parent)
@@ -429,7 +434,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
429 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); 434 q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
430 if (!q) 435 if (!q)
431 goto out; 436 goto out;
432 if ((cops = q->ops->cl_ops) == NULL) 437 cops = q->ops->cl_ops;
438 if (!cops)
433 goto errout; 439 goto errout;
434 if (cops->tcf_chain == NULL) 440 if (cops->tcf_chain == NULL)
435 goto errout; 441 goto errout;
@@ -444,8 +450,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
444 450
445 s_t = cb->args[0]; 451 s_t = cb->args[0];
446 452
447 for (tp=*chain, t=0; tp; tp = tp->next, t++) { 453 for (tp = *chain, t = 0; tp; tp = tp->next, t++) {
448 if (t < s_t) continue; 454 if (t < s_t)
455 continue;
449 if (TC_H_MAJ(tcm->tcm_info) && 456 if (TC_H_MAJ(tcm->tcm_info) &&
450 TC_H_MAJ(tcm->tcm_info) != tp->prio) 457 TC_H_MAJ(tcm->tcm_info) != tp->prio)
451 continue; 458 continue;
@@ -468,10 +475,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
468 arg.skb = skb; 475 arg.skb = skb;
469 arg.cb = cb; 476 arg.cb = cb;
470 arg.w.stop = 0; 477 arg.w.stop = 0;
471 arg.w.skip = cb->args[1]-1; 478 arg.w.skip = cb->args[1] - 1;
472 arg.w.count = 0; 479 arg.w.count = 0;
473 tp->ops->walk(tp, &arg.w); 480 tp->ops->walk(tp, &arg.w);
474 cb->args[1] = arg.w.count+1; 481 cb->args[1] = arg.w.count + 1;
475 if (arg.w.stop) 482 if (arg.w.stop)
476 break; 483 break;
477 } 484 }
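The cls_api hunks above are mostly checkpatch-style cleanups (assignments pulled out of conditions, operator spacing), but the tc_dump_tfilter lines also show the resumable-walk pattern netlink dumps rely on: the walker skips the entries already sent, counts what it skips or emits, and raises a stop flag when the message fills so the next dump call can resume. A minimal userspace C sketch of that pattern follows; the names (struct item, walk_state) are invented for the example, and the kernel's +1/-1 offset on the stored count is dropped. This is an illustration, not the kernel code.

    /* Resumable walk over a list: skip what earlier calls already emitted. */
    #include <stdio.h>

    struct item { int id; struct item *next; };

    struct walk_state {
        int skip;    /* entries to pass over before emitting again */
        int count;   /* entries skipped or emitted in this call    */
        int stop;    /* set when the output "message" filled up    */
    };

    static void walk(struct item *head, struct walk_state *w, int budget)
    {
        for (struct item *p = head; p; p = p->next) {
            if (w->count < w->skip) {
                w->count++;
                continue;
            }
            if (budget-- == 0) {     /* pretend the skb ran out of room */
                w->stop = 1;
                return;
            }
            printf("dumped item %d\n", p->id);
            w->count++;
        }
    }

    int main(void)
    {
        struct item c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
        int resume = 0;              /* plays the role of cb->args[1] */

        for (;;) {
            struct walk_state w = { .skip = resume, .count = 0, .stop = 0 };

            walk(&a, &w, 2);         /* at most two items per "message" */
            resume = w.count;        /* next call resumes here */
            if (!w.stop)
                break;
        }
        return 0;
    }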
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index efd4f95fd050..8be8872dd571 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -21,14 +21,12 @@
21#include <net/act_api.h> 21#include <net/act_api.h>
22#include <net/pkt_cls.h> 22#include <net/pkt_cls.h>
23 23
24struct basic_head 24struct basic_head {
25{
26 u32 hgenerator; 25 u32 hgenerator;
27 struct list_head flist; 26 struct list_head flist;
28}; 27};
29 28
30struct basic_filter 29struct basic_filter {
31{
32 u32 handle; 30 u32 handle;
33 struct tcf_exts exts; 31 struct tcf_exts exts;
34 struct tcf_ematch_tree ematches; 32 struct tcf_ematch_tree ematches;
@@ -92,8 +90,7 @@ static int basic_init(struct tcf_proto *tp)
92 return 0; 90 return 0;
93} 91}
94 92
95static inline void basic_delete_filter(struct tcf_proto *tp, 93static void basic_delete_filter(struct tcf_proto *tp, struct basic_filter *f)
96 struct basic_filter *f)
97{ 94{
98 tcf_unbind_filter(tp, &f->res); 95 tcf_unbind_filter(tp, &f->res);
99 tcf_exts_destroy(tp, &f->exts); 96 tcf_exts_destroy(tp, &f->exts);
@@ -135,9 +132,9 @@ static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = {
135 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, 132 [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED },
136}; 133};
137 134
138static inline int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f, 135static int basic_set_parms(struct tcf_proto *tp, struct basic_filter *f,
139 unsigned long base, struct nlattr **tb, 136 unsigned long base, struct nlattr **tb,
140 struct nlattr *est) 137 struct nlattr *est)
141{ 138{
142 int err = -EINVAL; 139 int err = -EINVAL;
143 struct tcf_exts e; 140 struct tcf_exts e;
@@ -203,7 +200,7 @@ static int basic_change(struct tcf_proto *tp, unsigned long base, u32 handle,
203 } while (--i > 0 && basic_get(tp, head->hgenerator)); 200 } while (--i > 0 && basic_get(tp, head->hgenerator));
204 201
205 if (i <= 0) { 202 if (i <= 0) {
206 printk(KERN_ERR "Insufficient number of handles\n"); 203 pr_err("Insufficient number of handles\n");
207 goto errout; 204 goto errout;
208 } 205 }
209 206
@@ -268,6 +265,10 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
268 goto nla_put_failure; 265 goto nla_put_failure;
269 266
270 nla_nest_end(skb, nest); 267 nla_nest_end(skb, nest);
268
269 if (tcf_exts_dump_stats(skb, &f->exts, &basic_ext_map) < 0)
270 goto nla_put_failure;
271
271 return skb->len; 272 return skb->len;
272 273
273nla_put_failure: 274nla_put_failure:
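basic_change, only partially visible above, allocates an unused filter handle by bumping a per-head generator and retrying a bounded number of times, printing "Insufficient number of handles" when the budget runs out. A small sketch of that retry loop, assuming an illustrative handle_in_use() predicate in place of basic_get() and an arbitrary retry bound (the full loop body is not shown in this hunk):

    /* Bounded search for an unused handle, like basic_change's generator. */
    #include <stdbool.h>
    #include <stdio.h>

    static bool handle_in_use(unsigned int h)
    {
        return h >= 1 && h <= 3;     /* stand-in for basic_get(): 1..3 taken */
    }

    static unsigned int alloc_handle(unsigned int *generator)
    {
        int attempts = 0x1000;       /* arbitrary bound for the sketch */

        do {
            if (++*generator == 0)   /* never hand out 0: it means "auto" */
                *generator = 1;
        } while (--attempts > 0 && handle_in_use(*generator));

        return attempts > 0 ? *generator : 0;   /* 0 == allocation failed */
    }

    int main(void)
    {
        unsigned int gen = 0;

        printf("allocated handle %u\n", alloc_handle(&gen));   /* prints 4 */
        return 0;
    }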
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 78ef2c5e130b..32a335194ca5 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -34,8 +34,6 @@ struct cgroup_subsys net_cls_subsys = {
34 .populate = cgrp_populate, 34 .populate = cgrp_populate,
35#ifdef CONFIG_NET_CLS_CGROUP 35#ifdef CONFIG_NET_CLS_CGROUP
36 .subsys_id = net_cls_subsys_id, 36 .subsys_id = net_cls_subsys_id,
37#else
38#define net_cls_subsys_id net_cls_subsys.subsys_id
39#endif 37#endif
40 .module = THIS_MODULE, 38 .module = THIS_MODULE,
41}; 39};
@@ -58,7 +56,8 @@ static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
58{ 56{
59 struct cgroup_cls_state *cs; 57 struct cgroup_cls_state *cs;
60 58
61 if (!(cs = kzalloc(sizeof(*cs), GFP_KERNEL))) 59 cs = kzalloc(sizeof(*cs), GFP_KERNEL);
60 if (!cs)
62 return ERR_PTR(-ENOMEM); 61 return ERR_PTR(-ENOMEM);
63 62
64 if (cgrp->parent) 63 if (cgrp->parent)
@@ -96,8 +95,7 @@ static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
96 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files)); 95 return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
97} 96}
98 97
99struct cls_cgroup_head 98struct cls_cgroup_head {
100{
101 u32 handle; 99 u32 handle;
102 struct tcf_exts exts; 100 struct tcf_exts exts;
103 struct tcf_ematch_tree ematches; 101 struct tcf_ematch_tree ematches;
@@ -123,7 +121,7 @@ static int cls_cgroup_classify(struct sk_buff *skb, struct tcf_proto *tp,
123 * calls by looking at the number of nested bh disable calls because 121 * calls by looking at the number of nested bh disable calls because
124 * softirqs always disables bh. 122 * softirqs always disables bh.
125 */ 123 */
126 if (softirq_count() != SOFTIRQ_OFFSET) { 124 if (in_serving_softirq()) {
127 /* If there is an sk_classid we'll use that. */ 125 /* If there is an sk_classid we'll use that. */
128 if (!skb->sk) 126 if (!skb->sk)
129 return -1; 127 return -1;
@@ -168,7 +166,7 @@ static int cls_cgroup_change(struct tcf_proto *tp, unsigned long base,
168 u32 handle, struct nlattr **tca, 166 u32 handle, struct nlattr **tca,
169 unsigned long *arg) 167 unsigned long *arg)
170{ 168{
171 struct nlattr *tb[TCA_CGROUP_MAX+1]; 169 struct nlattr *tb[TCA_CGROUP_MAX + 1];
172 struct cls_cgroup_head *head = tp->root; 170 struct cls_cgroup_head *head = tp->root;
173 struct tcf_ematch_tree t; 171 struct tcf_ematch_tree t;
174 struct tcf_exts e; 172 struct tcf_exts e;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..8ec01391d988 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
@@ -267,7 +276,7 @@ fallback:
267 276
268static u32 flow_get_rtclassid(const struct sk_buff *skb) 277static u32 flow_get_rtclassid(const struct sk_buff *skb)
269{ 278{
270#ifdef CONFIG_NET_CLS_ROUTE 279#ifdef CONFIG_IP_ROUTE_CLASSID
271 if (skb_dst(skb)) 280 if (skb_dst(skb))
272 return skb_dst(skb)->tclassid; 281 return skb_dst(skb)->tclassid;
273#endif 282#endif
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
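The cls_flow change above drops the local has_ports() switch and instead asks a helper for the offset of the port (or SPI) field inside the transport header, re-pulling the header before reading two bytes at network header + ihl*4 + offset and skipping fragments. A userspace sketch of that lookup on a raw IPv4 buffer; the protocol-to-offset table mirrors the common header layouts but only illustrates what proto_ports_offset() provides, and every name here is made up for the example:

    /* Read the source (which = 0) or destination (which = 1) port of an
     * unfragmented IPv4 packet at a protocol-dependent offset. */
    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int port_field_offset(int proto)
    {
        switch (proto) {
        case 6:    /* TCP       */
        case 17:   /* UDP       */
        case 136:  /* UDPLITE   */
        case 132:  /* SCTP      */
        case 33:   /* DCCP      */
        case 50:   /* ESP (SPI) */
            return 0;
        case 51:   /* AH (SPI)  */
            return 4;
        default:
            return -1;           /* no port-like field */
        }
    }

    static unsigned int flow_port(const uint8_t *pkt, size_t len, int which)
    {
        if (len < 20)
            return 0;

        int ihl = (pkt[0] & 0x0F) * 4;                 /* IPv4 header length */
        int frag = ((pkt[6] << 8) | pkt[7]) & 0x3FFF;  /* MF flag or offset  */
        int poff = port_field_offset(pkt[9]);          /* pkt[9] = protocol  */
        uint16_t v;

        if (frag || poff < 0 || len < (size_t)(ihl + poff + which * 2 + 2))
            return 0;
        memcpy(&v, pkt + ihl + poff + which * 2, 2);
        return ntohs(v);
    }

    int main(void)
    {
        /* 20-byte IPv4 header (protocol 17 = UDP) followed by a UDP header */
        uint8_t pkt[28] = { 0x45, 0, 0, 28, 0, 0, 0, 0, 64, 17, 0, 0,
                            10, 0, 0, 1, 10, 0, 0, 2,
                            0x30, 0x39,      /* source port 12345    */
                            0x01, 0xBB,      /* destination port 443 */
                            0, 8, 0, 0 };

        printf("sport=%u dport=%u\n", flow_port(pkt, sizeof(pkt), 0),
               flow_port(pkt, sizeof(pkt), 1));
        return 0;
    }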
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 93b0a7b6f9b4..26e7bc4ffb79 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -31,14 +31,12 @@
31 31
32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *)) 32#define HTSIZE (PAGE_SIZE/sizeof(struct fw_filter *))
33 33
34struct fw_head 34struct fw_head {
35{
36 struct fw_filter *ht[HTSIZE]; 35 struct fw_filter *ht[HTSIZE];
37 u32 mask; 36 u32 mask;
38}; 37};
39 38
40struct fw_filter 39struct fw_filter {
41{
42 struct fw_filter *next; 40 struct fw_filter *next;
43 u32 id; 41 u32 id;
44 struct tcf_result res; 42 struct tcf_result res;
@@ -53,7 +51,7 @@ static const struct tcf_ext_map fw_ext_map = {
53 .police = TCA_FW_POLICE 51 .police = TCA_FW_POLICE
54}; 52};
55 53
56static __inline__ int fw_hash(u32 handle) 54static inline int fw_hash(u32 handle)
57{ 55{
58 if (HTSIZE == 4096) 56 if (HTSIZE == 4096)
59 return ((handle >> 24) & 0xFFF) ^ 57 return ((handle >> 24) & 0xFFF) ^
@@ -82,14 +80,14 @@ static __inline__ int fw_hash(u32 handle)
82static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, 80static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
83 struct tcf_result *res) 81 struct tcf_result *res)
84{ 82{
85 struct fw_head *head = (struct fw_head*)tp->root; 83 struct fw_head *head = (struct fw_head *)tp->root;
86 struct fw_filter *f; 84 struct fw_filter *f;
87 int r; 85 int r;
88 u32 id = skb->mark; 86 u32 id = skb->mark;
89 87
90 if (head != NULL) { 88 if (head != NULL) {
91 id &= head->mask; 89 id &= head->mask;
92 for (f=head->ht[fw_hash(id)]; f; f=f->next) { 90 for (f = head->ht[fw_hash(id)]; f; f = f->next) {
93 if (f->id == id) { 91 if (f->id == id) {
94 *res = f->res; 92 *res = f->res;
95#ifdef CONFIG_NET_CLS_IND 93#ifdef CONFIG_NET_CLS_IND
@@ -105,7 +103,8 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
105 } 103 }
106 } else { 104 } else {
107 /* old method */ 105 /* old method */
108 if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id^tp->q->handle)))) { 106 if (id && (TC_H_MAJ(id) == 0 ||
107 !(TC_H_MAJ(id ^ tp->q->handle)))) {
109 res->classid = id; 108 res->classid = id;
110 res->class = 0; 109 res->class = 0;
111 return 0; 110 return 0;
@@ -117,13 +116,13 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp,
117 116
118static unsigned long fw_get(struct tcf_proto *tp, u32 handle) 117static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
119{ 118{
120 struct fw_head *head = (struct fw_head*)tp->root; 119 struct fw_head *head = (struct fw_head *)tp->root;
121 struct fw_filter *f; 120 struct fw_filter *f;
122 121
123 if (head == NULL) 122 if (head == NULL)
124 return 0; 123 return 0;
125 124
126 for (f=head->ht[fw_hash(handle)]; f; f=f->next) { 125 for (f = head->ht[fw_hash(handle)]; f; f = f->next) {
127 if (f->id == handle) 126 if (f->id == handle)
128 return (unsigned long)f; 127 return (unsigned long)f;
129 } 128 }
@@ -139,8 +138,7 @@ static int fw_init(struct tcf_proto *tp)
139 return 0; 138 return 0;
140} 139}
141 140
142static inline void 141static void fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
143fw_delete_filter(struct tcf_proto *tp, struct fw_filter *f)
144{ 142{
145 tcf_unbind_filter(tp, &f->res); 143 tcf_unbind_filter(tp, &f->res);
146 tcf_exts_destroy(tp, &f->exts); 144 tcf_exts_destroy(tp, &f->exts);
@@ -156,8 +154,8 @@ static void fw_destroy(struct tcf_proto *tp)
156 if (head == NULL) 154 if (head == NULL)
157 return; 155 return;
158 156
159 for (h=0; h<HTSIZE; h++) { 157 for (h = 0; h < HTSIZE; h++) {
160 while ((f=head->ht[h]) != NULL) { 158 while ((f = head->ht[h]) != NULL) {
161 head->ht[h] = f->next; 159 head->ht[h] = f->next;
162 fw_delete_filter(tp, f); 160 fw_delete_filter(tp, f);
163 } 161 }
@@ -167,14 +165,14 @@ static void fw_destroy(struct tcf_proto *tp)
167 165
168static int fw_delete(struct tcf_proto *tp, unsigned long arg) 166static int fw_delete(struct tcf_proto *tp, unsigned long arg)
169{ 167{
170 struct fw_head *head = (struct fw_head*)tp->root; 168 struct fw_head *head = (struct fw_head *)tp->root;
171 struct fw_filter *f = (struct fw_filter*)arg; 169 struct fw_filter *f = (struct fw_filter *)arg;
172 struct fw_filter **fp; 170 struct fw_filter **fp;
173 171
174 if (head == NULL || f == NULL) 172 if (head == NULL || f == NULL)
175 goto out; 173 goto out;
176 174
177 for (fp=&head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) { 175 for (fp = &head->ht[fw_hash(f->id)]; *fp; fp = &(*fp)->next) {
178 if (*fp == f) { 176 if (*fp == f) {
179 tcf_tree_lock(tp); 177 tcf_tree_lock(tp);
180 *fp = f->next; 178 *fp = f->next;
@@ -240,7 +238,7 @@ static int fw_change(struct tcf_proto *tp, unsigned long base,
240 struct nlattr **tca, 238 struct nlattr **tca,
241 unsigned long *arg) 239 unsigned long *arg)
242{ 240{
243 struct fw_head *head = (struct fw_head*)tp->root; 241 struct fw_head *head = (struct fw_head *)tp->root;
244 struct fw_filter *f = (struct fw_filter *) *arg; 242 struct fw_filter *f = (struct fw_filter *) *arg;
245 struct nlattr *opt = tca[TCA_OPTIONS]; 243 struct nlattr *opt = tca[TCA_OPTIONS];
246 struct nlattr *tb[TCA_FW_MAX + 1]; 244 struct nlattr *tb[TCA_FW_MAX + 1];
@@ -302,7 +300,7 @@ errout:
302 300
303static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) 301static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
304{ 302{
305 struct fw_head *head = (struct fw_head*)tp->root; 303 struct fw_head *head = (struct fw_head *)tp->root;
306 int h; 304 int h;
307 305
308 if (head == NULL) 306 if (head == NULL)
@@ -332,7 +330,7 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh,
332 struct sk_buff *skb, struct tcmsg *t) 330 struct sk_buff *skb, struct tcmsg *t)
333{ 331{
334 struct fw_head *head = (struct fw_head *)tp->root; 332 struct fw_head *head = (struct fw_head *)tp->root;
335 struct fw_filter *f = (struct fw_filter*)fh; 333 struct fw_filter *f = (struct fw_filter *)fh;
336 unsigned char *b = skb_tail_pointer(skb); 334 unsigned char *b = skb_tail_pointer(skb);
337 struct nlattr *nest; 335 struct nlattr *nest;
338 336
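cls_fw, touched above mostly for style, keys filters off the (masked) skb mark through a small chained hash table, with fw_hash() folding the 32-bit mark down to a bucket index so every mark bit contributes. A toy userspace sketch of that structure, using an invented 16-bucket table and a nibble-XOR fold in place of the kernel's page-sized table and its fold:

    /* Firewall-mark classification via a small chained hash table. */
    #include <stdint.h>
    #include <stdio.h>

    #define FW_HTSIZE 16     /* toy size; the kernel sizes its table to a page */

    struct fw_flt {
        struct fw_flt *next;
        uint32_t id;         /* masked mark this filter matches */
        int classid;
    };

    static unsigned int fw_hash_fold(uint32_t id)
    {
        /* XOR all nibbles together so every mark bit influences the bucket */
        return (id ^ (id >> 4) ^ (id >> 8) ^ (id >> 12) ^
                (id >> 16) ^ (id >> 20) ^ (id >> 24) ^ (id >> 28)) &
               (FW_HTSIZE - 1);
    }

    static int fw_classify(struct fw_flt **ht, uint32_t mark, uint32_t mask)
    {
        uint32_t id = mark & mask;

        for (struct fw_flt *f = ht[fw_hash_fold(id)]; f; f = f->next)
            if (f->id == id)
                return f->classid;
        return -1;           /* no matching filter */
    }

    int main(void)
    {
        struct fw_flt *ht[FW_HTSIZE] = { 0 };
        struct fw_flt f1 = { NULL, 0x10, 100 };

        ht[fw_hash_fold(f1.id)] = &f1;
        printf("mark 0x10 -> %d\n", fw_classify(ht, 0x10, 0xFFFFFFFFu));
        printf("mark 0x11 -> %d\n", fw_classify(ht, 0x11, 0xFFFFFFFFu));
        return 0;
    }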
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 694dcd85dec8..a907905376df 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -23,34 +23,30 @@
23#include <net/pkt_cls.h> 23#include <net/pkt_cls.h>
24 24
25/* 25/*
26 1. For now we assume that route tags < 256. 26 * 1. For now we assume that route tags < 256.
27 It allows to use direct table lookups, instead of hash tables. 27 * It allows to use direct table lookups, instead of hash tables.
28 2. For now we assume that "from TAG" and "fromdev DEV" statements 28 * 2. For now we assume that "from TAG" and "fromdev DEV" statements
29 are mutually exclusive. 29 * are mutually exclusive.
30 3. "to TAG from ANY" has higher priority, than "to ANY from XXX" 30 * 3. "to TAG from ANY" has higher priority, than "to ANY from XXX"
31 */ 31 */
32 32
33struct route4_fastmap 33struct route4_fastmap {
34{
35 struct route4_filter *filter; 34 struct route4_filter *filter;
36 u32 id; 35 u32 id;
37 int iif; 36 int iif;
38}; 37};
39 38
40struct route4_head 39struct route4_head {
41{
42 struct route4_fastmap fastmap[16]; 40 struct route4_fastmap fastmap[16];
43 struct route4_bucket *table[256+1]; 41 struct route4_bucket *table[256 + 1];
44}; 42};
45 43
46struct route4_bucket 44struct route4_bucket {
47{
48 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */ 45 /* 16 FROM buckets + 16 IIF buckets + 1 wildcard bucket */
49 struct route4_filter *ht[16+16+1]; 46 struct route4_filter *ht[16 + 16 + 1];
50}; 47};
51 48
52struct route4_filter 49struct route4_filter {
53{
54 struct route4_filter *next; 50 struct route4_filter *next;
55 u32 id; 51 u32 id;
56 int iif; 52 int iif;
@@ -61,20 +57,20 @@ struct route4_filter
61 struct route4_bucket *bkt; 57 struct route4_bucket *bkt;
62}; 58};
63 59
64#define ROUTE4_FAILURE ((struct route4_filter*)(-1L)) 60#define ROUTE4_FAILURE ((struct route4_filter *)(-1L))
65 61
66static const struct tcf_ext_map route_ext_map = { 62static const struct tcf_ext_map route_ext_map = {
67 .police = TCA_ROUTE4_POLICE, 63 .police = TCA_ROUTE4_POLICE,
68 .action = TCA_ROUTE4_ACT 64 .action = TCA_ROUTE4_ACT
69}; 65};
70 66
71static __inline__ int route4_fastmap_hash(u32 id, int iif) 67static inline int route4_fastmap_hash(u32 id, int iif)
72{ 68{
73 return id&0xF; 69 return id & 0xF;
74} 70}
75 71
76static inline 72static void
77void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id) 73route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
78{ 74{
79 spinlock_t *root_lock = qdisc_root_sleeping_lock(q); 75 spinlock_t *root_lock = qdisc_root_sleeping_lock(q);
80 76
@@ -83,32 +79,33 @@ void route4_reset_fastmap(struct Qdisc *q, struct route4_head *head, u32 id)
83 spin_unlock_bh(root_lock); 79 spin_unlock_bh(root_lock);
84} 80}
85 81
86static inline void 82static void
87route4_set_fastmap(struct route4_head *head, u32 id, int iif, 83route4_set_fastmap(struct route4_head *head, u32 id, int iif,
88 struct route4_filter *f) 84 struct route4_filter *f)
89{ 85{
90 int h = route4_fastmap_hash(id, iif); 86 int h = route4_fastmap_hash(id, iif);
87
91 head->fastmap[h].id = id; 88 head->fastmap[h].id = id;
92 head->fastmap[h].iif = iif; 89 head->fastmap[h].iif = iif;
93 head->fastmap[h].filter = f; 90 head->fastmap[h].filter = f;
94} 91}
95 92
96static __inline__ int route4_hash_to(u32 id) 93static inline int route4_hash_to(u32 id)
97{ 94{
98 return id&0xFF; 95 return id & 0xFF;
99} 96}
100 97
101static __inline__ int route4_hash_from(u32 id) 98static inline int route4_hash_from(u32 id)
102{ 99{
103 return (id>>16)&0xF; 100 return (id >> 16) & 0xF;
104} 101}
105 102
106static __inline__ int route4_hash_iif(int iif) 103static inline int route4_hash_iif(int iif)
107{ 104{
108 return 16 + ((iif>>16)&0xF); 105 return 16 + ((iif >> 16) & 0xF);
109} 106}
110 107
111static __inline__ int route4_hash_wild(void) 108static inline int route4_hash_wild(void)
112{ 109{
113 return 32; 110 return 32;
114} 111}
@@ -131,21 +128,22 @@ static __inline__ int route4_hash_wild(void)
131static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, 128static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
132 struct tcf_result *res) 129 struct tcf_result *res)
133{ 130{
134 struct route4_head *head = (struct route4_head*)tp->root; 131 struct route4_head *head = (struct route4_head *)tp->root;
135 struct dst_entry *dst; 132 struct dst_entry *dst;
136 struct route4_bucket *b; 133 struct route4_bucket *b;
137 struct route4_filter *f; 134 struct route4_filter *f;
138 u32 id, h; 135 u32 id, h;
139 int iif, dont_cache = 0; 136 int iif, dont_cache = 0;
140 137
141 if ((dst = skb_dst(skb)) == NULL) 138 dst = skb_dst(skb);
139 if (!dst)
142 goto failure; 140 goto failure;
143 141
144 id = dst->tclassid; 142 id = dst->tclassid;
145 if (head == NULL) 143 if (head == NULL)
146 goto old_method; 144 goto old_method;
147 145
148 iif = ((struct rtable*)dst)->fl.iif; 146 iif = ((struct rtable *)dst)->rt_iif;
149 147
150 h = route4_fastmap_hash(id, iif); 148 h = route4_fastmap_hash(id, iif);
151 if (id == head->fastmap[h].id && 149 if (id == head->fastmap[h].id &&
@@ -161,7 +159,8 @@ static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
161 h = route4_hash_to(id); 159 h = route4_hash_to(id);
162 160
163restart: 161restart:
164 if ((b = head->table[h]) != NULL) { 162 b = head->table[h];
163 if (b) {
165 for (f = b->ht[route4_hash_from(id)]; f; f = f->next) 164 for (f = b->ht[route4_hash_from(id)]; f; f = f->next)
166 if (f->id == id) 165 if (f->id == id)
167 ROUTE4_APPLY_RESULT(); 166 ROUTE4_APPLY_RESULT();
@@ -197,8 +196,9 @@ old_method:
197 196
198static inline u32 to_hash(u32 id) 197static inline u32 to_hash(u32 id)
199{ 198{
200 u32 h = id&0xFF; 199 u32 h = id & 0xFF;
201 if (id&0x8000) 200
201 if (id & 0x8000)
202 h += 256; 202 h += 256;
203 return h; 203 return h;
204} 204}
@@ -211,17 +211,17 @@ static inline u32 from_hash(u32 id)
211 if (!(id & 0x8000)) { 211 if (!(id & 0x8000)) {
212 if (id > 255) 212 if (id > 255)
213 return 256; 213 return 256;
214 return id&0xF; 214 return id & 0xF;
215 } 215 }
216 return 16 + (id&0xF); 216 return 16 + (id & 0xF);
217} 217}
218 218
219static unsigned long route4_get(struct tcf_proto *tp, u32 handle) 219static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
220{ 220{
221 struct route4_head *head = (struct route4_head*)tp->root; 221 struct route4_head *head = (struct route4_head *)tp->root;
222 struct route4_bucket *b; 222 struct route4_bucket *b;
223 struct route4_filter *f; 223 struct route4_filter *f;
224 unsigned h1, h2; 224 unsigned int h1, h2;
225 225
226 if (!head) 226 if (!head)
227 return 0; 227 return 0;
@@ -230,11 +230,12 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle)
230 if (h1 > 256) 230 if (h1 > 256)
231 return 0; 231 return 0;
232 232
233 h2 = from_hash(handle>>16); 233 h2 = from_hash(handle >> 16);
234 if (h2 > 32) 234 if (h2 > 32)
235 return 0; 235 return 0;
236 236
237 if ((b = head->table[h1]) != NULL) { 237 b = head->table[h1];
238 if (b) {
238 for (f = b->ht[h2]; f; f = f->next) 239 for (f = b->ht[h2]; f; f = f->next)
239 if (f->handle == handle) 240 if (f->handle == handle)
240 return (unsigned long)f; 241 return (unsigned long)f;
@@ -251,7 +252,7 @@ static int route4_init(struct tcf_proto *tp)
251 return 0; 252 return 0;
252} 253}
253 254
254static inline void 255static void
255route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f) 256route4_delete_filter(struct tcf_proto *tp, struct route4_filter *f)
256{ 257{
257 tcf_unbind_filter(tp, &f->res); 258 tcf_unbind_filter(tp, &f->res);
@@ -267,11 +268,12 @@ static void route4_destroy(struct tcf_proto *tp)
267 if (head == NULL) 268 if (head == NULL)
268 return; 269 return;
269 270
270 for (h1=0; h1<=256; h1++) { 271 for (h1 = 0; h1 <= 256; h1++) {
271 struct route4_bucket *b; 272 struct route4_bucket *b;
272 273
273 if ((b = head->table[h1]) != NULL) { 274 b = head->table[h1];
274 for (h2=0; h2<=32; h2++) { 275 if (b) {
276 for (h2 = 0; h2 <= 32; h2++) {
275 struct route4_filter *f; 277 struct route4_filter *f;
276 278
277 while ((f = b->ht[h2]) != NULL) { 279 while ((f = b->ht[h2]) != NULL) {
@@ -287,9 +289,9 @@ static void route4_destroy(struct tcf_proto *tp)
287 289
288static int route4_delete(struct tcf_proto *tp, unsigned long arg) 290static int route4_delete(struct tcf_proto *tp, unsigned long arg)
289{ 291{
290 struct route4_head *head = (struct route4_head*)tp->root; 292 struct route4_head *head = (struct route4_head *)tp->root;
291 struct route4_filter **fp, *f = (struct route4_filter*)arg; 293 struct route4_filter **fp, *f = (struct route4_filter *)arg;
292 unsigned h = 0; 294 unsigned int h = 0;
293 struct route4_bucket *b; 295 struct route4_bucket *b;
294 int i; 296 int i;
295 297
@@ -299,7 +301,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
299 h = f->handle; 301 h = f->handle;
300 b = f->bkt; 302 b = f->bkt;
301 303
302 for (fp = &b->ht[from_hash(h>>16)]; *fp; fp = &(*fp)->next) { 304 for (fp = &b->ht[from_hash(h >> 16)]; *fp; fp = &(*fp)->next) {
303 if (*fp == f) { 305 if (*fp == f) {
304 tcf_tree_lock(tp); 306 tcf_tree_lock(tp);
305 *fp = f->next; 307 *fp = f->next;
@@ -310,7 +312,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg)
310 312
311 /* Strip tree */ 313 /* Strip tree */
312 314
313 for (i=0; i<=32; i++) 315 for (i = 0; i <= 32; i++)
314 if (b->ht[i]) 316 if (b->ht[i])
315 return 0; 317 return 0;
316 318
@@ -380,7 +382,8 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
380 } 382 }
381 383
382 h1 = to_hash(nhandle); 384 h1 = to_hash(nhandle);
383 if ((b = head->table[h1]) == NULL) { 385 b = head->table[h1];
386 if (!b) {
384 err = -ENOBUFS; 387 err = -ENOBUFS;
385 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); 388 b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL);
386 if (b == NULL) 389 if (b == NULL)
@@ -391,6 +394,7 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
391 tcf_tree_unlock(tp); 394 tcf_tree_unlock(tp);
392 } else { 395 } else {
393 unsigned int h2 = from_hash(nhandle >> 16); 396 unsigned int h2 = from_hash(nhandle >> 16);
397
394 err = -EEXIST; 398 err = -EEXIST;
395 for (fp = b->ht[h2]; fp; fp = fp->next) 399 for (fp = b->ht[h2]; fp; fp = fp->next)
396 if (fp->handle == f->handle) 400 if (fp->handle == f->handle)
@@ -444,7 +448,8 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
444 if (err < 0) 448 if (err < 0)
445 return err; 449 return err;
446 450
447 if ((f = (struct route4_filter*)*arg) != NULL) { 451 f = (struct route4_filter *)*arg;
452 if (f) {
448 if (f->handle != handle && handle) 453 if (f->handle != handle && handle)
449 return -EINVAL; 454 return -EINVAL;
450 455
@@ -481,7 +486,7 @@ static int route4_change(struct tcf_proto *tp, unsigned long base,
481 486
482reinsert: 487reinsert:
483 h = from_hash(f->handle >> 16); 488 h = from_hash(f->handle >> 16);
484 for (fp = &f->bkt->ht[h]; (f1=*fp) != NULL; fp = &f1->next) 489 for (fp = &f->bkt->ht[h]; (f1 = *fp) != NULL; fp = &f1->next)
485 if (f->handle < f1->handle) 490 if (f->handle < f1->handle)
486 break; 491 break;
487 492
@@ -492,7 +497,8 @@ reinsert:
492 if (old_handle && f->handle != old_handle) { 497 if (old_handle && f->handle != old_handle) {
493 th = to_hash(old_handle); 498 th = to_hash(old_handle);
494 h = from_hash(old_handle >> 16); 499 h = from_hash(old_handle >> 16);
495 if ((b = head->table[th]) != NULL) { 500 b = head->table[th];
501 if (b) {
496 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) { 502 for (fp = &b->ht[h]; *fp; fp = &(*fp)->next) {
497 if (*fp == f) { 503 if (*fp == f) {
498 *fp = f->next; 504 *fp = f->next;
@@ -515,7 +521,7 @@ errout:
515static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) 521static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
516{ 522{
517 struct route4_head *head = tp->root; 523 struct route4_head *head = tp->root;
518 unsigned h, h1; 524 unsigned int h, h1;
519 525
520 if (head == NULL) 526 if (head == NULL)
521 arg->stop = 1; 527 arg->stop = 1;
@@ -549,7 +555,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
549static int route4_dump(struct tcf_proto *tp, unsigned long fh, 555static int route4_dump(struct tcf_proto *tp, unsigned long fh,
550 struct sk_buff *skb, struct tcmsg *t) 556 struct sk_buff *skb, struct tcmsg *t)
551{ 557{
552 struct route4_filter *f = (struct route4_filter*)fh; 558 struct route4_filter *f = (struct route4_filter *)fh;
553 unsigned char *b = skb_tail_pointer(skb); 559 unsigned char *b = skb_tail_pointer(skb);
554 struct nlattr *nest; 560 struct nlattr *nest;
555 u32 id; 561 u32 id;
@@ -563,15 +569,15 @@ static int route4_dump(struct tcf_proto *tp, unsigned long fh,
563 if (nest == NULL) 569 if (nest == NULL)
564 goto nla_put_failure; 570 goto nla_put_failure;
565 571
566 if (!(f->handle&0x8000)) { 572 if (!(f->handle & 0x8000)) {
567 id = f->id&0xFF; 573 id = f->id & 0xFF;
568 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id); 574 NLA_PUT_U32(skb, TCA_ROUTE4_TO, id);
569 } 575 }
570 if (f->handle&0x80000000) { 576 if (f->handle & 0x80000000) {
571 if ((f->handle>>16) != 0xFFFF) 577 if ((f->handle >> 16) != 0xFFFF)
572 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif); 578 NLA_PUT_U32(skb, TCA_ROUTE4_IIF, f->iif);
573 } else { 579 } else {
574 id = f->id>>16; 580 id = f->id >> 16;
575 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id); 581 NLA_PUT_U32(skb, TCA_ROUTE4_FROM, id);
576 } 582 }
577 if (f->res.classid) 583 if (f->res.classid)
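The route classifier above keeps a two-level table: to_hash() maps the low handle bits (plus the 0x8000 "fromdev" flag) to one of 256 + 1 buckets, and from_hash() maps the upper bits to one of 16 FROM slots, 16 IIF slots or the wildcard slot. A short sketch of that handle decoding, mirroring the helpers in the hunk above; it is an illustration only:

    /* Decode a route4 handle into its bucket and slot. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int to_bucket(uint32_t handle)
    {
        unsigned int h = handle & 0xFF;       /* "to" route tag */

        if (handle & 0x8000)
            h += 256;        /* fromdev/interface variants use the extra range */
        return h;            /* 0..256: the table holds 256 + 1 buckets */
    }

    static unsigned int from_slot(uint32_t handle)
    {
        uint32_t id = handle >> 16;

        if (!(id & 0x8000))
            return id > 255 ? 256 /* invalid */ : (id & 0xF);  /* FROM slots 0..15 */
        return 16 + (id & 0xF);                                /* IIF slots 16..31 */
    }

    int main(void)
    {
        uint32_t handle = 0x00120034;         /* from tag 0x12, to tag 0x34 */

        printf("bucket %u, slot %u\n", to_bucket(handle), from_slot(handle));
        return 0;
    }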
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 425a1790b048..402c44b241a3 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -66,28 +66,25 @@
66 powerful classification engine. */ 66 powerful classification engine. */
67 67
68 68
69struct rsvp_head 69struct rsvp_head {
70{
71 u32 tmap[256/32]; 70 u32 tmap[256/32];
72 u32 hgenerator; 71 u32 hgenerator;
73 u8 tgenerator; 72 u8 tgenerator;
74 struct rsvp_session *ht[256]; 73 struct rsvp_session *ht[256];
75}; 74};
76 75
77struct rsvp_session 76struct rsvp_session {
78{
79 struct rsvp_session *next; 77 struct rsvp_session *next;
80 __be32 dst[RSVP_DST_LEN]; 78 __be32 dst[RSVP_DST_LEN];
81 struct tc_rsvp_gpi dpi; 79 struct tc_rsvp_gpi dpi;
82 u8 protocol; 80 u8 protocol;
83 u8 tunnelid; 81 u8 tunnelid;
84 /* 16 (src,sport) hash slots, and one wildcard source slot */ 82 /* 16 (src,sport) hash slots, and one wildcard source slot */
85 struct rsvp_filter *ht[16+1]; 83 struct rsvp_filter *ht[16 + 1];
86}; 84};
87 85
88 86
89struct rsvp_filter 87struct rsvp_filter {
90{
91 struct rsvp_filter *next; 88 struct rsvp_filter *next;
92 __be32 src[RSVP_DST_LEN]; 89 __be32 src[RSVP_DST_LEN];
93 struct tc_rsvp_gpi spi; 90 struct tc_rsvp_gpi spi;
@@ -100,17 +97,19 @@ struct rsvp_filter
100 struct rsvp_session *sess; 97 struct rsvp_session *sess;
101}; 98};
102 99
103static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) 100static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104{ 101{
105 unsigned h = (__force __u32)dst[RSVP_DST_LEN-1]; 102 unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
103
106 h ^= h>>16; 104 h ^= h>>16;
107 h ^= h>>8; 105 h ^= h>>8;
108 return (h ^ protocol ^ tunnelid) & 0xFF; 106 return (h ^ protocol ^ tunnelid) & 0xFF;
109} 107}
110 108
111static __inline__ unsigned hash_src(__be32 *src) 109static inline unsigned int hash_src(__be32 *src)
112{ 110{
113 unsigned h = (__force __u32)src[RSVP_DST_LEN-1]; 111 unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
112
114 h ^= h>>16; 113 h ^= h>>16;
115 h ^= h>>8; 114 h ^= h>>8;
116 h ^= h>>4; 115 h ^= h>>4;
@@ -134,10 +133,10 @@ static struct tcf_ext_map rsvp_ext_map = {
134static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, 133static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135 struct tcf_result *res) 134 struct tcf_result *res)
136{ 135{
137 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 136 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
138 struct rsvp_session *s; 137 struct rsvp_session *s;
139 struct rsvp_filter *f; 138 struct rsvp_filter *f;
140 unsigned h1, h2; 139 unsigned int h1, h2;
141 __be32 *dst, *src; 140 __be32 *dst, *src;
142 u8 protocol; 141 u8 protocol;
143 u8 tunnelid = 0; 142 u8 tunnelid = 0;
@@ -162,13 +161,13 @@ restart:
162 src = &nhptr->saddr.s6_addr32[0]; 161 src = &nhptr->saddr.s6_addr32[0];
163 dst = &nhptr->daddr.s6_addr32[0]; 162 dst = &nhptr->daddr.s6_addr32[0];
164 protocol = nhptr->nexthdr; 163 protocol = nhptr->nexthdr;
165 xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr); 164 xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
166#else 165#else
167 src = &nhptr->saddr; 166 src = &nhptr->saddr;
168 dst = &nhptr->daddr; 167 dst = &nhptr->daddr;
169 protocol = nhptr->protocol; 168 protocol = nhptr->protocol;
170 xprt = ((u8*)nhptr) + (nhptr->ihl<<2); 169 xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
171 if (nhptr->frag_off & htons(IP_MF|IP_OFFSET)) 170 if (nhptr->frag_off & htons(IP_MF | IP_OFFSET))
172 return -1; 171 return -1;
173#endif 172#endif
174 173
@@ -176,10 +175,10 @@ restart:
176 h2 = hash_src(src); 175 h2 = hash_src(src);
177 176
178 for (s = sht[h1]; s; s = s->next) { 177 for (s = sht[h1]; s; s = s->next) {
179 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 178 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
180 protocol == s->protocol && 179 protocol == s->protocol &&
181 !(s->dpi.mask & 180 !(s->dpi.mask &
182 (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) && 181 (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
183#if RSVP_DST_LEN == 4 182#if RSVP_DST_LEN == 4
184 dst[0] == s->dst[0] && 183 dst[0] == s->dst[0] &&
185 dst[1] == s->dst[1] && 184 dst[1] == s->dst[1] &&
@@ -188,8 +187,8 @@ restart:
188 tunnelid == s->tunnelid) { 187 tunnelid == s->tunnelid) {
189 188
190 for (f = s->ht[h2]; f; f = f->next) { 189 for (f = s->ht[h2]; f; f = f->next) {
191 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] && 190 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
192 !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key)) 191 !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
193#if RSVP_DST_LEN == 4 192#if RSVP_DST_LEN == 4
194 && 193 &&
195 src[0] == f->src[0] && 194 src[0] == f->src[0] &&
@@ -205,7 +204,7 @@ matched:
205 return 0; 204 return 0;
206 205
207 tunnelid = f->res.classid; 206 tunnelid = f->res.classid;
208 nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr)); 207 nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
209 goto restart; 208 goto restart;
210 } 209 }
211 } 210 }
@@ -224,11 +223,11 @@ matched:
224 223
225static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) 224static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
226{ 225{
227 struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht; 226 struct rsvp_session **sht = ((struct rsvp_head *)tp->root)->ht;
228 struct rsvp_session *s; 227 struct rsvp_session *s;
229 struct rsvp_filter *f; 228 struct rsvp_filter *f;
230 unsigned h1 = handle&0xFF; 229 unsigned int h1 = handle & 0xFF;
231 unsigned h2 = (handle>>8)&0xFF; 230 unsigned int h2 = (handle >> 8) & 0xFF;
232 231
233 if (h2 > 16) 232 if (h2 > 16)
234 return 0; 233 return 0;
@@ -258,7 +257,7 @@ static int rsvp_init(struct tcf_proto *tp)
258 return -ENOBUFS; 257 return -ENOBUFS;
259} 258}
260 259
261static inline void 260static void
262rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) 261rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
263{ 262{
264 tcf_unbind_filter(tp, &f->res); 263 tcf_unbind_filter(tp, &f->res);
@@ -277,13 +276,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
277 276
278 sht = data->ht; 277 sht = data->ht;
279 278
280 for (h1=0; h1<256; h1++) { 279 for (h1 = 0; h1 < 256; h1++) {
281 struct rsvp_session *s; 280 struct rsvp_session *s;
282 281
283 while ((s = sht[h1]) != NULL) { 282 while ((s = sht[h1]) != NULL) {
284 sht[h1] = s->next; 283 sht[h1] = s->next;
285 284
286 for (h2=0; h2<=16; h2++) { 285 for (h2 = 0; h2 <= 16; h2++) {
287 struct rsvp_filter *f; 286 struct rsvp_filter *f;
288 287
289 while ((f = s->ht[h2]) != NULL) { 288 while ((f = s->ht[h2]) != NULL) {
@@ -299,13 +298,13 @@ static void rsvp_destroy(struct tcf_proto *tp)
299 298
300static int rsvp_delete(struct tcf_proto *tp, unsigned long arg) 299static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
301{ 300{
302 struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg; 301 struct rsvp_filter **fp, *f = (struct rsvp_filter *)arg;
303 unsigned h = f->handle; 302 unsigned int h = f->handle;
304 struct rsvp_session **sp; 303 struct rsvp_session **sp;
305 struct rsvp_session *s = f->sess; 304 struct rsvp_session *s = f->sess;
306 int i; 305 int i;
307 306
308 for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) { 307 for (fp = &s->ht[(h >> 8) & 0xFF]; *fp; fp = &(*fp)->next) {
309 if (*fp == f) { 308 if (*fp == f) {
310 tcf_tree_lock(tp); 309 tcf_tree_lock(tp);
311 *fp = f->next; 310 *fp = f->next;
@@ -314,12 +313,12 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
314 313
315 /* Strip tree */ 314 /* Strip tree */
316 315
317 for (i=0; i<=16; i++) 316 for (i = 0; i <= 16; i++)
318 if (s->ht[i]) 317 if (s->ht[i])
319 return 0; 318 return 0;
320 319
321 /* OK, session has no flows */ 320 /* OK, session has no flows */
322 for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF]; 321 for (sp = &((struct rsvp_head *)tp->root)->ht[h & 0xFF];
323 *sp; sp = &(*sp)->next) { 322 *sp; sp = &(*sp)->next) {
324 if (*sp == s) { 323 if (*sp == s) {
325 tcf_tree_lock(tp); 324 tcf_tree_lock(tp);
@@ -337,13 +336,14 @@ static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
337 return 0; 336 return 0;
338} 337}
339 338
340static unsigned gen_handle(struct tcf_proto *tp, unsigned salt) 339static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
341{ 340{
342 struct rsvp_head *data = tp->root; 341 struct rsvp_head *data = tp->root;
343 int i = 0xFFFF; 342 int i = 0xFFFF;
344 343
345 while (i-- > 0) { 344 while (i-- > 0) {
346 u32 h; 345 u32 h;
346
347 if ((data->hgenerator += 0x10000) == 0) 347 if ((data->hgenerator += 0x10000) == 0)
348 data->hgenerator = 0x10000; 348 data->hgenerator = 0x10000;
349 h = data->hgenerator|salt; 349 h = data->hgenerator|salt;
@@ -355,10 +355,10 @@ static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
355 355
356static int tunnel_bts(struct rsvp_head *data) 356static int tunnel_bts(struct rsvp_head *data)
357{ 357{
358 int n = data->tgenerator>>5; 358 int n = data->tgenerator >> 5;
359 u32 b = 1<<(data->tgenerator&0x1F); 359 u32 b = 1 << (data->tgenerator & 0x1F);
360 360
361 if (data->tmap[n]&b) 361 if (data->tmap[n] & b)
362 return 0; 362 return 0;
363 data->tmap[n] |= b; 363 data->tmap[n] |= b;
364 return 1; 364 return 1;
@@ -372,10 +372,10 @@ static void tunnel_recycle(struct rsvp_head *data)
372 372
373 memset(tmap, 0, sizeof(tmap)); 373 memset(tmap, 0, sizeof(tmap));
374 374
375 for (h1=0; h1<256; h1++) { 375 for (h1 = 0; h1 < 256; h1++) {
376 struct rsvp_session *s; 376 struct rsvp_session *s;
377 for (s = sht[h1]; s; s = s->next) { 377 for (s = sht[h1]; s; s = s->next) {
378 for (h2=0; h2<=16; h2++) { 378 for (h2 = 0; h2 <= 16; h2++) {
379 struct rsvp_filter *f; 379 struct rsvp_filter *f;
380 380
381 for (f = s->ht[h2]; f; f = f->next) { 381 for (f = s->ht[h2]; f; f = f->next) {
@@ -395,8 +395,8 @@ static u32 gen_tunnel(struct rsvp_head *data)
395{ 395{
396 int i, k; 396 int i, k;
397 397
398 for (k=0; k<2; k++) { 398 for (k = 0; k < 2; k++) {
399 for (i=255; i>0; i--) { 399 for (i = 255; i > 0; i--) {
400 if (++data->tgenerator == 0) 400 if (++data->tgenerator == 0)
401 data->tgenerator = 1; 401 data->tgenerator = 1;
402 if (tunnel_bts(data)) 402 if (tunnel_bts(data))
@@ -428,7 +428,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
428 struct nlattr *opt = tca[TCA_OPTIONS-1]; 428 struct nlattr *opt = tca[TCA_OPTIONS-1];
429 struct nlattr *tb[TCA_RSVP_MAX + 1]; 429 struct nlattr *tb[TCA_RSVP_MAX + 1];
430 struct tcf_exts e; 430 struct tcf_exts e;
431 unsigned h1, h2; 431 unsigned int h1, h2;
432 __be32 *dst; 432 __be32 *dst;
433 int err; 433 int err;
434 434
@@ -443,7 +443,8 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
443 if (err < 0) 443 if (err < 0)
444 return err; 444 return err;
445 445
446 if ((f = (struct rsvp_filter*)*arg) != NULL) { 446 f = (struct rsvp_filter *)*arg;
447 if (f) {
447 /* Node exists: adjust only classid */ 448 /* Node exists: adjust only classid */
448 449
449 if (f->handle != handle && handle) 450 if (f->handle != handle && handle)
@@ -500,7 +501,7 @@ static int rsvp_change(struct tcf_proto *tp, unsigned long base,
500 goto errout; 501 goto errout;
501 } 502 }
502 503
503 for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) { 504 for (sp = &data->ht[h1]; (s = *sp) != NULL; sp = &s->next) {
504 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] && 505 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
505 pinfo && pinfo->protocol == s->protocol && 506 pinfo && pinfo->protocol == s->protocol &&
506 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 && 507 memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
@@ -523,7 +524,7 @@ insert:
523 tcf_exts_change(tp, &f->exts, &e); 524 tcf_exts_change(tp, &f->exts, &e);
524 525
525 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next) 526 for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
526 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask) 527 if (((*fp)->spi.mask & f->spi.mask) != f->spi.mask)
527 break; 528 break;
528 f->next = *fp; 529 f->next = *fp;
529 wmb(); 530 wmb();
@@ -567,7 +568,7 @@ errout2:
567static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) 568static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
568{ 569{
569 struct rsvp_head *head = tp->root; 570 struct rsvp_head *head = tp->root;
570 unsigned h, h1; 571 unsigned int h, h1;
571 572
572 if (arg->stop) 573 if (arg->stop)
573 return; 574 return;
@@ -598,7 +599,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
598static int rsvp_dump(struct tcf_proto *tp, unsigned long fh, 599static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
599 struct sk_buff *skb, struct tcmsg *t) 600 struct sk_buff *skb, struct tcmsg *t)
600{ 601{
601 struct rsvp_filter *f = (struct rsvp_filter*)fh; 602 struct rsvp_filter *f = (struct rsvp_filter *)fh;
602 struct rsvp_session *s; 603 struct rsvp_session *s;
603 unsigned char *b = skb_tail_pointer(skb); 604 unsigned char *b = skb_tail_pointer(skb);
604 struct nlattr *nest; 605 struct nlattr *nest;
@@ -624,7 +625,7 @@ static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
624 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo); 625 NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
625 if (f->res.classid) 626 if (f->res.classid)
626 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid); 627 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
627 if (((f->handle>>8)&0xFF) != 16) 628 if (((f->handle >> 8) & 0xFF) != 16)
628 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src); 629 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
629 630
630 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0) 631 if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
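cls_rsvp's hash_dst()/hash_src(), reformatted above, fold the last 32 bits of an address down to an 8-bit session bucket or a 4-bit source slot by XORing the value with right-shifted copies of itself, mixing protocol and tunnel id into the destination side. A plain C restatement of that fold for illustration (IPPROTO_RSVP, protocol 46, used as the sample protocol):

    /* XOR-shift folds used for RSVP session and source hashing. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int fold_dst(uint32_t dst, uint8_t protocol, uint8_t tunnelid)
    {
        uint32_t h = dst;

        h ^= h >> 16;
        h ^= h >> 8;
        return (h ^ protocol ^ tunnelid) & 0xFF;     /* 256 session buckets */
    }

    static unsigned int fold_src(uint32_t src)
    {
        uint32_t h = src;

        h ^= h >> 16;
        h ^= h >> 8;
        h ^= h >> 4;
        return h & 0xF;                              /* 16 source slots */
    }

    int main(void)
    {
        uint32_t dst = 0x0A000001, src = 0x0A000002; /* 10.0.0.1 and 10.0.0.2 */

        printf("session bucket %u, source slot %u\n",
               fold_dst(dst, 46, 0), fold_src(src));
        return 0;
    }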
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 20ef330bb918..36667fa64237 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -249,7 +249,7 @@ tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle,
249 * of the hashing index is below the threshold. 249 * of the hashing index is below the threshold.
250 */ 250 */
251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) 251 if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD)
252 cp.hash = (cp.mask >> cp.shift)+1; 252 cp.hash = (cp.mask >> cp.shift) + 1;
253 else 253 else
254 cp.hash = DEFAULT_HASH_SIZE; 254 cp.hash = DEFAULT_HASH_SIZE;
255 } 255 }
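The tcindex hunk above shows the sizing rule: when the range of possible (tc_index & mask) >> shift values is small, the classifier allocates a perfect table with one slot per value, otherwise it falls back to a fixed hash size. A sketch of that decision; the two constant values below are assumptions for the example, not necessarily the kernel's PERFECT_HASH_THRESHOLD/DEFAULT_HASH_SIZE:

    /* Choose between a perfect table and a fixed-size hash. */
    #include <stdio.h>

    #define PERFECT_HASH_THRESHOLD 64     /* assumed value for the sketch */
    #define DEFAULT_HASH_SIZE      64     /* assumed value for the sketch */

    static unsigned int tcindex_table_size(unsigned int mask, unsigned int shift)
    {
        unsigned int range = mask >> shift;

        return range < PERFECT_HASH_THRESHOLD ? range + 1 : DEFAULT_HASH_SIZE;
    }

    int main(void)
    {
        printf("mask 0x0f, shift 0 -> %u slots (perfect)\n",
               tcindex_table_size(0x0f, 0));
        printf("mask 0xffff, shift 0 -> %u slots (hashed)\n",
               tcindex_table_size(0xffff, 0));
        return 0;
    }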
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index b0c2a82178af..3b93fc0c8955 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -42,8 +42,7 @@
42#include <net/act_api.h> 42#include <net/act_api.h>
43#include <net/pkt_cls.h> 43#include <net/pkt_cls.h>
44 44
45struct tc_u_knode 45struct tc_u_knode {
46{
47 struct tc_u_knode *next; 46 struct tc_u_knode *next;
48 u32 handle; 47 u32 handle;
49 struct tc_u_hnode *ht_up; 48 struct tc_u_hnode *ht_up;
@@ -63,19 +62,17 @@ struct tc_u_knode
63 struct tc_u32_sel sel; 62 struct tc_u32_sel sel;
64}; 63};
65 64
66struct tc_u_hnode 65struct tc_u_hnode {
67{
68 struct tc_u_hnode *next; 66 struct tc_u_hnode *next;
69 u32 handle; 67 u32 handle;
70 u32 prio; 68 u32 prio;
71 struct tc_u_common *tp_c; 69 struct tc_u_common *tp_c;
72 int refcnt; 70 int refcnt;
73 unsigned divisor; 71 unsigned int divisor;
74 struct tc_u_knode *ht[1]; 72 struct tc_u_knode *ht[1];
75}; 73};
76 74
77struct tc_u_common 75struct tc_u_common {
78{
79 struct tc_u_hnode *hlist; 76 struct tc_u_hnode *hlist;
80 struct Qdisc *q; 77 struct Qdisc *q;
81 int refcnt; 78 int refcnt;
@@ -87,9 +84,11 @@ static const struct tcf_ext_map u32_ext_map = {
87 .police = TCA_U32_POLICE 84 .police = TCA_U32_POLICE
88}; 85};
89 86
90static __inline__ unsigned u32_hash_fold(__be32 key, struct tc_u32_sel *sel, u8 fshift) 87static inline unsigned int u32_hash_fold(__be32 key,
88 const struct tc_u32_sel *sel,
89 u8 fshift)
91{ 90{
92 unsigned h = ntohl(key & sel->hmask)>>fshift; 91 unsigned int h = ntohl(key & sel->hmask) >> fshift;
93 92
94 return h; 93 return h;
95} 94}
@@ -101,7 +100,7 @@ static int u32_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_re
101 unsigned int off; 100 unsigned int off;
102 } stack[TC_U32_MAXDEPTH]; 101 } stack[TC_U32_MAXDEPTH];
103 102
104 struct tc_u_hnode *ht = (struct tc_u_hnode*)tp->root; 103 struct tc_u_hnode *ht = (struct tc_u_hnode *)tp->root;
105 unsigned int off = skb_network_offset(skb); 104 unsigned int off = skb_network_offset(skb);
106 struct tc_u_knode *n; 105 struct tc_u_knode *n;
107 int sdepth = 0; 106 int sdepth = 0;
@@ -120,7 +119,7 @@ next_knode:
120 struct tc_u32_key *key = n->sel.keys; 119 struct tc_u32_key *key = n->sel.keys;
121 120
122#ifdef CONFIG_CLS_U32_PERF 121#ifdef CONFIG_CLS_U32_PERF
123 n->pf->rcnt +=1; 122 n->pf->rcnt += 1;
124 j = 0; 123 j = 0;
125#endif 124#endif
126 125
@@ -133,14 +132,14 @@ next_knode:
133 } 132 }
134#endif 133#endif
135 134
136 for (i = n->sel.nkeys; i>0; i--, key++) { 135 for (i = n->sel.nkeys; i > 0; i--, key++) {
137 int toff = off + key->off + (off2 & key->offmask); 136 int toff = off + key->off + (off2 & key->offmask);
138 __be32 *data, _data; 137 __be32 *data, hdata;
139 138
140 if (skb_headroom(skb) + toff > INT_MAX) 139 if (skb_headroom(skb) + toff > INT_MAX)
141 goto out; 140 goto out;
142 141
143 data = skb_header_pointer(skb, toff, 4, &_data); 142 data = skb_header_pointer(skb, toff, 4, &hdata);
144 if (!data) 143 if (!data)
145 goto out; 144 goto out;
146 if ((*data ^ key->val) & key->mask) { 145 if ((*data ^ key->val) & key->mask) {
@@ -148,13 +147,13 @@ next_knode:
148 goto next_knode; 147 goto next_knode;
149 } 148 }
150#ifdef CONFIG_CLS_U32_PERF 149#ifdef CONFIG_CLS_U32_PERF
151 n->pf->kcnts[j] +=1; 150 n->pf->kcnts[j] += 1;
152 j++; 151 j++;
153#endif 152#endif
154 } 153 }
155 if (n->ht_down == NULL) { 154 if (n->ht_down == NULL) {
156check_terminal: 155check_terminal:
157 if (n->sel.flags&TC_U32_TERMINAL) { 156 if (n->sel.flags & TC_U32_TERMINAL) {
158 157
159 *res = n->res; 158 *res = n->res;
160#ifdef CONFIG_NET_CLS_IND 159#ifdef CONFIG_NET_CLS_IND
@@ -164,7 +163,7 @@ check_terminal:
164 } 163 }
165#endif 164#endif
166#ifdef CONFIG_CLS_U32_PERF 165#ifdef CONFIG_CLS_U32_PERF
167 n->pf->rhit +=1; 166 n->pf->rhit += 1;
168#endif 167#endif
169 r = tcf_exts_exec(skb, &n->exts, res); 168 r = tcf_exts_exec(skb, &n->exts, res);
170 if (r < 0) { 169 if (r < 0) {
@@ -188,26 +187,26 @@ check_terminal:
188 ht = n->ht_down; 187 ht = n->ht_down;
189 sel = 0; 188 sel = 0;
190 if (ht->divisor) { 189 if (ht->divisor) {
191 __be32 *data, _data; 190 __be32 *data, hdata;
192 191
193 data = skb_header_pointer(skb, off + n->sel.hoff, 4, 192 data = skb_header_pointer(skb, off + n->sel.hoff, 4,
194 &_data); 193 &hdata);
195 if (!data) 194 if (!data)
196 goto out; 195 goto out;
197 sel = ht->divisor & u32_hash_fold(*data, &n->sel, 196 sel = ht->divisor & u32_hash_fold(*data, &n->sel,
198 n->fshift); 197 n->fshift);
199 } 198 }
200 if (!(n->sel.flags&(TC_U32_VAROFFSET|TC_U32_OFFSET|TC_U32_EAT))) 199 if (!(n->sel.flags & (TC_U32_VAROFFSET | TC_U32_OFFSET | TC_U32_EAT)))
201 goto next_ht; 200 goto next_ht;
202 201
203 if (n->sel.flags&(TC_U32_OFFSET|TC_U32_VAROFFSET)) { 202 if (n->sel.flags & (TC_U32_OFFSET | TC_U32_VAROFFSET)) {
204 off2 = n->sel.off + 3; 203 off2 = n->sel.off + 3;
205 if (n->sel.flags & TC_U32_VAROFFSET) { 204 if (n->sel.flags & TC_U32_VAROFFSET) {
206 __be16 *data, _data; 205 __be16 *data, hdata;
207 206
208 data = skb_header_pointer(skb, 207 data = skb_header_pointer(skb,
209 off + n->sel.offoff, 208 off + n->sel.offoff,
210 2, &_data); 209 2, &hdata);
211 if (!data) 210 if (!data)
212 goto out; 211 goto out;
213 off2 += ntohs(n->sel.offmask & *data) >> 212 off2 += ntohs(n->sel.offmask & *data) >>
@@ -215,7 +214,7 @@ check_terminal:
215 } 214 }
216 off2 &= ~3; 215 off2 &= ~3;
217 } 216 }
218 if (n->sel.flags&TC_U32_EAT) { 217 if (n->sel.flags & TC_U32_EAT) {
219 off += off2; 218 off += off2;
220 off2 = 0; 219 off2 = 0;
221 } 220 }
@@ -236,11 +235,11 @@ out:
236 235
237deadloop: 236deadloop:
238 if (net_ratelimit()) 237 if (net_ratelimit())
239 printk(KERN_WARNING "cls_u32: dead loop\n"); 238 pr_warning("cls_u32: dead loop\n");
240 return -1; 239 return -1;
241} 240}
242 241
243static __inline__ struct tc_u_hnode * 242static struct tc_u_hnode *
244u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) 243u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
245{ 244{
246 struct tc_u_hnode *ht; 245 struct tc_u_hnode *ht;
@@ -252,10 +251,10 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle)
252 return ht; 251 return ht;
253} 252}
254 253
255static __inline__ struct tc_u_knode * 254static struct tc_u_knode *
256u32_lookup_key(struct tc_u_hnode *ht, u32 handle) 255u32_lookup_key(struct tc_u_hnode *ht, u32 handle)
257{ 256{
258 unsigned sel; 257 unsigned int sel;
259 struct tc_u_knode *n = NULL; 258 struct tc_u_knode *n = NULL;
260 259
261 sel = TC_U32_HASH(handle); 260 sel = TC_U32_HASH(handle);
@@ -300,7 +299,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c)
300 do { 299 do {
301 if (++tp_c->hgenerator == 0x7FF) 300 if (++tp_c->hgenerator == 0x7FF)
302 tp_c->hgenerator = 1; 301 tp_c->hgenerator = 1;
303 } while (--i>0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); 302 } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20));
304 303
305 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; 304 return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0;
306} 305}
@@ -378,9 +377,9 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode* key)
378static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) 377static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht)
379{ 378{
380 struct tc_u_knode *n; 379 struct tc_u_knode *n;
381 unsigned h; 380 unsigned int h;
382 381
383 for (h=0; h<=ht->divisor; h++) { 382 for (h = 0; h <= ht->divisor; h++) {
384 while ((n = ht->ht[h]) != NULL) { 383 while ((n = ht->ht[h]) != NULL) {
385 ht->ht[h] = n->next; 384 ht->ht[h] = n->next;
386 385
@@ -446,13 +445,13 @@ static void u32_destroy(struct tcf_proto *tp)
446 445
447static int u32_delete(struct tcf_proto *tp, unsigned long arg) 446static int u32_delete(struct tcf_proto *tp, unsigned long arg)
448{ 447{
449 struct tc_u_hnode *ht = (struct tc_u_hnode*)arg; 448 struct tc_u_hnode *ht = (struct tc_u_hnode *)arg;
450 449
451 if (ht == NULL) 450 if (ht == NULL)
452 return 0; 451 return 0;
453 452
454 if (TC_U32_KEY(ht->handle)) 453 if (TC_U32_KEY(ht->handle))
455 return u32_delete_key(tp, (struct tc_u_knode*)ht); 454 return u32_delete_key(tp, (struct tc_u_knode *)ht);
456 455
457 if (tp->root == ht) 456 if (tp->root == ht)
458 return -EINVAL; 457 return -EINVAL;
@@ -470,14 +469,14 @@ static int u32_delete(struct tcf_proto *tp, unsigned long arg)
470static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) 469static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle)
471{ 470{
472 struct tc_u_knode *n; 471 struct tc_u_knode *n;
473 unsigned i = 0x7FF; 472 unsigned int i = 0x7FF;
474 473
475 for (n=ht->ht[TC_U32_HASH(handle)]; n; n = n->next) 474 for (n = ht->ht[TC_U32_HASH(handle)]; n; n = n->next)
476 if (i < TC_U32_NODE(n->handle)) 475 if (i < TC_U32_NODE(n->handle))
477 i = TC_U32_NODE(n->handle); 476 i = TC_U32_NODE(n->handle);
478 i++; 477 i++;
479 478
480 return handle|(i>0xFFF ? 0xFFF : i); 479 return handle | (i > 0xFFF ? 0xFFF : i);
481} 480}
482 481
483static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { 482static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
@@ -566,7 +565,8 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
566 if (err < 0) 565 if (err < 0)
567 return err; 566 return err;
568 567
569 if ((n = (struct tc_u_knode*)*arg) != NULL) { 568 n = (struct tc_u_knode *)*arg;
569 if (n) {
570 if (TC_U32_KEY(n->handle) == 0) 570 if (TC_U32_KEY(n->handle) == 0)
571 return -EINVAL; 571 return -EINVAL;
572 572
@@ -574,7 +574,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
574 } 574 }
575 575
576 if (tb[TCA_U32_DIVISOR]) { 576 if (tb[TCA_U32_DIVISOR]) {
577 unsigned divisor = nla_get_u32(tb[TCA_U32_DIVISOR]); 577 unsigned int divisor = nla_get_u32(tb[TCA_U32_DIVISOR]);
578 578
579 if (--divisor > 0x100) 579 if (--divisor > 0x100)
580 return -EINVAL; 580 return -EINVAL;
@@ -585,7 +585,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle,
585 if (handle == 0) 585 if (handle == 0)
586 return -ENOMEM; 586 return -ENOMEM;
587 } 587 }
588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void*), GFP_KERNEL); 588 ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL);
589 if (ht == NULL) 589 if (ht == NULL)
590 return -ENOBUFS; 590 return -ENOBUFS;
591 ht->tp_c = tp_c; 591 ht->tp_c = tp_c;
@@ -683,7 +683,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
683 struct tc_u_common *tp_c = tp->data; 683 struct tc_u_common *tp_c = tp->data;
684 struct tc_u_hnode *ht; 684 struct tc_u_hnode *ht;
685 struct tc_u_knode *n; 685 struct tc_u_knode *n;
686 unsigned h; 686 unsigned int h;
687 687
688 if (arg->stop) 688 if (arg->stop)
689 return; 689 return;
@@ -717,7 +717,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
717static int u32_dump(struct tcf_proto *tp, unsigned long fh, 717static int u32_dump(struct tcf_proto *tp, unsigned long fh,
718 struct sk_buff *skb, struct tcmsg *t) 718 struct sk_buff *skb, struct tcmsg *t)
719{ 719{
720 struct tc_u_knode *n = (struct tc_u_knode*)fh; 720 struct tc_u_knode *n = (struct tc_u_knode *)fh;
721 struct nlattr *nest; 721 struct nlattr *nest;
722 722
723 if (n == NULL) 723 if (n == NULL)
@@ -730,8 +730,9 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
730 goto nla_put_failure; 730 goto nla_put_failure;
731 731
732 if (TC_U32_KEY(n->handle) == 0) { 732 if (TC_U32_KEY(n->handle) == 0) {
733 struct tc_u_hnode *ht = (struct tc_u_hnode*)fh; 733 struct tc_u_hnode *ht = (struct tc_u_hnode *)fh;
734 u32 divisor = ht->divisor+1; 734 u32 divisor = ht->divisor + 1;
735
735 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor); 736 NLA_PUT_U32(skb, TCA_U32_DIVISOR, divisor);
736 } else { 737 } else {
737 NLA_PUT(skb, TCA_U32_SEL, 738 NLA_PUT(skb, TCA_U32_SEL,
@@ -755,7 +756,7 @@ static int u32_dump(struct tcf_proto *tp, unsigned long fh,
755 goto nla_put_failure; 756 goto nla_put_failure;
756 757
757#ifdef CONFIG_NET_CLS_IND 758#ifdef CONFIG_NET_CLS_IND
758 if(strlen(n->indev)) 759 if (strlen(n->indev))
759 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev); 760 NLA_PUT_STRING(skb, TCA_U32_INDEV, n->indev);
760#endif 761#endif
761#ifdef CONFIG_CLS_U32_PERF 762#ifdef CONFIG_CLS_U32_PERF
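The cls_u32.c hunks above are pure checkpatch-style cleanups: "unsigned" is spelled out as "unsigned int", pointer casts gain a space before the "*", and assignments are pulled out of if () conditions. A minimal sketch of that last pattern, with struct node, lookup() and use() as made-up placeholders rather than anything from cls_u32.c:

    struct node *n;

    /* old form folded the assignment into the test:
     *     if ((n = lookup(arg)) != NULL)
     * the cleaned-up form separates the two steps and tests the
     * plain pointer:
     */
    n = lookup(arg);
    if (n)
        use(n);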
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index bc450397487a..1c8360a2752a 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -33,40 +33,41 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
33 return 0; 33 return 0;
34 34
35 switch (cmp->align) { 35 switch (cmp->align) {
36 case TCF_EM_ALIGN_U8: 36 case TCF_EM_ALIGN_U8:
37 val = *ptr; 37 val = *ptr;
38 break; 38 break;
39 39
40 case TCF_EM_ALIGN_U16: 40 case TCF_EM_ALIGN_U16:
41 val = get_unaligned_be16(ptr); 41 val = get_unaligned_be16(ptr);
42 42
43 if (cmp_needs_transformation(cmp)) 43 if (cmp_needs_transformation(cmp))
44 val = be16_to_cpu(val); 44 val = be16_to_cpu(val);
45 break; 45 break;
46 46
47 case TCF_EM_ALIGN_U32: 47 case TCF_EM_ALIGN_U32:
48 /* Worth checking boundries? The branching seems 48 /* Worth checking boundries? The branching seems
49 * to get worse. Visit again. */ 49 * to get worse. Visit again.
50 val = get_unaligned_be32(ptr); 50 */
51 val = get_unaligned_be32(ptr);
51 52
52 if (cmp_needs_transformation(cmp)) 53 if (cmp_needs_transformation(cmp))
53 val = be32_to_cpu(val); 54 val = be32_to_cpu(val);
54 break; 55 break;
55 56
56 default: 57 default:
57 return 0; 58 return 0;
58 } 59 }
59 60
60 if (cmp->mask) 61 if (cmp->mask)
61 val &= cmp->mask; 62 val &= cmp->mask;
62 63
63 switch (cmp->opnd) { 64 switch (cmp->opnd) {
64 case TCF_EM_OPND_EQ: 65 case TCF_EM_OPND_EQ:
65 return val == cmp->val; 66 return val == cmp->val;
66 case TCF_EM_OPND_LT: 67 case TCF_EM_OPND_LT:
67 return val < cmp->val; 68 return val < cmp->val;
68 case TCF_EM_OPND_GT: 69 case TCF_EM_OPND_GT:
69 return val > cmp->val; 70 return val > cmp->val;
70 } 71 }
71 72
72 return 0; 73 return 0;
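The em_cmp.c hunk is an indentation and comment-style pass; the logic is untouched. One detail worth calling out from the code above: the matcher loads the field with get_unaligned_be16()/get_unaligned_be32() because the offset comes from the user's match configuration and may point anywhere in the packet, so the load must not assume natural alignment. A small sketch of the same idiom, with read_field() and its arguments invented for illustration:

    #include <asm/unaligned.h>

    /* Load a 32-bit big-endian field at an arbitrary packet offset.
     * get_unaligned_be32() is safe on unaligned addresses and returns
     * the value converted to host byte order.
     */
    static u32 read_field(const u8 *pkt, unsigned int offset)
    {
        return get_unaligned_be32(pkt + offset);
    }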
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333c..49130e8abff0 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -47,7 +47,7 @@
47 * on the meta type. Obviously, the length of the data must also 47 * on the meta type. Obviously, the length of the data must also
48 * be provided for non-numeric types. 48 * be provided for non-numeric types.
49 * 49 *
50 * Additionaly, type dependant modifiers such as shift operators 50 * Additionally, type dependent modifiers such as shift operators
51 * or mask may be applied to extend the functionaliy. As of now, 51 * or mask may be applied to extend the functionaliy. As of now,
52 * the variable length type supports shifting the byte string to 52 * the variable length type supports shifting the byte string to
53 * the right, eating up any number of octets and thus supporting 53 * the right, eating up any number of octets and thus supporting
@@ -73,21 +73,18 @@
73#include <net/pkt_cls.h> 73#include <net/pkt_cls.h>
74#include <net/sock.h> 74#include <net/sock.h>
75 75
76struct meta_obj 76struct meta_obj {
77{
78 unsigned long value; 77 unsigned long value;
79 unsigned int len; 78 unsigned int len;
80}; 79};
81 80
82struct meta_value 81struct meta_value {
83{
84 struct tcf_meta_val hdr; 82 struct tcf_meta_val hdr;
85 unsigned long val; 83 unsigned long val;
86 unsigned int len; 84 unsigned int len;
87}; 85};
88 86
89struct meta_match 87struct meta_match {
90{
91 struct meta_value lvalue; 88 struct meta_value lvalue;
92 struct meta_value rvalue; 89 struct meta_value rvalue;
93}; 90};
@@ -223,6 +220,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 220 dst->value = skb->mac_len;
224} 221}
225 222
223META_COLLECTOR(int_rxhash)
224{
225 dst->value = skb_get_rxhash(skb);
226}
227
226/************************************************************************** 228/**************************************************************************
227 * Netfilter 229 * Netfilter
228 **************************************************************************/ 230 **************************************************************************/
@@ -250,7 +252,7 @@ META_COLLECTOR(int_rtclassid)
250 if (unlikely(skb_dst(skb) == NULL)) 252 if (unlikely(skb_dst(skb) == NULL))
251 *err = -1; 253 *err = -1;
252 else 254 else
253#ifdef CONFIG_NET_CLS_ROUTE 255#ifdef CONFIG_IP_ROUTE_CLASSID
254 dst->value = skb_dst(skb)->tclassid; 256 dst->value = skb_dst(skb)->tclassid;
255#else 257#else
256 dst->value = 0; 258 dst->value = 0;
@@ -262,7 +264,7 @@ META_COLLECTOR(int_rtiif)
262 if (unlikely(skb_rtable(skb) == NULL)) 264 if (unlikely(skb_rtable(skb) == NULL))
263 *err = -1; 265 *err = -1;
264 else 266 else
265 dst->value = skb_rtable(skb)->fl.iif; 267 dst->value = skb_rtable(skb)->rt_iif;
266} 268}
267 269
268/************************************************************************** 270/**************************************************************************
@@ -399,7 +401,7 @@ META_COLLECTOR(int_sk_sndbuf)
399META_COLLECTOR(int_sk_alloc) 401META_COLLECTOR(int_sk_alloc)
400{ 402{
401 SKIP_NONLOCAL(skb); 403 SKIP_NONLOCAL(skb);
402 dst->value = skb->sk->sk_allocation; 404 dst->value = (__force int) skb->sk->sk_allocation;
403} 405}
404 406
405META_COLLECTOR(int_sk_route_caps) 407META_COLLECTOR(int_sk_route_caps)
@@ -478,8 +480,7 @@ META_COLLECTOR(int_sk_write_pend)
478 * Meta value collectors assignment table 480 * Meta value collectors assignment table
479 **************************************************************************/ 481 **************************************************************************/
480 482
481struct meta_ops 483struct meta_ops {
482{
483 void (*get)(struct sk_buff *, struct tcf_pkt_info *, 484 void (*get)(struct sk_buff *, struct tcf_pkt_info *,
484 struct meta_value *, struct meta_obj *, int *); 485 struct meta_value *, struct meta_obj *, int *);
485}; 486};
@@ -489,7 +490,7 @@ struct meta_ops
489 490
490/* Meta value operations table listing all meta value collectors and 491/* Meta value operations table listing all meta value collectors and
491 * assigns them to a type and meta id. */ 492 * assigns them to a type and meta id. */
492static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { 493static struct meta_ops __meta_ops[TCF_META_TYPE_MAX + 1][TCF_META_ID_MAX + 1] = {
493 [TCF_META_TYPE_VAR] = { 494 [TCF_META_TYPE_VAR] = {
494 [META_ID(DEV)] = META_FUNC(var_dev), 495 [META_ID(DEV)] = META_FUNC(var_dev),
495 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), 496 [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
@@ -541,10 +542,11 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 542 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 543 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 544 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
545 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 546 }
545}; 547};
546 548
547static inline struct meta_ops * meta_ops(struct meta_value *val) 549static inline struct meta_ops *meta_ops(struct meta_value *val)
548{ 550{
549 return &__meta_ops[meta_type(val)][meta_id(val)]; 551 return &__meta_ops[meta_type(val)][meta_id(val)];
550} 552}
@@ -643,9 +645,8 @@ static int meta_int_dump(struct sk_buff *skb, struct meta_value *v, int tlv)
643{ 645{
644 if (v->len == sizeof(unsigned long)) 646 if (v->len == sizeof(unsigned long))
645 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val); 647 NLA_PUT(skb, tlv, sizeof(unsigned long), &v->val);
646 else if (v->len == sizeof(u32)) { 648 else if (v->len == sizeof(u32))
647 NLA_PUT_U32(skb, tlv, v->val); 649 NLA_PUT_U32(skb, tlv, v->val);
648 }
649 650
650 return 0; 651 return 0;
651 652
@@ -657,8 +658,7 @@ nla_put_failure:
657 * Type specific operations table 658 * Type specific operations table
658 **************************************************************************/ 659 **************************************************************************/
659 660
660struct meta_type_ops 661struct meta_type_ops {
661{
662 void (*destroy)(struct meta_value *); 662 void (*destroy)(struct meta_value *);
663 int (*compare)(struct meta_obj *, struct meta_obj *); 663 int (*compare)(struct meta_obj *, struct meta_obj *);
664 int (*change)(struct meta_value *, struct nlattr *); 664 int (*change)(struct meta_value *, struct nlattr *);
@@ -666,7 +666,7 @@ struct meta_type_ops
666 int (*dump)(struct sk_buff *, struct meta_value *, int); 666 int (*dump)(struct sk_buff *, struct meta_value *, int);
667}; 667};
668 668
669static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = { 669static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX + 1] = {
670 [TCF_META_TYPE_VAR] = { 670 [TCF_META_TYPE_VAR] = {
671 .destroy = meta_var_destroy, 671 .destroy = meta_var_destroy,
672 .compare = meta_var_compare, 672 .compare = meta_var_compare,
@@ -682,7 +682,7 @@ static struct meta_type_ops __meta_type_ops[TCF_META_TYPE_MAX+1] = {
682 } 682 }
683}; 683};
684 684
685static inline struct meta_type_ops * meta_type_ops(struct meta_value *v) 685static inline struct meta_type_ops *meta_type_ops(struct meta_value *v)
686{ 686{
687 return &__meta_type_ops[meta_type(v)]; 687 return &__meta_type_ops[meta_type(v)];
688} 688}
@@ -707,7 +707,7 @@ static int meta_get(struct sk_buff *skb, struct tcf_pkt_info *info,
707 return err; 707 return err;
708 708
709 if (meta_type_ops(v)->apply_extras) 709 if (meta_type_ops(v)->apply_extras)
710 meta_type_ops(v)->apply_extras(v, dst); 710 meta_type_ops(v)->apply_extras(v, dst);
711 711
712 return 0; 712 return 0;
713} 713}
@@ -726,12 +726,12 @@ static int em_meta_match(struct sk_buff *skb, struct tcf_ematch *m,
726 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value); 726 r = meta_type_ops(&meta->lvalue)->compare(&l_value, &r_value);
727 727
728 switch (meta->lvalue.hdr.op) { 728 switch (meta->lvalue.hdr.op) {
729 case TCF_EM_OPND_EQ: 729 case TCF_EM_OPND_EQ:
730 return !r; 730 return !r;
731 case TCF_EM_OPND_LT: 731 case TCF_EM_OPND_LT:
732 return r < 0; 732 return r < 0;
733 case TCF_EM_OPND_GT: 733 case TCF_EM_OPND_GT:
734 return r > 0; 734 return r > 0;
735 } 735 }
736 736
737 return 0; 737 return 0;
@@ -765,7 +765,7 @@ static inline int meta_change_data(struct meta_value *dst, struct nlattr *nla)
765 765
766static inline int meta_is_supported(struct meta_value *val) 766static inline int meta_is_supported(struct meta_value *val)
767{ 767{
768 return (!meta_id(val) || meta_ops(val)->get); 768 return !meta_id(val) || meta_ops(val)->get;
769} 769}
770 770
771static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = { 771static const struct nla_policy meta_policy[TCA_EM_META_MAX + 1] = {
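Beyond the brace and spelling cleanups, the em_meta.c hunks add one new meta id: an integer collector exposing the skb's receive hash. Pulling the two halves of that addition together (META_COLLECTOR, META_ID and META_FUNC are helper macros defined earlier in em_meta.c, outside the quoted context), a new integer collector needs exactly these two pieces:

    META_COLLECTOR(int_rxhash)
    {
        /* skb_get_rxhash() returns the flow hash, computing it on
         * demand when the driver did not supply one
         */
        dst->value = skb_get_rxhash(skb);
    }

    /* ... plus the matching slot in the integer section of __meta_ops[]: */
    [META_ID(RXHASH)]    = META_FUNC(int_rxhash),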
diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c
index 1a4176aee6e5..a3bed07a008b 100644
--- a/net/sched/em_nbyte.c
+++ b/net/sched/em_nbyte.c
@@ -18,8 +18,7 @@
18#include <linux/tc_ematch/tc_em_nbyte.h> 18#include <linux/tc_ematch/tc_em_nbyte.h>
19#include <net/pkt_cls.h> 19#include <net/pkt_cls.h>
20 20
21struct nbyte_data 21struct nbyte_data {
22{
23 struct tcf_em_nbyte hdr; 22 struct tcf_em_nbyte hdr;
24 char pattern[0]; 23 char pattern[0];
25}; 24};
diff --git a/net/sched/em_text.c b/net/sched/em_text.c
index 763253257411..15d353d2e4be 100644
--- a/net/sched/em_text.c
+++ b/net/sched/em_text.c
@@ -19,8 +19,7 @@
19#include <linux/tc_ematch/tc_em_text.h> 19#include <linux/tc_ematch/tc_em_text.h>
20#include <net/pkt_cls.h> 20#include <net/pkt_cls.h>
21 21
22struct text_match 22struct text_match {
23{
24 u16 from_offset; 23 u16 from_offset;
25 u16 to_offset; 24 u16 to_offset;
26 u8 from_layer; 25 u8 from_layer;
@@ -103,7 +102,8 @@ retry:
103 102
104static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m) 103static void em_text_destroy(struct tcf_proto *tp, struct tcf_ematch *m)
105{ 104{
106 textsearch_destroy(EM_TEXT_PRIV(m)->config); 105 if (EM_TEXT_PRIV(m) && EM_TEXT_PRIV(m)->config)
106 textsearch_destroy(EM_TEXT_PRIV(m)->config);
107} 107}
108 108
109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m) 109static int em_text_dump(struct sk_buff *skb, struct tcf_ematch *m)
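The em_text.c change hardens the destructor: textsearch_destroy() is now called only when the match's private data and its textsearch config actually exist, so tearing down a match that never completed its setup no longer dereferences a NULL pointer. The same defensive-destroy shape in isolation, with struct foo and release_config() as placeholder names:

    static void foo_destroy(struct foo *f)
    {
        /* destroy paths can see a half-initialized object, so check
         * every pointer a failed or partial setup may have left NULL
         * before handing it to the real release routine
         */
        if (f && f->config)
            release_config(f->config);
    }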
diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c
index 953f1479f7da..797bdb88c010 100644
--- a/net/sched/em_u32.c
+++ b/net/sched/em_u32.c
@@ -35,7 +35,7 @@ static int em_u32_match(struct sk_buff *skb, struct tcf_ematch *em,
35 if (!tcf_valid_offset(skb, ptr, sizeof(u32))) 35 if (!tcf_valid_offset(skb, ptr, sizeof(u32)))
36 return 0; 36 return 0;
37 37
38 return !(((*(__be32*) ptr) ^ key->val) & key->mask); 38 return !(((*(__be32 *) ptr) ^ key->val) & key->mask);
39} 39}
40 40
41static struct tcf_ematch_ops em_u32_ops = { 41static struct tcf_ematch_ops em_u32_ops = {
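Only the cast spacing changes in em_u32.c, but the one-liner above is a compact value/mask match worth spelling out: XOR the packet word with the expected value, mask off the don't-care bits, and the field matches exactly when the result is zero. Restated as a stand-alone helper (the function name is invented; the expression is the same as in em_u32_match()):

    /* true iff (word & mask) == (val & mask) */
    static int u32_key_matches(__be32 word, __be32 val, __be32 mask)
    {
        return !((word ^ val) & mask);
    }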
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 5e37da961f80..88d93eb92507 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -93,7 +93,7 @@
93static LIST_HEAD(ematch_ops); 93static LIST_HEAD(ematch_ops);
94static DEFINE_RWLOCK(ematch_mod_lock); 94static DEFINE_RWLOCK(ematch_mod_lock);
95 95
96static inline struct tcf_ematch_ops * tcf_em_lookup(u16 kind) 96static struct tcf_ematch_ops *tcf_em_lookup(u16 kind)
97{ 97{
98 struct tcf_ematch_ops *e = NULL; 98 struct tcf_ematch_ops *e = NULL;
99 99
@@ -163,8 +163,8 @@ void tcf_em_unregister(struct tcf_ematch_ops *ops)
163} 163}
164EXPORT_SYMBOL(tcf_em_unregister); 164EXPORT_SYMBOL(tcf_em_unregister);
165 165
166static inline struct tcf_ematch * tcf_em_get_match(struct tcf_ematch_tree *tree, 166static inline struct tcf_ematch *tcf_em_get_match(struct tcf_ematch_tree *tree,
167 int index) 167 int index)
168{ 168{
169 return &tree->matches[index]; 169 return &tree->matches[index];
170} 170}
@@ -184,7 +184,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
184 184
185 if (em_hdr->kind == TCF_EM_CONTAINER) { 185 if (em_hdr->kind == TCF_EM_CONTAINER) {
186 /* Special ematch called "container", carries an index 186 /* Special ematch called "container", carries an index
187 * referencing an external ematch sequence. */ 187 * referencing an external ematch sequence.
188 */
188 u32 ref; 189 u32 ref;
189 190
190 if (data_len < sizeof(ref)) 191 if (data_len < sizeof(ref))
@@ -195,7 +196,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
195 goto errout; 196 goto errout;
196 197
197 /* We do not allow backward jumps to avoid loops and jumps 198 /* We do not allow backward jumps to avoid loops and jumps
198 * to our own position are of course illegal. */ 199 * to our own position are of course illegal.
200 */
199 if (ref <= idx) 201 if (ref <= idx)
200 goto errout; 202 goto errout;
201 203
@@ -208,7 +210,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
208 * which automatically releases the reference again, therefore 210 * which automatically releases the reference again, therefore
209 * the module MUST not be given back under any circumstances 211 * the module MUST not be given back under any circumstances
210 * here. Be aware, the destroy function assumes that the 212 * here. Be aware, the destroy function assumes that the
211 * module is held if the ops field is non zero. */ 213 * module is held if the ops field is non zero.
214 */
212 em->ops = tcf_em_lookup(em_hdr->kind); 215 em->ops = tcf_em_lookup(em_hdr->kind);
213 216
214 if (em->ops == NULL) { 217 if (em->ops == NULL) {
@@ -221,7 +224,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
221 if (em->ops) { 224 if (em->ops) {
222 /* We dropped the RTNL mutex in order to 225 /* We dropped the RTNL mutex in order to
223 * perform the module load. Tell the caller 226 * perform the module load. Tell the caller
224 * to replay the request. */ 227 * to replay the request.
228 */
225 module_put(em->ops->owner); 229 module_put(em->ops->owner);
226 err = -EAGAIN; 230 err = -EAGAIN;
227 } 231 }
@@ -230,7 +234,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
230 } 234 }
231 235
232 /* ematch module provides expected length of data, so we 236 /* ematch module provides expected length of data, so we
233 * can do a basic sanity check. */ 237 * can do a basic sanity check.
238 */
234 if (em->ops->datalen && data_len < em->ops->datalen) 239 if (em->ops->datalen && data_len < em->ops->datalen)
235 goto errout; 240 goto errout;
236 241
@@ -246,7 +251,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
246 * TCF_EM_SIMPLE may be specified stating that the 251 * TCF_EM_SIMPLE may be specified stating that the
247 * data only consists of a u32 integer and the module 252 * data only consists of a u32 integer and the module
248 * does not expected a memory reference but rather 253 * does not expected a memory reference but rather
249 * the value carried. */ 254 * the value carried.
255 */
250 if (em_hdr->flags & TCF_EM_SIMPLE) { 256 if (em_hdr->flags & TCF_EM_SIMPLE) {
251 if (data_len < sizeof(u32)) 257 if (data_len < sizeof(u32))
252 goto errout; 258 goto errout;
@@ -334,7 +340,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
334 * The array of rt attributes is parsed in the order as they are 340 * The array of rt attributes is parsed in the order as they are
335 * provided, their type must be incremental from 1 to n. Even 341 * provided, their type must be incremental from 1 to n. Even
336 * if it does not serve any real purpose, a failure of sticking 342 * if it does not serve any real purpose, a failure of sticking
337 * to this policy will result in parsing failure. */ 343 * to this policy will result in parsing failure.
344 */
338 for (idx = 0; nla_ok(rt_match, list_len); idx++) { 345 for (idx = 0; nla_ok(rt_match, list_len); idx++) {
339 err = -EINVAL; 346 err = -EINVAL;
340 347
@@ -359,7 +366,8 @@ int tcf_em_tree_validate(struct tcf_proto *tp, struct nlattr *nla,
359 /* Check if the number of matches provided by userspace actually 366 /* Check if the number of matches provided by userspace actually
360 * complies with the array of matches. The number was used for 367 * complies with the array of matches. The number was used for
361 * the validation of references and a mismatch could lead to 368 * the validation of references and a mismatch could lead to
362 * undefined references during the matching process. */ 369 * undefined references during the matching process.
370 */
363 if (idx != tree_hdr->nmatches) { 371 if (idx != tree_hdr->nmatches) {
364 err = -EINVAL; 372 err = -EINVAL;
365 goto errout_abort; 373 goto errout_abort;
@@ -449,7 +457,7 @@ int tcf_em_tree_dump(struct sk_buff *skb, struct tcf_ematch_tree *tree, int tlv)
449 .flags = em->flags 457 .flags = em->flags
450 }; 458 };
451 459
452 NLA_PUT(skb, i+1, sizeof(em_hdr), &em_hdr); 460 NLA_PUT(skb, i + 1, sizeof(em_hdr), &em_hdr);
453 461
454 if (em->ops && em->ops->dump) { 462 if (em->ops && em->ops->dump) {
455 if (em->ops->dump(skb, em) < 0) 463 if (em->ops->dump(skb, em) < 0)
@@ -478,6 +486,7 @@ static inline int tcf_em_match(struct sk_buff *skb, struct tcf_ematch *em,
478 struct tcf_pkt_info *info) 486 struct tcf_pkt_info *info)
479{ 487{
480 int r = em->ops->match(skb, em, info); 488 int r = em->ops->match(skb, em, info);
489
481 return tcf_em_is_inverted(em) ? !r : r; 490 return tcf_em_is_inverted(em) ? !r : r;
482} 491}
483 492
@@ -527,8 +536,8 @@ pop_stack:
527 536
528stack_overflow: 537stack_overflow:
529 if (net_ratelimit()) 538 if (net_ratelimit())
530 printk(KERN_WARNING "tc ematch: local stack overflow," 539 pr_warning("tc ematch: local stack overflow,"
531 " increase NET_EMATCH_STACK\n"); 540 " increase NET_EMATCH_STACK\n");
532 return -1; 541 return -1;
533} 542}
534EXPORT_SYMBOL(__tcf_em_tree_match); 543EXPORT_SYMBOL(__tcf_em_tree_match);
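ematch.c itself only picks up the newer multi-line comment style and swaps printk(KERN_WARNING ...) for pr_warning(). The surrounding pattern is the part to imitate: the message sits behind net_ratelimit() because __tcf_em_tree_match() runs per packet, and an unthrottled printk there could flood the log. A generic sketch of that idiom, with the condition and message text as placeholders:

    /* rate-limit diagnostics emitted from per-packet code */
    if (unlikely(error_condition)) {
        if (net_ratelimit())
            pr_warning("subsystem: recoverable fast-path error\n");
        return -1;
    }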
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..6b8627661c98 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -187,7 +187,7 @@ int unregister_qdisc(struct Qdisc_ops *qops)
187 int err = -ENOENT; 187 int err = -ENOENT;
188 188
189 write_lock(&qdisc_mod_lock); 189 write_lock(&qdisc_mod_lock);
190 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next) 190 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
191 if (q == qops) 191 if (q == qops)
192 break; 192 break;
193 if (q) { 193 if (q) {
@@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
240 if (q) 240 if (q)
241 goto out; 241 goto out;
242 242
243 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); 243 if (dev_ingress_queue(dev))
244 q = qdisc_match_from_root(
245 dev_ingress_queue(dev)->qdisc_sleeping,
246 handle);
244out: 247out:
245 return q; 248 return q;
246} 249}
@@ -318,7 +321,9 @@ void qdisc_put_rtab(struct qdisc_rate_table *tab)
318 if (!tab || --tab->refcnt) 321 if (!tab || --tab->refcnt)
319 return; 322 return;
320 323
321 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) { 324 for (rtabp = &qdisc_rtab_list;
325 (rtab = *rtabp) != NULL;
326 rtabp = &rtab->next) {
322 if (rtab == tab) { 327 if (rtab == tab) {
323 *rtabp = rtab->next; 328 *rtabp = rtab->next;
324 kfree(rtab); 329 kfree(rtab);
@@ -360,7 +365,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 365 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 366 }
362 367
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 368 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 369 return ERR_PTR(-EINVAL);
365 370
366 spin_lock(&qdisc_stab_lock); 371 spin_lock(&qdisc_stab_lock);
@@ -393,6 +398,11 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
393 return stab; 398 return stab;
394} 399}
395 400
401static void stab_kfree_rcu(struct rcu_head *head)
402{
403 kfree(container_of(head, struct qdisc_size_table, rcu));
404}
405
396void qdisc_put_stab(struct qdisc_size_table *tab) 406void qdisc_put_stab(struct qdisc_size_table *tab)
397{ 407{
398 if (!tab) 408 if (!tab)
@@ -402,7 +412,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
402 412
403 if (--tab->refcnt == 0) { 413 if (--tab->refcnt == 0) {
404 list_del(&tab->list); 414 list_del(&tab->list);
405 kfree(tab); 415 call_rcu_bh(&tab->rcu, stab_kfree_rcu);
406 } 416 }
407 417
408 spin_unlock(&qdisc_stab_lock); 418 spin_unlock(&qdisc_stab_lock);
@@ -425,7 +435,7 @@ nla_put_failure:
425 return -1; 435 return -1;
426} 436}
427 437
428void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab) 438void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
429{ 439{
430 int pkt_len, slot; 440 int pkt_len, slot;
431 441
@@ -451,14 +461,13 @@ out:
451 pkt_len = 1; 461 pkt_len = 1;
452 qdisc_skb_cb(skb)->pkt_len = pkt_len; 462 qdisc_skb_cb(skb)->pkt_len = pkt_len;
453} 463}
454EXPORT_SYMBOL(qdisc_calculate_pkt_len); 464EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
455 465
456void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) 466void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
457{ 467{
458 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { 468 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
459 printk(KERN_WARNING 469 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
460 "%s: %s qdisc %X: is non-work-conserving?\n", 470 txt, qdisc->ops->id, qdisc->handle >> 16);
461 txt, qdisc->ops->id, qdisc->handle >> 16);
462 qdisc->flags |= TCQ_F_WARN_NONWC; 471 qdisc->flags |= TCQ_F_WARN_NONWC;
463 } 472 }
464} 473}
@@ -469,7 +478,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
469 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 478 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
470 timer); 479 timer);
471 480
472 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 481 qdisc_unthrottled(wd->qdisc);
473 __netif_schedule(qdisc_root(wd->qdisc)); 482 __netif_schedule(qdisc_root(wd->qdisc));
474 483
475 return HRTIMER_NORESTART; 484 return HRTIMER_NORESTART;
@@ -491,7 +500,7 @@ void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
491 &qdisc_root_sleeping(wd->qdisc)->state)) 500 &qdisc_root_sleeping(wd->qdisc)->state))
492 return; 501 return;
493 502
494 wd->qdisc->flags |= TCQ_F_THROTTLED; 503 qdisc_throttled(wd->qdisc);
495 time = ktime_set(0, 0); 504 time = ktime_set(0, 0);
496 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires)); 505 time = ktime_add_ns(time, PSCHED_TICKS2NS(expires));
497 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS); 506 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
@@ -501,7 +510,7 @@ EXPORT_SYMBOL(qdisc_watchdog_schedule);
501void qdisc_watchdog_cancel(struct qdisc_watchdog *wd) 510void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
502{ 511{
503 hrtimer_cancel(&wd->timer); 512 hrtimer_cancel(&wd->timer);
504 wd->qdisc->flags &= ~TCQ_F_THROTTLED; 513 qdisc_unthrottled(wd->qdisc);
505} 514}
506EXPORT_SYMBOL(qdisc_watchdog_cancel); 515EXPORT_SYMBOL(qdisc_watchdog_cancel);
507 516
@@ -622,7 +631,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
622 autohandle = TC_H_MAKE(0x80000000U, 0); 631 autohandle = TC_H_MAKE(0x80000000U, 0);
623 } while (qdisc_lookup(dev, autohandle) && --i > 0); 632 } while (qdisc_lookup(dev, autohandle) && --i > 0);
624 633
625 return i>0 ? autohandle : 0; 634 return i > 0 ? autohandle : 0;
626} 635}
627 636
628void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) 637void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
@@ -690,6 +699,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
690 (new && new->flags & TCQ_F_INGRESS)) { 699 (new && new->flags & TCQ_F_INGRESS)) {
691 num_q = 1; 700 num_q = 1;
692 ingress = 1; 701 ingress = 1;
702 if (!dev_ingress_queue(dev))
703 return -ENOENT;
693 } 704 }
694 705
695 if (dev->flags & IFF_UP) 706 if (dev->flags & IFF_UP)
@@ -701,7 +712,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
701 } 712 }
702 713
703 for (i = 0; i < num_q; i++) { 714 for (i = 0; i < num_q; i++) {
704 struct netdev_queue *dev_queue = &dev->rx_queue; 715 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
705 716
706 if (!ingress) 717 if (!ingress)
707 dev_queue = netdev_get_tx_queue(dev, i); 718 dev_queue = netdev_get_tx_queue(dev, i);
@@ -829,7 +840,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
829 err = PTR_ERR(stab); 840 err = PTR_ERR(stab);
830 goto err_out4; 841 goto err_out4;
831 } 842 }
832 sch->stab = stab; 843 rcu_assign_pointer(sch->stab, stab);
833 } 844 }
834 if (tca[TCA_RATE]) { 845 if (tca[TCA_RATE]) {
835 spinlock_t *root_lock; 846 spinlock_t *root_lock;
@@ -869,7 +880,7 @@ err_out4:
869 * Any broken qdiscs that would require a ops->reset() here? 880 * Any broken qdiscs that would require a ops->reset() here?
870 * The qdisc was never in action so it shouldn't be necessary. 881 * The qdisc was never in action so it shouldn't be necessary.
871 */ 882 */
872 qdisc_put_stab(sch->stab); 883 qdisc_put_stab(rtnl_dereference(sch->stab));
873 if (ops->destroy) 884 if (ops->destroy)
874 ops->destroy(sch); 885 ops->destroy(sch);
875 goto err_out3; 886 goto err_out3;
@@ -877,7 +888,7 @@ err_out4:
877 888
878static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) 889static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
879{ 890{
880 struct qdisc_size_table *stab = NULL; 891 struct qdisc_size_table *ostab, *stab = NULL;
881 int err = 0; 892 int err = 0;
882 893
883 if (tca[TCA_OPTIONS]) { 894 if (tca[TCA_OPTIONS]) {
@@ -894,8 +905,9 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
894 return PTR_ERR(stab); 905 return PTR_ERR(stab);
895 } 906 }
896 907
897 qdisc_put_stab(sch->stab); 908 ostab = rtnl_dereference(sch->stab);
898 sch->stab = stab; 909 rcu_assign_pointer(sch->stab, stab);
910 qdisc_put_stab(ostab);
899 911
900 if (tca[TCA_RATE]) { 912 if (tca[TCA_RATE]) {
901 /* NB: ignores errors from replace_estimator 913 /* NB: ignores errors from replace_estimator
@@ -910,9 +922,8 @@ out:
910 return 0; 922 return 0;
911} 923}
912 924
913struct check_loop_arg 925struct check_loop_arg {
914{ 926 struct qdisc_walker w;
915 struct qdisc_walker w;
916 struct Qdisc *p; 927 struct Qdisc *p;
917 int depth; 928 int depth;
918}; 929};
@@ -965,7 +976,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
965 struct Qdisc *p = NULL; 976 struct Qdisc *p = NULL;
966 int err; 977 int err;
967 978
968 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 979 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
980 if (!dev)
969 return -ENODEV; 981 return -ENODEV;
970 982
971 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 983 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -975,11 +987,12 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
975 if (clid) { 987 if (clid) {
976 if (clid != TC_H_ROOT) { 988 if (clid != TC_H_ROOT) {
977 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) { 989 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
978 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 990 p = qdisc_lookup(dev, TC_H_MAJ(clid));
991 if (!p)
979 return -ENOENT; 992 return -ENOENT;
980 q = qdisc_leaf(p, clid); 993 q = qdisc_leaf(p, clid);
981 } else { /* ingress */ 994 } else if (dev_ingress_queue(dev)) {
982 q = dev->rx_queue.qdisc_sleeping; 995 q = dev_ingress_queue(dev)->qdisc_sleeping;
983 } 996 }
984 } else { 997 } else {
985 q = dev->qdisc; 998 q = dev->qdisc;
@@ -990,7 +1003,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
990 if (tcm->tcm_handle && q->handle != tcm->tcm_handle) 1003 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
991 return -EINVAL; 1004 return -EINVAL;
992 } else { 1005 } else {
993 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) 1006 q = qdisc_lookup(dev, tcm->tcm_handle);
1007 if (!q)
994 return -ENOENT; 1008 return -ENOENT;
995 } 1009 }
996 1010
@@ -1002,7 +1016,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1002 return -EINVAL; 1016 return -EINVAL;
1003 if (q->handle == 0) 1017 if (q->handle == 0)
1004 return -ENOENT; 1018 return -ENOENT;
1005 if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0) 1019 err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
1020 if (err != 0)
1006 return err; 1021 return err;
1007 } else { 1022 } else {
1008 qdisc_notify(net, skb, n, clid, NULL, q); 1023 qdisc_notify(net, skb, n, clid, NULL, q);
@@ -1011,7 +1026,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1011} 1026}
1012 1027
1013/* 1028/*
1014 Create/change qdisc. 1029 * Create/change qdisc.
1015 */ 1030 */
1016 1031
1017static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) 1032static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
@@ -1030,7 +1045,8 @@ replay:
1030 clid = tcm->tcm_parent; 1045 clid = tcm->tcm_parent;
1031 q = p = NULL; 1046 q = p = NULL;
1032 1047
1033 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1048 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1049 if (!dev)
1034 return -ENODEV; 1050 return -ENODEV;
1035 1051
1036 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1052 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1040,11 +1056,12 @@ replay:
1040 if (clid) { 1056 if (clid) {
1041 if (clid != TC_H_ROOT) { 1057 if (clid != TC_H_ROOT) {
1042 if (clid != TC_H_INGRESS) { 1058 if (clid != TC_H_INGRESS) {
1043 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 1059 p = qdisc_lookup(dev, TC_H_MAJ(clid));
1060 if (!p)
1044 return -ENOENT; 1061 return -ENOENT;
1045 q = qdisc_leaf(p, clid); 1062 q = qdisc_leaf(p, clid);
1046 } else { /*ingress */ 1063 } else if (dev_ingress_queue_create(dev)) {
1047 q = dev->rx_queue.qdisc_sleeping; 1064 q = dev_ingress_queue(dev)->qdisc_sleeping;
1048 } 1065 }
1049 } else { 1066 } else {
1050 q = dev->qdisc; 1067 q = dev->qdisc;
@@ -1056,13 +1073,14 @@ replay:
1056 1073
1057 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) { 1074 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
1058 if (tcm->tcm_handle) { 1075 if (tcm->tcm_handle) {
1059 if (q && !(n->nlmsg_flags&NLM_F_REPLACE)) 1076 if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
1060 return -EEXIST; 1077 return -EEXIST;
1061 if (TC_H_MIN(tcm->tcm_handle)) 1078 if (TC_H_MIN(tcm->tcm_handle))
1062 return -EINVAL; 1079 return -EINVAL;
1063 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL) 1080 q = qdisc_lookup(dev, tcm->tcm_handle);
1081 if (!q)
1064 goto create_n_graft; 1082 goto create_n_graft;
1065 if (n->nlmsg_flags&NLM_F_EXCL) 1083 if (n->nlmsg_flags & NLM_F_EXCL)
1066 return -EEXIST; 1084 return -EEXIST;
1067 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1085 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1068 return -EINVAL; 1086 return -EINVAL;
@@ -1072,7 +1090,7 @@ replay:
1072 atomic_inc(&q->refcnt); 1090 atomic_inc(&q->refcnt);
1073 goto graft; 1091 goto graft;
1074 } else { 1092 } else {
1075 if (q == NULL) 1093 if (!q)
1076 goto create_n_graft; 1094 goto create_n_graft;
1077 1095
1078 /* This magic test requires explanation. 1096 /* This magic test requires explanation.
@@ -1094,9 +1112,9 @@ replay:
1094 * For now we select create/graft, if 1112 * For now we select create/graft, if
1095 * user gave KIND, which does not match existing. 1113 * user gave KIND, which does not match existing.
1096 */ 1114 */
1097 if ((n->nlmsg_flags&NLM_F_CREATE) && 1115 if ((n->nlmsg_flags & NLM_F_CREATE) &&
1098 (n->nlmsg_flags&NLM_F_REPLACE) && 1116 (n->nlmsg_flags & NLM_F_REPLACE) &&
1099 ((n->nlmsg_flags&NLM_F_EXCL) || 1117 ((n->nlmsg_flags & NLM_F_EXCL) ||
1100 (tca[TCA_KIND] && 1118 (tca[TCA_KIND] &&
1101 nla_strcmp(tca[TCA_KIND], q->ops->id)))) 1119 nla_strcmp(tca[TCA_KIND], q->ops->id))))
1102 goto create_n_graft; 1120 goto create_n_graft;
@@ -1111,7 +1129,7 @@ replay:
1111 /* Change qdisc parameters */ 1129 /* Change qdisc parameters */
1112 if (q == NULL) 1130 if (q == NULL)
1113 return -ENOENT; 1131 return -ENOENT;
1114 if (n->nlmsg_flags&NLM_F_EXCL) 1132 if (n->nlmsg_flags & NLM_F_EXCL)
1115 return -EEXIST; 1133 return -EEXIST;
1116 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id)) 1134 if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
1117 return -EINVAL; 1135 return -EINVAL;
@@ -1121,13 +1139,16 @@ replay:
1121 return err; 1139 return err;
1122 1140
1123create_n_graft: 1141create_n_graft:
1124 if (!(n->nlmsg_flags&NLM_F_CREATE)) 1142 if (!(n->nlmsg_flags & NLM_F_CREATE))
1125 return -ENOENT; 1143 return -ENOENT;
1126 if (clid == TC_H_INGRESS) 1144 if (clid == TC_H_INGRESS) {
1127 q = qdisc_create(dev, &dev->rx_queue, p, 1145 if (dev_ingress_queue(dev))
1128 tcm->tcm_parent, tcm->tcm_parent, 1146 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1129 tca, &err); 1147 tcm->tcm_parent, tcm->tcm_parent,
1130 else { 1148 tca, &err);
1149 else
1150 err = -ENOENT;
1151 } else {
1131 struct netdev_queue *dev_queue; 1152 struct netdev_queue *dev_queue;
1132 1153
1133 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1154 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
@@ -1165,6 +1186,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1165 struct nlmsghdr *nlh; 1186 struct nlmsghdr *nlh;
1166 unsigned char *b = skb_tail_pointer(skb); 1187 unsigned char *b = skb_tail_pointer(skb);
1167 struct gnet_dump d; 1188 struct gnet_dump d;
1189 struct qdisc_size_table *stab;
1168 1190
1169 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags); 1191 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1170 tcm = NLMSG_DATA(nlh); 1192 tcm = NLMSG_DATA(nlh);
@@ -1180,7 +1202,8 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
1180 goto nla_put_failure; 1202 goto nla_put_failure;
1181 q->qstats.qlen = q->q.qlen; 1203 q->qstats.qlen = q->q.qlen;
1182 1204
1183 if (q->stab && qdisc_dump_stab(skb, q->stab) < 0) 1205 stab = rtnl_dereference(q->stab);
1206 if (stab && qdisc_dump_stab(skb, stab) < 0)
1184 goto nla_put_failure; 1207 goto nla_put_failure;
1185 1208
1186 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, 1209 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
@@ -1224,16 +1247,19 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
1224 return -ENOBUFS; 1247 return -ENOBUFS;
1225 1248
1226 if (old && !tc_qdisc_dump_ignore(old)) { 1249 if (old && !tc_qdisc_dump_ignore(old)) {
1227 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) 1250 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq,
1251 0, RTM_DELQDISC) < 0)
1228 goto err_out; 1252 goto err_out;
1229 } 1253 }
1230 if (new && !tc_qdisc_dump_ignore(new)) { 1254 if (new && !tc_qdisc_dump_ignore(new)) {
1231 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) 1255 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq,
1256 old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
1232 goto err_out; 1257 goto err_out;
1233 } 1258 }
1234 1259
1235 if (skb->len) 1260 if (skb->len)
1236 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1261 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
1262 n->nlmsg_flags & NLM_F_ECHO);
1237 1263
1238err_out: 1264err_out:
1239 kfree_skb(skb); 1265 kfree_skb(skb);
@@ -1265,7 +1291,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
1265 q_idx++; 1291 q_idx++;
1266 continue; 1292 continue;
1267 } 1293 }
1268 if (!tc_qdisc_dump_ignore(q) && 1294 if (!tc_qdisc_dump_ignore(q) &&
1269 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, 1295 tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
1270 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) 1296 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
1271 goto done; 1297 goto done;
@@ -1304,8 +1330,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1304 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) 1330 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1305 goto done; 1331 goto done;
1306 1332
1307 dev_queue = &dev->rx_queue; 1333 dev_queue = dev_ingress_queue(dev);
1308 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) 1334 if (dev_queue &&
1335 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1336 &q_idx, s_q_idx) < 0)
1309 goto done; 1337 goto done;
1310 1338
1311cont: 1339cont:
@@ -1344,7 +1372,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1344 u32 qid = TC_H_MAJ(clid); 1372 u32 qid = TC_H_MAJ(clid);
1345 int err; 1373 int err;
1346 1374
1347 if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1375 dev = __dev_get_by_index(net, tcm->tcm_ifindex);
1376 if (!dev)
1348 return -ENODEV; 1377 return -ENODEV;
1349 1378
1350 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL); 1379 err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1379,9 +1408,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1379 qid = dev->qdisc->handle; 1408 qid = dev->qdisc->handle;
1380 1409
1381 /* Now qid is genuine qdisc handle consistent 1410 /* Now qid is genuine qdisc handle consistent
1382 both with parent and child. 1411 * both with parent and child.
1383 1412 *
1384 TC_H_MAJ(pid) still may be unspecified, complete it now. 1413 * TC_H_MAJ(pid) still may be unspecified, complete it now.
1385 */ 1414 */
1386 if (pid) 1415 if (pid)
1387 pid = TC_H_MAKE(qid, pid); 1416 pid = TC_H_MAKE(qid, pid);
@@ -1391,7 +1420,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1391 } 1420 }
1392 1421
1393 /* OK. Locate qdisc */ 1422 /* OK. Locate qdisc */
1394 if ((q = qdisc_lookup(dev, qid)) == NULL) 1423 q = qdisc_lookup(dev, qid);
1424 if (!q)
1395 return -ENOENT; 1425 return -ENOENT;
1396 1426
1397 /* An check that it supports classes */ 1427 /* An check that it supports classes */
@@ -1411,13 +1441,14 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
1411 1441
1412 if (cl == 0) { 1442 if (cl == 0) {
1413 err = -ENOENT; 1443 err = -ENOENT;
1414 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE)) 1444 if (n->nlmsg_type != RTM_NEWTCLASS ||
1445 !(n->nlmsg_flags & NLM_F_CREATE))
1415 goto out; 1446 goto out;
1416 } else { 1447 } else {
1417 switch (n->nlmsg_type) { 1448 switch (n->nlmsg_type) {
1418 case RTM_NEWTCLASS: 1449 case RTM_NEWTCLASS:
1419 err = -EEXIST; 1450 err = -EEXIST;
1420 if (n->nlmsg_flags&NLM_F_EXCL) 1451 if (n->nlmsg_flags & NLM_F_EXCL)
1421 goto out; 1452 goto out;
1422 break; 1453 break;
1423 case RTM_DELTCLASS: 1454 case RTM_DELTCLASS:
@@ -1509,14 +1540,14 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
1509 return -EINVAL; 1540 return -EINVAL;
1510 } 1541 }
1511 1542
1512 return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO); 1543 return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
1544 n->nlmsg_flags & NLM_F_ECHO);
1513} 1545}
1514 1546
1515struct qdisc_dump_args 1547struct qdisc_dump_args {
1516{ 1548 struct qdisc_walker w;
1517 struct qdisc_walker w; 1549 struct sk_buff *skb;
1518 struct sk_buff *skb; 1550 struct netlink_callback *cb;
1519 struct netlink_callback *cb;
1520}; 1551};
1521 1552
1522static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) 1553static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
@@ -1578,7 +1609,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1578 1609
1579static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) 1610static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1580{ 1611{
1581 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh); 1612 struct tcmsg *tcm = (struct tcmsg *)NLMSG_DATA(cb->nlh);
1582 struct net *net = sock_net(skb->sk); 1613 struct net *net = sock_net(skb->sk);
1583 struct netdev_queue *dev_queue; 1614 struct netdev_queue *dev_queue;
1584 struct net_device *dev; 1615 struct net_device *dev;
@@ -1586,7 +1617,8 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1586 1617
1587 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm))) 1618 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1588 return 0; 1619 return 0;
1589 if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL) 1620 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1621 if (!dev)
1590 return 0; 1622 return 0;
1591 1623
1592 s_t = cb->args[0]; 1624 s_t = cb->args[0];
@@ -1595,8 +1627,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1595 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 1627 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1596 goto done; 1628 goto done;
1597 1629
1598 dev_queue = &dev->rx_queue; 1630 dev_queue = dev_ingress_queue(dev);
1599 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) 1631 if (dev_queue &&
1632 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1633 &t, s_t) < 0)
1600 goto done; 1634 goto done;
1601 1635
1602done: 1636done:
@@ -1607,19 +1641,22 @@ done:
1607} 1641}
1608 1642
1609/* Main classifier routine: scans classifier chain attached 1643/* Main classifier routine: scans classifier chain attached
1610 to this qdisc, (optionally) tests for protocol and asks 1644 * to this qdisc, (optionally) tests for protocol and asks
1611 specific classifiers. 1645 * specific classifiers.
1612 */ 1646 */
1613int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp, 1647int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
1614 struct tcf_result *res) 1648 struct tcf_result *res)
1615{ 1649{
1616 __be16 protocol = skb->protocol; 1650 __be16 protocol = skb->protocol;
1617 int err = 0; 1651 int err;
1618 1652
1619 for (; tp; tp = tp->next) { 1653 for (; tp; tp = tp->next) {
1620 if ((tp->protocol == protocol || 1654 if (tp->protocol != protocol &&
1621 tp->protocol == htons(ETH_P_ALL)) && 1655 tp->protocol != htons(ETH_P_ALL))
1622 (err = tp->classify(skb, tp, res)) >= 0) { 1656 continue;
1657 err = tp->classify(skb, tp, res);
1658
1659 if (err >= 0) {
1623#ifdef CONFIG_NET_CLS_ACT 1660#ifdef CONFIG_NET_CLS_ACT
1624 if (err != TC_ACT_RECLASSIFY && skb->tc_verd) 1661 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1625 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0); 1662 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
@@ -1635,12 +1672,10 @@ int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1635 struct tcf_result *res) 1672 struct tcf_result *res)
1636{ 1673{
1637 int err = 0; 1674 int err = 0;
1638 __be16 protocol;
1639#ifdef CONFIG_NET_CLS_ACT 1675#ifdef CONFIG_NET_CLS_ACT
1640 struct tcf_proto *otp = tp; 1676 struct tcf_proto *otp = tp;
1641reclassify: 1677reclassify:
1642#endif 1678#endif
1643 protocol = skb->protocol;
1644 1679
1645 err = tc_classify_compat(skb, tp, res); 1680 err = tc_classify_compat(skb, tp, res);
1646#ifdef CONFIG_NET_CLS_ACT 1681#ifdef CONFIG_NET_CLS_ACT
@@ -1650,11 +1685,11 @@ reclassify:
1650 1685
1651 if (verd++ >= MAX_REC_LOOP) { 1686 if (verd++ >= MAX_REC_LOOP) {
1652 if (net_ratelimit()) 1687 if (net_ratelimit())
1653 printk(KERN_NOTICE 1688 pr_notice("%s: packet reclassify loop"
1654 "%s: packet reclassify loop"
1655 " rule prio %u protocol %02x\n", 1689 " rule prio %u protocol %02x\n",
1656 tp->q->ops->id, 1690 tp->q->ops->id,
1657 tp->prio & 0xffff, ntohs(tp->protocol)); 1691 tp->prio & 0xffff,
1692 ntohs(tp->protocol));
1658 return TC_ACT_SHOT; 1693 return TC_ACT_SHOT;
1659 } 1694 }
1660 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd); 1695 skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
@@ -1747,7 +1782,7 @@ static int __init pktsched_init(void)
1747 1782
1748 err = register_pernet_subsys(&psched_net_ops); 1783 err = register_pernet_subsys(&psched_net_ops);
1749 if (err) { 1784 if (err) {
1750 printk(KERN_ERR "pktsched_init: " 1785 pr_err("pktsched_init: "
1751 "cannot initialize per netns operations\n"); 1786 "cannot initialize per netns operations\n");
1752 return err; 1787 return err;
1753 } 1788 }
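The one real behavioural change in the sch_api.c hunks is that sch->stab becomes an RCU-managed pointer: the dump and change paths shown above read it with rtnl_dereference() (they hold RTNL), writers publish a replacement with rcu_assign_pointer(), and the old table is only kfree()d after a grace period via call_rcu_bh() and the new stab_kfree_rcu() callback. The replace step in qdisc_change() reduces to this sequence, restated from the hunk with comments added:

    static void stab_kfree_rcu(struct rcu_head *head)
    {
        kfree(container_of(head, struct qdisc_size_table, rcu));
    }

    /* in qdisc_change(), with RTNL held and "stab" the new table */
    ostab = rtnl_dereference(sch->stab);   /* current table         */
    rcu_assign_pointer(sch->stab, stab);   /* publish the new one   */
    qdisc_put_stab(ostab);                 /* drop the old reference;
                                            * the final put frees it
                                            * through call_rcu_bh() */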
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6318e1136b83..3f08158b8688 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -275,8 +275,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
275 goto err_out; 275 goto err_out;
276 } 276 }
277 flow->filter_list = NULL; 277 flow->filter_list = NULL;
278 flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 278 flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
279 &pfifo_qdisc_ops, classid);
280 if (!flow->q) 279 if (!flow->q)
281 flow->q = &noop_qdisc; 280 flow->q = &noop_qdisc;
282 pr_debug("atm_tc_change: qdisc %p\n", flow->q); 281 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -320,7 +319,7 @@ static int atm_tc_delete(struct Qdisc *sch, unsigned long arg)
320 * creation), and one for the reference held when calling delete. 319 * creation), and one for the reference held when calling delete.
321 */ 320 */
322 if (flow->ref < 2) { 321 if (flow->ref < 2) {
323 printk(KERN_ERR "atm_tc_delete: flow->ref == %d\n", flow->ref); 322 pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
324 return -EINVAL; 323 return -EINVAL;
325 } 324 }
326 if (flow->ref > 2) 325 if (flow->ref > 2)
@@ -385,12 +384,12 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
385 } 384 }
386 } 385 }
387 flow = NULL; 386 flow = NULL;
388 done: 387done:
389 ; 388 ;
390 } 389 }
391 if (!flow) 390 if (!flow) {
392 flow = &p->link; 391 flow = &p->link;
393 else { 392 } else {
394 if (flow->vcc) 393 if (flow->vcc)
395 ATM_SKB(skb)->atm_options = flow->vcc->atm_options; 394 ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
396 /*@@@ looks good ... but it's not supposed to work :-) */ 395 /*@@@ looks good ... but it's not supposed to work :-) */
@@ -423,10 +422,8 @@ drop: __maybe_unused
423 } 422 }
424 return ret; 423 return ret;
425 } 424 }
426 sch->bstats.bytes += qdisc_pkt_len(skb); 425 qdisc_bstats_update(sch, skb);
427 sch->bstats.packets++; 426 bstats_update(&flow->bstats, skb);
428 flow->bstats.bytes += qdisc_pkt_len(skb);
429 flow->bstats.packets++;
430 /* 427 /*
431 * Okay, this may seem weird. We pretend we've dropped the packet if 428 * Okay, this may seem weird. We pretend we've dropped the packet if
432 * it goes via ATM. The reason for this is that the outer qdisc 429 * it goes via ATM. The reason for this is that the outer qdisc
@@ -543,7 +540,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
543 INIT_LIST_HEAD(&p->flows); 540 INIT_LIST_HEAD(&p->flows);
544 INIT_LIST_HEAD(&p->link.list); 541 INIT_LIST_HEAD(&p->link.list);
545 list_add(&p->link.list, &p->flows); 542 list_add(&p->link.list, &p->flows);
546 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 543 p->link.q = qdisc_create_dflt(sch->dev_queue,
547 &pfifo_qdisc_ops, sch->handle); 544 &pfifo_qdisc_ops, sch->handle);
548 if (!p->link.q) 545 if (!p->link.q)
549 p->link.q = &noop_qdisc; 546 p->link.q = &noop_qdisc;
@@ -579,8 +576,7 @@ static void atm_tc_destroy(struct Qdisc *sch)
579 576
580 list_for_each_entry_safe(flow, tmp, &p->flows, list) { 577 list_for_each_entry_safe(flow, tmp, &p->flows, list) {
581 if (flow->ref > 1) 578 if (flow->ref > 1)
582 printk(KERN_ERR "atm_destroy: %p->ref = %d\n", flow, 579 pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
583 flow->ref);
584 atm_tc_put(sch, (unsigned long)flow); 580 atm_tc_put(sch, (unsigned long)flow);
585 } 581 }
586 tasklet_kill(&p->task); 582 tasklet_kill(&p->task);
@@ -619,9 +615,8 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
619 } 615 }
620 if (flow->excess) 616 if (flow->excess)
621 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid); 617 NLA_PUT_U32(skb, TCA_ATM_EXCESS, flow->classid);
622 else { 618 else
623 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0); 619 NLA_PUT_U32(skb, TCA_ATM_EXCESS, 0);
624 }
625 620
626 nla_nest_end(skb, nest); 621 nla_nest_end(skb, nest);
627 return skb->len; 622 return skb->len;
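sch_atm.c gets three mechanical conversions, all visible above: printk(KERN_ERR ...) becomes pr_err(), qdisc_create_dflt() drops its net_device argument (the queue already identifies the device), and the open-coded byte/packet accounting is replaced by the shared bstats helpers, which derive both counters from the skb. The four manual increments in atm_tc_enqueue() collapse to:

    /* was:
     *     sch->bstats.bytes   += qdisc_pkt_len(skb);
     *     sch->bstats.packets++;
     *     flow->bstats.bytes  += qdisc_pkt_len(skb);
     *     flow->bstats.packets++;
     */
    qdisc_bstats_update(sch, skb);         /* qdisc-level counters */
    bstats_update(&flow->bstats, skb);     /* per-class counters   */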
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 28c01ef5abc8..24d94c097b35 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -72,8 +72,7 @@
72struct cbq_sched_data; 72struct cbq_sched_data;
73 73
74 74
75struct cbq_class 75struct cbq_class {
76{
77 struct Qdisc_class_common common; 76 struct Qdisc_class_common common;
78 struct cbq_class *next_alive; /* next class with backlog in this priority band */ 77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
79 78
@@ -139,19 +138,18 @@ struct cbq_class
139 int refcnt; 138 int refcnt;
140 int filters; 139 int filters;
141 140
142 struct cbq_class *defaults[TC_PRIO_MAX+1]; 141 struct cbq_class *defaults[TC_PRIO_MAX + 1];
143}; 142};
144 143
145struct cbq_sched_data 144struct cbq_sched_data {
146{
147 struct Qdisc_class_hash clhash; /* Hash table of all classes */ 145 struct Qdisc_class_hash clhash; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1]; 146 int nclasses[TC_CBQ_MAXPRIO + 1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1]; 147 unsigned int quanta[TC_CBQ_MAXPRIO + 1];
150 148
151 struct cbq_class link; 149 struct cbq_class link;
152 150
153 unsigned activemask; 151 unsigned int activemask;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes 152 struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
155 with backlog */ 153 with backlog */
156 154
157#ifdef CONFIG_NET_CLS_ACT 155#ifdef CONFIG_NET_CLS_ACT
@@ -162,7 +160,7 @@ struct cbq_sched_data
162 int tx_len; 160 int tx_len;
163 psched_time_t now; /* Cached timestamp */ 161 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */ 162 psched_time_t now_rt; /* Cached real time */
165 unsigned pmask; 163 unsigned int pmask;
166 164
167 struct hrtimer delay_timer; 165 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer, 166 struct qdisc_watchdog watchdog; /* Watchdog timer,
@@ -175,9 +173,9 @@ struct cbq_sched_data
175}; 173};
176 174
177 175
178#define L2T(cl,len) qdisc_l2t((cl)->R_tab,len) 176#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
179 177
180static __inline__ struct cbq_class * 178static inline struct cbq_class *
181cbq_class_lookup(struct cbq_sched_data *q, u32 classid) 179cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
182{ 180{
183 struct Qdisc_class_common *clc; 181 struct Qdisc_class_common *clc;
@@ -193,25 +191,27 @@ cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
193static struct cbq_class * 191static struct cbq_class *
194cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) 192cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
195{ 193{
196 struct cbq_class *cl, *new; 194 struct cbq_class *cl;
197 195
198 for (cl = this->tparent; cl; cl = cl->tparent) 196 for (cl = this->tparent; cl; cl = cl->tparent) {
199 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this) 197 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
200 return new;
201 198
199 if (new != NULL && new != this)
200 return new;
201 }
202 return NULL; 202 return NULL;
203} 203}
204 204
205#endif 205#endif
206 206
207/* Classify packet. The procedure is pretty complicated, but 207/* Classify packet. The procedure is pretty complicated, but
208 it allows us to combine link sharing and priority scheduling 208 * it allows us to combine link sharing and priority scheduling
209 transparently. 209 * transparently.
210 210 *
211 Namely, you can put link sharing rules (f.e. route based) at root of CBQ, 211 * Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
212 so that it resolves to split nodes. Then packets are classified 212 * so that it resolves to split nodes. Then packets are classified
213 by logical priority, or a more specific classifier may be attached 213 * by logical priority, or a more specific classifier may be attached
214 to the split node. 214 * to the split node.
215 */ 215 */
216 216
217static struct cbq_class * 217static struct cbq_class *
@@ -227,7 +227,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227 /* 227 /*
228 * Step 1. If skb->priority points to one of our classes, use it. 228 * Step 1. If skb->priority points to one of our classes, use it.
229 */ 229 */
230 if (TC_H_MAJ(prio^sch->handle) == 0 && 230 if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
231 (cl = cbq_class_lookup(q, prio)) != NULL) 231 (cl = cbq_class_lookup(q, prio)) != NULL)
232 return cl; 232 return cl;
233 233
@@ -243,10 +243,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0) 243 (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
244 goto fallback; 244 goto fallback;
245 245
246 if ((cl = (void*)res.class) == NULL) { 246 cl = (void *)res.class;
247 if (!cl) {
247 if (TC_H_MAJ(res.classid)) 248 if (TC_H_MAJ(res.classid))
248 cl = cbq_class_lookup(q, res.classid); 249 cl = cbq_class_lookup(q, res.classid);
249 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL) 250 else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
250 cl = defmap[TC_PRIO_BESTEFFORT]; 251 cl = defmap[TC_PRIO_BESTEFFORT];
251 252
252 if (cl == NULL || cl->level >= head->level) 253 if (cl == NULL || cl->level >= head->level)
@@ -282,7 +283,7 @@ fallback:
282 * Step 4. No success... 283 * Step 4. No success...
283 */ 284 */
284 if (TC_H_MAJ(prio) == 0 && 285 if (TC_H_MAJ(prio) == 0 &&
285 !(cl = head->defaults[prio&TC_PRIO_MAX]) && 286 !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
286 !(cl = head->defaults[TC_PRIO_BESTEFFORT])) 287 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
287 return head; 288 return head;
288 289
@@ -290,12 +291,12 @@ fallback:
290} 291}
291 292
292/* 293/*
293 A packet has just been enqueued on the empty class. 294 * A packet has just been enqueued on the empty class.
294 cbq_activate_class adds it to the tail of active class list 295 * cbq_activate_class adds it to the tail of active class list
295 of its priority band. 296 * of its priority band.
296 */ 297 */
297 298
298static __inline__ void cbq_activate_class(struct cbq_class *cl) 299static inline void cbq_activate_class(struct cbq_class *cl)
299{ 300{
300 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 301 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
301 int prio = cl->cpriority; 302 int prio = cl->cpriority;
@@ -314,9 +315,9 @@ static __inline__ void cbq_activate_class(struct cbq_class *cl)
314} 315}
315 316
316/* 317/*
317 Unlink class from active chain. 318 * Unlink class from active chain.
318 Note that this same procedure is done directly in cbq_dequeue* 319 * Note that this same procedure is done directly in cbq_dequeue*
319 during round-robin procedure. 320 * during round-robin procedure.
320 */ 321 */
321 322
322static void cbq_deactivate_class(struct cbq_class *this) 323static void cbq_deactivate_class(struct cbq_class *this)
@@ -350,7 +351,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
350{ 351{
351 int toplevel = q->toplevel; 352 int toplevel = q->toplevel;
352 353
353 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) { 354 if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
354 psched_time_t now; 355 psched_time_t now;
355 psched_tdiff_t incr; 356 psched_tdiff_t incr;
356 357
@@ -363,7 +364,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
363 q->toplevel = cl->level; 364 q->toplevel = cl->level;
364 return; 365 return;
365 } 366 }
366 } while ((cl=cl->borrow) != NULL && toplevel > cl->level); 367 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
367 } 368 }
368} 369}
369 370
@@ -390,8 +391,6 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
390 ret = qdisc_enqueue(skb, cl->q); 391 ret = qdisc_enqueue(skb, cl->q);
391 if (ret == NET_XMIT_SUCCESS) { 392 if (ret == NET_XMIT_SUCCESS) {
392 sch->q.qlen++; 393 sch->q.qlen++;
393 sch->bstats.packets++;
394 sch->bstats.bytes += qdisc_pkt_len(skb);
395 cbq_mark_toplevel(q, cl); 394 cbq_mark_toplevel(q, cl);
396 if (!cl->next_alive) 395 if (!cl->next_alive)
397 cbq_activate_class(cl); 396 cbq_activate_class(cl);
@@ -419,11 +418,11 @@ static void cbq_ovl_classic(struct cbq_class *cl)
419 delay += cl->offtime; 418 delay += cl->offtime;
420 419
421 /* 420 /*
422 Class goes to sleep, so that it will have no 421 * Class goes to sleep, so that it will have no
423 chance to work avgidle. Let's forgive it 8) 422 * chance to work avgidle. Let's forgive it 8)
424 423 *
425 BTW cbq-2.0 has a crap in this 424 * BTW cbq-2.0 has a crap in this
426 place, apparently they forgot to shift it by cl->ewma_log. 425 * place, apparently they forgot to shift it by cl->ewma_log.
427 */ 426 */
428 if (cl->avgidle < 0) 427 if (cl->avgidle < 0)
429 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log); 428 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
@@ -440,8 +439,8 @@ static void cbq_ovl_classic(struct cbq_class *cl)
440 q->wd_expires = delay; 439 q->wd_expires = delay;
441 440
442 /* Dirty work! We must schedule wakeups based on 441 /* Dirty work! We must schedule wakeups based on
443 real available rate, rather than leaf rate, 442 * real available rate, rather than leaf rate,
444 which may be tiny (even zero). 443 * which may be tiny (even zero).
445 */ 444 */
446 if (q->toplevel == TC_CBQ_MAXLEVEL) { 445 if (q->toplevel == TC_CBQ_MAXLEVEL) {
447 struct cbq_class *b; 446 struct cbq_class *b;
@@ -461,7 +460,7 @@ static void cbq_ovl_classic(struct cbq_class *cl)
461} 460}
462 461
463/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when 462/* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
464 they go overlimit 463 * they go overlimit
465 */ 464 */
466 465
467static void cbq_ovl_rclassic(struct cbq_class *cl) 466static void cbq_ovl_rclassic(struct cbq_class *cl)
@@ -596,7 +595,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
596 struct Qdisc *sch = q->watchdog.qdisc; 595 struct Qdisc *sch = q->watchdog.qdisc;
597 psched_time_t now; 596 psched_time_t now;
598 psched_tdiff_t delay = 0; 597 psched_tdiff_t delay = 0;
599 unsigned pmask; 598 unsigned int pmask;
600 599
601 now = psched_get_time(); 600 now = psched_get_time();
602 601
@@ -625,7 +624,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
625 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS); 624 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
626 } 625 }
627 626
628 sch->flags &= ~TCQ_F_THROTTLED; 627 qdisc_unthrottled(sch);
629 __netif_schedule(qdisc_root(sch)); 628 __netif_schedule(qdisc_root(sch));
630 return HRTIMER_NORESTART; 629 return HRTIMER_NORESTART;
631} 630}
@@ -650,8 +649,6 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
650 ret = qdisc_enqueue(skb, cl->q); 649 ret = qdisc_enqueue(skb, cl->q);
651 if (ret == NET_XMIT_SUCCESS) { 650 if (ret == NET_XMIT_SUCCESS) {
652 sch->q.qlen++; 651 sch->q.qlen++;
653 sch->bstats.packets++;
654 sch->bstats.bytes += qdisc_pkt_len(skb);
655 if (!cl->next_alive) 652 if (!cl->next_alive)
656 cbq_activate_class(cl); 653 cbq_activate_class(cl);
657 return 0; 654 return 0;
@@ -667,15 +664,15 @@ static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
667#endif 664#endif
668 665
669/* 666/*
670 It is mission critical procedure. 667 * It is mission critical procedure.
671 668 *
672 We "regenerate" toplevel cutoff, if transmitting class 669 * We "regenerate" toplevel cutoff, if transmitting class
673 has backlog and it is not regulated. It is not part of 670 * has backlog and it is not regulated. It is not part of
674 original CBQ description, but looks more reasonable. 671 * original CBQ description, but looks more reasonable.
675 Probably, it is wrong. This question needs further investigation. 672 * Probably, it is wrong. This question needs further investigation.
676*/ 673 */
677 674
678static __inline__ void 675static inline void
679cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl, 676cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
680 struct cbq_class *borrowed) 677 struct cbq_class *borrowed)
681{ 678{
@@ -686,7 +683,7 @@ cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
686 q->toplevel = borrowed->level; 683 q->toplevel = borrowed->level;
687 return; 684 return;
688 } 685 }
689 } while ((borrowed=borrowed->borrow) != NULL); 686 } while ((borrowed = borrowed->borrow) != NULL);
690 } 687 }
691#if 0 688#if 0
692 /* It is not necessary now. Uncommenting it 689 /* It is not necessary now. Uncommenting it
@@ -714,10 +711,10 @@ cbq_update(struct cbq_sched_data *q)
714 cl->bstats.bytes += len; 711 cl->bstats.bytes += len;
715 712
716 /* 713 /*
717 (now - last) is total time between packet right edges. 714 * (now - last) is total time between packet right edges.
718 (last_pktlen/rate) is "virtual" busy time, so that 715 * (last_pktlen/rate) is "virtual" busy time, so that
719 716 *
720 idle = (now - last) - last_pktlen/rate 717 * idle = (now - last) - last_pktlen/rate
721 */ 718 */
722 719
723 idle = q->now - cl->last; 720 idle = q->now - cl->last;
@@ -727,9 +724,9 @@ cbq_update(struct cbq_sched_data *q)
727 idle -= L2T(cl, len); 724 idle -= L2T(cl, len);
728 725
729 /* true_avgidle := (1-W)*true_avgidle + W*idle, 726 /* true_avgidle := (1-W)*true_avgidle + W*idle,
730 where W=2^{-ewma_log}. But cl->avgidle is scaled: 727 * where W=2^{-ewma_log}. But cl->avgidle is scaled:
731 cl->avgidle == true_avgidle/W, 728 * cl->avgidle == true_avgidle/W,
732 hence: 729 * hence:
733 */ 730 */
734 avgidle += idle - (avgidle>>cl->ewma_log); 731 avgidle += idle - (avgidle>>cl->ewma_log);
735 } 732 }
@@ -743,22 +740,22 @@ cbq_update(struct cbq_sched_data *q)
743 cl->avgidle = avgidle; 740 cl->avgidle = avgidle;
744 741
745 /* Calculate expected time, when this class 742 /* Calculate expected time, when this class
746 will be allowed to send. 743 * will be allowed to send.
747 It will occur, when: 744 * It will occur, when:
748 (1-W)*true_avgidle + W*delay = 0, i.e. 745 * (1-W)*true_avgidle + W*delay = 0, i.e.
749 idle = (1/W - 1)*(-true_avgidle) 746 * idle = (1/W - 1)*(-true_avgidle)
750 or 747 * or
751 idle = (1 - W)*(-cl->avgidle); 748 * idle = (1 - W)*(-cl->avgidle);
752 */ 749 */
753 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log); 750 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
754 751
755 /* 752 /*
756 That is not all. 753 * That is not all.
757 To maintain the rate allocated to the class, 754 * To maintain the rate allocated to the class,
758 we add to undertime virtual clock, 755 * we add to undertime virtual clock,
759 necessary to complete transmitted packet. 756 * necessary to complete transmitted packet.
760 (len/phys_bandwidth has been already passed 757 * (len/phys_bandwidth has been already passed
761 to the moment of cbq_update) 758 * to the moment of cbq_update)
762 */ 759 */
763 760
764 idle -= L2T(&q->link, len); 761 idle -= L2T(&q->link, len);
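The two comment blocks above pack a lot into a few lines. With W = 2^-ewma_log and cl->avgidle stored as true_avgidle/W, the update avgidle += idle - (avgidle >> ewma_log) is the textbook EWMA true_avgidle = (1-W)*true_avgidle + W*idle divided through by W, and (-avgidle) - ((-avgidle) >> ewma_log) is the (1-W)*(-avgidle) expected-idle term used for undertime. A small stand-alone check of the first identity (plain C, values chosen arbitrarily; the shift truncates, so the two sides agree only up to rounding):

/* Verify that the scaled EWMA used in cbq_update() matches the textbook
 * form.  avgidle is stored as true_avgidle / W with W = 2^-ewma_log, so
 * one step is avgidle += idle - (avgidle >> ewma_log).
 */
#include <stdio.h>

int main(void)
{
	const int ewma_log = 5;                 /* W = 1/32 */
	long avgidle = 1000;                    /* scaled value, true_avgidle / W */
	long idle = -200;                       /* one (negative) idle sample */
	double true_avgidle = avgidle / 32.0;   /* unscaled shadow copy */

	avgidle += idle - (avgidle >> ewma_log);                  /* kernel-style */
	true_avgidle = (31.0 / 32) * true_avgidle + idle / 32.0;  /* textbook */

	printf("scaled: %ld, rescaled textbook: %.1f\n",
	       avgidle, true_avgidle * 32);
	return 0;
}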
@@ -780,7 +777,7 @@ cbq_update(struct cbq_sched_data *q)
780 cbq_update_toplevel(q, this, q->tx_borrowed); 777 cbq_update_toplevel(q, this, q->tx_borrowed);
781} 778}
782 779
783static __inline__ struct cbq_class * 780static inline struct cbq_class *
784cbq_under_limit(struct cbq_class *cl) 781cbq_under_limit(struct cbq_class *cl)
785{ 782{
786 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 783 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
@@ -796,16 +793,17 @@ cbq_under_limit(struct cbq_class *cl)
796 793
797 do { 794 do {
798 /* It is very suspicious place. Now overlimit 795 /* It is very suspicious place. Now overlimit
799 action is generated for not bounded classes 796 * action is generated for not bounded classes
800 only if link is completely congested. 797 * only if link is completely congested.
801 Though it is in agree with ancestor-only paradigm, 798 * Though it is in agree with ancestor-only paradigm,
802 it looks very stupid. Particularly, 799 * it looks very stupid. Particularly,
803 it means that this chunk of code will either 800 * it means that this chunk of code will either
804 never be called or result in strong amplification 801 * never be called or result in strong amplification
805 of burstiness. Dangerous, silly, and, however, 802 * of burstiness. Dangerous, silly, and, however,
806 no another solution exists. 803 * no another solution exists.
807 */ 804 */
808 if ((cl = cl->borrow) == NULL) { 805 cl = cl->borrow;
806 if (!cl) {
809 this_cl->qstats.overlimits++; 807 this_cl->qstats.overlimits++;
810 this_cl->overlimit(this_cl); 808 this_cl->overlimit(this_cl);
811 return NULL; 809 return NULL;
@@ -818,7 +816,7 @@ cbq_under_limit(struct cbq_class *cl)
818 return cl; 816 return cl;
819} 817}
820 818
821static __inline__ struct sk_buff * 819static inline struct sk_buff *
822cbq_dequeue_prio(struct Qdisc *sch, int prio) 820cbq_dequeue_prio(struct Qdisc *sch, int prio)
823{ 821{
824 struct cbq_sched_data *q = qdisc_priv(sch); 822 struct cbq_sched_data *q = qdisc_priv(sch);
@@ -842,7 +840,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
842 840
843 if (cl->deficit <= 0) { 841 if (cl->deficit <= 0) {
844 /* Class exhausted its allotment per 842 /* Class exhausted its allotment per
845 this round. Switch to the next one. 843 * this round. Switch to the next one.
846 */ 844 */
847 deficit = 1; 845 deficit = 1;
848 cl->deficit += cl->quantum; 846 cl->deficit += cl->quantum;
@@ -852,8 +850,8 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
852 skb = cl->q->dequeue(cl->q); 850 skb = cl->q->dequeue(cl->q);
853 851
854 /* Class did not give us any skb :-( 852 /* Class did not give us any skb :-(
855 It could occur even if cl->q->q.qlen != 0 853 * It could occur even if cl->q->q.qlen != 0
856 f.e. if cl->q == "tbf" 854 * f.e. if cl->q == "tbf"
857 */ 855 */
858 if (skb == NULL) 856 if (skb == NULL)
859 goto skip_class; 857 goto skip_class;
@@ -882,7 +880,7 @@ cbq_dequeue_prio(struct Qdisc *sch, int prio)
882skip_class: 880skip_class:
883 if (cl->q->q.qlen == 0 || prio != cl->cpriority) { 881 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
884 /* Class is empty or penalized. 882 /* Class is empty or penalized.
885 Unlink it from active chain. 883 * Unlink it from active chain.
886 */ 884 */
887 cl_prev->next_alive = cl->next_alive; 885 cl_prev->next_alive = cl->next_alive;
888 cl->next_alive = NULL; 886 cl->next_alive = NULL;
@@ -921,14 +919,14 @@ next_class:
921 return NULL; 919 return NULL;
922} 920}
923 921
924static __inline__ struct sk_buff * 922static inline struct sk_buff *
925cbq_dequeue_1(struct Qdisc *sch) 923cbq_dequeue_1(struct Qdisc *sch)
926{ 924{
927 struct cbq_sched_data *q = qdisc_priv(sch); 925 struct cbq_sched_data *q = qdisc_priv(sch);
928 struct sk_buff *skb; 926 struct sk_buff *skb;
929 unsigned activemask; 927 unsigned int activemask;
930 928
931 activemask = q->activemask&0xFF; 929 activemask = q->activemask & 0xFF;
932 while (activemask) { 930 while (activemask) {
933 int prio = ffz(~activemask); 931 int prio = ffz(~activemask);
934 activemask &= ~(1<<prio); 932 activemask &= ~(1<<prio);
@@ -953,11 +951,11 @@ cbq_dequeue(struct Qdisc *sch)
953 if (q->tx_class) { 951 if (q->tx_class) {
954 psched_tdiff_t incr2; 952 psched_tdiff_t incr2;
955 /* Time integrator. We calculate EOS time 953 /* Time integrator. We calculate EOS time
956 by adding expected packet transmission time. 954 * by adding expected packet transmission time.
957 If real time is greater, we warp artificial clock, 955 * If real time is greater, we warp artificial clock,
958 so that: 956 * so that:
959 957 *
960 cbq_time = max(real_time, work); 958 * cbq_time = max(real_time, work);
961 */ 959 */
962 incr2 = L2T(&q->link, q->tx_len); 960 incr2 = L2T(&q->link, q->tx_len);
963 q->now += incr2; 961 q->now += incr2;
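The "time integrator" comment above states the invariant cbq_time = max(real_time, work): the virtual clock q->now advances by at least the virtual transmission time of the packet just sent, and is warped forward if real time has run further ahead. A distilled sketch of that rule (incr standing for the real elapsed time, incr2 for L2T(&q->link, q->tx_len)); types simplified to long long for illustration, the kernel works in psched ticks:

/* Distilled form of the clock advance described above: move the virtual
 * clock by max(real elapsed time, virtual transmission time).
 */
#include <stdio.h>

static long long cbq_advance(long long now, long long incr, long long incr2)
{
	now += incr2;                 /* charge the packet just transmitted */
	if (incr > incr2)
		now += incr - incr2;  /* real time ran ahead: warp forward */
	return now;                   /* == old now + max(incr, incr2) */
}

int main(void)
{
	printf("%lld\n", cbq_advance(1000, 30, 50));  /* 1050: work dominates */
	printf("%lld\n", cbq_advance(1000, 80, 50));  /* 1080: real time dominates */
	return 0;
}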
@@ -973,28 +971,29 @@ cbq_dequeue(struct Qdisc *sch)
973 971
974 skb = cbq_dequeue_1(sch); 972 skb = cbq_dequeue_1(sch);
975 if (skb) { 973 if (skb) {
974 qdisc_bstats_update(sch, skb);
976 sch->q.qlen--; 975 sch->q.qlen--;
977 sch->flags &= ~TCQ_F_THROTTLED; 976 qdisc_unthrottled(sch);
978 return skb; 977 return skb;
979 } 978 }
980 979
981 /* All the classes are overlimit. 980 /* All the classes are overlimit.
982 981 *
983 It is possible, if: 982 * It is possible, if:
984 983 *
985 1. Scheduler is empty. 984 * 1. Scheduler is empty.
986 2. Toplevel cutoff inhibited borrowing. 985 * 2. Toplevel cutoff inhibited borrowing.
987 3. Root class is overlimit. 986 * 3. Root class is overlimit.
988 987 *
989 Reset 2d and 3d conditions and retry. 988 * Reset 2d and 3d conditions and retry.
990 989 *
991 Note, that NS and cbq-2.0 are buggy, peeking 990 * Note, that NS and cbq-2.0 are buggy, peeking
992 an arbitrary class is appropriate for ancestor-only 991 * an arbitrary class is appropriate for ancestor-only
993 sharing, but not for toplevel algorithm. 992 * sharing, but not for toplevel algorithm.
994 993 *
995 Our version is better, but slower, because it requires 994 * Our version is better, but slower, because it requires
996 two passes, but it is unavoidable with top-level sharing. 995 * two passes, but it is unavoidable with top-level sharing.
997 */ 996 */
998 997
999 if (q->toplevel == TC_CBQ_MAXLEVEL && 998 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1000 q->link.undertime == PSCHED_PASTPERFECT) 999 q->link.undertime == PSCHED_PASTPERFECT)
@@ -1005,7 +1004,8 @@ cbq_dequeue(struct Qdisc *sch)
1005 } 1004 }
1006 1005
1007 /* No packets in scheduler or nobody wants to give them to us :-( 1006 /* No packets in scheduler or nobody wants to give them to us :-(
1008 Sigh... start watchdog timer in the last case. */ 1007 * Sigh... start watchdog timer in the last case.
1008 */
1009 1009
1010 if (sch->q.qlen) { 1010 if (sch->q.qlen) {
1011 sch->qstats.overlimits++; 1011 sch->qstats.overlimits++;
@@ -1027,13 +1027,14 @@ static void cbq_adjust_levels(struct cbq_class *this)
1027 int level = 0; 1027 int level = 0;
1028 struct cbq_class *cl; 1028 struct cbq_class *cl;
1029 1029
1030 if ((cl = this->children) != NULL) { 1030 cl = this->children;
1031 if (cl) {
1031 do { 1032 do {
1032 if (cl->level > level) 1033 if (cl->level > level)
1033 level = cl->level; 1034 level = cl->level;
1034 } while ((cl = cl->sibling) != this->children); 1035 } while ((cl = cl->sibling) != this->children);
1035 } 1036 }
1036 this->level = level+1; 1037 this->level = level + 1;
1037 } while ((this = this->tparent) != NULL); 1038 } while ((this = this->tparent) != NULL);
1038} 1039}
1039 1040
@@ -1049,14 +1050,15 @@ static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1049 for (h = 0; h < q->clhash.hashsize; h++) { 1050 for (h = 0; h < q->clhash.hashsize; h++) {
1050 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) { 1051 hlist_for_each_entry(cl, n, &q->clhash.hash[h], common.hnode) {
1051 /* BUGGGG... Beware! This expression suffer of 1052 /* BUGGGG... Beware! This expression suffer of
1052 arithmetic overflows! 1053 * arithmetic overflows!
1053 */ 1054 */
1054 if (cl->priority == prio) { 1055 if (cl->priority == prio) {
1055 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/ 1056 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1056 q->quanta[prio]; 1057 q->quanta[prio];
1057 } 1058 }
1058 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) { 1059 if (cl->quantum <= 0 || cl->quantum>32*qdisc_dev(cl->qdisc)->mtu) {
1059 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->common.classid, cl->quantum); 1060 pr_warning("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1061 cl->common.classid, cl->quantum);
1060 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1; 1062 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1061 } 1063 }
1062 } 1064 }
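The "BUGGGG... arithmetic overflows" warning above is easy to make concrete: cl->weight * cl->allot * q->nclasses[prio] is evaluated before the division by q->quanta[prio], so on a 32-bit build modest-looking operands already wrap well past 2^31. An illustration with invented values (none of these numbers come from a real configuration):

/* Concrete instance of the overflow the comment above warns about.
 * Operand values are invented for illustration; the kernel does the
 * multiplication in (signed) long, which is 32 bits on many platforms.
 */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t weight   = 100000;   /* cl->weight */
	uint32_t allot    = 1514;     /* cl->allot, roughly one Ethernet MTU */
	uint32_t nclasses = 32;       /* classes in this priority band */
	uint32_t quanta   = 3200000;  /* sum of weights in the band */

	uint32_t wrapped = weight * allot * nclasses / quanta;          /* mod 2^32 */
	uint64_t exact   = (uint64_t)weight * allot * nclasses / quanta;

	printf("32-bit quantum: %u, exact quantum: %llu\n",
	       wrapped, (unsigned long long)exact);
	return 0;
}

Note that a wrapped result like this can still land inside the quantum <= 0 || quantum > 32*mtu sanity check just below, so the pr_warning/repair path does not catch every overflow; the comment's warning stands.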
@@ -1067,18 +1069,18 @@ static void cbq_sync_defmap(struct cbq_class *cl)
1067{ 1069{
1068 struct cbq_sched_data *q = qdisc_priv(cl->qdisc); 1070 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1069 struct cbq_class *split = cl->split; 1071 struct cbq_class *split = cl->split;
1070 unsigned h; 1072 unsigned int h;
1071 int i; 1073 int i;
1072 1074
1073 if (split == NULL) 1075 if (split == NULL)
1074 return; 1076 return;
1075 1077
1076 for (i=0; i<=TC_PRIO_MAX; i++) { 1078 for (i = 0; i <= TC_PRIO_MAX; i++) {
1077 if (split->defaults[i] == cl && !(cl->defmap&(1<<i))) 1079 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1078 split->defaults[i] = NULL; 1080 split->defaults[i] = NULL;
1079 } 1081 }
1080 1082
1081 for (i=0; i<=TC_PRIO_MAX; i++) { 1083 for (i = 0; i <= TC_PRIO_MAX; i++) {
1082 int level = split->level; 1084 int level = split->level;
1083 1085
1084 if (split->defaults[i]) 1086 if (split->defaults[i])
@@ -1091,7 +1093,7 @@ static void cbq_sync_defmap(struct cbq_class *cl)
1091 hlist_for_each_entry(c, n, &q->clhash.hash[h], 1093 hlist_for_each_entry(c, n, &q->clhash.hash[h],
1092 common.hnode) { 1094 common.hnode) {
1093 if (c->split == split && c->level < level && 1095 if (c->split == split && c->level < level &&
1094 c->defmap&(1<<i)) { 1096 c->defmap & (1<<i)) {
1095 split->defaults[i] = c; 1097 split->defaults[i] = c;
1096 level = c->level; 1098 level = c->level;
1097 } 1099 }
@@ -1105,7 +1107,8 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
1105 struct cbq_class *split = NULL; 1107 struct cbq_class *split = NULL;
1106 1108
1107 if (splitid == 0) { 1109 if (splitid == 0) {
1108 if ((split = cl->split) == NULL) 1110 split = cl->split;
1111 if (!split)
1109 return; 1112 return;
1110 splitid = split->common.classid; 1113 splitid = split->common.classid;
1111 } 1114 }
@@ -1123,9 +1126,9 @@ static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 ma
1123 cl->defmap = 0; 1126 cl->defmap = 0;
1124 cbq_sync_defmap(cl); 1127 cbq_sync_defmap(cl);
1125 cl->split = split; 1128 cl->split = split;
1126 cl->defmap = def&mask; 1129 cl->defmap = def & mask;
1127 } else 1130 } else
1128 cl->defmap = (cl->defmap&~mask)|(def&mask); 1131 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1129 1132
1130 cbq_sync_defmap(cl); 1133 cbq_sync_defmap(cl);
1131} 1134}
@@ -1138,7 +1141,7 @@ static void cbq_unlink_class(struct cbq_class *this)
1138 qdisc_class_hash_remove(&q->clhash, &this->common); 1141 qdisc_class_hash_remove(&q->clhash, &this->common);
1139 1142
1140 if (this->tparent) { 1143 if (this->tparent) {
1141 clp=&this->sibling; 1144 clp = &this->sibling;
1142 cl = *clp; 1145 cl = *clp;
1143 do { 1146 do {
1144 if (cl == this) { 1147 if (cl == this) {
@@ -1177,7 +1180,7 @@ static void cbq_link_class(struct cbq_class *this)
1177 } 1180 }
1178} 1181}
1179 1182
1180static unsigned int cbq_drop(struct Qdisc* sch) 1183static unsigned int cbq_drop(struct Qdisc *sch)
1181{ 1184{
1182 struct cbq_sched_data *q = qdisc_priv(sch); 1185 struct cbq_sched_data *q = qdisc_priv(sch);
1183 struct cbq_class *cl, *cl_head; 1186 struct cbq_class *cl, *cl_head;
@@ -1185,7 +1188,8 @@ static unsigned int cbq_drop(struct Qdisc* sch)
1185 unsigned int len; 1188 unsigned int len;
1186 1189
1187 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) { 1190 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1188 if ((cl_head = q->active[prio]) == NULL) 1191 cl_head = q->active[prio];
1192 if (!cl_head)
1189 continue; 1193 continue;
1190 1194
1191 cl = cl_head; 1195 cl = cl_head;
@@ -1202,13 +1206,13 @@ static unsigned int cbq_drop(struct Qdisc* sch)
1202} 1206}
1203 1207
1204static void 1208static void
1205cbq_reset(struct Qdisc* sch) 1209cbq_reset(struct Qdisc *sch)
1206{ 1210{
1207 struct cbq_sched_data *q = qdisc_priv(sch); 1211 struct cbq_sched_data *q = qdisc_priv(sch);
1208 struct cbq_class *cl; 1212 struct cbq_class *cl;
1209 struct hlist_node *n; 1213 struct hlist_node *n;
1210 int prio; 1214 int prio;
1211 unsigned h; 1215 unsigned int h;
1212 1216
1213 q->activemask = 0; 1217 q->activemask = 0;
1214 q->pmask = 0; 1218 q->pmask = 0;
@@ -1240,21 +1244,21 @@ cbq_reset(struct Qdisc* sch)
1240 1244
1241static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss) 1245static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1242{ 1246{
1243 if (lss->change&TCF_CBQ_LSS_FLAGS) { 1247 if (lss->change & TCF_CBQ_LSS_FLAGS) {
1244 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent; 1248 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1245 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent; 1249 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1246 } 1250 }
1247 if (lss->change&TCF_CBQ_LSS_EWMA) 1251 if (lss->change & TCF_CBQ_LSS_EWMA)
1248 cl->ewma_log = lss->ewma_log; 1252 cl->ewma_log = lss->ewma_log;
1249 if (lss->change&TCF_CBQ_LSS_AVPKT) 1253 if (lss->change & TCF_CBQ_LSS_AVPKT)
1250 cl->avpkt = lss->avpkt; 1254 cl->avpkt = lss->avpkt;
1251 if (lss->change&TCF_CBQ_LSS_MINIDLE) 1255 if (lss->change & TCF_CBQ_LSS_MINIDLE)
1252 cl->minidle = -(long)lss->minidle; 1256 cl->minidle = -(long)lss->minidle;
1253 if (lss->change&TCF_CBQ_LSS_MAXIDLE) { 1257 if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1254 cl->maxidle = lss->maxidle; 1258 cl->maxidle = lss->maxidle;
1255 cl->avgidle = lss->maxidle; 1259 cl->avgidle = lss->maxidle;
1256 } 1260 }
1257 if (lss->change&TCF_CBQ_LSS_OFFTIME) 1261 if (lss->change & TCF_CBQ_LSS_OFFTIME)
1258 cl->offtime = lss->offtime; 1262 cl->offtime = lss->offtime;
1259 return 0; 1263 return 0;
1260} 1264}
@@ -1282,10 +1286,10 @@ static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1282 if (wrr->weight) 1286 if (wrr->weight)
1283 cl->weight = wrr->weight; 1287 cl->weight = wrr->weight;
1284 if (wrr->priority) { 1288 if (wrr->priority) {
1285 cl->priority = wrr->priority-1; 1289 cl->priority = wrr->priority - 1;
1286 cl->cpriority = cl->priority; 1290 cl->cpriority = cl->priority;
1287 if (cl->priority >= cl->priority2) 1291 if (cl->priority >= cl->priority2)
1288 cl->priority2 = TC_CBQ_MAXPRIO-1; 1292 cl->priority2 = TC_CBQ_MAXPRIO - 1;
1289 } 1293 }
1290 1294
1291 cbq_addprio(q, cl); 1295 cbq_addprio(q, cl);
@@ -1302,10 +1306,10 @@ static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1302 cl->overlimit = cbq_ovl_delay; 1306 cl->overlimit = cbq_ovl_delay;
1303 break; 1307 break;
1304 case TC_CBQ_OVL_LOWPRIO: 1308 case TC_CBQ_OVL_LOWPRIO:
1305 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO || 1309 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1306 ovl->priority2-1 <= cl->priority) 1310 ovl->priority2 - 1 <= cl->priority)
1307 return -EINVAL; 1311 return -EINVAL;
1308 cl->priority2 = ovl->priority2-1; 1312 cl->priority2 = ovl->priority2 - 1;
1309 cl->overlimit = cbq_ovl_lowprio; 1313 cl->overlimit = cbq_ovl_lowprio;
1310 break; 1314 break;
1311 case TC_CBQ_OVL_DROP: 1315 case TC_CBQ_OVL_DROP:
@@ -1379,14 +1383,14 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1379 q->link.sibling = &q->link; 1383 q->link.sibling = &q->link;
1380 q->link.common.classid = sch->handle; 1384 q->link.common.classid = sch->handle;
1381 q->link.qdisc = sch; 1385 q->link.qdisc = sch;
1382 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1386 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1383 &pfifo_qdisc_ops, 1387 sch->handle);
1384 sch->handle))) 1388 if (!q->link.q)
1385 q->link.q = &noop_qdisc; 1389 q->link.q = &noop_qdisc;
1386 1390
1387 q->link.priority = TC_CBQ_MAXPRIO-1; 1391 q->link.priority = TC_CBQ_MAXPRIO - 1;
1388 q->link.priority2 = TC_CBQ_MAXPRIO-1; 1392 q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1389 q->link.cpriority = TC_CBQ_MAXPRIO-1; 1393 q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1390 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC; 1394 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1391 q->link.overlimit = cbq_ovl_classic; 1395 q->link.overlimit = cbq_ovl_classic;
1392 q->link.allot = psched_mtu(qdisc_dev(sch)); 1396 q->link.allot = psched_mtu(qdisc_dev(sch));
@@ -1417,7 +1421,7 @@ put_rtab:
1417 return err; 1421 return err;
1418} 1422}
1419 1423
1420static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl) 1424static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1421{ 1425{
1422 unsigned char *b = skb_tail_pointer(skb); 1426 unsigned char *b = skb_tail_pointer(skb);
1423 1427
@@ -1429,7 +1433,7 @@ nla_put_failure:
1429 return -1; 1433 return -1;
1430} 1434}
1431 1435
1432static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl) 1436static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1433{ 1437{
1434 unsigned char *b = skb_tail_pointer(skb); 1438 unsigned char *b = skb_tail_pointer(skb);
1435 struct tc_cbq_lssopt opt; 1439 struct tc_cbq_lssopt opt;
@@ -1454,15 +1458,15 @@ nla_put_failure:
1454 return -1; 1458 return -1;
1455} 1459}
1456 1460
1457static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) 1461static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1458{ 1462{
1459 unsigned char *b = skb_tail_pointer(skb); 1463 unsigned char *b = skb_tail_pointer(skb);
1460 struct tc_cbq_wrropt opt; 1464 struct tc_cbq_wrropt opt;
1461 1465
1462 opt.flags = 0; 1466 opt.flags = 0;
1463 opt.allot = cl->allot; 1467 opt.allot = cl->allot;
1464 opt.priority = cl->priority+1; 1468 opt.priority = cl->priority + 1;
1465 opt.cpriority = cl->cpriority+1; 1469 opt.cpriority = cl->cpriority + 1;
1466 opt.weight = cl->weight; 1470 opt.weight = cl->weight;
1467 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt); 1471 NLA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1468 return skb->len; 1472 return skb->len;
@@ -1472,13 +1476,13 @@ nla_put_failure:
1472 return -1; 1476 return -1;
1473} 1477}
1474 1478
1475static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl) 1479static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1476{ 1480{
1477 unsigned char *b = skb_tail_pointer(skb); 1481 unsigned char *b = skb_tail_pointer(skb);
1478 struct tc_cbq_ovl opt; 1482 struct tc_cbq_ovl opt;
1479 1483
1480 opt.strategy = cl->ovl_strategy; 1484 opt.strategy = cl->ovl_strategy;
1481 opt.priority2 = cl->priority2+1; 1485 opt.priority2 = cl->priority2 + 1;
1482 opt.pad = 0; 1486 opt.pad = 0;
1483 opt.penalty = cl->penalty; 1487 opt.penalty = cl->penalty;
1484 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt); 1488 NLA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
@@ -1489,7 +1493,7 @@ nla_put_failure:
1489 return -1; 1493 return -1;
1490} 1494}
1491 1495
1492static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl) 1496static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1493{ 1497{
1494 unsigned char *b = skb_tail_pointer(skb); 1498 unsigned char *b = skb_tail_pointer(skb);
1495 struct tc_cbq_fopt opt; 1499 struct tc_cbq_fopt opt;
@@ -1508,7 +1512,7 @@ nla_put_failure:
1508} 1512}
1509 1513
1510#ifdef CONFIG_NET_CLS_ACT 1514#ifdef CONFIG_NET_CLS_ACT
1511static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl) 1515static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1512{ 1516{
1513 unsigned char *b = skb_tail_pointer(skb); 1517 unsigned char *b = skb_tail_pointer(skb);
1514 struct tc_cbq_police opt; 1518 struct tc_cbq_police opt;
@@ -1572,7 +1576,7 @@ static int
1572cbq_dump_class(struct Qdisc *sch, unsigned long arg, 1576cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1573 struct sk_buff *skb, struct tcmsg *tcm) 1577 struct sk_buff *skb, struct tcmsg *tcm)
1574{ 1578{
1575 struct cbq_class *cl = (struct cbq_class*)arg; 1579 struct cbq_class *cl = (struct cbq_class *)arg;
1576 struct nlattr *nest; 1580 struct nlattr *nest;
1577 1581
1578 if (cl->tparent) 1582 if (cl->tparent)
@@ -1600,7 +1604,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1600 struct gnet_dump *d) 1604 struct gnet_dump *d)
1601{ 1605{
1602 struct cbq_sched_data *q = qdisc_priv(sch); 1606 struct cbq_sched_data *q = qdisc_priv(sch);
1603 struct cbq_class *cl = (struct cbq_class*)arg; 1607 struct cbq_class *cl = (struct cbq_class *)arg;
1604 1608
1605 cl->qstats.qlen = cl->q->q.qlen; 1609 cl->qstats.qlen = cl->q->q.qlen;
1606 cl->xstats.avgidle = cl->avgidle; 1610 cl->xstats.avgidle = cl->avgidle;
@@ -1620,10 +1624,10 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1620static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, 1624static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1621 struct Qdisc **old) 1625 struct Qdisc **old)
1622{ 1626{
1623 struct cbq_class *cl = (struct cbq_class*)arg; 1627 struct cbq_class *cl = (struct cbq_class *)arg;
1624 1628
1625 if (new == NULL) { 1629 if (new == NULL) {
1626 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1630 new = qdisc_create_dflt(sch->dev_queue,
1627 &pfifo_qdisc_ops, cl->common.classid); 1631 &pfifo_qdisc_ops, cl->common.classid);
1628 if (new == NULL) 1632 if (new == NULL)
1629 return -ENOBUFS; 1633 return -ENOBUFS;
@@ -1643,10 +1647,9 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1643 return 0; 1647 return 0;
1644} 1648}
1645 1649
1646static struct Qdisc * 1650static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1647cbq_leaf(struct Qdisc *sch, unsigned long arg)
1648{ 1651{
1649 struct cbq_class *cl = (struct cbq_class*)arg; 1652 struct cbq_class *cl = (struct cbq_class *)arg;
1650 1653
1651 return cl->q; 1654 return cl->q;
1652} 1655}
@@ -1685,13 +1688,12 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1685 kfree(cl); 1688 kfree(cl);
1686} 1689}
1687 1690
1688static void 1691static void cbq_destroy(struct Qdisc *sch)
1689cbq_destroy(struct Qdisc* sch)
1690{ 1692{
1691 struct cbq_sched_data *q = qdisc_priv(sch); 1693 struct cbq_sched_data *q = qdisc_priv(sch);
1692 struct hlist_node *n, *next; 1694 struct hlist_node *n, *next;
1693 struct cbq_class *cl; 1695 struct cbq_class *cl;
1694 unsigned h; 1696 unsigned int h;
1695 1697
1696#ifdef CONFIG_NET_CLS_ACT 1698#ifdef CONFIG_NET_CLS_ACT
1697 q->rx_class = NULL; 1699 q->rx_class = NULL;
@@ -1715,7 +1717,7 @@ cbq_destroy(struct Qdisc* sch)
1715 1717
1716static void cbq_put(struct Qdisc *sch, unsigned long arg) 1718static void cbq_put(struct Qdisc *sch, unsigned long arg)
1717{ 1719{
1718 struct cbq_class *cl = (struct cbq_class*)arg; 1720 struct cbq_class *cl = (struct cbq_class *)arg;
1719 1721
1720 if (--cl->refcnt == 0) { 1722 if (--cl->refcnt == 0) {
1721#ifdef CONFIG_NET_CLS_ACT 1723#ifdef CONFIG_NET_CLS_ACT
@@ -1738,7 +1740,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1738{ 1740{
1739 int err; 1741 int err;
1740 struct cbq_sched_data *q = qdisc_priv(sch); 1742 struct cbq_sched_data *q = qdisc_priv(sch);
1741 struct cbq_class *cl = (struct cbq_class*)*arg; 1743 struct cbq_class *cl = (struct cbq_class *)*arg;
1742 struct nlattr *opt = tca[TCA_OPTIONS]; 1744 struct nlattr *opt = tca[TCA_OPTIONS];
1743 struct nlattr *tb[TCA_CBQ_MAX + 1]; 1745 struct nlattr *tb[TCA_CBQ_MAX + 1];
1744 struct cbq_class *parent; 1746 struct cbq_class *parent;
@@ -1830,13 +1832,14 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1830 1832
1831 if (classid) { 1833 if (classid) {
1832 err = -EINVAL; 1834 err = -EINVAL;
1833 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid)) 1835 if (TC_H_MAJ(classid ^ sch->handle) ||
1836 cbq_class_lookup(q, classid))
1834 goto failure; 1837 goto failure;
1835 } else { 1838 } else {
1836 int i; 1839 int i;
1837 classid = TC_H_MAKE(sch->handle,0x8000); 1840 classid = TC_H_MAKE(sch->handle, 0x8000);
1838 1841
1839 for (i=0; i<0x8000; i++) { 1842 for (i = 0; i < 0x8000; i++) {
1840 if (++q->hgenerator >= 0x8000) 1843 if (++q->hgenerator >= 0x8000)
1841 q->hgenerator = 1; 1844 q->hgenerator = 1;
1842 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL) 1845 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
@@ -1874,8 +1877,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1874 cl->R_tab = rtab; 1877 cl->R_tab = rtab;
1875 rtab = NULL; 1878 rtab = NULL;
1876 cl->refcnt = 1; 1879 cl->refcnt = 1;
1877 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1880 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1878 &pfifo_qdisc_ops, classid))) 1881 if (!cl->q)
1879 cl->q = &noop_qdisc; 1882 cl->q = &noop_qdisc;
1880 cl->common.classid = classid; 1883 cl->common.classid = classid;
1881 cl->tparent = parent; 1884 cl->tparent = parent;
@@ -1893,11 +1896,11 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1893 cl->minidle = -0x7FFFFFFF; 1896 cl->minidle = -0x7FFFFFFF;
1894 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT])); 1897 cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1895 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT])); 1898 cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1896 if (cl->ewma_log==0) 1899 if (cl->ewma_log == 0)
1897 cl->ewma_log = q->link.ewma_log; 1900 cl->ewma_log = q->link.ewma_log;
1898 if (cl->maxidle==0) 1901 if (cl->maxidle == 0)
1899 cl->maxidle = q->link.maxidle; 1902 cl->maxidle = q->link.maxidle;
1900 if (cl->avpkt==0) 1903 if (cl->avpkt == 0)
1901 cl->avpkt = q->link.avpkt; 1904 cl->avpkt = q->link.avpkt;
1902 cl->overlimit = cbq_ovl_classic; 1905 cl->overlimit = cbq_ovl_classic;
1903 if (tb[TCA_CBQ_OVL_STRATEGY]) 1906 if (tb[TCA_CBQ_OVL_STRATEGY])
@@ -1923,7 +1926,7 @@ failure:
1923static int cbq_delete(struct Qdisc *sch, unsigned long arg) 1926static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1924{ 1927{
1925 struct cbq_sched_data *q = qdisc_priv(sch); 1928 struct cbq_sched_data *q = qdisc_priv(sch);
1926 struct cbq_class *cl = (struct cbq_class*)arg; 1929 struct cbq_class *cl = (struct cbq_class *)arg;
1927 unsigned int qlen; 1930 unsigned int qlen;
1928 1931
1929 if (cl->filters || cl->children || cl == &q->link) 1932 if (cl->filters || cl->children || cl == &q->link)
@@ -1981,7 +1984,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1981 u32 classid) 1984 u32 classid)
1982{ 1985{
1983 struct cbq_sched_data *q = qdisc_priv(sch); 1986 struct cbq_sched_data *q = qdisc_priv(sch);
1984 struct cbq_class *p = (struct cbq_class*)parent; 1987 struct cbq_class *p = (struct cbq_class *)parent;
1985 struct cbq_class *cl = cbq_class_lookup(q, classid); 1988 struct cbq_class *cl = cbq_class_lookup(q, classid);
1986 1989
1987 if (cl) { 1990 if (cl) {
@@ -1995,7 +1998,7 @@ static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1995 1998
1996static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg) 1999static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
1997{ 2000{
1998 struct cbq_class *cl = (struct cbq_class*)arg; 2001 struct cbq_class *cl = (struct cbq_class *)arg;
1999 2002
2000 cl->filters--; 2003 cl->filters--;
2001} 2004}
@@ -2005,7 +2008,7 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2005 struct cbq_sched_data *q = qdisc_priv(sch); 2008 struct cbq_sched_data *q = qdisc_priv(sch);
2006 struct cbq_class *cl; 2009 struct cbq_class *cl;
2007 struct hlist_node *n; 2010 struct hlist_node *n;
2008 unsigned h; 2011 unsigned int h;
2009 2012
2010 if (arg->stop) 2013 if (arg->stop)
2011 return; 2014 return;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
new file mode 100644
index 000000000000..06afbaeb4c88
--- /dev/null
+++ b/net/sched/sch_choke.c
@@ -0,0 +1,688 @@
1/*
2 * net/sched/sch_choke.c CHOKE scheduler
3 *
4 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/kernel.h>
16#include <linux/skbuff.h>
17#include <linux/reciprocal_div.h>
18#include <linux/vmalloc.h>
19#include <net/pkt_sched.h>
20#include <net/inet_ecn.h>
21#include <net/red.h>
22#include <linux/ip.h>
23#include <net/ip.h>
24#include <linux/ipv6.h>
25#include <net/ipv6.h>
26
27/*
28 CHOKe stateless AQM for fair bandwidth allocation
29 =================================================
30
31 CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
32 unresponsive flows) is a variant of RED that penalizes misbehaving flows but
33 maintains no flow state. The difference from RED is an additional step
34 during the enqueuing process. If average queue size is over the
35 low threshold (qmin), a packet is chosen at random from the queue.
36 If both the new and chosen packet are from the same flow, both
37 are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
38 needs to access packets in queue randomly. It has a minimal class
39 interface to allow overriding the builtin flow classifier with
40 filters.
41
42 Source:
43 R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
44 Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
45 IEEE INFOCOM, 2000.
46
47 A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
48 Characteristics", IEEE/ACM Transactions on Networking, 2004
49
50 */
51
52/* Upper bound on size of sk_buff table (packets) */
53#define CHOKE_MAX_QUEUE (128*1024 - 1)
54
55struct choke_sched_data {
56/* Parameters */
57 u32 limit;
58 unsigned char flags;
59
60 struct red_parms parms;
61
62/* Variables */
63 struct tcf_proto *filter_list;
64 struct {
65 u32 prob_drop; /* Early probability drops */
66 u32 prob_mark; /* Early probability marks */
67 u32 forced_drop; /* Forced drops, qavg > max_thresh */
68 u32 forced_mark; /* Forced marks, qavg > max_thresh */
69 u32 pdrop; /* Drops due to queue limits */
70 u32 other; /* Drops due to drop() calls */
71 u32 matched; /* Drops to flow match */
72 } stats;
73
74 unsigned int head;
75 unsigned int tail;
76
77 unsigned int tab_mask; /* size - 1 */
78
79 struct sk_buff **tab;
80};
81
82/* deliver a random number between 0 and N - 1 */
83static u32 random_N(unsigned int N)
84{
85 return reciprocal_divide(random32(), N);
86}
87
88/* number of elements in queue including holes */
89static unsigned int choke_len(const struct choke_sched_data *q)
90{
91 return (q->tail - q->head) & q->tab_mask;
92}
93
94/* Is ECN parameter configured */
95static int use_ecn(const struct choke_sched_data *q)
96{
97 return q->flags & TC_RED_ECN;
98}
99
100/* Should packets over max just be dropped (versus marked) */
101static int use_harddrop(const struct choke_sched_data *q)
102{
103 return q->flags & TC_RED_HARDDROP;
104}
105
106/* Move head pointer forward to skip over holes */
107static void choke_zap_head_holes(struct choke_sched_data *q)
108{
109 do {
110 q->head = (q->head + 1) & q->tab_mask;
111 if (q->head == q->tail)
112 break;
113 } while (q->tab[q->head] == NULL);
114}
115
116/* Move tail pointer backwards to reuse holes */
117static void choke_zap_tail_holes(struct choke_sched_data *q)
118{
119 do {
120 q->tail = (q->tail - 1) & q->tab_mask;
121 if (q->head == q->tail)
122 break;
123 } while (q->tab[q->tail] == NULL);
124}
125
126/* Drop packet from queue array by creating a "hole" */
127static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
128{
129 struct choke_sched_data *q = qdisc_priv(sch);
130 struct sk_buff *skb = q->tab[idx];
131
132 q->tab[idx] = NULL;
133
134 if (idx == q->head)
135 choke_zap_head_holes(q);
136 if (idx == q->tail)
137 choke_zap_tail_holes(q);
138
139 sch->qstats.backlog -= qdisc_pkt_len(skb);
140 qdisc_drop(skb, sch);
141 qdisc_tree_decrease_qlen(sch, 1);
142 --sch->q.qlen;
143}
144
145/*
146 * Compare flow of two packets
147 * Returns true only if source and destination address and port match.
148 * false for special cases
149 */
150static bool choke_match_flow(struct sk_buff *skb1,
151 struct sk_buff *skb2)
152{
153 int off1, off2, poff;
154 const u32 *ports1, *ports2;
155 u8 ip_proto;
156 __u32 hash1;
157
158 if (skb1->protocol != skb2->protocol)
159 return false;
160
161 /* Use hash value as quick check
162 * Assumes that __skb_get_rxhash makes IP header and ports linear
163 */
164 hash1 = skb_get_rxhash(skb1);
165 if (!hash1 || hash1 != skb_get_rxhash(skb2))
166 return false;
167
168 /* Probably match, but be sure to avoid hash collisions */
169 off1 = skb_network_offset(skb1);
170 off2 = skb_network_offset(skb2);
171
172 switch (skb1->protocol) {
173 case __constant_htons(ETH_P_IP): {
174 const struct iphdr *ip1, *ip2;
175
176 ip1 = (const struct iphdr *) (skb1->data + off1);
177 ip2 = (const struct iphdr *) (skb2->data + off2);
178
179 ip_proto = ip1->protocol;
180 if (ip_proto != ip2->protocol ||
181 ip1->saddr != ip2->saddr || ip1->daddr != ip2->daddr)
182 return false;
183
184 if ((ip1->frag_off | ip2->frag_off) & htons(IP_MF | IP_OFFSET))
185 ip_proto = 0;
186 off1 += ip1->ihl * 4;
187 off2 += ip2->ihl * 4;
188 break;
189 }
190
191 case __constant_htons(ETH_P_IPV6): {
192 const struct ipv6hdr *ip1, *ip2;
193
194 ip1 = (const struct ipv6hdr *) (skb1->data + off1);
195 ip2 = (const struct ipv6hdr *) (skb2->data + off2);
196
197 ip_proto = ip1->nexthdr;
198 if (ip_proto != ip2->nexthdr ||
199 ipv6_addr_cmp(&ip1->saddr, &ip2->saddr) ||
200 ipv6_addr_cmp(&ip1->daddr, &ip2->daddr))
201 return false;
202 off1 += 40;
203 off2 += 40;
204 }
205
206 default: /* Maybe compare MAC header here? */
207 return false;
208 }
209
210 poff = proto_ports_offset(ip_proto);
211 if (poff < 0)
212 return true;
213
214 off1 += poff;
215 off2 += poff;
216
217 ports1 = (__force u32 *)(skb1->data + off1);
218 ports2 = (__force u32 *)(skb2->data + off2);
219 return *ports1 == *ports2;
220}
221
222struct choke_skb_cb {
223 u16 classid;
224};
225
226static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
227{
228 BUILD_BUG_ON(sizeof(skb->cb) <
229 sizeof(struct qdisc_skb_cb) + sizeof(struct choke_skb_cb));
230 return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
231}
232
233static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
234{
235 choke_skb_cb(skb)->classid = classid;
236}
237
238static u16 choke_get_classid(const struct sk_buff *skb)
239{
240 return choke_skb_cb(skb)->classid;
241}
242
243/*
244 * Classify flow using either:
245 * 1. pre-existing classification result in skb
246 * 2. fast internal classification
247 * 3. use TC filter based classification
248 */
249static bool choke_classify(struct sk_buff *skb,
250 struct Qdisc *sch, int *qerr)
251
252{
253 struct choke_sched_data *q = qdisc_priv(sch);
254 struct tcf_result res;
255 int result;
256
257 result = tc_classify(skb, q->filter_list, &res);
258 if (result >= 0) {
259#ifdef CONFIG_NET_CLS_ACT
260 switch (result) {
261 case TC_ACT_STOLEN:
262 case TC_ACT_QUEUED:
263 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
264 case TC_ACT_SHOT:
265 return false;
266 }
267#endif
268 choke_set_classid(skb, TC_H_MIN(res.classid));
269 return true;
270 }
271
272 return false;
273}
274
275/*
276 * Select a packet at random from queue
277 * HACK: since queue can have holes from previous deletion; retry several
278 * times to find a random skb but then just give up and return the head
279 * Will return NULL if queue is empty (q->head == q->tail)
280 */
281static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
282 unsigned int *pidx)
283{
284 struct sk_buff *skb;
285 int retrys = 3;
286
287 do {
288 *pidx = (q->head + random_N(choke_len(q))) & q->tab_mask;
289 skb = q->tab[*pidx];
290 if (skb)
291 return skb;
292 } while (--retrys > 0);
293
294 return q->tab[*pidx = q->head];
295}
296
297/*
298 * Compare new packet with random packet in queue
299 * returns true if matched and sets *pidx
300 */
301static bool choke_match_random(const struct choke_sched_data *q,
302 struct sk_buff *nskb,
303 unsigned int *pidx)
304{
305 struct sk_buff *oskb;
306
307 if (q->head == q->tail)
308 return false;
309
310 oskb = choke_peek_random(q, pidx);
311 if (q->filter_list)
312 return choke_get_classid(nskb) == choke_get_classid(oskb);
313
314 return choke_match_flow(oskb, nskb);
315}
316
317static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
318{
319 struct choke_sched_data *q = qdisc_priv(sch);
320 struct red_parms *p = &q->parms;
321 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
322
323 if (q->filter_list) {
324 /* If using external classifiers, get result and record it. */
325 if (!choke_classify(skb, sch, &ret))
326 goto other_drop; /* Packet was eaten by filter */
327 }
328
329 /* Compute average queue usage (see RED) */
330 p->qavg = red_calc_qavg(p, sch->q.qlen);
331 if (red_is_idling(p))
332 red_end_of_idle_period(p);
333
334 /* Is queue small? */
335 if (p->qavg <= p->qth_min)
336 p->qcount = -1;
337 else {
338 unsigned int idx;
339
340 /* Draw a packet at random from queue and compare flow */
341 if (choke_match_random(q, skb, &idx)) {
342 q->stats.matched++;
343 choke_drop_by_idx(sch, idx);
344 goto congestion_drop;
345 }
346
347 /* Queue is large, always mark/drop */
348 if (p->qavg > p->qth_max) {
349 p->qcount = -1;
350
351 sch->qstats.overlimits++;
352 if (use_harddrop(q) || !use_ecn(q) ||
353 !INET_ECN_set_ce(skb)) {
354 q->stats.forced_drop++;
355 goto congestion_drop;
356 }
357
358 q->stats.forced_mark++;
359 } else if (++p->qcount) {
360 if (red_mark_probability(p, p->qavg)) {
361 p->qcount = 0;
362 p->qR = red_random(p);
363
364 sch->qstats.overlimits++;
365 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
366 q->stats.prob_drop++;
367 goto congestion_drop;
368 }
369
370 q->stats.prob_mark++;
371 }
372 } else
373 p->qR = red_random(p);
374 }
375
376 /* Admit new packet */
377 if (sch->q.qlen < q->limit) {
378 q->tab[q->tail] = skb;
379 q->tail = (q->tail + 1) & q->tab_mask;
380 ++sch->q.qlen;
381 sch->qstats.backlog += qdisc_pkt_len(skb);
382 return NET_XMIT_SUCCESS;
383 }
384
385 q->stats.pdrop++;
386 sch->qstats.drops++;
387 kfree_skb(skb);
388 return NET_XMIT_DROP;
389
390 congestion_drop:
391 qdisc_drop(skb, sch);
392 return NET_XMIT_CN;
393
394 other_drop:
395 if (ret & __NET_XMIT_BYPASS)
396 sch->qstats.drops++;
397 kfree_skb(skb);
398 return ret;
399}
400
401static struct sk_buff *choke_dequeue(struct Qdisc *sch)
402{
403 struct choke_sched_data *q = qdisc_priv(sch);
404 struct sk_buff *skb;
405
406 if (q->head == q->tail) {
407 if (!red_is_idling(&q->parms))
408 red_start_of_idle_period(&q->parms);
409 return NULL;
410 }
411
412 skb = q->tab[q->head];
413 q->tab[q->head] = NULL;
414 choke_zap_head_holes(q);
415 --sch->q.qlen;
416 sch->qstats.backlog -= qdisc_pkt_len(skb);
417 qdisc_bstats_update(sch, skb);
418
419 return skb;
420}
421
422static unsigned int choke_drop(struct Qdisc *sch)
423{
424 struct choke_sched_data *q = qdisc_priv(sch);
425 unsigned int len;
426
427 len = qdisc_queue_drop(sch);
428 if (len > 0)
429 q->stats.other++;
430 else {
431 if (!red_is_idling(&q->parms))
432 red_start_of_idle_period(&q->parms);
433 }
434
435 return len;
436}
437
438static void choke_reset(struct Qdisc *sch)
439{
440 struct choke_sched_data *q = qdisc_priv(sch);
441
442 red_restart(&q->parms);
443}
444
445static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
446 [TCA_CHOKE_PARMS] = { .len = sizeof(struct tc_red_qopt) },
447 [TCA_CHOKE_STAB] = { .len = RED_STAB_SIZE },
448};
449
450
451static void choke_free(void *addr)
452{
453 if (addr) {
454 if (is_vmalloc_addr(addr))
455 vfree(addr);
456 else
457 kfree(addr);
458 }
459}
460
461static int choke_change(struct Qdisc *sch, struct nlattr *opt)
462{
463 struct choke_sched_data *q = qdisc_priv(sch);
464 struct nlattr *tb[TCA_CHOKE_MAX + 1];
465 const struct tc_red_qopt *ctl;
466 int err;
467 struct sk_buff **old = NULL;
468 unsigned int mask;
469
470 if (opt == NULL)
471 return -EINVAL;
472
473 err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
474 if (err < 0)
475 return err;
476
477 if (tb[TCA_CHOKE_PARMS] == NULL ||
478 tb[TCA_CHOKE_STAB] == NULL)
479 return -EINVAL;
480
481 ctl = nla_data(tb[TCA_CHOKE_PARMS]);
482
483 if (ctl->limit > CHOKE_MAX_QUEUE)
484 return -EINVAL;
485
486 mask = roundup_pow_of_two(ctl->limit + 1) - 1;
487 if (mask != q->tab_mask) {
488 struct sk_buff **ntab;
489
490 ntab = kcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
491 if (!ntab)
492 ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
493 if (!ntab)
494 return -ENOMEM;
495
496 sch_tree_lock(sch);
497 old = q->tab;
498 if (old) {
499 unsigned int oqlen = sch->q.qlen, tail = 0;
500
501 while (q->head != q->tail) {
502 struct sk_buff *skb = q->tab[q->head];
503
504 q->head = (q->head + 1) & q->tab_mask;
505 if (!skb)
506 continue;
507 if (tail < mask) {
508 ntab[tail++] = skb;
509 continue;
510 }
511 sch->qstats.backlog -= qdisc_pkt_len(skb);
512 --sch->q.qlen;
513 qdisc_drop(skb, sch);
514 }
515 qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
516 q->head = 0;
517 q->tail = tail;
518 }
519
520 q->tab_mask = mask;
521 q->tab = ntab;
522 } else
523 sch_tree_lock(sch);
524
525 q->flags = ctl->flags;
526 q->limit = ctl->limit;
527
528 red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
529 ctl->Plog, ctl->Scell_log,
530 nla_data(tb[TCA_CHOKE_STAB]));
531
532 if (q->head == q->tail)
533 red_end_of_idle_period(&q->parms);
534
535 sch_tree_unlock(sch);
536 choke_free(old);
537 return 0;
538}
539
540static int choke_init(struct Qdisc *sch, struct nlattr *opt)
541{
542 return choke_change(sch, opt);
543}
544
545static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
546{
547 struct choke_sched_data *q = qdisc_priv(sch);
548 struct nlattr *opts = NULL;
549 struct tc_red_qopt opt = {
550 .limit = q->limit,
551 .flags = q->flags,
552 .qth_min = q->parms.qth_min >> q->parms.Wlog,
553 .qth_max = q->parms.qth_max >> q->parms.Wlog,
554 .Wlog = q->parms.Wlog,
555 .Plog = q->parms.Plog,
556 .Scell_log = q->parms.Scell_log,
557 };
558
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 if (opts == NULL)
561 goto nla_put_failure;
562
563 NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
564 return nla_nest_end(skb, opts);
565
566nla_put_failure:
567 nla_nest_cancel(skb, opts);
568 return -EMSGSIZE;
569}
570
571static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
572{
573 struct choke_sched_data *q = qdisc_priv(sch);
574 struct tc_choke_xstats st = {
575 .early = q->stats.prob_drop + q->stats.forced_drop,
576 .marked = q->stats.prob_mark + q->stats.forced_mark,
577 .pdrop = q->stats.pdrop,
578 .other = q->stats.other,
579 .matched = q->stats.matched,
580 };
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static void choke_destroy(struct Qdisc *sch)
586{
587 struct choke_sched_data *q = qdisc_priv(sch);
588
589 tcf_destroy_chain(&q->filter_list);
590 choke_free(q->tab);
591}
592
593static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
594{
595 return NULL;
596}
597
598static unsigned long choke_get(struct Qdisc *sch, u32 classid)
599{
600 return 0;
601}
602
603static void choke_put(struct Qdisc *q, unsigned long cl)
604{
605}
606
607static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
608 u32 classid)
609{
610 return 0;
611}
612
613static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
614{
615 struct choke_sched_data *q = qdisc_priv(sch);
616
617 if (cl)
618 return NULL;
619 return &q->filter_list;
620}
621
622static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
623 struct sk_buff *skb, struct tcmsg *tcm)
624{
625 tcm->tcm_handle |= TC_H_MIN(cl);
626 return 0;
627}
628
629static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
630{
631 if (!arg->stop) {
632 if (arg->fn(sch, 1, arg) < 0) {
633 arg->stop = 1;
634 return;
635 }
636 arg->count++;
637 }
638}
639
640static const struct Qdisc_class_ops choke_class_ops = {
641 .leaf = choke_leaf,
642 .get = choke_get,
643 .put = choke_put,
644 .tcf_chain = choke_find_tcf,
645 .bind_tcf = choke_bind,
646 .unbind_tcf = choke_put,
647 .dump = choke_dump_class,
648 .walk = choke_walk,
649};
650
651static struct sk_buff *choke_peek_head(struct Qdisc *sch)
652{
653 struct choke_sched_data *q = qdisc_priv(sch);
654
655 return (q->head != q->tail) ? q->tab[q->head] : NULL;
656}
657
658static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
659 .id = "choke",
660 .priv_size = sizeof(struct choke_sched_data),
661
662 .enqueue = choke_enqueue,
663 .dequeue = choke_dequeue,
664 .peek = choke_peek_head,
665 .drop = choke_drop,
666 .init = choke_init,
667 .destroy = choke_destroy,
668 .reset = choke_reset,
669 .change = choke_change,
670 .dump = choke_dump,
671 .dump_stats = choke_dump_stats,
672 .owner = THIS_MODULE,
673};
674
675static int __init choke_module_init(void)
676{
677 return register_qdisc(&choke_qdisc_ops);
678}
679
680static void __exit choke_module_exit(void)
681{
682 unregister_qdisc(&choke_qdisc_ops);
683}
684
685module_init(choke_module_init)
686module_exit(choke_module_exit)
687
688MODULE_LICENSE("GPL");
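The header comment of sch_choke.c above reduces to one extra step on top of RED: once the average queue exceeds qth_min, peek at a random queued packet and, if it belongs to the same flow as the arriving one, drop both, leaving a hole in the table rather than compacting it. A toy user-space model of just that decision; the flow ids, queue contents and thresholds are invented, and the real code compares headers via choke_match_flow()/skb_get_rxhash() rather than an integer flow id:

/* Toy model of the CHOKe matching step layered on a RED-like check.
 * Flow ids, queue contents and thresholds are invented for illustration.
 */
#include <stdio.h>
#include <stdlib.h>

#define QLEN 8

static int queue[QLEN] = { 1, 3, 1, 7, 1, 2, 5, 1 };  /* flow id per slot */

/* returns 1 if the arriving packet (and its random victim) should be dropped */
static int choke_decision(int arriving_flow, unsigned avg_qlen, unsigned qth_min)
{
	if (avg_qlen <= qth_min)
		return 0;                          /* queue small: plain enqueue */

	unsigned victim = (unsigned)rand() % QLEN; /* random peek */
	if (queue[victim] == arriving_flow) {
		queue[victim] = -1;                /* drop the queued packet (hole) */
		return 1;                          /* ...and the arriving one */
	}
	return 0;                                  /* no match: fall through to RED */
}

int main(void)
{
	srand(1);
	/* flow 1 dominates the queue, so it is likely to be hit */
	printf("drop both: %d\n", choke_decision(1, 6, 2));
	return 0;
}

The hole left at the victim slot is why the kernel keeps the table as a power-of-two ring with head/tail indices and the choke_zap_head_holes()/choke_zap_tail_holes() helpers that skip NULL entries, instead of shifting the remaining packets.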
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index b74046a95397..6b7fe4a84f13 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -110,7 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
110 cl->refcnt = 1; 110 cl->refcnt = 1;
111 cl->common.classid = classid; 111 cl->common.classid = classid;
112 cl->quantum = quantum; 112 cl->quantum = quantum;
113 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 113 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
114 &pfifo_qdisc_ops, classid); 114 &pfifo_qdisc_ops, classid);
115 if (cl->qdisc == NULL) 115 if (cl->qdisc == NULL)
116 cl->qdisc = &noop_qdisc; 116 cl->qdisc = &noop_qdisc;
@@ -218,7 +218,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
218 struct drr_class *cl = (struct drr_class *)arg; 218 struct drr_class *cl = (struct drr_class *)arg;
219 219
220 if (new == NULL) { 220 if (new == NULL) {
221 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 221 new = qdisc_create_dflt(sch->dev_queue,
222 &pfifo_qdisc_ops, cl->common.classid); 222 &pfifo_qdisc_ops, cl->common.classid);
223 if (new == NULL) 223 if (new == NULL)
224 new = &noop_qdisc; 224 new = &noop_qdisc;
@@ -351,7 +351,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
351{ 351{
352 struct drr_sched *q = qdisc_priv(sch); 352 struct drr_sched *q = qdisc_priv(sch);
353 struct drr_class *cl; 353 struct drr_class *cl;
354 unsigned int len;
355 int err; 354 int err;
356 355
357 cl = drr_classify(skb, sch, &err); 356 cl = drr_classify(skb, sch, &err);
@@ -362,7 +361,6 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
362 return err; 361 return err;
363 } 362 }
364 363
365 len = qdisc_pkt_len(skb);
366 err = qdisc_enqueue(skb, cl->qdisc); 364 err = qdisc_enqueue(skb, cl->qdisc);
367 if (unlikely(err != NET_XMIT_SUCCESS)) { 365 if (unlikely(err != NET_XMIT_SUCCESS)) {
368 if (net_xmit_drop_count(err)) { 366 if (net_xmit_drop_count(err)) {
@@ -377,10 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
377 cl->deficit = cl->quantum; 375 cl->deficit = cl->quantum;
378 } 376 }
379 377
380 cl->bstats.packets++; 378 bstats_update(&cl->bstats, skb);
381 cl->bstats.bytes += len;
382 sch->bstats.packets++;
383 sch->bstats.bytes += len;
384 379
385 sch->q.qlen++; 380 sch->q.qlen++;
386 return err; 381 return err;
@@ -407,6 +402,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch)
407 skb = qdisc_dequeue_peeked(cl->qdisc); 402 skb = qdisc_dequeue_peeked(cl->qdisc);
408 if (cl->qdisc->q.qlen == 0) 403 if (cl->qdisc->q.qlen == 0)
409 list_del(&cl->alist); 404 list_del(&cl->alist);
405 qdisc_bstats_update(sch, skb);
410 sch->q.qlen--; 406 sch->q.qlen--;
411 return skb; 407 return skb;
412 } 408 }
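
The sch_drr.c hunks above are part of a tree-wide statistics cleanup visible throughout this merge: instead of open-coding bytes/packets increments at enqueue time, classes call bstats_update() and the qdisc itself is charged with qdisc_bstats_update() on the dequeue path, so the counters describe traffic that actually left the scheduler. A minimal sketch of the two styles, using hypothetical helper names and the 2.6.38-era inlines from include/net/sch_generic.h:

#include <net/sch_generic.h>

/* Sketch: old-style accounting, as removed from drr_enqueue() above. */
static void charge_old_style(struct Qdisc *sch, struct sk_buff *skb)
{
	sch->bstats.bytes   += qdisc_pkt_len(skb);
	sch->bstats.packets++;
}

/* Sketch: new-style accounting, as added to drr_dequeue() above.
 * bstats_update()/qdisc_bstats_update() also count GSO segments,
 * which the open-coded drr version did not. */
static void charge_new_style(struct Qdisc *sch, struct sk_buff *skb)
{
	qdisc_bstats_update(sch, skb);	/* bstats_update(&sch->bstats, skb) */
}
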
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 63d41f86679c..2c790204d042 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -61,8 +61,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
61 sch, p, new, old); 61 sch, p, new, old);
62 62
63 if (new == NULL) { 63 if (new == NULL) {
64 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 64 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
65 &pfifo_qdisc_ops,
66 sch->handle); 65 sch->handle);
67 if (new == NULL) 66 if (new == NULL)
68 new = &noop_qdisc; 67 new = &noop_qdisc;
@@ -138,10 +137,10 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
138 mask = nla_get_u8(tb[TCA_DSMARK_MASK]); 137 mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
139 138
140 if (tb[TCA_DSMARK_VALUE]) 139 if (tb[TCA_DSMARK_VALUE])
141 p->value[*arg-1] = nla_get_u8(tb[TCA_DSMARK_VALUE]); 140 p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
142 141
143 if (tb[TCA_DSMARK_MASK]) 142 if (tb[TCA_DSMARK_MASK])
144 p->mask[*arg-1] = mask; 143 p->mask[*arg - 1] = mask;
145 144
146 err = 0; 145 err = 0;
147 146
@@ -156,8 +155,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
156 if (!dsmark_valid_index(p, arg)) 155 if (!dsmark_valid_index(p, arg))
157 return -EINVAL; 156 return -EINVAL;
158 157
159 p->mask[arg-1] = 0xff; 158 p->mask[arg - 1] = 0xff;
160 p->value[arg-1] = 0; 159 p->value[arg - 1] = 0;
161 160
162 return 0; 161 return 0;
163} 162}
@@ -176,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
176 if (p->mask[i] == 0xff && !p->value[i]) 175 if (p->mask[i] == 0xff && !p->value[i])
177 goto ignore; 176 goto ignore;
178 if (walker->count >= walker->skip) { 177 if (walker->count >= walker->skip) {
179 if (walker->fn(sch, i+1, walker) < 0) { 178 if (walker->fn(sch, i + 1, walker) < 0) {
180 walker->stop = 1; 179 walker->stop = 1;
181 break; 180 break;
182 } 181 }
@@ -261,8 +260,6 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
261 return err; 260 return err;
262 } 261 }
263 262
264 sch->bstats.bytes += qdisc_pkt_len(skb);
265 sch->bstats.packets++;
266 sch->q.qlen++; 263 sch->q.qlen++;
267 264
268 return NET_XMIT_SUCCESS; 265 return NET_XMIT_SUCCESS;
@@ -285,6 +282,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
285 if (skb == NULL) 282 if (skb == NULL)
286 return NULL; 283 return NULL;
287 284
285 qdisc_bstats_update(sch, skb);
288 sch->q.qlen--; 286 sch->q.qlen--;
289 287
290 index = skb->tc_index & (p->indices - 1); 288 index = skb->tc_index & (p->indices - 1);
@@ -306,9 +304,8 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
306 * and don't need yet another qdisc as a bypass. 304 * and don't need yet another qdisc as a bypass.
307 */ 305 */
308 if (p->mask[index] != 0xff || p->value[index]) 306 if (p->mask[index] != 0xff || p->value[index])
309 printk(KERN_WARNING 307 pr_warning("dsmark_dequeue: unsupported protocol %d\n",
310 "dsmark_dequeue: unsupported protocol %d\n", 308 ntohs(skb->protocol));
311 ntohs(skb->protocol));
312 break; 309 break;
313 } 310 }
314 311
@@ -384,8 +381,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
384 p->default_index = default_index; 381 p->default_index = default_index;
385 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); 382 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
386 383
387 p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 384 p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
388 &pfifo_qdisc_ops, sch->handle);
389 if (p->q == NULL) 385 if (p->q == NULL)
390 p->q = &noop_qdisc; 386 p->q = &noop_qdisc;
391 387
@@ -427,14 +423,14 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
427 if (!dsmark_valid_index(p, cl)) 423 if (!dsmark_valid_index(p, cl))
428 return -EINVAL; 424 return -EINVAL;
429 425
430 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl-1); 426 tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
431 tcm->tcm_info = p->q->handle; 427 tcm->tcm_info = p->q->handle;
432 428
433 opts = nla_nest_start(skb, TCA_OPTIONS); 429 opts = nla_nest_start(skb, TCA_OPTIONS);
434 if (opts == NULL) 430 if (opts == NULL)
435 goto nla_put_failure; 431 goto nla_put_failure;
436 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl-1]); 432 NLA_PUT_U8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]);
437 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl-1]); 433 NLA_PUT_U8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]);
438 434
439 return nla_nest_end(skb, opts); 435 return nla_nest_end(skb, opts);
440 436
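
Both the dsmark and drr hunks also show the other recurring change in this merge: qdisc_create_dflt() no longer takes a struct net_device argument, since the device is reachable from the netdev_queue that is still passed in. A sketch of the updated call pattern for attaching a default pfifo child (make_child() is a hypothetical wrapper, not a kernel function):

#include <net/pkt_sched.h>

/* Sketch of the post-change calling convention. */
static struct Qdisc *make_child(struct Qdisc *sch, u32 classid)
{
	struct Qdisc *q;

	/* old: qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, ops, classid) */
	q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
	if (q == NULL)
		q = &noop_qdisc;	/* fall back to the no-op qdisc, as above */

	return q;
}
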
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 5948bafa8ce2..66effe2da8e0 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -19,46 +19,30 @@
19 19
20/* 1 band FIFO pseudo-"scheduler" */ 20/* 1 band FIFO pseudo-"scheduler" */
21 21
22struct fifo_sched_data 22static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
23{ 23{
24 u32 limit; 24 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
25};
26
27static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch)
28{
29 struct fifo_sched_data *q = qdisc_priv(sch);
30
31 if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= q->limit))
32 return qdisc_enqueue_tail(skb, sch); 25 return qdisc_enqueue_tail(skb, sch);
33 26
34 return qdisc_reshape_fail(skb, sch); 27 return qdisc_reshape_fail(skb, sch);
35} 28}
36 29
37static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) 30static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
38{ 31{
39 struct fifo_sched_data *q = qdisc_priv(sch); 32 if (likely(skb_queue_len(&sch->q) < sch->limit))
40
41 if (likely(skb_queue_len(&sch->q) < q->limit))
42 return qdisc_enqueue_tail(skb, sch); 33 return qdisc_enqueue_tail(skb, sch);
43 34
44 return qdisc_reshape_fail(skb, sch); 35 return qdisc_reshape_fail(skb, sch);
45} 36}
46 37
47static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) 38static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
48{ 39{
49 struct sk_buff *skb_head; 40 if (likely(skb_queue_len(&sch->q) < sch->limit))
50 struct fifo_sched_data *q = qdisc_priv(sch);
51
52 if (likely(skb_queue_len(&sch->q) < q->limit))
53 return qdisc_enqueue_tail(skb, sch); 41 return qdisc_enqueue_tail(skb, sch);
54 42
55 /* queue full, remove one skb to fulfill the limit */ 43 /* queue full, remove one skb to fulfill the limit */
56 skb_head = qdisc_dequeue_head(sch); 44 __qdisc_queue_drop_head(sch, &sch->q);
57 sch->bstats.bytes -= qdisc_pkt_len(skb_head);
58 sch->bstats.packets--;
59 sch->qstats.drops++; 45 sch->qstats.drops++;
60 kfree_skb(skb_head);
61
62 qdisc_enqueue_tail(skb, sch); 46 qdisc_enqueue_tail(skb, sch);
63 47
64 return NET_XMIT_CN; 48 return NET_XMIT_CN;
@@ -66,31 +50,40 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch)
66 50
67static int fifo_init(struct Qdisc *sch, struct nlattr *opt) 51static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
68{ 52{
69 struct fifo_sched_data *q = qdisc_priv(sch); 53 bool bypass;
54 bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
70 55
71 if (opt == NULL) { 56 if (opt == NULL) {
72 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1; 57 u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
73 58
74 if (sch->ops == &bfifo_qdisc_ops) 59 if (is_bfifo)
75 limit *= psched_mtu(qdisc_dev(sch)); 60 limit *= psched_mtu(qdisc_dev(sch));
76 61
77 q->limit = limit; 62 sch->limit = limit;
78 } else { 63 } else {
79 struct tc_fifo_qopt *ctl = nla_data(opt); 64 struct tc_fifo_qopt *ctl = nla_data(opt);
80 65
81 if (nla_len(opt) < sizeof(*ctl)) 66 if (nla_len(opt) < sizeof(*ctl))
82 return -EINVAL; 67 return -EINVAL;
83 68
84 q->limit = ctl->limit; 69 sch->limit = ctl->limit;
85 } 70 }
86 71
72 if (is_bfifo)
73 bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
74 else
75 bypass = sch->limit >= 1;
76
77 if (bypass)
78 sch->flags |= TCQ_F_CAN_BYPASS;
79 else
80 sch->flags &= ~TCQ_F_CAN_BYPASS;
87 return 0; 81 return 0;
88} 82}
89 83
90static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb) 84static int fifo_dump(struct Qdisc *sch, struct sk_buff *skb)
91{ 85{
92 struct fifo_sched_data *q = qdisc_priv(sch); 86 struct tc_fifo_qopt opt = { .limit = sch->limit };
93 struct tc_fifo_qopt opt = { .limit = q->limit };
94 87
95 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 88 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
96 return skb->len; 89 return skb->len;
@@ -101,7 +94,7 @@ nla_put_failure:
101 94
102struct Qdisc_ops pfifo_qdisc_ops __read_mostly = { 95struct Qdisc_ops pfifo_qdisc_ops __read_mostly = {
103 .id = "pfifo", 96 .id = "pfifo",
104 .priv_size = sizeof(struct fifo_sched_data), 97 .priv_size = 0,
105 .enqueue = pfifo_enqueue, 98 .enqueue = pfifo_enqueue,
106 .dequeue = qdisc_dequeue_head, 99 .dequeue = qdisc_dequeue_head,
107 .peek = qdisc_peek_head, 100 .peek = qdisc_peek_head,
@@ -116,7 +109,7 @@ EXPORT_SYMBOL(pfifo_qdisc_ops);
116 109
117struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { 110struct Qdisc_ops bfifo_qdisc_ops __read_mostly = {
118 .id = "bfifo", 111 .id = "bfifo",
119 .priv_size = sizeof(struct fifo_sched_data), 112 .priv_size = 0,
120 .enqueue = bfifo_enqueue, 113 .enqueue = bfifo_enqueue,
121 .dequeue = qdisc_dequeue_head, 114 .dequeue = qdisc_dequeue_head,
122 .peek = qdisc_peek_head, 115 .peek = qdisc_peek_head,
@@ -131,7 +124,7 @@ EXPORT_SYMBOL(bfifo_qdisc_ops);
131 124
132struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { 125struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = {
133 .id = "pfifo_head_drop", 126 .id = "pfifo_head_drop",
134 .priv_size = sizeof(struct fifo_sched_data), 127 .priv_size = 0,
135 .enqueue = pfifo_tail_enqueue, 128 .enqueue = pfifo_tail_enqueue,
136 .dequeue = qdisc_dequeue_head, 129 .dequeue = qdisc_dequeue_head,
137 .peek = qdisc_peek_head, 130 .peek = qdisc_peek_head,
@@ -172,8 +165,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
172 struct Qdisc *q; 165 struct Qdisc *q;
173 int err = -ENOMEM; 166 int err = -ENOMEM;
174 167
175 q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 168 q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1));
176 ops, TC_H_MAKE(sch->handle, 1));
177 if (q) { 169 if (q) {
178 err = fifo_set_limit(q, limit); 170 err = fifo_set_limit(q, limit);
179 if (err < 0) { 171 if (err < 0) {
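
With the private struct fifo_sched_data gone, the three fifo flavours keep their limit in sch->limit and can advertise TCQ_F_CAN_BYPASS whenever a single packet can never exceed that limit (at least one MTU's worth of bytes for bfifo, at least one slot for pfifo). The decision from fifo_init() above, pulled out into a hypothetical helper purely for illustration:

#include <net/pkt_sched.h>

/* Sketch: the bypass test from fifo_init(), in isolation. */
static void fifo_update_bypass(struct Qdisc *sch, bool is_bfifo)
{
	bool bypass;

	if (is_bfifo)		/* byte-limited fifo */
		bypass = sch->limit >= psched_mtu(qdisc_dev(sch));
	else			/* packet-limited fifo */
		bypass = sch->limit >= 1;

	if (bypass)
		sch->flags |= TCQ_F_CAN_BYPASS;
	else
		sch->flags &= ~TCQ_F_CAN_BYPASS;
}
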
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4386a1..b4c680900d7a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,8 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
60 60
61 /* check the reason of requeuing without tx lock first */ 61 /* check the reason of requeuing without tx lock first */
62 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); 62 txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
63 if (!netif_tx_queue_stopped(txq) && 63 if (!netif_tx_queue_frozen_or_stopped(txq)) {
64 !netif_tx_queue_frozen(txq)) {
65 q->gso_skb = NULL; 64 q->gso_skb = NULL;
66 q->q.qlen--; 65 q->q.qlen--;
67 } else 66 } else
@@ -88,8 +87,8 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
88 */ 87 */
89 kfree_skb(skb); 88 kfree_skb(skb);
90 if (net_ratelimit()) 89 if (net_ratelimit())
91 printk(KERN_WARNING "Dead loop on netdevice %s, " 90 pr_warning("Dead loop on netdevice %s, fix it urgently!\n",
92 "fix it urgently!\n", dev_queue->dev->name); 91 dev_queue->dev->name);
93 ret = qdisc_qlen(q); 92 ret = qdisc_qlen(q);
94 } else { 93 } else {
95 /* 94 /*
@@ -122,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
122 spin_unlock(root_lock); 121 spin_unlock(root_lock);
123 122
124 HARD_TX_LOCK(dev, txq, smp_processor_id()); 123 HARD_TX_LOCK(dev, txq, smp_processor_id());
125 if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) 124 if (!netif_tx_queue_frozen_or_stopped(txq))
126 ret = dev_hard_start_xmit(skb, dev, txq); 125 ret = dev_hard_start_xmit(skb, dev, txq);
127 126
128 HARD_TX_UNLOCK(dev, txq); 127 HARD_TX_UNLOCK(dev, txq);
@@ -138,14 +137,13 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
138 } else { 137 } else {
139 /* Driver returned NETDEV_TX_BUSY - requeue skb */ 138 /* Driver returned NETDEV_TX_BUSY - requeue skb */
140 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) 139 if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
141 printk(KERN_WARNING "BUG %s code %d qlen %d\n", 140 pr_warning("BUG %s code %d qlen %d\n",
142 dev->name, ret, q->q.qlen); 141 dev->name, ret, q->q.qlen);
143 142
144 ret = dev_requeue_skb(skb, q); 143 ret = dev_requeue_skb(skb, q);
145 } 144 }
146 145
147 if (ret && (netif_tx_queue_stopped(txq) || 146 if (ret && netif_tx_queue_frozen_or_stopped(txq))
148 netif_tx_queue_frozen(txq)))
149 ret = 0; 147 ret = 0;
150 148
151 return ret; 149 return ret;
@@ -253,9 +251,8 @@ static void dev_watchdog(unsigned long arg)
253 } 251 }
254 252
255 if (some_queue_timedout) { 253 if (some_queue_timedout) {
256 char drivername[64];
257 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n", 254 WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
258 dev->name, netdev_drivername(dev, drivername, 64), i); 255 dev->name, netdev_drivername(dev), i);
259 dev->netdev_ops->ndo_tx_timeout(dev); 256 dev->netdev_ops->ndo_tx_timeout(dev);
260 } 257 }
261 if (!mod_timer(&dev->watchdog_timer, 258 if (!mod_timer(&dev->watchdog_timer,
@@ -383,6 +380,7 @@ struct Qdisc noop_qdisc = {
383 .list = LIST_HEAD_INIT(noop_qdisc.list), 380 .list = LIST_HEAD_INIT(noop_qdisc.list),
384 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), 381 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
385 .dev_queue = &noop_netdev_queue, 382 .dev_queue = &noop_netdev_queue,
383 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
386}; 384};
387EXPORT_SYMBOL(noop_qdisc); 385EXPORT_SYMBOL(noop_qdisc);
388 386
@@ -409,11 +407,13 @@ static struct Qdisc noqueue_qdisc = {
409 .list = LIST_HEAD_INIT(noqueue_qdisc.list), 407 .list = LIST_HEAD_INIT(noqueue_qdisc.list),
410 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), 408 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
411 .dev_queue = &noqueue_netdev_queue, 409 .dev_queue = &noqueue_netdev_queue,
410 .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
412}; 411};
413 412
414 413
415static const u8 prio2band[TC_PRIO_MAX+1] = 414static const u8 prio2band[TC_PRIO_MAX + 1] = {
416 { 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 }; 415 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
416};
417 417
418/* 3-band FIFO queue: old style, but should be a bit faster than 418/* 3-band FIFO queue: old style, but should be a bit faster than
419 generic prio+fifo combination. 419 generic prio+fifo combination.
@@ -445,7 +445,7 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv,
445 return priv->q + band; 445 return priv->q + band;
446} 446}
447 447
448static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc) 448static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
449{ 449{
450 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { 450 if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) {
451 int band = prio2band[skb->priority & TC_PRIO_MAX]; 451 int band = prio2band[skb->priority & TC_PRIO_MAX];
@@ -460,7 +460,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
460 return qdisc_drop(skb, qdisc); 460 return qdisc_drop(skb, qdisc);
461} 461}
462 462
463static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc) 463static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
464{ 464{
465 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 465 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
466 int band = bitmap2band[priv->bitmap]; 466 int band = bitmap2band[priv->bitmap];
@@ -479,7 +479,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
479 return NULL; 479 return NULL;
480} 480}
481 481
482static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc) 482static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
483{ 483{
484 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 484 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
485 int band = bitmap2band[priv->bitmap]; 485 int band = bitmap2band[priv->bitmap];
@@ -493,7 +493,7 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
493 return NULL; 493 return NULL;
494} 494}
495 495
496static void pfifo_fast_reset(struct Qdisc* qdisc) 496static void pfifo_fast_reset(struct Qdisc *qdisc)
497{ 497{
498 int prio; 498 int prio;
499 struct pfifo_fast_priv *priv = qdisc_priv(qdisc); 499 struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
@@ -510,7 +510,7 @@ static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
510{ 510{
511 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS }; 511 struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };
512 512
513 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1); 513 memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
514 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 514 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
515 return skb->len; 515 return skb->len;
516 516
@@ -526,6 +526,8 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
526 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) 526 for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
527 skb_queue_head_init(band2list(priv, prio)); 527 skb_queue_head_init(band2list(priv, prio));
528 528
529 /* Can by-pass the queue discipline */
530 qdisc->flags |= TCQ_F_CAN_BYPASS;
529 return 0; 531 return 0;
530} 532}
531 533
@@ -540,25 +542,32 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
540 .dump = pfifo_fast_dump, 542 .dump = pfifo_fast_dump,
541 .owner = THIS_MODULE, 543 .owner = THIS_MODULE,
542}; 544};
545EXPORT_SYMBOL(pfifo_fast_ops);
543 546
544struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, 547struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
545 struct Qdisc_ops *ops) 548 struct Qdisc_ops *ops)
546{ 549{
547 void *p; 550 void *p;
548 struct Qdisc *sch; 551 struct Qdisc *sch;
549 unsigned int size; 552 unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
550 int err = -ENOBUFS; 553 int err = -ENOBUFS;
551 554
552 /* ensure that the Qdisc and the private data are 64-byte aligned */ 555 p = kzalloc_node(size, GFP_KERNEL,
553 size = QDISC_ALIGN(sizeof(*sch)); 556 netdev_queue_numa_node_read(dev_queue));
554 size += ops->priv_size + (QDISC_ALIGNTO - 1);
555 557
556 p = kzalloc(size, GFP_KERNEL);
557 if (!p) 558 if (!p)
558 goto errout; 559 goto errout;
559 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); 560 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
560 sch->padded = (char *) sch - (char *) p; 561 /* if we got non aligned memory, ask more and do alignment ourself */
561 562 if (sch != p) {
563 kfree(p);
564 p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
565 netdev_queue_numa_node_read(dev_queue));
566 if (!p)
567 goto errout;
568 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
569 sch->padded = (char *) sch - (char *) p;
570 }
562 INIT_LIST_HEAD(&sch->list); 571 INIT_LIST_HEAD(&sch->list);
563 skb_queue_head_init(&sch->q); 572 skb_queue_head_init(&sch->q);
564 spin_lock_init(&sch->busylock); 573 spin_lock_init(&sch->busylock);
@@ -574,10 +583,8 @@ errout:
574 return ERR_PTR(err); 583 return ERR_PTR(err);
575} 584}
576 585
577struct Qdisc * qdisc_create_dflt(struct net_device *dev, 586struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
578 struct netdev_queue *dev_queue, 587 struct Qdisc_ops *ops, unsigned int parentid)
579 struct Qdisc_ops *ops,
580 unsigned int parentid)
581{ 588{
582 struct Qdisc *sch; 589 struct Qdisc *sch;
583 590
@@ -630,7 +637,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
630#ifdef CONFIG_NET_SCHED 637#ifdef CONFIG_NET_SCHED
631 qdisc_list_del(qdisc); 638 qdisc_list_del(qdisc);
632 639
633 qdisc_put_stab(qdisc->stab); 640 qdisc_put_stab(rtnl_dereference(qdisc->stab));
634#endif 641#endif
635 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); 642 gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
636 if (ops->reset) 643 if (ops->reset)
@@ -674,25 +681,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
674 681
675 return oqdisc; 682 return oqdisc;
676} 683}
684EXPORT_SYMBOL(dev_graft_qdisc);
677 685
678static void attach_one_default_qdisc(struct net_device *dev, 686static void attach_one_default_qdisc(struct net_device *dev,
679 struct netdev_queue *dev_queue, 687 struct netdev_queue *dev_queue,
680 void *_unused) 688 void *_unused)
681{ 689{
682 struct Qdisc *qdisc; 690 struct Qdisc *qdisc = &noqueue_qdisc;
683 691
684 if (dev->tx_queue_len) { 692 if (dev->tx_queue_len) {
685 qdisc = qdisc_create_dflt(dev, dev_queue, 693 qdisc = qdisc_create_dflt(dev_queue,
686 &pfifo_fast_ops, TC_H_ROOT); 694 &pfifo_fast_ops, TC_H_ROOT);
687 if (!qdisc) { 695 if (!qdisc) {
688 printk(KERN_INFO "%s: activation failed\n", dev->name); 696 netdev_info(dev, "activation failed\n");
689 return; 697 return;
690 } 698 }
691
692 /* Can by-pass the queue discipline for default qdisc */
693 qdisc->flags |= TCQ_F_CAN_BYPASS;
694 } else {
695 qdisc = &noqueue_qdisc;
696 } 699 }
697 dev_queue->qdisc_sleeping = qdisc; 700 dev_queue->qdisc_sleeping = qdisc;
698} 701}
@@ -709,7 +712,7 @@ static void attach_default_qdiscs(struct net_device *dev)
709 dev->qdisc = txq->qdisc_sleeping; 712 dev->qdisc = txq->qdisc_sleeping;
710 atomic_inc(&dev->qdisc->refcnt); 713 atomic_inc(&dev->qdisc->refcnt);
711 } else { 714 } else {
712 qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); 715 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
713 if (qdisc) { 716 if (qdisc) {
714 qdisc->ops->attach(qdisc); 717 qdisc->ops->attach(qdisc);
715 dev->qdisc = qdisc; 718 dev->qdisc = qdisc;
@@ -753,13 +756,15 @@ void dev_activate(struct net_device *dev)
753 756
754 need_watchdog = 0; 757 need_watchdog = 0;
755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); 758 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
756 transition_one_qdisc(dev, &dev->rx_queue, NULL); 759 if (dev_ingress_queue(dev))
760 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
757 761
758 if (need_watchdog) { 762 if (need_watchdog) {
759 dev->trans_start = jiffies; 763 dev->trans_start = jiffies;
760 dev_watchdog_up(dev); 764 dev_watchdog_up(dev);
761 } 765 }
762} 766}
767EXPORT_SYMBOL(dev_activate);
763 768
764static void dev_deactivate_queue(struct net_device *dev, 769static void dev_deactivate_queue(struct net_device *dev,
765 struct netdev_queue *dev_queue, 770 struct netdev_queue *dev_queue,
@@ -809,20 +814,51 @@ static bool some_qdisc_is_busy(struct net_device *dev)
809 return false; 814 return false;
810} 815}
811 816
812void dev_deactivate(struct net_device *dev) 817/**
818 * dev_deactivate_many - deactivate transmissions on several devices
819 * @head: list of devices to deactivate
820 *
821 * This function returns only when all outstanding transmissions
822 * have completed, unless all devices are in dismantle phase.
823 */
824void dev_deactivate_many(struct list_head *head)
813{ 825{
814 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); 826 struct net_device *dev;
815 dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); 827 bool sync_needed = false;
816 828
817 dev_watchdog_down(dev); 829 list_for_each_entry(dev, head, unreg_list) {
830 netdev_for_each_tx_queue(dev, dev_deactivate_queue,
831 &noop_qdisc);
832 if (dev_ingress_queue(dev))
833 dev_deactivate_queue(dev, dev_ingress_queue(dev),
834 &noop_qdisc);
818 835
819 /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ 836 dev_watchdog_down(dev);
820 synchronize_rcu(); 837 sync_needed |= !dev->dismantle;
838 }
839
840 /* Wait for outstanding qdisc-less dev_queue_xmit calls.
841 * This is avoided if all devices are in dismantle phase :
842 * Caller will call synchronize_net() for us
843 */
844 if (sync_needed)
845 synchronize_net();
821 846
822 /* Wait for outstanding qdisc_run calls. */ 847 /* Wait for outstanding qdisc_run calls. */
823 while (some_qdisc_is_busy(dev)) 848 list_for_each_entry(dev, head, unreg_list)
824 yield(); 849 while (some_qdisc_is_busy(dev))
850 yield();
851}
852
853void dev_deactivate(struct net_device *dev)
854{
855 LIST_HEAD(single);
856
857 list_add(&dev->unreg_list, &single);
858 dev_deactivate_many(&single);
859 list_del(&single);
825} 860}
861EXPORT_SYMBOL(dev_deactivate);
826 862
827static void dev_init_scheduler_queue(struct net_device *dev, 863static void dev_init_scheduler_queue(struct net_device *dev,
828 struct netdev_queue *dev_queue, 864 struct netdev_queue *dev_queue,
@@ -838,7 +874,8 @@ void dev_init_scheduler(struct net_device *dev)
838{ 874{
839 dev->qdisc = &noop_qdisc; 875 dev->qdisc = &noop_qdisc;
840 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); 876 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
841 dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 877 if (dev_ingress_queue(dev))
878 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
842 879
843 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 880 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
844} 881}
@@ -861,7 +898,8 @@ static void shutdown_scheduler_queue(struct net_device *dev,
861void dev_shutdown(struct net_device *dev) 898void dev_shutdown(struct net_device *dev)
862{ 899{
863 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); 900 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
864 shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 901 if (dev_ingress_queue(dev))
902 shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
865 qdisc_destroy(dev->qdisc); 903 qdisc_destroy(dev->qdisc);
866 dev->qdisc = &noop_qdisc; 904 dev->qdisc = &noop_qdisc;
867 905
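
Two things stand out in the sch_generic.c diff: dev_deactivate() becomes a thin wrapper around dev_deactivate_many() so whole lists of devices can be torn down with a single synchronize_net(), and qdisc_alloc() becomes NUMA-aware. For the latter, the allocator first tries a plain kzalloc_node() on the queue's node and only falls back to over-allocating and aligning by hand when the result is not QDISC_ALIGNTO-aligned. A condensed sketch of that pattern (alloc_aligned_qdisc() is a hypothetical name; error handling is trimmed):

#include <linux/slab.h>
#include <net/pkt_sched.h>

/* Sketch of the allocation pattern used by qdisc_alloc() above. */
static struct Qdisc *alloc_aligned_qdisc(struct netdev_queue *dev_queue,
					 const struct Qdisc_ops *ops)
{
	unsigned int size = QDISC_ALIGN(sizeof(struct Qdisc)) + ops->priv_size;
	int node = netdev_queue_numa_node_read(dev_queue);
	struct Qdisc *sch;
	void *p;

	p = kzalloc_node(size, GFP_KERNEL, node);
	if (!p)
		return NULL;
	sch = (struct Qdisc *)QDISC_ALIGN((unsigned long)p);

	if (sch != p) {
		/* got unaligned memory: ask for slack and align ourselves */
		kfree(p);
		p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL, node);
		if (!p)
			return NULL;
		sch = (struct Qdisc *)QDISC_ALIGN((unsigned long)p);
		sch->padded = (char *)sch - (char *)p;
	}

	return sch;
}

The comment removed from qdisc_alloc() spelled out the goal (keep the Qdisc and its private data 64-byte aligned); the new code gets the memory on the right NUMA node first and only pays the padding cost when the allocator hands back something unaligned.
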
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 51dcc2aa5c92..b9493a09a870 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -32,8 +32,7 @@
32struct gred_sched_data; 32struct gred_sched_data;
33struct gred_sched; 33struct gred_sched;
34 34
35struct gred_sched_data 35struct gred_sched_data {
36{
37 u32 limit; /* HARD maximal queue length */ 36 u32 limit; /* HARD maximal queue length */
38 u32 DP; /* the drop pramaters */ 37 u32 DP; /* the drop pramaters */
39 u32 bytesin; /* bytes seen on virtualQ so far*/ 38 u32 bytesin; /* bytes seen on virtualQ so far*/
@@ -50,8 +49,7 @@ enum {
50 GRED_RIO_MODE, 49 GRED_RIO_MODE,
51}; 50};
52 51
53struct gred_sched 52struct gred_sched {
54{
55 struct gred_sched_data *tab[MAX_DPs]; 53 struct gred_sched_data *tab[MAX_DPs];
56 unsigned long flags; 54 unsigned long flags;
57 u32 red_flags; 55 u32 red_flags;
@@ -150,17 +148,18 @@ static inline int gred_use_harddrop(struct gred_sched *t)
150 return t->red_flags & TC_RED_HARDDROP; 148 return t->red_flags & TC_RED_HARDDROP;
151} 149}
152 150
153static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch) 151static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
154{ 152{
155 struct gred_sched_data *q=NULL; 153 struct gred_sched_data *q = NULL;
156 struct gred_sched *t= qdisc_priv(sch); 154 struct gred_sched *t = qdisc_priv(sch);
157 unsigned long qavg = 0; 155 unsigned long qavg = 0;
158 u16 dp = tc_index_to_dp(skb); 156 u16 dp = tc_index_to_dp(skb);
159 157
160 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 158 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
161 dp = t->def; 159 dp = t->def;
162 160
163 if ((q = t->tab[dp]) == NULL) { 161 q = t->tab[dp];
162 if (!q) {
164 /* Pass through packets not assigned to a DP 163 /* Pass through packets not assigned to a DP
165 * if no default DP has been configured. This 164 * if no default DP has been configured. This
166 * allows for DP flows to be left untouched. 165 * allows for DP flows to be left untouched.
@@ -183,7 +182,7 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
183 for (i = 0; i < t->DPs; i++) { 182 for (i = 0; i < t->DPs; i++) {
184 if (t->tab[i] && t->tab[i]->prio < q->prio && 183 if (t->tab[i] && t->tab[i]->prio < q->prio &&
185 !red_is_idling(&t->tab[i]->parms)) 184 !red_is_idling(&t->tab[i]->parms))
186 qavg +=t->tab[i]->parms.qavg; 185 qavg += t->tab[i]->parms.qavg;
187 } 186 }
188 187
189 } 188 }
@@ -203,28 +202,28 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
203 gred_store_wred_set(t, q); 202 gred_store_wred_set(t, q);
204 203
205 switch (red_action(&q->parms, q->parms.qavg + qavg)) { 204 switch (red_action(&q->parms, q->parms.qavg + qavg)) {
206 case RED_DONT_MARK: 205 case RED_DONT_MARK:
207 break; 206 break;
208 207
209 case RED_PROB_MARK: 208 case RED_PROB_MARK:
210 sch->qstats.overlimits++; 209 sch->qstats.overlimits++;
211 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) { 210 if (!gred_use_ecn(t) || !INET_ECN_set_ce(skb)) {
212 q->stats.prob_drop++; 211 q->stats.prob_drop++;
213 goto congestion_drop; 212 goto congestion_drop;
214 } 213 }
215 214
216 q->stats.prob_mark++; 215 q->stats.prob_mark++;
217 break; 216 break;
218 217
219 case RED_HARD_MARK: 218 case RED_HARD_MARK:
220 sch->qstats.overlimits++; 219 sch->qstats.overlimits++;
221 if (gred_use_harddrop(t) || !gred_use_ecn(t) || 220 if (gred_use_harddrop(t) || !gred_use_ecn(t) ||
222 !INET_ECN_set_ce(skb)) { 221 !INET_ECN_set_ce(skb)) {
223 q->stats.forced_drop++; 222 q->stats.forced_drop++;
224 goto congestion_drop; 223 goto congestion_drop;
225 } 224 }
226 q->stats.forced_mark++; 225 q->stats.forced_mark++;
227 break; 226 break;
228 } 227 }
229 228
230 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) { 229 if (q->backlog + qdisc_pkt_len(skb) <= q->limit) {
@@ -241,7 +240,7 @@ congestion_drop:
241 return NET_XMIT_CN; 240 return NET_XMIT_CN;
242} 241}
243 242
244static struct sk_buff *gred_dequeue(struct Qdisc* sch) 243static struct sk_buff *gred_dequeue(struct Qdisc *sch)
245{ 244{
246 struct sk_buff *skb; 245 struct sk_buff *skb;
247 struct gred_sched *t = qdisc_priv(sch); 246 struct gred_sched *t = qdisc_priv(sch);
@@ -254,9 +253,9 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
254 253
255 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 254 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
256 if (net_ratelimit()) 255 if (net_ratelimit())
257 printk(KERN_WARNING "GRED: Unable to relocate " 256 pr_warning("GRED: Unable to relocate VQ 0x%x "
258 "VQ 0x%x after dequeue, screwing up " 257 "after dequeue, screwing up "
259 "backlog.\n", tc_index_to_dp(skb)); 258 "backlog.\n", tc_index_to_dp(skb));
260 } else { 259 } else {
261 q->backlog -= qdisc_pkt_len(skb); 260 q->backlog -= qdisc_pkt_len(skb);
262 261
@@ -273,7 +272,7 @@ static struct sk_buff *gred_dequeue(struct Qdisc* sch)
273 return NULL; 272 return NULL;
274} 273}
275 274
276static unsigned int gred_drop(struct Qdisc* sch) 275static unsigned int gred_drop(struct Qdisc *sch)
277{ 276{
278 struct sk_buff *skb; 277 struct sk_buff *skb;
279 struct gred_sched *t = qdisc_priv(sch); 278 struct gred_sched *t = qdisc_priv(sch);
@@ -286,9 +285,9 @@ static unsigned int gred_drop(struct Qdisc* sch)
286 285
287 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) { 286 if (dp >= t->DPs || (q = t->tab[dp]) == NULL) {
288 if (net_ratelimit()) 287 if (net_ratelimit())
289 printk(KERN_WARNING "GRED: Unable to relocate " 288 pr_warning("GRED: Unable to relocate VQ 0x%x "
290 "VQ 0x%x while dropping, screwing up " 289 "while dropping, screwing up "
291 "backlog.\n", tc_index_to_dp(skb)); 290 "backlog.\n", tc_index_to_dp(skb));
292 } else { 291 } else {
293 q->backlog -= len; 292 q->backlog -= len;
294 q->stats.other++; 293 q->stats.other++;
@@ -308,7 +307,7 @@ static unsigned int gred_drop(struct Qdisc* sch)
308 307
309} 308}
310 309
311static void gred_reset(struct Qdisc* sch) 310static void gred_reset(struct Qdisc *sch)
312{ 311{
313 int i; 312 int i;
314 struct gred_sched *t = qdisc_priv(sch); 313 struct gred_sched *t = qdisc_priv(sch);
@@ -369,8 +368,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
369 368
370 for (i = table->DPs; i < MAX_DPs; i++) { 369 for (i = table->DPs; i < MAX_DPs; i++) {
371 if (table->tab[i]) { 370 if (table->tab[i]) {
372 printk(KERN_WARNING "GRED: Warning: Destroying " 371 pr_warning("GRED: Warning: Destroying "
373 "shadowed VQ 0x%x\n", i); 372 "shadowed VQ 0x%x\n", i);
374 gred_destroy_vq(table->tab[i]); 373 gred_destroy_vq(table->tab[i]);
375 table->tab[i] = NULL; 374 table->tab[i] = NULL;
376 } 375 }
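
Most of the sch_gred.c churn is mechanical checkpatch cleanup: opening braces move onto the struct line, assignments leave the if () conditions, and printk(KERN_WARNING ...) becomes pr_warning(). The logging conversion, side by side (warn_both() is just an illustrative stub):

#include <linux/kernel.h>

/* Sketch: equivalent messages before and after the conversion. */
static void warn_both(u16 dp)
{
	printk(KERN_WARNING "GRED: Unable to relocate VQ 0x%x\n", dp);	/* old */
	pr_warning("GRED: Unable to relocate VQ 0x%x\n", dp);		/* new */
}
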
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 47496098d35c..6488e6425652 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -81,8 +81,7 @@
81 * that are expensive on 32-bit architectures. 81 * that are expensive on 32-bit architectures.
82 */ 82 */
83 83
84struct internal_sc 84struct internal_sc {
85{
86 u64 sm1; /* scaled slope of the 1st segment */ 85 u64 sm1; /* scaled slope of the 1st segment */
87 u64 ism1; /* scaled inverse-slope of the 1st segment */ 86 u64 ism1; /* scaled inverse-slope of the 1st segment */
88 u64 dx; /* the x-projection of the 1st segment */ 87 u64 dx; /* the x-projection of the 1st segment */
@@ -92,8 +91,7 @@ struct internal_sc
92}; 91};
93 92
94/* runtime service curve */ 93/* runtime service curve */
95struct runtime_sc 94struct runtime_sc {
96{
97 u64 x; /* current starting position on x-axis */ 95 u64 x; /* current starting position on x-axis */
98 u64 y; /* current starting position on y-axis */ 96 u64 y; /* current starting position on y-axis */
99 u64 sm1; /* scaled slope of the 1st segment */ 97 u64 sm1; /* scaled slope of the 1st segment */
@@ -104,15 +102,13 @@ struct runtime_sc
104 u64 ism2; /* scaled inverse-slope of the 2nd segment */ 102 u64 ism2; /* scaled inverse-slope of the 2nd segment */
105}; 103};
106 104
107enum hfsc_class_flags 105enum hfsc_class_flags {
108{
109 HFSC_RSC = 0x1, 106 HFSC_RSC = 0x1,
110 HFSC_FSC = 0x2, 107 HFSC_FSC = 0x2,
111 HFSC_USC = 0x4 108 HFSC_USC = 0x4
112}; 109};
113 110
114struct hfsc_class 111struct hfsc_class {
115{
116 struct Qdisc_class_common cl_common; 112 struct Qdisc_class_common cl_common;
117 unsigned int refcnt; /* usage count */ 113 unsigned int refcnt; /* usage count */
118 114
@@ -140,8 +136,8 @@ struct hfsc_class
140 u64 cl_cumul; /* cumulative work in bytes done by 136 u64 cl_cumul; /* cumulative work in bytes done by
141 real-time criteria */ 137 real-time criteria */
142 138
143 u64 cl_d; /* deadline*/ 139 u64 cl_d; /* deadline*/
144 u64 cl_e; /* eligible time */ 140 u64 cl_e; /* eligible time */
145 u64 cl_vt; /* virtual time */ 141 u64 cl_vt; /* virtual time */
146 u64 cl_f; /* time when this class will fit for 142 u64 cl_f; /* time when this class will fit for
147 link-sharing, max(myf, cfmin) */ 143 link-sharing, max(myf, cfmin) */
@@ -176,8 +172,7 @@ struct hfsc_class
176 unsigned long cl_nactive; /* number of active children */ 172 unsigned long cl_nactive; /* number of active children */
177}; 173};
178 174
179struct hfsc_sched 175struct hfsc_sched {
180{
181 u16 defcls; /* default class id */ 176 u16 defcls; /* default class id */
182 struct hfsc_class root; /* root class */ 177 struct hfsc_class root; /* root class */
183 struct Qdisc_class_hash clhash; /* class hash */ 178 struct Qdisc_class_hash clhash; /* class hash */
@@ -693,7 +688,7 @@ init_vf(struct hfsc_class *cl, unsigned int len)
693 if (go_active) { 688 if (go_active) {
694 n = rb_last(&cl->cl_parent->vt_tree); 689 n = rb_last(&cl->cl_parent->vt_tree);
695 if (n != NULL) { 690 if (n != NULL) {
696 max_cl = rb_entry(n, struct hfsc_class,vt_node); 691 max_cl = rb_entry(n, struct hfsc_class, vt_node);
697 /* 692 /*
698 * set vt to the average of the min and max 693 * set vt to the average of the min and max
699 * classes. if the parent's period didn't 694 * classes. if the parent's period didn't
@@ -1088,7 +1083,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1088 cl->refcnt = 1; 1083 cl->refcnt = 1;
1089 cl->sched = q; 1084 cl->sched = q;
1090 cl->cl_parent = parent; 1085 cl->cl_parent = parent;
1091 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1086 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
1092 &pfifo_qdisc_ops, classid); 1087 &pfifo_qdisc_ops, classid);
1093 if (cl->qdisc == NULL) 1088 if (cl->qdisc == NULL)
1094 cl->qdisc = &noop_qdisc; 1089 cl->qdisc = &noop_qdisc;
@@ -1177,8 +1172,10 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
1177 return NULL; 1172 return NULL;
1178 } 1173 }
1179#endif 1174#endif
1180 if ((cl = (struct hfsc_class *)res.class) == NULL) { 1175 cl = (struct hfsc_class *)res.class;
1181 if ((cl = hfsc_find_class(res.classid, sch)) == NULL) 1176 if (!cl) {
1177 cl = hfsc_find_class(res.classid, sch);
1178 if (!cl)
1182 break; /* filter selected invalid classid */ 1179 break; /* filter selected invalid classid */
1183 if (cl->level >= head->level) 1180 if (cl->level >= head->level)
1184 break; /* filter may only point downwards */ 1181 break; /* filter may only point downwards */
@@ -1209,8 +1206,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1209 if (cl->level > 0) 1206 if (cl->level > 0)
1210 return -EINVAL; 1207 return -EINVAL;
1211 if (new == NULL) { 1208 if (new == NULL) {
1212 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1209 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1213 &pfifo_qdisc_ops,
1214 cl->cl_common.classid); 1210 cl->cl_common.classid);
1215 if (new == NULL) 1211 if (new == NULL)
1216 new = &noop_qdisc; 1212 new = &noop_qdisc;
@@ -1317,7 +1313,7 @@ hfsc_dump_sc(struct sk_buff *skb, int attr, struct internal_sc *sc)
1317 return -1; 1313 return -1;
1318} 1314}
1319 1315
1320static inline int 1316static int
1321hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl) 1317hfsc_dump_curves(struct sk_buff *skb, struct hfsc_class *cl)
1322{ 1318{
1323 if ((cl->cl_flags & HFSC_RSC) && 1319 if ((cl->cl_flags & HFSC_RSC) &&
@@ -1421,7 +1417,8 @@ hfsc_schedule_watchdog(struct Qdisc *sch)
1421 struct hfsc_class *cl; 1417 struct hfsc_class *cl;
1422 u64 next_time = 0; 1418 u64 next_time = 0;
1423 1419
1424 if ((cl = eltree_get_minel(q)) != NULL) 1420 cl = eltree_get_minel(q);
1421 if (cl)
1425 next_time = cl->cl_e; 1422 next_time = cl->cl_e;
1426 if (q->root.cl_cfmin != 0) { 1423 if (q->root.cl_cfmin != 0) {
1427 if (next_time == 0 || next_time > q->root.cl_cfmin) 1424 if (next_time == 0 || next_time > q->root.cl_cfmin)
@@ -1452,8 +1449,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1452 q->root.cl_common.classid = sch->handle; 1449 q->root.cl_common.classid = sch->handle;
1453 q->root.refcnt = 1; 1450 q->root.refcnt = 1;
1454 q->root.sched = q; 1451 q->root.sched = q;
1455 q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1452 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1456 &pfifo_qdisc_ops,
1457 sch->handle); 1453 sch->handle);
1458 if (q->root.qdisc == NULL) 1454 if (q->root.qdisc == NULL)
1459 q->root.qdisc = &noop_qdisc; 1455 q->root.qdisc = &noop_qdisc;
@@ -1601,10 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1601 if (cl->qdisc->q.qlen == 1) 1597 if (cl->qdisc->q.qlen == 1)
1602 set_active(cl, qdisc_pkt_len(skb)); 1598 set_active(cl, qdisc_pkt_len(skb));
1603 1599
1604 cl->bstats.packets++; 1600 bstats_update(&cl->bstats, skb);
1605 cl->bstats.bytes += qdisc_pkt_len(skb);
1606 sch->bstats.packets++;
1607 sch->bstats.bytes += qdisc_pkt_len(skb);
1608 sch->q.qlen++; 1601 sch->q.qlen++;
1609 1602
1610 return NET_XMIT_SUCCESS; 1603 return NET_XMIT_SUCCESS;
@@ -1630,7 +1623,8 @@ hfsc_dequeue(struct Qdisc *sch)
1630 * find the class with the minimum deadline among 1623 * find the class with the minimum deadline among
1631 * the eligible classes. 1624 * the eligible classes.
1632 */ 1625 */
1633 if ((cl = eltree_get_mindl(q, cur_time)) != NULL) { 1626 cl = eltree_get_mindl(q, cur_time);
1627 if (cl) {
1634 realtime = 1; 1628 realtime = 1;
1635 } else { 1629 } else {
1636 /* 1630 /*
@@ -1669,7 +1663,8 @@ hfsc_dequeue(struct Qdisc *sch)
1669 set_passive(cl); 1663 set_passive(cl);
1670 } 1664 }
1671 1665
1672 sch->flags &= ~TCQ_F_THROTTLED; 1666 qdisc_unthrottled(sch);
1667 qdisc_bstats_update(sch, skb);
1673 sch->q.qlen--; 1668 sch->q.qlen--;
1674 1669
1675 return skb; 1670 return skb;
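
On the dequeue side, hfsc (and htb just below) switch from clearing TCQ_F_THROTTLED by hand to the qdisc_unthrottled() helper, and charge the qdisc's own byte/packet counters at the moment a packet is handed up. The shared shape of that exit path, as a sketch (dequeue_epilogue() is a hypothetical helper, not kernel API):

#include <net/sch_generic.h>

/* Sketch: the common tail of a dequeue routine after this merge. */
static struct sk_buff *dequeue_epilogue(struct Qdisc *sch, struct sk_buff *skb)
{
	if (skb) {
		qdisc_unthrottled(sch);		/* was: sch->flags &= ~TCQ_F_THROTTLED */
		qdisc_bstats_update(sch, skb);
		sch->q.qlen--;
	}
	return skb;
}
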
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4be8d04b262d..29b942ce9e82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -99,9 +99,10 @@ struct htb_class {
99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ 99 struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */
100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ 100 struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */
101 /* When class changes from state 1->2 and disconnects from 101 /* When class changes from state 1->2 and disconnects from
102 parent's feed then we lost ptr value and start from the 102 * parent's feed then we lost ptr value and start from the
103 first child again. Here we store classid of the 103 * first child again. Here we store classid of the
104 last valid ptr (used when ptr is NULL). */ 104 * last valid ptr (used when ptr is NULL).
105 */
105 u32 last_ptr_id[TC_HTB_NUMPRIO]; 106 u32 last_ptr_id[TC_HTB_NUMPRIO];
106 } inner; 107 } inner;
107 } un; 108 } un;
@@ -182,10 +183,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch)
182 * filters in qdisc and in inner nodes (if higher filter points to the inner 183 * filters in qdisc and in inner nodes (if higher filter points to the inner
183 * node). If we end up with classid MAJOR:0 we enqueue the skb into special 184 * node). If we end up with classid MAJOR:0 we enqueue the skb into special
184 * internal fifo (direct). These packets then go directly thru. If we still 185 * internal fifo (direct). These packets then go directly thru. If we still
185 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessfull 186 * have no valid leaf we try to use MAJOR:default leaf. It still unsuccessful
186 * then finish and return direct queue. 187 * then finish and return direct queue.
187 */ 188 */
188#define HTB_DIRECT (struct htb_class*)-1 189#define HTB_DIRECT ((struct htb_class *)-1L)
189 190
190static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, 191static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
191 int *qerr) 192 int *qerr)
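
The HTB_DIRECT change above is tiny but worth a note: the expansion is now fully parenthesized and uses a long constant, which keeps the macro from regrouping when it lands inside a larger expression. One way the old form could misparse, purely as an illustration:

struct htb_class;				/* opaque here; only pointers are formed */

#define OLD_DIRECT  (struct htb_class *)-1	/* old: unparenthesized expansion */
#define NEW_DIRECT  ((struct htb_class *)-1L)	/* new: fully parenthesized */

/*
 * sizeof OLD_DIRECT  parses as  sizeof(struct htb_class *) - 1
 * sizeof NEW_DIRECT  parses as  sizeof((struct htb_class *)-1L)
 * i.e. the old macro silently yields "pointer size minus one" there.
 */
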
@@ -197,11 +198,13 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
197 int result; 198 int result;
198 199
199 /* allow to select class by setting skb->priority to valid classid; 200 /* allow to select class by setting skb->priority to valid classid;
200 note that nfmark can be used too by attaching filter fw with no 201 * note that nfmark can be used too by attaching filter fw with no
201 rules in it */ 202 * rules in it
203 */
202 if (skb->priority == sch->handle) 204 if (skb->priority == sch->handle)
203 return HTB_DIRECT; /* X:0 (direct flow) selected */ 205 return HTB_DIRECT; /* X:0 (direct flow) selected */
204 if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) 206 cl = htb_find(skb->priority, sch);
207 if (cl && cl->level == 0)
205 return cl; 208 return cl;
206 209
207 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; 210 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
@@ -216,10 +219,12 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
216 return NULL; 219 return NULL;
217 } 220 }
218#endif 221#endif
219 if ((cl = (void *)res.class) == NULL) { 222 cl = (void *)res.class;
223 if (!cl) {
220 if (res.classid == sch->handle) 224 if (res.classid == sch->handle)
221 return HTB_DIRECT; /* X:0 (direct flow) */ 225 return HTB_DIRECT; /* X:0 (direct flow) */
222 if ((cl = htb_find(res.classid, sch)) == NULL) 226 cl = htb_find(res.classid, sch);
227 if (!cl)
223 break; /* filter selected invalid classid */ 228 break; /* filter selected invalid classid */
224 } 229 }
225 if (!cl->level) 230 if (!cl->level)
@@ -378,7 +383,8 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
378 383
379 if (p->un.inner.feed[prio].rb_node) 384 if (p->un.inner.feed[prio].rb_node)
380 /* parent already has its feed in use so that 385 /* parent already has its feed in use so that
381 reset bit in mask as parent is already ok */ 386 * reset bit in mask as parent is already ok
387 */
382 mask &= ~(1 << prio); 388 mask &= ~(1 << prio);
383 389
384 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); 390 htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio);
@@ -413,8 +419,9 @@ static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl)
413 419
414 if (p->un.inner.ptr[prio] == cl->node + prio) { 420 if (p->un.inner.ptr[prio] == cl->node + prio) {
415 /* we are removing child which is pointed to from 421 /* we are removing child which is pointed to from
416 parent feed - forget the pointer but remember 422 * parent feed - forget the pointer but remember
417 classid */ 423 * classid
424 */
418 p->un.inner.last_ptr_id[prio] = cl->common.classid; 425 p->un.inner.last_ptr_id[prio] = cl->common.classid;
419 p->un.inner.ptr[prio] = NULL; 426 p->un.inner.ptr[prio] = NULL;
420 } 427 }
@@ -569,15 +576,11 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
569 } 576 }
570 return ret; 577 return ret;
571 } else { 578 } else {
572 cl->bstats.packets += 579 bstats_update(&cl->bstats, skb);
573 skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
574 cl->bstats.bytes += qdisc_pkt_len(skb);
575 htb_activate(q, cl); 580 htb_activate(q, cl);
576 } 581 }
577 582
578 sch->q.qlen++; 583 sch->q.qlen++;
579 sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
580 sch->bstats.bytes += qdisc_pkt_len(skb);
581 return NET_XMIT_SUCCESS; 584 return NET_XMIT_SUCCESS;
582} 585}
583 586
@@ -648,12 +651,10 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
648 htb_add_to_wait_tree(q, cl, diff); 651 htb_add_to_wait_tree(q, cl, diff);
649 } 652 }
650 653
651 /* update byte stats except for leaves which are already updated */ 654 /* update basic stats except for leaves which are already updated */
652 if (cl->level) { 655 if (cl->level)
653 cl->bstats.bytes += bytes; 656 bstats_update(&cl->bstats, skb);
654 cl->bstats.packets += skb_is_gso(skb)? 657
655 skb_shinfo(skb)->gso_segs:1;
656 }
657 cl = cl->parent; 658 cl = cl->parent;
658 } 659 }
659} 660}
@@ -669,8 +670,9 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
669 unsigned long start) 670 unsigned long start)
670{ 671{
671 /* don't run for longer than 2 jiffies; 2 is used instead of 672 /* don't run for longer than 2 jiffies; 2 is used instead of
672 1 to simplify things when jiffy is going to be incremented 673 * 1 to simplify things when jiffy is going to be incremented
673 too soon */ 674 * too soon
675 */
674 unsigned long stop_at = start + 2; 676 unsigned long stop_at = start + 2;
675 while (time_before(jiffies, stop_at)) { 677 while (time_before(jiffies, stop_at)) {
676 struct htb_class *cl; 678 struct htb_class *cl;
@@ -693,7 +695,7 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
693 695
694 /* too much load - let's continue after a break for scheduling */ 696 /* too much load - let's continue after a break for scheduling */
695 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { 697 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
696 printk(KERN_WARNING "htb: too many events!\n"); 698 pr_warning("htb: too many events!\n");
697 q->warned |= HTB_WARN_TOOMANYEVENTS; 699 q->warned |= HTB_WARN_TOOMANYEVENTS;
698 } 700 }
699 701
@@ -701,7 +703,8 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
701} 703}
702 704
703/* Returns class->node+prio from id-tree where classe's id is >= id. NULL 705/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
704 is no such one exists. */ 706 * is no such one exists.
707 */
705static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, 708static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n,
706 u32 id) 709 u32 id)
707{ 710{
@@ -745,12 +748,14 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
745 for (i = 0; i < 65535; i++) { 748 for (i = 0; i < 65535; i++) {
746 if (!*sp->pptr && *sp->pid) { 749 if (!*sp->pptr && *sp->pid) {
747 /* ptr was invalidated but id is valid - try to recover 750 /* ptr was invalidated but id is valid - try to recover
748 the original or next ptr */ 751 * the original or next ptr
752 */
749 *sp->pptr = 753 *sp->pptr =
750 htb_id_find_next_upper(prio, sp->root, *sp->pid); 754 htb_id_find_next_upper(prio, sp->root, *sp->pid);
751 } 755 }
752 *sp->pid = 0; /* ptr is valid now so that remove this hint as it 756 *sp->pid = 0; /* ptr is valid now so that remove this hint as it
753 can become out of date quickly */ 757 * can become out of date quickly
758 */
754 if (!*sp->pptr) { /* we are at right end; rewind & go up */ 759 if (!*sp->pptr) { /* we are at right end; rewind & go up */
755 *sp->pptr = sp->root; 760 *sp->pptr = sp->root;
756 while ((*sp->pptr)->rb_left) 761 while ((*sp->pptr)->rb_left)
@@ -778,7 +783,8 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio,
778} 783}
779 784
780/* dequeues packet at given priority and level; call only if 785/* dequeues packet at given priority and level; call only if
781 you are sure that there is active class at prio/level */ 786 * you are sure that there is active class at prio/level
787 */
782static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, 788static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio,
783 int level) 789 int level)
784{ 790{
@@ -795,9 +801,10 @@ next:
795 return NULL; 801 return NULL;
796 802
797 /* class can be empty - it is unlikely but can be true if leaf 803 /* class can be empty - it is unlikely but can be true if leaf
798 qdisc drops packets in enqueue routine or if someone used 804 * qdisc drops packets in enqueue routine or if someone used
799 graft operation on the leaf since last dequeue; 805 * graft operation on the leaf since last dequeue;
800 simply deactivate and skip such class */ 806 * simply deactivate and skip such class
807 */
801 if (unlikely(cl->un.leaf.q->q.qlen == 0)) { 808 if (unlikely(cl->un.leaf.q->q.qlen == 0)) {
802 struct htb_class *next; 809 struct htb_class *next;
803 htb_deactivate(q, cl); 810 htb_deactivate(q, cl);
@@ -837,7 +844,8 @@ next:
837 ptr[0]) + prio); 844 ptr[0]) + prio);
838 } 845 }
839 /* this used to be after charge_class but this constelation 846 /* this used to be after charge_class but this constelation
840 gives us slightly better performance */ 847 * gives us slightly better performance
848 */
841 if (!cl->un.leaf.q->q.qlen) 849 if (!cl->un.leaf.q->q.qlen)
842 htb_deactivate(q, cl); 850 htb_deactivate(q, cl);
843 htb_charge_class(q, cl, level, skb); 851 htb_charge_class(q, cl, level, skb);
@@ -847,7 +855,7 @@ next:
847 855
848static struct sk_buff *htb_dequeue(struct Qdisc *sch) 856static struct sk_buff *htb_dequeue(struct Qdisc *sch)
849{ 857{
850 struct sk_buff *skb = NULL; 858 struct sk_buff *skb;
851 struct htb_sched *q = qdisc_priv(sch); 859 struct htb_sched *q = qdisc_priv(sch);
852 int level; 860 int level;
853 psched_time_t next_event; 861 psched_time_t next_event;
@@ -856,7 +864,9 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
856 /* try to dequeue direct packets as high prio (!) to minimize cpu work */ 864 /* try to dequeue direct packets as high prio (!) to minimize cpu work */
857 skb = __skb_dequeue(&q->direct_queue); 865 skb = __skb_dequeue(&q->direct_queue);
858 if (skb != NULL) { 866 if (skb != NULL) {
859 sch->flags &= ~TCQ_F_THROTTLED; 867ok:
868 qdisc_bstats_update(sch, skb);
869 qdisc_unthrottled(sch);
860 sch->q.qlen--; 870 sch->q.qlen--;
861 return skb; 871 return skb;
862 } 872 }
@@ -887,13 +897,11 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
887 m = ~q->row_mask[level]; 897 m = ~q->row_mask[level];
888 while (m != (int)(-1)) { 898 while (m != (int)(-1)) {
889 int prio = ffz(m); 899 int prio = ffz(m);
900
890 m |= 1 << prio; 901 m |= 1 << prio;
891 skb = htb_dequeue_tree(q, prio, level); 902 skb = htb_dequeue_tree(q, prio, level);
892 if (likely(skb != NULL)) { 903 if (likely(skb != NULL))
893 sch->q.qlen--; 904 goto ok;
894 sch->flags &= ~TCQ_F_THROTTLED;
895 goto fin;
896 }
897 } 905 }
898 } 906 }
899 sch->qstats.overlimits++; 907 sch->qstats.overlimits++;
@@ -994,13 +1002,12 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
994 return err; 1002 return err;
995 1003
996 if (tb[TCA_HTB_INIT] == NULL) { 1004 if (tb[TCA_HTB_INIT] == NULL) {
997 printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); 1005 pr_err("HTB: hey probably you have bad tc tool ?\n");
998 return -EINVAL; 1006 return -EINVAL;
999 } 1007 }
1000 gopt = nla_data(tb[TCA_HTB_INIT]); 1008 gopt = nla_data(tb[TCA_HTB_INIT]);
1001 if (gopt->version != HTB_VER >> 16) { 1009 if (gopt->version != HTB_VER >> 16) {
1002 printk(KERN_ERR 1010 pr_err("HTB: need tc/htb version %d (minor is %d), you have %d\n",
1003 "HTB: need tc/htb version %d (minor is %d), you have %d\n",
1004 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); 1011 HTB_VER >> 16, HTB_VER & 0xffff, gopt->version);
1005 return -EINVAL; 1012 return -EINVAL;
1006 } 1013 }
@@ -1121,8 +1128,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1121 if (cl->level) 1128 if (cl->level)
1122 return -EINVAL; 1129 return -EINVAL;
1123 if (new == NULL && 1130 if (new == NULL &&
1124 (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1131 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1125 &pfifo_qdisc_ops,
1126 cl->common.classid)) == NULL) 1132 cl->common.classid)) == NULL)
1127 return -ENOBUFS; 1133 return -ENOBUFS;
1128 1134
@@ -1214,9 +1220,10 @@ static void htb_destroy(struct Qdisc *sch)
1214 cancel_work_sync(&q->work); 1220 cancel_work_sync(&q->work);
1215 qdisc_watchdog_cancel(&q->watchdog); 1221 qdisc_watchdog_cancel(&q->watchdog);
1216 /* This line used to be after htb_destroy_class call below 1222 /* This line used to be after htb_destroy_class call below
1217 and surprisingly it worked in 2.4. But it must precede it 1223 * and surprisingly it worked in 2.4. But it must precede it
1218 because filter need its target class alive to be able to call 1224 * because filter need its target class alive to be able to call
1219 unbind_filter on it (without Oops). */ 1225 * unbind_filter on it (without Oops).
1226 */
1220 tcf_destroy_chain(&q->filter_list); 1227 tcf_destroy_chain(&q->filter_list);
1221 1228
1222 for (i = 0; i < q->clhash.hashsize; i++) { 1229 for (i = 0; i < q->clhash.hashsize; i++) {
@@ -1247,8 +1254,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1247 return -EBUSY; 1254 return -EBUSY;
1248 1255
1249 if (!cl->level && htb_parent_last_child(cl)) { 1256 if (!cl->level && htb_parent_last_child(cl)) {
1250 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1257 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1251 &pfifo_qdisc_ops,
1252 cl->parent->common.classid); 1258 cl->parent->common.classid);
1253 last_child = 1; 1259 last_child = 1;
1254 } 1260 }
@@ -1302,14 +1308,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1308 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct nlattr *opt = tca[TCA_OPTIONS]; 1309 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1310 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1305 struct nlattr *tb[TCA_HTB_RTAB + 1]; 1311 struct nlattr *tb[__TCA_HTB_MAX];
1306 struct tc_htb_opt *hopt; 1312 struct tc_htb_opt *hopt;
1307 1313
1308 /* extract all subattrs from opt attr */ 1314 /* extract all subattrs from opt attr */
1309 if (!opt) 1315 if (!opt)
1310 goto failure; 1316 goto failure;
1311 1317
1312 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy); 1318 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1313 if (err < 0) 1319 if (err < 0)
1314 goto failure; 1320 goto failure;
1315 1321
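
The htb_change_class() hunk above also tightens the netlink attribute parsing: the tb[] table is now dimensioned __TCA_HTB_MAX (one slot for every type 0..TCA_HTB_MAX) and nla_parse_nested() is given TCA_HTB_MAX, so the table keeps tracking the attribute enum if new types are added. A sketch of that convention in isolation (parse_htb_opts() is a hypothetical wrapper):

#include <net/netlink.h>
#include <linux/pkt_sched.h>

/* Sketch: the nested-attribute parsing convention adopted above. */
static int parse_htb_opts(struct nlattr *opt, const struct nla_policy *policy,
			  struct nlattr **rtab)
{
	struct nlattr *tb[__TCA_HTB_MAX];	/* indices 0 .. TCA_HTB_MAX */
	int err;

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, policy);
	if (err < 0)
		return err;

	*rtab = tb[TCA_HTB_RTAB];		/* NULL when the attribute is absent */
	return 0;
}
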
@@ -1351,11 +1357,12 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1351 1357
1352 /* check maximal depth */ 1358 /* check maximal depth */
1353 if (parent && parent->parent && parent->parent->level < 2) { 1359 if (parent && parent->parent && parent->parent->level < 2) {
1354 printk(KERN_ERR "htb: tree is too deep\n"); 1360 pr_err("htb: tree is too deep\n");
1355 goto failure; 1361 goto failure;
1356 } 1362 }
1357 err = -ENOBUFS; 1363 err = -ENOBUFS;
1358 if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) 1364 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1365 if (!cl)
1359 goto failure; 1366 goto failure;
1360 1367
1361 err = gen_new_estimator(&cl->bstats, &cl->rate_est, 1368 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
@@ -1375,9 +1382,10 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1375 RB_CLEAR_NODE(&cl->node[prio]); 1382 RB_CLEAR_NODE(&cl->node[prio]);
1376 1383
1377 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1384 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1378 so that can't be used inside of sch_tree_lock 1385 * so that can't be used inside of sch_tree_lock
1379 -- thanks to Karlis Peisenieks */ 1386 * -- thanks to Karlis Peisenieks
1380 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1387 */
1388 new_q = qdisc_create_dflt(sch->dev_queue,
1381 &pfifo_qdisc_ops, classid); 1389 &pfifo_qdisc_ops, classid);
1382 sch_tree_lock(sch); 1390 sch_tree_lock(sch);
1383 if (parent && !parent->level) { 1391 if (parent && !parent->level) {
@@ -1428,17 +1436,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1428 } 1436 }
1429 1437
1430 /* it used to be a nasty bug here, we have to check that node 1438 /* it used to be a nasty bug here, we have to check that node
1431 is really leaf before changing cl->un.leaf ! */ 1439 * is really leaf before changing cl->un.leaf !
1440 */
1432 if (!cl->level) { 1441 if (!cl->level) {
1433 cl->quantum = rtab->rate.rate / q->rate2quantum; 1442 cl->quantum = rtab->rate.rate / q->rate2quantum;
1434 if (!hopt->quantum && cl->quantum < 1000) { 1443 if (!hopt->quantum && cl->quantum < 1000) {
1435 printk(KERN_WARNING 1444 pr_warning(
1436 "HTB: quantum of class %X is small. Consider r2q change.\n", 1445 "HTB: quantum of class %X is small. Consider r2q change.\n",
1437 cl->common.classid); 1446 cl->common.classid);
1438 cl->quantum = 1000; 1447 cl->quantum = 1000;
1439 } 1448 }
1440 if (!hopt->quantum && cl->quantum > 200000) { 1449 if (!hopt->quantum && cl->quantum > 200000) {
1441 printk(KERN_WARNING 1450 pr_warning(
1442 "HTB: quantum of class %X is big. Consider r2q change.\n", 1451 "HTB: quantum of class %X is big. Consider r2q change.\n",
1443 cl->common.classid); 1452 cl->common.classid);
1444 cl->quantum = 200000; 1453 cl->quantum = 200000;
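For reference, the quantum heuristic that triggers the two warnings in the hunk above can be tried in isolation: when no explicit quantum is configured, HTB derives it as rate/r2q and clamps it to [1000, 200000] bytes. The standalone sketch below is not part of the patch; the rates and the r2q value of 10 (tc's usual default) are illustrative assumptions.

#include <stdio.h>

/* Standalone illustration of the HTB quantum heuristic shown above:
 * quantum = rate_in_bytes_per_sec / r2q, clamped to [1000, 200000]
 * when no explicit quantum was configured.  All values are made up.
 */
static unsigned int htb_quantum(unsigned int rate_Bps, unsigned int r2q)
{
	unsigned int quantum = rate_Bps / r2q;

	if (quantum < 1000)
		quantum = 1000;		/* "quantum ... is small" warning case */
	else if (quantum > 200000)
		quantum = 200000;	/* "quantum ... is big" warning case */
	return quantum;
}

int main(void)
{
	/* 1 Mbit/s is 125000 B/s; with r2q = 10 the quantum stays in range */
	printf("1 Mbit/s  -> quantum %u\n", htb_quantum(125000, 10));
	/* 10 kbit/s would give 125 bytes, below the 1000 byte floor */
	printf("10 kbit/s -> quantum %u\n", htb_quantum(1250, 10));
	/* 10 Gbit/s would give 125 MB, far above the 200000 byte ceiling */
	printf("10 Gbit/s -> quantum %u\n", htb_quantum(1250000000u, 10));
	return 0;
}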
@@ -1487,13 +1496,13 @@ static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent,
1487 struct htb_class *cl = htb_find(classid, sch); 1496 struct htb_class *cl = htb_find(classid, sch);
1488 1497
1489 /*if (cl && !cl->level) return 0; 1498 /*if (cl && !cl->level) return 0;
1490 The line above used to be there to prevent attaching filters to 1499 * The line above used to be there to prevent attaching filters to
1491 leaves. But at least tc_index filter uses this just to get class 1500 * leaves. But at least tc_index filter uses this just to get class
1492 for other reasons so that we have to allow for it. 1501 * for other reasons so that we have to allow for it.
1493 ---- 1502 * ----
1494 19.6.2002 As Werner explained it is ok - bind filter is just 1503 * 19.6.2002 As Werner explained it is ok - bind filter is just
1495 another way to "lock" the class - unlike "get" this lock can 1504 * another way to "lock" the class - unlike "get" this lock can
1496 be broken by class during destroy IIUC. 1505 * be broken by class during destroy IIUC.
1497 */ 1506 */
1498 if (cl) 1507 if (cl)
1499 cl->filter_cnt++; 1508 cl->filter_cnt++;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index f10e34a68445..bce1665239b8 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -63,8 +63,7 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
63 63
64 result = tc_classify(skb, p->filter_list, &res); 64 result = tc_classify(skb, p->filter_list, &res);
65 65
66 sch->bstats.packets++; 66 qdisc_bstats_update(sch, skb);
67 sch->bstats.bytes += qdisc_pkt_len(skb);
68 switch (result) { 67 switch (result) {
69 case TC_ACT_SHOT: 68 case TC_ACT_SHOT:
70 result = TC_ACT_SHOT; 69 result = TC_ACT_SHOT;
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index fe91e50f9d98..ec5cbc848963 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -56,12 +56,11 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
56 56
57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
58 dev_queue = netdev_get_tx_queue(dev, ntx); 58 dev_queue = netdev_get_tx_queue(dev, ntx);
59 qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, 59 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
60 TC_H_MAKE(TC_H_MAJ(sch->handle), 60 TC_H_MAKE(TC_H_MAJ(sch->handle),
61 TC_H_MIN(ntx + 1))); 61 TC_H_MIN(ntx + 1)));
62 if (qdisc == NULL) 62 if (qdisc == NULL)
63 goto err; 63 goto err;
64 qdisc->flags |= TCQ_F_CAN_BYPASS;
65 priv->qdiscs[ntx] = qdisc; 64 priv->qdiscs[ntx] = qdisc;
66 } 65 }
67 66
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 000000000000..ea17cbed29ef
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,418 @@
1/*
2 * net/sched/sch_mqprio.c
3 *
4 * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/types.h>
12#include <linux/slab.h>
13#include <linux/kernel.h>
14#include <linux/string.h>
15#include <linux/errno.h>
16#include <linux/skbuff.h>
17#include <net/netlink.h>
18#include <net/pkt_sched.h>
19#include <net/sch_generic.h>
20
21struct mqprio_sched {
22 struct Qdisc **qdiscs;
23 int hw_owned;
24};
25
26static void mqprio_destroy(struct Qdisc *sch)
27{
28 struct net_device *dev = qdisc_dev(sch);
29 struct mqprio_sched *priv = qdisc_priv(sch);
30 unsigned int ntx;
31
32 if (priv->qdiscs) {
33 for (ntx = 0;
34 ntx < dev->num_tx_queues && priv->qdiscs[ntx];
35 ntx++)
36 qdisc_destroy(priv->qdiscs[ntx]);
37 kfree(priv->qdiscs);
38 }
39
40 if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
41 dev->netdev_ops->ndo_setup_tc(dev, 0);
42 else
43 netdev_set_num_tc(dev, 0);
44}
45
46static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
47{
48 int i, j;
49
50 /* Verify num_tc is not out of max range */
51 if (qopt->num_tc > TC_MAX_QUEUE)
52 return -EINVAL;
53
54 /* Verify priority mapping uses valid tcs */
55 for (i = 0; i < TC_BITMASK + 1; i++) {
56 if (qopt->prio_tc_map[i] >= qopt->num_tc)
57 return -EINVAL;
58 }
59
60 /* net_device does not support requested operation */
61 if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
62 return -EINVAL;
63
 64 /* if hw owned, qcount and qoffset are taken from the LLD, so
 65 * there is no reason to verify them here
 66 */
67 if (qopt->hw)
68 return 0;
69
70 for (i = 0; i < qopt->num_tc; i++) {
71 unsigned int last = qopt->offset[i] + qopt->count[i];
72
 73 /* Verify the queue count is in tx range; being equal to
 74 * real_num_tx_queues indicates the last queue is in use.
 75 */
76 if (qopt->offset[i] >= dev->real_num_tx_queues ||
77 !qopt->count[i] ||
78 last > dev->real_num_tx_queues)
79 return -EINVAL;
80
81 /* Verify that the offset and counts do not overlap */
82 for (j = i + 1; j < qopt->num_tc; j++) {
83 if (last > qopt->offset[j])
84 return -EINVAL;
85 }
86 }
87
88 return 0;
89}
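The range checks in mqprio_parse_opt() above are easiest to follow with concrete numbers. The userspace restatement below mirrors the same loop (including its assumption that offsets are listed in increasing order); the queue layout in main() is invented for demonstration.

#include <stdio.h>

/* Userspace restatement of the offset/count validation done by
 * mqprio_parse_opt() above: traffic class i owns the queue range
 * [offset[i], offset[i] + count[i]); every range must stay inside
 * real_num_tx_queues and must not overlap a later range.
 */
static int ranges_valid(const unsigned int *offset, const unsigned int *count,
			int num_tc, unsigned int real_num_tx_queues)
{
	int i, j;

	for (i = 0; i < num_tc; i++) {
		unsigned int last = offset[i] + count[i];

		if (offset[i] >= real_num_tx_queues || !count[i] ||
		    last > real_num_tx_queues)
			return 0;
		for (j = i + 1; j < num_tc; j++)
			if (last > offset[j])
				return 0;
	}
	return 1;
}

int main(void)
{
	/* 8 hardware queues split into 3 traffic classes: 4 + 2 + 2 */
	unsigned int offset[] = { 0, 4, 6 };
	unsigned int count[] = { 4, 2, 2 };
	unsigned int bad_count[] = { 5, 2, 2 };	/* tc0 spills into tc1 */

	printf("valid mapping:       %d\n", ranges_valid(offset, count, 3, 8));
	printf("overlapping mapping: %d\n", ranges_valid(offset, bad_count, 3, 8));
	return 0;
}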
90
91static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
92{
93 struct net_device *dev = qdisc_dev(sch);
94 struct mqprio_sched *priv = qdisc_priv(sch);
95 struct netdev_queue *dev_queue;
96 struct Qdisc *qdisc;
97 int i, err = -EOPNOTSUPP;
98 struct tc_mqprio_qopt *qopt = NULL;
99
100 BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
101 BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK);
102
103 if (sch->parent != TC_H_ROOT)
104 return -EOPNOTSUPP;
105
106 if (!netif_is_multiqueue(dev))
107 return -EOPNOTSUPP;
108
109 if (nla_len(opt) < sizeof(*qopt))
110 return -EINVAL;
111
112 qopt = nla_data(opt);
113 if (mqprio_parse_opt(dev, qopt))
114 return -EINVAL;
115
116 /* pre-allocate qdisc, attachment can't fail */
117 priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
118 GFP_KERNEL);
119 if (priv->qdiscs == NULL) {
120 err = -ENOMEM;
121 goto err;
122 }
123
124 for (i = 0; i < dev->num_tx_queues; i++) {
125 dev_queue = netdev_get_tx_queue(dev, i);
126 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
127 TC_H_MAKE(TC_H_MAJ(sch->handle),
128 TC_H_MIN(i + 1)));
129 if (qdisc == NULL) {
130 err = -ENOMEM;
131 goto err;
132 }
133 priv->qdiscs[i] = qdisc;
134 }
135
136 /* If the mqprio options indicate that hardware should own
137 * the queue mapping then run ndo_setup_tc otherwise use the
138 * supplied and verified mapping
139 */
140 if (qopt->hw) {
141 priv->hw_owned = 1;
142 err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
143 if (err)
144 goto err;
145 } else {
146 netdev_set_num_tc(dev, qopt->num_tc);
147 for (i = 0; i < qopt->num_tc; i++)
148 netdev_set_tc_queue(dev, i,
149 qopt->count[i], qopt->offset[i]);
150 }
151
152 /* Always use supplied priority mappings */
153 for (i = 0; i < TC_BITMASK + 1; i++)
154 netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i]);
155
156 sch->flags |= TCQ_F_MQROOT;
157 return 0;
158
159err:
160 mqprio_destroy(sch);
161 return err;
162}
163
164static void mqprio_attach(struct Qdisc *sch)
165{
166 struct net_device *dev = qdisc_dev(sch);
167 struct mqprio_sched *priv = qdisc_priv(sch);
168 struct Qdisc *qdisc;
169 unsigned int ntx;
170
171 /* Attach underlying qdisc */
172 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
173 qdisc = priv->qdiscs[ntx];
174 qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
175 if (qdisc)
176 qdisc_destroy(qdisc);
177 }
178 kfree(priv->qdiscs);
179 priv->qdiscs = NULL;
180}
181
182static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
183 unsigned long cl)
184{
185 struct net_device *dev = qdisc_dev(sch);
186 unsigned long ntx = cl - 1 - netdev_get_num_tc(dev);
187
188 if (ntx >= dev->num_tx_queues)
189 return NULL;
190 return netdev_get_tx_queue(dev, ntx);
191}
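The class numbering used by the mqprio class ops above is worth spelling out: class ids 1..num_tc stand for the traffic classes themselves, while ids above num_tc address individual tx queues, which is why mqprio_queue_get() computes cl - 1 - netdev_get_num_tc(dev). A small standalone illustration follows; num_tc and the queue count are invented.

#include <stdio.h>

/* Illustration of the virtual class numbering used by mqprio above:
 * ids 1..num_tc are traffic classes, ids num_tc+1..num_tc+num_tx_queues
 * are individual tx queues (hence the "cl - 1 - num_tc" conversion).
 */
int main(void)
{
	unsigned int num_tc = 3, num_tx_queues = 8;
	unsigned long cl;

	for (cl = 1; cl <= num_tc + num_tx_queues; cl++) {
		if (cl <= num_tc)
			printf("class %lu -> traffic class %lu\n", cl, cl - 1);
		else
			printf("class %lu -> tx queue %lu\n",
			       cl, cl - 1 - num_tc);
	}
	return 0;
}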
192
193static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
194 struct Qdisc **old)
195{
196 struct net_device *dev = qdisc_dev(sch);
197 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
198
199 if (!dev_queue)
200 return -EINVAL;
201
202 if (dev->flags & IFF_UP)
203 dev_deactivate(dev);
204
205 *old = dev_graft_qdisc(dev_queue, new);
206
207 if (dev->flags & IFF_UP)
208 dev_activate(dev);
209
210 return 0;
211}
212
213static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
214{
215 struct net_device *dev = qdisc_dev(sch);
216 struct mqprio_sched *priv = qdisc_priv(sch);
217 unsigned char *b = skb_tail_pointer(skb);
218 struct tc_mqprio_qopt opt = { 0 };
219 struct Qdisc *qdisc;
220 unsigned int i;
221
222 sch->q.qlen = 0;
223 memset(&sch->bstats, 0, sizeof(sch->bstats));
224 memset(&sch->qstats, 0, sizeof(sch->qstats));
225
226 for (i = 0; i < dev->num_tx_queues; i++) {
227 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
228 spin_lock_bh(qdisc_lock(qdisc));
229 sch->q.qlen += qdisc->q.qlen;
230 sch->bstats.bytes += qdisc->bstats.bytes;
231 sch->bstats.packets += qdisc->bstats.packets;
232 sch->qstats.qlen += qdisc->qstats.qlen;
233 sch->qstats.backlog += qdisc->qstats.backlog;
234 sch->qstats.drops += qdisc->qstats.drops;
235 sch->qstats.requeues += qdisc->qstats.requeues;
236 sch->qstats.overlimits += qdisc->qstats.overlimits;
237 spin_unlock_bh(qdisc_lock(qdisc));
238 }
239
240 opt.num_tc = netdev_get_num_tc(dev);
241 memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
242 opt.hw = priv->hw_owned;
243
244 for (i = 0; i < netdev_get_num_tc(dev); i++) {
245 opt.count[i] = dev->tc_to_txq[i].count;
246 opt.offset[i] = dev->tc_to_txq[i].offset;
247 }
248
249 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
250
251 return skb->len;
252nla_put_failure:
253 nlmsg_trim(skb, b);
254 return -1;
255}
256
257static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
258{
259 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
260
261 if (!dev_queue)
262 return NULL;
263
264 return dev_queue->qdisc_sleeping;
265}
266
267static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
268{
269 struct net_device *dev = qdisc_dev(sch);
270 unsigned int ntx = TC_H_MIN(classid);
271
272 if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev))
273 return 0;
274 return ntx;
275}
276
277static void mqprio_put(struct Qdisc *sch, unsigned long cl)
278{
279}
280
281static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
282 struct sk_buff *skb, struct tcmsg *tcm)
283{
284 struct net_device *dev = qdisc_dev(sch);
285
286 if (cl <= netdev_get_num_tc(dev)) {
287 tcm->tcm_parent = TC_H_ROOT;
288 tcm->tcm_info = 0;
289 } else {
290 int i;
291 struct netdev_queue *dev_queue;
292
293 dev_queue = mqprio_queue_get(sch, cl);
294 tcm->tcm_parent = 0;
295 for (i = 0; i < netdev_get_num_tc(dev); i++) {
296 struct netdev_tc_txq tc = dev->tc_to_txq[i];
297 int q_idx = cl - netdev_get_num_tc(dev);
298
299 if (q_idx > tc.offset &&
300 q_idx <= tc.offset + tc.count) {
301 tcm->tcm_parent =
302 TC_H_MAKE(TC_H_MAJ(sch->handle),
303 TC_H_MIN(i + 1));
304 break;
305 }
306 }
307 tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
308 }
309 tcm->tcm_handle |= TC_H_MIN(cl);
310 return 0;
311}
312
313static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
314 struct gnet_dump *d)
315 __releases(d->lock)
316 __acquires(d->lock)
317{
318 struct net_device *dev = qdisc_dev(sch);
319
320 if (cl <= netdev_get_num_tc(dev)) {
321 int i;
322 struct Qdisc *qdisc;
323 struct gnet_stats_queue qstats = {0};
324 struct gnet_stats_basic_packed bstats = {0};
325 struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1];
326
 327 /* Drop the lock here; it will be reclaimed before touching
 328 * statistics. This is required because the d->lock we
 329 * hold here is the lock on dev_queue->qdisc_sleeping,
 330 * which is also acquired below.
 331 */
332 spin_unlock_bh(d->lock);
333
334 for (i = tc.offset; i < tc.offset + tc.count; i++) {
335 qdisc = netdev_get_tx_queue(dev, i)->qdisc;
336 spin_lock_bh(qdisc_lock(qdisc));
337 bstats.bytes += qdisc->bstats.bytes;
338 bstats.packets += qdisc->bstats.packets;
339 qstats.qlen += qdisc->qstats.qlen;
340 qstats.backlog += qdisc->qstats.backlog;
341 qstats.drops += qdisc->qstats.drops;
342 qstats.requeues += qdisc->qstats.requeues;
343 qstats.overlimits += qdisc->qstats.overlimits;
344 spin_unlock_bh(qdisc_lock(qdisc));
345 }
346 /* Reclaim root sleeping lock before completing stats */
347 spin_lock_bh(d->lock);
348 if (gnet_stats_copy_basic(d, &bstats) < 0 ||
349 gnet_stats_copy_queue(d, &qstats) < 0)
350 return -1;
351 } else {
352 struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
353
354 sch = dev_queue->qdisc_sleeping;
355 sch->qstats.qlen = sch->q.qlen;
356 if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
357 gnet_stats_copy_queue(d, &sch->qstats) < 0)
358 return -1;
359 }
360 return 0;
361}
362
363static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
364{
365 struct net_device *dev = qdisc_dev(sch);
366 unsigned long ntx;
367
368 if (arg->stop)
369 return;
370
371 /* Walk hierarchy with a virtual class per tc */
372 arg->count = arg->skip;
373 for (ntx = arg->skip;
374 ntx < dev->num_tx_queues + netdev_get_num_tc(dev);
375 ntx++) {
376 if (arg->fn(sch, ntx + 1, arg) < 0) {
377 arg->stop = 1;
378 break;
379 }
380 arg->count++;
381 }
382}
383
384static const struct Qdisc_class_ops mqprio_class_ops = {
385 .graft = mqprio_graft,
386 .leaf = mqprio_leaf,
387 .get = mqprio_get,
388 .put = mqprio_put,
389 .walk = mqprio_walk,
390 .dump = mqprio_dump_class,
391 .dump_stats = mqprio_dump_class_stats,
392};
393
394static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
395 .cl_ops = &mqprio_class_ops,
396 .id = "mqprio",
397 .priv_size = sizeof(struct mqprio_sched),
398 .init = mqprio_init,
399 .destroy = mqprio_destroy,
400 .attach = mqprio_attach,
401 .dump = mqprio_dump,
402 .owner = THIS_MODULE,
403};
404
405static int __init mqprio_module_init(void)
406{
407 return register_qdisc(&mqprio_qdisc_ops);
408}
409
410static void __exit mqprio_module_exit(void)
411{
412 unregister_qdisc(&mqprio_qdisc_ops);
413}
414
415module_init(mqprio_module_init);
416module_exit(mqprio_module_exit);
417
418MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 6ae251279fc2..edc1950e0e77 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -83,8 +83,6 @@ multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
83 83
84 ret = qdisc_enqueue(skb, qdisc); 84 ret = qdisc_enqueue(skb, qdisc);
85 if (ret == NET_XMIT_SUCCESS) { 85 if (ret == NET_XMIT_SUCCESS) {
86 sch->bstats.bytes += qdisc_pkt_len(skb);
87 sch->bstats.packets++;
88 sch->q.qlen++; 86 sch->q.qlen++;
89 return NET_XMIT_SUCCESS; 87 return NET_XMIT_SUCCESS;
90 } 88 }
@@ -113,6 +111,7 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
113 qdisc = q->queues[q->curband]; 111 qdisc = q->queues[q->curband];
114 skb = qdisc->dequeue(qdisc); 112 skb = qdisc->dequeue(qdisc);
115 if (skb) { 113 if (skb) {
114 qdisc_bstats_update(sch, skb);
116 sch->q.qlen--; 115 sch->q.qlen--;
117 return skb; 116 return skb;
118 } 117 }
@@ -157,7 +156,7 @@ static unsigned int multiq_drop(struct Qdisc *sch)
157 unsigned int len; 156 unsigned int len;
158 struct Qdisc *qdisc; 157 struct Qdisc *qdisc;
159 158
160 for (band = q->bands-1; band >= 0; band--) { 159 for (band = q->bands - 1; band >= 0; band--) {
161 qdisc = q->queues[band]; 160 qdisc = q->queues[band];
162 if (qdisc->ops->drop) { 161 if (qdisc->ops->drop) {
163 len = qdisc->ops->drop(qdisc); 162 len = qdisc->ops->drop(qdisc);
@@ -227,8 +226,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
227 for (i = 0; i < q->bands; i++) { 226 for (i = 0; i < q->bands; i++) {
228 if (q->queues[i] == &noop_qdisc) { 227 if (q->queues[i] == &noop_qdisc) {
229 struct Qdisc *child, *old; 228 struct Qdisc *child, *old;
230 child = qdisc_create_dflt(qdisc_dev(sch), 229 child = qdisc_create_dflt(sch->dev_queue,
231 sch->dev_queue,
232 &pfifo_qdisc_ops, 230 &pfifo_qdisc_ops,
233 TC_H_MAKE(sch->handle, 231 TC_H_MAKE(sch->handle,
234 i + 1)); 232 i + 1));
@@ -267,7 +265,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
267 for (i = 0; i < q->max_bands; i++) 265 for (i = 0; i < q->max_bands; i++)
268 q->queues[i] = &noop_qdisc; 266 q->queues[i] = &noop_qdisc;
269 267
270 err = multiq_tune(sch,opt); 268 err = multiq_tune(sch, opt);
271 269
272 if (err) 270 if (err)
273 kfree(q->queues); 271 kfree(q->queues);
@@ -348,7 +346,7 @@ static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
348 struct multiq_sched_data *q = qdisc_priv(sch); 346 struct multiq_sched_data *q = qdisc_priv(sch);
349 347
350 tcm->tcm_handle |= TC_H_MIN(cl); 348 tcm->tcm_handle |= TC_H_MIN(cl);
351 tcm->tcm_info = q->queues[cl-1]->handle; 349 tcm->tcm_info = q->queues[cl - 1]->handle;
352 return 0; 350 return 0;
353} 351}
354 352
@@ -380,7 +378,7 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
380 arg->count++; 378 arg->count++;
381 continue; 379 continue;
382 } 380 }
383 if (arg->fn(sch, band+1, arg) < 0) { 381 if (arg->fn(sch, band + 1, arg) < 0) {
384 arg->stop = 1; 382 arg->stop = 1;
385 break; 383 break;
386 } 384 }
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4714ff162bbd..69c35f6cd13f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -19,12 +19,13 @@
19#include <linux/kernel.h> 19#include <linux/kernel.h>
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/vmalloc.h>
22#include <linux/rtnetlink.h> 23#include <linux/rtnetlink.h>
23 24
24#include <net/netlink.h> 25#include <net/netlink.h>
25#include <net/pkt_sched.h> 26#include <net/pkt_sched.h>
26 27
27#define VERSION "1.2" 28#define VERSION "1.3"
28 29
29/* Network Emulation Queuing algorithm. 30/* Network Emulation Queuing algorithm.
30 ==================================== 31 ====================================
@@ -47,6 +48,20 @@
47 layering other disciplines. It does not need to do bandwidth 48 layering other disciplines. It does not need to do bandwidth
48 control either since that can be handled by using token 49 control either since that can be handled by using token
49 bucket or other rate control. 50 bucket or other rate control.
51
52 Correlated Loss Generator models
53
54 Added generation of correlated loss according to the
55 "Gilbert-Elliot" model, a 4-state markov model.
56
57 References:
58 [1] NetemCLG Home http://netgroup.uniroma2.it/NetemCLG
59 [2] S. Salsano, F. Ludovici, A. Ordine, "Definition of a general
60 and intuitive loss model for packet networks and its implementation
61 in the Netem module in the Linux kernel", available in [1]
62
 63 Authors: Stefano Salsano <stefano.salsano at uniroma2.it>
64 Fabio Ludovici <fabio.ludovici at yahoo.it>
50*/ 65*/
51 66
52struct netem_sched_data { 67struct netem_sched_data {
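The Gilbert-Elliot model referenced in the comment above alternates between a good and a bad state, each with its own loss probability, which is what makes the generated losses bursty instead of independent. The userspace sketch below simulates that model; rand() stands in for the kernel's net_random(), and all probabilities are invented for demonstration.

#include <stdio.h>
#include <stdlib.h>

/* Userspace sketch of the Gilbert-Elliot correlated-loss model added to
 * netem above.  p, r, h and k1 correspond to the a1..a4 parameters in
 * the patch; the values below are made up.
 */
static double frand(void)
{
	return rand() / (RAND_MAX + 1.0);
}

int main(void)
{
	double p = 0.05;	/* good -> bad transition probability */
	double r = 0.30;	/* bad -> good transition probability */
	double h = 0.50;	/* loss probability in the bad state */
	double k1 = 0.001;	/* loss probability in the good state (1-k) */
	int state = 1;		/* 1 = good, 2 = bad, as in the patch */
	int i, losses = 0, n = 100000;

	srand(1);
	for (i = 0; i < n; i++) {
		if (state == 1) {
			if (frand() < p)
				state = 2;
			if (frand() < k1)
				losses++;
		} else {
			if (frand() < r)
				state = 1;
			if (frand() < h)
				losses++;
		}
	}
	printf("overall loss rate: %.3f (losses arrive in bursts)\n",
	       (double)losses / n);
	return 0;
}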
@@ -73,6 +88,26 @@ struct netem_sched_data {
73 u32 size; 88 u32 size;
74 s16 table[0]; 89 s16 table[0];
75 } *delay_dist; 90 } *delay_dist;
91
92 enum {
93 CLG_RANDOM,
94 CLG_4_STATES,
95 CLG_GILB_ELL,
96 } loss_model;
97
98 /* Correlated Loss Generation models */
99 struct clgstate {
100 /* state of the Markov chain */
101 u8 state;
102
103 /* 4-states and Gilbert-Elliot models */
104 u32 a1; /* p13 for 4-states or p for GE */
105 u32 a2; /* p31 for 4-states or r for GE */
106 u32 a3; /* p32 for 4-states or h for GE */
107 u32 a4; /* p14 for 4-states or 1-k for GE */
108 u32 a5; /* p23 used only in 4-states */
109 } clg;
110
76}; 111};
77 112
78/* Time stamp put into socket buffer control block */ 113/* Time stamp put into socket buffer control block */
@@ -115,6 +150,122 @@ static u32 get_crandom(struct crndstate *state)
115 return answer; 150 return answer;
116} 151}
117 152
153/* loss_4state - 4-state model loss generator
154 * Generates losses according to the 4-state Markov chain adopted in
155 * the GI (General and Intuitive) loss model.
156 */
157static bool loss_4state(struct netem_sched_data *q)
158{
159 struct clgstate *clg = &q->clg;
160 u32 rnd = net_random();
161
162 /*
163 * Makes a comparison between rnd and the transition
164 * probabilities outgoing from the current state, then decides the
165 * next state and if the next packet has to be transmitted or lost.
166 * The four states correspond to:
167 * 1 => successfully transmitted packets within a gap period
168 * 4 => isolated losses within a gap period
169 * 3 => lost packets within a burst period
170 * 2 => successfully transmitted packets within a burst period
171 */
172 switch (clg->state) {
173 case 1:
174 if (rnd < clg->a4) {
175 clg->state = 4;
176 return true;
177 } else if (clg->a4 < rnd && rnd < clg->a1) {
178 clg->state = 3;
179 return true;
180 } else if (clg->a1 < rnd)
181 clg->state = 1;
182
183 break;
184 case 2:
185 if (rnd < clg->a5) {
186 clg->state = 3;
187 return true;
188 } else
189 clg->state = 2;
190
191 break;
192 case 3:
193 if (rnd < clg->a3)
194 clg->state = 2;
195 else if (clg->a3 < rnd && rnd < clg->a2 + clg->a3) {
196 clg->state = 1;
197 return true;
198 } else if (clg->a2 + clg->a3 < rnd) {
199 clg->state = 3;
200 return true;
201 }
202 break;
203 case 4:
204 clg->state = 1;
205 break;
206 }
207
208 return false;
209}
210
211/* loss_gilb_ell - Gilbert-Elliot model loss generator
212 * Generates losses according to the Gilbert-Elliot loss model or
213 * its special cases (Gilbert or Simple Gilbert)
214 *
215 * Makes a comparison between random number and the transition
216 * probabilities outgoing from the current state, then decides the
217 * next state. A second random number is extracted and the comparison
218 * with the loss probability of the current state decides if the next
219 * packet will be transmitted or lost.
220 */
221static bool loss_gilb_ell(struct netem_sched_data *q)
222{
223 struct clgstate *clg = &q->clg;
224
225 switch (clg->state) {
226 case 1:
227 if (net_random() < clg->a1)
228 clg->state = 2;
229 if (net_random() < clg->a4)
230 return true;
231 case 2:
232 if (net_random() < clg->a2)
233 clg->state = 1;
234 if (clg->a3 > net_random())
235 return true;
236 }
237
238 return false;
239}
240
241static bool loss_event(struct netem_sched_data *q)
242{
243 switch (q->loss_model) {
244 case CLG_RANDOM:
245 /* Random packet drop 0 => none, ~0 => all */
246 return q->loss && q->loss >= get_crandom(&q->loss_cor);
247
248 case CLG_4_STATES:
249 /* 4state loss model algorithm (used also for GI model)
250 * Extracts a value from the markov 4 state loss generator,
251 * if it is 1 drops a packet and if needed writes the event in
252 * the kernel logs
253 */
254 return loss_4state(q);
255
256 case CLG_GILB_ELL:
257 /* Gilbert-Elliot loss model algorithm
258 * Extracts a value from the Gilbert-Elliot loss generator,
259 * if it is 1 drops a packet and if needed writes the event in
260 * the kernel logs
261 */
262 return loss_gilb_ell(q);
263 }
264
265 return false; /* not reached */
266}
267
268
118/* tabledist - return a pseudo-randomly distributed value with mean mu and 269/* tabledist - return a pseudo-randomly distributed value with mean mu and
119 * std deviation sigma. Uses table lookup to approximate the desired 270 * std deviation sigma. Uses table lookup to approximate the desired
120 * distribution, and a uniformly-distributed pseudo-random source. 271 * distribution, and a uniformly-distributed pseudo-random source.
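The 4-state chain documented in loss_4state() above can be simulated in user space as well. The sketch below follows the state description in the comment (states 1 and 2 transmit, states 3 and 4 lose) rather than the kernel function line by line: thresholds are applied cumulatively for clarity and the transition probabilities are invented.

#include <stdio.h>
#include <stdlib.h>

/* Conceptual userspace sketch of the 4-state (GI) loss model described
 * in the comment above.  a1..a5 play the roles of p13, p31, p32, p14
 * and p23; every value here is made up.
 */
static double frand(void)
{
	return rand() / (RAND_MAX + 1.0);
}

int main(void)
{
	double a1 = 0.05, a2 = 0.20, a3 = 0.30, a4 = 0.01, a5 = 0.10;
	int state = 1, i, losses = 0, n = 100000;

	srand(1);
	for (i = 0; i < n; i++) {
		double rnd = frand();

		switch (state) {
		case 1:				/* good packet, gap period */
			if (rnd < a4)		/* p14: isolated loss */
				state = 4;
			else if (rnd < a4 + a1)	/* p13: loss burst begins */
				state = 3;
			break;
		case 2:				/* good packet, burst period */
			if (rnd < a5)		/* p23: back into the burst */
				state = 3;
			break;
		case 3:				/* lost packet, burst period */
			if (rnd < a3)		/* p32: good packet in burst */
				state = 2;
			else if (rnd < a3 + a2)	/* p31: burst ends */
				state = 1;
			break;
		case 4:				/* isolated loss is over */
			state = 1;
			break;
		}
		if (state == 3 || state == 4)
			losses++;
	}
	printf("loss rate with made-up parameters: %.3f\n",
	       (double)losses / n);
	return 0;
}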
@@ -161,14 +312,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
161 int ret; 312 int ret;
162 int count = 1; 313 int count = 1;
163 314
164 pr_debug("netem_enqueue skb=%p\n", skb);
165
166 /* Random duplication */ 315 /* Random duplication */
167 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) 316 if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
168 ++count; 317 ++count;
169 318
170 /* Random packet drop 0 => none, ~0 => all */ 319 /* Drop packet? */
171 if (q->loss && q->loss >= get_crandom(&q->loss_cor)) 320 if (loss_event(q))
172 --count; 321 --count;
173 322
174 if (count == 0) { 323 if (count == 0) {
@@ -211,8 +360,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
211 } 360 }
212 361
213 cb = netem_skb_cb(skb); 362 cb = netem_skb_cb(skb);
214 if (q->gap == 0 || /* not doing reordering */ 363 if (q->gap == 0 || /* not doing reordering */
215 q->counter < q->gap || /* inside last reordering gap */ 364 q->counter < q->gap || /* inside last reordering gap */
216 q->reorder < get_crandom(&q->reorder_cor)) { 365 q->reorder < get_crandom(&q->reorder_cor)) {
217 psched_time_t now; 366 psched_time_t now;
218 psched_tdiff_t delay; 367 psched_tdiff_t delay;
@@ -238,19 +387,18 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
238 ret = NET_XMIT_SUCCESS; 387 ret = NET_XMIT_SUCCESS;
239 } 388 }
240 389
241 if (likely(ret == NET_XMIT_SUCCESS)) { 390 if (ret != NET_XMIT_SUCCESS) {
242 sch->q.qlen++; 391 if (net_xmit_drop_count(ret)) {
243 sch->bstats.bytes += qdisc_pkt_len(skb); 392 sch->qstats.drops++;
244 sch->bstats.packets++; 393 return ret;
245 } else if (net_xmit_drop_count(ret)) { 394 }
246 sch->qstats.drops++;
247 } 395 }
248 396
249 pr_debug("netem: enqueue ret %d\n", ret); 397 sch->q.qlen++;
250 return ret; 398 return NET_XMIT_SUCCESS;
251} 399}
252 400
253static unsigned int netem_drop(struct Qdisc* sch) 401static unsigned int netem_drop(struct Qdisc *sch)
254{ 402{
255 struct netem_sched_data *q = qdisc_priv(sch); 403 struct netem_sched_data *q = qdisc_priv(sch);
256 unsigned int len = 0; 404 unsigned int len = 0;
@@ -267,7 +415,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
267 struct netem_sched_data *q = qdisc_priv(sch); 415 struct netem_sched_data *q = qdisc_priv(sch);
268 struct sk_buff *skb; 416 struct sk_buff *skb;
269 417
270 if (sch->flags & TCQ_F_THROTTLED) 418 if (qdisc_is_throttled(sch))
271 return NULL; 419 return NULL;
272 420
273 skb = q->qdisc->ops->peek(q->qdisc); 421 skb = q->qdisc->ops->peek(q->qdisc);
@@ -289,8 +437,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
289 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS) 437 if (G_TC_FROM(skb->tc_verd) & AT_INGRESS)
290 skb->tstamp.tv64 = 0; 438 skb->tstamp.tv64 = 0;
291#endif 439#endif
292 pr_debug("netem_dequeue: return skb=%p\n", skb); 440
293 sch->q.qlen--; 441 sch->q.qlen--;
442 qdisc_unthrottled(sch);
443 qdisc_bstats_update(sch, skb);
294 return skb; 444 return skb;
295 } 445 }
296 446
@@ -309,6 +459,16 @@ static void netem_reset(struct Qdisc *sch)
309 qdisc_watchdog_cancel(&q->watchdog); 459 qdisc_watchdog_cancel(&q->watchdog);
310} 460}
311 461
462static void dist_free(struct disttable *d)
463{
464 if (d) {
465 if (is_vmalloc_addr(d))
466 vfree(d);
467 else
468 kfree(d);
469 }
470}
471
312/* 472/*
313 * Distribution data is a variable size payload containing 473 * Distribution data is a variable size payload containing
314 * signed 16 bit values. 474 * signed 16 bit values.
@@ -316,16 +476,20 @@ static void netem_reset(struct Qdisc *sch)
316static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) 476static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
317{ 477{
318 struct netem_sched_data *q = qdisc_priv(sch); 478 struct netem_sched_data *q = qdisc_priv(sch);
319 unsigned long n = nla_len(attr)/sizeof(__s16); 479 size_t n = nla_len(attr)/sizeof(__s16);
320 const __s16 *data = nla_data(attr); 480 const __s16 *data = nla_data(attr);
321 spinlock_t *root_lock; 481 spinlock_t *root_lock;
322 struct disttable *d; 482 struct disttable *d;
323 int i; 483 int i;
484 size_t s;
324 485
325 if (n > 65536) 486 if (n > NETEM_DIST_MAX)
326 return -EINVAL; 487 return -EINVAL;
327 488
328 d = kmalloc(sizeof(*d) + n*sizeof(d->table[0]), GFP_KERNEL); 489 s = sizeof(struct disttable) + n * sizeof(s16);
490 d = kmalloc(s, GFP_KERNEL);
491 if (!d)
492 d = vmalloc(s);
329 if (!d) 493 if (!d)
330 return -ENOMEM; 494 return -ENOMEM;
331 495
@@ -336,7 +500,7 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr)
336 root_lock = qdisc_root_sleeping_lock(sch); 500 root_lock = qdisc_root_sleeping_lock(sch);
337 501
338 spin_lock_bh(root_lock); 502 spin_lock_bh(root_lock);
339 kfree(q->delay_dist); 503 dist_free(q->delay_dist);
340 q->delay_dist = d; 504 q->delay_dist = d;
341 spin_unlock_bh(root_lock); 505 spin_unlock_bh(root_lock);
342 return 0; 506 return 0;
@@ -370,10 +534,66 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
370 init_crandom(&q->corrupt_cor, r->correlation); 534 init_crandom(&q->corrupt_cor, r->correlation);
371} 535}
372 536
537static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
538{
539 struct netem_sched_data *q = qdisc_priv(sch);
540 const struct nlattr *la;
541 int rem;
542
543 nla_for_each_nested(la, attr, rem) {
544 u16 type = nla_type(la);
545
546 switch(type) {
547 case NETEM_LOSS_GI: {
548 const struct tc_netem_gimodel *gi = nla_data(la);
549
550 if (nla_len(la) != sizeof(struct tc_netem_gimodel)) {
551 pr_info("netem: incorrect gi model size\n");
552 return -EINVAL;
553 }
554
555 q->loss_model = CLG_4_STATES;
556
557 q->clg.state = 1;
558 q->clg.a1 = gi->p13;
559 q->clg.a2 = gi->p31;
560 q->clg.a3 = gi->p32;
561 q->clg.a4 = gi->p14;
562 q->clg.a5 = gi->p23;
563 break;
564 }
565
566 case NETEM_LOSS_GE: {
567 const struct tc_netem_gemodel *ge = nla_data(la);
568
569 if (nla_len(la) != sizeof(struct tc_netem_gemodel)) {
570 pr_info("netem: incorrect gi model size\n");
571 return -EINVAL;
572 }
573
574 q->loss_model = CLG_GILB_ELL;
575 q->clg.state = 1;
576 q->clg.a1 = ge->p;
577 q->clg.a2 = ge->r;
578 q->clg.a3 = ge->h;
579 q->clg.a4 = ge->k1;
580 break;
581 }
582
583 default:
584 pr_info("netem: unknown loss type %u\n", type);
585 return -EINVAL;
586 }
587 }
588
589 return 0;
590}
591
373static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { 592static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
374 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) }, 593 [TCA_NETEM_CORR] = { .len = sizeof(struct tc_netem_corr) },
375 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) }, 594 [TCA_NETEM_REORDER] = { .len = sizeof(struct tc_netem_reorder) },
376 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, 595 [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
596 [TCA_NETEM_LOSS] = { .type = NLA_NESTED },
377}; 597};
378 598
379static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, 599static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
@@ -381,11 +601,15 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
381{ 601{
382 int nested_len = nla_len(nla) - NLA_ALIGN(len); 602 int nested_len = nla_len(nla) - NLA_ALIGN(len);
383 603
384 if (nested_len < 0) 604 if (nested_len < 0) {
605 pr_info("netem: invalid attributes len %d\n", nested_len);
385 return -EINVAL; 606 return -EINVAL;
607 }
608
386 if (nested_len >= nla_attr_size(0)) 609 if (nested_len >= nla_attr_size(0))
387 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), 610 return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
388 nested_len, policy); 611 nested_len, policy);
612
389 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); 613 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
390 return 0; 614 return 0;
391} 615}
@@ -408,7 +632,7 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
408 632
409 ret = fifo_set_limit(q->qdisc, qopt->limit); 633 ret = fifo_set_limit(q->qdisc, qopt->limit);
410 if (ret) { 634 if (ret) {
411 pr_debug("netem: can't set fifo limit\n"); 635 pr_info("netem: can't set fifo limit\n");
412 return ret; 636 return ret;
413 } 637 }
414 638
@@ -441,7 +665,11 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
441 if (tb[TCA_NETEM_CORRUPT]) 665 if (tb[TCA_NETEM_CORRUPT])
442 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]); 666 get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
443 667
444 return 0; 668 q->loss_model = CLG_RANDOM;
669 if (tb[TCA_NETEM_LOSS])
670 ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
671
672 return ret;
445} 673}
446 674
447/* 675/*
@@ -477,8 +705,6 @@ static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
477 __skb_queue_after(list, skb, nskb); 705 __skb_queue_after(list, skb, nskb);
478 706
479 sch->qstats.backlog += qdisc_pkt_len(nskb); 707 sch->qstats.backlog += qdisc_pkt_len(nskb);
480 sch->bstats.bytes += qdisc_pkt_len(nskb);
481 sch->bstats.packets++;
482 708
483 return NET_XMIT_SUCCESS; 709 return NET_XMIT_SUCCESS;
484 } 710 }
@@ -538,17 +764,17 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
538 764
539 qdisc_watchdog_init(&q->watchdog, sch); 765 qdisc_watchdog_init(&q->watchdog, sch);
540 766
541 q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 767 q->loss_model = CLG_RANDOM;
542 &tfifo_qdisc_ops, 768 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
543 TC_H_MAKE(sch->handle, 1)); 769 TC_H_MAKE(sch->handle, 1));
544 if (!q->qdisc) { 770 if (!q->qdisc) {
545 pr_debug("netem: qdisc create failed\n"); 771 pr_notice("netem: qdisc create tfifo qdisc failed\n");
546 return -ENOMEM; 772 return -ENOMEM;
547 } 773 }
548 774
549 ret = netem_change(sch, opt); 775 ret = netem_change(sch, opt);
550 if (ret) { 776 if (ret) {
551 pr_debug("netem: change failed\n"); 777 pr_info("netem: change failed\n");
552 qdisc_destroy(q->qdisc); 778 qdisc_destroy(q->qdisc);
553 } 779 }
554 return ret; 780 return ret;
@@ -560,14 +786,61 @@ static void netem_destroy(struct Qdisc *sch)
560 786
561 qdisc_watchdog_cancel(&q->watchdog); 787 qdisc_watchdog_cancel(&q->watchdog);
562 qdisc_destroy(q->qdisc); 788 qdisc_destroy(q->qdisc);
563 kfree(q->delay_dist); 789 dist_free(q->delay_dist);
790}
791
792static int dump_loss_model(const struct netem_sched_data *q,
793 struct sk_buff *skb)
794{
795 struct nlattr *nest;
796
797 nest = nla_nest_start(skb, TCA_NETEM_LOSS);
798 if (nest == NULL)
799 goto nla_put_failure;
800
801 switch (q->loss_model) {
802 case CLG_RANDOM:
803 /* legacy loss model */
804 nla_nest_cancel(skb, nest);
805 return 0; /* no data */
806
807 case CLG_4_STATES: {
808 struct tc_netem_gimodel gi = {
809 .p13 = q->clg.a1,
810 .p31 = q->clg.a2,
811 .p32 = q->clg.a3,
812 .p14 = q->clg.a4,
813 .p23 = q->clg.a5,
814 };
815
816 NLA_PUT(skb, NETEM_LOSS_GI, sizeof(gi), &gi);
817 break;
818 }
819 case CLG_GILB_ELL: {
820 struct tc_netem_gemodel ge = {
821 .p = q->clg.a1,
822 .r = q->clg.a2,
823 .h = q->clg.a3,
824 .k1 = q->clg.a4,
825 };
826
827 NLA_PUT(skb, NETEM_LOSS_GE, sizeof(ge), &ge);
828 break;
829 }
830 }
831
832 nla_nest_end(skb, nest);
833 return 0;
834
835nla_put_failure:
836 nla_nest_cancel(skb, nest);
837 return -1;
564} 838}
565 839
566static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) 840static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
567{ 841{
568 const struct netem_sched_data *q = qdisc_priv(sch); 842 const struct netem_sched_data *q = qdisc_priv(sch);
569 unsigned char *b = skb_tail_pointer(skb); 843 struct nlattr *nla = (struct nlattr *) skb_tail_pointer(skb);
570 struct nlattr *nla = (struct nlattr *) b;
571 struct tc_netem_qopt qopt; 844 struct tc_netem_qopt qopt;
572 struct tc_netem_corr cor; 845 struct tc_netem_corr cor;
573 struct tc_netem_reorder reorder; 846 struct tc_netem_reorder reorder;
@@ -594,17 +867,87 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
594 corrupt.correlation = q->corrupt_cor.rho; 867 corrupt.correlation = q->corrupt_cor.rho;
595 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt); 868 NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
596 869
597 nla->nla_len = skb_tail_pointer(skb) - b; 870 if (dump_loss_model(q, skb) != 0)
871 goto nla_put_failure;
598 872
599 return skb->len; 873 return nla_nest_end(skb, nla);
600 874
601nla_put_failure: 875nla_put_failure:
602 nlmsg_trim(skb, b); 876 nlmsg_trim(skb, nla);
603 return -1; 877 return -1;
604} 878}
605 879
880static int netem_dump_class(struct Qdisc *sch, unsigned long cl,
881 struct sk_buff *skb, struct tcmsg *tcm)
882{
883 struct netem_sched_data *q = qdisc_priv(sch);
884
885 if (cl != 1) /* only one class */
886 return -ENOENT;
887
888 tcm->tcm_handle |= TC_H_MIN(1);
889 tcm->tcm_info = q->qdisc->handle;
890
891 return 0;
892}
893
894static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
895 struct Qdisc **old)
896{
897 struct netem_sched_data *q = qdisc_priv(sch);
898
899 if (new == NULL)
900 new = &noop_qdisc;
901
902 sch_tree_lock(sch);
903 *old = q->qdisc;
904 q->qdisc = new;
905 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
906 qdisc_reset(*old);
907 sch_tree_unlock(sch);
908
909 return 0;
910}
911
912static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg)
913{
914 struct netem_sched_data *q = qdisc_priv(sch);
915 return q->qdisc;
916}
917
918static unsigned long netem_get(struct Qdisc *sch, u32 classid)
919{
920 return 1;
921}
922
923static void netem_put(struct Qdisc *sch, unsigned long arg)
924{
925}
926
927static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker)
928{
929 if (!walker->stop) {
930 if (walker->count >= walker->skip)
931 if (walker->fn(sch, 1, walker) < 0) {
932 walker->stop = 1;
933 return;
934 }
935 walker->count++;
936 }
937}
938
939static const struct Qdisc_class_ops netem_class_ops = {
940 .graft = netem_graft,
941 .leaf = netem_leaf,
942 .get = netem_get,
943 .put = netem_put,
944 .walk = netem_walk,
945 .dump = netem_dump_class,
946};
947
606static struct Qdisc_ops netem_qdisc_ops __read_mostly = { 948static struct Qdisc_ops netem_qdisc_ops __read_mostly = {
607 .id = "netem", 949 .id = "netem",
950 .cl_ops = &netem_class_ops,
608 .priv_size = sizeof(struct netem_sched_data), 951 .priv_size = sizeof(struct netem_sched_data),
609 .enqueue = netem_enqueue, 952 .enqueue = netem_enqueue,
610 .dequeue = netem_dequeue, 953 .dequeue = netem_dequeue,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 0748fb1e3a49..2a318f2dc3e5 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -22,8 +22,7 @@
22#include <net/pkt_sched.h> 22#include <net/pkt_sched.h>
23 23
24 24
25struct prio_sched_data 25struct prio_sched_data {
26{
27 int bands; 26 int bands;
28 struct tcf_proto *filter_list; 27 struct tcf_proto *filter_list;
29 u8 prio2band[TC_PRIO_MAX+1]; 28 u8 prio2band[TC_PRIO_MAX+1];
@@ -54,7 +53,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
54 if (!q->filter_list || err < 0) { 53 if (!q->filter_list || err < 0) {
55 if (TC_H_MAJ(band)) 54 if (TC_H_MAJ(band))
56 band = 0; 55 band = 0;
57 return q->queues[q->prio2band[band&TC_PRIO_MAX]]; 56 return q->queues[q->prio2band[band & TC_PRIO_MAX]];
58 } 57 }
59 band = res.classid; 58 band = res.classid;
60 } 59 }
@@ -84,8 +83,6 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
84 83
85 ret = qdisc_enqueue(skb, qdisc); 84 ret = qdisc_enqueue(skb, qdisc);
86 if (ret == NET_XMIT_SUCCESS) { 85 if (ret == NET_XMIT_SUCCESS) {
87 sch->bstats.bytes += qdisc_pkt_len(skb);
88 sch->bstats.packets++;
89 sch->q.qlen++; 86 sch->q.qlen++;
90 return NET_XMIT_SUCCESS; 87 return NET_XMIT_SUCCESS;
91 } 88 }
@@ -108,7 +105,7 @@ static struct sk_buff *prio_peek(struct Qdisc *sch)
108 return NULL; 105 return NULL;
109} 106}
110 107
111static struct sk_buff *prio_dequeue(struct Qdisc* sch) 108static struct sk_buff *prio_dequeue(struct Qdisc *sch)
112{ 109{
113 struct prio_sched_data *q = qdisc_priv(sch); 110 struct prio_sched_data *q = qdisc_priv(sch);
114 int prio; 111 int prio;
@@ -117,6 +114,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
117 struct Qdisc *qdisc = q->queues[prio]; 114 struct Qdisc *qdisc = q->queues[prio];
118 struct sk_buff *skb = qdisc->dequeue(qdisc); 115 struct sk_buff *skb = qdisc->dequeue(qdisc);
119 if (skb) { 116 if (skb) {
117 qdisc_bstats_update(sch, skb);
120 sch->q.qlen--; 118 sch->q.qlen--;
121 return skb; 119 return skb;
122 } 120 }
@@ -125,7 +123,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc* sch)
125 123
126} 124}
127 125
128static unsigned int prio_drop(struct Qdisc* sch) 126static unsigned int prio_drop(struct Qdisc *sch)
129{ 127{
130 struct prio_sched_data *q = qdisc_priv(sch); 128 struct prio_sched_data *q = qdisc_priv(sch);
131 int prio; 129 int prio;
@@ -144,24 +142,24 @@ static unsigned int prio_drop(struct Qdisc* sch)
144 142
145 143
146static void 144static void
147prio_reset(struct Qdisc* sch) 145prio_reset(struct Qdisc *sch)
148{ 146{
149 int prio; 147 int prio;
150 struct prio_sched_data *q = qdisc_priv(sch); 148 struct prio_sched_data *q = qdisc_priv(sch);
151 149
152 for (prio=0; prio<q->bands; prio++) 150 for (prio = 0; prio < q->bands; prio++)
153 qdisc_reset(q->queues[prio]); 151 qdisc_reset(q->queues[prio]);
154 sch->q.qlen = 0; 152 sch->q.qlen = 0;
155} 153}
156 154
157static void 155static void
158prio_destroy(struct Qdisc* sch) 156prio_destroy(struct Qdisc *sch)
159{ 157{
160 int prio; 158 int prio;
161 struct prio_sched_data *q = qdisc_priv(sch); 159 struct prio_sched_data *q = qdisc_priv(sch);
162 160
163 tcf_destroy_chain(&q->filter_list); 161 tcf_destroy_chain(&q->filter_list);
164 for (prio=0; prio<q->bands; prio++) 162 for (prio = 0; prio < q->bands; prio++)
165 qdisc_destroy(q->queues[prio]); 163 qdisc_destroy(q->queues[prio]);
166} 164}
167 165
@@ -178,7 +176,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
178 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) 176 if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
179 return -EINVAL; 177 return -EINVAL;
180 178
181 for (i=0; i<=TC_PRIO_MAX; i++) { 179 for (i = 0; i <= TC_PRIO_MAX; i++) {
182 if (qopt->priomap[i] >= qopt->bands) 180 if (qopt->priomap[i] >= qopt->bands)
183 return -EINVAL; 181 return -EINVAL;
184 } 182 }
@@ -187,7 +185,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
187 q->bands = qopt->bands; 185 q->bands = qopt->bands;
188 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); 186 memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
189 187
190 for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { 188 for (i = q->bands; i < TCQ_PRIO_BANDS; i++) {
191 struct Qdisc *child = q->queues[i]; 189 struct Qdisc *child = q->queues[i];
192 q->queues[i] = &noop_qdisc; 190 q->queues[i] = &noop_qdisc;
193 if (child != &noop_qdisc) { 191 if (child != &noop_qdisc) {
@@ -197,10 +195,11 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
197 } 195 }
198 sch_tree_unlock(sch); 196 sch_tree_unlock(sch);
199 197
200 for (i=0; i<q->bands; i++) { 198 for (i = 0; i < q->bands; i++) {
201 if (q->queues[i] == &noop_qdisc) { 199 if (q->queues[i] == &noop_qdisc) {
202 struct Qdisc *child, *old; 200 struct Qdisc *child, *old;
203 child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 201
202 child = qdisc_create_dflt(sch->dev_queue,
204 &pfifo_qdisc_ops, 203 &pfifo_qdisc_ops,
205 TC_H_MAKE(sch->handle, i + 1)); 204 TC_H_MAKE(sch->handle, i + 1));
206 if (child) { 205 if (child) {
@@ -225,7 +224,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
225 struct prio_sched_data *q = qdisc_priv(sch); 224 struct prio_sched_data *q = qdisc_priv(sch);
226 int i; 225 int i;
227 226
228 for (i=0; i<TCQ_PRIO_BANDS; i++) 227 for (i = 0; i < TCQ_PRIO_BANDS; i++)
229 q->queues[i] = &noop_qdisc; 228 q->queues[i] = &noop_qdisc;
230 229
231 if (opt == NULL) { 230 if (opt == NULL) {
@@ -233,7 +232,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt)
233 } else { 232 } else {
234 int err; 233 int err;
235 234
236 if ((err= prio_tune(sch, opt)) != 0) 235 if ((err = prio_tune(sch, opt)) != 0)
237 return err; 236 return err;
238 } 237 }
239 return 0; 238 return 0;
@@ -246,7 +245,7 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
246 struct tc_prio_qopt opt; 245 struct tc_prio_qopt opt;
247 246
248 opt.bands = q->bands; 247 opt.bands = q->bands;
249 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); 248 memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
250 249
251 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 250 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
252 251
@@ -343,7 +342,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
343 arg->count++; 342 arg->count++;
344 continue; 343 continue;
345 } 344 }
346 if (arg->fn(sch, prio+1, arg) < 0) { 345 if (arg->fn(sch, prio + 1, arg) < 0) {
347 arg->stop = 1; 346 arg->stop = 1;
348 break; 347 break;
349 } 348 }
@@ -351,7 +350,7 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
351 } 350 }
352} 351}
353 352
354static struct tcf_proto ** prio_find_tcf(struct Qdisc *sch, unsigned long cl) 353static struct tcf_proto **prio_find_tcf(struct Qdisc *sch, unsigned long cl)
355{ 354{
356 struct prio_sched_data *q = qdisc_priv(sch); 355 struct prio_sched_data *q = qdisc_priv(sch);
357 356
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
new file mode 100644
index 000000000000..103343408593
--- /dev/null
+++ b/net/sched/sch_qfq.c
@@ -0,0 +1,1137 @@
1/*
2 * net/sched/sch_qfq.c Quick Fair Queueing Scheduler.
3 *
4 * Copyright (c) 2009 Fabio Checconi, Luigi Rizzo, and Paolo Valente.
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * version 2 as published by the Free Software Foundation.
9 */
10
11#include <linux/module.h>
12#include <linux/init.h>
13#include <linux/bitops.h>
14#include <linux/errno.h>
15#include <linux/netdevice.h>
16#include <linux/pkt_sched.h>
17#include <net/sch_generic.h>
18#include <net/pkt_sched.h>
19#include <net/pkt_cls.h>
20
21
22/* Quick Fair Queueing
23 ===================
24
25 Sources:
26
27 Fabio Checconi, Luigi Rizzo, and Paolo Valente: "QFQ: Efficient
28 Packet Scheduling with Tight Bandwidth Distribution Guarantees."
29
30 See also:
31 http://retis.sssup.it/~fabio/linux/qfq/
32 */
33
34/*
35
36 Virtual time computations.
37
38 S, F and V are all computed in fixed point arithmetic with
39 FRAC_BITS decimal bits.
40
41 QFQ_MAX_INDEX is the maximum index allowed for a group. We need
42 one bit per index.
43 QFQ_MAX_WSHIFT is the maximum power of two supported as a weight.
44
45 The layout of the bits is as below:
46
47 [ MTU_SHIFT ][ FRAC_BITS ]
48 [ MAX_INDEX ][ MIN_SLOT_SHIFT ]
49 ^.__grp->index = 0
50 *.__grp->slot_shift
51
52 where MIN_SLOT_SHIFT is derived by difference from the others.
53
54 The max group index corresponds to Lmax/w_min, where
55 Lmax=1<<MTU_SHIFT, w_min = 1 .
56 From this, and knowing how many groups (MAX_INDEX) we want,
57 we can derive the shift corresponding to each group.
58
59 Because we often need to compute
60 F = S + len/w_i and V = V + len/wsum
61 instead of storing w_i store the value
62 inv_w = (1<<FRAC_BITS)/w_i
63 so we can do F = S + len * inv_w * wsum.
64 We use W_TOT in the formulas so we can easily move between
65 static and adaptive weight sum.
66
67 The per-scheduler-instance data contain all the data structures
68 for the scheduler: bitmaps and bucket lists.
69
70 */
71
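The fixed-point bookkeeping described in the comment above boils down to storing inv_w = ONE_FP / w per class, so that a flow's finish timestamp can advance by len * inv_w with no division in the fast path. The standalone sketch below shows the arithmetic with FRAC_BITS = 30; the weight and packet lengths are invented.

#include <stdio.h>
#include <stdint.h>

/* Standalone illustration of the fixed-point timestamps described in
 * the comment above: F = S + len/w is computed as S + len * inv_w,
 * where inv_w = ONE_FP / w.  Weight and packet lengths are made up.
 */
#define FRAC_BITS	30
#define ONE_FP		(1ULL << FRAC_BITS)

int main(void)
{
	uint32_t weight = 4;			/* class weight w */
	uint32_t inv_w = ONE_FP / weight;	/* what qfq stores per class */
	uint64_t S = 0;				/* start timestamp, fixed point */
	unsigned int lengths[] = { 1500, 64, 512 };
	int i;

	for (i = 0; i < 3; i++) {
		uint64_t F = S + (uint64_t)lengths[i] * inv_w;

		printf("len %4u: S=%llu F=%llu (F - S corresponds to %.1f)\n",
		       lengths[i], (unsigned long long)S,
		       (unsigned long long)F, (double)(F - S) / ONE_FP);
		S = F;	/* the next packet starts where this one finished */
	}
	return 0;
}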
72/*
73 * Maximum number of consecutive slots occupied by backlogged classes
74 * inside a group.
75 */
76#define QFQ_MAX_SLOTS 32
77
78/*
79 * Shifts used for class<->group mapping. We allow class weights that are
80 * in the range [1, 2^MAX_WSHIFT], and we try to map each class i to the
81 * group with the smallest index that can support the L_i / r_i configured
82 * for the class.
83 *
84 * grp->index is the index of the group; and grp->slot_shift
85 * is the shift for the corresponding (scaled) sigma_i.
86 */
87#define QFQ_MAX_INDEX 19
88#define QFQ_MAX_WSHIFT 16
89
90#define QFQ_MAX_WEIGHT (1<<QFQ_MAX_WSHIFT)
91#define QFQ_MAX_WSUM (2*QFQ_MAX_WEIGHT)
92
93#define FRAC_BITS 30 /* fixed point arithmetic */
94#define ONE_FP (1UL << FRAC_BITS)
95#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
96
97#define QFQ_MTU_SHIFT 11
98#define QFQ_MIN_SLOT_SHIFT (FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)
99
100/*
101 * Possible group states. These values are used as indexes for the bitmaps
102 * array of struct qfq_queue.
103 */
104enum qfq_state { ER, IR, EB, IB, QFQ_MAX_STATE };
105
106struct qfq_group;
107
108struct qfq_class {
109 struct Qdisc_class_common common;
110
111 unsigned int refcnt;
112 unsigned int filter_cnt;
113
114 struct gnet_stats_basic_packed bstats;
115 struct gnet_stats_queue qstats;
116 struct gnet_stats_rate_est rate_est;
117 struct Qdisc *qdisc;
118
119 struct hlist_node next; /* Link for the slot list. */
120 u64 S, F; /* flow timestamps (exact) */
121
122 /* group we belong to. In principle we would need the index,
123 * which is log_2(lmax/weight), but we never reference it
124 * directly, only the group.
125 */
126 struct qfq_group *grp;
127
128 /* these are copied from the flowset. */
129 u32 inv_w; /* ONE_FP/weight */
130 u32 lmax; /* Max packet size for this flow. */
131};
132
133struct qfq_group {
134 u64 S, F; /* group timestamps (approx). */
135 unsigned int slot_shift; /* Slot shift. */
136 unsigned int index; /* Group index. */
137 unsigned int front; /* Index of the front slot. */
138 unsigned long full_slots; /* non-empty slots */
139
140 /* Array of RR lists of active classes. */
141 struct hlist_head slots[QFQ_MAX_SLOTS];
142};
143
144struct qfq_sched {
145 struct tcf_proto *filter_list;
146 struct Qdisc_class_hash clhash;
147
148 u64 V; /* Precise virtual time. */
149 u32 wsum; /* weight sum */
150
151 unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
152 struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
153};
154
155static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
156{
157 struct qfq_sched *q = qdisc_priv(sch);
158 struct Qdisc_class_common *clc;
159
160 clc = qdisc_class_find(&q->clhash, classid);
161 if (clc == NULL)
162 return NULL;
163 return container_of(clc, struct qfq_class, common);
164}
165
166static void qfq_purge_queue(struct qfq_class *cl)
167{
168 unsigned int len = cl->qdisc->q.qlen;
169
170 qdisc_reset(cl->qdisc);
171 qdisc_tree_decrease_qlen(cl->qdisc, len);
172}
173
174static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
175 [TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
176 [TCA_QFQ_LMAX] = { .type = NLA_U32 },
177};
178
179/*
180 * Calculate a flow index, given its weight and maximum packet length.
181 * index = log_2(maxlen/weight) but we need to apply the scaling.
182 * This is used only once at flow creation.
183 */
184static int qfq_calc_index(u32 inv_w, unsigned int maxlen)
185{
186 u64 slot_size = (u64)maxlen * inv_w;
187 unsigned long size_map;
188 int index = 0;
189
190 size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
191 if (!size_map)
192 goto out;
193
194 index = __fls(size_map) + 1; /* basically a log_2 */
195 index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
196
197 if (index < 0)
198 index = 0;
199out:
200 pr_debug("qfq calc_index: W = %lu, L = %u, I = %d\n",
201 (unsigned long) ONE_FP/inv_w, maxlen, index);
202
203 return index;
204}
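qfq_calc_index() above maps a (weight, lmax) pair to a group index of roughly log2(lmax/weight) after the FRAC_BITS scaling. The userspace re-implementation below replaces __fls() with an explicit loop and prints the group index for a few arbitrary weights at lmax = 2048.

#include <stdio.h>
#include <stdint.h>

/* Userspace re-implementation of qfq_calc_index() above, with __fls()
 * replaced by a loop.  The weights used in main() are arbitrary.
 */
#define FRAC_BITS		30
#define ONE_FP			(1ULL << FRAC_BITS)
#define QFQ_MAX_INDEX		19
#define QFQ_MTU_SHIFT		11
#define QFQ_MIN_SLOT_SHIFT	(FRAC_BITS + QFQ_MTU_SHIFT - QFQ_MAX_INDEX)

static int qfq_calc_index(uint32_t inv_w, unsigned int maxlen)
{
	uint64_t slot_size = (uint64_t)maxlen * inv_w;
	uint64_t size_map = slot_size >> QFQ_MIN_SLOT_SHIFT;
	int index = 0;

	if (!size_map)
		return 0;

	while (size_map >> (index + 1))	/* position of the highest set bit */
		index++;
	index++;			/* i.e. __fls(size_map) + 1 */
	index -= !(slot_size - (1ULL << (index + QFQ_MIN_SLOT_SHIFT - 1)));
	return index < 0 ? 0 : index;
}

int main(void)
{
	unsigned int weights[] = { 1, 16, 256 };
	int i;

	for (i = 0; i < 3; i++) {
		uint32_t inv_w = ONE_FP / weights[i];

		printf("weight %3u, lmax 2048 -> group %d\n",
		       weights[i], qfq_calc_index(inv_w, 2048));
	}
	return 0;
}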
205
206static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
207 struct nlattr **tca, unsigned long *arg)
208{
209 struct qfq_sched *q = qdisc_priv(sch);
210 struct qfq_class *cl = (struct qfq_class *)*arg;
211 struct nlattr *tb[TCA_QFQ_MAX + 1];
212 u32 weight, lmax, inv_w;
213 int i, err;
214
215 if (tca[TCA_OPTIONS] == NULL) {
216 pr_notice("qfq: no options\n");
217 return -EINVAL;
218 }
219
220 err = nla_parse_nested(tb, TCA_QFQ_MAX, tca[TCA_OPTIONS], qfq_policy);
221 if (err < 0)
222 return err;
223
224 if (tb[TCA_QFQ_WEIGHT]) {
225 weight = nla_get_u32(tb[TCA_QFQ_WEIGHT]);
226 if (!weight || weight > (1UL << QFQ_MAX_WSHIFT)) {
227 pr_notice("qfq: invalid weight %u\n", weight);
228 return -EINVAL;
229 }
230 } else
231 weight = 1;
232
233 inv_w = ONE_FP / weight;
234 weight = ONE_FP / inv_w;
235 if (q->wsum + weight > QFQ_MAX_WSUM) {
236 pr_notice("qfq: total weight out of range (%u + %u)\n",
237 weight, q->wsum);
238 return -EINVAL;
239 }
240
241 if (tb[TCA_QFQ_LMAX]) {
242 lmax = nla_get_u32(tb[TCA_QFQ_LMAX]);
243 if (!lmax || lmax > (1UL << QFQ_MTU_SHIFT)) {
244 pr_notice("qfq: invalid max length %u\n", lmax);
245 return -EINVAL;
246 }
247 } else
248 lmax = 1UL << QFQ_MTU_SHIFT;
249
250 if (cl != NULL) {
251 if (tca[TCA_RATE]) {
252 err = gen_replace_estimator(&cl->bstats, &cl->rate_est,
253 qdisc_root_sleeping_lock(sch),
254 tca[TCA_RATE]);
255 if (err)
256 return err;
257 }
258
259 sch_tree_lock(sch);
260 if (tb[TCA_QFQ_WEIGHT]) {
261 q->wsum = weight - ONE_FP / cl->inv_w;
262 cl->inv_w = inv_w;
263 }
264 sch_tree_unlock(sch);
265
266 return 0;
267 }
268
269 cl = kzalloc(sizeof(struct qfq_class), GFP_KERNEL);
270 if (cl == NULL)
271 return -ENOBUFS;
272
273 cl->refcnt = 1;
274 cl->common.classid = classid;
275 cl->lmax = lmax;
276 cl->inv_w = inv_w;
277 i = qfq_calc_index(cl->inv_w, cl->lmax);
278
279 cl->grp = &q->groups[i];
280 q->wsum += weight;
281
282 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
283 &pfifo_qdisc_ops, classid);
284 if (cl->qdisc == NULL)
285 cl->qdisc = &noop_qdisc;
286
287 if (tca[TCA_RATE]) {
288 err = gen_new_estimator(&cl->bstats, &cl->rate_est,
289 qdisc_root_sleeping_lock(sch),
290 tca[TCA_RATE]);
291 if (err) {
292 qdisc_destroy(cl->qdisc);
293 kfree(cl);
294 return err;
295 }
296 }
297
298 sch_tree_lock(sch);
299 qdisc_class_hash_insert(&q->clhash, &cl->common);
300 sch_tree_unlock(sch);
301
302 qdisc_class_hash_grow(sch, &q->clhash);
303
304 *arg = (unsigned long)cl;
305 return 0;
306}
307
308static void qfq_destroy_class(struct Qdisc *sch, struct qfq_class *cl)
309{
310 struct qfq_sched *q = qdisc_priv(sch);
311
312 if (cl->inv_w) {
313 q->wsum -= ONE_FP / cl->inv_w;
314 cl->inv_w = 0;
315 }
316
317 gen_kill_estimator(&cl->bstats, &cl->rate_est);
318 qdisc_destroy(cl->qdisc);
319 kfree(cl);
320}
321
322static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
323{
324 struct qfq_sched *q = qdisc_priv(sch);
325 struct qfq_class *cl = (struct qfq_class *)arg;
326
327 if (cl->filter_cnt > 0)
328 return -EBUSY;
329
330 sch_tree_lock(sch);
331
332 qfq_purge_queue(cl);
333 qdisc_class_hash_remove(&q->clhash, &cl->common);
334
335 BUG_ON(--cl->refcnt == 0);
336 /*
337 * This shouldn't happen: we "hold" one cops->get() when called
338 * from tc_ctl_tclass; the destroy method is done from cops->put().
339 */
340
341 sch_tree_unlock(sch);
342 return 0;
343}
344
345static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid)
346{
347 struct qfq_class *cl = qfq_find_class(sch, classid);
348
349 if (cl != NULL)
350 cl->refcnt++;
351
352 return (unsigned long)cl;
353}
354
355static void qfq_put_class(struct Qdisc *sch, unsigned long arg)
356{
357 struct qfq_class *cl = (struct qfq_class *)arg;
358
359 if (--cl->refcnt == 0)
360 qfq_destroy_class(sch, cl);
361}
362
363static struct tcf_proto **qfq_tcf_chain(struct Qdisc *sch, unsigned long cl)
364{
365 struct qfq_sched *q = qdisc_priv(sch);
366
367 if (cl)
368 return NULL;
369
370 return &q->filter_list;
371}
372
373static unsigned long qfq_bind_tcf(struct Qdisc *sch, unsigned long parent,
374 u32 classid)
375{
376 struct qfq_class *cl = qfq_find_class(sch, classid);
377
378 if (cl != NULL)
379 cl->filter_cnt++;
380
381 return (unsigned long)cl;
382}
383
384static void qfq_unbind_tcf(struct Qdisc *sch, unsigned long arg)
385{
386 struct qfq_class *cl = (struct qfq_class *)arg;
387
388 cl->filter_cnt--;
389}
390
391static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
392 struct Qdisc *new, struct Qdisc **old)
393{
394 struct qfq_class *cl = (struct qfq_class *)arg;
395
396 if (new == NULL) {
397 new = qdisc_create_dflt(sch->dev_queue,
398 &pfifo_qdisc_ops, cl->common.classid);
399 if (new == NULL)
400 new = &noop_qdisc;
401 }
402
403 sch_tree_lock(sch);
404 qfq_purge_queue(cl);
405 *old = cl->qdisc;
406 cl->qdisc = new;
407 sch_tree_unlock(sch);
408 return 0;
409}
410
411static struct Qdisc *qfq_class_leaf(struct Qdisc *sch, unsigned long arg)
412{
413 struct qfq_class *cl = (struct qfq_class *)arg;
414
415 return cl->qdisc;
416}
417
418static int qfq_dump_class(struct Qdisc *sch, unsigned long arg,
419 struct sk_buff *skb, struct tcmsg *tcm)
420{
421 struct qfq_class *cl = (struct qfq_class *)arg;
422 struct nlattr *nest;
423
424 tcm->tcm_parent = TC_H_ROOT;
425 tcm->tcm_handle = cl->common.classid;
426 tcm->tcm_info = cl->qdisc->handle;
427
428 nest = nla_nest_start(skb, TCA_OPTIONS);
429 if (nest == NULL)
430 goto nla_put_failure;
431 NLA_PUT_U32(skb, TCA_QFQ_WEIGHT, ONE_FP/cl->inv_w);
432 NLA_PUT_U32(skb, TCA_QFQ_LMAX, cl->lmax);
433 return nla_nest_end(skb, nest);
434
435nla_put_failure:
436 nla_nest_cancel(skb, nest);
437 return -EMSGSIZE;
438}
439
440static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
441 struct gnet_dump *d)
442{
443 struct qfq_class *cl = (struct qfq_class *)arg;
444 struct tc_qfq_stats xstats;
445
446 memset(&xstats, 0, sizeof(xstats));
447 cl->qdisc->qstats.qlen = cl->qdisc->q.qlen;
448
449 xstats.weight = ONE_FP/cl->inv_w;
450 xstats.lmax = cl->lmax;
451
452 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
453 gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
454 gnet_stats_copy_queue(d, &cl->qdisc->qstats) < 0)
455 return -1;
456
457 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
458}
459
460static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
461{
462 struct qfq_sched *q = qdisc_priv(sch);
463 struct qfq_class *cl;
464 struct hlist_node *n;
465 unsigned int i;
466
467 if (arg->stop)
468 return;
469
470 for (i = 0; i < q->clhash.hashsize; i++) {
471 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode) {
472 if (arg->count < arg->skip) {
473 arg->count++;
474 continue;
475 }
476 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
477 arg->stop = 1;
478 return;
479 }
480 arg->count++;
481 }
482 }
483}
484
485static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
486 int *qerr)
487{
488 struct qfq_sched *q = qdisc_priv(sch);
489 struct qfq_class *cl;
490 struct tcf_result res;
491 int result;
492
493 if (TC_H_MAJ(skb->priority ^ sch->handle) == 0) {
494 pr_debug("qfq_classify: found %d\n", skb->priority);
495 cl = qfq_find_class(sch, skb->priority);
496 if (cl != NULL)
497 return cl;
498 }
499
500 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
501 result = tc_classify(skb, q->filter_list, &res);
502 if (result >= 0) {
503#ifdef CONFIG_NET_CLS_ACT
504 switch (result) {
505 case TC_ACT_QUEUED:
506 case TC_ACT_STOLEN:
507 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
508 case TC_ACT_SHOT:
509 return NULL;
510 }
511#endif
512 cl = (struct qfq_class *)res.class;
513 if (cl == NULL)
514 cl = qfq_find_class(sch, res.classid);
515 return cl;
516 }
517
518 return NULL;
519}
520
521/* Generic comparison function, handling wraparound. */
522static inline int qfq_gt(u64 a, u64 b)
523{
524 return (s64)(a - b) > 0;
525}
526
527/* Round a precise timestamp to its slotted value. */
528static inline u64 qfq_round_down(u64 ts, unsigned int shift)
529{
530 return ts & ~((1ULL << shift) - 1);
531}
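A minimal user-space sketch of how these two helpers behave, reusing the same logic with made-up timestamp values:

	#include <stdio.h>
	#include <stdint.h>

	/* Same idea as qfq_gt(): wraparound-safe "a > b" via signed difference. */
	static int gt(uint64_t a, uint64_t b)
	{
		return (int64_t)(a - b) > 0;
	}

	/* Same idea as qfq_round_down(): clear the low 'shift' bits. */
	static uint64_t round_down(uint64_t ts, unsigned int shift)
	{
		return ts & ~((1ULL << shift) - 1);
	}

	int main(void)
	{
		/* 0 is "greater" than a timestamp that has almost wrapped. */
		printf("%d\n", gt(0, UINT64_MAX - 10));	/* 1 */
		printf("%d\n", gt(UINT64_MAX - 10, 0));	/* 0 */
		/* Round 1000 down to a 256-unit slot boundary. */
		printf("%llu\n", (unsigned long long)round_down(1000, 8)); /* 768 */
		return 0;
	}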
532
533/* return the pointer to the group with lowest index in the bitmap */
534static inline struct qfq_group *qfq_ffs(struct qfq_sched *q,
535 unsigned long bitmap)
536{
537 int index = __ffs(bitmap);
538 return &q->groups[index];
539}
540/* Calculate a mask to mimic what would be ffs_from(). */
541static inline unsigned long mask_from(unsigned long bitmap, int from)
542{
543 return bitmap & ~((1UL << from) - 1);
544}
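For illustration, mask_from() clears all bits below 'from', so a subsequent __ffs() on the result behaves like a "find first set bit at or above from". A quick stand-alone check with illustrative values:

	#include <stdio.h>

	static unsigned long mask_from(unsigned long bitmap, int from)
	{
		return bitmap & ~((1UL << from) - 1);
	}

	int main(void)
	{
		unsigned long bitmap = 0x35;	/* bits 0, 2, 4, 5 set */

		/* Only bits >= 3 survive: prints 0x30 (bits 4 and 5). */
		printf("%#lx\n", mask_from(bitmap, 3));
		return 0;
	}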
545
546/*
547 * The state computation relies on ER=0, IR=1, EB=2, IB=3
548 * First compute eligibility comparing grp->S, q->V,
549 * then check if someone is blocking us and possibly add EB
550 */
551static int qfq_calc_state(struct qfq_sched *q, const struct qfq_group *grp)
552{
553 /* if S > V we are not eligible */
554 unsigned int state = qfq_gt(grp->S, q->V);
555 unsigned long mask = mask_from(q->bitmaps[ER], grp->index);
556 struct qfq_group *next;
557
558 if (mask) {
559 next = qfq_ffs(q, mask);
560 if (qfq_gt(grp->F, next->F))
561 state |= EB;
562 }
563
564 return state;
565}
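The four group states fit in two bits, eligibility in bit 0 and blocking in bit 1, so the computation above is just an OR of the two conditions. A tiny illustrative reconstruction, using the enum values stated in the comment above rather than the kernel header (which is not shown in this hunk):

	#include <assert.h>

	enum { ER = 0, IR = 1, EB = 2, IB = 3 };	/* values from the comment above */

	/* Illustrative sketch: bit 0 = "ineligible", bit 1 = "blocked". */
	static int calc_state(int ineligible, int blocked)
	{
		return (ineligible ? 1 : 0) | (blocked ? EB : 0);
	}

	int main(void)
	{
		assert(calc_state(0, 0) == ER && calc_state(1, 0) == IR);
		assert(calc_state(0, 1) == EB && calc_state(1, 1) == IB);
		return 0;
	}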
566
567
568/*
569 * In principle
570 * q->bitmaps[dst] |= q->bitmaps[src] & mask;
571 * q->bitmaps[src] &= ~mask;
572 * but we should make sure that src != dst
573 */
574static inline void qfq_move_groups(struct qfq_sched *q, unsigned long mask,
575 int src, int dst)
576{
577 q->bitmaps[dst] |= q->bitmaps[src] & mask;
578 q->bitmaps[src] &= ~mask;
579}
580
581static void qfq_unblock_groups(struct qfq_sched *q, int index, u64 old_F)
582{
583 unsigned long mask = mask_from(q->bitmaps[ER], index + 1);
584 struct qfq_group *next;
585
586 if (mask) {
587 next = qfq_ffs(q, mask);
588 if (!qfq_gt(next->F, old_F))
589 return;
590 }
591
592 mask = (1UL << index) - 1;
593 qfq_move_groups(q, mask, EB, ER);
594 qfq_move_groups(q, mask, IB, IR);
595}
596
597/*
598 * perhaps
599 *
600 old_V ^= q->V;
601 old_V >>= QFQ_MIN_SLOT_SHIFT;
602 if (old_V) {
603 ...
604 }
605 *
606 */
607static void qfq_make_eligible(struct qfq_sched *q, u64 old_V)
608{
609 unsigned long vslot = q->V >> QFQ_MIN_SLOT_SHIFT;
610 unsigned long old_vslot = old_V >> QFQ_MIN_SLOT_SHIFT;
611
612 if (vslot != old_vslot) {
613 unsigned long mask = (1UL << fls(vslot ^ old_vslot)) - 1;
614 qfq_move_groups(q, mask, IR, ER);
615 qfq_move_groups(q, mask, IB, EB);
616 }
617}
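A small sketch of the mask computation above: when the virtual-time slot changes, every group whose index lies below the highest differing bit becomes eligible. The fls() here is a user-space stand-in for the kernel helper, and the slot values are made up:

	#include <stdio.h>

	/* User-space stand-in for fls(): 1-based position of the highest set bit. */
	static int my_fls(unsigned long x)
	{
		int r = 0;

		while (x) {
			x >>= 1;
			r++;
		}
		return r;
	}

	int main(void)
	{
		unsigned long old_vslot = 5, vslot = 6;	/* illustrative values */
		unsigned long mask = (1UL << my_fls(vslot ^ old_vslot)) - 1;

		/* 5 ^ 6 = 3, fls(3) = 2, mask = 0x3: groups 0 and 1 become eligible. */
		printf("mask = %#lx\n", mask);
		return 0;
	}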
618
619
620/*
621 * XXX we should make sure that slot becomes less than 32.
622 * This is guaranteed by the input values.
623 * roundedS is always cl->S rounded on grp->slot_shift bits.
624 */
625static void qfq_slot_insert(struct qfq_group *grp, struct qfq_class *cl,
626 u64 roundedS)
627{
628 u64 slot = (roundedS - grp->S) >> grp->slot_shift;
629 unsigned int i = (grp->front + slot) % QFQ_MAX_SLOTS;
630
631 hlist_add_head(&cl->next, &grp->slots[i]);
632 __set_bit(slot, &grp->full_slots);
633}
634
635/* Maybe introduce hlist_first_entry?? */
636static struct qfq_class *qfq_slot_head(struct qfq_group *grp)
637{
638 return hlist_entry(grp->slots[grp->front].first,
639 struct qfq_class, next);
640}
641
642/*
643 * remove the entry from the slot
644 */
645static void qfq_front_slot_remove(struct qfq_group *grp)
646{
647 struct qfq_class *cl = qfq_slot_head(grp);
648
649 BUG_ON(!cl);
650 hlist_del(&cl->next);
651 if (hlist_empty(&grp->slots[grp->front]))
652 __clear_bit(0, &grp->full_slots);
653}
654
655/*
656 * Returns the first full queue in a group. As a side effect,
 657 * adjusts the bucket list so that the first non-empty bucket is at
658 * position 0 in full_slots.
659 */
660static struct qfq_class *qfq_slot_scan(struct qfq_group *grp)
661{
662 unsigned int i;
663
664 pr_debug("qfq slot_scan: grp %u full %#lx\n",
665 grp->index, grp->full_slots);
666
667 if (grp->full_slots == 0)
668 return NULL;
669
670 i = __ffs(grp->full_slots); /* zero based */
671 if (i > 0) {
672 grp->front = (grp->front + i) % QFQ_MAX_SLOTS;
673 grp->full_slots >>= i;
674 }
675
676 return qfq_slot_head(grp);
677}
678
679/*
680 * adjust the bucket list. When the start time of a group decreases,
681 * we move the index down (modulo QFQ_MAX_SLOTS) so we don't need to
682 * move the objects. The mask of occupied slots must be shifted
683 * because we use ffs() to find the first non-empty slot.
684 * This covers decreases in the group's start time, but what about
 685 * increases of the start time?
 686 * Here too we should make sure that i is less than 32.
687 */
688static void qfq_slot_rotate(struct qfq_group *grp, u64 roundedS)
689{
690 unsigned int i = (grp->S - roundedS) >> grp->slot_shift;
691
692 grp->full_slots <<= i;
693 grp->front = (grp->front - i) % QFQ_MAX_SLOTS;
694}
695
696static void qfq_update_eligible(struct qfq_sched *q, u64 old_V)
697{
698 struct qfq_group *grp;
699 unsigned long ineligible;
700
701 ineligible = q->bitmaps[IR] | q->bitmaps[IB];
702 if (ineligible) {
703 if (!q->bitmaps[ER]) {
704 grp = qfq_ffs(q, ineligible);
705 if (qfq_gt(grp->S, q->V))
706 q->V = grp->S;
707 }
708 qfq_make_eligible(q, old_V);
709 }
710}
711
 712/* Return the length of the next packet in the queue (0 if the queue is empty). */
713static unsigned int qdisc_peek_len(struct Qdisc *sch)
714{
715 struct sk_buff *skb;
716
717 skb = sch->ops->peek(sch);
718 return skb ? qdisc_pkt_len(skb) : 0;
719}
720
721/*
722 * Updates the class, returns true if also the group needs to be updated.
723 */
724static bool qfq_update_class(struct qfq_group *grp, struct qfq_class *cl)
725{
726 unsigned int len = qdisc_peek_len(cl->qdisc);
727
728 cl->S = cl->F;
729 if (!len)
730 qfq_front_slot_remove(grp); /* queue is empty */
731 else {
732 u64 roundedS;
733
734 cl->F = cl->S + (u64)len * cl->inv_w;
735 roundedS = qfq_round_down(cl->S, grp->slot_shift);
736 if (roundedS == grp->S)
737 return false;
738
739 qfq_front_slot_remove(grp);
740 qfq_slot_insert(grp, cl, roundedS);
741 }
742
743 return true;
744}
745
746static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
747{
748 struct qfq_sched *q = qdisc_priv(sch);
749 struct qfq_group *grp;
750 struct qfq_class *cl;
751 struct sk_buff *skb;
752 unsigned int len;
753 u64 old_V;
754
755 if (!q->bitmaps[ER])
756 return NULL;
757
758 grp = qfq_ffs(q, q->bitmaps[ER]);
759
760 cl = qfq_slot_head(grp);
761 skb = qdisc_dequeue_peeked(cl->qdisc);
762 if (!skb) {
763 WARN_ONCE(1, "qfq_dequeue: non-workconserving leaf\n");
764 return NULL;
765 }
766
767 sch->q.qlen--;
768 qdisc_bstats_update(sch, skb);
769
770 old_V = q->V;
771 len = qdisc_pkt_len(skb);
772 q->V += (u64)len * IWSUM;
773 pr_debug("qfq dequeue: len %u F %lld now %lld\n",
774 len, (unsigned long long) cl->F, (unsigned long long) q->V);
775
776 if (qfq_update_class(grp, cl)) {
777 u64 old_F = grp->F;
778
779 cl = qfq_slot_scan(grp);
780 if (!cl)
781 __clear_bit(grp->index, &q->bitmaps[ER]);
782 else {
783 u64 roundedS = qfq_round_down(cl->S, grp->slot_shift);
784 unsigned int s;
785
786 if (grp->S == roundedS)
787 goto skip_unblock;
788 grp->S = roundedS;
789 grp->F = roundedS + (2ULL << grp->slot_shift);
790 __clear_bit(grp->index, &q->bitmaps[ER]);
791 s = qfq_calc_state(q, grp);
792 __set_bit(grp->index, &q->bitmaps[s]);
793 }
794
795 qfq_unblock_groups(q, grp->index, old_F);
796 }
797
798skip_unblock:
799 qfq_update_eligible(q, old_V);
800
801 return skb;
802}
803
804/*
805 * Assign a reasonable start time for a new flow k in group i.
 806 * Admissible values for \hat{F} are multiples of \sigma_i
 807 * no greater than V+\sigma_i. Larger values mean that
808 * we had a wraparound so we consider the timestamp to be stale.
809 *
810 * If F is not stale and F >= V then we set S = F.
811 * Otherwise we should assign S = V, but this may violate
812 * the ordering in ER. So, if we have groups in ER, set S to
813 * the F_j of the first group j which would be blocking us.
814 * We are guaranteed not to move S backward because
815 * otherwise our group i would still be blocked.
816 */
817static void qfq_update_start(struct qfq_sched *q, struct qfq_class *cl)
818{
819 unsigned long mask;
820 uint32_t limit, roundedF;
821 int slot_shift = cl->grp->slot_shift;
822
823 roundedF = qfq_round_down(cl->F, slot_shift);
824 limit = qfq_round_down(q->V, slot_shift) + (1UL << slot_shift);
825
826 if (!qfq_gt(cl->F, q->V) || qfq_gt(roundedF, limit)) {
827 /* timestamp was stale */
828 mask = mask_from(q->bitmaps[ER], cl->grp->index);
829 if (mask) {
830 struct qfq_group *next = qfq_ffs(q, mask);
831 if (qfq_gt(roundedF, next->F)) {
832 cl->S = next->F;
833 return;
834 }
835 }
836 cl->S = q->V;
837 } else /* timestamp is not stale */
838 cl->S = cl->F;
839}
840
841static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
842{
843 struct qfq_sched *q = qdisc_priv(sch);
844 struct qfq_group *grp;
845 struct qfq_class *cl;
846 int err;
847 u64 roundedS;
848 int s;
849
850 cl = qfq_classify(skb, sch, &err);
851 if (cl == NULL) {
852 if (err & __NET_XMIT_BYPASS)
853 sch->qstats.drops++;
854 kfree_skb(skb);
855 return err;
856 }
857 pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid);
858
859 err = qdisc_enqueue(skb, cl->qdisc);
860 if (unlikely(err != NET_XMIT_SUCCESS)) {
861 pr_debug("qfq_enqueue: enqueue failed %d\n", err);
862 if (net_xmit_drop_count(err)) {
863 cl->qstats.drops++;
864 sch->qstats.drops++;
865 }
866 return err;
867 }
868
869 bstats_update(&cl->bstats, skb);
870 ++sch->q.qlen;
871
 872	/* If the new skb is not the head of the queue, we are done here. */
873 if (cl->qdisc->q.qlen != 1)
874 return err;
875
 876	/* If we reach this point, the queue was idle */
877 grp = cl->grp;
878 qfq_update_start(q, cl);
879
880 /* compute new finish time and rounded start. */
881 cl->F = cl->S + (u64)qdisc_pkt_len(skb) * cl->inv_w;
882 roundedS = qfq_round_down(cl->S, grp->slot_shift);
883
884 /*
885 * insert cl in the correct bucket.
886 * If cl->S >= grp->S we don't need to adjust the
887 * bucket list and simply go to the insertion phase.
888 * Otherwise grp->S is decreasing, we must make room
889 * in the bucket list, and also recompute the group state.
890 * Finally, if there were no flows in this group and nobody
891 * was in ER make sure to adjust V.
892 */
893 if (grp->full_slots) {
894 if (!qfq_gt(grp->S, cl->S))
895 goto skip_update;
896
897 /* create a slot for this cl->S */
898 qfq_slot_rotate(grp, roundedS);
899 /* group was surely ineligible, remove */
900 __clear_bit(grp->index, &q->bitmaps[IR]);
901 __clear_bit(grp->index, &q->bitmaps[IB]);
902 } else if (!q->bitmaps[ER] && qfq_gt(roundedS, q->V))
903 q->V = roundedS;
904
905 grp->S = roundedS;
906 grp->F = roundedS + (2ULL << grp->slot_shift);
907 s = qfq_calc_state(q, grp);
908 __set_bit(grp->index, &q->bitmaps[s]);
909
910 pr_debug("qfq enqueue: new state %d %#lx S %lld F %lld V %lld\n",
911 s, q->bitmaps[s],
912 (unsigned long long) cl->S,
913 (unsigned long long) cl->F,
914 (unsigned long long) q->V);
915
916skip_update:
917 qfq_slot_insert(grp, cl, roundedS);
918
919 return err;
920}
921
922
923static void qfq_slot_remove(struct qfq_sched *q, struct qfq_group *grp,
924 struct qfq_class *cl)
925{
926 unsigned int i, offset;
927 u64 roundedS;
928
929 roundedS = qfq_round_down(cl->S, grp->slot_shift);
930 offset = (roundedS - grp->S) >> grp->slot_shift;
931 i = (grp->front + offset) % QFQ_MAX_SLOTS;
932
933 hlist_del(&cl->next);
934 if (hlist_empty(&grp->slots[i]))
935 __clear_bit(offset, &grp->full_slots);
936}
937
938/*
939 * called to forcibly destroy a queue.
940 * If the queue is not in the front bucket, or if it has
941 * other queues in the front bucket, we can simply remove
942 * the queue with no other side effects.
943 * Otherwise we must propagate the event up.
944 */
945static void qfq_deactivate_class(struct qfq_sched *q, struct qfq_class *cl)
946{
947 struct qfq_group *grp = cl->grp;
948 unsigned long mask;
949 u64 roundedS;
950 int s;
951
952 cl->F = cl->S;
953 qfq_slot_remove(q, grp, cl);
954
955 if (!grp->full_slots) {
956 __clear_bit(grp->index, &q->bitmaps[IR]);
957 __clear_bit(grp->index, &q->bitmaps[EB]);
958 __clear_bit(grp->index, &q->bitmaps[IB]);
959
960 if (test_bit(grp->index, &q->bitmaps[ER]) &&
961 !(q->bitmaps[ER] & ~((1UL << grp->index) - 1))) {
962 mask = q->bitmaps[ER] & ((1UL << grp->index) - 1);
963 if (mask)
964 mask = ~((1UL << __fls(mask)) - 1);
965 else
966 mask = ~0UL;
967 qfq_move_groups(q, mask, EB, ER);
968 qfq_move_groups(q, mask, IB, IR);
969 }
970 __clear_bit(grp->index, &q->bitmaps[ER]);
971 } else if (hlist_empty(&grp->slots[grp->front])) {
972 cl = qfq_slot_scan(grp);
973 roundedS = qfq_round_down(cl->S, grp->slot_shift);
974 if (grp->S != roundedS) {
975 __clear_bit(grp->index, &q->bitmaps[ER]);
976 __clear_bit(grp->index, &q->bitmaps[IR]);
977 __clear_bit(grp->index, &q->bitmaps[EB]);
978 __clear_bit(grp->index, &q->bitmaps[IB]);
979 grp->S = roundedS;
980 grp->F = roundedS + (2ULL << grp->slot_shift);
981 s = qfq_calc_state(q, grp);
982 __set_bit(grp->index, &q->bitmaps[s]);
983 }
984 }
985
986 qfq_update_eligible(q, q->V);
987}
988
989static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg)
990{
991 struct qfq_sched *q = qdisc_priv(sch);
992 struct qfq_class *cl = (struct qfq_class *)arg;
993
994 if (cl->qdisc->q.qlen == 0)
995 qfq_deactivate_class(q, cl);
996}
997
998static unsigned int qfq_drop(struct Qdisc *sch)
999{
1000 struct qfq_sched *q = qdisc_priv(sch);
1001 struct qfq_group *grp;
1002 unsigned int i, j, len;
1003
1004 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1005 grp = &q->groups[i];
1006 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1007 struct qfq_class *cl;
1008 struct hlist_node *n;
1009
1010 hlist_for_each_entry(cl, n, &grp->slots[j], next) {
1011
1012 if (!cl->qdisc->ops->drop)
1013 continue;
1014
1015 len = cl->qdisc->ops->drop(cl->qdisc);
1016 if (len > 0) {
1017 sch->q.qlen--;
1018 if (!cl->qdisc->q.qlen)
1019 qfq_deactivate_class(q, cl);
1020
1021 return len;
1022 }
1023 }
1024 }
1025 }
1026
1027 return 0;
1028}
1029
1030static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1031{
1032 struct qfq_sched *q = qdisc_priv(sch);
1033 struct qfq_group *grp;
1034 int i, j, err;
1035
1036 err = qdisc_class_hash_init(&q->clhash);
1037 if (err < 0)
1038 return err;
1039
1040 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1041 grp = &q->groups[i];
1042 grp->index = i;
1043 grp->slot_shift = QFQ_MTU_SHIFT + FRAC_BITS
1044 - (QFQ_MAX_INDEX - i);
1045 for (j = 0; j < QFQ_MAX_SLOTS; j++)
1046 INIT_HLIST_HEAD(&grp->slots[j]);
1047 }
1048
1049 return 0;
1050}
1051
1052static void qfq_reset_qdisc(struct Qdisc *sch)
1053{
1054 struct qfq_sched *q = qdisc_priv(sch);
1055 struct qfq_group *grp;
1056 struct qfq_class *cl;
1057 struct hlist_node *n, *tmp;
1058 unsigned int i, j;
1059
1060 for (i = 0; i <= QFQ_MAX_INDEX; i++) {
1061 grp = &q->groups[i];
1062 for (j = 0; j < QFQ_MAX_SLOTS; j++) {
1063 hlist_for_each_entry_safe(cl, n, tmp,
1064 &grp->slots[j], next) {
1065 qfq_deactivate_class(q, cl);
1066 }
1067 }
1068 }
1069
1070 for (i = 0; i < q->clhash.hashsize; i++) {
1071 hlist_for_each_entry(cl, n, &q->clhash.hash[i], common.hnode)
1072 qdisc_reset(cl->qdisc);
1073 }
1074 sch->q.qlen = 0;
1075}
1076
1077static void qfq_destroy_qdisc(struct Qdisc *sch)
1078{
1079 struct qfq_sched *q = qdisc_priv(sch);
1080 struct qfq_class *cl;
1081 struct hlist_node *n, *next;
1082 unsigned int i;
1083
1084 tcf_destroy_chain(&q->filter_list);
1085
1086 for (i = 0; i < q->clhash.hashsize; i++) {
1087 hlist_for_each_entry_safe(cl, n, next, &q->clhash.hash[i],
1088 common.hnode) {
1089 qfq_destroy_class(sch, cl);
1090 }
1091 }
1092 qdisc_class_hash_destroy(&q->clhash);
1093}
1094
1095static const struct Qdisc_class_ops qfq_class_ops = {
1096 .change = qfq_change_class,
1097 .delete = qfq_delete_class,
1098 .get = qfq_get_class,
1099 .put = qfq_put_class,
1100 .tcf_chain = qfq_tcf_chain,
1101 .bind_tcf = qfq_bind_tcf,
1102 .unbind_tcf = qfq_unbind_tcf,
1103 .graft = qfq_graft_class,
1104 .leaf = qfq_class_leaf,
1105 .qlen_notify = qfq_qlen_notify,
1106 .dump = qfq_dump_class,
1107 .dump_stats = qfq_dump_class_stats,
1108 .walk = qfq_walk,
1109};
1110
1111static struct Qdisc_ops qfq_qdisc_ops __read_mostly = {
1112 .cl_ops = &qfq_class_ops,
1113 .id = "qfq",
1114 .priv_size = sizeof(struct qfq_sched),
1115 .enqueue = qfq_enqueue,
1116 .dequeue = qfq_dequeue,
1117 .peek = qdisc_peek_dequeued,
1118 .drop = qfq_drop,
1119 .init = qfq_init_qdisc,
1120 .reset = qfq_reset_qdisc,
1121 .destroy = qfq_destroy_qdisc,
1122 .owner = THIS_MODULE,
1123};
1124
1125static int __init qfq_init(void)
1126{
1127 return register_qdisc(&qfq_qdisc_ops);
1128}
1129
1130static void __exit qfq_exit(void)
1131{
1132 unregister_qdisc(&qfq_qdisc_ops);
1133}
1134
1135module_init(qfq_init);
1136module_exit(qfq_exit);
1137MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 8d42bb3ba540..6649463da1b6 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -36,8 +36,7 @@
36 if RED works correctly. 36 if RED works correctly.
37 */ 37 */
38 38
39struct red_sched_data 39struct red_sched_data {
40{
41 u32 limit; /* HARD maximal queue length */ 40 u32 limit; /* HARD maximal queue length */
42 unsigned char flags; 41 unsigned char flags;
43 struct red_parms parms; 42 struct red_parms parms;
@@ -55,7 +54,7 @@ static inline int red_use_harddrop(struct red_sched_data *q)
55 return q->flags & TC_RED_HARDDROP; 54 return q->flags & TC_RED_HARDDROP;
56} 55}
57 56
58static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch) 57static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch)
59{ 58{
60 struct red_sched_data *q = qdisc_priv(sch); 59 struct red_sched_data *q = qdisc_priv(sch);
61 struct Qdisc *child = q->qdisc; 60 struct Qdisc *child = q->qdisc;
@@ -67,35 +66,33 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
67 red_end_of_idle_period(&q->parms); 66 red_end_of_idle_period(&q->parms);
68 67
69 switch (red_action(&q->parms, q->parms.qavg)) { 68 switch (red_action(&q->parms, q->parms.qavg)) {
70 case RED_DONT_MARK: 69 case RED_DONT_MARK:
71 break; 70 break;
72 71
73 case RED_PROB_MARK: 72 case RED_PROB_MARK:
74 sch->qstats.overlimits++; 73 sch->qstats.overlimits++;
75 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { 74 if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76 q->stats.prob_drop++; 75 q->stats.prob_drop++;
77 goto congestion_drop; 76 goto congestion_drop;
78 } 77 }
79 78
80 q->stats.prob_mark++; 79 q->stats.prob_mark++;
81 break; 80 break;
82 81
83 case RED_HARD_MARK: 82 case RED_HARD_MARK:
84 sch->qstats.overlimits++; 83 sch->qstats.overlimits++;
85 if (red_use_harddrop(q) || !red_use_ecn(q) || 84 if (red_use_harddrop(q) || !red_use_ecn(q) ||
86 !INET_ECN_set_ce(skb)) { 85 !INET_ECN_set_ce(skb)) {
87 q->stats.forced_drop++; 86 q->stats.forced_drop++;
88 goto congestion_drop; 87 goto congestion_drop;
89 } 88 }
90 89
91 q->stats.forced_mark++; 90 q->stats.forced_mark++;
92 break; 91 break;
93 } 92 }
94 93
95 ret = qdisc_enqueue(skb, child); 94 ret = qdisc_enqueue(skb, child);
96 if (likely(ret == NET_XMIT_SUCCESS)) { 95 if (likely(ret == NET_XMIT_SUCCESS)) {
97 sch->bstats.bytes += qdisc_pkt_len(skb);
98 sch->bstats.packets++;
99 sch->q.qlen++; 96 sch->q.qlen++;
100 } else if (net_xmit_drop_count(ret)) { 97 } else if (net_xmit_drop_count(ret)) {
101 q->stats.pdrop++; 98 q->stats.pdrop++;
@@ -108,22 +105,24 @@ congestion_drop:
108 return NET_XMIT_CN; 105 return NET_XMIT_CN;
109} 106}
110 107
111static struct sk_buff * red_dequeue(struct Qdisc* sch) 108static struct sk_buff *red_dequeue(struct Qdisc *sch)
112{ 109{
113 struct sk_buff *skb; 110 struct sk_buff *skb;
114 struct red_sched_data *q = qdisc_priv(sch); 111 struct red_sched_data *q = qdisc_priv(sch);
115 struct Qdisc *child = q->qdisc; 112 struct Qdisc *child = q->qdisc;
116 113
117 skb = child->dequeue(child); 114 skb = child->dequeue(child);
118 if (skb) 115 if (skb) {
116 qdisc_bstats_update(sch, skb);
119 sch->q.qlen--; 117 sch->q.qlen--;
120 else if (!red_is_idling(&q->parms)) 118 } else {
121 red_start_of_idle_period(&q->parms); 119 if (!red_is_idling(&q->parms))
122 120 red_start_of_idle_period(&q->parms);
121 }
123 return skb; 122 return skb;
124} 123}
125 124
126static struct sk_buff * red_peek(struct Qdisc* sch) 125static struct sk_buff *red_peek(struct Qdisc *sch)
127{ 126{
128 struct red_sched_data *q = qdisc_priv(sch); 127 struct red_sched_data *q = qdisc_priv(sch);
129 struct Qdisc *child = q->qdisc; 128 struct Qdisc *child = q->qdisc;
@@ -131,7 +130,7 @@ static struct sk_buff * red_peek(struct Qdisc* sch)
131 return child->ops->peek(child); 130 return child->ops->peek(child);
132} 131}
133 132
134static unsigned int red_drop(struct Qdisc* sch) 133static unsigned int red_drop(struct Qdisc *sch)
135{ 134{
136 struct red_sched_data *q = qdisc_priv(sch); 135 struct red_sched_data *q = qdisc_priv(sch);
137 struct Qdisc *child = q->qdisc; 136 struct Qdisc *child = q->qdisc;
@@ -150,7 +149,7 @@ static unsigned int red_drop(struct Qdisc* sch)
150 return 0; 149 return 0;
151} 150}
152 151
153static void red_reset(struct Qdisc* sch) 152static void red_reset(struct Qdisc *sch)
154{ 153{
155 struct red_sched_data *q = qdisc_priv(sch); 154 struct red_sched_data *q = qdisc_priv(sch);
156 155
@@ -217,7 +216,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
217 return 0; 216 return 0;
218} 217}
219 218
220static int red_init(struct Qdisc* sch, struct nlattr *opt) 219static int red_init(struct Qdisc *sch, struct nlattr *opt)
221{ 220{
222 struct red_sched_data *q = qdisc_priv(sch); 221 struct red_sched_data *q = qdisc_priv(sch);
223 222
@@ -239,6 +238,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
239 .Scell_log = q->parms.Scell_log, 238 .Scell_log = q->parms.Scell_log,
240 }; 239 };
241 240
241 sch->qstats.backlog = q->qdisc->qstats.backlog;
242 opts = nla_nest_start(skb, TCA_OPTIONS); 242 opts = nla_nest_start(skb, TCA_OPTIONS);
243 if (opts == NULL) 243 if (opts == NULL)
244 goto nla_put_failure; 244 goto nla_put_failure;
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
new file mode 100644
index 000000000000..0a833d0c1f61
--- /dev/null
+++ b/net/sched/sch_sfb.c
@@ -0,0 +1,709 @@
1/*
2 * net/sched/sch_sfb.c Stochastic Fair Blue
3 *
4 * Copyright (c) 2008-2011 Juliusz Chroboczek <jch@pps.jussieu.fr>
5 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * version 2 as published by the Free Software Foundation.
10 *
11 * W. Feng, D. Kandlur, D. Saha, K. Shin. Blue:
12 * A New Class of Active Queue Management Algorithms.
13 * U. Michigan CSE-TR-387-99, April 1999.
14 *
15 * http://www.thefengs.com/wuchang/blue/CSE-TR-387-99.pdf
16 *
17 */
18
19#include <linux/module.h>
20#include <linux/types.h>
21#include <linux/kernel.h>
22#include <linux/errno.h>
23#include <linux/skbuff.h>
24#include <linux/random.h>
25#include <linux/jhash.h>
26#include <net/ip.h>
27#include <net/pkt_sched.h>
28#include <net/inet_ecn.h>
29
30/*
 31 * SFB uses two B[l][n]: L x N arrays of bins (L levels, N bins per level).
 32 * This implementation uses L = 8 and N = 16.
33 * This permits us to split one 32bit hash (provided per packet by rxhash or
34 * external classifier) into 8 subhashes of 4 bits.
35 */
36#define SFB_BUCKET_SHIFT 4
37#define SFB_NUMBUCKETS (1 << SFB_BUCKET_SHIFT) /* N bins per Level */
38#define SFB_BUCKET_MASK (SFB_NUMBUCKETS - 1)
39#define SFB_LEVELS (32 / SFB_BUCKET_SHIFT) /* L */
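To illustrate the comment above, a 32-bit hash is consumed 4 bits at a time, one bucket index per level. A stand-alone sketch reusing the same constants (the hash value is made up):

	#include <stdio.h>
	#include <stdint.h>

	#define SFB_BUCKET_SHIFT 4
	#define SFB_NUMBUCKETS   (1 << SFB_BUCKET_SHIFT)
	#define SFB_BUCKET_MASK  (SFB_NUMBUCKETS - 1)
	#define SFB_LEVELS       (32 / SFB_BUCKET_SHIFT)

	int main(void)
	{
		uint32_t sfbhash = 0x12345678;	/* illustrative hash value */
		int level;

		for (level = 0; level < SFB_LEVELS; level++) {
			printf("level %d -> bin %u\n",
			       level, (unsigned)(sfbhash & SFB_BUCKET_MASK));
			sfbhash >>= SFB_BUCKET_SHIFT;
		}
		/* Prints bins 8, 7, 6, 5, 4, 3, 2, 1: one 4-bit index per level. */
		return 0;
	}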
40
41/* SFB algo uses a virtual queue, named "bin" */
42struct sfb_bucket {
43 u16 qlen; /* length of virtual queue */
44 u16 p_mark; /* marking probability */
45};
46
 47/* We use double buffering right before a hash change
 48 * (Section 4.4 of the SFB reference: moving hash functions)
49 */
50struct sfb_bins {
51 u32 perturbation; /* jhash perturbation */
52 struct sfb_bucket bins[SFB_LEVELS][SFB_NUMBUCKETS];
53};
54
55struct sfb_sched_data {
56 struct Qdisc *qdisc;
57 struct tcf_proto *filter_list;
58 unsigned long rehash_interval;
59 unsigned long warmup_time; /* double buffering warmup time in jiffies */
60 u32 max;
61 u32 bin_size; /* maximum queue length per bin */
62 u32 increment; /* d1 */
63 u32 decrement; /* d2 */
64 u32 limit; /* HARD maximal queue length */
65 u32 penalty_rate;
66 u32 penalty_burst;
67 u32 tokens_avail;
68 unsigned long rehash_time;
69 unsigned long token_time;
70
71 u8 slot; /* current active bins (0 or 1) */
72 bool double_buffering;
73 struct sfb_bins bins[2];
74
75 struct {
76 u32 earlydrop;
77 u32 penaltydrop;
78 u32 bucketdrop;
79 u32 queuedrop;
80 u32 childdrop; /* drops in child qdisc */
81 u32 marked; /* ECN mark */
82 } stats;
83};
84
85/*
 86 * Each queued skb might be hashed on one or two bins.
 87 * We store the two hash values in skb_cb.
88 * (A zero value means double buffering was not used)
89 */
90struct sfb_skb_cb {
91 u32 hashes[2];
92};
93
94static inline struct sfb_skb_cb *sfb_skb_cb(const struct sk_buff *skb)
95{
96 BUILD_BUG_ON(sizeof(skb->cb) <
97 sizeof(struct qdisc_skb_cb) + sizeof(struct sfb_skb_cb));
98 return (struct sfb_skb_cb *)qdisc_skb_cb(skb)->data;
99}
100
101/*
 102 * If using the 'internal' SFB flow classifier, the hash comes from skb rxhash.
 103 * If using an external classifier, the hash comes from the classid.
104 */
105static u32 sfb_hash(const struct sk_buff *skb, u32 slot)
106{
107 return sfb_skb_cb(skb)->hashes[slot];
108}
109
110/* Probabilities are coded as Q0.16 fixed-point values,
111 * with 0xFFFF representing 65535/65536 (almost 1.0)
112 * Addition and subtraction are saturating in [0, 65535]
113 */
114static u32 prob_plus(u32 p1, u32 p2)
115{
116 u32 res = p1 + p2;
117
118 return min_t(u32, res, SFB_MAX_PROB);
119}
120
121static u32 prob_minus(u32 p1, u32 p2)
122{
123 return p1 > p2 ? p1 - p2 : 0;
124}
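A quick stand-alone check of the saturating Q0.16 arithmetic above. SFB_MAX_PROB is assumed to be 0xFFFF, as the comment implies; the real constant lives in a header not shown in this hunk:

	#include <stdio.h>
	#include <stdint.h>

	#define SFB_MAX_PROB 0xFFFF	/* assumed value, per the Q0.16 comment */

	static uint32_t prob_plus(uint32_t p1, uint32_t p2)
	{
		uint32_t res = p1 + p2;

		return res < SFB_MAX_PROB ? res : SFB_MAX_PROB;
	}

	static uint32_t prob_minus(uint32_t p1, uint32_t p2)
	{
		return p1 > p2 ? p1 - p2 : 0;
	}

	int main(void)
	{
		printf("%u\n", (unsigned)prob_plus(0xFFF0, 0x100));	/* saturates at 65535 */
		printf("%u\n", (unsigned)prob_minus(10, 20));		/* clamps at 0 */
		return 0;
	}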
125
126static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q)
127{
128 int i;
129 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
130
131 for (i = 0; i < SFB_LEVELS; i++) {
132 u32 hash = sfbhash & SFB_BUCKET_MASK;
133
134 sfbhash >>= SFB_BUCKET_SHIFT;
135 if (b[hash].qlen < 0xFFFF)
136 b[hash].qlen++;
137 b += SFB_NUMBUCKETS; /* next level */
138 }
139}
140
141static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
142{
143 u32 sfbhash;
144
145 sfbhash = sfb_hash(skb, 0);
146 if (sfbhash)
147 increment_one_qlen(sfbhash, 0, q);
148
149 sfbhash = sfb_hash(skb, 1);
150 if (sfbhash)
151 increment_one_qlen(sfbhash, 1, q);
152}
153
154static void decrement_one_qlen(u32 sfbhash, u32 slot,
155 struct sfb_sched_data *q)
156{
157 int i;
158 struct sfb_bucket *b = &q->bins[slot].bins[0][0];
159
160 for (i = 0; i < SFB_LEVELS; i++) {
161 u32 hash = sfbhash & SFB_BUCKET_MASK;
162
163 sfbhash >>= SFB_BUCKET_SHIFT;
164 if (b[hash].qlen > 0)
165 b[hash].qlen--;
166 b += SFB_NUMBUCKETS; /* next level */
167 }
168}
169
170static void decrement_qlen(const struct sk_buff *skb, struct sfb_sched_data *q)
171{
172 u32 sfbhash;
173
174 sfbhash = sfb_hash(skb, 0);
175 if (sfbhash)
176 decrement_one_qlen(sfbhash, 0, q);
177
178 sfbhash = sfb_hash(skb, 1);
179 if (sfbhash)
180 decrement_one_qlen(sfbhash, 1, q);
181}
182
183static void decrement_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
184{
185 b->p_mark = prob_minus(b->p_mark, q->decrement);
186}
187
188static void increment_prob(struct sfb_bucket *b, struct sfb_sched_data *q)
189{
190 b->p_mark = prob_plus(b->p_mark, q->increment);
191}
192
193static void sfb_zero_all_buckets(struct sfb_sched_data *q)
194{
195 memset(&q->bins, 0, sizeof(q->bins));
196}
197
198/*
199 * compute max qlen, max p_mark, and avg p_mark
200 */
201static u32 sfb_compute_qlen(u32 *prob_r, u32 *avgpm_r, const struct sfb_sched_data *q)
202{
203 int i;
204 u32 qlen = 0, prob = 0, totalpm = 0;
205 const struct sfb_bucket *b = &q->bins[q->slot].bins[0][0];
206
207 for (i = 0; i < SFB_LEVELS * SFB_NUMBUCKETS; i++) {
208 if (qlen < b->qlen)
209 qlen = b->qlen;
210 totalpm += b->p_mark;
211 if (prob < b->p_mark)
212 prob = b->p_mark;
213 b++;
214 }
215 *prob_r = prob;
216 *avgpm_r = totalpm / (SFB_LEVELS * SFB_NUMBUCKETS);
217 return qlen;
218}
219
220
221static void sfb_init_perturbation(u32 slot, struct sfb_sched_data *q)
222{
223 q->bins[slot].perturbation = net_random();
224}
225
226static void sfb_swap_slot(struct sfb_sched_data *q)
227{
228 sfb_init_perturbation(q->slot, q);
229 q->slot ^= 1;
230 q->double_buffering = false;
231}
232
 233/* Non-elastic flows are allowed to use part of the bandwidth, expressed
 234 * in "penalty_rate" packets per second, with a burst of "penalty_burst" packets
235 */
236static bool sfb_rate_limit(struct sk_buff *skb, struct sfb_sched_data *q)
237{
238 if (q->penalty_rate == 0 || q->penalty_burst == 0)
239 return true;
240
241 if (q->tokens_avail < 1) {
242 unsigned long age = min(10UL * HZ, jiffies - q->token_time);
243
244 q->tokens_avail = (age * q->penalty_rate) / HZ;
245 if (q->tokens_avail > q->penalty_burst)
246 q->tokens_avail = q->penalty_burst;
247 q->token_time = jiffies;
248 if (q->tokens_avail < 1)
249 return true;
250 }
251
252 q->tokens_avail--;
253 return false;
254}
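The refill above is a plain token bucket: after 'age' jiffies of inactivity the flow earns age * penalty_rate / HZ tokens, capped at penalty_burst. A worked sketch with assumed values (HZ and the timings are illustrative only):

	#include <stdio.h>

	#define HZ 1000				/* assumed jiffy rate for the example */

	int main(void)
	{
		unsigned long age = 500;	/* jiffies since last refill (0.5 s here) */
		unsigned int penalty_rate = 10;	/* packets per second */
		unsigned int penalty_burst = 20;
		unsigned int tokens = age * penalty_rate / HZ;

		if (tokens > penalty_burst)
			tokens = penalty_burst;
		printf("tokens_avail = %u\n", tokens);	/* 5 */
		return 0;
	}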
255
256static bool sfb_classify(struct sk_buff *skb, struct sfb_sched_data *q,
257 int *qerr, u32 *salt)
258{
259 struct tcf_result res;
260 int result;
261
262 result = tc_classify(skb, q->filter_list, &res);
263 if (result >= 0) {
264#ifdef CONFIG_NET_CLS_ACT
265 switch (result) {
266 case TC_ACT_STOLEN:
267 case TC_ACT_QUEUED:
268 *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
269 case TC_ACT_SHOT:
270 return false;
271 }
272#endif
273 *salt = TC_H_MIN(res.classid);
274 return true;
275 }
276 return false;
277}
278
279static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
280{
281
282 struct sfb_sched_data *q = qdisc_priv(sch);
283 struct Qdisc *child = q->qdisc;
284 int i;
285 u32 p_min = ~0;
286 u32 minqlen = ~0;
287 u32 r, slot, salt, sfbhash;
288 int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
289
290 if (q->rehash_interval > 0) {
291 unsigned long limit = q->rehash_time + q->rehash_interval;
292
293 if (unlikely(time_after(jiffies, limit))) {
294 sfb_swap_slot(q);
295 q->rehash_time = jiffies;
296 } else if (unlikely(!q->double_buffering && q->warmup_time > 0 &&
297 time_after(jiffies, limit - q->warmup_time))) {
298 q->double_buffering = true;
299 }
300 }
301
302 if (q->filter_list) {
303 /* If using external classifiers, get result and record it. */
304 if (!sfb_classify(skb, q, &ret, &salt))
305 goto other_drop;
306 } else {
307 salt = skb_get_rxhash(skb);
308 }
309
310 slot = q->slot;
311
312 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
313 if (!sfbhash)
314 sfbhash = 1;
315 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
316
317 for (i = 0; i < SFB_LEVELS; i++) {
318 u32 hash = sfbhash & SFB_BUCKET_MASK;
319 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
320
321 sfbhash >>= SFB_BUCKET_SHIFT;
322 if (b->qlen == 0)
323 decrement_prob(b, q);
324 else if (b->qlen >= q->bin_size)
325 increment_prob(b, q);
326 if (minqlen > b->qlen)
327 minqlen = b->qlen;
328 if (p_min > b->p_mark)
329 p_min = b->p_mark;
330 }
331
332 slot ^= 1;
333 sfb_skb_cb(skb)->hashes[slot] = 0;
334
335 if (unlikely(minqlen >= q->max || sch->q.qlen >= q->limit)) {
336 sch->qstats.overlimits++;
337 if (minqlen >= q->max)
338 q->stats.bucketdrop++;
339 else
340 q->stats.queuedrop++;
341 goto drop;
342 }
343
344 if (unlikely(p_min >= SFB_MAX_PROB)) {
345 /* Inelastic flow */
346 if (q->double_buffering) {
347 sfbhash = jhash_1word(salt, q->bins[slot].perturbation);
348 if (!sfbhash)
349 sfbhash = 1;
350 sfb_skb_cb(skb)->hashes[slot] = sfbhash;
351
352 for (i = 0; i < SFB_LEVELS; i++) {
353 u32 hash = sfbhash & SFB_BUCKET_MASK;
354 struct sfb_bucket *b = &q->bins[slot].bins[i][hash];
355
356 sfbhash >>= SFB_BUCKET_SHIFT;
357 if (b->qlen == 0)
358 decrement_prob(b, q);
359 else if (b->qlen >= q->bin_size)
360 increment_prob(b, q);
361 }
362 }
363 if (sfb_rate_limit(skb, q)) {
364 sch->qstats.overlimits++;
365 q->stats.penaltydrop++;
366 goto drop;
367 }
368 goto enqueue;
369 }
370
371 r = net_random() & SFB_MAX_PROB;
372
373 if (unlikely(r < p_min)) {
374 if (unlikely(p_min > SFB_MAX_PROB / 2)) {
375 /* If we're marking that many packets, then either
376 * this flow is unresponsive, or we're badly congested.
377 * In either case, we want to start dropping packets.
378 */
379 if (r < (p_min - SFB_MAX_PROB / 2) * 2) {
380 q->stats.earlydrop++;
381 goto drop;
382 }
383 }
384 if (INET_ECN_set_ce(skb)) {
385 q->stats.marked++;
386 } else {
387 q->stats.earlydrop++;
388 goto drop;
389 }
390 }
391
392enqueue:
393 ret = qdisc_enqueue(skb, child);
394 if (likely(ret == NET_XMIT_SUCCESS)) {
395 sch->q.qlen++;
396 increment_qlen(skb, q);
397 } else if (net_xmit_drop_count(ret)) {
398 q->stats.childdrop++;
399 sch->qstats.drops++;
400 }
401 return ret;
402
403drop:
404 qdisc_drop(skb, sch);
405 return NET_XMIT_CN;
406other_drop:
407 if (ret & __NET_XMIT_BYPASS)
408 sch->qstats.drops++;
409 kfree_skb(skb);
410 return ret;
411}
412
413static struct sk_buff *sfb_dequeue(struct Qdisc *sch)
414{
415 struct sfb_sched_data *q = qdisc_priv(sch);
416 struct Qdisc *child = q->qdisc;
417 struct sk_buff *skb;
418
419 skb = child->dequeue(q->qdisc);
420
421 if (skb) {
422 qdisc_bstats_update(sch, skb);
423 sch->q.qlen--;
424 decrement_qlen(skb, q);
425 }
426
427 return skb;
428}
429
430static struct sk_buff *sfb_peek(struct Qdisc *sch)
431{
432 struct sfb_sched_data *q = qdisc_priv(sch);
433 struct Qdisc *child = q->qdisc;
434
435 return child->ops->peek(child);
436}
437
438/* No sfb_drop -- impossible since the child doesn't return the dropped skb. */
439
440static void sfb_reset(struct Qdisc *sch)
441{
442 struct sfb_sched_data *q = qdisc_priv(sch);
443
444 qdisc_reset(q->qdisc);
445 sch->q.qlen = 0;
446 q->slot = 0;
447 q->double_buffering = false;
448 sfb_zero_all_buckets(q);
449 sfb_init_perturbation(0, q);
450}
451
452static void sfb_destroy(struct Qdisc *sch)
453{
454 struct sfb_sched_data *q = qdisc_priv(sch);
455
456 tcf_destroy_chain(&q->filter_list);
457 qdisc_destroy(q->qdisc);
458}
459
460static const struct nla_policy sfb_policy[TCA_SFB_MAX + 1] = {
461 [TCA_SFB_PARMS] = { .len = sizeof(struct tc_sfb_qopt) },
462};
463
464static const struct tc_sfb_qopt sfb_default_ops = {
465 .rehash_interval = 600 * MSEC_PER_SEC,
466 .warmup_time = 60 * MSEC_PER_SEC,
467 .limit = 0,
468 .max = 25,
469 .bin_size = 20,
470 .increment = (SFB_MAX_PROB + 500) / 1000, /* 0.1 % */
471 .decrement = (SFB_MAX_PROB + 3000) / 6000,
472 .penalty_rate = 10,
473 .penalty_burst = 20,
474};
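Plugging in SFB_MAX_PROB = 0xFFFF (assumed, per the probability comment earlier in this file), the defaults above work out to an increment of 66, roughly 0.1% of full probability, and a decrement of 11, roughly 0.017%. A stand-alone check of that arithmetic:

	#include <stdio.h>

	#define SFB_MAX_PROB 0xFFFF	/* assumed value of the kernel constant */

	int main(void)
	{
		unsigned int increment = (SFB_MAX_PROB + 500) / 1000;
		unsigned int decrement = (SFB_MAX_PROB + 3000) / 6000;

		/* increment = 66 (~0.10% of 65536), decrement = 11 (~0.017%) */
		printf("increment = %u, decrement = %u\n", increment, decrement);
		return 0;
	}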
475
476static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
477{
478 struct sfb_sched_data *q = qdisc_priv(sch);
479 struct Qdisc *child;
480 struct nlattr *tb[TCA_SFB_MAX + 1];
481 const struct tc_sfb_qopt *ctl = &sfb_default_ops;
482 u32 limit;
483 int err;
484
485 if (opt) {
486 err = nla_parse_nested(tb, TCA_SFB_MAX, opt, sfb_policy);
487 if (err < 0)
488 return -EINVAL;
489
490 if (tb[TCA_SFB_PARMS] == NULL)
491 return -EINVAL;
492
493 ctl = nla_data(tb[TCA_SFB_PARMS]);
494 }
495
496 limit = ctl->limit;
497 if (limit == 0)
498 limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
499
500 child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
501 if (IS_ERR(child))
502 return PTR_ERR(child);
503
504 sch_tree_lock(sch);
505
506 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
507 qdisc_destroy(q->qdisc);
508 q->qdisc = child;
509
510 q->rehash_interval = msecs_to_jiffies(ctl->rehash_interval);
511 q->warmup_time = msecs_to_jiffies(ctl->warmup_time);
512 q->rehash_time = jiffies;
513 q->limit = limit;
514 q->increment = ctl->increment;
515 q->decrement = ctl->decrement;
516 q->max = ctl->max;
517 q->bin_size = ctl->bin_size;
518 q->penalty_rate = ctl->penalty_rate;
519 q->penalty_burst = ctl->penalty_burst;
520 q->tokens_avail = ctl->penalty_burst;
521 q->token_time = jiffies;
522
523 q->slot = 0;
524 q->double_buffering = false;
525 sfb_zero_all_buckets(q);
526 sfb_init_perturbation(0, q);
527 sfb_init_perturbation(1, q);
528
529 sch_tree_unlock(sch);
530
531 return 0;
532}
533
534static int sfb_init(struct Qdisc *sch, struct nlattr *opt)
535{
536 struct sfb_sched_data *q = qdisc_priv(sch);
537
538 q->qdisc = &noop_qdisc;
539 return sfb_change(sch, opt);
540}
541
542static int sfb_dump(struct Qdisc *sch, struct sk_buff *skb)
543{
544 struct sfb_sched_data *q = qdisc_priv(sch);
545 struct nlattr *opts;
546 struct tc_sfb_qopt opt = {
547 .rehash_interval = jiffies_to_msecs(q->rehash_interval),
548 .warmup_time = jiffies_to_msecs(q->warmup_time),
549 .limit = q->limit,
550 .max = q->max,
551 .bin_size = q->bin_size,
552 .increment = q->increment,
553 .decrement = q->decrement,
554 .penalty_rate = q->penalty_rate,
555 .penalty_burst = q->penalty_burst,
556 };
557
558 sch->qstats.backlog = q->qdisc->qstats.backlog;
559 opts = nla_nest_start(skb, TCA_OPTIONS);
560 NLA_PUT(skb, TCA_SFB_PARMS, sizeof(opt), &opt);
561 return nla_nest_end(skb, opts);
562
563nla_put_failure:
564 nla_nest_cancel(skb, opts);
565 return -EMSGSIZE;
566}
567
568static int sfb_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
569{
570 struct sfb_sched_data *q = qdisc_priv(sch);
571 struct tc_sfb_xstats st = {
572 .earlydrop = q->stats.earlydrop,
573 .penaltydrop = q->stats.penaltydrop,
574 .bucketdrop = q->stats.bucketdrop,
575 .queuedrop = q->stats.queuedrop,
576 .childdrop = q->stats.childdrop,
577 .marked = q->stats.marked,
578 };
579
580 st.maxqlen = sfb_compute_qlen(&st.maxprob, &st.avgprob, q);
581
582 return gnet_stats_copy_app(d, &st, sizeof(st));
583}
584
585static int sfb_dump_class(struct Qdisc *sch, unsigned long cl,
586 struct sk_buff *skb, struct tcmsg *tcm)
587{
588 return -ENOSYS;
589}
590
591static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
592 struct Qdisc **old)
593{
594 struct sfb_sched_data *q = qdisc_priv(sch);
595
596 if (new == NULL)
597 new = &noop_qdisc;
598
599 sch_tree_lock(sch);
600 *old = q->qdisc;
601 q->qdisc = new;
602 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
603 qdisc_reset(*old);
604 sch_tree_unlock(sch);
605 return 0;
606}
607
608static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg)
609{
610 struct sfb_sched_data *q = qdisc_priv(sch);
611
612 return q->qdisc;
613}
614
615static unsigned long sfb_get(struct Qdisc *sch, u32 classid)
616{
617 return 1;
618}
619
620static void sfb_put(struct Qdisc *sch, unsigned long arg)
621{
622}
623
624static int sfb_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
625 struct nlattr **tca, unsigned long *arg)
626{
627 return -ENOSYS;
628}
629
630static int sfb_delete(struct Qdisc *sch, unsigned long cl)
631{
632 return -ENOSYS;
633}
634
635static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker)
636{
637 if (!walker->stop) {
638 if (walker->count >= walker->skip)
639 if (walker->fn(sch, 1, walker) < 0) {
640 walker->stop = 1;
641 return;
642 }
643 walker->count++;
644 }
645}
646
647static struct tcf_proto **sfb_find_tcf(struct Qdisc *sch, unsigned long cl)
648{
649 struct sfb_sched_data *q = qdisc_priv(sch);
650
651 if (cl)
652 return NULL;
653 return &q->filter_list;
654}
655
656static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent,
657 u32 classid)
658{
659 return 0;
660}
661
662
663static const struct Qdisc_class_ops sfb_class_ops = {
664 .graft = sfb_graft,
665 .leaf = sfb_leaf,
666 .get = sfb_get,
667 .put = sfb_put,
668 .change = sfb_change_class,
669 .delete = sfb_delete,
670 .walk = sfb_walk,
671 .tcf_chain = sfb_find_tcf,
672 .bind_tcf = sfb_bind,
673 .unbind_tcf = sfb_put,
674 .dump = sfb_dump_class,
675};
676
677static struct Qdisc_ops sfb_qdisc_ops __read_mostly = {
678 .id = "sfb",
679 .priv_size = sizeof(struct sfb_sched_data),
680 .cl_ops = &sfb_class_ops,
681 .enqueue = sfb_enqueue,
682 .dequeue = sfb_dequeue,
683 .peek = sfb_peek,
684 .init = sfb_init,
685 .reset = sfb_reset,
686 .destroy = sfb_destroy,
687 .change = sfb_change,
688 .dump = sfb_dump,
689 .dump_stats = sfb_dump_stats,
690 .owner = THIS_MODULE,
691};
692
693static int __init sfb_module_init(void)
694{
695 return register_qdisc(&sfb_qdisc_ops);
696}
697
698static void __exit sfb_module_exit(void)
699{
700 unregister_qdisc(&sfb_qdisc_ops);
701}
702
703module_init(sfb_module_init)
704module_exit(sfb_module_exit)
705
706MODULE_DESCRIPTION("Stochastic Fair Blue queue discipline");
707MODULE_AUTHOR("Juliusz Chroboczek");
708MODULE_AUTHOR("Eric Dumazet");
709MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..b6ea6afa55b0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -21,6 +21,7 @@
21#include <linux/skbuff.h> 21#include <linux/skbuff.h>
22#include <linux/jhash.h> 22#include <linux/jhash.h>
23#include <linux/slab.h> 23#include <linux/slab.h>
24#include <linux/vmalloc.h>
24#include <net/ip.h> 25#include <net/ip.h>
25#include <net/netlink.h> 26#include <net/netlink.h>
26#include <net/pkt_sched.h> 27#include <net/pkt_sched.h>
@@ -67,55 +68,81 @@
67 68
68 IMPLEMENTATION: 69 IMPLEMENTATION:
69 This implementation limits maximal queue length to 128; 70 This implementation limits maximal queue length to 128;
70 maximal mtu to 2^15-1; number of hash buckets to 1024. 71 max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
71 The only goal of this restrictions was that all data 72 The only goal of this restrictions was that all data
72 fit into one 4K page :-). Struct sfq_sched_data is 73 fit into one 4K page on 32bit arches.
73 organized in anti-cache manner: all the data for a bucket
74 are scattered over different locations. This is not good,
75 but it allowed me to put it into 4K.
76 74
77 It is easy to increase these values, but not in flight. */ 75 It is easy to increase these values, but not in flight. */
78 76
79#define SFQ_DEPTH 128 77#define SFQ_DEPTH 128 /* max number of packets per flow */
80#define SFQ_HASH_DIVISOR 1024 78#define SFQ_SLOTS 128 /* max number of flows */
79#define SFQ_EMPTY_SLOT 255
80#define SFQ_DEFAULT_HASH_DIVISOR 1024
81 81
82/* This type should contain at least SFQ_DEPTH*2 values */ 82/* We use 16 bits to store allot, and want to handle packets up to 64K
83 * Scale allot by 8 (1<<3) so that no overflow occurs.
84 */
85#define SFQ_ALLOT_SHIFT 3
86#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
87
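As a quick sanity check of the scaling introduced above (values picked for illustration): a 1514-byte Ethernet frame costs 190 scaled allot units, and even a 64KB packet costs 8192, comfortably inside a 16-bit field. A stand-alone sketch; DIV_ROUND_UP here is a user-space stand-in for the kernel macro:

	#include <stdio.h>

	#define SFQ_ALLOT_SHIFT		3
	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))	/* user-space stand-in */
	#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)

	int main(void)
	{
		printf("%d\n", SFQ_ALLOT_SIZE(1514));	/* 190 */
		printf("%d\n", SFQ_ALLOT_SIZE(65536));	/* 8192 */
		return 0;
	}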
88/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
83typedef unsigned char sfq_index; 89typedef unsigned char sfq_index;
84 90
85struct sfq_head 91/*
86{ 92 * We dont use pointers to save space.
93 * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
94 * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
95 * are 'pointers' to dep[] array
96 */
97struct sfq_head {
87 sfq_index next; 98 sfq_index next;
88 sfq_index prev; 99 sfq_index prev;
89}; 100};
90 101
91struct sfq_sched_data 102struct sfq_slot {
92{ 103 struct sk_buff *skblist_next;
104 struct sk_buff *skblist_prev;
105 sfq_index qlen; /* number of skbs in skblist */
106 sfq_index next; /* next slot in sfq chain */
107 struct sfq_head dep; /* anchor in dep[] chains */
108 unsigned short hash; /* hash value (index in ht[]) */
109 short allot; /* credit for this slot */
110};
111
112struct sfq_sched_data {
93/* Parameters */ 113/* Parameters */
94 int perturb_period; 114 int perturb_period;
95 unsigned quantum; /* Allotment per round: MUST BE >= MTU */ 115 unsigned int quantum; /* Allotment per round: MUST BE >= MTU */
96 int limit; 116 int limit;
97 117 unsigned int divisor; /* number of slots in hash table */
98/* Variables */ 118/* Variables */
99 struct tcf_proto *filter_list; 119 struct tcf_proto *filter_list;
100 struct timer_list perturb_timer; 120 struct timer_list perturb_timer;
101 u32 perturbation; 121 u32 perturbation;
102 sfq_index tail; /* Index of current slot in round */ 122 sfq_index cur_depth; /* depth of longest slot */
103 sfq_index max_depth; /* Maximal depth */ 123 unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
104 124 struct sfq_slot *tail; /* current slot in round */
105 sfq_index ht[SFQ_HASH_DIVISOR]; /* Hash table */ 125 sfq_index *ht; /* Hash table (divisor slots) */
106 sfq_index next[SFQ_DEPTH]; /* Active slots link */ 126 struct sfq_slot slots[SFQ_SLOTS];
107 short allot[SFQ_DEPTH]; /* Current allotment per slot */ 127 struct sfq_head dep[SFQ_DEPTH]; /* Linked list of slots, indexed by depth */
108 unsigned short hash[SFQ_DEPTH]; /* Hash value indexed by slots */
109 struct sk_buff_head qs[SFQ_DEPTH]; /* Slot queue */
110 struct sfq_head dep[SFQ_DEPTH*2]; /* Linked list of slots, indexed by depth */
111}; 128};
112 129
113static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1) 130/*
131 * sfq_head are either in a sfq_slot or in dep[] array
132 */
133static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
114{ 134{
115 return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1); 135 if (val < SFQ_SLOTS)
136 return &q->slots[val].dep;
137 return &q->dep[val - SFQ_SLOTS];
116} 138}
117 139
118static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) 140static unsigned int sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
141{
142 return jhash_2words(h, h1, q->perturbation) & (q->divisor - 1);
143}
144
145static unsigned int sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
119{ 146{
120 u32 h, h2; 147 u32 h, h2;
121 148
@@ -123,40 +150,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 150 case htons(ETH_P_IP):
124 { 151 {
125 const struct iphdr *iph; 152 const struct iphdr *iph;
153 int poff;
126 154
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 155 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 156 goto err;
129 iph = ip_hdr(skb); 157 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 158 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 159 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 160 if (iph->frag_off & htons(IP_MF | IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 161 break;
134 iph->protocol == IPPROTO_UDP || 162 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 163 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 164 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 165 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 166 h2 ^= *(u32 *)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 167 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 168 break;
142 } 169 }
143 case htons(ETH_P_IPV6): 170 case htons(ETH_P_IPV6):
144 { 171 {
145 struct ipv6hdr *iph; 172 const struct ipv6hdr *iph;
173 int poff;
146 174
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 175 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 176 goto err;
149 iph = ipv6_hdr(skb); 177 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 178 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 179 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 180 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 181 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 182 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 183 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 184 h2 ^= *(u32 *)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 185 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 186 break;
161 } 187 }
162 default: 188 default:
@@ -177,7 +203,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
177 203
178 if (TC_H_MAJ(skb->priority) == sch->handle && 204 if (TC_H_MAJ(skb->priority) == sch->handle &&
179 TC_H_MIN(skb->priority) > 0 && 205 TC_H_MIN(skb->priority) > 0 &&
180 TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR) 206 TC_H_MIN(skb->priority) <= q->divisor)
181 return TC_H_MIN(skb->priority); 207 return TC_H_MIN(skb->priority);
182 208
183 if (!q->filter_list) 209 if (!q->filter_list)
@@ -195,36 +221,47 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
195 return 0; 221 return 0;
196 } 222 }
197#endif 223#endif
198 if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR) 224 if (TC_H_MIN(res.classid) <= q->divisor)
199 return TC_H_MIN(res.classid); 225 return TC_H_MIN(res.classid);
200 } 226 }
201 return 0; 227 return 0;
202} 228}
203 229
230/*
231 * x : slot number [0 .. SFQ_SLOTS - 1]
232 */
204static inline void sfq_link(struct sfq_sched_data *q, sfq_index x) 233static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
205{ 234{
206 sfq_index p, n; 235 sfq_index p, n;
207 int d = q->qs[x].qlen + SFQ_DEPTH; 236 int qlen = q->slots[x].qlen;
208 237
209 p = d; 238 p = qlen + SFQ_SLOTS;
210 n = q->dep[d].next; 239 n = q->dep[qlen].next;
211 q->dep[x].next = n; 240
212 q->dep[x].prev = p; 241 q->slots[x].dep.next = n;
213 q->dep[p].next = q->dep[n].prev = x; 242 q->slots[x].dep.prev = p;
243
244 q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
245 sfq_dep_head(q, n)->prev = x;
214} 246}
215 247
248#define sfq_unlink(q, x, n, p) \
249 n = q->slots[x].dep.next; \
250 p = q->slots[x].dep.prev; \
251 sfq_dep_head(q, p)->next = n; \
252 sfq_dep_head(q, n)->prev = p
253
254
216static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x) 255static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
217{ 256{
218 sfq_index p, n; 257 sfq_index p, n;
258 int d;
219 259
220 n = q->dep[x].next; 260 sfq_unlink(q, x, n, p);
221 p = q->dep[x].prev;
222 q->dep[p].next = n;
223 q->dep[n].prev = p;
224
225 if (n == p && q->max_depth == q->qs[x].qlen + 1)
226 q->max_depth--;
227 261
262 d = q->slots[x].qlen--;
263 if (n == p && q->cur_depth == d)
264 q->cur_depth--;
228 sfq_link(q, x); 265 sfq_link(q, x);
229} 266}
230 267
@@ -233,34 +270,74 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
233 sfq_index p, n; 270 sfq_index p, n;
234 int d; 271 int d;
235 272
236 n = q->dep[x].next; 273 sfq_unlink(q, x, n, p);
237 p = q->dep[x].prev;
238 q->dep[p].next = n;
239 q->dep[n].prev = p;
240 d = q->qs[x].qlen;
241 if (q->max_depth < d)
242 q->max_depth = d;
243 274
275 d = ++q->slots[x].qlen;
276 if (q->cur_depth < d)
277 q->cur_depth = d;
244 sfq_link(q, x); 278 sfq_link(q, x);
245} 279}
246 280
281/* helper functions : might be changed when/if skb use a standard list_head */
282
283/* remove one skb from tail of slot queue */
284static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
285{
286 struct sk_buff *skb = slot->skblist_prev;
287
288 slot->skblist_prev = skb->prev;
289 skb->prev->next = (struct sk_buff *)slot;
290 skb->next = skb->prev = NULL;
291 return skb;
292}
293
294/* remove one skb from head of slot queue */
295static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
296{
297 struct sk_buff *skb = slot->skblist_next;
298
299 slot->skblist_next = skb->next;
300 skb->next->prev = (struct sk_buff *)slot;
301 skb->next = skb->prev = NULL;
302 return skb;
303}
304
305static inline void slot_queue_init(struct sfq_slot *slot)
306{
307 slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
308}
309
310/* add skb to slot queue (tail add) */
311static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
312{
313 skb->prev = slot->skblist_prev;
314 skb->next = (struct sk_buff *)slot;
315 slot->skblist_prev->next = skb;
316 slot->skblist_prev = skb;
317}
318
319#define slot_queue_walk(slot, skb) \
320 for (skb = slot->skblist_next; \
321 skb != (struct sk_buff *)slot; \
322 skb = skb->next)
323
247static unsigned int sfq_drop(struct Qdisc *sch) 324static unsigned int sfq_drop(struct Qdisc *sch)
248{ 325{
249 struct sfq_sched_data *q = qdisc_priv(sch); 326 struct sfq_sched_data *q = qdisc_priv(sch);
250 sfq_index d = q->max_depth; 327 sfq_index x, d = q->cur_depth;
251 struct sk_buff *skb; 328 struct sk_buff *skb;
252 unsigned int len; 329 unsigned int len;
330 struct sfq_slot *slot;
253 331
254 /* Queue is full! Find the longest slot and 332 /* Queue is full! Find the longest slot and drop tail packet from it */
255 drop a packet from it */
256
257 if (d > 1) { 333 if (d > 1) {
258 sfq_index x = q->dep[d + SFQ_DEPTH].next; 334 x = q->dep[d].next;
259 skb = q->qs[x].prev; 335 slot = &q->slots[x];
336drop:
337 skb = slot_dequeue_tail(slot);
260 len = qdisc_pkt_len(skb); 338 len = qdisc_pkt_len(skb);
261 __skb_unlink(skb, &q->qs[x]);
262 kfree_skb(skb);
263 sfq_dec(q, x); 339 sfq_dec(q, x);
340 kfree_skb(skb);
264 sch->q.qlen--; 341 sch->q.qlen--;
265 sch->qstats.drops++; 342 sch->qstats.drops++;
266 sch->qstats.backlog -= len; 343 sch->qstats.backlog -= len;
@@ -269,19 +346,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
269 346
270 if (d == 1) { 347 if (d == 1) {
271 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */ 348 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
272 d = q->next[q->tail]; 349 x = q->tail->next;
273 q->next[q->tail] = q->next[d]; 350 slot = &q->slots[x];
274 q->allot[q->next[d]] += q->quantum; 351 q->tail->next = slot->next;
275 skb = q->qs[d].prev; 352 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
276 len = qdisc_pkt_len(skb); 353 goto drop;
277 __skb_unlink(skb, &q->qs[d]);
278 kfree_skb(skb);
279 sfq_dec(q, d);
280 sch->q.qlen--;
281 q->ht[q->hash[d]] = SFQ_DEPTH;
282 sch->qstats.drops++;
283 sch->qstats.backlog -= len;
284 return len;
285 } 354 }
286 355
287 return 0; 356 return 0;
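
A small illustrative fragment (assumed names, not the kernel structures) of why the victim lookup above is constant time: flows are chained on per-depth lists and cur_depth tracks the deepest non-empty one, so a longest flow is simply the first entry of dep[cur_depth].

#define MAX_DEPTH	127

struct demo_flow {
	struct demo_flow *next_same_depth;	/* chain of flows with equal backlog */
	unsigned int qlen;
};

struct demo_sched {
	struct demo_flow *dep[MAX_DEPTH + 1];	/* dep[d]: flows whose qlen == d */
	unsigned int cur_depth;			/* deepest non-empty bucket */
};

struct demo_flow *pick_drop_victim(struct demo_sched *q)
{
	/* mirrors "x = q->dep[d].next" with d = q->cur_depth */
	return q->cur_depth ? q->dep[q->cur_depth] : NULL;
}
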
@@ -292,7 +361,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
292{ 361{
293 struct sfq_sched_data *q = qdisc_priv(sch); 362 struct sfq_sched_data *q = qdisc_priv(sch);
294 unsigned int hash; 363 unsigned int hash;
295 sfq_index x; 364 sfq_index x, qlen;
365 struct sfq_slot *slot;
296 int uninitialized_var(ret); 366 int uninitialized_var(ret);
297 367
298 hash = sfq_classify(skb, sch, &ret); 368 hash = sfq_classify(skb, sch, &ret);
@@ -305,54 +375,42 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
305 hash--; 375 hash--;
306 376
307 x = q->ht[hash]; 377 x = q->ht[hash];
308 if (x == SFQ_DEPTH) { 378 slot = &q->slots[x];
309 q->ht[hash] = x = q->dep[SFQ_DEPTH].next; 379 if (x == SFQ_EMPTY_SLOT) {
310 q->hash[x] = hash; 380 x = q->dep[0].next; /* get a free slot */
381 q->ht[hash] = x;
382 slot = &q->slots[x];
383 slot->hash = hash;
311 } 384 }
312 385
313 /* If selected queue has length q->limit, this means that 386 /* If selected queue has length q->limit, do simple tail drop,
314 * all another queues are empty and that we do simple tail drop,
315 * i.e. drop _this_ packet. 387 * i.e. drop _this_ packet.
316 */ 388 */
317 if (q->qs[x].qlen >= q->limit) 389 if (slot->qlen >= q->limit)
318 return qdisc_drop(skb, sch); 390 return qdisc_drop(skb, sch);
319 391
320 sch->qstats.backlog += qdisc_pkt_len(skb); 392 sch->qstats.backlog += qdisc_pkt_len(skb);
321 __skb_queue_tail(&q->qs[x], skb); 393 slot_queue_add(slot, skb);
322 sfq_inc(q, x); 394 sfq_inc(q, x);
323 if (q->qs[x].qlen == 1) { /* The flow is new */ 395 if (slot->qlen == 1) { /* The flow is new */
324 if (q->tail == SFQ_DEPTH) { /* It is the first flow */ 396 if (q->tail == NULL) { /* It is the first flow */
325 q->tail = x; 397 slot->next = x;
326 q->next[x] = x;
327 q->allot[x] = q->quantum;
328 } else { 398 } else {
329 q->next[x] = q->next[q->tail]; 399 slot->next = q->tail->next;
330 q->next[q->tail] = x; 400 q->tail->next = x;
331 q->tail = x;
332 } 401 }
402 q->tail = slot;
403 slot->allot = q->scaled_quantum;
333 } 404 }
334 if (++sch->q.qlen <= q->limit) { 405 if (++sch->q.qlen <= q->limit)
335 sch->bstats.bytes += qdisc_pkt_len(skb);
336 sch->bstats.packets++;
337 return NET_XMIT_SUCCESS; 406 return NET_XMIT_SUCCESS;
338 }
339 407
408 qlen = slot->qlen;
340 sfq_drop(sch); 409 sfq_drop(sch);
341 return NET_XMIT_CN; 410 /* Return Congestion Notification only if we dropped a packet
342} 411 * from this flow.
343 412 */
344static struct sk_buff * 413 return (qlen != slot->qlen) ? NET_XMIT_CN : NET_XMIT_SUCCESS;
345sfq_peek(struct Qdisc *sch)
346{
347 struct sfq_sched_data *q = qdisc_priv(sch);
348 sfq_index a;
349
350 /* No active slots */
351 if (q->tail == SFQ_DEPTH)
352 return NULL;
353
354 a = q->next[q->tail];
355 return skb_peek(&q->qs[a]);
356} 414}
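
The enqueue path above also maintains a circular ring of active flows via slot->next and q->tail. A rough sketch of that splice, using simplified pointer-only types (the kernel mixes slot indices and pointers):

#include <stddef.h>

struct ring_flow {
	struct ring_flow *next;
	int id;
};

struct ring {
	struct ring_flow *tail;			/* NULL while no flow is active */
};

void ring_add(struct ring *r, struct ring_flow *f)
{
	if (r->tail == NULL) {
		f->next = f;			/* first active flow: a ring of one */
	} else {
		f->next = r->tail->next;	/* old head stays the next to be served */
		r->tail->next = f;
	}
	r->tail = f;				/* service starts from tail->next, the oldest flow */
}
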
357 415
358static struct sk_buff * 416static struct sk_buff *
@@ -360,34 +418,38 @@ sfq_dequeue(struct Qdisc *sch)
360{ 418{
361 struct sfq_sched_data *q = qdisc_priv(sch); 419 struct sfq_sched_data *q = qdisc_priv(sch);
362 struct sk_buff *skb; 420 struct sk_buff *skb;
363 sfq_index a, old_a; 421 sfq_index a, next_a;
422 struct sfq_slot *slot;
364 423
365 /* No active slots */ 424 /* No active slots */
366 if (q->tail == SFQ_DEPTH) 425 if (q->tail == NULL)
367 return NULL; 426 return NULL;
368 427
369 a = old_a = q->next[q->tail]; 428next_slot:
370 429 a = q->tail->next;
371 /* Grab packet */ 430 slot = &q->slots[a];
372 skb = __skb_dequeue(&q->qs[a]); 431 if (slot->allot <= 0) {
432 q->tail = slot;
433 slot->allot += q->scaled_quantum;
434 goto next_slot;
435 }
436 skb = slot_dequeue_head(slot);
373 sfq_dec(q, a); 437 sfq_dec(q, a);
438 qdisc_bstats_update(sch, skb);
374 sch->q.qlen--; 439 sch->q.qlen--;
375 sch->qstats.backlog -= qdisc_pkt_len(skb); 440 sch->qstats.backlog -= qdisc_pkt_len(skb);
376 441
377 /* Is the slot empty? */ 442 /* Is the slot empty? */
378 if (q->qs[a].qlen == 0) { 443 if (slot->qlen == 0) {
379 q->ht[q->hash[a]] = SFQ_DEPTH; 444 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
380 a = q->next[a]; 445 next_a = slot->next;
381 if (a == old_a) { 446 if (a == next_a) {
382 q->tail = SFQ_DEPTH; 447 q->tail = NULL; /* no more active slots */
383 return skb; 448 return skb;
384 } 449 }
385 q->next[q->tail] = a; 450 q->tail->next = next_a;
386 q->allot[a] += q->quantum; 451 } else {
387 } else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) { 452 slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
388 q->tail = a;
389 a = q->next[a];
390 q->allot[a] += q->quantum;
391 } 453 }
392 return skb; 454 return skb;
393} 455}
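
The dequeue loop above implements deficit round robin: a flow whose allot is exhausted is skipped, refilled by one quantum and revisited on a later round, so large packets borrow credit and repay it over time. A self-contained userspace model follows; the packet sizes and quantum are arbitrary demo values, and the kernel keeps the credit in scaled units (SFQ_ALLOT_SIZE) rather than plain bytes.

#include <stdio.h>

#define NFLOWS 2

struct flow {
	int allot;	/* remaining byte credit, may go negative */
	int pkts;	/* packets still queued */
	int pkt_len;	/* fixed packet size for the demo */
};

int main(void)
{
	struct flow flows[NFLOWS] = {
		{ .allot = 1500, .pkts = 3, .pkt_len = 1500 },
		{ .allot = 1500, .pkts = 6, .pkt_len = 300 },
	};
	int quantum = 1500, cur = 0, left = 9;

	while (left) {
		struct flow *f = &flows[cur];

		if (!f->pkts || f->allot <= 0) {
			if (f->pkts)
				f->allot += quantum;	/* skip and refill, like "goto next_slot" */
			cur = (cur + 1) % NFLOWS;
			continue;
		}
		f->allot -= f->pkt_len;		/* the kernel charges SFQ_ALLOT_SIZE(len) instead */
		f->pkts--;
		left--;
		printf("dequeued from flow %d, allot now %d\n", cur, f->allot);
	}
	return 0;
}
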
@@ -421,12 +483,18 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
421 if (opt->nla_len < nla_attr_size(sizeof(*ctl))) 483 if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
422 return -EINVAL; 484 return -EINVAL;
423 485
486 if (ctl->divisor &&
487 (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
488 return -EINVAL;
489
424 sch_tree_lock(sch); 490 sch_tree_lock(sch);
425 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch)); 491 q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
492 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
426 q->perturb_period = ctl->perturb_period * HZ; 493 q->perturb_period = ctl->perturb_period * HZ;
427 if (ctl->limit) 494 if (ctl->limit)
428 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1); 495 q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
429 496 if (ctl->divisor)
497 q->divisor = ctl->divisor;
430 qlen = sch->q.qlen; 498 qlen = sch->q.qlen;
431 while (sch->q.qlen > q->limit) 499 while (sch->q.qlen > q->limit)
432 sfq_drop(sch); 500 sfq_drop(sch);
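
The new divisor check above accepts only powers of two up to 65536 (zero means "keep the current value"); that shape allows a flow hash to be folded into a bucket index with a mask rather than a modulo. A hedged sketch with illustrative helper names:

#include <stdbool.h>

bool divisor_is_valid(unsigned int divisor)
{
	return divisor != 0 &&
	       (divisor & (divisor - 1)) == 0 &&	/* power of two */
	       divisor <= 65536;
}

unsigned int hash_to_bucket(unsigned int hashval, unsigned int divisor)
{
	return hashval & (divisor - 1);		/* cheap modulo, valid only for powers of two */
}
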
@@ -444,26 +512,25 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
444static int sfq_init(struct Qdisc *sch, struct nlattr *opt) 512static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
445{ 513{
446 struct sfq_sched_data *q = qdisc_priv(sch); 514 struct sfq_sched_data *q = qdisc_priv(sch);
515 size_t sz;
447 int i; 516 int i;
448 517
449 q->perturb_timer.function = sfq_perturbation; 518 q->perturb_timer.function = sfq_perturbation;
450 q->perturb_timer.data = (unsigned long)sch; 519 q->perturb_timer.data = (unsigned long)sch;
451 init_timer_deferrable(&q->perturb_timer); 520 init_timer_deferrable(&q->perturb_timer);
452 521
453 for (i = 0; i < SFQ_HASH_DIVISOR; i++)
454 q->ht[i] = SFQ_DEPTH;
455
456 for (i = 0; i < SFQ_DEPTH; i++) { 522 for (i = 0; i < SFQ_DEPTH; i++) {
457 skb_queue_head_init(&q->qs[i]); 523 q->dep[i].next = i + SFQ_SLOTS;
458 q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH; 524 q->dep[i].prev = i + SFQ_SLOTS;
459 q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
460 } 525 }
461 526
462 q->limit = SFQ_DEPTH - 1; 527 q->limit = SFQ_DEPTH - 1;
463 q->max_depth = 0; 528 q->cur_depth = 0;
464 q->tail = SFQ_DEPTH; 529 q->tail = NULL;
530 q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
465 if (opt == NULL) { 531 if (opt == NULL) {
466 q->quantum = psched_mtu(qdisc_dev(sch)); 532 q->quantum = psched_mtu(qdisc_dev(sch));
533 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
467 q->perturb_period = 0; 534 q->perturb_period = 0;
468 q->perturbation = net_random(); 535 q->perturbation = net_random();
469 } else { 536 } else {
@@ -472,8 +539,23 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
472 return err; 539 return err;
473 } 540 }
474 541
475 for (i = 0; i < SFQ_DEPTH; i++) 542 sz = sizeof(q->ht[0]) * q->divisor;
543 q->ht = kmalloc(sz, GFP_KERNEL);
544 if (!q->ht && sz > PAGE_SIZE)
545 q->ht = vmalloc(sz);
546 if (!q->ht)
547 return -ENOMEM;
548 for (i = 0; i < q->divisor; i++)
549 q->ht[i] = SFQ_EMPTY_SLOT;
550
551 for (i = 0; i < SFQ_SLOTS; i++) {
552 slot_queue_init(&q->slots[i]);
476 sfq_link(q, i); 553 sfq_link(q, i);
554 }
555 if (q->limit >= 1)
556 sch->flags |= TCQ_F_CAN_BYPASS;
557 else
558 sch->flags &= ~TCQ_F_CAN_BYPASS;
477 return 0; 559 return 0;
478} 560}
479 561
@@ -484,6 +566,10 @@ static void sfq_destroy(struct Qdisc *sch)
484 tcf_destroy_chain(&q->filter_list); 566 tcf_destroy_chain(&q->filter_list);
485 q->perturb_period = 0; 567 q->perturb_period = 0;
486 del_timer_sync(&q->perturb_timer); 568 del_timer_sync(&q->perturb_timer);
569 if (is_vmalloc_addr(q->ht))
570 vfree(q->ht);
571 else
572 kfree(q->ht);
487} 573}
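
sfq_init() and sfq_destroy() above pair a kmalloc-with-vmalloc-fallback allocation with an address-based free. A compact kernel-style sketch of that pairing (the wrapper names are illustrative):

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>

void *sfq_table_alloc(size_t sz)
{
	void *ptr = kmalloc(sz, GFP_KERNEL);

	if (!ptr && sz > PAGE_SIZE)
		ptr = vmalloc(sz);	/* large hash tables fall back to vmalloc */
	return ptr;
}

void sfq_table_free(void *ptr)
{
	if (!ptr)
		return;
	if (is_vmalloc_addr(ptr))	/* pick the free that matches the allocator */
		vfree(ptr);
	else
		kfree(ptr);
}
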
488 574
489static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb) 575static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -496,7 +582,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
496 opt.perturb_period = q->perturb_period / HZ; 582 opt.perturb_period = q->perturb_period / HZ;
497 583
498 opt.limit = q->limit; 584 opt.limit = q->limit;
499 opt.divisor = SFQ_HASH_DIVISOR; 585 opt.divisor = q->divisor;
500 opt.flows = q->limit; 586 opt.flows = q->limit;
501 587
502 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); 588 NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
@@ -521,6 +607,8 @@ static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
521static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, 607static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
522 u32 classid) 608 u32 classid)
523{ 609{
610 /* we cannot bypass queue discipline anymore */
611 sch->flags &= ~TCQ_F_CAN_BYPASS;
524 return 0; 612 return 0;
525} 613}
526 614
@@ -548,10 +636,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
548 struct gnet_dump *d) 636 struct gnet_dump *d)
549{ 637{
550 struct sfq_sched_data *q = qdisc_priv(sch); 638 struct sfq_sched_data *q = qdisc_priv(sch);
551 sfq_index idx = q->ht[cl-1]; 639 sfq_index idx = q->ht[cl - 1];
552 struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen }; 640 struct gnet_stats_queue qs = { 0 };
553 struct tc_sfq_xstats xstats = { .allot = q->allot[idx] }; 641 struct tc_sfq_xstats xstats = { 0 };
642 struct sk_buff *skb;
554 643
644 if (idx != SFQ_EMPTY_SLOT) {
645 const struct sfq_slot *slot = &q->slots[idx];
646
647 xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
648 qs.qlen = slot->qlen;
649 slot_queue_walk(slot, skb)
650 qs.backlog += qdisc_pkt_len(skb);
651 }
555 if (gnet_stats_copy_queue(d, &qs) < 0) 652 if (gnet_stats_copy_queue(d, &qs) < 0)
556 return -1; 653 return -1;
557 return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); 654 return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
@@ -565,8 +662,8 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
565 if (arg->stop) 662 if (arg->stop)
566 return; 663 return;
567 664
568 for (i = 0; i < SFQ_HASH_DIVISOR; i++) { 665 for (i = 0; i < q->divisor; i++) {
569 if (q->ht[i] == SFQ_DEPTH || 666 if (q->ht[i] == SFQ_EMPTY_SLOT ||
570 arg->count < arg->skip) { 667 arg->count < arg->skip) {
571 arg->count++; 668 arg->count++;
572 continue; 669 continue;
@@ -597,7 +694,7 @@ static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
597 .priv_size = sizeof(struct sfq_sched_data), 694 .priv_size = sizeof(struct sfq_sched_data),
598 .enqueue = sfq_enqueue, 695 .enqueue = sfq_enqueue,
599 .dequeue = sfq_dequeue, 696 .dequeue = sfq_dequeue,
600 .peek = sfq_peek, 697 .peek = qdisc_peek_dequeued,
601 .drop = sfq_drop, 698 .drop = sfq_drop,
602 .init = sfq_init, 699 .init = sfq_init,
603 .reset = sfq_reset, 700 .reset = sfq_reset,
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 641a30d64635..1dcfb5223a86 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -97,8 +97,7 @@
97 changed the limit is not effective anymore. 97 changed the limit is not effective anymore.
98*/ 98*/
99 99
100struct tbf_sched_data 100struct tbf_sched_data {
101{
102/* Parameters */ 101/* Parameters */
103 u32 limit; /* Maximal length of backlog: bytes */ 102 u32 limit; /* Maximal length of backlog: bytes */
104 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */ 103 u32 buffer; /* Token bucket depth/rate: MUST BE >= MTU/B */
@@ -115,10 +114,10 @@ struct tbf_sched_data
115 struct qdisc_watchdog watchdog; /* Watchdog timer */ 114 struct qdisc_watchdog watchdog; /* Watchdog timer */
116}; 115};
117 116
118#define L2T(q,L) qdisc_l2t((q)->R_tab,L) 117#define L2T(q, L) qdisc_l2t((q)->R_tab, L)
119#define L2T_P(q,L) qdisc_l2t((q)->P_tab,L) 118#define L2T_P(q, L) qdisc_l2t((q)->P_tab, L)
120 119
121static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch) 120static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
122{ 121{
123 struct tbf_sched_data *q = qdisc_priv(sch); 122 struct tbf_sched_data *q = qdisc_priv(sch);
124 int ret; 123 int ret;
@@ -134,12 +133,10 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
134 } 133 }
135 134
136 sch->q.qlen++; 135 sch->q.qlen++;
137 sch->bstats.bytes += qdisc_pkt_len(skb);
138 sch->bstats.packets++;
139 return NET_XMIT_SUCCESS; 136 return NET_XMIT_SUCCESS;
140} 137}
141 138
142static unsigned int tbf_drop(struct Qdisc* sch) 139static unsigned int tbf_drop(struct Qdisc *sch)
143{ 140{
144 struct tbf_sched_data *q = qdisc_priv(sch); 141 struct tbf_sched_data *q = qdisc_priv(sch);
145 unsigned int len = 0; 142 unsigned int len = 0;
@@ -151,7 +148,7 @@ static unsigned int tbf_drop(struct Qdisc* sch)
151 return len; 148 return len;
152} 149}
153 150
154static struct sk_buff *tbf_dequeue(struct Qdisc* sch) 151static struct sk_buff *tbf_dequeue(struct Qdisc *sch)
155{ 152{
156 struct tbf_sched_data *q = qdisc_priv(sch); 153 struct tbf_sched_data *q = qdisc_priv(sch);
157 struct sk_buff *skb; 154 struct sk_buff *skb;
@@ -187,7 +184,8 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
187 q->tokens = toks; 184 q->tokens = toks;
188 q->ptokens = ptoks; 185 q->ptokens = ptoks;
189 sch->q.qlen--; 186 sch->q.qlen--;
190 sch->flags &= ~TCQ_F_THROTTLED; 187 qdisc_unthrottled(sch);
188 qdisc_bstats_update(sch, skb);
191 return skb; 189 return skb;
192 } 190 }
193 191
@@ -210,7 +208,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc* sch)
210 return NULL; 208 return NULL;
211} 209}
212 210
213static void tbf_reset(struct Qdisc* sch) 211static void tbf_reset(struct Qdisc *sch)
214{ 212{
215 struct tbf_sched_data *q = qdisc_priv(sch); 213 struct tbf_sched_data *q = qdisc_priv(sch);
216 214
@@ -228,7 +226,7 @@ static const struct nla_policy tbf_policy[TCA_TBF_MAX + 1] = {
228 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 226 [TCA_TBF_PTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
229}; 227};
230 228
231static int tbf_change(struct Qdisc* sch, struct nlattr *opt) 229static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
232{ 230{
233 int err; 231 int err;
234 struct tbf_sched_data *q = qdisc_priv(sch); 232 struct tbf_sched_data *q = qdisc_priv(sch);
@@ -237,7 +235,7 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
237 struct qdisc_rate_table *rtab = NULL; 235 struct qdisc_rate_table *rtab = NULL;
238 struct qdisc_rate_table *ptab = NULL; 236 struct qdisc_rate_table *ptab = NULL;
239 struct Qdisc *child = NULL; 237 struct Qdisc *child = NULL;
240 int max_size,n; 238 int max_size, n;
241 239
242 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy); 240 err = nla_parse_nested(tb, TCA_TBF_PTAB, opt, tbf_policy);
243 if (err < 0) 241 if (err < 0)
@@ -260,15 +258,18 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
260 } 258 }
261 259
262 for (n = 0; n < 256; n++) 260 for (n = 0; n < 256; n++)
263 if (rtab->data[n] > qopt->buffer) break; 261 if (rtab->data[n] > qopt->buffer)
264 max_size = (n << qopt->rate.cell_log)-1; 262 break;
263 max_size = (n << qopt->rate.cell_log) - 1;
265 if (ptab) { 264 if (ptab) {
266 int size; 265 int size;
267 266
268 for (n = 0; n < 256; n++) 267 for (n = 0; n < 256; n++)
269 if (ptab->data[n] > qopt->mtu) break; 268 if (ptab->data[n] > qopt->mtu)
270 size = (n << qopt->peakrate.cell_log)-1; 269 break;
271 if (size < max_size) max_size = size; 270 size = (n << qopt->peakrate.cell_log) - 1;
271 if (size < max_size)
272 max_size = size;
272 } 273 }
273 if (max_size < 0) 274 if (max_size < 0)
274 goto done; 275 goto done;
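
The reformatted loop above derives max_size from the rate table: data[n] holds the transmission cost of packets in size slot n (sizes around n << cell_log), so scanning for the first entry that exceeds the burst budget gives the largest admissible packet. A hedged standalone sketch (the helper name is illustrative):

#include <linux/types.h>

int tbf_largest_fitting(const u32 *data, int cell_log, u32 budget)
{
	int n;

	for (n = 0; n < 256; n++)
		if (data[n] > budget)
			break;
	return (n << cell_log) - 1;	/* negative when even the smallest slot costs too much */
}
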
@@ -311,7 +312,7 @@ done:
311 return err; 312 return err;
312} 313}
313 314
314static int tbf_init(struct Qdisc* sch, struct nlattr *opt) 315static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
315{ 316{
316 struct tbf_sched_data *q = qdisc_priv(sch); 317 struct tbf_sched_data *q = qdisc_priv(sch);
317 318
@@ -423,8 +424,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker)
423 } 424 }
424} 425}
425 426
426static const struct Qdisc_class_ops tbf_class_ops = 427static const struct Qdisc_class_ops tbf_class_ops = {
427{
428 .graft = tbf_graft, 428 .graft = tbf_graft,
429 .leaf = tbf_leaf, 429 .leaf = tbf_leaf,
430 .get = tbf_get, 430 .get = tbf_get,
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index feaabc103ce6..45cd30098e34 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -53,38 +53,38 @@
53 which will not break load balancing, though native slave 53 which will not break load balancing, though native slave
54 traffic will have the highest priority. */ 54 traffic will have the highest priority. */
55 55
56struct teql_master 56struct teql_master {
57{
58 struct Qdisc_ops qops; 57 struct Qdisc_ops qops;
59 struct net_device *dev; 58 struct net_device *dev;
60 struct Qdisc *slaves; 59 struct Qdisc *slaves;
61 struct list_head master_list; 60 struct list_head master_list;
61 unsigned long tx_bytes;
62 unsigned long tx_packets;
63 unsigned long tx_errors;
64 unsigned long tx_dropped;
62}; 65};
63 66
64struct teql_sched_data 67struct teql_sched_data {
65{
66 struct Qdisc *next; 68 struct Qdisc *next;
67 struct teql_master *m; 69 struct teql_master *m;
68 struct neighbour *ncache; 70 struct neighbour *ncache;
69 struct sk_buff_head q; 71 struct sk_buff_head q;
70}; 72};
71 73
72#define NEXT_SLAVE(q) (((struct teql_sched_data*)qdisc_priv(q))->next) 74#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
73 75
74#define FMASK (IFF_BROADCAST|IFF_POINTOPOINT) 76#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
75 77
76/* "teql*" qdisc routines */ 78/* "teql*" qdisc routines */
77 79
78static int 80static int
79teql_enqueue(struct sk_buff *skb, struct Qdisc* sch) 81teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
80{ 82{
81 struct net_device *dev = qdisc_dev(sch); 83 struct net_device *dev = qdisc_dev(sch);
82 struct teql_sched_data *q = qdisc_priv(sch); 84 struct teql_sched_data *q = qdisc_priv(sch);
83 85
84 if (q->q.qlen < dev->tx_queue_len) { 86 if (q->q.qlen < dev->tx_queue_len) {
85 __skb_queue_tail(&q->q, skb); 87 __skb_queue_tail(&q->q, skb);
86 sch->bstats.bytes += qdisc_pkt_len(skb);
87 sch->bstats.packets++;
88 return NET_XMIT_SUCCESS; 88 return NET_XMIT_SUCCESS;
89 } 89 }
90 90
@@ -94,7 +94,7 @@ teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
94} 94}
95 95
96static struct sk_buff * 96static struct sk_buff *
97teql_dequeue(struct Qdisc* sch) 97teql_dequeue(struct Qdisc *sch)
98{ 98{
99 struct teql_sched_data *dat = qdisc_priv(sch); 99 struct teql_sched_data *dat = qdisc_priv(sch);
100 struct netdev_queue *dat_queue; 100 struct netdev_queue *dat_queue;
@@ -108,19 +108,21 @@ teql_dequeue(struct Qdisc* sch)
108 dat->m->slaves = sch; 108 dat->m->slaves = sch;
109 netif_wake_queue(m); 109 netif_wake_queue(m);
110 } 110 }
111 } else {
112 qdisc_bstats_update(sch, skb);
111 } 113 }
112 sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; 114 sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
113 return skb; 115 return skb;
114} 116}
115 117
116static struct sk_buff * 118static struct sk_buff *
117teql_peek(struct Qdisc* sch) 119teql_peek(struct Qdisc *sch)
118{ 120{
119 /* teql is meant to be used as root qdisc */ 121 /* teql is meant to be used as root qdisc */
120 return NULL; 122 return NULL;
121} 123}
122 124
123static __inline__ void 125static inline void
124teql_neigh_release(struct neighbour *n) 126teql_neigh_release(struct neighbour *n)
125{ 127{
126 if (n) 128 if (n)
@@ -128,7 +130,7 @@ teql_neigh_release(struct neighbour *n)
128} 130}
129 131
130static void 132static void
131teql_reset(struct Qdisc* sch) 133teql_reset(struct Qdisc *sch)
132{ 134{
133 struct teql_sched_data *dat = qdisc_priv(sch); 135 struct teql_sched_data *dat = qdisc_priv(sch);
134 136
@@ -138,13 +140,14 @@ teql_reset(struct Qdisc* sch)
138} 140}
139 141
140static void 142static void
141teql_destroy(struct Qdisc* sch) 143teql_destroy(struct Qdisc *sch)
142{ 144{
143 struct Qdisc *q, *prev; 145 struct Qdisc *q, *prev;
144 struct teql_sched_data *dat = qdisc_priv(sch); 146 struct teql_sched_data *dat = qdisc_priv(sch);
145 struct teql_master *master = dat->m; 147 struct teql_master *master = dat->m;
146 148
147 if ((prev = master->slaves) != NULL) { 149 prev = master->slaves;
150 if (prev) {
148 do { 151 do {
149 q = NEXT_SLAVE(prev); 152 q = NEXT_SLAVE(prev);
150 if (q == sch) { 153 if (q == sch) {
@@ -176,7 +179,7 @@ teql_destroy(struct Qdisc* sch)
176static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt) 179static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
177{ 180{
178 struct net_device *dev = qdisc_dev(sch); 181 struct net_device *dev = qdisc_dev(sch);
179 struct teql_master *m = (struct teql_master*)sch->ops; 182 struct teql_master *m = (struct teql_master *)sch->ops;
180 struct teql_sched_data *q = qdisc_priv(sch); 183 struct teql_sched_data *q = qdisc_priv(sch);
181 184
182 if (dev->hard_header_len > m->dev->hard_header_len) 185 if (dev->hard_header_len > m->dev->hard_header_len)
@@ -241,11 +244,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
241 } 244 }
242 if (neigh_event_send(n, skb_res) == 0) { 245 if (neigh_event_send(n, skb_res) == 0) {
243 int err; 246 int err;
247 char haddr[MAX_ADDR_LEN];
244 248
245 read_lock(&n->lock); 249 neigh_ha_snapshot(haddr, n, dev);
246 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 250 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
247 n->ha, NULL, skb->len); 251 NULL, skb->len);
248 read_unlock(&n->lock);
249 252
250 if (err < 0) { 253 if (err < 0) {
251 neigh_release(n); 254 neigh_release(n);
@@ -275,7 +278,6 @@ static inline int teql_resolve(struct sk_buff *skb,
275static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev) 278static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
276{ 279{
277 struct teql_master *master = netdev_priv(dev); 280 struct teql_master *master = netdev_priv(dev);
278 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
279 struct Qdisc *start, *q; 281 struct Qdisc *start, *q;
280 int busy; 282 int busy;
281 int nores; 283 int nores;
@@ -288,7 +290,8 @@ restart:
288 nores = 0; 290 nores = 0;
289 busy = 0; 291 busy = 0;
290 292
291 if ((q = start) == NULL) 293 q = start;
294 if (!q)
292 goto drop; 295 goto drop;
293 296
294 do { 297 do {
@@ -309,15 +312,14 @@ restart:
309 if (__netif_tx_trylock(slave_txq)) { 312 if (__netif_tx_trylock(slave_txq)) {
310 unsigned int length = qdisc_pkt_len(skb); 313 unsigned int length = qdisc_pkt_len(skb);
311 314
312 if (!netif_tx_queue_stopped(slave_txq) && 315 if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
313 !netif_tx_queue_frozen(slave_txq) &&
314 slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { 316 slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
315 txq_trans_update(slave_txq); 317 txq_trans_update(slave_txq);
316 __netif_tx_unlock(slave_txq); 318 __netif_tx_unlock(slave_txq);
317 master->slaves = NEXT_SLAVE(q); 319 master->slaves = NEXT_SLAVE(q);
318 netif_wake_queue(dev); 320 netif_wake_queue(dev);
319 txq->tx_packets++; 321 master->tx_packets++;
320 txq->tx_bytes += length; 322 master->tx_bytes += length;
321 return NETDEV_TX_OK; 323 return NETDEV_TX_OK;
322 } 324 }
323 __netif_tx_unlock(slave_txq); 325 __netif_tx_unlock(slave_txq);
@@ -344,20 +346,20 @@ restart:
344 netif_stop_queue(dev); 346 netif_stop_queue(dev);
345 return NETDEV_TX_BUSY; 347 return NETDEV_TX_BUSY;
346 } 348 }
347 dev->stats.tx_errors++; 349 master->tx_errors++;
348 350
349drop: 351drop:
350 txq->tx_dropped++; 352 master->tx_dropped++;
351 dev_kfree_skb(skb); 353 dev_kfree_skb(skb);
352 return NETDEV_TX_OK; 354 return NETDEV_TX_OK;
353} 355}
354 356
355static int teql_master_open(struct net_device *dev) 357static int teql_master_open(struct net_device *dev)
356{ 358{
357 struct Qdisc * q; 359 struct Qdisc *q;
358 struct teql_master *m = netdev_priv(dev); 360 struct teql_master *m = netdev_priv(dev);
359 int mtu = 0xFFFE; 361 int mtu = 0xFFFE;
360 unsigned flags = IFF_NOARP|IFF_MULTICAST; 362 unsigned int flags = IFF_NOARP | IFF_MULTICAST;
361 363
362 if (m->slaves == NULL) 364 if (m->slaves == NULL)
363 return -EUNATCH; 365 return -EUNATCH;
@@ -400,6 +402,18 @@ static int teql_master_close(struct net_device *dev)
400 return 0; 402 return 0;
401} 403}
402 404
405static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
406 struct rtnl_link_stats64 *stats)
407{
408 struct teql_master *m = netdev_priv(dev);
409
410 stats->tx_packets = m->tx_packets;
411 stats->tx_bytes = m->tx_bytes;
412 stats->tx_errors = m->tx_errors;
413 stats->tx_dropped = m->tx_dropped;
414 return stats;
415}
416
403static int teql_master_mtu(struct net_device *dev, int new_mtu) 417static int teql_master_mtu(struct net_device *dev, int new_mtu)
404{ 418{
405 struct teql_master *m = netdev_priv(dev); 419 struct teql_master *m = netdev_priv(dev);
@@ -413,7 +427,7 @@ static int teql_master_mtu(struct net_device *dev, int new_mtu)
413 do { 427 do {
414 if (new_mtu > qdisc_dev(q)->mtu) 428 if (new_mtu > qdisc_dev(q)->mtu)
415 return -EINVAL; 429 return -EINVAL;
416 } while ((q=NEXT_SLAVE(q)) != m->slaves); 430 } while ((q = NEXT_SLAVE(q)) != m->slaves);
417 } 431 }
418 432
419 dev->mtu = new_mtu; 433 dev->mtu = new_mtu;
@@ -424,6 +438,7 @@ static const struct net_device_ops teql_netdev_ops = {
424 .ndo_open = teql_master_open, 438 .ndo_open = teql_master_open,
425 .ndo_stop = teql_master_close, 439 .ndo_stop = teql_master_close,
426 .ndo_start_xmit = teql_master_xmit, 440 .ndo_start_xmit = teql_master_xmit,
441 .ndo_get_stats64 = teql_master_stats64,
427 .ndo_change_mtu = teql_master_mtu, 442 .ndo_change_mtu = teql_master_mtu,
428}; 443};
429 444