author	Eric Dumazet <eric.dumazet@gmail.com>	2010-12-20 07:54:58 -0500
committer	David S. Miller <davem@davemloft.net>	2010-12-21 00:32:59 -0500
commit	eda83e3b63e88351310c13c99178eb4634f137b2
tree	55b9c1f75337a8ca4032e607405e370b437c398e /net/sched
parent	d9993be65a77f500ae926176baa264816bfe3816
net_sched: sch_sfq: better struct layouts
Here is a respin of the patch.

I'll send a short patch to make SFQ more fair in the presence of large
packets as well.

Thanks

[PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts

This patch shrinks sizeof(struct sfq_sched_data) from 0x14f8 (or more
if spinlocks are bigger) to 0x1180 bytes, and reduces text size as well.

   text    data     bss     dec     hex filename
   4821     152       0    4973    136d old/net/sched/sch_sfq.o
   4627     136       0    4763    129b new/net/sched/sch_sfq.o

All data for a slot/flow is now grouped in a compact and cache-friendly
structure, instead of being spread over many different locations.

struct sfq_slot {
	struct sk_buff	*skblist_next;
	struct sk_buff	*skblist_prev;
	sfq_index	qlen; /* number of skbs in skblist */
	sfq_index	next; /* next slot in sfq chain */
	struct sfq_head dep; /* anchor in dep[] chains */
	unsigned short	hash; /* hash value (index in ht[]) */
	short		allot; /* credit for this slot */
};

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
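To see where the savings come from: the old layout kept five parallel arrays (next[], allot[], hash[], qs[] and a double-sized dep[]), so one flow's state was spread across five separate regions. The sketch below is a minimal userspace approximation of the arithmetic, not kernel code; the sk_buff_head stand-in and its lock field are assumptions, and exact kernel sizes vary by arch and config.

/*
 * Userspace sketch of the layout change, NOT kernel code.
 * struct fake_skb_head mimics the kernel's sk_buff_head (two pointers,
 * a qlen and a spinlock approximated here by an int); real spinlocks
 * can be much bigger with debugging enabled.
 */
#include <stdio.h>

typedef unsigned char sfq_index;

struct sfq_head { sfq_index next, prev; };

struct fake_skb_head {		/* stand-in for struct sk_buff_head */
	void *next, *prev;
	unsigned int qlen;
	int lock;
};

struct sfq_slot {		/* new: one compact per-flow record */
	void *skblist_next, *skblist_prev;
	sfq_index qlen, next;
	struct sfq_head dep;
	unsigned short hash;
	short allot;
};

int main(void)
{
	/* old: five parallel arrays touched on every enqueue/dequeue */
	size_t old = 128 * (sizeof(sfq_index)		  /* next[]  */
			  + sizeof(short)		  /* allot[] */
			  + sizeof(unsigned short)	  /* hash[]  */
			  + sizeof(struct fake_skb_head)) /* qs[]    */
		   + 256 * sizeof(struct sfq_head);	  /* dep[]   */

	/* new: one slots[] array plus a halved dep[] */
	size_t new = 128 * sizeof(struct sfq_slot)
		   + 128 * sizeof(struct sfq_head);

	printf("old per-flow area: %zu bytes, new: %zu bytes\n", old, new);
	return 0;
}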
Diffstat (limited to 'net/sched')
-rw-r--r--	net/sched/sch_sfq.c	260
1 file changed, 162 insertions(+), 98 deletions(-)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42396c965dd6..13322e8a0456 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,27 +67,42 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH		128 /* max number of packets per flow */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
 struct sfq_head
 {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
+struct sfq_slot {
+	struct sk_buff	*skblist_next;
+	struct sk_buff	*skblist_prev;
+	sfq_index	qlen; /* number of skbs in skblist */
+	sfq_index	next; /* next slot in sfq chain */
+	struct sfq_head dep; /* anchor in dep[] chains */
+	unsigned short	hash; /* hash value (index in ht[]) */
+	short		allot; /* credit for this slot */
+};
+
 struct sfq_sched_data
 {
 /* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
+	sfq_index	cur_depth;	/* depth of longest slot */
 
+	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];		/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
+{
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].dep;
+	return &q->dep[val - SFQ_SLOTS];
+}
+
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
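The sfq_index encoding deserves a worked example: values 0 .. SFQ_SLOTS - 1 name entries of slots[], while SFQ_SLOTS .. SFQ_SLOTS + SFQ_DEPTH - 1 name entries of dep[], which is why a single unsigned char can still address both arrays (255 is left over for SFQ_EMPTY_SLOT). A minimal userspace sketch of the same mapping, with struct bodies trimmed to what the helper touches:

/* Userspace sketch of the index encoding, mirroring sfq_dep_head(). */
#include <assert.h>
#include <stdio.h>

#define SFQ_DEPTH 128
#define SFQ_SLOTS 128

typedef unsigned char sfq_index;
struct sfq_head { sfq_index next, prev; };
struct sfq_slot { struct sfq_head dep; };

struct sfq_sched_data {
	struct sfq_slot slots[SFQ_SLOTS];
	struct sfq_head dep[SFQ_DEPTH];
};

/* same logic as the kernel helper: small values address slots[],
 * values >= SFQ_SLOTS address dep[] buckets */
static struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
	if (val < SFQ_SLOTS)
		return &q->slots[val].dep;
	return &q->dep[val - SFQ_SLOTS];
}

int main(void)
{
	static struct sfq_sched_data q;

	assert(sfq_dep_head(&q, 5) == &q.slots[5].dep);       /* a flow slot  */
	assert(sfq_dep_head(&q, SFQ_SLOTS + 3) == &q.dep[3]); /* depth bucket */
	puts("index encoding behaves as described");
	return 0;
}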
@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
+
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	q->slots[x].dep.next = n;
+	q->slots[x].dep.prev = p;
+
+	q->dep[qlen].next = x;	/* sfq_dep_head(q, p)->next = x */
+	sfq_dep_head(q, n)->prev = x;
 }
 
+#define sfq_unlink(q, x, n, p)			\
+	n = q->slots[x].dep.next;		\
+	p = q->slots[x].dep.prev;		\
+	sfq_dep_head(q, p)->next = n;		\
+	sfq_dep_head(q, n)->prev = p
+
+
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
-		q->max_depth--;
+	sfq_unlink(q, x, n, p);
 
+	d = q->slots[x].qlen--;
+	if (n == p && q->cur_depth == d)
+		q->cur_depth--;
 	sfq_link(q, x);
 }
 
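dep[qlen] heads a doubly linked list of every slot currently holding qlen packets, so dep[0] doubles as the free-slot list and q->dep[cur_depth].next is always one of the longest slots, which is exactly what sfq_drop needs. The following userspace sketch transcribes sfq_link()/sfq_unlink() with the tables shrunk to four entries so one bucket move can be traced by hand; it is an illustration, not the kernel build:

/* Userspace sketch of the depth buckets: dep[qlen] chains all slots
 * whose queue holds qlen packets. Empty buckets are self-linked via
 * their own encoded index, as in sfq_init().
 */
#include <stdio.h>

#define SFQ_DEPTH 4		/* shrunk from 128 to keep the demo readable */
#define SFQ_SLOTS 4

typedef unsigned char sfq_index;
struct sfq_head { sfq_index next, prev; };
struct sfq_slot { sfq_index qlen; struct sfq_head dep; };

static struct sfq_slot slots[SFQ_SLOTS];
static struct sfq_head dep[SFQ_DEPTH];

static struct sfq_head *dep_head(sfq_index v)
{
	return v < SFQ_SLOTS ? &slots[v].dep : &dep[v - SFQ_SLOTS];
}

static void sfq_link(sfq_index x)	/* insert x at head of bucket qlen */
{
	sfq_index qlen = slots[x].qlen;
	sfq_index p = qlen + SFQ_SLOTS, n = dep[qlen].next;

	slots[x].dep.next = n;
	slots[x].dep.prev = p;
	dep[qlen].next = x;
	dep_head(n)->prev = x;
}

static void sfq_unlink(sfq_index x)
{
	sfq_index n = slots[x].dep.next, p = slots[x].dep.prev;

	dep_head(p)->next = n;
	dep_head(n)->prev = p;
}

int main(void)
{
	sfq_index i;

	for (i = 0; i < SFQ_DEPTH; i++)	/* all buckets empty: self-linked */
		dep[i].next = dep[i].prev = i + SFQ_SLOTS;
	for (i = 0; i < SFQ_SLOTS; i++)	/* every slot starts in dep[0] */
		sfq_link(i);

	/* "enqueue" one packet on slot 2: move it from bucket 0 to bucket 1 */
	sfq_unlink(2);
	slots[2].qlen++;
	sfq_link(2);

	printf("bucket 1 head: slot %d (expect 2)\n", dep[1].next);
	printf("free-list head: slot %d (expect 3)\n", dep[0].next);
	return 0;
}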
@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
-	if (q->max_depth < d)
-		q->max_depth = d;
+	sfq_unlink(q, x, n, p);
 
+	d = ++q->slots[x].qlen;
+	if (q->cur_depth < d)
+		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
+/* helper functions : might be changed when/if skb use a standard list_head */
+
+/* remove one skb from tail of slot queue */
+static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_prev;
+
+	slot->skblist_prev = skb->prev;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+/* remove one skb from head of slot queue */
+static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_next;
+
+	slot->skblist_next = skb->next;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+static inline void slot_queue_init(struct sfq_slot *slot)
+{
+	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
+}
+
+/* add skb to slot queue (tail add) */
+static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
+{
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
+}
+
+#define slot_queue_walk(slot, skb)		\
+	for (skb = slot->skblist_next;		\
+	     skb != (struct sk_buff *)slot;	\
+	     skb = skb->next)
+
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->cur_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
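The slot_queue_* helpers above re-create a tiny intrusive list: the slot itself, cast to struct sk_buff *, acts as the sentinel node, so an empty queue is simply "next points back at the slot" and no NULL checks are needed. This only works because sfq_slot's first two fields line up with sk_buff's next/prev pointers. A userspace illustration of the same trick with a stand-in node type (not kernel code):

/* Userspace illustration of the cast-the-head trick: the list head's
 * first two fields occupy the same offsets as the node's next/prev,
 * so the head can masquerade as a node and terminate the walk.
 */
#include <stdio.h>

struct node { struct node *next, *prev; int id; };

struct slot {			/* first two fields mirror skblist_next/prev */
	struct node *list_next, *list_prev;
};

static void slot_init(struct slot *s)
{
	/* empty list: both "pointers" aim back at the slot itself */
	s->list_next = s->list_prev = (struct node *)s;
}

static void slot_add_tail(struct slot *s, struct node *n)
{
	n->prev = s->list_prev;
	n->next = (struct node *)s;
	s->list_prev->next = n;	/* works even when list_prev is the slot: */
	s->list_prev = n;	/* the slot's first field overlaps ->next  */
}

int main(void)
{
	struct slot s;
	struct node a = { .id = 1 }, b = { .id = 2 };
	struct node *it;

	slot_init(&s);
	slot_add_tail(&s, &a);
	slot_add_tail(&s, &b);

	for (it = s.list_next; it != (struct node *)&s; it = it->next)
		printf("node %d\n", it->id);
	return 0;
}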
@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == SFQ_EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
+		slot_queue_init(slot);
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->next[x] = x;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
-		q->tail = x;
-		q->allot[x] = q->quantum;
+		q->tail = slot;
+		slot->allot = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
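On the enqueue side, active flows form a circular singly linked ring through slot->next, with q->tail pointing at the slot served most recently; a new flow is spliced in right after the tail and then becomes the tail itself, so it gets its first turn only after the current round completes. A small userspace sketch of that splice, transcribed from the branch above (illustrative, not the kernel build):

/* Userspace sketch of the active-flow ring: slots chain circularly
 * through slot->next, and tail->next is always the next slot to serve.
 */
#include <stdio.h>

#define SFQ_SLOTS 8

typedef unsigned char sfq_index;
struct sfq_slot { sfq_index next; };

static struct sfq_slot slots[SFQ_SLOTS];
static struct sfq_slot *tail;		/* NULL <=> no active flows */

static void activate(sfq_index x)	/* new flow becomes the ring's tail */
{
	if (tail == NULL)
		slots[x].next = x;	/* first flow: a ring of one */
	else {
		slots[x].next = tail->next;
		tail->next = x;
	}
	tail = &slots[x];
}

int main(void)
{
	sfq_index i, cur;

	activate(3);
	activate(5);
	activate(1);

	/* one service round starts at tail->next: expect 3 5 1 */
	cur = tail->next;
	for (i = 0; i < 3; i++) {
		printf("serve slot %d\n", cur);
		cur = slots[cur].next;
	}
	return 0;
}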
@@ -342,14 +410,12 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	return q->slots[q->tail->next].skblist_next;
 }
 
 static struct sk_buff *
@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-
-	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+	a = q->tail->next;
+	slot = &q->slots[a];
+	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		next_a = q->next[a];
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		next_a = slot->next;
 		if (a == next_a) {
-			q->tail = SFQ_DEPTH;
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = next_a;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->allot[a] += q->quantum;
-		q->tail = a;
+		q->tail->next = next_a;
+	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = slot;
+		slot->allot += q->quantum;
 	}
 	return skb;
 }
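The allot field implements the SFQ byte credit: a flow that goes active is granted one quantum, each dequeued packet debits its length, and once the credit reaches zero or below the flow is topped up by quantum and loses the turn (q->tail = slot). A toy trace of that bookkeeping for a single flow, with an assumed quantum of 1500 bytes; in the real qdisc the round would move on to the next slot when the turn ends:

/* Userspace trace of the allot arithmetic in sfq_dequeue(). */
#include <stdio.h>

int main(void)
{
	int quantum = 1500;
	short allot = quantum;		/* granted when the flow went active */
	int pkts[] = { 900, 900, 700 };
	int i;

	for (i = 0; i < 3; i++) {
		allot -= pkts[i];
		if (allot <= 0) {
			allot += quantum;	/* recredit, turn ends */
			printf("pkt %d (%d bytes): credit %d -> turn ends\n",
			       i, pkts[i], allot);
		} else {
			printf("pkt %d (%d bytes): credit %d, keep turn\n",
			       i, pkts[i], allot);
		}
	}
	return 0;
}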
@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
+		q->ht[i] = SFQ_EMPTY_SLOT;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->cur_depth = 0;
+	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 		return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	for (i = 0; i < SFQ_SLOTS; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 				struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct sk_buff_head *list = &q->qs[idx];
-	struct gnet_stats_queue qs = { .qlen = list->qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
+	struct gnet_stats_queue qs = { .qlen = slot->qlen };
+	struct tc_sfq_xstats xstats = { .allot = slot->allot };
 	struct sk_buff *skb;
 
-	skb_queue_walk(list, skb)
+	slot_queue_walk(slot, skb)
 		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;