fq: add fair queuing framework

This works on the same implementation principle as codel*.h, i.e. there's a generic header with structures and macros and a implementation header carrying function definitions to include in given, e.g. driver or module. The fairness logic comes from net/sched/sch_fq_codel.c but is generalized so it is more flexible and easier to re-use. Signed-off-by: Michal Kazior <michal.kazior@tieto.com> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Michal Kazior <michal.kazior@tieto.com> 2016-04-22 08:20:13 -0400
committer: David S. Miller <davem@davemloft.net> 2016-04-25 16:45:53 -0400
commit: 557fc4a098039cf296fe33f118bab99a925fd881 (patch)
tree: 707eb27e1a7af4b9f5a84a85bf811bd32123289b
parent: 05d82c42509c2936e328a6b8b1e1cd5684f427ac (diff)
2 files changed, 364 insertions, 0 deletions
diff --git a/include/net/fq.h b/include/net/fq.h
new file mode 100644
index 000000000000..268b49049c37
--- /dev/null
+++ b/include/net/fq.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2016 Qualcomm Atheros, Inc
+ *
+ * GPL v2
+ *
+ * Based on net/sched/sch_fq_codel.c
+ */
+#ifndef __NET_SCHED_FQ_H
+#define __NET_SCHED_FQ_H
+struct fq_tin;
+/**
+ * struct fq_flow - per traffic flow queue
+ *
+ * @tin: owner of this flow. Used to manage collisions, i.e. when a packet
+ *      hashes to an index which points to a flow that is already owned by a
+ *      different tin the packet is destined to. In such case the implementer
+ *      must provide a fallback flow
+ * @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++
+ *      (deficit round robin) based round robin queuing similar to the one
+ *      found in net/sched/sch_fq_codel.c
+ * @backlogchain: can be linked to other fq_flow and fq. Used to keep track of
+ *      fat flows and efficient head-dropping if packet limit is reached
+ * @queue: sk_buff queue to hold packets
+ * @backlog: number of bytes pending in the queue. The number of packets can be
+ *      found in @queue.qlen
+ * @deficit: used for DRR++
+ */
+struct fq_flow {
+        struct fq_tin *tin;
+        struct list_head flowchain;
+        struct list_head backlogchain;
+        struct sk_buff_head queue;
+        u32 backlog;
+        int deficit;
+};
+/**
+ * struct fq_tin - a logical container of fq_flows
+ *
+ * Used to group fq_flows into a logical aggregate. DRR++ scheme is used to
+ * pull interleaved packets out of the associated flows.
+ *
+ * @new_flows: linked list of fq_flow
+ * @old_flows: linked list of fq_flow
+ */
+struct fq_tin {
+        struct list_head new_flows;
+        struct list_head old_flows;
+        u32 backlog_bytes;
+        u32 backlog_packets;
+        u32 overlimit;
+        u32 collisions;
+        u32 flows;
+        u32 tx_bytes;
+        u32 tx_packets;
+};
+/**
+ * struct fq - main container for fair queuing purposes
+ *
+ * @backlogs: linked to fq_flows. Used to maintain fat flows for efficient
+ *      head-dropping when @backlog reaches @limit
+ * @limit: max number of packets that can be queued across all flows
+ * @backlog: number of packets queued across all flows
+ */
+struct fq {
+        struct fq_flow *flows;
+        struct list_head backlogs;
+        spinlock_t lock;
+        u32 flows_cnt;
+        u32 perturbation;
+        u32 limit;
+        u32 quantum;
+        u32 backlog;
+        u32 overlimit;
+        u32 collisions;
+};
+typedef struct sk_buff *fq_tin_dequeue_t(struct fq *,
+                                         struct fq_tin *,
+                                         struct fq_flow *flow);
+typedef void fq_skb_free_t(struct fq *,
+                           struct fq_tin *,
+                           struct fq_flow *,
+                           struct sk_buff *);
+typedef struct fq_flow *fq_flow_get_default_t(struct fq *,
+                                              struct fq_tin *,
+                                              int idx,
+                                              struct sk_buff *);
+#endif
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
new file mode 100644
index 000000000000..02eab7c51adb
--- /dev/null
+++ b/include/net/fq_impl.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 2016 Qualcomm Atheros, Inc
+ *
+ * GPL v2
+ *
+ * Based on net/sched/sch_fq_codel.c
+ */
+#ifndef __NET_SCHED_FQ_IMPL_H
+#define __NET_SCHED_FQ_IMPL_H
+#include <net/fq.h>
+/* functions that are embedded into includer */
+static struct sk_buff *fq_flow_dequeue(struct fq *fq,
+                                       struct fq_flow *flow)
+{
+        struct fq_tin *tin = flow->tin;
+        struct fq_flow *i;
+        struct sk_buff *skb;
+        lockdep_assert_held(&fq->lock);
+        skb = __skb_dequeue(&flow->queue);
+        if (!skb)
+                return NULL;
+        tin->backlog_bytes -= skb->len;
+        tin->backlog_packets--;
+        flow->backlog -= skb->len;
+        fq->backlog--;
+        if (flow->backlog == 0) {
+                list_del_init(&flow->backlogchain);
+        } else {
+                i = flow;
+                list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
+                        if (i->backlog < flow->backlog)
+                                break;
+                list_move_tail(&flow->backlogchain,
+                               &i->backlogchain);
+        }
+        return skb;
+}
+static struct sk_buff *fq_tin_dequeue(struct fq *fq,
+                                      struct fq_tin *tin,
+                                      fq_tin_dequeue_t dequeue_func)
+{
+        struct fq_flow *flow;
+        struct list_head *head;
+        struct sk_buff *skb;
+        lockdep_assert_held(&fq->lock);
+begin:
+        head = &tin->new_flows;
+        if (list_empty(head)) {
+                head = &tin->old_flows;
+                if (list_empty(head))
+                        return NULL;
+        }
+        flow = list_first_entry(head, struct fq_flow, flowchain);
+        if (flow->deficit <= 0) {
+                flow->deficit += fq->quantum;
+                list_move_tail(&flow->flowchain,
+                               &tin->old_flows);
+                goto begin;
+        }
+        skb = dequeue_func(fq, tin, flow);
+        if (!skb) {
+                /* force a pass through old_flows to prevent starvation */
+                if ((head == &tin->new_flows) &&
+                    !list_empty(&tin->old_flows)) {
+                        list_move_tail(&flow->flowchain, &tin->old_flows);
+                } else {
+                        list_del_init(&flow->flowchain);
+                        flow->tin = NULL;
+                }
+                goto begin;
+        }
+        flow->deficit -= skb->len;
+        tin->tx_bytes += skb->len;
+        tin->tx_packets++;
+        return skb;
+}
+static struct fq_flow *fq_flow_classify(struct fq *fq,
+                                        struct fq_tin *tin,
+                                        struct sk_buff *skb,
+                                        fq_flow_get_default_t get_default_func)
+{
+        struct fq_flow *flow;
+        u32 hash;
+        u32 idx;
+        lockdep_assert_held(&fq->lock);
+        hash = skb_get_hash_perturb(skb, fq->perturbation);
+        idx = reciprocal_scale(hash, fq->flows_cnt);
+        flow = &fq->flows[idx];
+        if (flow->tin && flow->tin != tin) {
+                flow = get_default_func(fq, tin, idx, skb);
+                tin->collisions++;
+                fq->collisions++;
+        }
+        if (!flow->tin)
+                tin->flows++;
+        return flow;
+}
+static void fq_tin_enqueue(struct fq *fq,
+                           struct fq_tin *tin,
+                           struct sk_buff *skb,
+                           fq_skb_free_t free_func,
+                           fq_flow_get_default_t get_default_func)
+{
+        struct fq_flow *flow;
+        struct fq_flow *i;
+        lockdep_assert_held(&fq->lock);
+        flow = fq_flow_classify(fq, tin, skb, get_default_func);
+        flow->tin = tin;
+        flow->backlog += skb->len;
+        tin->backlog_bytes += skb->len;
+        tin->backlog_packets++;
+        fq->backlog++;
+        if (list_empty(&flow->backlogchain))
+                list_add_tail(&flow->backlogchain, &fq->backlogs);
+        i = flow;
+        list_for_each_entry_continue_reverse(i, &fq->backlogs,
+                                             backlogchain)
+                if (i->backlog > flow->backlog)
+                        break;
+        list_move(&flow->backlogchain, &i->backlogchain);
+        if (list_empty(&flow->flowchain)) {
+                flow->deficit = fq->quantum;
+                list_add_tail(&flow->flowchain,
+                              &tin->new_flows);
+        }
+        __skb_queue_tail(&flow->queue, skb);
+        if (fq->backlog > fq->limit) {
+                flow = list_first_entry_or_null(&fq->backlogs,
+                                                struct fq_flow,
+                                                backlogchain);
+                if (!flow)
+                        return;
+                skb = fq_flow_dequeue(fq, flow);
+                if (!skb)
+                        return;
+                free_func(fq, flow->tin, flow, skb);
+                flow->tin->overlimit++;
+                fq->overlimit++;
+        }
+}
+static void fq_flow_reset(struct fq *fq,
+                          struct fq_flow *flow,
+                          fq_skb_free_t free_func)
+{
+        struct sk_buff *skb;
+        while ((skb = fq_flow_dequeue(fq, flow)))
+                free_func(fq, flow->tin, flow, skb);
+        if (!list_empty(&flow->flowchain))
+                list_del_init(&flow->flowchain);
+        if (!list_empty(&flow->backlogchain))
+                list_del_init(&flow->backlogchain);
+        flow->tin = NULL;
+        WARN_ON_ONCE(flow->backlog);
+}
+static void fq_tin_reset(struct fq *fq,
+                         struct fq_tin *tin,
+                         fq_skb_free_t free_func)
+{
+        struct list_head *head;
+        struct fq_flow *flow;
+        for (;;) {
+                head = &tin->new_flows;
+                if (list_empty(head)) {
+                        head = &tin->old_flows;
+                        if (list_empty(head))
+                                break;
+                }
+                flow = list_first_entry(head, struct fq_flow, flowchain);
+                fq_flow_reset(fq, flow, free_func);
+        }
+        WARN_ON_ONCE(tin->backlog_bytes);
+        WARN_ON_ONCE(tin->backlog_packets);
+}
+static void fq_flow_init(struct fq_flow *flow)
+{
+        INIT_LIST_HEAD(&flow->flowchain);
+        INIT_LIST_HEAD(&flow->backlogchain);
+        __skb_queue_head_init(&flow->queue);
+}
+static void fq_tin_init(struct fq_tin *tin)
+{
+        INIT_LIST_HEAD(&tin->new_flows);
+        INIT_LIST_HEAD(&tin->old_flows);
+}
+static int fq_init(struct fq *fq, int flows_cnt)
+{
+        int i;
+        memset(fq, 0, sizeof(fq[0]));
+        INIT_LIST_HEAD(&fq->backlogs);
+        spin_lock_init(&fq->lock);
+        fq->flows_cnt = max_t(u32, flows_cnt, 1);
+        fq->perturbation = prandom_u32();
+        fq->quantum = 300;
+        fq->limit = 8192;
+        fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
+        if (!fq->flows)
+                return -ENOMEM;
+        for (i = 0; i < fq->flows_cnt; i++)
+                fq_flow_init(&fq->flows[i]);
+        return 0;
+}
+static void fq_reset(struct fq *fq,
+                     fq_skb_free_t free_func)
+{
+        int i;
+        for (i = 0; i < fq->flows_cnt; i++)
+                fq_flow_reset(fq, &fq->flows[i], free_func);
+        kfree(fq->flows);
+        fq->flows = NULL;
+}
+#endif
author	Michal Kazior <michal.kazior@tieto.com>	2016-04-22 08:20:13 -0400
committer	David S. Miller <davem@davemloft.net>	2016-04-25 16:45:53 -0400
commit	557fc4a098039cf296fe33f118bab99a925fd881 (patch)
tree	707eb27e1a7af4b9f5a84a85bf811bd32123289b
parent	05d82c42509c2936e328a6b8b1e1cd5684f427ac (diff)

diff --git a/include/net/fq.h b/include/net/fq.h new file mode 100644 index 000000000000..268b49049c37 --- /dev/null +++ b/include/net/fq.h
@@ -0,0 +1,95 @@
	1	/*
	2	* Copyright (c) 2016 Qualcomm Atheros, Inc
	3	*
	4	* GPL v2
	5	*
	6	* Based on net/sched/sch_fq_codel.c
	7	*/
	8	#ifndef __NET_SCHED_FQ_H
	9	#define __NET_SCHED_FQ_H
	10
	11	struct fq_tin;
	12
	13	/**
	14	* struct fq_flow - per traffic flow queue
	15	*
	16	* @tin: owner of this flow. Used to manage collisions, i.e. when a packet
	17	* hashes to an index which points to a flow that is already owned by a
	18	* different tin the packet is destined to. In such case the implementer
	19	* must provide a fallback flow
	20	* @flowchain: can be linked to fq_tin's new_flows or old_flows. Used for DRR++
	21	* (deficit round robin) based round robin queuing similar to the one
	22	* found in net/sched/sch_fq_codel.c
	23	* @backlogchain: can be linked to other fq_flow and fq. Used to keep track of
	24	* fat flows and efficient head-dropping if packet limit is reached
	25	* @queue: sk_buff queue to hold packets
	26	* @backlog: number of bytes pending in the queue. The number of packets can be
	27	* found in @queue.qlen
	28	* @deficit: used for DRR++
	29	*/
	30	struct fq_flow {
	31	struct fq_tin *tin;
	32	struct list_head flowchain;
	33	struct list_head backlogchain;
	34	struct sk_buff_head queue;
	35	u32 backlog;
	36	int deficit;
	37	};
	38
	39	/**
	40	* struct fq_tin - a logical container of fq_flows
	41	*
	42	* Used to group fq_flows into a logical aggregate. DRR++ scheme is used to
	43	* pull interleaved packets out of the associated flows.
	44	*
	45	* @new_flows: linked list of fq_flow
	46	* @old_flows: linked list of fq_flow
	47	*/
	48	struct fq_tin {
	49	struct list_head new_flows;
	50	struct list_head old_flows;
	51	u32 backlog_bytes;
	52	u32 backlog_packets;
	53	u32 overlimit;
	54	u32 collisions;
	55	u32 flows;
	56	u32 tx_bytes;
	57	u32 tx_packets;
	58	};
	59
	60	/**
	61	* struct fq - main container for fair queuing purposes
	62	*
	63	* @backlogs: linked to fq_flows. Used to maintain fat flows for efficient
	64	* head-dropping when @backlog reaches @limit
	65	* @limit: max number of packets that can be queued across all flows
	66	* @backlog: number of packets queued across all flows
	67	*/
	68	struct fq {
	69	struct fq_flow *flows;
	70	struct list_head backlogs;
	71	spinlock_t lock;
	72	u32 flows_cnt;
	73	u32 perturbation;
	74	u32 limit;
	75	u32 quantum;
	76	u32 backlog;
	77	u32 overlimit;
	78	u32 collisions;
	79	};
	80
	81	typedef struct sk_buff fq_tin_dequeue_t(struct fq ,
	82	struct fq_tin *,
	83	struct fq_flow *flow);
	84
	85	typedef void fq_skb_free_t(struct fq *,
	86	struct fq_tin *,
	87	struct fq_flow *,
	88	struct sk_buff *);
	89
	90	typedef struct fq_flow fq_flow_get_default_t(struct fq ,
	91	struct fq_tin *,
	92	int idx,
	93	struct sk_buff *);
	94
	95	#endif


diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h new file mode 100644 index 000000000000..02eab7c51adb --- /dev/null +++ b/include/net/fq_impl.h
@@ -0,0 +1,269 @@
	1	/*
	2	* Copyright (c) 2016 Qualcomm Atheros, Inc
	3	*
	4	* GPL v2
	5	*
	6	* Based on net/sched/sch_fq_codel.c
	7	*/
	8	#ifndef __NET_SCHED_FQ_IMPL_H
	9	#define __NET_SCHED_FQ_IMPL_H
	10
	11	#include <net/fq.h>
	12
	13	/* functions that are embedded into includer */
	14
	15	static struct sk_buff fq_flow_dequeue(struct fq fq,
	16	struct fq_flow *flow)
	17	{
	18	struct fq_tin *tin = flow->tin;
	19	struct fq_flow *i;
	20	struct sk_buff *skb;
	21
	22	lockdep_assert_held(&fq->lock);
	23
	24	skb = __skb_dequeue(&flow->queue);
	25	if (!skb)
	26	return NULL;
	27
	28	tin->backlog_bytes -= skb->len;
	29	tin->backlog_packets--;
	30	flow->backlog -= skb->len;
	31	fq->backlog--;
	32
	33	if (flow->backlog == 0) {
	34	list_del_init(&flow->backlogchain);
	35	} else {
	36	i = flow;
	37
	38	list_for_each_entry_continue(i, &fq->backlogs, backlogchain)
	39	if (i->backlog < flow->backlog)
	40	break;
	41
	42	list_move_tail(&flow->backlogchain,
	43	&i->backlogchain);
	44	}
	45
	46	return skb;
	47	}
	48
	49	static struct sk_buff fq_tin_dequeue(struct fq fq,
	50	struct fq_tin *tin,
	51	fq_tin_dequeue_t dequeue_func)
	52	{
	53	struct fq_flow *flow;
	54	struct list_head *head;
	55	struct sk_buff *skb;
	56
	57	lockdep_assert_held(&fq->lock);
	58
	59	begin:
	60	head = &tin->new_flows;
	61	if (list_empty(head)) {
	62	head = &tin->old_flows;
	63	if (list_empty(head))
	64	return NULL;
	65	}
	66
	67	flow = list_first_entry(head, struct fq_flow, flowchain);
	68
	69	if (flow->deficit <= 0) {
	70	flow->deficit += fq->quantum;
	71	list_move_tail(&flow->flowchain,
	72	&tin->old_flows);
	73	goto begin;
	74	}
	75
	76	skb = dequeue_func(fq, tin, flow);
	77	if (!skb) {
	78	/* force a pass through old_flows to prevent starvation */
	79	if ((head == &tin->new_flows) &&
	80	!list_empty(&tin->old_flows)) {
	81	list_move_tail(&flow->flowchain, &tin->old_flows);
	82	} else {
	83	list_del_init(&flow->flowchain);
	84	flow->tin = NULL;
	85	}
	86	goto begin;
	87	}
	88
	89	flow->deficit -= skb->len;
	90	tin->tx_bytes += skb->len;
	91	tin->tx_packets++;
	92
	93	return skb;
	94	}
	95
	96	static struct fq_flow fq_flow_classify(struct fq fq,
	97	struct fq_tin *tin,
	98	struct sk_buff *skb,
	99	fq_flow_get_default_t get_default_func)
	100	{
	101	struct fq_flow *flow;
	102	u32 hash;
	103	u32 idx;
	104
	105	lockdep_assert_held(&fq->lock);
	106
	107	hash = skb_get_hash_perturb(skb, fq->perturbation);
	108	idx = reciprocal_scale(hash, fq->flows_cnt);
	109	flow = &fq->flows[idx];
	110
	111	if (flow->tin && flow->tin != tin) {
	112	flow = get_default_func(fq, tin, idx, skb);
	113	tin->collisions++;
	114	fq->collisions++;
	115	}
	116
	117	if (!flow->tin)
	118	tin->flows++;
	119
	120	return flow;
	121	}
	122
	123	static void fq_tin_enqueue(struct fq *fq,
	124	struct fq_tin *tin,
	125	struct sk_buff *skb,
	126	fq_skb_free_t free_func,
	127	fq_flow_get_default_t get_default_func)
	128	{
	129	struct fq_flow *flow;
	130	struct fq_flow *i;
	131
	132	lockdep_assert_held(&fq->lock);
	133
	134	flow = fq_flow_classify(fq, tin, skb, get_default_func);
	135
	136	flow->tin = tin;
	137	flow->backlog += skb->len;
	138	tin->backlog_bytes += skb->len;
	139	tin->backlog_packets++;
	140	fq->backlog++;
	141
	142	if (list_empty(&flow->backlogchain))
	143	list_add_tail(&flow->backlogchain, &fq->backlogs);
	144
	145	i = flow;
	146	list_for_each_entry_continue_reverse(i, &fq->backlogs,
	147	backlogchain)
	148	if (i->backlog > flow->backlog)
	149	break;
	150
	151	list_move(&flow->backlogchain, &i->backlogchain);
	152
	153	if (list_empty(&flow->flowchain)) {
	154	flow->deficit = fq->quantum;
	155	list_add_tail(&flow->flowchain,
	156	&tin->new_flows);
	157	}
	158
	159	__skb_queue_tail(&flow->queue, skb);
	160
	161	if (fq->backlog > fq->limit) {
	162	flow = list_first_entry_or_null(&fq->backlogs,
	163	struct fq_flow,
	164	backlogchain);
	165	if (!flow)
	166	return;
	167
	168	skb = fq_flow_dequeue(fq, flow);
	169	if (!skb)
	170	return;
	171
	172	free_func(fq, flow->tin, flow, skb);
	173
	174	flow->tin->overlimit++;
	175	fq->overlimit++;
	176	}
	177	}
	178
	179	static void fq_flow_reset(struct fq *fq,
	180	struct fq_flow *flow,
	181	fq_skb_free_t free_func)
	182	{
	183	struct sk_buff *skb;
	184
	185	while ((skb = fq_flow_dequeue(fq, flow)))
	186	free_func(fq, flow->tin, flow, skb);
	187
	188	if (!list_empty(&flow->flowchain))
	189	list_del_init(&flow->flowchain);
	190
	191	if (!list_empty(&flow->backlogchain))
	192	list_del_init(&flow->backlogchain);
	193
	194	flow->tin = NULL;
	195
	196	WARN_ON_ONCE(flow->backlog);
	197	}
	198
	199	static void fq_tin_reset(struct fq *fq,
	200	struct fq_tin *tin,
	201	fq_skb_free_t free_func)
	202	{
	203	struct list_head *head;
	204	struct fq_flow *flow;
	205
	206	for (;;) {
	207	head = &tin->new_flows;
	208	if (list_empty(head)) {
	209	head = &tin->old_flows;
	210	if (list_empty(head))
	211	break;
	212	}
	213
	214	flow = list_first_entry(head, struct fq_flow, flowchain);
	215	fq_flow_reset(fq, flow, free_func);
	216	}
	217
	218	WARN_ON_ONCE(tin->backlog_bytes);
	219	WARN_ON_ONCE(tin->backlog_packets);
	220	}
	221
	222	static void fq_flow_init(struct fq_flow *flow)
	223	{
	224	INIT_LIST_HEAD(&flow->flowchain);
	225	INIT_LIST_HEAD(&flow->backlogchain);
	226	__skb_queue_head_init(&flow->queue);
	227	}
	228
	229	static void fq_tin_init(struct fq_tin *tin)
	230	{
	231	INIT_LIST_HEAD(&tin->new_flows);
	232	INIT_LIST_HEAD(&tin->old_flows);
	233	}
	234
	235	static int fq_init(struct fq *fq, int flows_cnt)
	236	{
	237	int i;
	238
	239	memset(fq, 0, sizeof(fq[0]));
	240	INIT_LIST_HEAD(&fq->backlogs);
	241	spin_lock_init(&fq->lock);
	242	fq->flows_cnt = max_t(u32, flows_cnt, 1);
	243	fq->perturbation = prandom_u32();
	244	fq->quantum = 300;
	245	fq->limit = 8192;
	246
	247	fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL);
	248	if (!fq->flows)
	249	return -ENOMEM;
	250
	251	for (i = 0; i < fq->flows_cnt; i++)
	252	fq_flow_init(&fq->flows[i]);
	253
	254	return 0;
	255	}
	256
	257	static void fq_reset(struct fq *fq,
	258	fq_skb_free_t free_func)
	259	{
	260	int i;
	261
	262	for (i = 0; i < fq->flows_cnt; i++)
	263	fq_flow_reset(fq, &fq->flows[i], free_func);
	264
	265	kfree(fq->flows);
	266	fq->flows = NULL;
	267	}
	268
	269	#endif