1 files changed, 163 insertions, 0 deletions
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c
new file mode 100644
index 000000000000..ff366f7390d9
--- /dev/null
+++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -0,0 +1,163 @@
+/*
+ * IPVS:        Shortest Expected Delay scheduling module
+ *
+ * Version:     $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
+ *
+ * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
+ *
+ *              This program is free software; you can redistribute it and/or
+ *              modify it under the terms of the GNU General Public License
+ *              as published by the Free Software Foundation; either version
+ *              2 of the License, or (at your option) any later version.
+ *
+ * Changes:
+ *
+ */
+/*
+ * The SED algorithm attempts to minimize each job's expected delay until
+ * completion. The expected delay that the job will experience is
+ * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
+ * jobs on the ith server and Ui is the fixed service rate (weight) of
+ * the ith server. The SED algorithm adopts a greedy policy that each does
+ * what is in its own best interest, i.e. to join the queue which would
+ * minimize its expected delay of completion.
+ *
+ * See the following paper for more information:
+ * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
+ * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
+ * pages 986-994, 1988.
+ *
+ * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
+ *
+ * The difference between SED and WLC is that SED includes the incoming
+ * job in the cost function (the increment of 1). SED may outperform
+ * WLC, while scheduling big jobs under larger heterogeneous systems
+ * (the server weight varies a lot).
+ *
+ */
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <net/ip_vs.h>
+/*
+ *      init_service callback of the "sed" scheduler.
+ *      Nothing is set up for svc here; the call always returns 0.
+ */
+static int ip_vs_sed_init_svc(struct ip_vs_service *svc)
+{
+        return 0;
+}
+/*
+ *      done_service callback of the "sed" scheduler.
+ *      Nothing is torn down for svc here; the call always returns 0.
+ */
+static int ip_vs_sed_done_svc(struct ip_vs_service *svc)
+{
+        return 0;
+}
+/*
+ *      update_service callback of the "sed" scheduler.
+ *      No state is refreshed for svc here; the call always returns 0.
+ */
+static int ip_vs_sed_update_svc(struct ip_vs_service *svc)
+{
+        return 0;
+}
+/*
+ *      Overhead of dest as seen by a job that is about to arrive:
+ *      the current active connection count plus one for that job.
+ *      Only dest->activeconns enters the cost; nothing else is read.
+ */
+static inline unsigned int
+ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
+{
+        int active = atomic_read(&dest->activeconns);
+        return active + 1;
+}
+/*
+ *      Shortest Expected Delay scheduling
+ *
+ *      Picks, from svc->destinations, the destination whose expected
+ *      delay (activeconns + 1) / weight is smallest.  Destinations
+ *      flagged IP_VS_DEST_F_OVERLOAD are skipped.  Returns NULL when no
+ *      destination is both non-overloaded and has a weight above zero.
+ *      skb is not examined.
+ */
+static struct ip_vs_dest *
+ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
+{
+        struct ip_vs_dest *dest, *least;
+        unsigned int loh, doh;
+        IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
+        /*
+         * We calculate the load of each dest server as follows:
+         *      (server expected overhead) / dest->weight
+         *
+         * Remember -- no floats in kernel mode!!!
+         * The comparison of h1*w2 > h2*w1 is equivalent to that of
+         *                h1/w1 > h2/w2
+         * if every weight is larger than zero.
+         *
+         * The server with weight=0 is quiesced and will not receive any
+         * new connections.
+         */
+        /*
+         *    Seed the search with the first destination that is not
+         *    overloaded and has a positive weight.
+         */
+        list_for_each_entry(dest, &svc->destinations, n_list) {
+                if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
+                    atomic_read(&dest->weight) > 0) {
+                        least = dest;
+                        loh = ip_vs_sed_dest_overhead(least);
+                        goto nextstage;
+                }
+        }
+        return NULL;
+        /*
+         *    Find the destination with the least load among the
+         *    remaining entries.  The comparison is strict, so on a tie
+         *    the earlier destination is kept.
+         */
+  nextstage:
+        list_for_each_entry_continue(dest, &svc->destinations, n_list) {
+                if (dest->flags & IP_VS_DEST_F_OVERLOAD)
+                        continue;
+                doh = ip_vs_sed_dest_overhead(dest);
+                /*
+                 * Multiply in 64 bits: overhead * weight can exceed
+                 * 32 bits, and a wrapped product would invert the
+                 * comparison and select the wrong server.
+                 */
+                if ((long long)loh * atomic_read(&dest->weight) >
+                    (long long)doh * atomic_read(&least->weight)) {
+                        least = dest;
+                        loh = doh;
+                }
+        }
+        IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
+                  "activeconns %d refcnt %d weight %d overhead %u\n",
+                  NIPQUAD(least->addr), ntohs(least->port),
+                  atomic_read(&least->activeconns),
+                  atomic_read(&least->refcnt),
+                  atomic_read(&least->weight), loh);
+        return least;
+}
+/*
+ *      Descriptor of the "sed" scheduler.  It is the object passed to
+ *      register_ip_vs_scheduler() / unregister_ip_vs_scheduler() by the
+ *      module init and exit routines in this file.
+ */
+static struct ip_vs_scheduler ip_vs_sed_scheduler = {
+        .name           = "sed",
+        .refcnt         = ATOMIC_INIT(0),
+        .module         = THIS_MODULE,
+        .init_service   = ip_vs_sed_init_svc,
+        .done_service   = ip_vs_sed_done_svc,
+        .update_service = ip_vs_sed_update_svc,
+        .schedule       = ip_vs_sed_schedule,
+};
+/*
+ *      Module entry point: initialise the descriptor's n_list head,
+ *      then register the scheduler and hand back the registration
+ *      result unchanged.
+ */
+static int __init ip_vs_sed_init(void)
+{
+        int ret;
+        INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
+        ret = register_ip_vs_scheduler(&ip_vs_sed_scheduler);
+        return ret;
+}
+/*
+ *      Module exit point: unregister the "sed" scheduler descriptor.
+ */
+static void __exit ip_vs_sed_cleanup(void)
+{
+        unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
+}
+/*
+ *      Module metadata and load/unload hooks.
+ */
+MODULE_LICENSE("GPL");
+module_init(ip_vs_sed_init);
+module_exit(ip_vs_sed_cleanup);

diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c new file mode 100644 index 000000000000..ff366f7390d9 --- /dev/null +++ b/net/ipv4/ipvs/ip_vs_sed.c
@@ -0,0 +1,163 @@
	1	/*
	2	* IPVS: Shortest Expected Delay scheduling module
	3	*
	4	* Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
	5	*
	6	* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
	7	*
	8	* This program is free software; you can redistribute it and/or
	9	* modify it under the terms of the GNU General Public License
	10	* as published by the Free Software Foundation; either version
	11	* 2 of the License, or (at your option) any later version.
	12	*
	13	* Changes:
	14	*
	15	*/
	16
	17	/*
	18	* The SED algorithm attempts to minimize each job's expected delay until
	19	* completion. The expected delay that the job will experience is
	20	* (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
	21	* jobs on the ith server and Ui is the fixed service rate (weight) of
	22	* the ith server. The SED algorithm adopts a greedy policy that each does
	23	* what is in its own best interest, i.e. to join the queue which would
	24	* minimize its expected delay of completion.
	25	*
	26	* See the following paper for more information:
	27	* A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
	28	* in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
	29	* pages 986-994, 1988.
	30	*
	31	* Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
	32	*
	33	* The difference between SED and WLC is that SED includes the incoming
	34	* job in the cost function (the increment of 1). SED may outperform
	35	* WLC, while scheduling big jobs under larger heterogeneous systems
	36	* (the server weight varies a lot).
	37	*
	38	*/
	39
	40	#include <linux/module.h>
	41	#include <linux/kernel.h>
	42
	43	#include <net/ip_vs.h>
	44
	45
	46	static int
	47	ip_vs_sed_init_svc(struct ip_vs_service *svc)
	48	{
	49	return 0;
	50	}
	51
	52
	53	static int
	54	ip_vs_sed_done_svc(struct ip_vs_service *svc)
	55	{
	56	return 0;
	57	}
	58
	59
	60	static int
	61	ip_vs_sed_update_svc(struct ip_vs_service *svc)
	62	{
	63	return 0;
	64	}
	65
	66
	67	static inline unsigned int
	68	ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
	69	{
	70	/*
	71	* We only use the active connection number in the cost
	72	* calculation here.
	73	*/
	74	return atomic_read(&dest->activeconns) + 1;
	75	}
	76
	77
	78	/*
	79	* Shortest Expected Delay scheduling
	80	*/
	81	static struct ip_vs_dest *
	82	ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
	83	{
	84	struct ip_vs_dest *dest, *least;
	85	unsigned int loh, doh;
	86
	87	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
	88
	89	/*
	90	* We calculate the load of each dest server as follows:
	91	* (server expected overhead) / dest->weight
	92	*
	93	* Remember -- no floats in kernel mode!!!
	94	* The comparison of h1*w2 > h2*w1 is equivalent to that of
	95	* h1/w1 > h2/w2
	96	* if every weight is larger than zero.
	97	*
	98	* The server with weight=0 is quiesced and will not receive any
	99	* new connections.
	100	*/
	101
	102	list_for_each_entry(dest, &svc->destinations, n_list) {
	103	if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
	104	atomic_read(&dest->weight) > 0) {
	105	least = dest;
	106	loh = ip_vs_sed_dest_overhead(least);
	107	goto nextstage;
	108	}
	109	}
	110	return NULL;
	111
	112	/*
	113	* Find the destination with the least load.
	114	*/
	115	nextstage:
	116	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
	117	if (dest->flags & IP_VS_DEST_F_OVERLOAD)
	118	continue;
	119	doh = ip_vs_sed_dest_overhead(dest);
	120	if (loh * atomic_read(&dest->weight) >
	121	doh * atomic_read(&least->weight)) {
	122	least = dest;
	123	loh = doh;
	124	}
	125	}
	126
	127	IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
	128	"activeconns %d refcnt %d weight %d overhead %d\n",
	129	NIPQUAD(least->addr), ntohs(least->port),
	130	atomic_read(&least->activeconns),
	131	atomic_read(&least->refcnt),
	132	atomic_read(&least->weight), loh);
	133
	134	return least;
	135	}
	136
	137
	138	static struct ip_vs_scheduler ip_vs_sed_scheduler =
	139	{
	140	.name = "sed",
	141	.refcnt = ATOMIC_INIT(0),
	142	.module = THIS_MODULE,
	143	.init_service = ip_vs_sed_init_svc,
	144	.done_service = ip_vs_sed_done_svc,
	145	.update_service = ip_vs_sed_update_svc,
	146	.schedule = ip_vs_sed_schedule,
	147	};
	148
	149
	150	static int __init ip_vs_sed_init(void)
	151	{
	152	INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
	153	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
	154	}
	155
	156	static void __exit ip_vs_sed_cleanup(void)
	157	{
	158	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
	159	}
	160
	161	module_init(ip_vs_sed_init);
	162	module_exit(ip_vs_sed_cleanup);
	163	MODULE_LICENSE("GPL");