diff options
Diffstat (limited to 'net/ipv4/ipvs/ip_vs_sed.c')
-rw-r--r-- | net/ipv4/ipvs/ip_vs_sed.c | 163 |
1 files changed, 163 insertions, 0 deletions
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c new file mode 100644 index 000000000000..ff366f7390d9 --- /dev/null +++ b/net/ipv4/ipvs/ip_vs_sed.c | |||
@@ -0,0 +1,163 @@ | |||
1 | /* | ||
2 | * IPVS: Shortest Expected Delay scheduling module | ||
3 | * | ||
4 | * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $ | ||
5 | * | ||
6 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * Changes: | ||
14 | * | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * The SED algorithm attempts to minimize each job's expected delay until | ||
19 | * completion. The expected delay that the job will experience is | ||
20 | * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of | ||
21 | * jobs on the the ith server and Ui is the fixed service rate (weight) of | ||
22 | * the ith server. The SED algorithm adopts a greedy policy that each does | ||
23 | * what is in its own best interest, i.e. to join the queue which would | ||
24 | * minimize its expected delay of completion. | ||
25 | * | ||
26 | * See the following paper for more information: | ||
27 | * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing | ||
28 | * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88, | ||
29 | * pages 986-994, 1988. | ||
30 | * | ||
31 | * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me. | ||
32 | * | ||
33 | * The difference between SED and WLC is that SED includes the incoming | ||
34 | * job in the cost function (the increment of 1). SED may outperform | ||
35 | * WLC, while scheduling big jobs under larger heterogeneous systems | ||
36 | * (the server weight varies a lot). | ||
37 | * | ||
38 | */ | ||
39 | |||
40 | #include <linux/module.h> | ||
41 | #include <linux/kernel.h> | ||
42 | |||
43 | #include <net/ip_vs.h> | ||
44 | |||
45 | |||
46 | static int | ||
47 | ip_vs_sed_init_svc(struct ip_vs_service *svc) | ||
48 | { | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | |||
53 | static int | ||
54 | ip_vs_sed_done_svc(struct ip_vs_service *svc) | ||
55 | { | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | |||
60 | static int | ||
61 | ip_vs_sed_update_svc(struct ip_vs_service *svc) | ||
62 | { | ||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | |||
67 | static inline unsigned int | ||
68 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | ||
69 | { | ||
70 | /* | ||
71 | * We only use the active connection number in the cost | ||
72 | * calculation here. | ||
73 | */ | ||
74 | return atomic_read(&dest->activeconns) + 1; | ||
75 | } | ||
76 | |||
77 | |||
78 | /* | ||
79 | * Weighted Least Connection scheduling | ||
80 | */ | ||
81 | static struct ip_vs_dest * | ||
82 | ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | ||
83 | { | ||
84 | struct ip_vs_dest *dest, *least; | ||
85 | unsigned int loh, doh; | ||
86 | |||
87 | IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n"); | ||
88 | |||
89 | /* | ||
90 | * We calculate the load of each dest server as follows: | ||
91 | * (server expected overhead) / dest->weight | ||
92 | * | ||
93 | * Remember -- no floats in kernel mode!!! | ||
94 | * The comparison of h1*w2 > h2*w1 is equivalent to that of | ||
95 | * h1/w1 > h2/w2 | ||
96 | * if every weight is larger than zero. | ||
97 | * | ||
98 | * The server with weight=0 is quiesced and will not receive any | ||
99 | * new connections. | ||
100 | */ | ||
101 | |||
102 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
103 | if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && | ||
104 | atomic_read(&dest->weight) > 0) { | ||
105 | least = dest; | ||
106 | loh = ip_vs_sed_dest_overhead(least); | ||
107 | goto nextstage; | ||
108 | } | ||
109 | } | ||
110 | return NULL; | ||
111 | |||
112 | /* | ||
113 | * Find the destination with the least load. | ||
114 | */ | ||
115 | nextstage: | ||
116 | list_for_each_entry_continue(dest, &svc->destinations, n_list) { | ||
117 | if (dest->flags & IP_VS_DEST_F_OVERLOAD) | ||
118 | continue; | ||
119 | doh = ip_vs_sed_dest_overhead(dest); | ||
120 | if (loh * atomic_read(&dest->weight) > | ||
121 | doh * atomic_read(&least->weight)) { | ||
122 | least = dest; | ||
123 | loh = doh; | ||
124 | } | ||
125 | } | ||
126 | |||
127 | IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " | ||
128 | "activeconns %d refcnt %d weight %d overhead %d\n", | ||
129 | NIPQUAD(least->addr), ntohs(least->port), | ||
130 | atomic_read(&least->activeconns), | ||
131 | atomic_read(&least->refcnt), | ||
132 | atomic_read(&least->weight), loh); | ||
133 | |||
134 | return least; | ||
135 | } | ||
136 | |||
137 | |||
138 | static struct ip_vs_scheduler ip_vs_sed_scheduler = | ||
139 | { | ||
140 | .name = "sed", | ||
141 | .refcnt = ATOMIC_INIT(0), | ||
142 | .module = THIS_MODULE, | ||
143 | .init_service = ip_vs_sed_init_svc, | ||
144 | .done_service = ip_vs_sed_done_svc, | ||
145 | .update_service = ip_vs_sed_update_svc, | ||
146 | .schedule = ip_vs_sed_schedule, | ||
147 | }; | ||
148 | |||
149 | |||
150 | static int __init ip_vs_sed_init(void) | ||
151 | { | ||
152 | INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list); | ||
153 | return register_ip_vs_scheduler(&ip_vs_sed_scheduler); | ||
154 | } | ||
155 | |||
156 | static void __exit ip_vs_sed_cleanup(void) | ||
157 | { | ||
158 | unregister_ip_vs_scheduler(&ip_vs_sed_scheduler); | ||
159 | } | ||
160 | |||
161 | module_init(ip_vs_sed_init); | ||
162 | module_exit(ip_vs_sed_cleanup); | ||
163 | MODULE_LICENSE("GPL"); | ||