about | summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: Alexander Frolkin <avf@eldamar.org.uk> 2013-06-19 05:54:25 -0400
committer: Simon Horman <horms@verge.net.au> 2013-06-26 05:01:46 -0400
commit: eba3b5a78799d21dea05118b294524958f0ab592 (patch)
tree: 2f84838b89f17c7b10cee8fa618f232c64e9fc1d /net
parent: acaac5d8bbedf6bd96f53960780942e1ad90d70e (diff)
ipvs: SH fallback and L4 hashing
By default the SH scheduler rejects connections that are hashed onto a realserver of weight 0. This patch adds a flag to make SH choose a different realserver in this case, instead of rejecting the connection.

The patch also adds a flag to make SH include the source port (TCP, UDP, SCTP) in the hash as well as the source address. This basically allows for deterministic round-robin load balancing (i.e., where any director in a cluster of directors with identical config will send the same packet the same way).

The flags are service flags (IP_VS_SVC_F_SCHED*) so that these options can be set per service. They are set using a new option to ipvsadm.

Signed-off-by: Alexander Frolkin <avf@eldamar.org.uk>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net')
-rw-r--r-- net/netfilter/ipvs/ip_vs_sh.c 100
1 files changed, 85 insertions, 15 deletions
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index e0d5d1653566..f16c027df15b 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -48,6 +48,10 @@
48 48
49#include <net/ip_vs.h> 49#include <net/ip_vs.h>
50 50
51#include <net/tcp.h>
52#include <linux/udp.h>
53#include <linux/sctp.h>
54
51 55
52/* 56/*
53 * IPVS SH bucket 57 * IPVS SH bucket
@@ -71,10 +75,19 @@ struct ip_vs_sh_state {
71 struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE]; 75 struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
72}; 76};
73 77
78/* Helper function to determine if server is unavailable */
79static inline bool is_unavailable(struct ip_vs_dest *dest)
80{
81 return atomic_read(&dest->weight) <= 0 ||
82 dest->flags & IP_VS_DEST_F_OVERLOAD;
83}
84
74/* 85/*
75 * Returns hash value for IPVS SH entry 86 * Returns hash value for IPVS SH entry
76 */ 87 */
77static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr) 88static inline unsigned int
89ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr,
90 __be16 port, unsigned int offset)
78{ 91{
79 __be32 addr_fold = addr->ip; 92 __be32 addr_fold = addr->ip;
80 93
@@ -83,7 +96,8 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
83 addr_fold = addr->ip6[0]^addr->ip6[1]^ 96 addr_fold = addr->ip6[0]^addr->ip6[1]^
84 addr->ip6[2]^addr->ip6[3]; 97 addr->ip6[2]^addr->ip6[3];
85#endif 98#endif
86 return (ntohl(addr_fold)*2654435761UL) & IP_VS_SH_TAB_MASK; 99 return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) &
100 IP_VS_SH_TAB_MASK;
87} 101}
88 102
89 103
@@ -91,12 +105,42 @@ static inline unsigned int ip_vs_sh_hashkey(int af, const union nf_inet_addr *ad
91 * Get ip_vs_dest associated with supplied parameters. 105 * Get ip_vs_dest associated with supplied parameters.
92 */ 106 */
93static inline struct ip_vs_dest * 107static inline struct ip_vs_dest *
94ip_vs_sh_get(int af, struct ip_vs_sh_state *s, const union nf_inet_addr *addr) 108ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
109 const union nf_inet_addr *addr, __be16 port)
95{ 110{
96 return rcu_dereference(s->buckets[ip_vs_sh_hashkey(af, addr)].dest); 111 unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0);
112 struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest);
113
114 return (!dest || is_unavailable(dest)) ? NULL : dest;
97} 115}
98 116
99 117
118/* As ip_vs_sh_get, but with fallback if selected server is unavailable */
119static inline struct ip_vs_dest *
120ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s,
121 const union nf_inet_addr *addr, __be16 port)
122{
123 unsigned int offset;
124 unsigned int hash;
125 struct ip_vs_dest *dest;
126
127 for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) {
128 hash = ip_vs_sh_hashkey(svc->af, addr, port, offset);
129 dest = rcu_dereference(s->buckets[hash].dest);
130 if (!dest)
131 break;
132 if (is_unavailable(dest))
133 IP_VS_DBG_BUF(6, "SH: selected unavailable server "
134 "%s:%d (offset %d)",
135 IP_VS_DBG_ADDR(svc->af, &dest->addr),
136 ntohs(dest->port), offset);
137 else
138 return dest;
139 }
140
141 return NULL;
142}
143
100/* 144/*
101 * Assign all the hash buckets of the specified table with the service. 145 * Assign all the hash buckets of the specified table with the service.
102 */ 146 */
@@ -213,13 +257,33 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
213} 257}
214 258
215 259
216/* 260/* Helper function to get port number */
217 * If the dest flags is set with IP_VS_DEST_F_OVERLOAD, 261static inline __be16
218 * consider that the server is overloaded here. 262ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
219 */
220static inline int is_overloaded(struct ip_vs_dest *dest)
221{ 263{
222 return dest->flags & IP_VS_DEST_F_OVERLOAD; 264 __be16 port;
265 struct tcphdr _tcph, *th;
266 struct udphdr _udph, *uh;
267 sctp_sctphdr_t _sctph, *sh;
268
269 switch (iph->protocol) {
270 case IPPROTO_TCP:
271 th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
272 port = th->source;
273 break;
274 case IPPROTO_UDP:
275 uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
276 port = uh->source;
277 break;
278 case IPPROTO_SCTP:
279 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
280 port = sh->source;
281 break;
282 default:
283 port = 0;
284 }
285
286 return port;
223} 287}
224 288
225 289
@@ -232,15 +296,21 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
232{ 296{
233 struct ip_vs_dest *dest; 297 struct ip_vs_dest *dest;
234 struct ip_vs_sh_state *s; 298 struct ip_vs_sh_state *s;
299 __be16 port = 0;
235 300
236 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); 301 IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
237 302
303 if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT)
304 port = ip_vs_sh_get_port(skb, iph);
305
238 s = (struct ip_vs_sh_state *) svc->sched_data; 306 s = (struct ip_vs_sh_state *) svc->sched_data;
239 dest = ip_vs_sh_get(svc->af, s, &iph->saddr); 307
240 if (!dest 308 if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
241 || !(dest->flags & IP_VS_DEST_F_AVAILABLE) 309 dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
242 || atomic_read(&dest->weight) <= 0 310 else
243 || is_overloaded(dest)) { 311 dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
312
313 if (!dest) {
244 ip_vs_scheduler_err(svc, "no destination available"); 314 ip_vs_scheduler_err(svc, "no destination available");
245 return NULL; 315 return NULL;
246 } 316 }