diff options
| -rw-r--r-- | net/netfilter/ipvs/ip_vs_mh.c | 540 |
1 files changed, 540 insertions, 0 deletions
diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c new file mode 100644 index 000000000000..0f795b186eb3 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_mh.c | |||
| @@ -0,0 +1,540 @@ | |||
| 1 | // SPDX-License-Identifier: GPL-2.0 | ||
| 2 | /* IPVS: Maglev Hashing scheduling module | ||
| 3 | * | ||
| 4 | * Authors: Inju Song <inju.song@navercorp.com> | ||
| 5 | * | ||
| 6 | */ | ||
| 7 | |||
| 8 | /* The mh algorithm is to assign a preference list of all the lookup | ||
| 9 | * table positions to each destination and populate the table with | ||
| 10 | * the most-preferred position of destinations. Then it is to select | ||
| 11 | * destination with the hash key of source IP address through looking | ||
| 12 | * up the lookup table. | ||
| 13 | * | ||
| 14 | * The algorithm is detailed in: | ||
| 15 | * [3.4 Consistent Hashing] | ||
| 16 | https://www.usenix.org/system/files/conference/nsdi16/nsdi16-paper-eisenbud.pdf | ||
| 17 | * | ||
| 18 | */ | ||
| 19 | |||
| 20 | #define KMSG_COMPONENT "IPVS" | ||
| 21 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | ||
| 22 | |||
| 23 | #include <linux/ip.h> | ||
| 24 | #include <linux/slab.h> | ||
| 25 | #include <linux/module.h> | ||
| 26 | #include <linux/kernel.h> | ||
| 27 | #include <linux/skbuff.h> | ||
| 28 | |||
| 29 | #include <net/ip_vs.h> | ||
| 30 | |||
| 31 | #include <linux/siphash.h> | ||
| 32 | #include <linux/bitops.h> | ||
| 33 | #include <linux/gcd.h> | ||
| 34 | |||
| 35 | #define IP_VS_SVC_F_SCHED_MH_FALLBACK IP_VS_SVC_F_SCHED1 /* MH fallback */ | ||
| 36 | #define IP_VS_SVC_F_SCHED_MH_PORT IP_VS_SVC_F_SCHED2 /* MH use port */ | ||
| 37 | |||
/* One bucket of the MH lookup table.  The pointer is RCU-protected:
 * schedulers read it with rcu_dereference() while the table is rebuilt
 * with rcu_dereference_protected()/RCU_INIT_POINTER().  A non-NULL
 * bucket holds a reference on the dest (see ip_vs_mh_populate()).
 */
struct ip_vs_mh_lookup {
	struct ip_vs_dest __rcu	*dest;	/* real server (cache) */
};
| 41 | |||
/* Per-destination Maglev permutation parameters, filled by
 * ip_vs_mh_permutate() and consumed by ip_vs_mh_populate().
 */
struct ip_vs_mh_dest_setup {
	unsigned int	offset;	/* starting offset into the lookup table */
	unsigned int	skip;	/* skip (probe stride), 1..TAB_SIZE-1 */
	unsigned int	perm;	/* next_offset: current probe position */
	int		turns;	/* weight / gcd() and rshift; slots taken
				 * per population round */
};
| 48 | |||
/* Available prime numbers for MH table sizes.  A prime table size
 * guarantees that any skip in 1..size-1 is coprime to the size, so the
 * probing in ip_vs_mh_populate() visits every slot.  The table is never
 * written, so declare it const (placed in rodata).
 */
static const int primes[] = {251, 509, 1021, 2039, 4093,
			     8191, 16381, 32749, 65521, 131071};
| 52 | |||
/* For IPVS MH entry hash table */
#ifndef CONFIG_IP_VS_MH_TAB_INDEX
#define CONFIG_IP_VS_MH_TAB_INDEX	12
#endif
/* Bit budget for per-dest turns; bounds the shift computed by
 * ip_vs_mh_shift_weight().
 */
#define IP_VS_MH_TAB_BITS		(CONFIG_IP_VS_MH_TAB_INDEX / 2)
/* CONFIG index 8..17 maps onto primes[0..9] */
#define IP_VS_MH_TAB_INDEX		(CONFIG_IP_VS_MH_TAB_INDEX - 8)
/* Number of buckets in the MH lookup table (always a prime) */
#define IP_VS_MH_TAB_SIZE		primes[IP_VS_MH_TAB_INDEX]
| 60 | |||
/* Per-service scheduler state, attached to svc->sched_data and freed
 * through RCU (ip_vs_mh_state_free()).
 */
struct ip_vs_mh_state {
	struct rcu_head rcu_head;		/* for deferred free via call_rcu() */
	struct ip_vs_mh_lookup *lookup;		/* IP_VS_MH_TAB_SIZE buckets */
	struct ip_vs_mh_dest_setup *dest_setup;	/* scratch, live only while reassigning */
	hsiphash_key_t hash1, hash2;		/* fixed keys set by generate_hash_secret() */
	int gcd;				/* gcd of dest last_weight values */
	int rshift;				/* weight down-shift, see ip_vs_mh_shift_weight() */
};
| 69 | |||
| 70 | static inline void generate_hash_secret(hsiphash_key_t *hash1, | ||
| 71 | hsiphash_key_t *hash2) | ||
| 72 | { | ||
| 73 | hash1->key[0] = 2654435761UL; | ||
| 74 | hash1->key[1] = 2654435761UL; | ||
| 75 | |||
| 76 | hash2->key[0] = 2654446892UL; | ||
| 77 | hash2->key[1] = 2654446892UL; | ||
| 78 | } | ||
| 79 | |||
| 80 | /* Helper function to determine if server is unavailable */ | ||
| 81 | static inline bool is_unavailable(struct ip_vs_dest *dest) | ||
| 82 | { | ||
| 83 | return atomic_read(&dest->weight) <= 0 || | ||
| 84 | dest->flags & IP_VS_DEST_F_OVERLOAD; | ||
| 85 | } | ||
| 86 | |||
| 87 | /* Returns hash value for IPVS MH entry */ | ||
| 88 | static inline unsigned int | ||
| 89 | ip_vs_mh_hashkey(int af, const union nf_inet_addr *addr, | ||
| 90 | __be16 port, hsiphash_key_t *key, unsigned int offset) | ||
| 91 | { | ||
| 92 | unsigned int v; | ||
| 93 | __be32 addr_fold = addr->ip; | ||
| 94 | |||
| 95 | #ifdef CONFIG_IP_VS_IPV6 | ||
| 96 | if (af == AF_INET6) | ||
| 97 | addr_fold = addr->ip6[0] ^ addr->ip6[1] ^ | ||
| 98 | addr->ip6[2] ^ addr->ip6[3]; | ||
| 99 | #endif | ||
| 100 | v = (offset + ntohs(port) + ntohl(addr_fold)); | ||
| 101 | return hsiphash(&v, sizeof(v), key); | ||
| 102 | } | ||
| 103 | |||
| 104 | /* Reset all the hash buckets of the specified table. */ | ||
| 105 | static void ip_vs_mh_reset(struct ip_vs_mh_state *s) | ||
| 106 | { | ||
| 107 | int i; | ||
| 108 | struct ip_vs_mh_lookup *l; | ||
| 109 | struct ip_vs_dest *dest; | ||
| 110 | |||
| 111 | l = &s->lookup[0]; | ||
| 112 | for (i = 0; i < IP_VS_MH_TAB_SIZE; i++) { | ||
| 113 | dest = rcu_dereference_protected(l->dest, 1); | ||
| 114 | if (dest) { | ||
| 115 | ip_vs_dest_put(dest); | ||
| 116 | RCU_INIT_POINTER(l->dest, NULL); | ||
| 117 | } | ||
| 118 | l++; | ||
| 119 | } | ||
| 120 | } | ||
| 121 | |||
| 122 | static int ip_vs_mh_permutate(struct ip_vs_mh_state *s, | ||
| 123 | struct ip_vs_service *svc) | ||
| 124 | { | ||
| 125 | struct list_head *p; | ||
| 126 | struct ip_vs_mh_dest_setup *ds; | ||
| 127 | struct ip_vs_dest *dest; | ||
| 128 | int lw; | ||
| 129 | |||
| 130 | /* If gcd is smaller then 1, number of dests or | ||
| 131 | * all last_weight of dests are zero. So, skip | ||
| 132 | * permutation for the dests. | ||
| 133 | */ | ||
| 134 | if (s->gcd < 1) | ||
| 135 | return 0; | ||
| 136 | |||
| 137 | /* Set dest_setup for the dests permutation */ | ||
| 138 | p = &svc->destinations; | ||
| 139 | ds = &s->dest_setup[0]; | ||
| 140 | while ((p = p->next) != &svc->destinations) { | ||
| 141 | dest = list_entry(p, struct ip_vs_dest, n_list); | ||
| 142 | |||
| 143 | ds->offset = ip_vs_mh_hashkey(svc->af, &dest->addr, | ||
| 144 | dest->port, &s->hash1, 0) % | ||
| 145 | IP_VS_MH_TAB_SIZE; | ||
| 146 | ds->skip = ip_vs_mh_hashkey(svc->af, &dest->addr, | ||
| 147 | dest->port, &s->hash2, 0) % | ||
| 148 | (IP_VS_MH_TAB_SIZE - 1) + 1; | ||
| 149 | ds->perm = ds->offset; | ||
| 150 | |||
| 151 | lw = atomic_read(&dest->last_weight); | ||
| 152 | ds->turns = ((lw / s->gcd) >> s->rshift) ? : (lw != 0); | ||
| 153 | ds++; | ||
| 154 | } | ||
| 155 | |||
| 156 | return 0; | ||
| 157 | } | ||
| 158 | |||
/* Fill the lookup table from the permutation parameters computed by
 * ip_vs_mh_permutate(): round-robin over the dests, each taking
 * ds->turns slots per round at its next free probe position, until all
 * IP_VS_MH_TAB_SIZE slots are assigned.  Bucket references are moved
 * from the old dest to the new one where the assignment changed.
 *
 * Returns 0 on success, -ENOMEM if the scratch bitmap cannot be
 * allocated.
 */
static int ip_vs_mh_populate(struct ip_vs_mh_state *s,
			     struct ip_vs_service *svc)
{
	int n, c, dt_count;
	unsigned long *table;	/* bitmap of already-taken slots */
	struct list_head *p;
	struct ip_vs_mh_dest_setup *ds;
	struct ip_vs_dest *dest, *new_dest;

	/* If gcd is smaller than 1, number of dests or
	 * all last_weight of dests are zero. So, skip
	 * the population for the dests and reset lookup table.
	 */
	if (s->gcd < 1) {
		ip_vs_mh_reset(s);
		return 0;
	}

	table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE),
			sizeof(unsigned long), GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	p = &svc->destinations;
	n = 0;		/* slots assigned so far */
	dt_count = 0;	/* slots given to the current dest this round */
	while (n < IP_VS_MH_TAB_SIZE) {
		/* wrap the dest list for the next round */
		if (p == &svc->destinations)
			p = p->next;

		ds = &s->dest_setup[0];
		while (p != &svc->destinations) {
			/* Ignore added server with zero weight */
			if (ds->turns < 1) {
				p = p->next;
				ds++;
				continue;
			}

			/* probe perm, perm+skip, ... until a free slot;
			 * terminates because n < IP_VS_MH_TAB_SIZE here,
			 * so at least one bit is still clear
			 */
			c = ds->perm;
			while (test_bit(c, table)) {
				/* Add skip, mod IP_VS_MH_TAB_SIZE */
				ds->perm += ds->skip;
				if (ds->perm >= IP_VS_MH_TAB_SIZE)
					ds->perm -= IP_VS_MH_TAB_SIZE;
				c = ds->perm;
			}

			__set_bit(c, table);

			/* swap the bucket's reference to the new dest
			 * only when the assignment actually changed
			 */
			dest = rcu_dereference_protected(s->lookup[c].dest, 1);
			new_dest = list_entry(p, struct ip_vs_dest, n_list);
			if (dest != new_dest) {
				if (dest)
					ip_vs_dest_put(dest);
				ip_vs_dest_hold(new_dest);
				RCU_INIT_POINTER(s->lookup[c].dest, new_dest);
			}

			if (++n == IP_VS_MH_TAB_SIZE)
				goto out;

			/* move on once this dest has used its turns */
			if (++dt_count >= ds->turns) {
				dt_count = 0;
				p = p->next;
				ds++;
			}
		}
	}

out:
	kfree(table);
	return 0;
}
| 233 | |||
| 234 | /* Get ip_vs_dest associated with supplied parameters. */ | ||
| 235 | static inline struct ip_vs_dest * | ||
| 236 | ip_vs_mh_get(struct ip_vs_service *svc, struct ip_vs_mh_state *s, | ||
| 237 | const union nf_inet_addr *addr, __be16 port) | ||
| 238 | { | ||
| 239 | unsigned int hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1, 0) | ||
| 240 | % IP_VS_MH_TAB_SIZE; | ||
| 241 | struct ip_vs_dest *dest = rcu_dereference(s->lookup[hash].dest); | ||
| 242 | |||
| 243 | return (!dest || is_unavailable(dest)) ? NULL : dest; | ||
| 244 | } | ||
| 245 | |||
/* As ip_vs_mh_get, but with fallback if selected server is unavailable.
 *
 * The fallback rehashes with increasing offsets derived from the
 * initial bucket, so different clients that land on the same failed
 * bucket spread across different fallback dests.  Returns NULL when an
 * empty bucket is hit or every probe finds an unavailable dest.
 * Caller must hold the RCU read lock.
 */
static inline struct ip_vs_dest *
ip_vs_mh_get_fallback(struct ip_vs_service *svc, struct ip_vs_mh_state *s,
		      const union nf_inet_addr *addr, __be16 port)
{
	unsigned int offset, roffset;
	unsigned int hash, ihash;
	struct ip_vs_dest *dest;

	/* First try the dest it's supposed to go to */
	ihash = ip_vs_mh_hashkey(svc->af, addr, port,
				 &s->hash1, 0) % IP_VS_MH_TAB_SIZE;
	dest = rcu_dereference(s->lookup[ihash].dest);
	if (!dest)
		return NULL;
	if (!is_unavailable(dest))
		return dest;

	IP_VS_DBG_BUF(6, "MH: selected unavailable server %s:%u, reselecting",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port));

	/* If the original dest is unavailable, loop around the table
	 * starting from ihash to find a new dest
	 */
	for (offset = 0; offset < IP_VS_MH_TAB_SIZE; offset++) {
		/* rotate the starting point so each probe uses a fresh
		 * hash offset
		 */
		roffset = (offset + ihash) % IP_VS_MH_TAB_SIZE;
		hash = ip_vs_mh_hashkey(svc->af, addr, port, &s->hash1,
					roffset) % IP_VS_MH_TAB_SIZE;
		dest = rcu_dereference(s->lookup[hash].dest);
		if (!dest)
			break;
		if (!is_unavailable(dest))
			return dest;
		IP_VS_DBG_BUF(6,
			      "MH: selected unavailable server %s:%u (offset %u), reselecting",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port), roffset);
	}

	return NULL;
}
| 287 | |||
| 288 | /* Assign all the hash buckets of the specified table with the service. */ | ||
| 289 | static int ip_vs_mh_reassign(struct ip_vs_mh_state *s, | ||
| 290 | struct ip_vs_service *svc) | ||
| 291 | { | ||
| 292 | int ret; | ||
| 293 | |||
| 294 | if (svc->num_dests > IP_VS_MH_TAB_SIZE) | ||
| 295 | return -EINVAL; | ||
| 296 | |||
| 297 | if (svc->num_dests >= 1) { | ||
| 298 | s->dest_setup = kcalloc(svc->num_dests, | ||
| 299 | sizeof(struct ip_vs_mh_dest_setup), | ||
| 300 | GFP_KERNEL); | ||
| 301 | if (!s->dest_setup) | ||
| 302 | return -ENOMEM; | ||
| 303 | } | ||
| 304 | |||
| 305 | ip_vs_mh_permutate(s, svc); | ||
| 306 | |||
| 307 | ret = ip_vs_mh_populate(s, svc); | ||
| 308 | if (ret < 0) | ||
| 309 | goto out; | ||
| 310 | |||
| 311 | IP_VS_DBG_BUF(6, "MH: reassign lookup table of %s:%u\n", | ||
| 312 | IP_VS_DBG_ADDR(svc->af, &svc->addr), | ||
| 313 | ntohs(svc->port)); | ||
| 314 | |||
| 315 | out: | ||
| 316 | if (svc->num_dests >= 1) { | ||
| 317 | kfree(s->dest_setup); | ||
| 318 | s->dest_setup = NULL; | ||
| 319 | } | ||
| 320 | return ret; | ||
| 321 | } | ||
| 322 | |||
| 323 | static int ip_vs_mh_gcd_weight(struct ip_vs_service *svc) | ||
| 324 | { | ||
| 325 | struct ip_vs_dest *dest; | ||
| 326 | int weight; | ||
| 327 | int g = 0; | ||
| 328 | |||
| 329 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
| 330 | weight = atomic_read(&dest->last_weight); | ||
| 331 | if (weight > 0) { | ||
| 332 | if (g > 0) | ||
| 333 | g = gcd(weight, g); | ||
| 334 | else | ||
| 335 | g = weight; | ||
| 336 | } | ||
| 337 | } | ||
| 338 | return g; | ||
| 339 | } | ||
| 340 | |||
| 341 | /* To avoid assigning huge weight for the MH table, | ||
| 342 | * calculate shift value with gcd. | ||
| 343 | */ | ||
| 344 | static int ip_vs_mh_shift_weight(struct ip_vs_service *svc, int gcd) | ||
| 345 | { | ||
| 346 | struct ip_vs_dest *dest; | ||
| 347 | int new_weight, weight = 0; | ||
| 348 | int mw, shift; | ||
| 349 | |||
| 350 | /* If gcd is smaller then 1, number of dests or | ||
| 351 | * all last_weight of dests are zero. So, return | ||
| 352 | * shift value as zero. | ||
| 353 | */ | ||
| 354 | if (gcd < 1) | ||
| 355 | return 0; | ||
| 356 | |||
| 357 | list_for_each_entry(dest, &svc->destinations, n_list) { | ||
| 358 | new_weight = atomic_read(&dest->last_weight); | ||
| 359 | if (new_weight > weight) | ||
| 360 | weight = new_weight; | ||
| 361 | } | ||
| 362 | |||
| 363 | /* Because gcd is greater than zero, | ||
| 364 | * the maximum weight and gcd are always greater than zero | ||
| 365 | */ | ||
| 366 | mw = weight / gcd; | ||
| 367 | |||
| 368 | /* shift = occupied bits of weight/gcd - MH highest bits */ | ||
| 369 | shift = fls(mw) - IP_VS_MH_TAB_BITS; | ||
| 370 | return (shift >= 0) ? shift : 0; | ||
| 371 | } | ||
| 372 | |||
| 373 | static void ip_vs_mh_state_free(struct rcu_head *head) | ||
| 374 | { | ||
| 375 | struct ip_vs_mh_state *s; | ||
| 376 | |||
| 377 | s = container_of(head, struct ip_vs_mh_state, rcu_head); | ||
| 378 | kfree(s->lookup); | ||
| 379 | kfree(s); | ||
| 380 | } | ||
| 381 | |||
| 382 | static int ip_vs_mh_init_svc(struct ip_vs_service *svc) | ||
| 383 | { | ||
| 384 | int ret; | ||
| 385 | struct ip_vs_mh_state *s; | ||
| 386 | |||
| 387 | /* Allocate the MH table for this service */ | ||
| 388 | s = kzalloc(sizeof(*s), GFP_KERNEL); | ||
| 389 | if (!s) | ||
| 390 | return -ENOMEM; | ||
| 391 | |||
| 392 | s->lookup = kcalloc(IP_VS_MH_TAB_SIZE, sizeof(struct ip_vs_mh_lookup), | ||
| 393 | GFP_KERNEL); | ||
| 394 | if (!s->lookup) { | ||
| 395 | kfree(s); | ||
| 396 | return -ENOMEM; | ||
| 397 | } | ||
| 398 | |||
| 399 | generate_hash_secret(&s->hash1, &s->hash2); | ||
| 400 | s->gcd = ip_vs_mh_gcd_weight(svc); | ||
| 401 | s->rshift = ip_vs_mh_shift_weight(svc, s->gcd); | ||
| 402 | |||
| 403 | IP_VS_DBG(6, | ||
| 404 | "MH lookup table (memory=%zdbytes) allocated for current service\n", | ||
| 405 | sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE); | ||
| 406 | |||
| 407 | /* Assign the lookup table with current dests */ | ||
| 408 | ret = ip_vs_mh_reassign(s, svc); | ||
| 409 | if (ret < 0) { | ||
| 410 | ip_vs_mh_reset(s); | ||
| 411 | ip_vs_mh_state_free(&s->rcu_head); | ||
| 412 | return ret; | ||
| 413 | } | ||
| 414 | |||
| 415 | /* No more failures, attach state */ | ||
| 416 | svc->sched_data = s; | ||
| 417 | return 0; | ||
| 418 | } | ||
| 419 | |||
| 420 | static void ip_vs_mh_done_svc(struct ip_vs_service *svc) | ||
| 421 | { | ||
| 422 | struct ip_vs_mh_state *s = svc->sched_data; | ||
| 423 | |||
| 424 | /* Got to clean up lookup entry here */ | ||
| 425 | ip_vs_mh_reset(s); | ||
| 426 | |||
| 427 | call_rcu(&s->rcu_head, ip_vs_mh_state_free); | ||
| 428 | IP_VS_DBG(6, "MH lookup table (memory=%zdbytes) released\n", | ||
| 429 | sizeof(struct ip_vs_mh_lookup) * IP_VS_MH_TAB_SIZE); | ||
| 430 | } | ||
| 431 | |||
| 432 | static int ip_vs_mh_dest_changed(struct ip_vs_service *svc, | ||
| 433 | struct ip_vs_dest *dest) | ||
| 434 | { | ||
| 435 | struct ip_vs_mh_state *s = svc->sched_data; | ||
| 436 | |||
| 437 | s->gcd = ip_vs_mh_gcd_weight(svc); | ||
| 438 | s->rshift = ip_vs_mh_shift_weight(svc, s->gcd); | ||
| 439 | |||
| 440 | /* Assign the lookup table with the updated service */ | ||
| 441 | return ip_vs_mh_reassign(s, svc); | ||
| 442 | } | ||
| 443 | |||
| 444 | /* Helper function to get port number */ | ||
| 445 | static inline __be16 | ||
| 446 | ip_vs_mh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) | ||
| 447 | { | ||
| 448 | __be16 _ports[2], *ports; | ||
| 449 | |||
| 450 | /* At this point we know that we have a valid packet of some kind. | ||
| 451 | * Because ICMP packets are only guaranteed to have the first 8 | ||
| 452 | * bytes, let's just grab the ports. Fortunately they're in the | ||
| 453 | * same position for all three of the protocols we care about. | ||
| 454 | */ | ||
| 455 | switch (iph->protocol) { | ||
| 456 | case IPPROTO_TCP: | ||
| 457 | case IPPROTO_UDP: | ||
| 458 | case IPPROTO_SCTP: | ||
| 459 | ports = skb_header_pointer(skb, iph->len, sizeof(_ports), | ||
| 460 | &_ports); | ||
| 461 | if (unlikely(!ports)) | ||
| 462 | return 0; | ||
| 463 | |||
| 464 | if (likely(!ip_vs_iph_inverse(iph))) | ||
| 465 | return ports[0]; | ||
| 466 | else | ||
| 467 | return ports[1]; | ||
| 468 | default: | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | } | ||
| 472 | |||
| 473 | /* Maglev Hashing scheduling */ | ||
| 474 | static struct ip_vs_dest * | ||
| 475 | ip_vs_mh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, | ||
| 476 | struct ip_vs_iphdr *iph) | ||
| 477 | { | ||
| 478 | struct ip_vs_dest *dest; | ||
| 479 | struct ip_vs_mh_state *s; | ||
| 480 | __be16 port = 0; | ||
| 481 | const union nf_inet_addr *hash_addr; | ||
| 482 | |||
| 483 | hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; | ||
| 484 | |||
| 485 | IP_VS_DBG(6, "%s : Scheduling...\n", __func__); | ||
| 486 | |||
| 487 | if (svc->flags & IP_VS_SVC_F_SCHED_MH_PORT) | ||
| 488 | port = ip_vs_mh_get_port(skb, iph); | ||
| 489 | |||
| 490 | s = (struct ip_vs_mh_state *)svc->sched_data; | ||
| 491 | |||
| 492 | if (svc->flags & IP_VS_SVC_F_SCHED_MH_FALLBACK) | ||
| 493 | dest = ip_vs_mh_get_fallback(svc, s, hash_addr, port); | ||
| 494 | else | ||
| 495 | dest = ip_vs_mh_get(svc, s, hash_addr, port); | ||
| 496 | |||
| 497 | if (!dest) { | ||
| 498 | ip_vs_scheduler_err(svc, "no destination available"); | ||
| 499 | return NULL; | ||
| 500 | } | ||
| 501 | |||
| 502 | IP_VS_DBG_BUF(6, "MH: source IP address %s:%u --> server %s:%u\n", | ||
| 503 | IP_VS_DBG_ADDR(svc->af, hash_addr), | ||
| 504 | ntohs(port), | ||
| 505 | IP_VS_DBG_ADDR(dest->af, &dest->addr), | ||
| 506 | ntohs(dest->port)); | ||
| 507 | |||
| 508 | return dest; | ||
| 509 | } | ||
| 510 | |||
/* IPVS MH Scheduler structure */
static struct ip_vs_scheduler ip_vs_mh_scheduler = {
	.name =			"mh",
	.refcnt =		ATOMIC_INIT(0),
	.module =		THIS_MODULE,
	.n_list =		LIST_HEAD_INIT(ip_vs_mh_scheduler.n_list),
	.init_service =		ip_vs_mh_init_svc,
	.done_service =		ip_vs_mh_done_svc,
	/* any dest change invalidates the table; one rebuild path for all */
	.add_dest =		ip_vs_mh_dest_changed,
	.del_dest =		ip_vs_mh_dest_changed,
	.upd_dest =		ip_vs_mh_dest_changed,
	.schedule =		ip_vs_mh_schedule,
};
| 524 | |||
/* Module init: register the MH scheduler with the IPVS core. */
static int __init ip_vs_mh_init(void)
{
	return register_ip_vs_scheduler(&ip_vs_mh_scheduler);
}
| 529 | |||
/* Module exit: unregister the scheduler and wait for in-flight
 * call_rcu() callbacks (queued by ip_vs_mh_done_svc()) to finish
 * before the module text is unloaded.
 */
static void __exit ip_vs_mh_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_mh_scheduler);
	rcu_barrier();
}
| 535 | |||
| 536 | module_init(ip_vs_mh_init); | ||
| 537 | module_exit(ip_vs_mh_cleanup); | ||
| 538 | MODULE_DESCRIPTION("Maglev hashing ipvs scheduler"); | ||
| 539 | MODULE_LICENSE("GPL v2"); | ||
| 540 | MODULE_AUTHOR("Inju Song <inju.song@navercorp.com>"); | ||
