Diffstat (limited to 'net/ipv4')

-rw-r--r--  net/ipv4/inet_diag.c                    6
-rw-r--r--  net/ipv4/ipvs/Kconfig                   6
-rw-r--r--  net/ipv4/ipvs/Makefile                  3
-rw-r--r--  net/ipv4/ipvs/ip_vs_core.c              8
-rw-r--r--  net/ipv4/ipvs/ip_vs_ctl.c             896
-rw-r--r--  net/ipv4/ipvs/ip_vs_est.c              18
-rw-r--r--  net/ipv4/ipvs/ip_vs_lblc.c            213
-rw-r--r--  net/ipv4/ipvs/ip_vs_lblcr.c           238
-rw-r--r--  net/ipv4/ipvs/ip_vs_lc.c               21
-rw-r--r--  net/ipv4/ipvs/ip_vs_nq.c               24
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_ah_esp.c (renamed from net/ipv4/ipvs/ip_vs_proto_ah.c)  69
-rw-r--r--  net/ipv4/ipvs/ip_vs_proto_esp.c       176
-rw-r--r--  net/ipv4/ipvs/ip_vs_rr.c                7
-rw-r--r--  net/ipv4/ipvs/ip_vs_sed.c              24
-rw-r--r--  net/ipv4/ipvs/ip_vs_wlc.c              24
-rw-r--r--  net/ipv4/route.c                       14
-rw-r--r--  net/ipv4/tcp_input.c                  208
-rw-r--r--  net/ipv4/tcp_ipv4.c                    26

18 files changed, 1305 insertions, 676 deletions
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c10036e7a463..89cb047ab314 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -782,11 +782,15 @@ skip_listen_ht:
 		struct sock *sk;
 		struct hlist_node *node;
 
+		num = 0;
+
+		if (hlist_empty(&head->chain) && hlist_empty(&head->twchain))
+			continue;
+
 		if (i > s_i)
 			s_num = 0;
 
 		read_lock_bh(lock);
-		num = 0;
 		sk_for_each(sk, node, &head->chain) {
 			struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 09d0c3f35669..2e48a7e27223 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -71,14 +71,20 @@ config IP_VS_PROTO_UDP
 	  This option enables support for load balancing UDP transport
 	  protocol. Say Y if unsure.
 
+config IP_VS_PROTO_AH_ESP
+	bool
+	depends on UNDEFINED
+
 config IP_VS_PROTO_ESP
 	bool "ESP load balancing support"
+	select IP_VS_PROTO_AH_ESP
 	---help---
 	  This option enables support for load balancing ESP (Encapsulation
 	  Security Payload) transport protocol. Say Y if unsure.
 
 config IP_VS_PROTO_AH
 	bool "AH load balancing support"
+	select IP_VS_PROTO_AH_ESP
 	---help---
 	  This option enables support for load balancing AH (Authentication
 	  Header) transport protocol. Say Y if unsure.
diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile
index 30e85de9ffff..73a46fe1fe4c 100644
--- a/net/ipv4/ipvs/Makefile
+++ b/net/ipv4/ipvs/Makefile
@@ -6,8 +6,7 @@
 ip_vs_proto-objs-y :=
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o
 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
-ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
+ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
 
 ip_vs-objs :=	ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
 		ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index a7879eafc3b5..9fbf0a6d7392 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -1070,10 +1070,12 @@ static int __init ip_vs_init(void)
 {
 	int ret;
 
+	ip_vs_estimator_init();
+
 	ret = ip_vs_control_init();
 	if (ret < 0) {
 		IP_VS_ERR("can't setup control.\n");
-		goto cleanup_nothing;
+		goto cleanup_estimator;
 	}
 
 	ip_vs_protocol_init();
@@ -1106,7 +1108,8 @@ static int __init ip_vs_init(void)
   cleanup_protocol:
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
-  cleanup_nothing:
+  cleanup_estimator:
+	ip_vs_estimator_cleanup();
 	return ret;
 }
 
@@ -1117,6 +1120,7 @@ static void __exit ip_vs_cleanup(void)
 	ip_vs_app_cleanup();
 	ip_vs_protocol_cleanup();
 	ip_vs_control_cleanup();
+	ip_vs_estimator_cleanup();
 	IP_VS_INFO("ipvs unloaded.\n");
 }
 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 6379705a8dcb..ede101eeec17 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -37,6 +37,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/sock.h>
+#include <net/genetlink.h>
 
 #include <asm/uaccess.h>
 
@@ -868,7 +869,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	svc->num_dests++;
 
 	/* call the update_service function of its scheduler */
-	svc->scheduler->update_service(svc);
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
 
 	write_unlock_bh(&__ip_vs_svc_lock);
 	return 0;
@@ -898,7 +900,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	svc->num_dests++;
 
 	/* call the update_service function of its scheduler */
-	svc->scheduler->update_service(svc);
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
 
 	write_unlock_bh(&__ip_vs_svc_lock);
 
@@ -948,7 +951,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest)
 	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
 
 	/* call the update_service, because server weight may be changed */
-	svc->scheduler->update_service(svc);
+	if (svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
 
 	write_unlock_bh(&__ip_vs_svc_lock);
 
@@ -1011,12 +1015,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
 	 */
 	list_del(&dest->n_list);
 	svc->num_dests--;
-	if (svcupd) {
-		/*
-		 *  Call the update_service function of its scheduler
-		 */
-		svc->scheduler->update_service(svc);
-	}
+
+	/*
+	 *  Call the update_service function of its scheduler
+	 */
+	if (svcupd && svc->scheduler->update_service)
+		svc->scheduler->update_service(svc);
 }
 
 
@@ -2320,6 +2324,872 @@ static struct nf_sockopt_ops ip_vs_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
+/*
+ * Generic Netlink interface
+ */
+
+/* IPVS genetlink family */
+static struct genl_family ip_vs_genl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= IPVS_GENL_NAME,
+	.version	= IPVS_GENL_VERSION,
+	.maxattr	= IPVS_CMD_MAX,
+};
+
+/* Policy used for first-level command attributes */
+static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
+	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
+	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
+static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
+	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
+	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_IFNAME_MAXLEN },
+	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
+static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
+	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
+					    .len = IP_VS_SCHEDNAME_MAXLEN },
+	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
+					    .len = sizeof(struct ip_vs_flags) },
+	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
+	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
+static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
+	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
+					    .len = sizeof(union nf_inet_addr) },
+	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
+	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
+	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
+};
+
+static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
+				 struct ip_vs_stats *stats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	spin_lock_bh(&stats->lock);
+
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
+	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
+	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
+
+	spin_unlock_bh(&stats->lock);
+
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	spin_unlock_bh(&stats->lock);
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc)
+{
+	struct nlattr *nl_service;
+	struct ip_vs_flags flags = { .flags = svc->flags,
+				     .mask = ~0 };
+
+	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
+	if (!nl_service)
+		return -EMSGSIZE;
+
+	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
+
+	if (svc->fwmark) {
+		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
+	} else {
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
+		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
+		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
+	}
+
+	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
+	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
+	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_service);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_service);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_service(struct sk_buff *skb,
+				   struct ip_vs_service *svc,
+				   struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_SERVICE);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_service(skb, svc) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_services(struct sk_buff *skb,
+				    struct netlink_callback *cb)
+{
+	int idx = 0, i;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+
+	mutex_lock(&__ip_vs_mutex);
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
+		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
+			if (++idx <= start)
+				continue;
+			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
+				idx--;
+				goto nla_put_failure;
+			}
+		}
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+	cb->args[0] = idx;
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_service(struct ip_vs_service_user *usvc,
+				    struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
+	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
+
+	/* Parse mandatory identifying service fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
+		return -EINVAL;
+
+	nla_af = attrs[IPVS_SVC_ATTR_AF];
+	nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
+	nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
+	nla_port = attrs[IPVS_SVC_ATTR_PORT];
+	nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
+
+	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
+		return -EINVAL;
+
+	/* For now, only support IPv4 */
+	if (nla_get_u16(nla_af) != AF_INET)
+		return -EAFNOSUPPORT;
+
+	if (nla_fwmark) {
+		usvc->protocol = IPPROTO_TCP;
+		usvc->fwmark = nla_get_u32(nla_fwmark);
+	} else {
+		usvc->protocol = nla_get_u16(nla_protocol);
+		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
+		usvc->port = nla_get_u16(nla_port);
+		usvc->fwmark = 0;
+	}
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
+			      *nla_netmask;
+		struct ip_vs_flags flags;
+		struct ip_vs_service *svc;
+
+		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
+		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
+		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
+		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
+
+		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
+			return -EINVAL;
+
+		nla_memcpy(&flags, nla_flags, sizeof(flags));
+
+		/* prefill flags from service if it already exists */
+		if (usvc->fwmark)
+			svc = __ip_vs_svc_fwm_get(usvc->fwmark);
+		else
+			svc = __ip_vs_service_get(usvc->protocol, usvc->addr,
+						  usvc->port);
+		if (svc) {
+			usvc->flags = svc->flags;
+			ip_vs_service_put(svc);
+		} else
+			usvc->flags = 0;
+
+		/* set new flags from userland */
+		usvc->flags = (usvc->flags & ~flags.mask) |
+			      (flags.flags & flags.mask);
+
+		strlcpy(usvc->sched_name, nla_data(nla_sched),
+			sizeof(usvc->sched_name));
+		usvc->timeout = nla_get_u32(nla_timeout);
+		usvc->netmask = nla_get_u32(nla_netmask);
+	}
+
+	return 0;
+}
+
+static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+{
+	struct ip_vs_service_user usvc;
+	int ret;
+
+	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
+	if (ret)
+		return ERR_PTR(ret);
+
+	if (usvc.fwmark)
+		return __ip_vs_svc_fwm_get(usvc.fwmark);
+	else
+		return __ip_vs_service_get(usvc.protocol, usvc.addr,
+					   usvc.port);
+}
+
+static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
+{
+	struct nlattr *nl_dest;
+
+	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
+	if (!nl_dest)
+		return -EMSGSIZE;
+
+	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
+	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
+
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
+		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
+		    atomic_read(&dest->activeconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
+		    atomic_read(&dest->inactconns));
+	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
+		    atomic_read(&dest->persistconns));
+
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nl_dest);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_dest);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
+				struct netlink_callback *cb)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DEST);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_dest(skb, dest) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_dests(struct sk_buff *skb,
+				 struct netlink_callback *cb)
+{
+	int idx = 0;
+	int start = cb->args[0];
+	struct ip_vs_service *svc;
+	struct ip_vs_dest *dest;
+	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+
+	mutex_lock(&__ip_vs_mutex);
+
+	/* Try to find the service for which to dump destinations */
+	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
+			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
+		goto out_err;
+
+	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+	if (IS_ERR(svc) || svc == NULL)
+		goto out_err;
+
+	/* Dump the destinations */
+	list_for_each_entry(dest, &svc->destinations, n_list) {
+		if (++idx <= start)
+			continue;
+		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
+			idx--;
+			goto nla_put_failure;
+		}
+	}
+
+nla_put_failure:
+	cb->args[0] = idx;
+	ip_vs_service_put(svc);
+
+out_err:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_parse_dest(struct ip_vs_dest_user *udest,
+				 struct nlattr *nla, int full_entry)
+{
+	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
+	struct nlattr *nla_addr, *nla_port;
+
+	/* Parse mandatory identifying destination fields first */
+	if (nla == NULL ||
+	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
+		return -EINVAL;
+
+	nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
+	nla_port = attrs[IPVS_DEST_ATTR_PORT];
+
+	if (!(nla_addr && nla_port))
+		return -EINVAL;
+
+	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
+	udest->port = nla_get_u16(nla_port);
+
+	/* If a full entry was requested, check for the additional fields */
+	if (full_entry) {
+		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
+			      *nla_l_thresh;
+
+		nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
+		nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
+		nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
+		nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
+
+		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
+			return -EINVAL;
+
+		udest->conn_flags = nla_get_u32(nla_fwd)
+				    & IP_VS_CONN_F_FWD_MASK;
+		udest->weight = nla_get_u32(nla_weight);
+		udest->u_threshold = nla_get_u32(nla_u_thresh);
+		udest->l_threshold = nla_get_u32(nla_l_thresh);
+	}
+
+	return 0;
+}
+
+static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid)
+{
+	struct nlattr *nl_daemon;
+
+	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
+	if (!nl_daemon)
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
+	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
+	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
+
+	nla_nest_end(skb, nl_daemon);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_daemon);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
+				  const char *mcast_ifn, __be32 syncid,
+				  struct netlink_callback *cb)
+{
+	void *hdr;
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
+			  &ip_vs_genl_family, NLM_F_MULTI,
+			  IPVS_CMD_NEW_DAEMON);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	mutex_lock(&__ip_vs_mutex);
+	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
+					   ip_vs_master_mcast_ifn,
+					   ip_vs_master_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[0] = 1;
+	}
+
+	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
+					   ip_vs_backup_mcast_ifn,
+					   ip_vs_backup_syncid, cb) < 0)
+			goto nla_put_failure;
+
+		cb->args[1] = 1;
+	}
+
+nla_put_failure:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return skb->len;
+}
+
+static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+{
+	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
+	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
+	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
+		return -EINVAL;
+
+	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
+				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
+}
+
+static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+{
+	if (!attrs[IPVS_DAEMON_ATTR_STATE])
+		return -EINVAL;
+
+	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+}
+
+static int ip_vs_genl_set_config(struct nlattr **attrs)
+{
+	struct ip_vs_timeout_user t;
+
+	__ip_vs_get_timeouts(&t);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
+		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
+		t.tcp_fin_timeout =
+			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
+
+	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
+		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
+
+	return ip_vs_set_timeout(&t);
+}
+
+static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ip_vs_service *svc = NULL;
+	struct ip_vs_service_user usvc;
+	struct ip_vs_dest_user udest;
+	int ret = 0, cmd;
+	int need_full_svc = 0, need_full_dest = 0;
+
+	cmd = info->genlhdr->cmd;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	if (cmd == IPVS_CMD_FLUSH) {
+		ret = ip_vs_flush();
+		goto out;
+	} else if (cmd == IPVS_CMD_SET_CONFIG) {
+		ret = ip_vs_genl_set_config(info->attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
+		   cmd == IPVS_CMD_DEL_DAEMON) {
+
+		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
+
+		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
+		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
+				     info->attrs[IPVS_CMD_ATTR_DAEMON],
+				     ip_vs_daemon_policy)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		if (cmd == IPVS_CMD_NEW_DAEMON)
+			ret = ip_vs_genl_new_daemon(daemon_attrs);
+		else
+			ret = ip_vs_genl_del_daemon(daemon_attrs);
+		goto out;
+	} else if (cmd == IPVS_CMD_ZERO &&
+		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
+		ret = ip_vs_zero_all();
+		goto out;
+	}
+
+	/* All following commands require a service argument, so check if we
+	 * received a valid one. We need a full service specification when
+	 * adding / editing a service. Only identifying members otherwise. */
+	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
+		need_full_svc = 1;
+
+	ret = ip_vs_genl_parse_service(&usvc,
+				       info->attrs[IPVS_CMD_ATTR_SERVICE],
+				       need_full_svc);
+	if (ret)
+		goto out;
+
+	/* Lookup the exact service by <protocol, addr, port> or fwmark */
+	if (usvc.fwmark == 0)
+		svc = __ip_vs_service_get(usvc.protocol, usvc.addr, usvc.port);
+	else
+		svc = __ip_vs_svc_fwm_get(usvc.fwmark);
+
+	/* Unless we're adding a new service, the service must already exist */
+	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
+		ret = -ESRCH;
+		goto out;
+	}
+
+	/* Destination commands require a valid destination argument. For
+	 * adding / editing a destination, we need a full destination
+	 * specification. */
+	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
+	    cmd == IPVS_CMD_DEL_DEST) {
+		if (cmd != IPVS_CMD_DEL_DEST)
+			need_full_dest = 1;
+
+		ret = ip_vs_genl_parse_dest(&udest,
+					    info->attrs[IPVS_CMD_ATTR_DEST],
+					    need_full_dest);
+		if (ret)
+			goto out;
+	}
+
+	switch (cmd) {
+	case IPVS_CMD_NEW_SERVICE:
+		if (svc == NULL)
+			ret = ip_vs_add_service(&usvc, &svc);
+		else
+			ret = -EEXIST;
+		break;
+	case IPVS_CMD_SET_SERVICE:
+		ret = ip_vs_edit_service(svc, &usvc);
+		break;
+	case IPVS_CMD_DEL_SERVICE:
+		ret = ip_vs_del_service(svc);
+		break;
+	case IPVS_CMD_NEW_DEST:
+		ret = ip_vs_add_dest(svc, &udest);
+		break;
+	case IPVS_CMD_SET_DEST:
+		ret = ip_vs_edit_dest(svc, &udest);
+		break;
+	case IPVS_CMD_DEL_DEST:
+		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	case IPVS_CMD_ZERO:
+		ret = ip_vs_zero_service(svc);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+out:
+	if (svc)
+		ip_vs_service_put(svc);
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *reply;
+	int ret, cmd, reply_cmd;
+
+	cmd = info->genlhdr->cmd;
+
+	if (cmd == IPVS_CMD_GET_SERVICE)
+		reply_cmd = IPVS_CMD_NEW_SERVICE;
+	else if (cmd == IPVS_CMD_GET_INFO)
+		reply_cmd = IPVS_CMD_SET_INFO;
+	else if (cmd == IPVS_CMD_GET_CONFIG)
+		reply_cmd = IPVS_CMD_SET_CONFIG;
+	else {
+		IP_VS_ERR("unknown Generic Netlink command\n");
+		return -EINVAL;
+	}
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	mutex_lock(&__ip_vs_mutex);
+
+	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
+	if (reply == NULL)
+		goto nla_put_failure;
+
+	switch (cmd) {
+	case IPVS_CMD_GET_SERVICE:
+	{
+		struct ip_vs_service *svc;
+
+		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+		if (IS_ERR(svc)) {
+			ret = PTR_ERR(svc);
+			goto out_err;
+		} else if (svc) {
+			ret = ip_vs_genl_fill_service(msg, svc);
+			ip_vs_service_put(svc);
+			if (ret)
+				goto nla_put_failure;
+		} else {
+			ret = -ESRCH;
+			goto out_err;
+		}
+
+		break;
+	}
+
+	case IPVS_CMD_GET_CONFIG:
+	{
+		struct ip_vs_timeout_user t;
+
+		__ip_vs_get_timeouts(&t);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
+			    t.tcp_fin_timeout);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
+#endif
+
+		break;
+	}
+
+	case IPVS_CMD_GET_INFO:
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
+		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
+			    IP_VS_CONN_TAB_SIZE);
+		break;
+	}
+
+	genlmsg_end(msg, reply);
+	ret = genlmsg_unicast(msg, info->snd_pid);
+	goto out;
+
+nla_put_failure:
+	IP_VS_ERR("not enough space in Netlink message\n");
+	ret = -EMSGSIZE;
+
+out_err:
+	nlmsg_free(msg);
+out:
+	mutex_unlock(&__ip_vs_mutex);
+
+	return ret;
+}
+
+
+static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
+	{
+		.cmd	= IPVS_CMD_NEW_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_SERVICE,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+		.dumpit	= ip_vs_genl_dump_services,
+		.policy	= ip_vs_cmd_policy,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DEST,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.dumpit	= ip_vs_genl_dump_dests,
+	},
+	{
+		.cmd	= IPVS_CMD_NEW_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_DEL_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_DAEMON,
+		.flags	= GENL_ADMIN_PERM,
+		.dumpit	= ip_vs_genl_dump_daemons,
+	},
+	{
+		.cmd	= IPVS_CMD_SET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_CONFIG,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_GET_INFO,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_get_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_ZERO,
+		.flags	= GENL_ADMIN_PERM,
+		.policy	= ip_vs_cmd_policy,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+	{
+		.cmd	= IPVS_CMD_FLUSH,
+		.flags	= GENL_ADMIN_PERM,
+		.doit	= ip_vs_genl_set_cmd,
+	},
+};
+
+static int __init ip_vs_genl_register(void)
+{
+	int ret, i;
+
+	ret = genl_register_family(&ip_vs_genl_family);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
+		ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
+		if (ret)
+			goto err_out;
+	}
+	return 0;
+
+err_out:
+	genl_unregister_family(&ip_vs_genl_family);
+	return ret;
+}
+
+static void ip_vs_genl_unregister(void)
+{
+	genl_unregister_family(&ip_vs_genl_family);
+}
+
+/* End of Generic Netlink interface definitions */
+
 
 int __init ip_vs_control_init(void)
 {
@@ -2334,6 +3204,13 @@ int __init ip_vs_control_init(void)
 		return ret;
 	}
 
+	ret = ip_vs_genl_register();
+	if (ret) {
+		IP_VS_ERR("cannot register Generic Netlink interface.\n");
+		nf_unregister_sockopt(&ip_vs_sockopts);
+		return ret;
+	}
+
 	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
 	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
 
@@ -2368,6 +3245,7 @@ void ip_vs_control_cleanup(void)
 	unregister_sysctl_table(sysctl_header);
 	proc_net_remove(&init_net, "ip_vs_stats");
 	proc_net_remove(&init_net, "ip_vs");
+	ip_vs_genl_unregister();
 	nf_unregister_sockopt(&ip_vs_sockopts);
 	LeaveFunction(2);
 }
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 5a20f93bd7f9..4fb620ec2086 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -124,8 +124,6 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats)
 	est->outbps = stats->outbps<<5;
 
 	spin_lock_bh(&est_lock);
-	if (list_empty(&est_list))
-		mod_timer(&est_timer, jiffies + 2 * HZ);
 	list_add(&est->list, &est_list);
 	spin_unlock_bh(&est_lock);
 }
@@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats)
 
 	spin_lock_bh(&est_lock);
 	list_del(&est->list);
-	while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
-		spin_unlock_bh(&est_lock);
-		cpu_relax();
-		spin_lock_bh(&est_lock);
-	}
 	spin_unlock_bh(&est_lock);
 }
 
@@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 	est->inbps = 0;
 	est->outbps = 0;
 }
+
+int __init ip_vs_estimator_init(void)
+{
+	mod_timer(&est_timer, jiffies + 2 * HZ);
+	return 0;
+}
+
+void ip_vs_estimator_cleanup(void)
+{
+	del_timer_sync(&est_timer);
+}
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index 7a6a319f544a..d2a43aa3fe4c 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -96,7 +96,6 @@ struct ip_vs_lblc_entry {
  *	IPVS lblc hash table
  */
 struct ip_vs_lblc_table {
-	rwlock_t		lock;		/* lock for this table */
 	struct list_head	bucket[IP_VS_LBLC_TAB_SIZE];  /* hash bucket */
 	atomic_t		entries;	/* number of entries */
 	int			max_size;	/* maximum size of entries */
@@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = {
 
 static struct ctl_table_header * sysctl_header;
 
-/*
- *      new/free a ip_vs_lblc_entry, which is a mapping of a destionation
- *      IP address to a server.
- */
-static inline struct ip_vs_lblc_entry *
-ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest)
-{
-	struct ip_vs_lblc_entry *en;
-
-	en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC);
-	if (en == NULL) {
-		IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
-		return NULL;
-	}
-
-	INIT_LIST_HEAD(&en->list);
-	en->addr = daddr;
-
-	atomic_inc(&dest->refcnt);
-	en->dest = dest;
-
-	return en;
-}
-
-
 static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
 {
 	list_del(&en->list);
@@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr)
  *	Hash an entry in the ip_vs_lblc_table.
  *	returns bool success.
  */
-static int
+static void
 ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en)
 {
-	unsigned hash;
-
-	if (!list_empty(&en->list)) {
-		IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, "
-			  "called from %p\n", __builtin_return_address(0));
-		return 0;
-	}
+	unsigned hash = ip_vs_lblc_hashkey(en->addr);
 
-	/*
-	 *	Hash by destination IP address
-	 */
-	hash = ip_vs_lblc_hashkey(en->addr);
-
-	write_lock(&tbl->lock);
 	list_add(&en->list, &tbl->bucket[hash]);
 	atomic_inc(&tbl->entries);
-	write_unlock(&tbl->lock);
-
-	return 1;
 }
 
 
 /*
- *	Get ip_vs_lblc_entry associated with supplied parameters.
+ *	Get ip_vs_lblc_entry associated with supplied parameters. Called under
+ *	read lock
  */
 static inline struct ip_vs_lblc_entry *
 ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
 {
-	unsigned hash;
+	unsigned hash = ip_vs_lblc_hashkey(addr);
 	struct ip_vs_lblc_entry *en;
 
-	hash = ip_vs_lblc_hashkey(addr);
+	list_for_each_entry(en, &tbl->bucket[hash], list)
+		if (en->addr == addr)
+			return en;
 
-	read_lock(&tbl->lock);
+	return NULL;
+}
 
-	list_for_each_entry(en, &tbl->bucket[hash], list) {
-		if (en->addr == addr) {
-			/* HIT */
-			read_unlock(&tbl->lock);
-			return en;
+
+/*
+ * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP
+ * address to a server. Called under write lock.
+ */
+static inline struct ip_vs_lblc_entry *
+ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr,
+	       struct ip_vs_dest *dest)
+{
+	struct ip_vs_lblc_entry *en;
+
+	en = ip_vs_lblc_get(tbl, daddr);
+	if (!en) {
+		en = kmalloc(sizeof(*en), GFP_ATOMIC);
+		if (!en) {
+			IP_VS_ERR("ip_vs_lblc_new(): no memory\n");
+			return NULL;
 		}
-	}
 
-	read_unlock(&tbl->lock);
+		en->addr = daddr;
+		en->lastuse = jiffies;
 
-	return NULL;
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+
+		ip_vs_lblc_hash(tbl, en);
+	} else if (en->dest != dest) {
+		atomic_dec(&en->dest->refcnt);
+		atomic_inc(&dest->refcnt);
+		en->dest = dest;
+	}
+
+	return en;
 }
 
 
@@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr)
  */
 static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
 {
-	int i;
 	struct ip_vs_lblc_entry *en, *nxt;
+	int i;
 
 	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
-		write_lock(&tbl->lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) {
 			ip_vs_lblc_free(en);
 			atomic_dec(&tbl->entries);
 		}
-		write_unlock(&tbl->lock);
 	}
 }
 
 
-static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
+static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
 {
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
+	struct ip_vs_lblc_entry *en, *nxt;
 	unsigned long now = jiffies;
 	int i, j;
-	struct ip_vs_lblc_entry *en, *nxt;
 
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
 
-		write_lock(&tbl->lock);
+		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now,
 					en->lastuse + sysctl_ip_vs_lblc_expiration))
@@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
 			ip_vs_lblc_free(en);
 			atomic_dec(&tbl->entries);
 		}
-		write_unlock(&tbl->lock);
+		write_unlock(&svc->sched_lock);
 	}
 	tbl->rover = j;
 }
@@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl)
  */
 static void ip_vs_lblc_check_expire(unsigned long data)
 {
-	struct ip_vs_lblc_table *tbl;
+	struct ip_vs_service *svc = (struct ip_vs_service *) data;
+	struct ip_vs_lblc_table *tbl = svc->sched_data;
 	unsigned long now = jiffies;
 	int goal;
 	int i, j;
 	struct ip_vs_lblc_entry *en, *nxt;
 
-	tbl = (struct ip_vs_lblc_table *)data;
-
 	if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) {
 		/* do full expiration check */
-		ip_vs_lblc_full_check(tbl);
+		ip_vs_lblc_full_check(svc);
 		tbl->counter = 1;
 		goto out;
 	}
@@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
 	for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		j = (j + 1) & IP_VS_LBLC_TAB_MASK;
 
-		write_lock(&tbl->lock);
+		write_lock(&svc->sched_lock);
 		list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
 			if (time_before(now, en->lastuse + ENTRY_TIMEOUT))
 				continue;
@@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data)
 			atomic_dec(&tbl->entries);
 			goal--;
 		}
-		write_unlock(&tbl->lock);
+		write_unlock(&svc->sched_lock);
 		if (goal <= 0)
 			break;
 	}
@@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	/*
 	 *	Allocate the ip_vs_lblc_table for this service
 	 */
-	tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC);
+	tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC);
 	if (tbl == NULL) {
 		IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n");
 		return -ENOMEM;
 	}
 	svc->sched_data = tbl;
 	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for "
-		  "current service\n",
-		  sizeof(struct ip_vs_lblc_table));
+		  "current service\n", sizeof(*tbl));
 
 	/*
 	 *	Initialize the hash buckets
@@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) {
 		INIT_LIST_HEAD(&tbl->bucket[i]);
 	}
-	rwlock_init(&tbl->lock);
 	tbl->max_size = IP_VS_LBLC_TAB_SIZE*16;
 	tbl->rover = 0;
 	tbl->counter = 1;
@@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc)
 	 *	Hook periodic timer for garbage collection
 	 */
 	setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
-		    (unsigned long)tbl);
-	tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
-	add_timer(&tbl->periodic_timer);
+		    (unsigned long)svc);
+	mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
 
 	return 0;
 }
@@ -380,22 +360,16 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc)
 	ip_vs_lblc_flush(tbl);
 
 	/* release the table itself */
-	kfree(svc->sched_data);
+	kfree(tbl);
 	IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n",
-		  sizeof(struct ip_vs_lblc_table));
+		  sizeof(*tbl));
 
 	return 0;
 }
 
 
-static int ip_vs_lblc_update_svc(struct ip_vs_service *svc)
-{
-	return 0;
-}
-
-
 static inline struct ip_vs_dest *
-__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
+__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph)
 {
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
@@ -484,46 +458,54 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) | |||
484 | static struct ip_vs_dest * | 458 | static struct ip_vs_dest * |
485 | ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | 459 | ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) |
486 | { | 460 | { |
487 | struct ip_vs_dest *dest; | 461 | struct ip_vs_lblc_table *tbl = svc->sched_data; |
488 | struct ip_vs_lblc_table *tbl; | ||
489 | struct ip_vs_lblc_entry *en; | ||
490 | struct iphdr *iph = ip_hdr(skb); | 462 | struct iphdr *iph = ip_hdr(skb); |
463 | struct ip_vs_dest *dest = NULL; | ||
464 | struct ip_vs_lblc_entry *en; | ||
491 | 465 | ||
492 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); | 466 | IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); |
493 | 467 | ||
494 | tbl = (struct ip_vs_lblc_table *)svc->sched_data; | 468 | /* First look in our cache */ |
469 | read_lock(&svc->sched_lock); | ||
495 | en = ip_vs_lblc_get(tbl, iph->daddr); | 470 | en = ip_vs_lblc_get(tbl, iph->daddr); |
496 | if (en == NULL) { | 471 | if (en) { |
497 | dest = __ip_vs_wlc_schedule(svc, iph); | 472 | /* We only hold a read lock, but this is atomic */ |
498 | if (dest == NULL) { | 473 | en->lastuse = jiffies; |
499 | IP_VS_DBG(1, "no destination available\n"); | 474 | |
500 | return NULL; | 475 | /* |
501 | } | 476 | * If the destination is not available, i.e. it's in the trash, |
502 | en = ip_vs_lblc_new(iph->daddr, dest); | 477 | * we must ignore it, as it may be removed from under our feet, |
503 | if (en == NULL) { | 478 | * if someone drops our reference count. Our caller only makes |
504 | return NULL; | 479 | * sure that destinations, that are not in the trash, are not |
505 | } | 479 | * sure that destinations that are not in the trash are not |
506 | ip_vs_lblc_hash(tbl, en); | 480 | * moved to the trash while we are scheduling. But anyone can |
507 | } else { | 482 | */ |
508 | dest = en->dest; | 483 | |
509 | if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) | 484 | if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) |
510 | || atomic_read(&dest->weight) <= 0 | 485 | dest = en->dest; |
511 | || is_overloaded(dest, svc)) { | 486 | } |
512 | dest = __ip_vs_wlc_schedule(svc, iph); | 487 | read_unlock(&svc->sched_lock); |
513 | if (dest == NULL) { | 488 | |
514 | IP_VS_DBG(1, "no destination available\n"); | 489 | /* If the destination has a weight and is not overloaded, use it */ |
515 | return NULL; | 490 | if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) |
516 | } | 491 | goto out; |
517 | atomic_dec(&en->dest->refcnt); | 492 | |
518 | atomic_inc(&dest->refcnt); | 493 | /* No cache entry or it is invalid, time to schedule */ |
519 | en->dest = dest; | 494 | dest = __ip_vs_lblc_schedule(svc, iph); |
520 | } | 495 | if (!dest) { |
496 | IP_VS_DBG(1, "no destination available\n"); | ||
497 | return NULL; | ||
521 | } | 498 | } |
522 | en->lastuse = jiffies; | ||
523 | 499 | ||
500 | /* If we fail to create a cache entry, we'll just use the valid dest */ | ||
501 | write_lock(&svc->sched_lock); | ||
502 | ip_vs_lblc_new(tbl, iph->daddr, dest); | ||
503 | write_unlock(&svc->sched_lock); | ||
504 | |||
505 | out: | ||
524 | IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " | 506 | IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " |
525 | "--> server %u.%u.%u.%u:%d\n", | 507 | "--> server %u.%u.%u.%u:%d\n", |
526 | NIPQUAD(en->addr), | 508 | NIPQUAD(iph->daddr), |
527 | NIPQUAD(dest->addr), | 509 | NIPQUAD(dest->addr), |
528 | ntohs(dest->port)); | 510 | ntohs(dest->port)); |
529 | 511 | ||
@@ -542,7 +524,6 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = | |||
542 | .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), | 524 | .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), |
543 | .init_service = ip_vs_lblc_init_svc, | 525 | .init_service = ip_vs_lblc_init_svc, |
544 | .done_service = ip_vs_lblc_done_svc, | 526 | .done_service = ip_vs_lblc_done_svc, |
545 | .update_service = ip_vs_lblc_update_svc, | ||
546 | .schedule = ip_vs_lblc_schedule, | 527 | .schedule = ip_vs_lblc_schedule, |
547 | }; | 528 | }; |
548 | 529 | ||
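
Taken together, the ip_vs_lblc.c hunks above replace the per-table rwlock with the service-wide svc->sched_lock: cache lookups run under the read lock, scheduling runs with no cache lock held, and the entry is created or refreshed under a short write lock afterwards. Below is a standalone userspace sketch of that read-mostly pattern, with a pthread rwlock standing in for the kernel lock; every name and type in it is illustrative, not IPVS API.

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

#define TAB_SIZE 1024

struct entry {
        uint32_t addr;
        int dest;                      /* stands in for struct ip_vs_dest * */
        struct entry *next;
};

static struct entry *bucket[TAB_SIZE];
static pthread_rwlock_t sched_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Caller must hold sched_lock, read or write side. */
static struct entry *cache_get(uint32_t addr)
{
        struct entry *en;

        for (en = bucket[addr % TAB_SIZE]; en; en = en->next)
                if (en->addr == addr)
                        return en;
        return NULL;
}

int lookup_or_schedule(uint32_t daddr, int (*schedule)(void))
{
        struct entry *en;
        int dest = -1;

        pthread_rwlock_rdlock(&sched_lock);     /* fast path: concurrent readers */
        en = cache_get(daddr);
        if (en)
                dest = en->dest;
        pthread_rwlock_unlock(&sched_lock);

        if (dest >= 0)
                return dest;                    /* cache hit */

        dest = schedule();                      /* slow path, no cache lock held */
        if (dest < 0)
                return -1;

        pthread_rwlock_wrlock(&sched_lock);     /* short write-side section */
        en = cache_get(daddr);                  /* re-check: another thread may
                                                 * have inserted meanwhile */
        if (!en && (en = calloc(1, sizeof(*en))) != NULL) {
                en->addr = daddr;
                en->next = bucket[daddr % TAB_SIZE];
                bucket[daddr % TAB_SIZE] = en;
        }
        if (en)
                en->dest = dest;
        pthread_rwlock_unlock(&sched_lock);

        return dest;                            /* valid even if insert failed */
}

A racing insert is benign: the write-side re-lookup mirrors the get-or-create shape of ip_vs_lblc_new(), and a failed allocation just means the next packet schedules again, as the "If we fail to create a cache entry" comment in the patch notes.
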
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index c234e73968a6..375a1ffb6b65 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c | |||
@@ -106,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
106 | return NULL; | 106 | return NULL; |
107 | } | 107 | } |
108 | 108 | ||
109 | e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC); | 109 | e = kmalloc(sizeof(*e), GFP_ATOMIC); |
110 | if (e == NULL) { | 110 | if (e == NULL) { |
111 | IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); | 111 | IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); |
112 | return NULL; | 112 | return NULL; |
@@ -116,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
116 | e->dest = dest; | 116 | e->dest = dest; |
117 | 117 | ||
118 | /* link it to the list */ | 118 | /* link it to the list */ |
119 | write_lock(&set->lock); | ||
120 | e->next = set->list; | 119 | e->next = set->list; |
121 | set->list = e; | 120 | set->list = e; |
122 | atomic_inc(&set->size); | 121 | atomic_inc(&set->size); |
123 | write_unlock(&set->lock); | ||
124 | 122 | ||
125 | set->lastmod = jiffies; | 123 | set->lastmod = jiffies; |
126 | return e; | 124 | return e; |
@@ -131,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
131 | { | 129 | { |
132 | struct ip_vs_dest_list *e, **ep; | 130 | struct ip_vs_dest_list *e, **ep; |
133 | 131 | ||
134 | write_lock(&set->lock); | ||
135 | for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { | 132 | for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { |
136 | if (e->dest == dest) { | 133 | if (e->dest == dest) { |
137 | /* HIT */ | 134 | /* HIT */ |
@@ -144,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) | |||
144 | } | 141 | } |
145 | ep = &e->next; | 142 | ep = &e->next; |
146 | } | 143 | } |
147 | write_unlock(&set->lock); | ||
148 | } | 144 | } |
149 | 145 | ||
150 | static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) | 146 | static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) |
@@ -174,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
174 | if (set == NULL) | 170 | if (set == NULL) |
175 | return NULL; | 171 | return NULL; |
176 | 172 | ||
177 | read_lock(&set->lock); | ||
178 | /* select the first destination server, whose weight > 0 */ | 173 | /* select the first destination server, whose weight > 0 */ |
179 | for (e=set->list; e!=NULL; e=e->next) { | 174 | for (e=set->list; e!=NULL; e=e->next) { |
180 | least = e->dest; | 175 | least = e->dest; |
@@ -188,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
188 | goto nextstage; | 183 | goto nextstage; |
189 | } | 184 | } |
190 | } | 185 | } |
191 | read_unlock(&set->lock); | ||
192 | return NULL; | 186 | return NULL; |
193 | 187 | ||
194 | /* find the destination with the weighted least load */ | 188 | /* find the destination with the weighted least load */ |
@@ -207,7 +201,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) | |||
207 | loh = doh; | 201 | loh = doh; |
208 | } | 202 | } |
209 | } | 203 | } |
210 | read_unlock(&set->lock); | ||
211 | 204 | ||
212 | IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " | 205 | IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " |
213 | "activeconns %d refcnt %d weight %d overhead %d\n", | 206 | "activeconns %d refcnt %d weight %d overhead %d\n", |
@@ -229,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
229 | if (set == NULL) | 222 | if (set == NULL) |
230 | return NULL; | 223 | return NULL; |
231 | 224 | ||
232 | read_lock(&set->lock); | ||
233 | /* select the first destination server, whose weight > 0 */ | 225 | /* select the first destination server, whose weight > 0 */ |
234 | for (e=set->list; e!=NULL; e=e->next) { | 226 | for (e=set->list; e!=NULL; e=e->next) { |
235 | most = e->dest; | 227 | most = e->dest; |
@@ -239,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
239 | goto nextstage; | 231 | goto nextstage; |
240 | } | 232 | } |
241 | } | 233 | } |
242 | read_unlock(&set->lock); | ||
243 | return NULL; | 234 | return NULL; |
244 | 235 | ||
245 | /* find the destination with the weighted most load */ | 236 | /* find the destination with the weighted most load */ |
@@ -256,7 +247,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) | |||
256 | moh = doh; | 247 | moh = doh; |
257 | } | 248 | } |
258 | } | 249 | } |
259 | read_unlock(&set->lock); | ||
260 | 250 | ||
261 | IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " | 251 | IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " |
262 | "activeconns %d refcnt %d weight %d overhead %d\n", | 252 | "activeconns %d refcnt %d weight %d overhead %d\n", |
@@ -284,7 +274,6 @@ struct ip_vs_lblcr_entry { | |||
284 | * IPVS lblcr hash table | 274 | * IPVS lblcr hash table |
285 | */ | 275 | */ |
286 | struct ip_vs_lblcr_table { | 276 | struct ip_vs_lblcr_table { |
287 | rwlock_t lock; /* lock for this table */ | ||
288 | struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ | 277 | struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ |
289 | atomic_t entries; /* number of entries */ | 278 | atomic_t entries; /* number of entries */ |
290 | int max_size; /* maximum size of entries */ | 279 | int max_size; /* maximum size of entries */ |
@@ -311,32 +300,6 @@ static ctl_table vs_vars_table[] = { | |||
311 | 300 | ||
312 | static struct ctl_table_header * sysctl_header; | 301 | static struct ctl_table_header * sysctl_header; |
313 | 302 | ||
314 | /* | ||
315 | * new/free a ip_vs_lblcr_entry, which is a mapping of a destination | ||
316 | * IP address to a server. | ||
317 | */ | ||
318 | static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) | ||
319 | { | ||
320 | struct ip_vs_lblcr_entry *en; | ||
321 | |||
322 | en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC); | ||
323 | if (en == NULL) { | ||
324 | IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); | ||
325 | return NULL; | ||
326 | } | ||
327 | |||
328 | INIT_LIST_HEAD(&en->list); | ||
329 | en->addr = daddr; | ||
330 | |||
331 | /* initialize its dest set */ |||
332 | atomic_set(&(en->set.size), 0); | ||
333 | en->set.list = NULL; | ||
334 | rwlock_init(&en->set.lock); | ||
335 | |||
336 | return en; | ||
337 | } | ||
338 | |||
339 | |||
340 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) | 303 | static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) |
341 | { | 304 | { |
342 | list_del(&en->list); | 305 | list_del(&en->list); |
@@ -358,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) | |||
358 | * Hash an entry in the ip_vs_lblcr_table. | 321 | * Hash an entry in the ip_vs_lblcr_table. |
359 | * returns bool success. | 322 | * returns bool success. |
360 | */ | 323 | */ |
361 | static int | 324 | static void |
362 | ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) | 325 | ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) |
363 | { | 326 | { |
364 | unsigned hash; | 327 | unsigned hash = ip_vs_lblcr_hashkey(en->addr); |
365 | |||
366 | if (!list_empty(&en->list)) { | ||
367 | IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, " | ||
368 | "called from %p\n", __builtin_return_address(0)); | ||
369 | return 0; | ||
370 | } | ||
371 | 328 | ||
372 | /* | ||
373 | * Hash by destination IP address | ||
374 | */ | ||
375 | hash = ip_vs_lblcr_hashkey(en->addr); | ||
376 | |||
377 | write_lock(&tbl->lock); | ||
378 | list_add(&en->list, &tbl->bucket[hash]); | 329 | list_add(&en->list, &tbl->bucket[hash]); |
379 | atomic_inc(&tbl->entries); | 330 | atomic_inc(&tbl->entries); |
380 | write_unlock(&tbl->lock); | ||
381 | |||
382 | return 1; | ||
383 | } | 331 | } |
384 | 332 | ||
385 | 333 | ||
386 | /* | 334 | /* |
387 | * Get ip_vs_lblcr_entry associated with supplied parameters. | 335 | * Get ip_vs_lblcr_entry associated with supplied parameters. Called under |
336 | * read lock. | ||
388 | */ | 337 | */ |
389 | static inline struct ip_vs_lblcr_entry * | 338 | static inline struct ip_vs_lblcr_entry * |
390 | ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) | 339 | ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) |
391 | { | 340 | { |
392 | unsigned hash; | 341 | unsigned hash = ip_vs_lblcr_hashkey(addr); |
393 | struct ip_vs_lblcr_entry *en; | 342 | struct ip_vs_lblcr_entry *en; |
394 | 343 | ||
395 | hash = ip_vs_lblcr_hashkey(addr); | 344 | list_for_each_entry(en, &tbl->bucket[hash], list) |
345 | if (en->addr == addr) | ||
346 | return en; | ||
396 | 347 | ||
397 | read_lock(&tbl->lock); | 348 | return NULL; |
349 | } | ||
398 | 350 | ||
399 | list_for_each_entry(en, &tbl->bucket[hash], list) { | 351 | |
400 | if (en->addr == addr) { | 352 | /* |
401 | /* HIT */ | 353 | * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination |
402 | read_unlock(&tbl->lock); | 354 | * IP address to a server. Called under write lock. |
403 | return en; | 355 | */ |
356 | static inline struct ip_vs_lblcr_entry * | ||
357 | ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, | ||
358 | struct ip_vs_dest *dest) | ||
359 | { | ||
360 | struct ip_vs_lblcr_entry *en; | ||
361 | |||
362 | en = ip_vs_lblcr_get(tbl, daddr); | ||
363 | if (!en) { | ||
364 | en = kmalloc(sizeof(*en), GFP_ATOMIC); | ||
365 | if (!en) { | ||
366 | IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); | ||
367 | return NULL; | ||
404 | } | 368 | } |
369 | |||
370 | en->addr = daddr; | ||
371 | en->lastuse = jiffies; | ||
372 | |||
373 | /* initialize its dest set */ |||
374 | atomic_set(&(en->set.size), 0); | ||
375 | en->set.list = NULL; | ||
376 | rwlock_init(&en->set.lock); | ||
377 | |||
378 | ip_vs_lblcr_hash(tbl, en); | ||
405 | } | 379 | } |
406 | 380 | ||
407 | read_unlock(&tbl->lock); | 381 | write_lock(&en->set.lock); |
382 | ip_vs_dest_set_insert(&en->set, dest); | ||
383 | write_unlock(&en->set.lock); | ||
408 | 384 | ||
409 | return NULL; | 385 | return en; |
410 | } | 386 | } |
411 | 387 | ||
412 | 388 | ||
@@ -418,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) | |||
418 | int i; | 394 | int i; |
419 | struct ip_vs_lblcr_entry *en, *nxt; | 395 | struct ip_vs_lblcr_entry *en, *nxt; |
420 | 396 | ||
397 | /* No locking required, only called during cleanup. */ | ||
421 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 398 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
422 | write_lock(&tbl->lock); | ||
423 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { | 399 | list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { |
424 | ip_vs_lblcr_free(en); | 400 | ip_vs_lblcr_free(en); |
425 | atomic_dec(&tbl->entries); | ||
426 | } | 401 | } |
427 | write_unlock(&tbl->lock); | ||
428 | } | 402 | } |
429 | } | 403 | } |
430 | 404 | ||
431 | 405 | ||
432 | static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | 406 | static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) |
433 | { | 407 | { |
408 | struct ip_vs_lblcr_table *tbl = svc->sched_data; | ||
434 | unsigned long now = jiffies; | 409 | unsigned long now = jiffies; |
435 | int i, j; | 410 | int i, j; |
436 | struct ip_vs_lblcr_entry *en, *nxt; | 411 | struct ip_vs_lblcr_entry *en, *nxt; |
@@ -438,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | |||
438 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 413 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
439 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | 414 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; |
440 | 415 | ||
441 | write_lock(&tbl->lock); | 416 | write_lock(&svc->sched_lock); |
442 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 417 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
443 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, | 418 | if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, |
444 | now)) | 419 | now)) |
@@ -447,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | |||
447 | ip_vs_lblcr_free(en); | 422 | ip_vs_lblcr_free(en); |
448 | atomic_dec(&tbl->entries); | 423 | atomic_dec(&tbl->entries); |
449 | } | 424 | } |
450 | write_unlock(&tbl->lock); | 425 | write_unlock(&svc->sched_lock); |
451 | } | 426 | } |
452 | tbl->rover = j; | 427 | tbl->rover = j; |
453 | } | 428 | } |
@@ -466,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) | |||
466 | */ | 441 | */ |
467 | static void ip_vs_lblcr_check_expire(unsigned long data) | 442 | static void ip_vs_lblcr_check_expire(unsigned long data) |
468 | { | 443 | { |
469 | struct ip_vs_lblcr_table *tbl; | 444 | struct ip_vs_service *svc = (struct ip_vs_service *) data; |
445 | struct ip_vs_lblcr_table *tbl = svc->sched_data; | ||
470 | unsigned long now = jiffies; | 446 | unsigned long now = jiffies; |
471 | int goal; | 447 | int goal; |
472 | int i, j; | 448 | int i, j; |
473 | struct ip_vs_lblcr_entry *en, *nxt; | 449 | struct ip_vs_lblcr_entry *en, *nxt; |
474 | 450 | ||
475 | tbl = (struct ip_vs_lblcr_table *)data; | ||
476 | |||
477 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { | 451 | if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { |
478 | /* do full expiration check */ | 452 | /* do full expiration check */ |
479 | ip_vs_lblcr_full_check(tbl); | 453 | ip_vs_lblcr_full_check(svc); |
480 | tbl->counter = 1; | 454 | tbl->counter = 1; |
481 | goto out; | 455 | goto out; |
482 | } | 456 | } |
@@ -493,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) | |||
493 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 467 | for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
494 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; | 468 | j = (j + 1) & IP_VS_LBLCR_TAB_MASK; |
495 | 469 | ||
496 | write_lock(&tbl->lock); | 470 | write_lock(&svc->sched_lock); |
497 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { | 471 | list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { |
498 | if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) | 472 | if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) |
499 | continue; | 473 | continue; |
@@ -502,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) | |||
502 | atomic_dec(&tbl->entries); | 476 | atomic_dec(&tbl->entries); |
503 | goal--; | 477 | goal--; |
504 | } | 478 | } |
505 | write_unlock(&tbl->lock); | 479 | write_unlock(&svc->sched_lock); |
506 | if (goal <= 0) | 480 | if (goal <= 0) |
507 | break; | 481 | break; |
508 | } | 482 | } |
@@ -520,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
520 | /* | 494 | /* |
521 | * Allocate the ip_vs_lblcr_table for this service | 495 | * Allocate the ip_vs_lblcr_table for this service |
522 | */ | 496 | */ |
523 | tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC); | 497 | tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); |
524 | if (tbl == NULL) { | 498 | if (tbl == NULL) { |
525 | IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); | 499 | IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); |
526 | return -ENOMEM; | 500 | return -ENOMEM; |
527 | } | 501 | } |
528 | svc->sched_data = tbl; | 502 | svc->sched_data = tbl; |
529 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " | 503 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " |
530 | "current service\n", | 504 | "current service\n", sizeof(*tbl)); |
531 | sizeof(struct ip_vs_lblcr_table)); | ||
532 | 505 | ||
533 | /* | 506 | /* |
534 | * Initialize the hash buckets | 507 | * Initialize the hash buckets |
@@ -536,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
536 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { | 509 | for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { |
537 | INIT_LIST_HEAD(&tbl->bucket[i]); | 510 | INIT_LIST_HEAD(&tbl->bucket[i]); |
538 | } | 511 | } |
539 | rwlock_init(&tbl->lock); | ||
540 | tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; | 512 | tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; |
541 | tbl->rover = 0; | 513 | tbl->rover = 0; |
542 | tbl->counter = 1; | 514 | tbl->counter = 1; |
@@ -545,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) | |||
545 | * Hook periodic timer for garbage collection | 517 | * Hook periodic timer for garbage collection |
546 | */ | 518 | */ |
547 | setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, | 519 | setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, |
548 | (unsigned long)tbl); | 520 | (unsigned long)svc); |
549 | tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; | 521 | mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); |
550 | add_timer(&tbl->periodic_timer); | ||
551 | 522 | ||
552 | return 0; | 523 | return 0; |
553 | } | 524 | } |
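
The init path above also switches the garbage-collection timer to setup_timer() plus mod_timer(), and hands the service rather than the table to the callback so the expiry handler can reach svc->sched_lock through svc->sched_data. The change in idiom, isolated below as kernel code fragments (not standalone-compilable), is small but removes a footgun:

/* Before: set the expiry by hand, then add_timer(); calling add_timer()
 * on an already-pending timer would be a bug. */
tbl->periodic_timer.expires = jiffies + CHECK_EXPIRE_INTERVAL;
add_timer(&tbl->periodic_timer);

/* After: mod_timer() sets the expiry and (re)arms the timer in one call,
 * and is safe whether or not the timer is already pending. */
mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL);
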
@@ -564,22 +535,16 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) | |||
564 | ip_vs_lblcr_flush(tbl); | 535 | ip_vs_lblcr_flush(tbl); |
565 | 536 | ||
566 | /* release the table itself */ | 537 | /* release the table itself */ |
567 | kfree(svc->sched_data); | 538 | kfree(tbl); |
568 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", | 539 | IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", |
569 | sizeof(struct ip_vs_lblcr_table)); | 540 | sizeof(*tbl)); |
570 | 541 | ||
571 | return 0; | 542 | return 0; |
572 | } | 543 | } |
573 | 544 | ||
574 | 545 | ||
575 | static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc) | ||
576 | { | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | |||
581 | static inline struct ip_vs_dest * | 546 | static inline struct ip_vs_dest * |
582 | __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) | 547 | __ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) |
583 | { | 548 | { |
584 | struct ip_vs_dest *dest, *least; | 549 | struct ip_vs_dest *dest, *least; |
585 | int loh, doh; | 550 | int loh, doh; |
@@ -669,50 +634,78 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) | |||
669 | static struct ip_vs_dest * | 634 | static struct ip_vs_dest * |
670 | ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) | 635 | ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) |
671 | { | 636 | { |
672 | struct ip_vs_dest *dest; | 637 | struct ip_vs_lblcr_table *tbl = svc->sched_data; |
673 | struct ip_vs_lblcr_table *tbl; | ||
674 | struct ip_vs_lblcr_entry *en; | ||
675 | struct iphdr *iph = ip_hdr(skb); | 638 | struct iphdr *iph = ip_hdr(skb); |
639 | struct ip_vs_dest *dest = NULL; | ||
640 | struct ip_vs_lblcr_entry *en; | ||
676 | 641 | ||
677 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); | 642 | IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); |
678 | 643 | ||
679 | tbl = (struct ip_vs_lblcr_table *)svc->sched_data; | 644 | /* First look in our cache */ |
645 | read_lock(&svc->sched_lock); | ||
680 | en = ip_vs_lblcr_get(tbl, iph->daddr); | 646 | en = ip_vs_lblcr_get(tbl, iph->daddr); |
681 | if (en == NULL) { | 647 | if (en) { |
682 | dest = __ip_vs_wlc_schedule(svc, iph); | 648 | /* We only hold a read lock, but this is atomic */ |
683 | if (dest == NULL) { | 649 | en->lastuse = jiffies; |
684 | IP_VS_DBG(1, "no destination available\n"); | 650 | |
685 | return NULL; | 651 | /* Get the least loaded destination */ |
686 | } | 652 | read_lock(&en->set.lock); |
687 | en = ip_vs_lblcr_new(iph->daddr); | ||
688 | if (en == NULL) { | ||
689 | return NULL; | ||
690 | } | ||
691 | ip_vs_dest_set_insert(&en->set, dest); | ||
692 | ip_vs_lblcr_hash(tbl, en); | ||
693 | } else { | ||
694 | dest = ip_vs_dest_set_min(&en->set); | 653 | dest = ip_vs_dest_set_min(&en->set); |
695 | if (!dest || is_overloaded(dest, svc)) { | 654 | read_unlock(&en->set.lock); |
696 | dest = __ip_vs_wlc_schedule(svc, iph); | 655 | |
697 | if (dest == NULL) { | 656 | /* More than one destination and enough time has passed: clean up */ |
698 | IP_VS_DBG(1, "no destination available\n"); | ||
699 | return NULL; | ||
700 | } | ||
701 | ip_vs_dest_set_insert(&en->set, dest); | ||
702 | } | ||
703 | if (atomic_read(&en->set.size) > 1 && | 657 | if (atomic_read(&en->set.size) > 1 && |
704 | jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) { | 658 | time_after(jiffies, en->set.lastmod + |
659 | sysctl_ip_vs_lblcr_expiration)) { | ||
705 | struct ip_vs_dest *m; | 660 | struct ip_vs_dest *m; |
661 | |||
662 | write_lock(&en->set.lock); | ||
706 | m = ip_vs_dest_set_max(&en->set); | 663 | m = ip_vs_dest_set_max(&en->set); |
707 | if (m) | 664 | if (m) |
708 | ip_vs_dest_set_erase(&en->set, m); | 665 | ip_vs_dest_set_erase(&en->set, m); |
666 | write_unlock(&en->set.lock); | ||
667 | } | ||
668 | |||
669 | /* If the destination is not overloaded, use it */ | ||
670 | if (dest && !is_overloaded(dest, svc)) { | ||
671 | read_unlock(&svc->sched_lock); | ||
672 | goto out; | ||
709 | } | 673 | } |
674 | |||
675 | /* The cache entry is invalid, time to schedule */ | ||
676 | dest = __ip_vs_lblcr_schedule(svc, iph); | ||
677 | if (!dest) { | ||
678 | IP_VS_DBG(1, "no destination available\n"); | ||
679 | read_unlock(&svc->sched_lock); | ||
680 | return NULL; | ||
681 | } | ||
682 | |||
683 | /* Update our cache entry */ | ||
684 | write_lock(&en->set.lock); | ||
685 | ip_vs_dest_set_insert(&en->set, dest); | ||
686 | write_unlock(&en->set.lock); | ||
687 | } | ||
688 | read_unlock(&svc->sched_lock); | ||
689 | |||
690 | if (dest) | ||
691 | goto out; | ||
692 | |||
693 | /* No cache entry, time to schedule */ | ||
694 | dest = __ip_vs_lblcr_schedule(svc, iph); | ||
695 | if (!dest) { | ||
696 | IP_VS_DBG(1, "no destination available\n"); | ||
697 | return NULL; | ||
710 | } | 698 | } |
711 | en->lastuse = jiffies; | ||
712 | 699 | ||
700 | /* If we fail to create a cache entry, we'll just use the valid dest */ | ||
701 | write_lock(&svc->sched_lock); | ||
702 | ip_vs_lblcr_new(tbl, iph->daddr, dest); | ||
703 | write_unlock(&svc->sched_lock); | ||
704 | |||
705 | out: | ||
713 | IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " | 706 | IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " |
714 | "--> server %u.%u.%u.%u:%d\n", | 707 | "--> server %u.%u.%u.%u:%d\n", |
715 | NIPQUAD(en->addr), | 708 | NIPQUAD(iph->daddr), |
716 | NIPQUAD(dest->addr), | 709 | NIPQUAD(dest->addr), |
717 | ntohs(dest->port)); | 710 | ntohs(dest->port)); |
718 | 711 | ||
@@ -731,7 +724,6 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = | |||
731 | .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), | 724 | .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), |
732 | .init_service = ip_vs_lblcr_init_svc, | 725 | .init_service = ip_vs_lblcr_init_svc, |
733 | .done_service = ip_vs_lblcr_done_svc, | 726 | .done_service = ip_vs_lblcr_done_svc, |
734 | .update_service = ip_vs_lblcr_update_svc, | ||
735 | .schedule = ip_vs_lblcr_schedule, | 727 | .schedule = ip_vs_lblcr_schedule, |
736 | }; | 728 | }; |
737 | 729 | ||
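
ip_vs_lblcr.c layers a second lock under the same scheme: svc->sched_lock guards the hash table while each entry's en->set.lock guards that entry's destination set, always taken in that order. A self-contained sketch of the nesting, with pthread rwlocks in place of the kernel's and purely illustrative names:

#include <pthread.h>

struct dest_set {
        pthread_rwlock_t lock;     /* plays the role of en->set.lock */
        int members[8];            /* stand-ins for struct ip_vs_dest * */
        int nr;
};

static pthread_rwlock_t sched_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Caller holds set->lock; the read side is enough for a lookup. */
static int set_min(const struct dest_set *s)
{
        int i, best = -1;

        for (i = 0; i < s->nr; i++)
                if (best == -1 || s->members[i] < s->members[best])
                        best = i;
        return best == -1 ? -1 : s->members[best];
}

/* The outer read lock keeps the entry alive; the inner lock guards its
 * set. Taking them in the fixed order outer -> inner keeps the nesting
 * deadlock-free, mirroring sched_lock vs. en->set.lock above. */
int pick_from_entry(struct dest_set *set)
{
        int dest;

        pthread_rwlock_rdlock(&sched_lock);
        pthread_rwlock_rdlock(&set->lock);
        dest = set_min(set);
        pthread_rwlock_unlock(&set->lock);
        pthread_rwlock_unlock(&sched_lock);
        return dest;
}
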
diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/ipv4/ipvs/ip_vs_lc.c index ebcdbf75ac65..2c3de1b63518 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/ipv4/ipvs/ip_vs_lc.c | |||
@@ -20,24 +20,6 @@ | |||
20 | #include <net/ip_vs.h> | 20 | #include <net/ip_vs.h> |
21 | 21 | ||
22 | 22 | ||
23 | static int ip_vs_lc_init_svc(struct ip_vs_service *svc) | ||
24 | { | ||
25 | return 0; | ||
26 | } | ||
27 | |||
28 | |||
29 | static int ip_vs_lc_done_svc(struct ip_vs_service *svc) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int ip_vs_lc_update_svc(struct ip_vs_service *svc) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | |||
41 | static inline unsigned int | 23 | static inline unsigned int |
42 | ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) | 24 | ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) |
43 | { | 25 | { |
@@ -99,9 +81,6 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { | |||
99 | .refcnt = ATOMIC_INIT(0), | 81 | .refcnt = ATOMIC_INIT(0), |
100 | .module = THIS_MODULE, | 82 | .module = THIS_MODULE, |
101 | .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), | 83 | .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), |
102 | .init_service = ip_vs_lc_init_svc, | ||
103 | .done_service = ip_vs_lc_done_svc, | ||
104 | .update_service = ip_vs_lc_update_svc, | ||
105 | .schedule = ip_vs_lc_schedule, | 84 | .schedule = ip_vs_lc_schedule, |
106 | }; | 85 | }; |
107 | 86 | ||
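
ip_vs_lc.c, and ip_vs_nq.c, ip_vs_sed.c and ip_vs_wlc.c further down, simply delete their do-nothing init_service/done_service/update_service stubs. That only works if the IPVS core now treats these hooks as optional; the NULL guard sketched here is an assumption about the caller, not something visible in this diff:

/* Illustrative hook table: everything except ->schedule may be NULL. */
struct sched_ops {
        int (*init_service)(void *svc);
        int (*done_service)(void *svc);
        int (*update_service)(void *svc);
};

/* Assumed caller-side pattern: invoke a hook only when it is set,
 * instead of forcing every scheduler to register an empty stub. */
static int svc_bind(const struct sched_ops *ops, void *svc)
{
        if (ops->init_service && ops->init_service(svc) != 0)
                return -1;
        return 0;
}

static int svc_update(const struct sched_ops *ops, void *svc)
{
        return ops->update_service ? ops->update_service(svc) : 0;
}
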
diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/ipv4/ipvs/ip_vs_nq.c index 92f3a6770031..5330d5a2de14 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/ipv4/ipvs/ip_vs_nq.c | |||
@@ -37,27 +37,6 @@ | |||
37 | #include <net/ip_vs.h> | 37 | #include <net/ip_vs.h> |
38 | 38 | ||
39 | 39 | ||
40 | static int | ||
41 | ip_vs_nq_init_svc(struct ip_vs_service *svc) | ||
42 | { | ||
43 | return 0; | ||
44 | } | ||
45 | |||
46 | |||
47 | static int | ||
48 | ip_vs_nq_done_svc(struct ip_vs_service *svc) | ||
49 | { | ||
50 | return 0; | ||
51 | } | ||
52 | |||
53 | |||
54 | static int | ||
55 | ip_vs_nq_update_svc(struct ip_vs_service *svc) | ||
56 | { | ||
57 | return 0; | ||
58 | } | ||
59 | |||
60 | |||
61 | static inline unsigned int | 40 | static inline unsigned int |
62 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) | 41 | ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) |
63 | { | 42 | { |
@@ -137,9 +116,6 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = | |||
137 | .refcnt = ATOMIC_INIT(0), | 116 | .refcnt = ATOMIC_INIT(0), |
138 | .module = THIS_MODULE, | 117 | .module = THIS_MODULE, |
139 | .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), | 118 | .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), |
140 | .init_service = ip_vs_nq_init_svc, | ||
141 | .done_service = ip_vs_nq_done_svc, | ||
142 | .update_service = ip_vs_nq_update_svc, | ||
143 | .schedule = ip_vs_nq_schedule, | 119 | .schedule = ip_vs_nq_schedule, |
144 | }; | 120 | }; |
145 | 121 | ||
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c index 73e0ea87c1f5..3f9ebd7639ae 100644 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ b/net/ipv4/ipvs/ip_vs_proto_ah_esp.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS | 2 | * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS |
3 | * | 3 | * |
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | 4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 |
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | 5 | * Wensong Zhang <wensong@linuxvirtualserver.org> |
@@ -39,11 +39,11 @@ struct isakmp_hdr { | |||
39 | 39 | ||
40 | 40 | ||
41 | static struct ip_vs_conn * | 41 | static struct ip_vs_conn * |
42 | ah_conn_in_get(const struct sk_buff *skb, | 42 | ah_esp_conn_in_get(const struct sk_buff *skb, |
43 | struct ip_vs_protocol *pp, | 43 | struct ip_vs_protocol *pp, |
44 | const struct iphdr *iph, | 44 | const struct iphdr *iph, |
45 | unsigned int proto_off, | 45 | unsigned int proto_off, |
46 | int inverse) | 46 | int inverse) |
47 | { | 47 | { |
48 | struct ip_vs_conn *cp; | 48 | struct ip_vs_conn *cp; |
49 | 49 | ||
@@ -79,8 +79,8 @@ ah_conn_in_get(const struct sk_buff *skb, | |||
79 | 79 | ||
80 | 80 | ||
81 | static struct ip_vs_conn * | 81 | static struct ip_vs_conn * |
82 | ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | 82 | ah_esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, |
83 | const struct iphdr *iph, unsigned int proto_off, int inverse) | 83 | const struct iphdr *iph, unsigned int proto_off, int inverse) |
84 | { | 84 | { |
85 | struct ip_vs_conn *cp; | 85 | struct ip_vs_conn *cp; |
86 | 86 | ||
@@ -112,12 +112,12 @@ ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | |||
112 | 112 | ||
113 | 113 | ||
114 | static int | 114 | static int |
115 | ah_conn_schedule(struct sk_buff *skb, | 115 | ah_esp_conn_schedule(struct sk_buff *skb, |
116 | struct ip_vs_protocol *pp, | 116 | struct ip_vs_protocol *pp, |
117 | int *verdict, struct ip_vs_conn **cpp) | 117 | int *verdict, struct ip_vs_conn **cpp) |
118 | { | 118 | { |
119 | /* | 119 | /* |
120 | * AH is only related traffic. Pass the packet to IP stack. | 120 | * AH/ESP is only related traffic. Pass the packet to IP stack. |
121 | */ | 121 | */ |
122 | *verdict = NF_ACCEPT; | 122 | *verdict = NF_ACCEPT; |
123 | return 0; | 123 | return 0; |
@@ -125,8 +125,8 @@ ah_conn_schedule(struct sk_buff *skb, | |||
125 | 125 | ||
126 | 126 | ||
127 | static void | 127 | static void |
128 | ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | 128 | ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, |
129 | int offset, const char *msg) | 129 | int offset, const char *msg) |
130 | { | 130 | { |
131 | char buf[256]; | 131 | char buf[256]; |
132 | struct iphdr _iph, *ih; | 132 | struct iphdr _iph, *ih; |
@@ -143,28 +143,29 @@ ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | |||
143 | } | 143 | } |
144 | 144 | ||
145 | 145 | ||
146 | static void ah_init(struct ip_vs_protocol *pp) | 146 | static void ah_esp_init(struct ip_vs_protocol *pp) |
147 | { | 147 | { |
148 | /* nothing to do now */ | 148 | /* nothing to do now */ |
149 | } | 149 | } |
150 | 150 | ||
151 | 151 | ||
152 | static void ah_exit(struct ip_vs_protocol *pp) | 152 | static void ah_esp_exit(struct ip_vs_protocol *pp) |
153 | { | 153 | { |
154 | /* nothing to do now */ | 154 | /* nothing to do now */ |
155 | } | 155 | } |
156 | 156 | ||
157 | 157 | ||
158 | #ifdef CONFIG_IP_VS_PROTO_AH | ||
158 | struct ip_vs_protocol ip_vs_protocol_ah = { | 159 | struct ip_vs_protocol ip_vs_protocol_ah = { |
159 | .name = "AH", | 160 | .name = "AH", |
160 | .protocol = IPPROTO_AH, | 161 | .protocol = IPPROTO_AH, |
161 | .num_states = 1, | 162 | .num_states = 1, |
162 | .dont_defrag = 1, | 163 | .dont_defrag = 1, |
163 | .init = ah_init, | 164 | .init = ah_esp_init, |
164 | .exit = ah_exit, | 165 | .exit = ah_esp_exit, |
165 | .conn_schedule = ah_conn_schedule, | 166 | .conn_schedule = ah_esp_conn_schedule, |
166 | .conn_in_get = ah_conn_in_get, | 167 | .conn_in_get = ah_esp_conn_in_get, |
167 | .conn_out_get = ah_conn_out_get, | 168 | .conn_out_get = ah_esp_conn_out_get, |
168 | .snat_handler = NULL, | 169 | .snat_handler = NULL, |
169 | .dnat_handler = NULL, | 170 | .dnat_handler = NULL, |
170 | .csum_check = NULL, | 171 | .csum_check = NULL, |
@@ -172,7 +173,31 @@ struct ip_vs_protocol ip_vs_protocol_ah = { | |||
172 | .register_app = NULL, | 173 | .register_app = NULL, |
173 | .unregister_app = NULL, | 174 | .unregister_app = NULL, |
174 | .app_conn_bind = NULL, | 175 | .app_conn_bind = NULL, |
175 | .debug_packet = ah_debug_packet, | 176 | .debug_packet = ah_esp_debug_packet, |
176 | .timeout_change = NULL, /* ISAKMP */ | 177 | .timeout_change = NULL, /* ISAKMP */ |
177 | .set_state_timeout = NULL, | 178 | .set_state_timeout = NULL, |
178 | }; | 179 | }; |
180 | #endif | ||
181 | |||
182 | #ifdef CONFIG_IP_VS_PROTO_ESP | ||
183 | struct ip_vs_protocol ip_vs_protocol_esp = { | ||
184 | .name = "ESP", | ||
185 | .protocol = IPPROTO_ESP, | ||
186 | .num_states = 1, | ||
187 | .dont_defrag = 1, | ||
188 | .init = ah_esp_init, | ||
189 | .exit = ah_esp_exit, | ||
190 | .conn_schedule = ah_esp_conn_schedule, | ||
191 | .conn_in_get = ah_esp_conn_in_get, | ||
192 | .conn_out_get = ah_esp_conn_out_get, | ||
193 | .snat_handler = NULL, | ||
194 | .dnat_handler = NULL, | ||
195 | .csum_check = NULL, | ||
196 | .state_transition = NULL, | ||
197 | .register_app = NULL, | ||
198 | .unregister_app = NULL, | ||
199 | .app_conn_bind = NULL, | ||
200 | .debug_packet = ah_esp_debug_packet, | ||
201 | .timeout_change = NULL, /* ISAKMP */ | ||
202 | }; | ||
203 | #endif | ||
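
The rename to ip_vs_proto_ah_esp.c works because AH and ESP shared every handler except the protocol number: one file now defines the common functions once and emits each ip_vs_protocol instance behind its own Kconfig guard (the CONFIG_IP_VS_PROTO_AH/ESP macros visible above, with the new IP_VS_PROTO_AH_ESP helper symbol pulling the file in). A stripped-down sketch of the shape, with illustrative types:

/* Shared handlers defined once; each exported instance is compiled
 * only when its option is enabled, from exactly one file either way. */
struct proto {
        const char *name;
        int ipproto;
        int (*handle)(const void *pkt);
};

static int ah_esp_handle(const void *pkt)
{
        return 0;               /* common AH/ESP logic lives here once */
}

#ifdef CONFIG_IP_VS_PROTO_AH
struct proto proto_ah  = { "AH",  51, ah_esp_handle };   /* IPPROTO_AH  */
#endif

#ifdef CONFIG_IP_VS_PROTO_ESP
struct proto proto_esp = { "ESP", 50, ah_esp_handle };   /* IPPROTO_ESP */
#endif
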
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c deleted file mode 100644 index 21d70c8ffa54..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ /dev/null | |||
@@ -1,176 +0,0 @@ | |||
1 | /* | ||
2 | * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS | ||
3 | * | ||
4 | * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 | ||
5 | * Wensong Zhang <wensong@linuxvirtualserver.org> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * version 2 as published by the Free Software Foundation; | ||
10 | * | ||
11 | */ | ||
12 | |||
13 | #include <linux/in.h> | ||
14 | #include <linux/ip.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/netfilter.h> | ||
18 | #include <linux/netfilter_ipv4.h> | ||
19 | |||
20 | #include <net/ip_vs.h> | ||
21 | |||
22 | |||
23 | /* TODO: | ||
24 | |||
25 | struct isakmp_hdr { | ||
26 | __u8 icookie[8]; | ||
27 | __u8 rcookie[8]; | ||
28 | __u8 np; | ||
29 | __u8 version; | ||
30 | __u8 xchgtype; | ||
31 | __u8 flags; | ||
32 | __u32 msgid; | ||
33 | __u32 length; | ||
34 | }; | ||
35 | |||
36 | */ | ||
37 | |||
38 | #define PORT_ISAKMP 500 | ||
39 | |||
40 | |||
41 | static struct ip_vs_conn * | ||
42 | esp_conn_in_get(const struct sk_buff *skb, | ||
43 | struct ip_vs_protocol *pp, | ||
44 | const struct iphdr *iph, | ||
45 | unsigned int proto_off, | ||
46 | int inverse) | ||
47 | { | ||
48 | struct ip_vs_conn *cp; | ||
49 | |||
50 | if (likely(!inverse)) { | ||
51 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
52 | iph->saddr, | ||
53 | htons(PORT_ISAKMP), | ||
54 | iph->daddr, | ||
55 | htons(PORT_ISAKMP)); | ||
56 | } else { | ||
57 | cp = ip_vs_conn_in_get(IPPROTO_UDP, | ||
58 | iph->daddr, | ||
59 | htons(PORT_ISAKMP), | ||
60 | iph->saddr, | ||
61 | htons(PORT_ISAKMP)); | ||
62 | } | ||
63 | |||
64 | if (!cp) { | ||
65 | /* | ||
66 | * We are not sure if the packet is from our | ||
67 | * service, so our conn_schedule hook should return NF_ACCEPT | ||
68 | */ | ||
69 | IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " | ||
70 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
71 | inverse ? "ICMP+" : "", | ||
72 | pp->name, | ||
73 | NIPQUAD(iph->saddr), | ||
74 | NIPQUAD(iph->daddr)); | ||
75 | } | ||
76 | |||
77 | return cp; | ||
78 | } | ||
79 | |||
80 | |||
81 | static struct ip_vs_conn * | ||
82 | esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
83 | const struct iphdr *iph, unsigned int proto_off, int inverse) | ||
84 | { | ||
85 | struct ip_vs_conn *cp; | ||
86 | |||
87 | if (likely(!inverse)) { | ||
88 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
89 | iph->saddr, | ||
90 | htons(PORT_ISAKMP), | ||
91 | iph->daddr, | ||
92 | htons(PORT_ISAKMP)); | ||
93 | } else { | ||
94 | cp = ip_vs_conn_out_get(IPPROTO_UDP, | ||
95 | iph->daddr, | ||
96 | htons(PORT_ISAKMP), | ||
97 | iph->saddr, | ||
98 | htons(PORT_ISAKMP)); | ||
99 | } | ||
100 | |||
101 | if (!cp) { | ||
102 | IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " | ||
103 | "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", | ||
104 | inverse ? "ICMP+" : "", | ||
105 | pp->name, | ||
106 | NIPQUAD(iph->saddr), | ||
107 | NIPQUAD(iph->daddr)); | ||
108 | } | ||
109 | |||
110 | return cp; | ||
111 | } | ||
112 | |||
113 | |||
114 | static int | ||
115 | esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, | ||
116 | int *verdict, struct ip_vs_conn **cpp) | ||
117 | { | ||
118 | /* | ||
119 | * ESP is only related traffic. Pass the packet to IP stack. | ||
120 | */ | ||
121 | *verdict = NF_ACCEPT; | ||
122 | return 0; | ||
123 | } | ||
124 | |||
125 | |||
126 | static void | ||
127 | esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, | ||
128 | int offset, const char *msg) | ||
129 | { | ||
130 | char buf[256]; | ||
131 | struct iphdr _iph, *ih; | ||
132 | |||
133 | ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); | ||
134 | if (ih == NULL) | ||
135 | sprintf(buf, "%s TRUNCATED", pp->name); | ||
136 | else | ||
137 | sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", | ||
138 | pp->name, NIPQUAD(ih->saddr), | ||
139 | NIPQUAD(ih->daddr)); | ||
140 | |||
141 | printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); | ||
142 | } | ||
143 | |||
144 | |||
145 | static void esp_init(struct ip_vs_protocol *pp) | ||
146 | { | ||
147 | /* nothing to do now */ | ||
148 | } | ||
149 | |||
150 | |||
151 | static void esp_exit(struct ip_vs_protocol *pp) | ||
152 | { | ||
153 | /* nothing to do now */ | ||
154 | } | ||
155 | |||
156 | |||
157 | struct ip_vs_protocol ip_vs_protocol_esp = { | ||
158 | .name = "ESP", | ||
159 | .protocol = IPPROTO_ESP, | ||
160 | .num_states = 1, | ||
161 | .dont_defrag = 1, | ||
162 | .init = esp_init, | ||
163 | .exit = esp_exit, | ||
164 | .conn_schedule = esp_conn_schedule, | ||
165 | .conn_in_get = esp_conn_in_get, | ||
166 | .conn_out_get = esp_conn_out_get, | ||
167 | .snat_handler = NULL, | ||
168 | .dnat_handler = NULL, | ||
169 | .csum_check = NULL, | ||
170 | .state_transition = NULL, | ||
171 | .register_app = NULL, | ||
172 | .unregister_app = NULL, | ||
173 | .app_conn_bind = NULL, | ||
174 | .debug_packet = esp_debug_packet, | ||
175 | .timeout_change = NULL, /* ISAKMP */ | ||
176 | }; | ||
diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/ipv4/ipvs/ip_vs_rr.c index 358110d17e59..f74929117534 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/ipv4/ipvs/ip_vs_rr.c | |||
@@ -32,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) | |||
32 | } | 32 | } |
33 | 33 | ||
34 | 34 | ||
35 | static int ip_vs_rr_done_svc(struct ip_vs_service *svc) | ||
36 | { | ||
37 | return 0; | ||
38 | } | ||
39 | |||
40 | |||
41 | static int ip_vs_rr_update_svc(struct ip_vs_service *svc) | 35 | static int ip_vs_rr_update_svc(struct ip_vs_service *svc) |
42 | { | 36 | { |
43 | svc->sched_data = &svc->destinations; | 37 | svc->sched_data = &svc->destinations; |
@@ -96,7 +90,6 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { | |||
96 | .module = THIS_MODULE, | 90 | .module = THIS_MODULE, |
97 | .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), | 91 | .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), |
98 | .init_service = ip_vs_rr_init_svc, | 92 | .init_service = ip_vs_rr_init_svc, |
99 | .done_service = ip_vs_rr_done_svc, | ||
100 | .update_service = ip_vs_rr_update_svc, | 93 | .update_service = ip_vs_rr_update_svc, |
101 | .schedule = ip_vs_rr_schedule, | 94 | .schedule = ip_vs_rr_schedule, |
102 | }; | 95 | }; |
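
Round-robin, by contrast, keeps init_service and update_service: its sched_data is a rover into svc->destinations and must be reset whenever the destination list changes. A self-contained sketch of that rover-with-reset idea, with illustrative types:

struct node {
        int id;
        struct node *next;   /* circular list of destinations */
};

struct rr_state {
        struct node *head;
        struct node *rover;  /* plays the role of svc->sched_data */
};

/* update_service analogue: the list changed, so restart the rover
 * rather than risk advancing from a stale position. */
void rr_update(struct rr_state *s)
{
        s->rover = s->head;
}

int rr_pick(struct rr_state *s)
{
        if (!s->rover)
                return -1;
        s->rover = s->rover->next;   /* advance, wrapping via the cycle */
        return s->rover->id;
}
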
diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/ipv4/ipvs/ip_vs_sed.c index 77663d84cbd1..53f73bea66ce 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/ipv4/ipvs/ip_vs_sed.c | |||
@@ -41,27 +41,6 @@ | |||
41 | #include <net/ip_vs.h> | 41 | #include <net/ip_vs.h> |
42 | 42 | ||
43 | 43 | ||
44 | static int | ||
45 | ip_vs_sed_init_svc(struct ip_vs_service *svc) | ||
46 | { | ||
47 | return 0; | ||
48 | } | ||
49 | |||
50 | |||
51 | static int | ||
52 | ip_vs_sed_done_svc(struct ip_vs_service *svc) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | |||
58 | static int | ||
59 | ip_vs_sed_update_svc(struct ip_vs_service *svc) | ||
60 | { | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | |||
65 | static inline unsigned int | 44 | static inline unsigned int |
66 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) | 45 | ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) |
67 | { | 46 | { |
@@ -139,9 +118,6 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = | |||
139 | .refcnt = ATOMIC_INIT(0), | 118 | .refcnt = ATOMIC_INIT(0), |
140 | .module = THIS_MODULE, | 119 | .module = THIS_MODULE, |
141 | .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), | 120 | .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), |
142 | .init_service = ip_vs_sed_init_svc, | ||
143 | .done_service = ip_vs_sed_done_svc, | ||
144 | .update_service = ip_vs_sed_update_svc, | ||
145 | .schedule = ip_vs_sed_schedule, | 121 | .schedule = ip_vs_sed_schedule, |
146 | }; | 122 | }; |
147 | 123 | ||
diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/ipv4/ipvs/ip_vs_wlc.c index 9b0ef86bb1f7..df7ad8d74766 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/ipv4/ipvs/ip_vs_wlc.c | |||
@@ -25,27 +25,6 @@ | |||
25 | #include <net/ip_vs.h> | 25 | #include <net/ip_vs.h> |
26 | 26 | ||
27 | 27 | ||
28 | static int | ||
29 | ip_vs_wlc_init_svc(struct ip_vs_service *svc) | ||
30 | { | ||
31 | return 0; | ||
32 | } | ||
33 | |||
34 | |||
35 | static int | ||
36 | ip_vs_wlc_done_svc(struct ip_vs_service *svc) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | |||
42 | static int | ||
43 | ip_vs_wlc_update_svc(struct ip_vs_service *svc) | ||
44 | { | ||
45 | return 0; | ||
46 | } | ||
47 | |||
48 | |||
49 | static inline unsigned int | 28 | static inline unsigned int |
50 | ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) | 29 | ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) |
51 | { | 30 | { |
@@ -127,9 +106,6 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = | |||
127 | .refcnt = ATOMIC_INIT(0), | 106 | .refcnt = ATOMIC_INIT(0), |
128 | .module = THIS_MODULE, | 107 | .module = THIS_MODULE, |
129 | .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), | 108 | .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), |
130 | .init_service = ip_vs_wlc_init_svc, | ||
131 | .done_service = ip_vs_wlc_done_svc, | ||
132 | .update_service = ip_vs_wlc_update_svc, | ||
133 | .schedule = ip_vs_wlc_schedule, | 109 | .schedule = ip_vs_wlc_schedule, |
134 | }; | 110 | }; |
135 | 111 | ||
diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ee5354c9aa1..f62187bb6d08 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c | |||
@@ -282,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) | |||
282 | struct rtable *r = NULL; | 282 | struct rtable *r = NULL; |
283 | 283 | ||
284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { | 284 | for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { |
285 | if (!rt_hash_table[st->bucket].chain) | ||
286 | continue; | ||
285 | rcu_read_lock_bh(); | 287 | rcu_read_lock_bh(); |
286 | r = rcu_dereference(rt_hash_table[st->bucket].chain); | 288 | r = rcu_dereference(rt_hash_table[st->bucket].chain); |
287 | while (r) { | 289 | while (r) { |
@@ -299,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, | |||
299 | struct rtable *r) | 301 | struct rtable *r) |
300 | { | 302 | { |
301 | struct rt_cache_iter_state *st = seq->private; | 303 | struct rt_cache_iter_state *st = seq->private; |
304 | |||
302 | r = r->u.dst.rt_next; | 305 | r = r->u.dst.rt_next; |
303 | while (!r) { | 306 | while (!r) { |
304 | rcu_read_unlock_bh(); | 307 | rcu_read_unlock_bh(); |
305 | if (--st->bucket < 0) | 308 | do { |
306 | break; | 309 | if (--st->bucket < 0) |
310 | return NULL; | ||
311 | } while (!rt_hash_table[st->bucket].chain); | ||
307 | rcu_read_lock_bh(); | 312 | rcu_read_lock_bh(); |
308 | r = rt_hash_table[st->bucket].chain; | 313 | r = rt_hash_table[st->bucket].chain; |
309 | } | 314 | } |
@@ -2840,7 +2845,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2840 | if (s_h < 0) | 2845 | if (s_h < 0) |
2841 | s_h = 0; | 2846 | s_h = 0; |
2842 | s_idx = idx = cb->args[1]; | 2847 | s_idx = idx = cb->args[1]; |
2843 | for (h = s_h; h <= rt_hash_mask; h++) { | 2848 | for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { |
2849 | if (!rt_hash_table[h].chain) | ||
2850 | continue; | ||
2844 | rcu_read_lock_bh(); | 2851 | rcu_read_lock_bh(); |
2845 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; | 2852 | for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; |
2846 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { | 2853 | rt = rcu_dereference(rt->u.dst.rt_next), idx++) { |
@@ -2859,7 +2866,6 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) | |||
2859 | dst_release(xchg(&skb->dst, NULL)); | 2866 | dst_release(xchg(&skb->dst, NULL)); |
2860 | } | 2867 | } |
2861 | rcu_read_unlock_bh(); | 2868 | rcu_read_unlock_bh(); |
2862 | s_idx = 0; | ||
2863 | } | 2869 | } |
2864 | 2870 | ||
2865 | done: | 2871 | done: |
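
The three route.c hunks share one idea: test rt_hash_table[bucket].chain before taking rcu_read_lock_bh(), so walks over a sparsely populated cache stop bouncing the lock on empty buckets, and fold the s_idx reset into the for-loop increment so a resumed dump restarts every later bucket at index 0. A standalone sketch of that resumable two-level walk, with emit() standing in for the per-entry callback and all names illustrative:

#define NBUCKETS 16

struct item { struct item *next; };

static struct item *table[NBUCKETS];

/* Resumable walk in the style of ip_rt_dump(): s_h/s_idx play the role
 * of cb->args[]. Empty buckets are skipped before any lock would be
 * taken, and s_idx only applies to the bucket the last pass stopped in. */
static int dump(int s_h, int s_idx, int (*emit)(struct item *))
{
        int h, idx;

        for (h = s_h; h < NBUCKETS; h++, s_idx = 0) {
                struct item *it;

                if (!table[h])
                        continue;            /* skip empty chains cheaply */
                /* rcu_read_lock_bh() would bracket the chain walk here */
                for (it = table[h], idx = 0; it; it = it->next, idx++) {
                        if (idx < s_idx)
                                continue;    /* emitted by an earlier pass */
                        if (emit(it) < 0)
                                return h;    /* caller records h and idx
                                              * to resume here next time */
                }
        }
        return NBUCKETS;                     /* done */
}
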
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2a96bd..f79a51607292 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -3442,6 +3442,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, | |||
3442 | } | 3442 | } |
3443 | } | 3443 | } |
3444 | 3444 | ||
3445 | static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) | ||
3446 | { | ||
3447 | __be32 *ptr = (__be32 *)(th + 1); | ||
3448 | |||
3449 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3450 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3451 | tp->rx_opt.saw_tstamp = 1; | ||
3452 | ++ptr; | ||
3453 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3454 | ++ptr; | ||
3455 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3456 | return 1; | ||
3457 | } | ||
3458 | return 0; | ||
3459 | } | ||
3460 | |||
3445 | /* Fast parse options. This hopes to only see timestamps. | 3461 | /* Fast parse options. This hopes to only see timestamps. |
3446 | * If it is wrong it falls back on tcp_parse_options(). | 3462 | * If it is wrong it falls back on tcp_parse_options(). |
3447 | */ | 3463 | */ |
@@ -3453,16 +3469,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, | |||
3453 | return 0; | 3469 | return 0; |
3454 | } else if (tp->rx_opt.tstamp_ok && | 3470 | } else if (tp->rx_opt.tstamp_ok && |
3455 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { | 3471 | th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { |
3456 | __be32 *ptr = (__be32 *)(th + 1); | 3472 | if (tcp_parse_aligned_timestamp(tp, th)) |
3457 | if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | ||
3458 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { | ||
3459 | tp->rx_opt.saw_tstamp = 1; | ||
3460 | ++ptr; | ||
3461 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
3462 | ++ptr; | ||
3463 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
3464 | return 1; | 3473 | return 1; |
3465 | } | ||
3466 | } | 3474 | } |
3467 | tcp_parse_options(skb, &tp->rx_opt, 1); | 3475 | tcp_parse_options(skb, &tp->rx_opt, 1); |
3468 | return 1; | 3476 | return 1; |
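
tcp_parse_aligned_timestamp() factors out the long-standing fast-path trick: when timestamps are the only option present, the 12 option bytes begin with the fixed word NOP,NOP,TIMESTAMP,10, so a single 32-bit compare validates the whole layout and the following two words are tsval and tsecr. A self-contained userspace illustration of the same predicted-word test:

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

#define TCPOPT_NOP        1
#define TCPOPT_TIMESTAMP  8
#define TCPOLEN_TIMESTAMP 10

/* Parse a 12-byte option block laid out as NOP NOP kind/len tsval tsecr.
 * Returns 1 and fills tsval/tsecr on the predicted layout, else 0. */
int parse_aligned_timestamp(const uint8_t *opt,
                            uint32_t *tsval, uint32_t *tsecr)
{
        uint32_t word;

        memcpy(&word, opt, 4);
        if (word != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
                          (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP))
                return 0;       /* not the predicted option block */
        memcpy(&word, opt + 4, 4);
        *tsval = ntohl(word);
        memcpy(&word, opt + 8, 4);
        *tsecr = ntohl(word);
        return 1;
}
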
@@ -4161,6 +4169,18 @@ add_sack: | |||
4161 | } | 4169 | } |
4162 | } | 4170 | } |
4163 | 4171 | ||
4172 | static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, | ||
4173 | struct sk_buff_head *list) | ||
4174 | { | ||
4175 | struct sk_buff *next = skb->next; | ||
4176 | |||
4177 | __skb_unlink(skb, list); | ||
4178 | __kfree_skb(skb); | ||
4179 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4180 | |||
4181 | return next; | ||
4182 | } | ||
4183 | |||
4164 | /* Collapse contiguous sequence of skbs head..tail with | 4184 | /* Collapse contiguous sequence of skbs head..tail with |
4165 | * sequence numbers start..end. | 4185 | * sequence numbers start..end. |
4166 | * Segments with FIN/SYN are not collapsed (only because this | 4186 | * Segments with FIN/SYN are not collapsed (only because this |
@@ -4178,11 +4198,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4178 | for (skb = head; skb != tail;) { | 4198 | for (skb = head; skb != tail;) { |
4179 | /* No new bits? It is possible on ofo queue. */ | 4199 | /* No new bits? It is possible on ofo queue. */ |
4180 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4200 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4181 | struct sk_buff *next = skb->next; | 4201 | skb = tcp_collapse_one(sk, skb, list); |
4182 | __skb_unlink(skb, list); | ||
4183 | __kfree_skb(skb); | ||
4184 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4185 | skb = next; | ||
4186 | continue; | 4202 | continue; |
4187 | } | 4203 | } |
4188 | 4204 | ||
@@ -4246,11 +4262,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, | |||
4246 | start += size; | 4262 | start += size; |
4247 | } | 4263 | } |
4248 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { | 4264 | if (!before(start, TCP_SKB_CB(skb)->end_seq)) { |
4249 | struct sk_buff *next = skb->next; | 4265 | skb = tcp_collapse_one(sk, skb, list); |
4250 | __skb_unlink(skb, list); | ||
4251 | __kfree_skb(skb); | ||
4252 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); | ||
4253 | skb = next; | ||
4254 | if (skb == tail || | 4266 | if (skb == tail || |
4255 | tcp_hdr(skb)->syn || | 4267 | tcp_hdr(skb)->syn || |
4256 | tcp_hdr(skb)->fin) | 4268 | tcp_hdr(skb)->fin) |
@@ -4691,6 +4703,67 @@ out: | |||
4691 | } | 4703 | } |
4692 | #endif /* CONFIG_NET_DMA */ | 4704 | #endif /* CONFIG_NET_DMA */ |
4693 | 4705 | ||
4706 | /* Does PAWS and seqno-based validation of an incoming segment; flags will ||
4707 | * play a significant role here. ||
4708 | */ | ||
4709 | static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, | ||
4710 | struct tcphdr *th, int syn_inerr) | ||
4711 | { | ||
4712 | struct tcp_sock *tp = tcp_sk(sk); | ||
4713 | |||
4714 | /* RFC1323: H1. Apply PAWS check first. */ | ||
4715 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4716 | tcp_paws_discard(sk, skb)) { | ||
4717 | if (!th->rst) { | ||
4718 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4719 | tcp_send_dupack(sk, skb); | ||
4720 | goto discard; | ||
4721 | } | ||
4722 | /* Reset is accepted even if it did not pass PAWS. */ | ||
4723 | } | ||
4724 | |||
4725 | /* Step 1: check sequence number */ | ||
4726 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
4727 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | ||
4728 | * (RST) segments are validated by checking their SEQ-fields." | ||
4729 | * And page 69: "If an incoming segment is not acceptable, | ||
4730 | * an acknowledgment should be sent in reply (unless the RST | ||
4731 | * bit is set, if so drop the segment and return)". | ||
4732 | */ | ||
4733 | if (!th->rst) | ||
4734 | tcp_send_dupack(sk, skb); | ||
4735 | goto discard; | ||
4736 | } | ||
4737 | |||
4738 | /* Step 2: check RST bit */ | ||
4739 | if (th->rst) { | ||
4740 | tcp_reset(sk); | ||
4741 | goto discard; | ||
4742 | } | ||
4743 | |||
4744 | /* ts_recent update must be made after we are sure that the packet | ||
4745 | * is in window. | ||
4746 | */ | ||
4747 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4748 | |||
4749 | /* step 3: check security and precedence [ignored] */ | ||
4750 | |||
4751 | /* step 4: Check for a SYN in window. */ | ||
4752 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4753 | if (syn_inerr) | ||
4754 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4755 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4756 | tcp_reset(sk); | ||
4757 | return -1; | ||
4758 | } | ||
4759 | |||
4760 | return 1; | ||
4761 | |||
4762 | discard: | ||
4763 | __kfree_skb(skb); | ||
4764 | return 0; | ||
4765 | } | ||
4766 | |||
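
tcp_validate_incoming() folds the PAWS, sequence, RST and in-window-SYN checks shared by the slow path and the state machine into one helper with a three-way return: 1 to keep processing, 0 when the segment was consumed (discarded, possibly after a dupack), and -1 after the connection was reset. Both call sites below map this with res = tcp_validate_incoming(...); if (res <= 0) return -res;. A minimal sketch of that convention, with hypothetical names:

/* Hypothetical names; only the return-value plumbing matters here.
 *   1  -> segment survived, keep processing
 *   0  -> segment consumed (e.g. dropped after a dupack)
 *  -1  -> connection was reset, caller must report failure
 */
static int validate(int in_window, int bad_syn)
{
        if (bad_syn)
                return -1;        /* like the SYN-in-window reset */
        if (!in_window)
                return 0;         /* like the dupack-and-discard path */
        return 1;
}

int receive(int in_window, int bad_syn)
{
        int res = validate(in_window, bad_syn);

        if (res <= 0)
                return -res;      /* 0 stays 0 (done), -1 becomes 1 (error) */

        /* ... step 5: ACK processing, data queueing ... */
        return 0;
}
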
4694 | /* | 4767 | /* |
4695 | * TCP receive function for the ESTABLISHED state. | 4768 | * TCP receive function for the ESTABLISHED state. |
4696 | * | 4769 | * |
@@ -4718,6 +4791,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4718 | struct tcphdr *th, unsigned len) | 4791 | struct tcphdr *th, unsigned len) |
4719 | { | 4792 | { |
4720 | struct tcp_sock *tp = tcp_sk(sk); | 4793 | struct tcp_sock *tp = tcp_sk(sk); |
4794 | int res; | ||
4721 | 4795 | ||
4722 | /* | 4796 | /* |
4723 | * Header prediction. | 4797 | * Header prediction. |
@@ -4756,19 +4830,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
4756 | 4830 | ||
4757 | /* Check timestamp */ | 4831 | /* Check timestamp */ |
4758 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { | 4832 | if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { |
4759 | __be32 *ptr = (__be32 *)(th + 1); | ||
4760 | |||
4761 | /* No? Slow path! */ | 4833 | /* No? Slow path! */ |
4762 | if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 4834 | if (!tcp_parse_aligned_timestamp(tp, th)) |
4763 | | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) | ||
4764 | goto slow_path; | 4835 | goto slow_path; |
4765 | 4836 | ||
4766 | tp->rx_opt.saw_tstamp = 1; | ||
4767 | ++ptr; | ||
4768 | tp->rx_opt.rcv_tsval = ntohl(*ptr); | ||
4769 | ++ptr; | ||
4770 | tp->rx_opt.rcv_tsecr = ntohl(*ptr); | ||
4771 | |||
4772 | /* If PAWS failed, check it more carefully in slow path */ | 4837 | /* If PAWS failed, check it more carefully in slow path */ |
4773 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) | 4838 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) |
4774 | goto slow_path; | 4839 | goto slow_path; |
@@ -4899,51 +4964,12 @@ slow_path: | |||
4899 | goto csum_error; | 4964 | goto csum_error; |
4900 | 4965 | ||
4901 | /* | 4966 | /* |
4902 | * RFC1323: H1. Apply PAWS check first. | ||
4903 | */ | ||
4904 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | ||
4905 | tcp_paws_discard(sk, skb)) { | ||
4906 | if (!th->rst) { | ||
4907 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
4908 | tcp_send_dupack(sk, skb); | ||
4909 | goto discard; | ||
4910 | } | ||
4911 | /* Resets are accepted even if PAWS failed. | ||
4912 | |||
4913 | ts_recent update must be made after we are sure | ||
4914 | that the packet is in window. | ||
4915 | */ | ||
4916 | } | ||
4917 | |||
4918 | /* | ||
4919 | * Standard slow path. | 4967 | * Standard slow path. |
4920 | */ | 4968 | */ |
4921 | 4969 | ||
4922 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | 4970 | res = tcp_validate_incoming(sk, skb, th, 1); |
4923 | /* RFC793, page 37: "In all states except SYN-SENT, all reset | 4971 | if (res <= 0) |
4924 | * (RST) segments are validated by checking their SEQ-fields." | 4972 | return -res; |
4925 | * And page 69: "If an incoming segment is not acceptable, | ||
4926 | * an acknowledgment should be sent in reply (unless the RST bit | ||
4927 | * is set, if so drop the segment and return)". | ||
4928 | */ | ||
4929 | if (!th->rst) | ||
4930 | tcp_send_dupack(sk, skb); | ||
4931 | goto discard; | ||
4932 | } | ||
4933 | |||
4934 | if (th->rst) { | ||
4935 | tcp_reset(sk); | ||
4936 | goto discard; | ||
4937 | } | ||
4938 | |||
4939 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
4940 | |||
4941 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
4942 | TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); | ||
4943 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
4944 | tcp_reset(sk); | ||
4945 | return 1; | ||
4946 | } | ||
4947 | 4973 | ||
4948 | step5: | 4974 | step5: |
4949 | if (th->ack) | 4975 | if (th->ack) |
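The sign flip at the new call site is compact enough to misread, so spelled out (same code as in the hunk above, annotated):

	res = tcp_validate_incoming(sk, skb, th, 1);
	if (res <= 0)
		return -res;	/*  0 (discarded, skb freed)  -> return 0 */
				/* -1 (SYN in window, reset)  -> return 1 */
	/* res == 1: segment accepted, fall through to step5 */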
@@ -5225,6 +5251,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5225 | struct tcp_sock *tp = tcp_sk(sk); | 5251 | struct tcp_sock *tp = tcp_sk(sk); |
5226 | struct inet_connection_sock *icsk = inet_csk(sk); | 5252 | struct inet_connection_sock *icsk = inet_csk(sk); |
5227 | int queued = 0; | 5253 | int queued = 0; |
5254 | int res; | ||
5228 | 5255 | ||
5229 | tp->rx_opt.saw_tstamp = 0; | 5256 | tp->rx_opt.saw_tstamp = 0; |
5230 | 5257 | ||
@@ -5277,42 +5304,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5277 | return 0; | 5304 | return 0; |
5278 | } | 5305 | } |
5279 | 5306 | ||
5280 | if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && | 5307 | res = tcp_validate_incoming(sk, skb, th, 0); |
5281 | tcp_paws_discard(sk, skb)) { | 5308 | if (res <= 0) |
5282 | if (!th->rst) { | 5309 | return -res; |
5283 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); | ||
5284 | tcp_send_dupack(sk, skb); | ||
5285 | goto discard; | ||
5286 | } | ||
5287 | /* Reset is accepted even if it did not pass PAWS. */ | ||
5288 | } | ||
5289 | |||
5290 | /* step 1: check sequence number */ | ||
5291 | if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { | ||
5292 | if (!th->rst) | ||
5293 | tcp_send_dupack(sk, skb); | ||
5294 | goto discard; | ||
5295 | } | ||
5296 | |||
5297 | /* step 2: check RST bit */ | ||
5298 | if (th->rst) { | ||
5299 | tcp_reset(sk); | ||
5300 | goto discard; | ||
5301 | } | ||
5302 | |||
5303 | tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); | ||
5304 | |||
5305 | /* step 3: check security and precedence [ignored] */ | ||
5306 | |||
5307 | /* step 4: | ||
5308 | * | ||
5309 | * Check for a SYN in window. | ||
5310 | */ | ||
5311 | if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { | ||
5312 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); | ||
5313 | tcp_reset(sk); | ||
5314 | return 1; | ||
5315 | } | ||
5316 | 5310 | ||
5317 | /* step 5: check the ACK field */ | 5311 | /* step 5: check the ACK field */ |
5318 | if (th->ack) { | 5312 | if (th->ack) { |
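Note the different fourth argument at the two call sites; it preserves an asymmetry visible in the deleted code: only the ESTABLISHED slow path counted an in-window SYN as an input error (TCP_MIB_INERRS), while the state-machine path merely reset the connection. Side by side:

	/* tcp_rcv_established(): an in-window SYN also bumps TCP_MIB_INERRS */
	res = tcp_validate_incoming(sk, skb, th, 1);

	/* tcp_rcv_state_process(): reset and abort, but no INERRS bump */
	res = tcp_validate_incoming(sk, skb, th, 0);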
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1b4fee20fc93..3dfbc21e555a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1946,6 +1946,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) | |||
1946 | return rc; | 1946 | return rc; |
1947 | } | 1947 | } |
1948 | 1948 | ||
1949 | static inline int empty_bucket(struct tcp_iter_state *st) | ||
1950 | { | ||
1951 | return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && | ||
1952 | hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); | ||
1953 | } | ||
1954 | |||
1949 | static void *established_get_first(struct seq_file *seq) | 1955 | static void *established_get_first(struct seq_file *seq) |
1950 | { | 1956 | { |
1951 | struct tcp_iter_state* st = seq->private; | 1957 | struct tcp_iter_state* st = seq->private; |
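The helper reads both hash chains without taking the bucket lock. That is safe here because hlist_empty() is a plain head-pointer load and the /proc iterator tolerates missing a socket inserted concurrently: a bucket that becomes non-empty right after the test is simply skipped until the next read. The pattern, restated as a sketch (loop body abridged):

	for (; st->bucket < tcp_hashinfo.ehash_size; ++st->bucket) {
		if (empty_bucket(st))	/* lockless: two pointer loads */
			continue;	/* common case on idle machines */

		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
		/* ... walk chain and twchain under the lock ... */
		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	}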
@@ -1958,6 +1964,10 @@ static void *established_get_first(struct seq_file *seq) | |||
1958 | struct inet_timewait_sock *tw; | 1964 | struct inet_timewait_sock *tw; |
1959 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); | 1965 | rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); |
1960 | 1966 | ||
1967 | /* Lockless fast path for the common case of empty buckets */ | ||
1968 | if (empty_bucket(st)) | ||
1969 | continue; | ||
1970 | |||
1961 | read_lock_bh(lock); | 1971 | read_lock_bh(lock); |
1962 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { | 1972 | sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { |
1963 | if (sk->sk_family != st->family || | 1973 | if (sk->sk_family != st->family || |
@@ -2008,13 +2018,15 @@ get_tw: | |||
2008 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2018 | read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); |
2009 | st->state = TCP_SEQ_STATE_ESTABLISHED; | 2019 | st->state = TCP_SEQ_STATE_ESTABLISHED; |
2010 | 2020 | ||
2011 | if (++st->bucket < tcp_hashinfo.ehash_size) { | 2021 | /* Look for next non-empty bucket */ |
2012 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | 2022 | while (++st->bucket < tcp_hashinfo.ehash_size && |
2013 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | 2023 | empty_bucket(st)) |
2014 | } else { | 2024 | ; |
2015 | cur = NULL; | 2025 | if (st->bucket >= tcp_hashinfo.ehash_size) |
2016 | goto out; | 2026 | return NULL; |
2017 | } | 2027 | |
2028 | read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); | ||
2029 | sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); | ||
2018 | } else | 2030 | } else |
2019 | sk = sk_next(sk); | 2031 | sk = sk_next(sk); |
2020 | 2032 | ||
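The same idea applied to established_get_next(): rather than locking bucket N+1 unconditionally, the cursor first scans forward over empty buckets with unlocked loads, restated with comments:

	while (++st->bucket < tcp_hashinfo.ehash_size && empty_bucket(st))
		;			/* lockless skip over empty buckets */
	if (st->bucket >= tcp_hashinfo.ehash_size)
		return NULL;		/* table exhausted: end of sequence */

	read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
	sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain);

The payoff: with E established-hash buckets of which only k are occupied, a full /proc/net/tcp walk now costs E unlocked emptiness tests plus k read_lock_bh/read_unlock_bh pairs, instead of E lock round-trips, which is the point of the patch on lightly loaded machines with large hash tables.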