diff options
author | Julian Anastasov <ja@ssi.bg> | 2013-09-12 04:21:07 -0400 |
---|---|---|
committer | Simon Horman <horms@verge.net.au> | 2013-09-18 15:39:03 -0400 |
commit | bcbde4c0a7556cca72874c5e1efa4dccb5198a2b (patch) | |
tree | 73e5f985bbcd5a976c26dd75e4672ac2b70b9270 /include | |
parent | c16526a7b99c1c28e9670a8c8e3dbcf741bb32be (diff) |
ipvs: make the service replacement more robust
Commit 578bc3ef1e473a ("ipvs: reorganize dest trash") added
the IP_VS_DEST_STATE_REMOVING flag and an RCU callback named
ip_vs_dest_wait_readers() to keep dests and services after
removal for at least an RCU grace period. But we have the
following corner cases:
- we cannot reuse the same dest if its service is removed
while IP_VS_DEST_STATE_REMOVING is still set, because another dest
removal in the first grace period cannot extend this period.
It can happen when ipvsadm -C && ipvsadm -R is used.
- dest->svc can be replaced but ip_vs_in_stats() and
ip_vs_out_stats() have no explicit read memory barriers
when accessing dest->svc. It can happen that dest->svc
was just freed (replaced) while we use it to update
the stats.
We solve the problems as follows:
- IP_VS_DEST_STATE_REMOVING is removed and we ensure a fixed
idle period for the dest (IP_VS_DEST_TRASH_PERIOD). idle_start
remembers when, for the first time after deletion, we noticed
dest->refcnt=0. Later, the connections can grab a reference
while in an RCU grace period, but if refcnt becomes 0 we can
safely free the dest and its svc.
- dest->svc becomes an RCU pointer. As a result, we add explicit
RCU locking in ip_vs_in_stats() and ip_vs_out_stats().
- __ip_vs_unbind_svc() is renamed to __ip_vs_svc_put(); it
can now free the service immediately or after an RCU grace
period. dest->svc is not set to NULL anymore.
As a result, unlinked dests and their services are always
freed after the IP_VS_DEST_TRASH_PERIOD period, and unused
services are freed after an RCU grace period.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'include')
-rw-r--r-- | include/net/ip_vs.h | 7 |
1 files changed, 2 insertions, 5 deletions
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index fe782ed2fe72..9c4d37ec45a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h | |||
@@ -723,8 +723,6 @@ struct ip_vs_dest_dst { | |||
723 | struct rcu_head rcu_head; | 723 | struct rcu_head rcu_head; |
724 | }; | 724 | }; |
725 | 725 | ||
726 | /* In grace period after removing */ | ||
727 | #define IP_VS_DEST_STATE_REMOVING 0x01 | ||
728 | /* | 726 | /* |
729 | * The real server destination forwarding entry | 727 | * The real server destination forwarding entry |
730 | * with ip address, port number, and so on. | 728 | * with ip address, port number, and so on. |
@@ -742,7 +740,7 @@ struct ip_vs_dest { | |||
742 | 740 | ||
743 | atomic_t refcnt; /* reference counter */ | 741 | atomic_t refcnt; /* reference counter */ |
744 | struct ip_vs_stats stats; /* statistics */ | 742 | struct ip_vs_stats stats; /* statistics */ |
745 | unsigned long state; /* state flags */ | 743 | unsigned long idle_start; /* start time, jiffies */ |
746 | 744 | ||
747 | /* connection counters and thresholds */ | 745 | /* connection counters and thresholds */ |
748 | atomic_t activeconns; /* active connections */ | 746 | atomic_t activeconns; /* active connections */ |
@@ -756,14 +754,13 @@ struct ip_vs_dest { | |||
756 | struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ | 754 | struct ip_vs_dest_dst __rcu *dest_dst; /* cached dst info */ |
757 | 755 | ||
758 | /* for virtual service */ | 756 | /* for virtual service */ |
759 | struct ip_vs_service *svc; /* service it belongs to */ | 757 | struct ip_vs_service __rcu *svc; /* service it belongs to */ |
760 | __u16 protocol; /* which protocol (TCP/UDP) */ | 758 | __u16 protocol; /* which protocol (TCP/UDP) */ |
761 | __be16 vport; /* virtual port number */ | 759 | __be16 vport; /* virtual port number */ |
762 | union nf_inet_addr vaddr; /* virtual IP address */ | 760 | union nf_inet_addr vaddr; /* virtual IP address */ |
763 | __u32 vfwmark; /* firewall mark of service */ | 761 | __u32 vfwmark; /* firewall mark of service */ |
764 | 762 | ||
765 | struct list_head t_list; /* in dest_trash */ | 763 | struct list_head t_list; /* in dest_trash */ |
766 | struct rcu_head rcu_head; | ||
767 | unsigned int in_rs_table:1; /* we are in rs_table */ | 764 | unsigned int in_rs_table:1; /* we are in rs_table */ |
768 | }; | 765 | }; |
769 | 766 | ||