author		Julian Anastasov <ja@ssi.bg>	2013-09-12 04:21:09 -0400
committer	Simon Horman <horms@verge.net.au>	2013-09-18 15:39:39 -0400
commit		742617b176909e586a4cf9b142c996c25986fce8 (patch)
tree		50889a950b87f84ec85e485d4d95409c9c356da7 /net
parent		2f3d771a35fee21a1f17364b46b3c8cc66dc6892 (diff)
ipvs: do not use dest after ip_vs_dest_put in LBLCR
Commit c5549571f975ab ("ipvs: convert lblcr scheduler to rcu")
allows RCU readers to use a dest after calling ip_vs_dest_put().
In a corner case this can race with ip_vs_dest_trash_expire(),
which can release the dest while it is being returned to the
RCU readers as a scheduling result.

To fix the problem, do not allow e->dest to be replaced, and
defer the ip_vs_dest_put() call by using an RCU callback. Now
e->dest does not need to be an RCU pointer.
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
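
The core of the fix is a standard RCU idiom: instead of dropping the
destination's refcount at unlink time, while lockless readers may still be
dereferencing e->dest, the put is moved into the RCU callback that also frees
the element, so it runs only after every pre-existing reader has finished.
Below is a minimal, hypothetical kernel-style sketch of that idiom; the
foo_* names are invented for illustration and are not part of this patch.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo_dest;			/* refcounted destination (assumed) */
void foo_dest_put(struct foo_dest *d);	/* drops one reference (assumed) */

struct foo_elem {
	struct list_head list;
	struct foo_dest *dest;		/* element holds one reference */
	struct rcu_head rcu_head;
};

/* Runs after a grace period: no RCU reader can still see the element. */
static void foo_elem_rcu_free(struct rcu_head *head)
{
	struct foo_elem *e = container_of(head, struct foo_elem, rcu_head);

	foo_dest_put(e->dest);	/* safe: readers of e->dest are done */
	kfree(e);
}

/* Writer side, called under the set's lock. */
static void foo_elem_del(struct foo_elem *e)
{
	list_del_rcu(&e->list);
	/* Do NOT foo_dest_put() here: an RCU reader may have just
	 * fetched e->dest and could still be using it. */
	call_rcu(&e->rcu_head, foo_elem_rcu_free);
}

Because the element now pins its dest until the callback runs, e->dest is
never replaced in place, which is why the __rcu annotation and the
rcu_dereference() calls can be dropped in the patch below.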
Diffstat (limited to 'net')
-rw-r--r--	net/netfilter/ipvs/ip_vs_lblcr.c	50
1 file changed, 20 insertions(+), 30 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index e65f7c573090..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
  */
 struct ip_vs_dest_set_elem {
 	struct list_head	list;          /* list link */
-	struct ip_vs_dest __rcu *dest;         /* destination server */
+	struct ip_vs_dest	*dest;         /* destination server */
 	struct rcu_head		rcu_head;
 };
 
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 
 	if (check) {
 		list_for_each_entry(e, &set->list, list) {
-			struct ip_vs_dest *d;
-
-			d = rcu_dereference_protected(e->dest, 1);
-			if (d == dest)
-				/* already existed */
+			if (e->dest == dest)
 				return;
 		}
 	}
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 		return;
 
 	ip_vs_dest_hold(dest);
-	RCU_INIT_POINTER(e->dest, dest);
+	e->dest = dest;
 
 	list_add_rcu(&e->list, &set->list);
 	atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
 	set->lastmod = jiffies;
 }
 
+static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
+{
+	struct ip_vs_dest_set_elem *e;
+
+	e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
+	ip_vs_dest_put(e->dest);
+	kfree(e);
+}
+
 static void
 ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
 {
 	struct ip_vs_dest_set_elem *e;
 
 	list_for_each_entry(e, &set->list, list) {
-		struct ip_vs_dest *d;
-
-		d = rcu_dereference_protected(e->dest, 1);
-		if (d == dest) {
+		if (e->dest == dest) {
 			/* HIT */
 			atomic_dec(&set->size);
 			set->lastmod = jiffies;
-			ip_vs_dest_put(dest);
 			list_del_rcu(&e->list);
-			kfree_rcu(e, rcu_head);
+			call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
 			break;
 		}
 	}
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
 	struct ip_vs_dest_set_elem *e, *ep;
 
 	list_for_each_entry_safe(e, ep, &set->list, list) {
-		struct ip_vs_dest *d;
-
-		d = rcu_dereference_protected(e->dest, 1);
-		/*
-		 * We don't kfree dest because it is referred either
-		 * by its service or by the trash dest list.
-		 */
-		ip_vs_dest_put(d);
 		list_del_rcu(&e->list);
-		kfree_rcu(e, rcu_head);
+		call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
 	}
 }
 
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 	struct ip_vs_dest *dest, *least;
 	int loh, doh;
 
-	if (set == NULL)
-		return NULL;
-
 	/* select the first destination server, whose weight > 0 */
 	list_for_each_entry_rcu(e, &set->list, list) {
-		least = rcu_dereference(e->dest);
+		least = e->dest;
 		if (least->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
@@ -195,7 +185,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
 	/* find the destination with the weighted least load */
   nextstage:
 	list_for_each_entry_continue_rcu(e, &set->list, list) {
-		dest = rcu_dereference(e->dest);
+		dest = e->dest;
 		if (dest->flags & IP_VS_DEST_F_OVERLOAD)
 			continue;
 
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 
 	/* select the first destination server, whose weight > 0 */
 	list_for_each_entry(e, &set->list, list) {
-		most = rcu_dereference_protected(e->dest, 1);
+		most = e->dest;
 		if (atomic_read(&most->weight) > 0) {
 			moh = ip_vs_dest_conn_overhead(most);
 			goto nextstage;
@@ -243,7 +233,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
 	/* find the destination with the weighted most load */
   nextstage:
 	list_for_each_entry_continue(e, &set->list, list) {
-		dest = rcu_dereference_protected(e->dest, 1);
+		dest = e->dest;
 		doh = ip_vs_dest_conn_overhead(dest);
 		/* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
 		if (((__s64)moh * atomic_read(&dest->weight) <
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
 {
 	unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
 	unregister_pernet_subsys(&ip_vs_lblcr_ops);
-	synchronize_rcu();
+	rcu_barrier();
 }
 
 