about summary refs log tree commit diff stats
path: root/net
diff options
context:
space:
mode:
author Julian Anastasov <ja@ssi.bg> 2013-09-12 04:21:09 -0400
committer Simon Horman <horms@verge.net.au> 2013-09-18 15:39:39 -0400
commit 742617b176909e586a4cf9b142c996c25986fce8 (patch)
tree 50889a950b87f84ec85e485d4d95409c9c356da7 /net
parent 2f3d771a35fee21a1f17364b46b3c8cc66dc6892 (diff)
ipvs: do not use dest after ip_vs_dest_put in LBLCR
Commit c5549571f975ab ("ipvs: convert lblcr scheduler to rcu") allows RCU readers to use dest after calling ip_vs_dest_put(). In a corner case this can race with ip_vs_dest_trash_expire(), which can release the dest while it is being returned to the RCU readers as the scheduling result. To fix the problem, do not allow e->dest to be replaced, and defer the ip_vs_dest_put() call by using an RCU callback. Now e->dest does not need to be an RCU pointer. Signed-off-by: Julian Anastasov <ja@ssi.bg> Signed-off-by: Simon Horman <horms@verge.net.au>
Diffstat (limited to 'net')
-rw-r--r-- net/netfilter/ipvs/ip_vs_lblcr.c 50
1 files changed, 20 insertions, 30 deletions
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index e65f7c573090..0b8550089a2e 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -89,7 +89,7 @@
89 */ 89 */
90struct ip_vs_dest_set_elem { 90struct ip_vs_dest_set_elem {
91 struct list_head list; /* list link */ 91 struct list_head list; /* list link */
92 struct ip_vs_dest __rcu *dest; /* destination server */ 92 struct ip_vs_dest *dest; /* destination server */
93 struct rcu_head rcu_head; 93 struct rcu_head rcu_head;
94}; 94};
95 95
@@ -107,11 +107,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
107 107
108 if (check) { 108 if (check) {
109 list_for_each_entry(e, &set->list, list) { 109 list_for_each_entry(e, &set->list, list) {
110 struct ip_vs_dest *d; 110 if (e->dest == dest)
111
112 d = rcu_dereference_protected(e->dest, 1);
113 if (d == dest)
114 /* already existed */
115 return; 111 return;
116 } 112 }
117 } 113 }
@@ -121,7 +117,7 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
121 return; 117 return;
122 118
123 ip_vs_dest_hold(dest); 119 ip_vs_dest_hold(dest);
124 RCU_INIT_POINTER(e->dest, dest); 120 e->dest = dest;
125 121
126 list_add_rcu(&e->list, &set->list); 122 list_add_rcu(&e->list, &set->list);
127 atomic_inc(&set->size); 123 atomic_inc(&set->size);
@@ -129,22 +125,27 @@ static void ip_vs_dest_set_insert(struct ip_vs_dest_set *set,
129 set->lastmod = jiffies; 125 set->lastmod = jiffies;
130} 126}
131 127
128static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head)
129{
130 struct ip_vs_dest_set_elem *e;
131
132 e = container_of(head, struct ip_vs_dest_set_elem, rcu_head);
133 ip_vs_dest_put(e->dest);
134 kfree(e);
135}
136
132static void 137static void
133ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) 138ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest)
134{ 139{
135 struct ip_vs_dest_set_elem *e; 140 struct ip_vs_dest_set_elem *e;
136 141
137 list_for_each_entry(e, &set->list, list) { 142 list_for_each_entry(e, &set->list, list) {
138 struct ip_vs_dest *d; 143 if (e->dest == dest) {
139
140 d = rcu_dereference_protected(e->dest, 1);
141 if (d == dest) {
142 /* HIT */ 144 /* HIT */
143 atomic_dec(&set->size); 145 atomic_dec(&set->size);
144 set->lastmod = jiffies; 146 set->lastmod = jiffies;
145 ip_vs_dest_put(dest);
146 list_del_rcu(&e->list); 147 list_del_rcu(&e->list);
147 kfree_rcu(e, rcu_head); 148 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
148 break; 149 break;
149 } 150 }
150 } 151 }
@@ -155,16 +156,8 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set)
155 struct ip_vs_dest_set_elem *e, *ep; 156 struct ip_vs_dest_set_elem *e, *ep;
156 157
157 list_for_each_entry_safe(e, ep, &set->list, list) { 158 list_for_each_entry_safe(e, ep, &set->list, list) {
158 struct ip_vs_dest *d;
159
160 d = rcu_dereference_protected(e->dest, 1);
161 /*
162 * We don't kfree dest because it is referred either
163 * by its service or by the trash dest list.
164 */
165 ip_vs_dest_put(d);
166 list_del_rcu(&e->list); 159 list_del_rcu(&e->list);
167 kfree_rcu(e, rcu_head); 160 call_rcu(&e->rcu_head, ip_vs_lblcr_elem_rcu_free);
168 } 161 }
169} 162}
170 163
@@ -175,12 +168,9 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
175 struct ip_vs_dest *dest, *least; 168 struct ip_vs_dest *dest, *least;
176 int loh, doh; 169 int loh, doh;
177 170
178 if (set == NULL)
179 return NULL;
180
181 /* select the first destination server, whose weight > 0 */ 171 /* select the first destination server, whose weight > 0 */
182 list_for_each_entry_rcu(e, &set->list, list) { 172 list_for_each_entry_rcu(e, &set->list, list) {
183 least = rcu_dereference(e->dest); 173 least = e->dest;
184 if (least->flags & IP_VS_DEST_F_OVERLOAD) 174 if (least->flags & IP_VS_DEST_F_OVERLOAD)
185 continue; 175 continue;
186 176
@@ -195,7 +185,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set)
195 /* find the destination with the weighted least load */ 185 /* find the destination with the weighted least load */
196 nextstage: 186 nextstage:
197 list_for_each_entry_continue_rcu(e, &set->list, list) { 187 list_for_each_entry_continue_rcu(e, &set->list, list) {
198 dest = rcu_dereference(e->dest); 188 dest = e->dest;
199 if (dest->flags & IP_VS_DEST_F_OVERLOAD) 189 if (dest->flags & IP_VS_DEST_F_OVERLOAD)
200 continue; 190 continue;
201 191
@@ -232,7 +222,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
232 222
233 /* select the first destination server, whose weight > 0 */ 223 /* select the first destination server, whose weight > 0 */
234 list_for_each_entry(e, &set->list, list) { 224 list_for_each_entry(e, &set->list, list) {
235 most = rcu_dereference_protected(e->dest, 1); 225 most = e->dest;
236 if (atomic_read(&most->weight) > 0) { 226 if (atomic_read(&most->weight) > 0) {
237 moh = ip_vs_dest_conn_overhead(most); 227 moh = ip_vs_dest_conn_overhead(most);
238 goto nextstage; 228 goto nextstage;
@@ -243,7 +233,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set)
243 /* find the destination with the weighted most load */ 233 /* find the destination with the weighted most load */
244 nextstage: 234 nextstage:
245 list_for_each_entry_continue(e, &set->list, list) { 235 list_for_each_entry_continue(e, &set->list, list) {
246 dest = rcu_dereference_protected(e->dest, 1); 236 dest = e->dest;
247 doh = ip_vs_dest_conn_overhead(dest); 237 doh = ip_vs_dest_conn_overhead(dest);
248 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */ 238 /* moh/mw < doh/dw ==> moh*dw < doh*mw, where mw,dw>0 */
249 if (((__s64)moh * atomic_read(&dest->weight) < 239 if (((__s64)moh * atomic_read(&dest->weight) <
@@ -819,7 +809,7 @@ static void __exit ip_vs_lblcr_cleanup(void)
819{ 809{
820 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler); 810 unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
821 unregister_pernet_subsys(&ip_vs_lblcr_ops); 811 unregister_pernet_subsys(&ip_vs_lblcr_ops);
822 synchronize_rcu(); 812 rcu_barrier();
823} 813}
824 814
825 815