aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
author: Florian Westphal <fw@strlen.de> 2014-07-24 10:50:36 -0400
committer: David S. Miller <davem@davemloft.net> 2014-07-28 01:34:36 -0400
commit: ab1c724f633080ed2e8a0cfe61654599b55cf8f9 (patch)
tree: 215b28da3dc0fbbac8dd853501306f65b165a499 /net/ipv4
parent: e3a57d18b06179d68fcf7a0a06ad844493c65e06 (diff)
inet: frag: use seqlock for hash rebuild
Rehash is a rare operation, so don't force readers to take the read-side rwlock.

Instead, we only have to detect the (rare) case where the secret was altered while we are trying to insert a new inetfrag queue into the table.

If it was changed, drop the bucket lock and recompute the hash to get the 'new' chain bucket that we have to insert into.

Joint work with Nikolay Aleksandrov.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Nikolay Aleksandrov <nikolay@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r--  net/ipv4/inet_fragment.c  88
-rw-r--r--  net/ipv4/ip_fragment.c  1
2 files changed, 55 insertions, 34 deletions
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 58d4c38534f6..62b1f73749dc 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -68,8 +68,7 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
68{ 68{
69 int i; 69 int i;
70 70
71 /* Per bucket lock NOT needed here, due to write lock protection */ 71 write_seqlock_bh(&f->rnd_seqlock);
72 write_lock_bh(&f->lock);
73 72
74 if (!inet_frag_may_rebuild(f)) 73 if (!inet_frag_may_rebuild(f))
75 goto out; 74 goto out;
@@ -82,6 +81,8 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
82 struct hlist_node *n; 81 struct hlist_node *n;
83 82
84 hb = &f->hash[i]; 83 hb = &f->hash[i];
84 spin_lock(&hb->chain_lock);
85
85 hlist_for_each_entry_safe(q, n, &hb->chain, list) { 86 hlist_for_each_entry_safe(q, n, &hb->chain, list) {
86 unsigned int hval = inet_frag_hashfn(f, q); 87 unsigned int hval = inet_frag_hashfn(f, q);
87 88
@@ -92,15 +93,28 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
92 93
93 /* Relink to new hash chain. */ 94 /* Relink to new hash chain. */
94 hb_dest = &f->hash[hval]; 95 hb_dest = &f->hash[hval];
96
97 /* This is the only place where we take
98 * another chain_lock while already holding
99 * one. As this will not run concurrently,
100 * we cannot deadlock on hb_dest lock below, if its
101 * already locked it will be released soon since
102 * other caller cannot be waiting for hb lock
103 * that we've taken above.
104 */
105 spin_lock_nested(&hb_dest->chain_lock,
106 SINGLE_DEPTH_NESTING);
95 hlist_add_head(&q->list, &hb_dest->chain); 107 hlist_add_head(&q->list, &hb_dest->chain);
108 spin_unlock(&hb_dest->chain_lock);
96 } 109 }
97 } 110 }
111 spin_unlock(&hb->chain_lock);
98 } 112 }
99 113
100 f->rebuild = false; 114 f->rebuild = false;
101 f->last_rebuild_jiffies = jiffies; 115 f->last_rebuild_jiffies = jiffies;
102out: 116out:
103 write_unlock_bh(&f->lock); 117 write_sequnlock_bh(&f->rnd_seqlock);
104} 118}
105 119
106static bool inet_fragq_should_evict(const struct inet_frag_queue *q) 120static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
@@ -163,7 +177,7 @@ static void inet_frag_worker(struct work_struct *work)
163 177
164 BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); 178 BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
165 179
166 read_lock_bh(&f->lock); 180 local_bh_disable();
167 181
168 for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { 182 for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
169 evicted += inet_evict_bucket(f, &f->hash[i]); 183 evicted += inet_evict_bucket(f, &f->hash[i]);
@@ -174,7 +188,8 @@ static void inet_frag_worker(struct work_struct *work)
174 188
175 f->next_bucket = i; 189 f->next_bucket = i;
176 190
177 read_unlock_bh(&f->lock); 191 local_bh_enable();
192
178 if (f->rebuild && inet_frag_may_rebuild(f)) 193 if (f->rebuild && inet_frag_may_rebuild(f))
179 inet_frag_secret_rebuild(f); 194 inet_frag_secret_rebuild(f);
180} 195}
@@ -197,7 +212,8 @@ void inet_frags_init(struct inet_frags *f)
197 spin_lock_init(&hb->chain_lock); 212 spin_lock_init(&hb->chain_lock);
198 INIT_HLIST_HEAD(&hb->chain); 213 INIT_HLIST_HEAD(&hb->chain);
199 } 214 }
200 rwlock_init(&f->lock); 215
216 seqlock_init(&f->rnd_seqlock);
201 f->last_rebuild_jiffies = 0; 217 f->last_rebuild_jiffies = 0;
202} 218}
203EXPORT_SYMBOL(inet_frags_init); 219EXPORT_SYMBOL(inet_frags_init);
@@ -216,35 +232,56 @@ EXPORT_SYMBOL(inet_frags_fini);
216 232
217void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) 233void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
218{ 234{
235 unsigned int seq;
219 int i; 236 int i;
220 237
221 nf->low_thresh = 0; 238 nf->low_thresh = 0;
239 local_bh_disable();
222 240
223 read_lock_bh(&f->lock); 241evict_again:
242 seq = read_seqbegin(&f->rnd_seqlock);
224 243
225 for (i = 0; i < INETFRAGS_HASHSZ ; i++) 244 for (i = 0; i < INETFRAGS_HASHSZ ; i++)
226 inet_evict_bucket(f, &f->hash[i]); 245 inet_evict_bucket(f, &f->hash[i]);
227 246
228 read_unlock_bh(&f->lock); 247 if (read_seqretry(&f->rnd_seqlock, seq))
248 goto evict_again;
249
250 local_bh_enable();
229 251
230 percpu_counter_destroy(&nf->mem); 252 percpu_counter_destroy(&nf->mem);
231} 253}
232EXPORT_SYMBOL(inet_frags_exit_net); 254EXPORT_SYMBOL(inet_frags_exit_net);
233 255
234static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) 256static struct inet_frag_bucket *
257get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
258__acquires(hb->chain_lock)
235{ 259{
236 struct inet_frag_bucket *hb; 260 struct inet_frag_bucket *hb;
237 unsigned int hash; 261 unsigned int seq, hash;
262
263 restart:
264 seq = read_seqbegin(&f->rnd_seqlock);
238 265
239 read_lock(&f->lock);
240 hash = inet_frag_hashfn(f, fq); 266 hash = inet_frag_hashfn(f, fq);
241 hb = &f->hash[hash]; 267 hb = &f->hash[hash];
242 268
243 spin_lock(&hb->chain_lock); 269 spin_lock(&hb->chain_lock);
270 if (read_seqretry(&f->rnd_seqlock, seq)) {
271 spin_unlock(&hb->chain_lock);
272 goto restart;
273 }
274
275 return hb;
276}
277
278static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
279{
280 struct inet_frag_bucket *hb;
281
282 hb = get_frag_bucket_locked(fq, f);
244 hlist_del(&fq->list); 283 hlist_del(&fq->list);
245 spin_unlock(&hb->chain_lock); 284 spin_unlock(&hb->chain_lock);
246
247 read_unlock(&f->lock);
248} 285}
249 286
250void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) 287void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -300,30 +337,18 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
300 struct inet_frag_queue *qp_in, struct inet_frags *f, 337 struct inet_frag_queue *qp_in, struct inet_frags *f,
301 void *arg) 338 void *arg)
302{ 339{
303 struct inet_frag_bucket *hb; 340 struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
304 struct inet_frag_queue *qp; 341 struct inet_frag_queue *qp;
305 unsigned int hash;
306
307 read_lock(&f->lock); /* Protects against hash rebuild */
308 /*
309 * While we stayed w/o the lock other CPU could update
310 * the rnd seed, so we need to re-calculate the hash
311 * chain. Fortunatelly the qp_in can be used to get one.
312 */
313 hash = inet_frag_hashfn(f, qp_in);
314 hb = &f->hash[hash];
315 spin_lock(&hb->chain_lock);
316 342
317#ifdef CONFIG_SMP 343#ifdef CONFIG_SMP
318 /* With SMP race we have to recheck hash table, because 344 /* With SMP race we have to recheck hash table, because
319 * such entry could be created on other cpu, while we 345 * such entry could have been created on other cpu before
320 * released the hash bucket lock. 346 * we acquired hash bucket lock.
321 */ 347 */
322 hlist_for_each_entry(qp, &hb->chain, list) { 348 hlist_for_each_entry(qp, &hb->chain, list) {
323 if (qp->net == nf && f->match(qp, arg)) { 349 if (qp->net == nf && f->match(qp, arg)) {
324 atomic_inc(&qp->refcnt); 350 atomic_inc(&qp->refcnt);
325 spin_unlock(&hb->chain_lock); 351 spin_unlock(&hb->chain_lock);
326 read_unlock(&f->lock);
327 qp_in->last_in |= INET_FRAG_COMPLETE; 352 qp_in->last_in |= INET_FRAG_COMPLETE;
328 inet_frag_put(qp_in, f); 353 inet_frag_put(qp_in, f);
329 return qp; 354 return qp;
@@ -338,7 +363,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
338 hlist_add_head(&qp->list, &hb->chain); 363 hlist_add_head(&qp->list, &hb->chain);
339 364
340 spin_unlock(&hb->chain_lock); 365 spin_unlock(&hb->chain_lock);
341 read_unlock(&f->lock);
342 366
343 return qp; 367 return qp;
344} 368}
@@ -382,7 +406,6 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
382 406
383struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 407struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
384 struct inet_frags *f, void *key, unsigned int hash) 408 struct inet_frags *f, void *key, unsigned int hash)
385 __releases(&f->lock)
386{ 409{
387 struct inet_frag_bucket *hb; 410 struct inet_frag_bucket *hb;
388 struct inet_frag_queue *q; 411 struct inet_frag_queue *q;
@@ -399,19 +422,18 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
399 if (q->net == nf && f->match(q, key)) { 422 if (q->net == nf && f->match(q, key)) {
400 atomic_inc(&q->refcnt); 423 atomic_inc(&q->refcnt);
401 spin_unlock(&hb->chain_lock); 424 spin_unlock(&hb->chain_lock);
402 read_unlock(&f->lock);
403 return q; 425 return q;
404 } 426 }
405 depth++; 427 depth++;
406 } 428 }
407 spin_unlock(&hb->chain_lock); 429 spin_unlock(&hb->chain_lock);
408 read_unlock(&f->lock);
409 430
410 if (depth <= INETFRAGS_MAXDEPTH) 431 if (depth <= INETFRAGS_MAXDEPTH)
411 return inet_frag_create(nf, f, key); 432 return inet_frag_create(nf, f, key);
412 433
413 if (inet_frag_may_rebuild(f)) { 434 if (inet_frag_may_rebuild(f)) {
414 f->rebuild = true; 435 if (!f->rebuild)
436 f->rebuild = true;
415 inet_frag_schedule_worker(f); 437 inet_frag_schedule_worker(f);
416 } 438 }
417 439
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 44e591a7e03f..ccee68dffd6e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -244,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
244 arg.iph = iph; 244 arg.iph = iph;
245 arg.user = user; 245 arg.user = user;
246 246
247 read_lock(&ip4_frags.lock);
248 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol); 247 hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
249 248
250 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash); 249 q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);