 include/net/inet_frag.h                 | 13
 net/ieee802154/reassembly.c             |  1
 net/ipv4/inet_fragment.c                | 88
 net/ipv4/ip_fragment.c                  |  1
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2
 net/ipv6/reassembly.c                   |  1
 6 files changed, 62 insertions, 44 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index d9cc5bb64854..6f4930a0b660 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -53,11 +53,6 @@ struct inet_frag_bucket {
 
 struct inet_frags {
 	struct inet_frag_bucket	hash[INETFRAGS_HASHSZ];
-	/* This rwlock is a global lock (seperate per IPv4, IPv6 and
-	 * netfilter). Important to keep this on a seperate cacheline.
-	 * Its primarily a rebuild protection rwlock.
-	 */
-	rwlock_t		lock ____cacheline_aligned_in_smp;
 
 	struct work_struct	frags_work;
 	unsigned int		next_bucket;
@@ -66,8 +61,12 @@ struct inet_frags {
 
 	/* The first call to hashfn is responsible to initialize
 	 * rnd. This is best done with net_get_random_once.
+	 *
+	 * rnd_seqlock is used to let hash insertion detect
+	 * when it needs to re-lookup the hash chain to use.
 	 */
 	u32			rnd;
+	seqlock_t		rnd_seqlock;
 	int			qsize;
 
 	unsigned int		(*hashfn)(const struct inet_frag_queue *);
@@ -89,8 +88,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
 void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
 void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f);
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
-		struct inet_frags *f, void *key, unsigned int hash)
-	__releases(&f->lock);
+		struct inet_frags *f, void *key, unsigned int hash);
+
 void inet_frag_maybe_warn_overflow(struct inet_frag_queue *q,
 				   const char *prefix);
 
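The header change above replaces the per-family rwlock with a seqlock_t kept next to the hash secret. As a rough editorial sketch of the reader/writer pattern this enables (not code from the tree; the demo_* names, the 64-bucket table and the use of jhash_1word() are illustrative assumptions):

#include <linux/jhash.h>
#include <linux/random.h>
#include <linux/seqlock.h>
#include <linux/types.h>

static DEFINE_SEQLOCK(demo_seqlock);	/* stand-in for f->rnd_seqlock */
static u32 demo_rnd;			/* stand-in for f->rnd */

/* Writer: a secret rebuild takes the write side, so concurrent readers
 * can tell that the hash function changed underneath them.
 */
static void demo_rebuild(void)
{
	write_seqlock_bh(&demo_seqlock);
	get_random_bytes(&demo_rnd, sizeof(demo_rnd));	/* pick a new secret */
	write_sequnlock_bh(&demo_seqlock);
}

/* Reader: sample the sequence, compute the bucket, retry if a rebuild
 * slipped in between the two steps.
 */
static unsigned int demo_pick_bucket(u32 key)
{
	unsigned int seq, bucket;

	do {
		seq = read_seqbegin(&demo_seqlock);
		bucket = jhash_1word(key, demo_rnd) & (64 - 1);
	} while (read_seqretry(&demo_seqlock, seq));

	return bucket;
}

In the patch itself the retry check is done only after the per-bucket chain_lock has been taken (see get_frag_bucket_locked() further down), so a lookup that races with a rebuild recomputes its bucket instead of blocking behind a global reader/writer lock.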
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index 20d219682d84..8da635d92a58 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -124,7 +124,6 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
 	arg.src = src;
 	arg.dst = dst;
 
-	read_lock(&lowpan_frags.lock);
 	hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
 
 	q = inet_frag_find(&ieee802154_lowpan->frags,
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 58d4c38534f6..62b1f73749dc 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -68,8 +68,7 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
 {
 	int i;
 
-	/* Per bucket lock NOT needed here, due to write lock protection */
-	write_lock_bh(&f->lock);
+	write_seqlock_bh(&f->rnd_seqlock);
 
 	if (!inet_frag_may_rebuild(f))
 		goto out;
@@ -82,6 +81,8 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
 		struct hlist_node *n;
 
 		hb = &f->hash[i];
+		spin_lock(&hb->chain_lock);
+
 		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
 			unsigned int hval = inet_frag_hashfn(f, q);
 
@@ -92,15 +93,28 @@ static void inet_frag_secret_rebuild(struct inet_frags *f)
 
 				/* Relink to new hash chain. */
 				hb_dest = &f->hash[hval];
+
+				/* This is the only place where we take
+				 * another chain_lock while already holding
+				 * one. As this will not run concurrently,
+				 * we cannot deadlock on hb_dest lock below, if its
+				 * already locked it will be released soon since
+				 * other caller cannot be waiting for hb lock
+				 * that we've taken above.
+				 */
+				spin_lock_nested(&hb_dest->chain_lock,
+						 SINGLE_DEPTH_NESTING);
 				hlist_add_head(&q->list, &hb_dest->chain);
+				spin_unlock(&hb_dest->chain_lock);
 			}
 		}
+		spin_unlock(&hb->chain_lock);
 	}
 
 	f->rebuild = false;
 	f->last_rebuild_jiffies = jiffies;
 out:
-	write_unlock_bh(&f->lock);
+	write_sequnlock_bh(&f->rnd_seqlock);
 }
 
 static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
@@ -163,7 +177,7 @@ static void inet_frag_worker(struct work_struct *work)
 
 	BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
 
-	read_lock_bh(&f->lock);
+	local_bh_disable();
 
 	for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
 		evicted += inet_evict_bucket(f, &f->hash[i]);
@@ -174,7 +188,8 @@ static void inet_frag_worker(struct work_struct *work)
 
 	f->next_bucket = i;
 
-	read_unlock_bh(&f->lock);
+	local_bh_enable();
+
 	if (f->rebuild && inet_frag_may_rebuild(f))
 		inet_frag_secret_rebuild(f);
 }
@@ -197,7 +212,8 @@ void inet_frags_init(struct inet_frags *f)
 		spin_lock_init(&hb->chain_lock);
 		INIT_HLIST_HEAD(&hb->chain);
 	}
-	rwlock_init(&f->lock);
+
+	seqlock_init(&f->rnd_seqlock);
 	f->last_rebuild_jiffies = 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
@@ -216,35 +232,56 @@ EXPORT_SYMBOL(inet_frags_fini);
 
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 {
+	unsigned int seq;
 	int i;
 
 	nf->low_thresh = 0;
+	local_bh_disable();
 
-	read_lock_bh(&f->lock);
+evict_again:
+	seq = read_seqbegin(&f->rnd_seqlock);
 
 	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
 		inet_evict_bucket(f, &f->hash[i]);
 
-	read_unlock_bh(&f->lock);
+	if (read_seqretry(&f->rnd_seqlock, seq))
+		goto evict_again;
+
+	local_bh_enable();
 
 	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
 {
 	struct inet_frag_bucket *hb;
-	unsigned int hash;
+	unsigned int seq, hash;
+
+restart:
+	seq = read_seqbegin(&f->rnd_seqlock);
 
-	read_lock(&f->lock);
 	hash = inet_frag_hashfn(f, fq);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
+	if (read_seqretry(&f->rnd_seqlock, seq)) {
+		spin_unlock(&hb->chain_lock);
+		goto restart;
+	}
+
+	return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	struct inet_frag_bucket *hb;
+
+	hb = get_frag_bucket_locked(fq, f);
 	hlist_del(&fq->list);
 	spin_unlock(&hb->chain_lock);
-
-	read_unlock(&f->lock);
 }
 
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -300,30 +337,18 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
 		void *arg)
 {
-	struct inet_frag_bucket *hb;
+	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
 	struct inet_frag_queue *qp;
-	unsigned int hash;
-
-	read_lock(&f->lock); /* Protects against hash rebuild */
-	/*
-	 * While we stayed w/o the lock other CPU could update
-	 * the rnd seed, so we need to re-calculate the hash
-	 * chain. Fortunatelly the qp_in can be used to get one.
-	 */
-	hash = inet_frag_hashfn(f, qp_in);
-	hb = &f->hash[hash];
-	spin_lock(&hb->chain_lock);
 
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * released the hash bucket lock.
+	 * such entry could have been created on other cpu before
+	 * we acquired hash bucket lock.
 	 */
 	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			qp_in->last_in |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
@@ -338,7 +363,6 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 	hlist_add_head(&qp->list, &hb->chain);
 
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	return qp;
 }
@@ -382,7 +406,6 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
-	__releases(&f->lock)
 {
 	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
@@ -399,19 +422,18 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			return q;
 		}
 		depth++;
 	}
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	if (depth <= INETFRAGS_MAXDEPTH)
 		return inet_frag_create(nf, f, key);
 
 	if (inet_frag_may_rebuild(f)) {
-		f->rebuild = true;
+		if (!f->rebuild)
+			f->rebuild = true;
 		inet_frag_schedule_worker(f);
 	}
 
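The rebuild path in inet_frag_secret_rebuild() now holds two chain_lock spinlocks of the same lock class at once, which is why it needs spin_lock_nested(). A small stand-alone sketch of that annotation (editor's illustration with made-up demo_* types, not code from the patch):

#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_bucket {
	spinlock_t		lock;
	struct hlist_head	chain;
};

/* Move one entry from src to dst while both buckets stay consistent. */
static void demo_move_entry(struct demo_bucket *src, struct demo_bucket *dst,
			    struct hlist_node *entry)
{
	spin_lock(&src->lock);
	hlist_del(entry);

	/* dst->lock is in the same lock class as src->lock, so annotate the
	 * second acquisition; otherwise lockdep would flag it as a possible
	 * deadlock.  This is only safe because no other code path ever takes
	 * two bucket locks at once.
	 */
	spin_lock_nested(&dst->lock, SINGLE_DEPTH_NESTING);
	hlist_add_head(entry, &dst->chain);
	spin_unlock(&dst->lock);

	spin_unlock(&src->lock);
}

SINGLE_DEPTH_NESTING only tells lockdep that the nesting is intentional and bounded; correctness still rests on the rebuild being the sole place that nests bucket locks, as the comment added in inet_frag_secret_rebuild() spells out.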
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 44e591a7e03f..ccee68dffd6e 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -244,7 +244,6 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
 	arg.iph = iph;
 	arg.user = user;
 
-	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
 
 	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3b3ef9774cc2..4d9da1e35f8c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -193,7 +193,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 	arg.dst = dst;
 	arg.ecn = ecn;
 
-	read_lock_bh(&nf_frags.lock);
+	local_bh_disable();
 	hash = nf_hash_frag(id, src, dst);
 
 	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 987fea46b915..57a9707b2032 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -190,7 +190,6 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
 	arg.dst = dst;
 	arg.ecn = ecn;
 
-	read_lock(&ip6_frags.lock);
 	hash = inet6_hash_frag(id, src, dst);
 
 	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
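Every per-protocol fq_find()/ip_find() helper changes the same way: the reader lock around the hash computation goes away, and only the netfilter reassembly path, which may be called with BHs enabled, adds an explicit local_bh_disable(). A hedged sketch of the resulting caller pattern (demo_* names are made up; demo_hash() stands in for ipqhashfn()/nf_hash_frag()/inet6_hash_frag()):

#include <linux/bottom_half.h>
#include <linux/jhash.h>
#include <net/inet_frag.h>

/* Hypothetical per-protocol hash; the real ones also mix in f->rnd. */
static unsigned int demo_hash(u32 id)
{
	return jhash_1word(id, 0) & (INETFRAGS_HASHSZ - 1);
}

static struct inet_frag_queue *demo_fq_find(struct netns_frags *nf,
					    struct inet_frags *f,
					    u32 id, void *key)
{
	struct inet_frag_queue *q;
	unsigned int hash;

	/* Callers that may run with BHs on (the nf_conntrack_reasm case)
	 * now disable them explicitly instead of via read_lock_bh().
	 */
	local_bh_disable();

	/* No lock around hashing any more: if a secret rebuild races with
	 * us, inet_frag_find()/inet_frag_intern() detect it through
	 * rnd_seqlock and pick the right bucket themselves.
	 */
	hash = demo_hash(id);
	q = inet_frag_find(nf, f, key, hash);

	local_bh_enable();
	return q;
}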