Diffstat (limited to 'net/ipv4/inet_fragment.c')
-rw-r--r--	net/ipv4/inet_fragment.c | 318
1 file changed, 210 insertions(+), 108 deletions(-)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 3b01959bf4bb..9eb89f3f0ee4 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -25,6 +25,12 @@
 #include <net/inet_frag.h>
 #include <net/inet_ecn.h>
 
+#define INETFRAGS_EVICT_BUCKETS 128
+#define INETFRAGS_EVICT_MAX	512
+
+/* don't rebuild inetfrag table with new secret more often than this */
+#define INETFRAGS_MIN_REBUILD_INTERVAL (5 * HZ)
+
 /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
  * Value : 0xff if frame should be dropped.
  *	   0 or INET_ECN_CE value, to be ORed in to final iph->tos field
@@ -46,24 +52,39 @@ const u8 ip_frag_ecn_table[16] = {
 };
 EXPORT_SYMBOL(ip_frag_ecn_table);
 
-static void inet_frag_secret_rebuild(unsigned long dummy)
+static unsigned int
+inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q)
+{
+	return f->hashfn(q) & (INETFRAGS_HASHSZ - 1);
+}
+
+static bool inet_frag_may_rebuild(struct inet_frags *f)
+{
+	return time_after(jiffies,
+	       f->last_rebuild_jiffies + INETFRAGS_MIN_REBUILD_INTERVAL);
+}
+
+static void inet_frag_secret_rebuild(struct inet_frags *f)
 {
-	struct inet_frags *f = (struct inet_frags *)dummy;
-	unsigned long now = jiffies;
 	int i;
 
-	/* Per bucket lock NOT needed here, due to write lock protection */
-	write_lock(&f->lock);
+	write_seqlock_bh(&f->rnd_seqlock);
+
+	if (!inet_frag_may_rebuild(f))
+		goto out;
 
 	get_random_bytes(&f->rnd, sizeof(u32));
+
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_bucket *hb;
 		struct inet_frag_queue *q;
 		struct hlist_node *n;
 
 		hb = &f->hash[i];
+		spin_lock(&hb->chain_lock);
+
 		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
-			unsigned int hval = f->hashfn(q);
+			unsigned int hval = inet_frag_hashfn(f, q);
 
 			if (hval != i) {
 				struct inet_frag_bucket *hb_dest;
@@ -72,76 +93,200 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 
 				/* Relink to new hash chain. */
 				hb_dest = &f->hash[hval];
+
+				/* This is the only place where we take
+				 * another chain_lock while already holding
+				 * one. As this will not run concurrently,
+				 * we cannot deadlock on hb_dest lock below, if its
+				 * already locked it will be released soon since
+				 * other caller cannot be waiting for hb lock
+				 * that we've taken above.
+				 */
+				spin_lock_nested(&hb_dest->chain_lock,
+						 SINGLE_DEPTH_NESTING);
 				hlist_add_head(&q->list, &hb_dest->chain);
+				spin_unlock(&hb_dest->chain_lock);
 			}
 		}
+		spin_unlock(&hb->chain_lock);
+	}
+
+	f->rebuild = false;
+	f->last_rebuild_jiffies = jiffies;
+out:
+	write_sequnlock_bh(&f->rnd_seqlock);
+}
+
+static bool inet_fragq_should_evict(const struct inet_frag_queue *q)
+{
+	return q->net->low_thresh == 0 ||
+	       frag_mem_limit(q->net) >= q->net->low_thresh;
+}
+
+static unsigned int
+inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb)
+{
+	struct inet_frag_queue *fq;
+	struct hlist_node *n;
+	unsigned int evicted = 0;
+	HLIST_HEAD(expired);
+
+evict_again:
+	spin_lock(&hb->chain_lock);
+
+	hlist_for_each_entry_safe(fq, n, &hb->chain, list) {
+		if (!inet_fragq_should_evict(fq))
+			continue;
+
+		if (!del_timer(&fq->timer)) {
+			/* q expiring right now thus increment its refcount so
+			 * it won't be freed under us and wait until the timer
+			 * has finished executing then destroy it
+			 */
+			atomic_inc(&fq->refcnt);
+			spin_unlock(&hb->chain_lock);
+			del_timer_sync(&fq->timer);
+			WARN_ON(atomic_read(&fq->refcnt) != 1);
+			inet_frag_put(fq, f);
+			goto evict_again;
+		}
+
+		fq->flags |= INET_FRAG_EVICTED;
+		hlist_del(&fq->list);
+		hlist_add_head(&fq->list, &expired);
+		++evicted;
 	}
-	write_unlock(&f->lock);
 
-	mod_timer(&f->secret_timer, now + f->secret_interval);
+	spin_unlock(&hb->chain_lock);
+
+	hlist_for_each_entry_safe(fq, n, &expired, list)
+		f->frag_expire((unsigned long) fq);
+
+	return evicted;
 }
 
-void inet_frags_init(struct inet_frags *f)
+static void inet_frag_worker(struct work_struct *work)
+{
+	unsigned int budget = INETFRAGS_EVICT_BUCKETS;
+	unsigned int i, evicted = 0;
+	struct inet_frags *f;
+
+	f = container_of(work, struct inet_frags, frags_work);
+
+	BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ);
+
+	local_bh_disable();
+
+	for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) {
+		evicted += inet_evict_bucket(f, &f->hash[i]);
+		i = (i + 1) & (INETFRAGS_HASHSZ - 1);
+		if (evicted > INETFRAGS_EVICT_MAX)
+			break;
+	}
+
+	f->next_bucket = i;
+
+	local_bh_enable();
+
+	if (f->rebuild && inet_frag_may_rebuild(f))
+		inet_frag_secret_rebuild(f);
+}
+
+static void inet_frag_schedule_worker(struct inet_frags *f)
+{
+	if (unlikely(!work_pending(&f->frags_work)))
+		schedule_work(&f->frags_work);
+}
+
+int inet_frags_init(struct inet_frags *f)
 {
 	int i;
 
+	INIT_WORK(&f->frags_work, inet_frag_worker);
+
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
 		struct inet_frag_bucket *hb = &f->hash[i];
 
 		spin_lock_init(&hb->chain_lock);
 		INIT_HLIST_HEAD(&hb->chain);
 	}
-	rwlock_init(&f->lock);
 
-	setup_timer(&f->secret_timer, inet_frag_secret_rebuild,
-			(unsigned long)f);
-	f->secret_timer.expires = jiffies + f->secret_interval;
-	add_timer(&f->secret_timer);
+	seqlock_init(&f->rnd_seqlock);
+	f->last_rebuild_jiffies = 0;
+	f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0,
+					    NULL);
+	if (!f->frags_cachep)
+		return -ENOMEM;
+
+	return 0;
 }
 EXPORT_SYMBOL(inet_frags_init);
 
 void inet_frags_init_net(struct netns_frags *nf)
 {
-	nf->nqueues = 0;
 	init_frag_mem_limit(nf);
-	INIT_LIST_HEAD(&nf->lru_list);
-	spin_lock_init(&nf->lru_lock);
 }
 EXPORT_SYMBOL(inet_frags_init_net);
 
 void inet_frags_fini(struct inet_frags *f)
 {
-	del_timer(&f->secret_timer);
+	cancel_work_sync(&f->frags_work);
+	kmem_cache_destroy(f->frags_cachep);
 }
 EXPORT_SYMBOL(inet_frags_fini);
 
 void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
 {
-	nf->low_thresh = 0;
+	unsigned int seq;
+	int i;
 
+	nf->low_thresh = 0;
 	local_bh_disable();
-	inet_frag_evictor(nf, f, true);
+
+evict_again:
+	seq = read_seqbegin(&f->rnd_seqlock);
+
+	for (i = 0; i < INETFRAGS_HASHSZ ; i++)
+		inet_evict_bucket(f, &f->hash[i]);
+
+	if (read_seqretry(&f->rnd_seqlock, seq))
+		goto evict_again;
+
 	local_bh_enable();
 
 	percpu_counter_destroy(&nf->mem);
 }
 EXPORT_SYMBOL(inet_frags_exit_net);
 
-static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+static struct inet_frag_bucket *
+get_frag_bucket_locked(struct inet_frag_queue *fq, struct inet_frags *f)
+__acquires(hb->chain_lock)
 {
 	struct inet_frag_bucket *hb;
-	unsigned int hash;
+	unsigned int seq, hash;
+
+ restart:
+	seq = read_seqbegin(&f->rnd_seqlock);
 
-	read_lock(&f->lock);
-	hash = f->hashfn(fq);
+	hash = inet_frag_hashfn(f, fq);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
+	if (read_seqretry(&f->rnd_seqlock, seq)) {
+		spin_unlock(&hb->chain_lock);
+		goto restart;
+	}
+
+	return hb;
+}
+
+static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
+{
+	struct inet_frag_bucket *hb;
+
+	hb = get_frag_bucket_locked(fq, f);
 	hlist_del(&fq->list);
 	spin_unlock(&hb->chain_lock);
-
-	read_unlock(&f->lock);
-	inet_frag_lru_del(fq);
 }
 
 void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -149,30 +294,29 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
 	if (del_timer(&fq->timer))
 		atomic_dec(&fq->refcnt);
 
-	if (!(fq->last_in & INET_FRAG_COMPLETE)) {
+	if (!(fq->flags & INET_FRAG_COMPLETE)) {
 		fq_unlink(fq, f);
 		atomic_dec(&fq->refcnt);
-		fq->last_in |= INET_FRAG_COMPLETE;
+		fq->flags |= INET_FRAG_COMPLETE;
 	}
 }
 EXPORT_SYMBOL(inet_frag_kill);
 
 static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
 				  struct sk_buff *skb)
 {
 	if (f->skb_free)
 		f->skb_free(skb);
 	kfree_skb(skb);
 }
 
-void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
-		       int *work)
+void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f)
 {
 	struct sk_buff *fp;
 	struct netns_frags *nf;
 	unsigned int sum, sum_truesize = 0;
 
-	WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
+	WARN_ON(!(q->flags & INET_FRAG_COMPLETE));
 	WARN_ON(del_timer(&q->timer) != 0);
 
 	/* Release all fragment data. */
@@ -186,87 +330,32 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
 		fp = xp;
 	}
 	sum = sum_truesize + f->qsize;
-	if (work)
-		*work -= sum;
 	sub_frag_mem_limit(q, sum);
 
 	if (f->destructor)
 		f->destructor(q);
-	kfree(q);
-
+	kmem_cache_free(f->frags_cachep, q);
 }
 EXPORT_SYMBOL(inet_frag_destroy);
 
-int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
-{
-	struct inet_frag_queue *q;
-	int work, evicted = 0;
-
-	if (!force) {
-		if (frag_mem_limit(nf) <= nf->high_thresh)
-			return 0;
-	}
-
-	work = frag_mem_limit(nf) - nf->low_thresh;
-	while (work > 0 || force) {
-		spin_lock(&nf->lru_lock);
-
-		if (list_empty(&nf->lru_list)) {
-			spin_unlock(&nf->lru_lock);
-			break;
-		}
-
-		q = list_first_entry(&nf->lru_list,
-				struct inet_frag_queue, lru_list);
-		atomic_inc(&q->refcnt);
-		/* Remove q from list to avoid several CPUs grabbing it */
-		list_del_init(&q->lru_list);
-
-		spin_unlock(&nf->lru_lock);
-
-		spin_lock(&q->lock);
-		if (!(q->last_in & INET_FRAG_COMPLETE))
-			inet_frag_kill(q, f);
-		spin_unlock(&q->lock);
-
-		if (atomic_dec_and_test(&q->refcnt))
-			inet_frag_destroy(q, f, &work);
-		evicted++;
-	}
-
-	return evicted;
-}
-EXPORT_SYMBOL(inet_frag_evictor);
-
 static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
-		struct inet_frag_queue *qp_in, struct inet_frags *f,
-		void *arg)
+						struct inet_frag_queue *qp_in,
+						struct inet_frags *f,
+						void *arg)
 {
-	struct inet_frag_bucket *hb;
+	struct inet_frag_bucket *hb = get_frag_bucket_locked(qp_in, f);
 	struct inet_frag_queue *qp;
-	unsigned int hash;
-
-	read_lock(&f->lock); /* Protects against hash rebuild */
-	/*
-	 * While we stayed w/o the lock other CPU could update
-	 * the rnd seed, so we need to re-calculate the hash
-	 * chain. Fortunatelly the qp_in can be used to get one.
-	 */
-	hash = f->hashfn(qp_in);
-	hb = &f->hash[hash];
-	spin_lock(&hb->chain_lock);
 
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
-	 * such entry could be created on other cpu, while we
-	 * released the hash bucket lock.
+	 * such entry could have been created on other cpu before
+	 * we acquired hash bucket lock.
 	 */
 	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
-			qp_in->last_in |= INET_FRAG_COMPLETE;
+			qp_in->flags |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
 		}
@@ -278,19 +367,24 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 
 	atomic_inc(&qp->refcnt);
 	hlist_add_head(&qp->list, &hb->chain);
-	inet_frag_lru_add(nf, qp);
+
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	return qp;
 }
 
 static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
-		struct inet_frags *f, void *arg)
+					       struct inet_frags *f,
+					       void *arg)
 {
 	struct inet_frag_queue *q;
 
-	q = kzalloc(f->qsize, GFP_ATOMIC);
+	if (frag_mem_limit(nf) > nf->high_thresh) {
+		inet_frag_schedule_worker(f);
+		return NULL;
+	}
+
+	q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC);
 	if (q == NULL)
 		return NULL;
 
@@ -301,13 +395,13 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	INIT_LIST_HEAD(&q->lru_list);
 
 	return q;
 }
 
 static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
-		struct inet_frags *f, void *arg)
+						struct inet_frags *f,
+						void *arg)
 {
 	struct inet_frag_queue *q;
 
@@ -319,13 +413,17 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
 }
 
 struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
-		struct inet_frags *f, void *key, unsigned int hash)
-	__releases(&f->lock)
+				       struct inet_frags *f, void *key,
+				       unsigned int hash)
 {
 	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
 	int depth = 0;
 
+	if (frag_mem_limit(nf) > nf->low_thresh)
+		inet_frag_schedule_worker(f);
+
+	hash &= (INETFRAGS_HASHSZ - 1);
 	hb = &f->hash[hash];
 
 	spin_lock(&hb->chain_lock);
@@ -333,18 +431,22 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
 			spin_unlock(&hb->chain_lock);
-			read_unlock(&f->lock);
 			return q;
 		}
 		depth++;
 	}
 	spin_unlock(&hb->chain_lock);
-	read_unlock(&f->lock);
 
 	if (depth <= INETFRAGS_MAXDEPTH)
 		return inet_frag_create(nf, f, key);
-	else
-		return ERR_PTR(-ENOBUFS);
+
+	if (inet_frag_may_rebuild(f)) {
+		if (!f->rebuild)
+			f->rebuild = true;
+		inet_frag_schedule_worker(f);
+	}
+
+	return ERR_PTR(-ENOBUFS);
 }
 EXPORT_SYMBOL(inet_frag_find);
 