diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/inet_fragment.c | 142 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 3 |
2 files changed, 101 insertions, 44 deletions
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 535636017534..43315ecb9400 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -25,6 +25,9 @@ | |||
25 | #include <net/inet_frag.h> | 25 | #include <net/inet_frag.h> |
26 | #include <net/inet_ecn.h> | 26 | #include <net/inet_ecn.h> |
27 | 27 | ||
28 | #define INETFRAGS_EVICT_BUCKETS 128 | ||
29 | #define INETFRAGS_EVICT_MAX 512 | ||
30 | |||
28 | /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements | 31 | /* Given the OR values of all fragments, apply RFC 3168 5.3 requirements |
29 | * Value : 0xff if frame should be dropped. | 32 | * Value : 0xff if frame should be dropped. |
30 | * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field | 33 | * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field |
@@ -46,8 +49,6 @@ const u8 ip_frag_ecn_table[16] = { | |||
46 | }; | 49 | }; |
47 | EXPORT_SYMBOL(ip_frag_ecn_table); | 50 | EXPORT_SYMBOL(ip_frag_ecn_table); |
48 | 51 | ||
49 | static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); | ||
50 | |||
51 | static unsigned int | 52 | static unsigned int |
52 | inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) | 53 | inet_frag_hashfn(const struct inet_frags *f, const struct inet_frag_queue *q) |
53 | { | 54 | { |
@@ -89,10 +90,92 @@ static void inet_frag_secret_rebuild(unsigned long dummy) | |||
89 | mod_timer(&f->secret_timer, now + f->secret_interval); | 90 | mod_timer(&f->secret_timer, now + f->secret_interval); |
90 | } | 91 | } |
91 | 92 | ||
93 | static bool inet_fragq_should_evict(const struct inet_frag_queue *q) | ||
94 | { | ||
95 | return q->net->low_thresh == 0 || | ||
96 | frag_mem_limit(q->net) >= q->net->low_thresh; | ||
97 | } | ||
98 | |||
99 | static unsigned int | ||
100 | inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) | ||
101 | { | ||
102 | struct inet_frag_queue *fq; | ||
103 | struct hlist_node *n; | ||
104 | unsigned int evicted = 0; | ||
105 | HLIST_HEAD(expired); | ||
106 | |||
107 | evict_again: | ||
108 | spin_lock(&hb->chain_lock); | ||
109 | |||
110 | hlist_for_each_entry_safe(fq, n, &hb->chain, list) { | ||
111 | if (!inet_fragq_should_evict(fq)) | ||
112 | continue; | ||
113 | |||
114 | if (!del_timer(&fq->timer)) { | ||
115 | /* q expiring right now thus increment its refcount so | ||
116 | * it won't be freed under us and wait until the timer | ||
117 | * has finished executing then destroy it | ||
118 | */ | ||
119 | atomic_inc(&fq->refcnt); | ||
120 | spin_unlock(&hb->chain_lock); | ||
121 | del_timer_sync(&fq->timer); | ||
122 | WARN_ON(atomic_read(&fq->refcnt) != 1); | ||
123 | inet_frag_put(fq, f); | ||
124 | goto evict_again; | ||
125 | } | ||
126 | |||
127 | /* suppress xmit of (icmp) error packet */ | ||
128 | fq->last_in &= ~INET_FRAG_FIRST_IN; | ||
129 | fq->last_in |= INET_FRAG_EVICTED; | ||
130 | hlist_del(&fq->list); | ||
131 | hlist_add_head(&fq->list, &expired); | ||
132 | ++evicted; | ||
133 | } | ||
134 | |||
135 | spin_unlock(&hb->chain_lock); | ||
136 | |||
137 | hlist_for_each_entry_safe(fq, n, &expired, list) | ||
138 | f->frag_expire((unsigned long) fq); | ||
139 | |||
140 | return evicted; | ||
141 | } | ||
142 | |||
143 | static void inet_frag_worker(struct work_struct *work) | ||
144 | { | ||
145 | unsigned int budget = INETFRAGS_EVICT_BUCKETS; | ||
146 | unsigned int i, evicted = 0; | ||
147 | struct inet_frags *f; | ||
148 | |||
149 | f = container_of(work, struct inet_frags, frags_work); | ||
150 | |||
151 | BUILD_BUG_ON(INETFRAGS_EVICT_BUCKETS >= INETFRAGS_HASHSZ); | ||
152 | |||
153 | read_lock_bh(&f->lock); | ||
154 | |||
155 | for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { | ||
156 | evicted += inet_evict_bucket(f, &f->hash[i]); | ||
157 | i = (i + 1) & (INETFRAGS_HASHSZ - 1); | ||
158 | if (evicted > INETFRAGS_EVICT_MAX) | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | f->next_bucket = i; | ||
163 | |||
164 | read_unlock_bh(&f->lock); | ||
165 | } | ||
166 | |||
167 | static void inet_frag_schedule_worker(struct inet_frags *f) | ||
168 | { | ||
169 | if (unlikely(!work_pending(&f->frags_work))) | ||
170 | schedule_work(&f->frags_work); | ||
171 | } | ||
172 | |||
92 | void inet_frags_init(struct inet_frags *f) | 173 | void inet_frags_init(struct inet_frags *f) |
93 | { | 174 | { |
94 | int i; | 175 | int i; |
95 | 176 | ||
177 | INIT_WORK(&f->frags_work, inet_frag_worker); | ||
178 | |||
96 | for (i = 0; i < INETFRAGS_HASHSZ; i++) { | 179 | for (i = 0; i < INETFRAGS_HASHSZ; i++) { |
97 | struct inet_frag_bucket *hb = &f->hash[i]; | 180 | struct inet_frag_bucket *hb = &f->hash[i]; |
98 | 181 | ||
@@ -120,16 +203,22 @@ EXPORT_SYMBOL(inet_frags_init_net); | |||
120 | void inet_frags_fini(struct inet_frags *f) | 203 | void inet_frags_fini(struct inet_frags *f) |
121 | { | 204 | { |
122 | del_timer(&f->secret_timer); | 205 | del_timer(&f->secret_timer); |
206 | cancel_work_sync(&f->frags_work); | ||
123 | } | 207 | } |
124 | EXPORT_SYMBOL(inet_frags_fini); | 208 | EXPORT_SYMBOL(inet_frags_fini); |
125 | 209 | ||
126 | void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) | 210 | void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) |
127 | { | 211 | { |
212 | int i; | ||
213 | |||
128 | nf->low_thresh = 0; | 214 | nf->low_thresh = 0; |
129 | 215 | ||
130 | local_bh_disable(); | 216 | read_lock_bh(&f->lock); |
131 | inet_frag_evictor(nf, f, true); | 217 | |
132 | local_bh_enable(); | 218 | for (i = 0; i < INETFRAGS_HASHSZ ; i++) |
219 | inet_evict_bucket(f, &f->hash[i]); | ||
220 | |||
221 | read_unlock_bh(&f->lock); | ||
133 | 222 | ||
134 | percpu_counter_destroy(&nf->mem); | 223 | percpu_counter_destroy(&nf->mem); |
135 | } | 224 | } |
@@ -205,41 +294,6 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, | |||
205 | } | 294 | } |
206 | EXPORT_SYMBOL(inet_frag_destroy); | 295 | EXPORT_SYMBOL(inet_frag_destroy); |
207 | 296 | ||
208 | static int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) | ||
209 | { | ||
210 | struct inet_frag_queue *q; | ||
211 | int work, evicted = 0; | ||
212 | |||
213 | work = frag_mem_limit(nf) - nf->low_thresh; | ||
214 | while (work > 0 || force) { | ||
215 | spin_lock(&nf->lru_lock); | ||
216 | |||
217 | if (list_empty(&nf->lru_list)) { | ||
218 | spin_unlock(&nf->lru_lock); | ||
219 | break; | ||
220 | } | ||
221 | |||
222 | q = list_first_entry(&nf->lru_list, | ||
223 | struct inet_frag_queue, lru_list); | ||
224 | atomic_inc(&q->refcnt); | ||
225 | /* Remove q from list to avoid several CPUs grabbing it */ | ||
226 | list_del_init(&q->lru_list); | ||
227 | |||
228 | spin_unlock(&nf->lru_lock); | ||
229 | |||
230 | spin_lock(&q->lock); | ||
231 | if (!(q->last_in & INET_FRAG_COMPLETE)) | ||
232 | inet_frag_kill(q, f); | ||
233 | spin_unlock(&q->lock); | ||
234 | |||
235 | if (atomic_dec_and_test(&q->refcnt)) | ||
236 | inet_frag_destroy(q, f, &work); | ||
237 | evicted++; | ||
238 | } | ||
239 | |||
240 | return evicted; | ||
241 | } | ||
242 | |||
243 | static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, | 297 | static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf, |
244 | struct inet_frag_queue *qp_in, struct inet_frags *f, | 298 | struct inet_frag_queue *qp_in, struct inet_frags *f, |
245 | void *arg) | 299 | void *arg) |
@@ -292,8 +346,10 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, | |||
292 | { | 346 | { |
293 | struct inet_frag_queue *q; | 347 | struct inet_frag_queue *q; |
294 | 348 | ||
295 | if (frag_mem_limit(nf) > nf->high_thresh) | 349 | if (frag_mem_limit(nf) > nf->high_thresh) { |
350 | inet_frag_schedule_worker(f); | ||
296 | return NULL; | 351 | return NULL; |
352 | } | ||
297 | 353 | ||
298 | q = kzalloc(f->qsize, GFP_ATOMIC); | 354 | q = kzalloc(f->qsize, GFP_ATOMIC); |
299 | if (q == NULL) | 355 | if (q == NULL) |
@@ -331,8 +387,8 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, | |||
331 | struct inet_frag_queue *q; | 387 | struct inet_frag_queue *q; |
332 | int depth = 0; | 388 | int depth = 0; |
333 | 389 | ||
334 | if (frag_mem_limit(nf) > nf->high_thresh) | 390 | if (frag_mem_limit(nf) > nf->low_thresh) |
335 | inet_frag_evictor(nf, f, false); | 391 | inet_frag_schedule_worker(f); |
336 | 392 | ||
337 | hash &= (INETFRAGS_HASHSZ - 1); | 393 | hash &= (INETFRAGS_HASHSZ - 1); |
338 | hb = &f->hash[hash]; | 394 | hb = &f->hash[hash]; |
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 54988672d00d..54bd170c5eb4 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -195,7 +195,8 @@ static void ip_expire(unsigned long arg) | |||
195 | 195 | ||
196 | ipq_kill(qp); | 196 | ipq_kill(qp); |
197 | 197 | ||
198 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); | 198 | if (!(qp->q.last_in & INET_FRAG_EVICTED)) |
199 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMTIMEOUT); | ||
199 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); | 200 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); |
200 | 201 | ||
201 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { | 202 | if ((qp->q.last_in & INET_FRAG_FIRST_IN) && qp->q.fragments != NULL) { |