Diffstat (limited to 'net/ipv4/inet_fragment.c')
 -rw-r--r--  net/ipv4/inet_fragment.c | 84
 1 file changed, 69 insertions(+), 15 deletions(-)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index f4fd23de9b13..e97d66a1fdde 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -23,6 +23,28 @@
 
 #include <net/sock.h>
 #include <net/inet_frag.h>
+#include <net/inet_ecn.h>
+
+/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements
+ * Value : 0xff if frame should be dropped.
+ *         0 or INET_ECN_CE value, to be ORed in to final iph->tos field
+ */
+const u8 ip_frag_ecn_table[16] = {
+	/* at least one fragment had CE, and others ECT_0 or ECT_1 */
+	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0]			= INET_ECN_CE,
+	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1]			= INET_ECN_CE,
+	[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1]	= INET_ECN_CE,
+
+	/* invalid combinations : drop frame */
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff,
+	[IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff,
+};
+EXPORT_SYMBOL(ip_frag_ecn_table);
 
 static void inet_frag_secret_rebuild(unsigned long dummy)
 {
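How the table is meant to be consumed: each arriving fragment contributes its 2-bit ECN field, mapped to an IPFRAG_ECN_* flag, into a per-queue accumulator, and the table is consulted once when reassembly completes. The sketch below is modeled on the ipv4 reassembly path, which lives outside this file; ip4_frag_ecn(), struct ipq and its ecn member are assumptions here, not part of this diff.

	/* Sketch of the intended consumer (names are assumptions, not from
	 * this diff). Per arriving fragment, the caller accumulates:
	 *	qp->ecn |= ip4_frag_ecn(iph->tos);
	 */
	static int example_reasm_finish_ecn(struct ipq *qp, struct iphdr *iph)
	{
		u8 ecn = ip_frag_ecn_table[qp->ecn];

		if (unlikely(ecn == 0xff))
			return -EINVAL;	/* RFC 3168 5.3: invalid combination, drop */
		iph->tos |= ecn;	/* propagate CE if any fragment carried it */
		return 0;
	}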
@@ -30,20 +52,27 @@ static void inet_frag_secret_rebuild(unsigned long dummy)
 	unsigned long now = jiffies;
 	int i;
 
+	/* Per bucket lock NOT needed here, due to write lock protection */
 	write_lock(&f->lock);
+
 	get_random_bytes(&f->rnd, sizeof(u32));
 	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_bucket *hb;
 		struct inet_frag_queue *q;
 		struct hlist_node *n;
 
-		hlist_for_each_entry_safe(q, n, &f->hash[i], list) {
+		hb = &f->hash[i];
+		hlist_for_each_entry_safe(q, n, &hb->chain, list) {
 			unsigned int hval = f->hashfn(q);
 
 			if (hval != i) {
+				struct inet_frag_bucket *hb_dest;
+
 				hlist_del(&q->list);
 
 				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &f->hash[hval]);
+				hb_dest = &f->hash[hval];
+				hlist_add_head(&q->list, &hb_dest->chain);
 			}
 		}
 	}
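The rebuild deliberately walks every chain without taking the new per-bucket lock: write_lock(&f->lock) already excludes every reader, and the chain locks are only ever taken under the read side. Summarizing the hierarchy this patch establishes (inferred from the hunks in this file):

	/*
	 * Lock ordering after this patch:
	 *
	 *   write_lock(&f->lock)          secret rebuild; excludes all readers,
	 *                                 so hb->chain_lock is not needed
	 *   read_lock(&f->lock)           lookup/insert/unlink; pins f->rnd and
	 *     spin_lock(&hb->chain_lock)  the bucket layout, then serializes
	 *                                 a single bucket's chain
	 */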
@@ -56,9 +85,12 @@ void inet_frags_init(struct inet_frags *f)
 {
 	int i;
 
-	for (i = 0; i < INETFRAGS_HASHSZ; i++)
-		INIT_HLIST_HEAD(&f->hash[i]);
+	for (i = 0; i < INETFRAGS_HASHSZ; i++) {
+		struct inet_frag_bucket *hb = &f->hash[i];
 
+		spin_lock_init(&hb->chain_lock);
+		INIT_HLIST_HEAD(&hb->chain);
+	}
 	rwlock_init(&f->lock);
 
 	f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
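Initializing a spinlock and a list head per slot implies f->hash[] changed type from a bare hlist_head to a small bucket struct. That definition lives in include/net/inet_frag.h, outside this diffstat view; from the same patch it should read along these lines (verify against the header):

	struct inet_frag_bucket {
		struct hlist_head	chain;		/* queues hashed to this bucket */
		spinlock_t		chain_lock;	/* serializes chain updates */
	};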
@@ -100,10 +132,18 @@ EXPORT_SYMBOL(inet_frags_exit_net);
 
 static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
 {
-	write_lock(&f->lock);
+	struct inet_frag_bucket *hb;
+	unsigned int hash;
+
+	read_lock(&f->lock);
+	hash = f->hashfn(fq);
+	hb = &f->hash[hash];
+
+	spin_lock(&hb->chain_lock);
 	hlist_del(&fq->list);
-	fq->net->nqueues--;
-	write_unlock(&f->lock);
+	spin_unlock(&hb->chain_lock);
+
+	read_unlock(&f->lock);
 	inet_frag_lru_del(fq);
 }
 
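fq_unlink() recomputes the hash under read_lock() rather than caching it, because a concurrent secret rebuild may change f->rnd and move the queue; a stale index would lock the wrong bucket. For context, the function's caller in this file, inet_frag_kill(), is untouched by the patch and at this point reads roughly as follows (a reconstruction, not quoted from this view):

	void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
	{
		if (del_timer(&fq->timer))
			atomic_dec(&fq->refcnt);	/* drop the timer's reference */

		if (!(fq->last_in & INET_FRAG_COMPLETE)) {
			fq_unlink(fq, f);		/* leave hash chain and LRU */
			atomic_dec(&fq->refcnt);	/* drop the chain's reference */
			fq->last_in |= INET_FRAG_COMPLETE;
		}
	}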
@@ -182,6 +222,9 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
 		q = list_first_entry(&nf->lru_list,
 				struct inet_frag_queue, lru_list);
 		atomic_inc(&q->refcnt);
+		/* Remove q from list to avoid several CPUs grabbing it */
+		list_del_init(&q->lru_list);
+
 		spin_unlock(&nf->lru_lock);
 
 		spin_lock(&q->lock);
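The new list_del_init() removes the queue from the LRU list before nf->lru_lock is released, so concurrent evictors can no longer pick the same queue and race over killing it; the held refcount keeps q alive meanwhile. For orientation, the surrounding loop has roughly this shape (a reconstruction; only the + lines above are from this hunk):

	while (work > 0) {
		spin_lock(&nf->lru_lock);
		if (list_empty(&nf->lru_list)) {
			spin_unlock(&nf->lru_lock);
			break;
		}
		q = list_first_entry(&nf->lru_list,
				     struct inet_frag_queue, lru_list);
		atomic_inc(&q->refcnt);
		/* Remove q from list to avoid several CPUs grabbing it */
		list_del_init(&q->lru_list);
		spin_unlock(&nf->lru_lock);

		spin_lock(&q->lock);
		if (!(q->last_in & INET_FRAG_COMPLETE))
			inet_frag_kill(q, f);
		spin_unlock(&q->lock);

		if (atomic_dec_and_test(&q->refcnt))
			inet_frag_destroy(q, f, &work);
		evicted++;
	}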
@@ -202,27 +245,32 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 		struct inet_frag_queue *qp_in, struct inet_frags *f,
 		void *arg)
 {
+	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *qp;
 #ifdef CONFIG_SMP
 #endif
 	unsigned int hash;
 
-	write_lock(&f->lock);
+	read_lock(&f->lock); /* Protects against hash rebuild */
 	/*
 	 * While we stayed w/o the lock other CPU could update
 	 * the rnd seed, so we need to re-calculate the hash
 	 * chain. Fortunatelly the qp_in can be used to get one.
 	 */
 	hash = f->hashfn(qp_in);
+	hb = &f->hash[hash];
+	spin_lock(&hb->chain_lock);
+
 #ifdef CONFIG_SMP
 	/* With SMP race we have to recheck hash table, because
 	 * such entry could be created on other cpu, while we
-	 * promoted read lock to write lock.
+	 * released the hash bucket lock.
 	 */
-	hlist_for_each_entry(qp, &f->hash[hash], list) {
+	hlist_for_each_entry(qp, &hb->chain, list) {
 		if (qp->net == nf && f->match(qp, arg)) {
 			atomic_inc(&qp->refcnt);
-			write_unlock(&f->lock);
+			spin_unlock(&hb->chain_lock);
+			read_unlock(&f->lock);
 			qp_in->last_in |= INET_FRAG_COMPLETE;
 			inet_frag_put(qp_in, f);
 			return qp;
@@ -234,9 +282,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
 	atomic_inc(&qp->refcnt);
 
 	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &f->hash[hash]);
-	nf->nqueues++;
-	write_unlock(&f->lock);
+	hlist_add_head(&qp->list, &hb->chain);
+	spin_unlock(&hb->chain_lock);
+	read_unlock(&f->lock);
 	inet_frag_lru_add(nf, qp);
 	return qp;
 }
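The two adjacent atomic_inc() calls are intentional: the first, guarded by a mod_timer() check in the lines elided between the two hunks above, is the expiry timer's reference; the second is the hash chain's. A reconstruction of that elided guard, for reading convenience only:

	qp = qp_in;
	if (!mod_timer(&qp->timer, jiffies + nf->timeout))
		atomic_inc(&qp->refcnt);	/* pending timer holds one ref */

	atomic_inc(&qp->refcnt);		/* hash chain holds another */

Both references are dropped again via inet_frag_kill()/fq_unlink() when the queue dies. Note also that the unlocks mirror acquisition in reverse, chain_lock before the read lock, with inet_frag_lru_add() done outside both.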
@@ -277,17 +325,23 @@ struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
 		struct inet_frags *f, void *key, unsigned int hash)
 	__releases(&f->lock)
 {
+	struct inet_frag_bucket *hb;
 	struct inet_frag_queue *q;
 	int depth = 0;
 
-	hlist_for_each_entry(q, &f->hash[hash], list) {
+	hb = &f->hash[hash];
+
+	spin_lock(&hb->chain_lock);
+	hlist_for_each_entry(q, &hb->chain, list) {
 		if (q->net == nf && f->match(q, key)) {
 			atomic_inc(&q->refcnt);
+			spin_unlock(&hb->chain_lock);
 			read_unlock(&f->lock);
 			return q;
 		}
 		depth++;
 	}
+	spin_unlock(&hb->chain_lock);
 	read_unlock(&f->lock);
 
 	if (depth <= INETFRAGS_MAXDEPTH)
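The __releases(&f->lock) annotation documents the asymmetric locking contract: the caller takes the read lock, pinning f->rnd (and hence the passed-in hash) against a secret rebuild, and inet_frag_find() drops it on every return path, hit or miss. A hypothetical caller, modeled on ipv4's ipq_find() (outside this file, so treat the names as assumptions):

	read_lock(&ip4_frags.lock);
	hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
	q = inet_frag_find(&net->ipv4.frags, &ip4_frags, &arg, hash);
	/* f->lock is already released here, on both paths */

In the truncated tail, the depth counter caps how far a chain is walked before the lookup gives up (INETFRAGS_MAXDEPTH), limiting worst-case work on attacker-built collision chains.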