Diffstat (limited to 'net/ipv4/ip_fragment.c')
-rw-r--r--  net/ipv4/ip_fragment.c | 421 ++++++++++++++-------------------
 1 file changed, 177 insertions(+), 244 deletions(-)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fabb86db763b..443b3f89192f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -39,6 +39,7 @@
 #include <net/icmp.h>
 #include <net/checksum.h>
 #include <net/inetpeer.h>
+#include <net/inet_frag.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -49,21 +50,8 @@
  * as well. Or notify me, at least. --ANK
  */
 
-/* Fragment cache limits. We will commit 256K at one time. Should we
- * cross that limit we will prune down to 192K. This should cope with
- * even the most extreme cases without allowing an attacker to measurably
- * harm machine performance.
- */
-int sysctl_ipfrag_high_thresh __read_mostly = 256*1024;
-int sysctl_ipfrag_low_thresh __read_mostly = 192*1024;
-
 int sysctl_ipfrag_max_dist __read_mostly = 64;
 
-/* Important NOTE! Fragment queue must be destroyed before MSL expires.
- * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
- */
-int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME;
-
 struct ipfrag_skb_cb
 {
	struct inet_skb_parm	h;
@@ -74,153 +62,102 @@ struct ipfrag_skb_cb
 
 /* Describe an entry in the "incomplete datagrams" queue. */
 struct ipq {
-	struct hlist_node list;
-	struct list_head lru_list;	/* lru list member */
+	struct inet_frag_queue q;
+
 	u32		user;
 	__be32		saddr;
 	__be32		daddr;
 	__be16		id;
 	u8		protocol;
-	u8		last_in;
-#define COMPLETE	4
-#define FIRST_IN	2
-#define LAST_IN		1
-
-	struct sk_buff	*fragments;	/* linked list of received fragments */
-	int		len;		/* total length of original datagram */
-	int		meat;
-	spinlock_t	lock;
-	atomic_t	refcnt;
-	struct timer_list timer;	/* when will this queue expire? */
-	ktime_t		stamp;
 	int		iif;
 	unsigned int	rid;
 	struct inet_peer *peer;
 };
 
-/* Hash table. */
+struct inet_frags_ctl ip4_frags_ctl __read_mostly = {
+	/*
+	 * Fragment cache limits. We will commit 256K at one time. Should we
+	 * cross that limit we will prune down to 192K. This should cope with
+	 * even the most extreme cases without allowing an attacker to
+	 * measurably harm machine performance.
+	 */
+	.high_thresh	 = 256 * 1024,
+	.low_thresh	 = 192 * 1024,
 
-#define IPQ_HASHSZ	64
+	/*
+	 * Important NOTE! Fragment queue must be destroyed before MSL expires.
+	 * RFC791 is wrong proposing to prolongate timer each fragment arrival
+	 * by TTL.
+	 */
+	.timeout	 = IP_FRAG_TIME,
+	.secret_interval = 10 * 60 * HZ,
+};
 
-/* Per-bucket lock is easy to add now. */
-static struct hlist_head ipq_hash[IPQ_HASHSZ];
-static DEFINE_RWLOCK(ipfrag_lock);
-static u32 ipfrag_hash_rnd;
-static LIST_HEAD(ipq_lru_list);
-int ip_frag_nqueues = 0;
+static struct inet_frags ip4_frags;
 
-static __inline__ void __ipq_unlink(struct ipq *qp)
+int ip_frag_nqueues(void)
 {
-	hlist_del(&qp->list);
-	list_del(&qp->lru_list);
-	ip_frag_nqueues--;
+	return ip4_frags.nqueues;
 }
 
-static __inline__ void ipq_unlink(struct ipq *ipq)
+int ip_frag_mem(void)
 {
-	write_lock(&ipfrag_lock);
-	__ipq_unlink(ipq);
-	write_unlock(&ipfrag_lock);
+	return atomic_read(&ip4_frags.mem);
 }
 
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+			 struct net_device *dev);
+
 static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
 {
 	return jhash_3words((__force u32)id << 16 | prot,
 			    (__force u32)saddr, (__force u32)daddr,
-			    ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
+			    ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
 }
 
-static struct timer_list ipfrag_secret_timer;
-int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;
-
-static void ipfrag_secret_rebuild(unsigned long dummy)
+static unsigned int ip4_hashfn(struct inet_frag_queue *q)
 {
-	unsigned long now = jiffies;
-	int i;
-
-	write_lock(&ipfrag_lock);
-	get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
-	for (i = 0; i < IPQ_HASHSZ; i++) {
-		struct ipq *q;
-		struct hlist_node *p, *n;
-
-		hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) {
-			unsigned int hval = ipqhashfn(q->id, q->saddr,
-						      q->daddr, q->protocol);
-
-			if (hval != i) {
-				hlist_del(&q->list);
+	struct ipq *ipq;
 
-				/* Relink to new hash chain. */
-				hlist_add_head(&q->list, &ipq_hash[hval]);
-			}
-		}
-	}
-	write_unlock(&ipfrag_lock);
-
-	mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
+	ipq = container_of(q, struct ipq, q);
+	return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol);
 }
 
-atomic_t ip_frag_mem = ATOMIC_INIT(0);	/* Memory used for fragments */
-
 /* Memory Tracking Functions. */
 static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
 {
 	if (work)
 		*work -= skb->truesize;
-	atomic_sub(skb->truesize, &ip_frag_mem);
+	atomic_sub(skb->truesize, &ip4_frags.mem);
 	kfree_skb(skb);
 }
 
-static __inline__ void frag_free_queue(struct ipq *qp, int *work)
+static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
 {
-	if (work)
-		*work -= sizeof(struct ipq);
-	atomic_sub(sizeof(struct ipq), &ip_frag_mem);
+	struct ipq *qp;
+
+	qp = container_of(q, struct ipq, q);
+	if (qp->peer)
+		inet_putpeer(qp->peer);
 	kfree(qp);
 }
 
 static __inline__ struct ipq *frag_alloc_queue(void)
 {
-	struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC);
+	struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC);
 
 	if (!qp)
 		return NULL;
-	atomic_add(sizeof(struct ipq), &ip_frag_mem);
+	atomic_add(sizeof(struct ipq), &ip4_frags.mem);
 	return qp;
 }
 
 
 /* Destruction primitives. */
 
-/* Complete destruction of ipq. */
-static void ip_frag_destroy(struct ipq *qp, int *work)
-{
-	struct sk_buff *fp;
-
-	BUG_TRAP(qp->last_in&COMPLETE);
-	BUG_TRAP(del_timer(&qp->timer) == 0);
-
-	if (qp->peer)
-		inet_putpeer(qp->peer);
-
-	/* Release all fragment data. */
-	fp = qp->fragments;
-	while (fp) {
-		struct sk_buff *xp = fp->next;
-
-		frag_kfree_skb(fp, work);
-		fp = xp;
-	}
-
-	/* Finally, release the queue descriptor itself. */
-	frag_free_queue(qp, work);
-}
-
-static __inline__ void ipq_put(struct ipq *ipq, int *work)
+static __inline__ void ipq_put(struct ipq *ipq)
 {
-	if (atomic_dec_and_test(&ipq->refcnt))
-		ip_frag_destroy(ipq, work);
+	inet_frag_put(&ipq->q, &ip4_frags);
 }
 
 /* Kill ipq entry. It is not destroyed immediately,
@@ -228,14 +165,7 @@ static __inline__ void ipq_put(struct ipq *ipq, int *work)
  */
 static void ipq_kill(struct ipq *ipq)
 {
-	if (del_timer(&ipq->timer))
-		atomic_dec(&ipq->refcnt);
-
-	if (!(ipq->last_in & COMPLETE)) {
-		ipq_unlink(ipq);
-		atomic_dec(&ipq->refcnt);
-		ipq->last_in |= COMPLETE;
-	}
+	inet_frag_kill(&ipq->q, &ip4_frags);
 }
 
 /* Memory limiting on fragments. Evictor trashes the oldest
@@ -243,33 +173,11 @@ static void ipq_kill(struct ipq *ipq)
  */
 static void ip_evictor(void)
 {
-	struct ipq *qp;
-	struct list_head *tmp;
-	int work;
-
-	work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
-	if (work <= 0)
-		return;
-
-	while (work > 0) {
-		read_lock(&ipfrag_lock);
-		if (list_empty(&ipq_lru_list)) {
-			read_unlock(&ipfrag_lock);
-			return;
-		}
-		tmp = ipq_lru_list.next;
-		qp = list_entry(tmp, struct ipq, lru_list);
-		atomic_inc(&qp->refcnt);
-		read_unlock(&ipfrag_lock);
+	int evicted;
 
-		spin_lock(&qp->lock);
-		if (!(qp->last_in&COMPLETE))
-			ipq_kill(qp);
-		spin_unlock(&qp->lock);
-
-		ipq_put(qp, &work);
-		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	}
+	evicted = inet_frag_evictor(&ip4_frags);
+	if (evicted)
+		IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted);
 }
 
 /*
@@ -279,9 +187,9 @@ static void ip_expire(unsigned long arg)
 {
 	struct ipq *qp = (struct ipq *) arg;
 
-	spin_lock(&qp->lock);
+	spin_lock(&qp->q.lock);
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto out;
 
 	ipq_kill(qp);
@@ -289,8 +197,8 @@ static void ip_expire(unsigned long arg)
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 
-	if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) {
-		struct sk_buff *head = qp->fragments;
+	if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) {
+		struct sk_buff *head = qp->q.fragments;
 		/* Send an ICMP "Fragment Reassembly Timeout" message. */
 		if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) {
 			icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0);
@@ -298,8 +206,8 @@ static void ip_expire(unsigned long arg)
 		}
 	}
 out:
-	spin_unlock(&qp->lock);
-	ipq_put(qp, NULL);
+	spin_unlock(&qp->q.lock);
+	ipq_put(qp);
 }
 
 /* Creation primitives. */
@@ -312,7 +220,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
 #endif
 	unsigned int hash;
 
-	write_lock(&ipfrag_lock);
+	write_lock(&ip4_frags.lock);
 	hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
 			 qp_in->protocol);
 #ifdef CONFIG_SMP
@@ -320,31 +228,31 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
	 * such entry could be created on other cpu, while we
	 * promoted read lock to write lock.
	 */
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == qp_in->id &&
 		    qp->saddr == qp_in->saddr &&
 		    qp->daddr == qp_in->daddr &&
 		    qp->protocol == qp_in->protocol &&
 		    qp->user == qp_in->user) {
-			atomic_inc(&qp->refcnt);
-			write_unlock(&ipfrag_lock);
-			qp_in->last_in |= COMPLETE;
-			ipq_put(qp_in, NULL);
+			atomic_inc(&qp->q.refcnt);
+			write_unlock(&ip4_frags.lock);
+			qp_in->q.last_in |= COMPLETE;
+			ipq_put(qp_in);
 			return qp;
 		}
 	}
 #endif
 	qp = qp_in;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time))
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout))
+		atomic_inc(&qp->q.refcnt);
 
-	atomic_inc(&qp->refcnt);
-	hlist_add_head(&qp->list, &ipq_hash[hash]);
-	INIT_LIST_HEAD(&qp->lru_list);
-	list_add_tail(&qp->lru_list, &ipq_lru_list);
-	ip_frag_nqueues++;
-	write_unlock(&ipfrag_lock);
+	atomic_inc(&qp->q.refcnt);
+	hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
+	INIT_LIST_HEAD(&qp->q.lru_list);
+	list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	ip4_frags.nqueues++;
+	write_unlock(&ip4_frags.lock);
 	return qp;
 }
 
@@ -357,23 +265,18 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user)
 		goto out_nomem;
 
 	qp->protocol = iph->protocol;
-	qp->last_in = 0;
 	qp->id = iph->id;
 	qp->saddr = iph->saddr;
 	qp->daddr = iph->daddr;
 	qp->user = user;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
-	qp->iif = 0;
 	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
-	init_timer(&qp->timer);
-	qp->timer.data = (unsigned long) qp;	/* pointer to queue */
-	qp->timer.function = ip_expire;		/* expire function */
-	spin_lock_init(&qp->lock);
-	atomic_set(&qp->refcnt, 1);
+	init_timer(&qp->q.timer);
+	qp->q.timer.data = (unsigned long) qp;	/* pointer to queue */
+	qp->q.timer.function = ip_expire;	/* expire function */
+	spin_lock_init(&qp->q.lock);
+	atomic_set(&qp->q.refcnt, 1);
 
 	return ip_frag_intern(qp);
 
@@ -395,20 +298,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	struct ipq *qp;
 	struct hlist_node *n;
 
-	read_lock(&ipfrag_lock);
+	read_lock(&ip4_frags.lock);
 	hash = ipqhashfn(id, saddr, daddr, protocol);
-	hlist_for_each_entry(qp, n, &ipq_hash[hash], list) {
+	hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
 		if (qp->id == id &&
 		    qp->saddr == saddr &&
 		    qp->daddr == daddr &&
 		    qp->protocol == protocol &&
 		    qp->user == user) {
-			atomic_inc(&qp->refcnt);
-			read_unlock(&ipfrag_lock);
+			atomic_inc(&qp->q.refcnt);
+			read_unlock(&ip4_frags.lock);
 			return qp;
 		}
 	}
-	read_unlock(&ipfrag_lock);
+	read_unlock(&ip4_frags.lock);
 
 	return ip_frag_create(iph, user);
 }
@@ -429,7 +332,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
 	end = atomic_inc_return(&peer->rid);
 	qp->rid = end;
 
-	rc = qp->fragments && (end - start) > max;
+	rc = qp->q.fragments && (end - start) > max;
 
 	if (rc) {
 		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
@@ -442,39 +345,42 @@ static int ip_frag_reinit(struct ipq *qp)
 {
 	struct sk_buff *fp;
 
-	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
-		atomic_inc(&qp->refcnt);
+	if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) {
+		atomic_inc(&qp->q.refcnt);
 		return -ETIMEDOUT;
 	}
 
-	fp = qp->fragments;
+	fp = qp->q.fragments;
 	do {
 		struct sk_buff *xp = fp->next;
 		frag_kfree_skb(fp, NULL);
 		fp = xp;
 	} while (fp);
 
-	qp->last_in = 0;
-	qp->len = 0;
-	qp->meat = 0;
-	qp->fragments = NULL;
+	qp->q.last_in = 0;
+	qp->q.len = 0;
+	qp->q.meat = 0;
+	qp->q.fragments = NULL;
 	qp->iif = 0;
 
 	return 0;
 }
 
 /* Add new segment to existing queue. */
-static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
+static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
 	struct sk_buff *prev, *next;
+	struct net_device *dev;
 	int flags, offset;
 	int ihl, end;
+	int err = -ENOENT;
 
-	if (qp->last_in & COMPLETE)
+	if (qp->q.last_in & COMPLETE)
 		goto err;
 
 	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
-	    unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+	    unlikely(ip_frag_too_far(qp)) &&
+	    unlikely(err = ip_frag_reinit(qp))) {
 		ipq_kill(qp);
 		goto err;
 	}
@@ -487,36 +393,40 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 	/* Determine the position of this fragment. */
 	end = offset + skb->len - ihl;
+	err = -EINVAL;
 
 	/* Is this the final fragment? */
 	if ((flags & IP_MF) == 0) {
 		/* If we already have some bits beyond end
 		 * or have different end, the segment is corrrupted.
 		 */
-		if (end < qp->len ||
-		    ((qp->last_in & LAST_IN) && end != qp->len))
+		if (end < qp->q.len ||
+		    ((qp->q.last_in & LAST_IN) && end != qp->q.len))
 			goto err;
-		qp->last_in |= LAST_IN;
-		qp->len = end;
+		qp->q.last_in |= LAST_IN;
+		qp->q.len = end;
 	} else {
 		if (end&7) {
 			end &= ~7;
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
 				skb->ip_summed = CHECKSUM_NONE;
 		}
-		if (end > qp->len) {
+		if (end > qp->q.len) {
 			/* Some bits beyond end -> corruption. */
-			if (qp->last_in & LAST_IN)
+			if (qp->q.last_in & LAST_IN)
 				goto err;
-			qp->len = end;
+			qp->q.len = end;
 		}
 	}
 	if (end == offset)
 		goto err;
 
+	err = -ENOMEM;
 	if (pskb_pull(skb, ihl) == NULL)
 		goto err;
-	if (pskb_trim_rcsum(skb, end-offset))
+
+	err = pskb_trim_rcsum(skb, end - offset);
+	if (err)
 		goto err;
 
 	/* Find out which fragments are in front and at the back of us
@@ -524,7 +434,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
	 * this fragment, right?
	 */
 	prev = NULL;
-	for (next = qp->fragments; next != NULL; next = next->next) {
+	for (next = qp->q.fragments; next != NULL; next = next->next) {
 		if (FRAG_CB(next)->offset >= offset)
 			break;	/* bingo! */
 		prev = next;
@@ -539,8 +449,10 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 
 		if (i > 0) {
 			offset += i;
+			err = -EINVAL;
 			if (end <= offset)
 				goto err;
+			err = -ENOMEM;
 			if (!pskb_pull(skb, i))
 				goto err;
 			if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -548,6 +460,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 		}
 	}
 
+	err = -ENOMEM;
+
 	while (next && FRAG_CB(next)->offset < end) {
 		int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */
 
@@ -558,7 +472,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 			if (!pskb_pull(next, i))
 				goto err;
 			FRAG_CB(next)->offset += i;
-			qp->meat -= i;
+			qp->q.meat -= i;
 			if (next->ip_summed != CHECKSUM_UNNECESSARY)
 				next->ip_summed = CHECKSUM_NONE;
 			break;
@@ -573,9 +487,9 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 			if (prev)
 				prev->next = next;
 			else
-				qp->fragments = next;
+				qp->q.fragments = next;
 
-			qp->meat -= free_it->len;
+			qp->q.meat -= free_it->len;
 			frag_kfree_skb(free_it, NULL);
 		}
 	}
@@ -587,50 +501,77 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (prev)
 		prev->next = skb;
 	else
-		qp->fragments = skb;
+		qp->q.fragments = skb;
 
-	if (skb->dev)
-		qp->iif = skb->dev->ifindex;
-	skb->dev = NULL;
-	qp->stamp = skb->tstamp;
-	qp->meat += skb->len;
-	atomic_add(skb->truesize, &ip_frag_mem);
+	dev = skb->dev;
+	if (dev) {
+		qp->iif = dev->ifindex;
+		skb->dev = NULL;
+	}
+	qp->q.stamp = skb->tstamp;
+	qp->q.meat += skb->len;
+	atomic_add(skb->truesize, &ip4_frags.mem);
 	if (offset == 0)
-		qp->last_in |= FIRST_IN;
+		qp->q.last_in |= FIRST_IN;
 
-	write_lock(&ipfrag_lock);
-	list_move_tail(&qp->lru_list, &ipq_lru_list);
-	write_unlock(&ipfrag_lock);
+	if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
+		return ip_frag_reasm(qp, prev, dev);
 
-	return;
+	write_lock(&ip4_frags.lock);
+	list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
+	write_unlock(&ip4_frags.lock);
+	return -EINPROGRESS;
 
 err:
 	kfree_skb(skb);
+	return err;
 }
 
 
 /* Build a new IP datagram from all its fragments. */
 
-static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
+static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
+			 struct net_device *dev)
 {
 	struct iphdr *iph;
-	struct sk_buff *fp, *head = qp->fragments;
+	struct sk_buff *fp, *head = qp->q.fragments;
 	int len;
 	int ihlen;
+	int err;
 
 	ipq_kill(qp);
 
+	/* Make the one we just received the head. */
+	if (prev) {
+		head = prev->next;
+		fp = skb_clone(head, GFP_ATOMIC);
+
+		if (!fp)
+			goto out_nomem;
+
+		fp->next = head->next;
+		prev->next = fp;
+
+		skb_morph(head, qp->q.fragments);
+		head->next = qp->q.fragments->next;
+
+		kfree_skb(qp->q.fragments);
+		qp->q.fragments = head;
+	}
+
 	BUG_TRAP(head != NULL);
 	BUG_TRAP(FRAG_CB(head)->offset == 0);
 
 	/* Allocate a new buffer for the datagram. */
 	ihlen = ip_hdrlen(head);
-	len = ihlen + qp->len;
+	len = ihlen + qp->q.len;
 
+	err = -E2BIG;
 	if (len > 65535)
 		goto out_oversize;
 
 	/* Head of list must not be cloned. */
+	err = -ENOMEM;
 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
 		goto out_nomem;
 
@@ -654,12 +595,12 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 		head->len -= clone->len;
 		clone->csum = 0;
 		clone->ip_summed = head->ip_summed;
-		atomic_add(clone->truesize, &ip_frag_mem);
+		atomic_add(clone->truesize, &ip4_frags.mem);
 	}
 
 	skb_shinfo(head)->frag_list = head->next;
 	skb_push(head, head->data - skb_network_header(head));
-	atomic_sub(head->truesize, &ip_frag_mem);
+	atomic_sub(head->truesize, &ip4_frags.mem);
 
 	for (fp=head->next; fp; fp = fp->next) {
 		head->data_len += fp->len;
@@ -669,19 +610,19 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 		else if (head->ip_summed == CHECKSUM_COMPLETE)
 			head->csum = csum_add(head->csum, fp->csum);
 		head->truesize += fp->truesize;
-		atomic_sub(fp->truesize, &ip_frag_mem);
+		atomic_sub(fp->truesize, &ip4_frags.mem);
 	}
 
 	head->next = NULL;
 	head->dev = dev;
-	head->tstamp = qp->stamp;
+	head->tstamp = qp->q.stamp;
 
 	iph = ip_hdr(head);
 	iph->frag_off = 0;
 	iph->tot_len = htons(len);
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS);
-	qp->fragments = NULL;
-	return head;
+	qp->q.fragments = NULL;
+	return 0;
 
 out_nomem:
 	LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing "
@@ -694,54 +635,46 @@ out_oversize:
 		       NIPQUAD(qp->saddr));
 out_fail:
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
-	return NULL;
+	return err;
 }
 
 /* Process an incoming IP datagram fragment. */
-struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct sk_buff *skb, u32 user)
 {
 	struct ipq *qp;
-	struct net_device *dev;
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
 
 	/* Start by cleaning up the memory. */
-	if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
+	if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh)
 		ip_evictor();
 
-	dev = skb->dev;
-
 	/* Lookup (or create) queue header */
 	if ((qp = ip_find(ip_hdr(skb), user)) != NULL) {
-		struct sk_buff *ret = NULL;
-
-		spin_lock(&qp->lock);
+		int ret;
 
-		ip_frag_queue(qp, skb);
+		spin_lock(&qp->q.lock);
 
-		if (qp->last_in == (FIRST_IN|LAST_IN) &&
-		    qp->meat == qp->len)
-			ret = ip_frag_reasm(qp, dev);
+		ret = ip_frag_queue(qp, skb);
 
-		spin_unlock(&qp->lock);
-		ipq_put(qp, NULL);
+		spin_unlock(&qp->q.lock);
+		ipq_put(qp);
 		return ret;
 	}
 
 	IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);
-	return NULL;
+	return -ENOMEM;
 }
 
 void __init ipfrag_init(void)
 {
-	ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
-				 (jiffies ^ (jiffies >> 6)));
-
-	init_timer(&ipfrag_secret_timer);
-	ipfrag_secret_timer.function = ipfrag_secret_rebuild;
-	ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
-	add_timer(&ipfrag_secret_timer);
+	ip4_frags.ctl = &ip4_frags_ctl;
+	ip4_frags.hashfn = ip4_hashfn;
+	ip4_frags.destructor = ip4_frag_free;
+	ip4_frags.skb_free = NULL;
+	ip4_frags.qsize = sizeof(struct ipq);
+	inet_frags_init(&ip4_frags);
 }
 
 EXPORT_SYMBOL(ip_defrag);