aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Arnaldo Carvalho de Melo <acme@mandriva.com> 2005-08-09 23:45:03 -0400
committer David S. Miller <davem@sunset.davemloft.net> 2005-08-29 18:55:58 -0400
commit 696ab2d3bffc746fb8cf3712f066d42b9886aeed (patch)
tree d0990b1d6f5fd6b3b7ddce553a16cccf6f029651
parent 295ff7edb8f72b77d524759266f7524deae379b3 (diff)
[TIMEWAIT]: Move inet_timewait_death_row routines to net/ipv4/inet_timewait_sock.c
Also export the ones that will be used in the next changeset, when DCCP uses this infrastructure.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/inet_timewait_sock.h 10
-rw-r--r-- include/net/tcp.h 2
-rw-r--r-- net/ipv4/inet_timewait_sock.c 270
-rw-r--r-- net/ipv4/tcp_minisocks.c 281
4 files changed, 290 insertions, 273 deletions
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index a7e8052e2fbf..3b070352e869 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -82,6 +82,10 @@ struct inet_timewait_death_row {
82 int sysctl_max_tw_buckets; 82 int sysctl_max_tw_buckets;
83}; 83};
84 84
85extern void inet_twdr_hangman(unsigned long data);
86extern void inet_twdr_twkill_work(void *data);
87extern void inet_twdr_twcal_tick(unsigned long data);
88
85#if (BITS_PER_LONG == 64) 89#if (BITS_PER_LONG == 64)
86#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 90#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8
87#else 91#else
@@ -206,4 +210,10 @@ extern void __inet_twsk_kill(struct inet_timewait_sock *tw,
206extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, 210extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw,
207 struct sock *sk, 211 struct sock *sk,
208 struct inet_hashinfo *hashinfo); 212 struct inet_hashinfo *hashinfo);
213
214extern void inet_twsk_schedule(struct inet_timewait_sock *tw,
215 struct inet_timewait_death_row *twdr,
216 const int timeo, const int timewait_len);
217extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
218 struct inet_timewait_death_row *twdr);
209#endif /* _INET_TIMEWAIT_SOCK_ */ 219#endif /* _INET_TIMEWAIT_SOCK_ */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4c4cd4fb1ed8..d489ac548e4b 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -44,8 +44,6 @@ extern struct inet_hashinfo tcp_hashinfo;
44 44
45extern atomic_t tcp_orphan_count; 45extern atomic_t tcp_orphan_count;
46extern void tcp_time_wait(struct sock *sk, int state, int timeo); 46extern void tcp_time_wait(struct sock *sk, int state, int timeo);
47extern void inet_twsk_deschedule(struct inet_timewait_sock *tw,
48 struct inet_timewait_death_row *twdr);
49 47
50#define MAX_TCP_HEADER (128 + MAX_HEADER) 48#define MAX_TCP_HEADER (128 + MAX_HEADER)
51 49
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 22882d95f646..4d1502a49852 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -12,6 +12,7 @@
12 12
13#include <net/inet_hashtables.h> 13#include <net/inet_hashtables.h>
14#include <net/inet_timewait_sock.h> 14#include <net/inet_timewait_sock.h>
15#include <net/ip.h>
15 16
16/* Must be called with locally disabled BHs. */ 17/* Must be called with locally disabled BHs. */
17void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo) 18void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo)
@@ -85,6 +86,8 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
85 write_unlock(&ehead->lock); 86 write_unlock(&ehead->lock);
86} 87}
87 88
89EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
90
88struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) 91struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
89{ 92{
90 struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab, 93 struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
@@ -112,3 +115,270 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
112 115
113 return tw; 116 return tw;
114} 117}
118
119EXPORT_SYMBOL_GPL(inet_twsk_alloc);
120
121/* Returns non-zero if quota exceeded. */
122static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
123 const int slot)
124{
125 struct inet_timewait_sock *tw;
126 struct hlist_node *node;
127 unsigned int killed;
128 int ret;
129
130 /* NOTE: compare this to previous version where lock
131 * was released after detaching chain. It was racy,
132 * because tw buckets are scheduled in not serialized context
133 * in 2.3 (with netfilter), and with softnet it is common, because
134 * soft irqs are not sequenced.
135 */
136 killed = 0;
137 ret = 0;
138rescan:
139 inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
140 __inet_twsk_del_dead_node(tw);
141 spin_unlock(&twdr->death_lock);
142 __inet_twsk_kill(tw, twdr->hashinfo);
143 inet_twsk_put(tw);
144 killed++;
145 spin_lock(&twdr->death_lock);
146 if (killed > INET_TWDR_TWKILL_QUOTA) {
147 ret = 1;
148 break;
149 }
150
151 /* While we dropped twdr->death_lock, another cpu may have
152 * killed off the next TW bucket in the list, therefore
153 * do a fresh re-read of the hlist head node with the
154 * lock reacquired. We still use the hlist traversal
155 * macro in order to get the prefetches.
156 */
157 goto rescan;
158 }
159
160 twdr->tw_count -= killed;
161 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
162
163 return ret;
164}
165
166void inet_twdr_hangman(unsigned long data)
167{
168 struct inet_timewait_death_row *twdr;
169 int unsigned need_timer;
170
171 twdr = (struct inet_timewait_death_row *)data;
172 spin_lock(&twdr->death_lock);
173
174 if (twdr->tw_count == 0)
175 goto out;
176
177 need_timer = 0;
178 if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
179 twdr->thread_slots |= (1 << twdr->slot);
180 mb();
181 schedule_work(&twdr->twkill_work);
182 need_timer = 1;
183 } else {
184 /* We purged the entire slot, anything left? */
185 if (twdr->tw_count)
186 need_timer = 1;
187 }
188 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
189 if (need_timer)
190 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
191out:
192 spin_unlock(&twdr->death_lock);
193}
194
195EXPORT_SYMBOL_GPL(inet_twdr_hangman);
196
197extern void twkill_slots_invalid(void);
198
199void inet_twdr_twkill_work(void *data)
200{
201 struct inet_timewait_death_row *twdr = data;
202 int i;
203
204 if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
205 twkill_slots_invalid();
206
207 while (twdr->thread_slots) {
208 spin_lock_bh(&twdr->death_lock);
209 for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
210 if (!(twdr->thread_slots & (1 << i)))
211 continue;
212
213 while (inet_twdr_do_twkill_work(twdr, i) != 0) {
214 if (need_resched()) {
215 spin_unlock_bh(&twdr->death_lock);
216 schedule();
217 spin_lock_bh(&twdr->death_lock);
218 }
219 }
220
221 twdr->thread_slots &= ~(1 << i);
222 }
223 spin_unlock_bh(&twdr->death_lock);
224 }
225}
226
227EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
228
229/* These are always called from BH context. See callers in
230 * tcp_input.c to verify this.
231 */
232
233/* This is for handling early-kills of TIME_WAIT sockets. */
234void inet_twsk_deschedule(struct inet_timewait_sock *tw,
235 struct inet_timewait_death_row *twdr)
236{
237 spin_lock(&twdr->death_lock);
238 if (inet_twsk_del_dead_node(tw)) {
239 inet_twsk_put(tw);
240 if (--twdr->tw_count == 0)
241 del_timer(&twdr->tw_timer);
242 }
243 spin_unlock(&twdr->death_lock);
244 __inet_twsk_kill(tw, twdr->hashinfo);
245}
246
247EXPORT_SYMBOL(inet_twsk_deschedule);
248
249void inet_twsk_schedule(struct inet_timewait_sock *tw,
250 struct inet_timewait_death_row *twdr,
251 const int timeo, const int timewait_len)
252{
253 struct hlist_head *list;
254 int slot;
255
256 /* timeout := RTO * 3.5
257 *
258 * 3.5 = 1+2+0.5 to wait for two retransmits.
259 *
260 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
261 * our ACK acking that FIN can be lost. If N subsequent retransmitted
262 * FINs (or previous seqments) are lost (probability of such event
263 * is p^(N+1), where p is probability to lose single packet and
264 * time to detect the loss is about RTO*(2^N - 1) with exponential
265 * backoff). Normal timewait length is calculated so, that we
266 * waited at least for one retransmitted FIN (maximal RTO is 120sec).
267 * [ BTW Linux. following BSD, violates this requirement waiting
268 * only for 60sec, we should wait at least for 240 secs.
269 * Well, 240 consumes too much of resources 8)
270 * ]
271 * This interval is not reduced to catch old duplicate and
272 * responces to our wandering segments living for two MSLs.
273 * However, if we use PAWS to detect
274 * old duplicates, we can reduce the interval to bounds required
275 * by RTO, rather than MSL. So, if peer understands PAWS, we
276 * kill tw bucket after 3.5*RTO (it is important that this number
277 * is greater than TS tick!) and detect old duplicates with help
278 * of PAWS.
279 */
280 slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
281
282 spin_lock(&twdr->death_lock);
283
284 /* Unlink it, if it was scheduled */
285 if (inet_twsk_del_dead_node(tw))
286 twdr->tw_count--;
287 else
288 atomic_inc(&tw->tw_refcnt);
289
290 if (slot >= INET_TWDR_RECYCLE_SLOTS) {
291 /* Schedule to slow timer */
292 if (timeo >= timewait_len) {
293 slot = INET_TWDR_TWKILL_SLOTS - 1;
294 } else {
295 slot = (timeo + twdr->period - 1) / twdr->period;
296 if (slot >= INET_TWDR_TWKILL_SLOTS)
297 slot = INET_TWDR_TWKILL_SLOTS - 1;
298 }
299 tw->tw_ttd = jiffies + timeo;
300 slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
301 list = &twdr->cells[slot];
302 } else {
303 tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
304
305 if (twdr->twcal_hand < 0) {
306 twdr->twcal_hand = 0;
307 twdr->twcal_jiffie = jiffies;
308 twdr->twcal_timer.expires = twdr->twcal_jiffie +
309 (slot << INET_TWDR_RECYCLE_TICK);
310 add_timer(&twdr->twcal_timer);
311 } else {
312 if (time_after(twdr->twcal_timer.expires,
313 jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
314 mod_timer(&twdr->twcal_timer,
315 jiffies + (slot << INET_TWDR_RECYCLE_TICK));
316 slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
317 }
318 list = &twdr->twcal_row[slot];
319 }
320
321 hlist_add_head(&tw->tw_death_node, list);
322
323 if (twdr->tw_count++ == 0)
324 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
325 spin_unlock(&twdr->death_lock);
326}
327
328EXPORT_SYMBOL_GPL(inet_twsk_schedule);
329
330void inet_twdr_twcal_tick(unsigned long data)
331{
332 struct inet_timewait_death_row *twdr;
333 int n, slot;
334 unsigned long j;
335 unsigned long now = jiffies;
336 int killed = 0;
337 int adv = 0;
338
339 twdr = (struct inet_timewait_death_row *)data;
340
341 spin_lock(&twdr->death_lock);
342 if (twdr->twcal_hand < 0)
343 goto out;
344
345 slot = twdr->twcal_hand;
346 j = twdr->twcal_jiffie;
347
348 for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
349 if (time_before_eq(j, now)) {
350 struct hlist_node *node, *safe;
351 struct inet_timewait_sock *tw;
352
353 inet_twsk_for_each_inmate_safe(tw, node, safe,
354 &twdr->twcal_row[slot]) {
355 __inet_twsk_del_dead_node(tw);
356 __inet_twsk_kill(tw, twdr->hashinfo);
357 inet_twsk_put(tw);
358 killed++;
359 }
360 } else {
361 if (!adv) {
362 adv = 1;
363 twdr->twcal_jiffie = j;
364 twdr->twcal_hand = slot;
365 }
366
367 if (!hlist_empty(&twdr->twcal_row[slot])) {
368 mod_timer(&twdr->twcal_timer, j);
369 goto out;
370 }
371 }
372 j += 1 << INET_TWDR_RECYCLE_TICK;
373 slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
374 }
375 twdr->twcal_hand = -1;
376
377out:
378 if ((twdr->tw_count -= killed) == 0)
379 del_timer(&twdr->tw_timer);
380 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
381 spin_unlock(&twdr->death_lock);
382}
383
384EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 81b9a52c50c6..dc085233d512 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -35,12 +35,6 @@
35#define SYNC_INIT 1 35#define SYNC_INIT 1
36#endif 36#endif
37 37
38/* New-style handling of TIME_WAIT sockets. */
39
40static void inet_twdr_hangman(unsigned long data);
41static void inet_twdr_twkill_work(void *data);
42static void inet_twdr_twcal_tick(unsigned long data);
43
44int sysctl_tcp_syncookies = SYNC_INIT; 38int sysctl_tcp_syncookies = SYNC_INIT;
45int sysctl_tcp_abort_on_overflow; 39int sysctl_tcp_abort_on_overflow;
46 40
@@ -63,10 +57,6 @@ struct inet_timewait_death_row tcp_death_row = {
63 57
64EXPORT_SYMBOL_GPL(tcp_death_row); 58EXPORT_SYMBOL_GPL(tcp_death_row);
65 59
66static void inet_twsk_schedule(struct inet_timewait_sock *tw,
67 struct inet_timewait_death_row *twdr,
68 const int timeo);
69
70static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) 60static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
71{ 61{
72 if (seq == s_win) 62 if (seq == s_win)
@@ -173,9 +163,11 @@ kill_with_rst:
173 if (tw->tw_family == AF_INET && 163 if (tw->tw_family == AF_INET &&
174 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && 164 tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp &&
175 tcp_v4_tw_remember_stamp(tw)) 165 tcp_v4_tw_remember_stamp(tw))
176 inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout); 166 inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
167 TCP_TIMEWAIT_LEN);
177 else 168 else
178 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); 169 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
170 TCP_TIMEWAIT_LEN);
179 return TCP_TW_ACK; 171 return TCP_TW_ACK;
180 } 172 }
181 173
@@ -213,7 +205,8 @@ kill:
213 return TCP_TW_SUCCESS; 205 return TCP_TW_SUCCESS;
214 } 206 }
215 } 207 }
216 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); 208 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
209 TCP_TIMEWAIT_LEN);
217 210
218 if (tmp_opt.saw_tstamp) { 211 if (tmp_opt.saw_tstamp) {
219 tcptw->tw_ts_recent = tmp_opt.rcv_tsval; 212 tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -263,7 +256,8 @@ kill:
263 * Do not reschedule in the last case. 256 * Do not reschedule in the last case.
264 */ 257 */
265 if (paws_reject || th->ack) 258 if (paws_reject || th->ack)
266 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); 259 inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
260 TCP_TIMEWAIT_LEN);
267 261
268 /* Send ACK. Note, we do not put the bucket, 262 /* Send ACK. Note, we do not put the bucket,
269 * it will be released by caller. 263 * it will be released by caller.
@@ -326,7 +320,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
326 timeo = TCP_TIMEWAIT_LEN; 320 timeo = TCP_TIMEWAIT_LEN;
327 } 321 }
328 322
329 inet_twsk_schedule(tw, &tcp_death_row, timeo); 323 inet_twsk_schedule(tw, &tcp_death_row, timeo,
324 TCP_TIMEWAIT_LEN);
330 inet_twsk_put(tw); 325 inet_twsk_put(tw);
331 } else { 326 } else {
332 /* Sorry, if we're out of memory, just CLOSE this 327 /* Sorry, if we're out of memory, just CLOSE this
@@ -341,261 +336,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
341 tcp_done(sk); 336 tcp_done(sk);
342} 337}
343 338
344/* Returns non-zero if quota exceeded. */
345static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
346 const int slot)
347{
348 struct inet_timewait_sock *tw;
349 struct hlist_node *node;
350 unsigned int killed;
351 int ret;
352
353 /* NOTE: compare this to previous version where lock
354 * was released after detaching chain. It was racy,
355 * because tw buckets are scheduled in not serialized context
356 * in 2.3 (with netfilter), and with softnet it is common, because
357 * soft irqs are not sequenced.
358 */
359 killed = 0;
360 ret = 0;
361rescan:
362 inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) {
363 __inet_twsk_del_dead_node(tw);
364 spin_unlock(&twdr->death_lock);
365 __inet_twsk_kill(tw, twdr->hashinfo);
366 inet_twsk_put(tw);
367 killed++;
368 spin_lock(&twdr->death_lock);
369 if (killed > INET_TWDR_TWKILL_QUOTA) {
370 ret = 1;
371 break;
372 }
373
374 /* While we dropped twdr->death_lock, another cpu may have
375 * killed off the next TW bucket in the list, therefore
376 * do a fresh re-read of the hlist head node with the
377 * lock reacquired. We still use the hlist traversal
378 * macro in order to get the prefetches.
379 */
380 goto rescan;
381 }
382
383 twdr->tw_count -= killed;
384 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed);
385
386 return ret;
387}
388
389static void inet_twdr_hangman(unsigned long data)
390{
391 struct inet_timewait_death_row *twdr;
392 int unsigned need_timer;
393
394 twdr = (struct inet_timewait_death_row *)data;
395 spin_lock(&twdr->death_lock);
396
397 if (twdr->tw_count == 0)
398 goto out;
399
400 need_timer = 0;
401 if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
402 twdr->thread_slots |= (1 << twdr->slot);
403 mb();
404 schedule_work(&twdr->twkill_work);
405 need_timer = 1;
406 } else {
407 /* We purged the entire slot, anything left? */
408 if (twdr->tw_count)
409 need_timer = 1;
410 }
411 twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
412 if (need_timer)
413 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
414out:
415 spin_unlock(&twdr->death_lock);
416}
417
418extern void twkill_slots_invalid(void);
419
420static void inet_twdr_twkill_work(void *data)
421{
422 struct inet_timewait_death_row *twdr = data;
423 int i;
424
425 if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8))
426 twkill_slots_invalid();
427
428 while (twdr->thread_slots) {
429 spin_lock_bh(&twdr->death_lock);
430 for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
431 if (!(twdr->thread_slots & (1 << i)))
432 continue;
433
434 while (inet_twdr_do_twkill_work(twdr, i) != 0) {
435 if (need_resched()) {
436 spin_unlock_bh(&twdr->death_lock);
437 schedule();
438 spin_lock_bh(&twdr->death_lock);
439 }
440 }
441
442 twdr->thread_slots &= ~(1 << i);
443 }
444 spin_unlock_bh(&twdr->death_lock);
445 }
446}
447
448/* These are always called from BH context. See callers in
449 * tcp_input.c to verify this.
450 */
451
452/* This is for handling early-kills of TIME_WAIT sockets. */
453void inet_twsk_deschedule(struct inet_timewait_sock *tw,
454 struct inet_timewait_death_row *twdr)
455{
456 spin_lock(&twdr->death_lock);
457 if (inet_twsk_del_dead_node(tw)) {
458 inet_twsk_put(tw);
459 if (--twdr->tw_count == 0)
460 del_timer(&twdr->tw_timer);
461 }
462 spin_unlock(&twdr->death_lock);
463 __inet_twsk_kill(tw, twdr->hashinfo);
464}
465
466static void inet_twsk_schedule(struct inet_timewait_sock *tw,
467 struct inet_timewait_death_row *twdr,
468 const int timeo)
469{
470 struct hlist_head *list;
471 int slot;
472
473 /* timeout := RTO * 3.5
474 *
475 * 3.5 = 1+2+0.5 to wait for two retransmits.
476 *
477 * RATIONALE: if FIN arrived and we entered TIME-WAIT state,
478 * our ACK acking that FIN can be lost. If N subsequent retransmitted
479 * FINs (or previous seqments) are lost (probability of such event
480 * is p^(N+1), where p is probability to lose single packet and
481 * time to detect the loss is about RTO*(2^N - 1) with exponential
482 * backoff). Normal timewait length is calculated so, that we
483 * waited at least for one retransmitted FIN (maximal RTO is 120sec).
484 * [ BTW Linux. following BSD, violates this requirement waiting
485 * only for 60sec, we should wait at least for 240 secs.
486 * Well, 240 consumes too much of resources 8)
487 * ]
488 * This interval is not reduced to catch old duplicate and
489 * responces to our wandering segments living for two MSLs.
490 * However, if we use PAWS to detect
491 * old duplicates, we can reduce the interval to bounds required
492 * by RTO, rather than MSL. So, if peer understands PAWS, we
493 * kill tw bucket after 3.5*RTO (it is important that this number
494 * is greater than TS tick!) and detect old duplicates with help
495 * of PAWS.
496 */
497 slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
498
499 spin_lock(&twdr->death_lock);
500
501 /* Unlink it, if it was scheduled */
502 if (inet_twsk_del_dead_node(tw))
503 twdr->tw_count--;
504 else
505 atomic_inc(&tw->tw_refcnt);
506
507 if (slot >= INET_TWDR_RECYCLE_SLOTS) {
508 /* Schedule to slow timer */
509 if (timeo >= TCP_TIMEWAIT_LEN) {
510 slot = INET_TWDR_TWKILL_SLOTS - 1;
511 } else {
512 slot = (timeo + twdr->period - 1) / twdr->period;
513 if (slot >= INET_TWDR_TWKILL_SLOTS)
514 slot = INET_TWDR_TWKILL_SLOTS - 1;
515 }
516 tw->tw_ttd = jiffies + timeo;
517 slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
518 list = &twdr->cells[slot];
519 } else {
520 tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK);
521
522 if (twdr->twcal_hand < 0) {
523 twdr->twcal_hand = 0;
524 twdr->twcal_jiffie = jiffies;
525 twdr->twcal_timer.expires = twdr->twcal_jiffie +
526 (slot << INET_TWDR_RECYCLE_TICK);
527 add_timer(&twdr->twcal_timer);
528 } else {
529 if (time_after(twdr->twcal_timer.expires,
530 jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
531 mod_timer(&twdr->twcal_timer,
532 jiffies + (slot << INET_TWDR_RECYCLE_TICK));
533 slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
534 }
535 list = &twdr->twcal_row[slot];
536 }
537
538 hlist_add_head(&tw->tw_death_node, list);
539
540 if (twdr->tw_count++ == 0)
541 mod_timer(&twdr->tw_timer, jiffies + twdr->period);
542 spin_unlock(&twdr->death_lock);
543}
544
545void inet_twdr_twcal_tick(unsigned long data)
546{
547 struct inet_timewait_death_row *twdr;
548 int n, slot;
549 unsigned long j;
550 unsigned long now = jiffies;
551 int killed = 0;
552 int adv = 0;
553
554 twdr = (struct inet_timewait_death_row *)data;
555
556 spin_lock(&twdr->death_lock);
557 if (twdr->twcal_hand < 0)
558 goto out;
559
560 slot = twdr->twcal_hand;
561 j = twdr->twcal_jiffie;
562
563 for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
564 if (time_before_eq(j, now)) {
565 struct hlist_node *node, *safe;
566 struct inet_timewait_sock *tw;
567
568 inet_twsk_for_each_inmate_safe(tw, node, safe,
569 &twdr->twcal_row[slot]) {
570 __inet_twsk_del_dead_node(tw);
571 __inet_twsk_kill(tw, twdr->hashinfo);
572 inet_twsk_put(tw);
573 killed++;
574 }
575 } else {
576 if (!adv) {
577 adv = 1;
578 twdr->twcal_jiffie = j;
579 twdr->twcal_hand = slot;
580 }
581
582 if (!hlist_empty(&twdr->twcal_row[slot])) {
583 mod_timer(&twdr->twcal_timer, j);
584 goto out;
585 }
586 }
587 j += 1 << INET_TWDR_RECYCLE_TICK;
588 slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
589 }
590 twdr->twcal_hand = -1;
591
592out:
593 if ((twdr->tw_count -= killed) == 0)
594 del_timer(&twdr->tw_timer);
595 NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed);
596 spin_unlock(&twdr->death_lock);
597}
598
599/* This is not only more efficient than what we used to do, it eliminates 339/* This is not only more efficient than what we used to do, it eliminates
600 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM 340 * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM
601 * 341 *
@@ -933,4 +673,3 @@ EXPORT_SYMBOL(tcp_check_req);
933EXPORT_SYMBOL(tcp_child_process); 673EXPORT_SYMBOL(tcp_child_process);
934EXPORT_SYMBOL(tcp_create_openreq_child); 674EXPORT_SYMBOL(tcp_create_openreq_child);
935EXPORT_SYMBOL(tcp_timewait_state_process); 675EXPORT_SYMBOL(tcp_timewait_state_process);
936EXPORT_SYMBOL(inet_twsk_deschedule);