diff options
author | Arnaldo Carvalho de Melo <acme@mandriva.com> | 2005-08-09 23:44:40 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2005-08-29 18:55:48 -0400 |
commit | 295ff7edb8f72b77d524759266f7524deae379b3 (patch) | |
tree | e16e99e324444fb01ae3dfd221b5d47a88acfeb8 /net/ipv4 | |
parent | 0b4e03bf0bc43ad6250a1e2fa25fc3eb2b028977 (diff) |
[TIMEWAIT]: Introduce inet_timewait_death_row
This groups all of the tables and variables associated with the TCP timewait
scheduling/recycling/killing code, which can now be isolated from the
TCP-specific code and used by other transport protocols, such as DCCP.
The next changeset will move this code to net/ipv4/inet_timewait_sock.c.
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 11 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 256 |
5 files changed, 141 insertions, 136 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 912bbcc7f415..3eadbb271871 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c | |||
@@ -65,7 +65,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) | |||
65 | socket_seq_show(seq); | 65 | socket_seq_show(seq); |
66 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", | 66 | seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", |
67 | fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), | 67 | fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), |
68 | tcp_tw_count, atomic_read(&tcp_sockets_allocated), | 68 | tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), |
69 | atomic_read(&tcp_memory_allocated)); | 69 | atomic_read(&tcp_memory_allocated)); |
70 | seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); | 70 | seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); |
71 | seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); | 71 | seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e32894532416..ce47a345ecc5 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -259,7 +259,7 @@ ctl_table ipv4_table[] = { | |||
259 | { | 259 | { |
260 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, | 260 | .ctl_name = NET_TCP_MAX_TW_BUCKETS, |
261 | .procname = "tcp_max_tw_buckets", | 261 | .procname = "tcp_max_tw_buckets", |
262 | .data = &sysctl_tcp_max_tw_buckets, | 262 | .data = &tcp_death_row.sysctl_max_tw_buckets, |
263 | .maxlen = sizeof(int), | 263 | .maxlen = sizeof(int), |
264 | .mode = 0644, | 264 | .mode = 0644, |
265 | .proc_handler = &proc_dointvec | 265 | .proc_handler = &proc_dointvec |
@@ -363,7 +363,7 @@ ctl_table ipv4_table[] = { | |||
363 | { | 363 | { |
364 | .ctl_name = NET_TCP_TW_RECYCLE, | 364 | .ctl_name = NET_TCP_TW_RECYCLE, |
365 | .procname = "tcp_tw_recycle", | 365 | .procname = "tcp_tw_recycle", |
366 | .data = &sysctl_tcp_tw_recycle, | 366 | .data = &tcp_death_row.sysctl_tw_recycle, |
367 | .maxlen = sizeof(int), | 367 | .maxlen = sizeof(int), |
368 | .mode = 0644, | 368 | .mode = 0644, |
369 | .proc_handler = &proc_dointvec | 369 | .proc_handler = &proc_dointvec |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4bda522d25cf..0eed64a1991d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2109,12 +2109,12 @@ void __init tcp_init(void) | |||
2109 | if (order >= 4) { | 2109 | if (order >= 4) { |
2110 | sysctl_local_port_range[0] = 32768; | 2110 | sysctl_local_port_range[0] = 32768; |
2111 | sysctl_local_port_range[1] = 61000; | 2111 | sysctl_local_port_range[1] = 61000; |
2112 | sysctl_tcp_max_tw_buckets = 180000; | 2112 | tcp_death_row.sysctl_max_tw_buckets = 180000; |
2113 | sysctl_tcp_max_orphans = 4096 << (order - 4); | 2113 | sysctl_tcp_max_orphans = 4096 << (order - 4); |
2114 | sysctl_max_syn_backlog = 1024; | 2114 | sysctl_max_syn_backlog = 1024; |
2115 | } else if (order < 3) { | 2115 | } else if (order < 3) { |
2116 | sysctl_local_port_range[0] = 1024 * (3 - order); | 2116 | sysctl_local_port_range[0] = 1024 * (3 - order); |
2117 | sysctl_tcp_max_tw_buckets >>= (3 - order); | 2117 | tcp_death_row.sysctl_max_tw_buckets >>= (3 - order); |
2118 | sysctl_tcp_max_orphans >>= (3 - order); | 2118 | sysctl_tcp_max_orphans >>= (3 - order); |
2119 | sysctl_max_syn_backlog = 128; | 2119 | sysctl_max_syn_backlog = 128; |
2120 | } | 2120 | } |
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b966102b9f39..83f72346274a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -199,7 +199,7 @@ unique: | |||
199 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 199 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
200 | } else if (tw) { | 200 | } else if (tw) { |
201 | /* Silly. Should hash-dance instead... */ | 201 | /* Silly. Should hash-dance instead... */ |
202 | tcp_tw_deschedule(tw); | 202 | inet_twsk_deschedule(tw, &tcp_death_row); |
203 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); | 203 | NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED); |
204 | 204 | ||
205 | inet_twsk_put(tw); | 205 | inet_twsk_put(tw); |
@@ -291,7 +291,7 @@ ok: | |||
291 | spin_unlock(&head->lock); | 291 | spin_unlock(&head->lock); |
292 | 292 | ||
293 | if (tw) { | 293 | if (tw) { |
294 | tcp_tw_deschedule(tw); | 294 | inet_twsk_deschedule(tw, &tcp_death_row);; |
295 | inet_twsk_put(tw); | 295 | inet_twsk_put(tw); |
296 | } | 296 | } |
297 | 297 | ||
@@ -366,7 +366,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) | |||
366 | tp->write_seq = 0; | 366 | tp->write_seq = 0; |
367 | } | 367 | } |
368 | 368 | ||
369 | if (sysctl_tcp_tw_recycle && | 369 | if (tcp_death_row.sysctl_tw_recycle && |
370 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { | 370 | !tp->rx_opt.ts_recent_stamp && rt->rt_dst == daddr) { |
371 | struct inet_peer *peer = rt_get_peer(rt); | 371 | struct inet_peer *peer = rt_get_peer(rt); |
372 | 372 | ||
@@ -965,7 +965,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
965 | * are made in the function processing timewait state. | 965 | * are made in the function processing timewait state. |
966 | */ | 966 | */ |
967 | if (tmp_opt.saw_tstamp && | 967 | if (tmp_opt.saw_tstamp && |
968 | sysctl_tcp_tw_recycle && | 968 | tcp_death_row.sysctl_tw_recycle && |
969 | (dst = inet_csk_route_req(sk, req)) != NULL && | 969 | (dst = inet_csk_route_req(sk, req)) != NULL && |
970 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && | 970 | (peer = rt_get_peer((struct rtable *)dst)) != NULL && |
971 | peer->v4daddr == saddr) { | 971 | peer->v4daddr == saddr) { |
@@ -1305,7 +1305,8 @@ do_time_wait: | |||
1305 | ntohs(th->dest), | 1305 | ntohs(th->dest), |
1306 | inet_iif(skb)); | 1306 | inet_iif(skb)); |
1307 | if (sk2) { | 1307 | if (sk2) { |
1308 | tcp_tw_deschedule((struct inet_timewait_sock *)sk); | 1308 | inet_twsk_deschedule((struct inet_timewait_sock *)sk, |
1309 | &tcp_death_row); | ||
1309 | inet_twsk_put((struct inet_timewait_sock *)sk); | 1310 | inet_twsk_put((struct inet_timewait_sock *)sk); |
1310 | sk = sk2; | 1311 | sk = sk2; |
1311 | goto process; | 1312 | goto process; |
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 2d95afe5b393..81b9a52c50c6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -35,13 +35,37 @@ | |||
35 | #define SYNC_INIT 1 | 35 | #define SYNC_INIT 1 |
36 | #endif | 36 | #endif |
37 | 37 | ||
38 | int sysctl_tcp_tw_recycle; | 38 | /* New-style handling of TIME_WAIT sockets. */ |
39 | int sysctl_tcp_max_tw_buckets = NR_FILE*2; | 39 | |
40 | static void inet_twdr_hangman(unsigned long data); | ||
41 | static void inet_twdr_twkill_work(void *data); | ||
42 | static void inet_twdr_twcal_tick(unsigned long data); | ||
40 | 43 | ||
41 | int sysctl_tcp_syncookies = SYNC_INIT; | 44 | int sysctl_tcp_syncookies = SYNC_INIT; |
42 | int sysctl_tcp_abort_on_overflow; | 45 | int sysctl_tcp_abort_on_overflow; |
43 | 46 | ||
44 | static void tcp_tw_schedule(struct inet_timewait_sock *tw, int timeo); | 47 | struct inet_timewait_death_row tcp_death_row = { |
48 | .sysctl_max_tw_buckets = NR_FILE * 2, | ||
49 | .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, | ||
50 | .death_lock = SPIN_LOCK_UNLOCKED, | ||
51 | .hashinfo = &tcp_hashinfo, | ||
52 | .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, | ||
53 | (unsigned long)&tcp_death_row), | ||
54 | .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, | ||
55 | inet_twdr_twkill_work, | ||
56 | &tcp_death_row), | ||
57 | /* Short-time timewait calendar */ | ||
58 | |||
59 | .twcal_hand = -1, | ||
60 | .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, | ||
61 | (unsigned long)&tcp_death_row), | ||
62 | }; | ||
63 | |||
64 | EXPORT_SYMBOL_GPL(tcp_death_row); | ||
65 | |||
66 | static void inet_twsk_schedule(struct inet_timewait_sock *tw, | ||
67 | struct inet_timewait_death_row *twdr, | ||
68 | const int timeo); | ||
45 | 69 | ||
46 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | 70 | static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) |
47 | { | 71 | { |
@@ -52,10 +76,6 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) | |||
52 | return (seq == e_win && seq == end_seq); | 76 | return (seq == e_win && seq == end_seq); |
53 | } | 77 | } |
54 | 78 | ||
55 | /* New-style handling of TIME_WAIT sockets. */ | ||
56 | |||
57 | int tcp_tw_count; | ||
58 | |||
59 | /* | 79 | /* |
60 | * * Main purpose of TIME-WAIT state is to close connection gracefully, | 80 | * * Main purpose of TIME-WAIT state is to close connection gracefully, |
61 | * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN | 81 | * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN |
@@ -132,7 +152,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
132 | if (!th->fin || | 152 | if (!th->fin || |
133 | TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { | 153 | TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { |
134 | kill_with_rst: | 154 | kill_with_rst: |
135 | tcp_tw_deschedule(tw); | 155 | inet_twsk_deschedule(tw, &tcp_death_row); |
136 | inet_twsk_put(tw); | 156 | inet_twsk_put(tw); |
137 | return TCP_TW_RST; | 157 | return TCP_TW_RST; |
138 | } | 158 | } |
@@ -151,11 +171,11 @@ kill_with_rst: | |||
151 | * do not undertsnad recycling in any case, it not | 171 | * do not undertsnad recycling in any case, it not |
152 | * a big problem in practice. --ANK */ | 172 | * a big problem in practice. --ANK */ |
153 | if (tw->tw_family == AF_INET && | 173 | if (tw->tw_family == AF_INET && |
154 | sysctl_tcp_tw_recycle && tcptw->tw_ts_recent_stamp && | 174 | tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && |
155 | tcp_v4_tw_remember_stamp(tw)) | 175 | tcp_v4_tw_remember_stamp(tw)) |
156 | tcp_tw_schedule(tw, tw->tw_timeout); | 176 | inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout); |
157 | else | 177 | else |
158 | tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); | 178 | inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); |
159 | return TCP_TW_ACK; | 179 | return TCP_TW_ACK; |
160 | } | 180 | } |
161 | 181 | ||
@@ -188,12 +208,12 @@ kill_with_rst: | |||
188 | */ | 208 | */ |
189 | if (sysctl_tcp_rfc1337 == 0) { | 209 | if (sysctl_tcp_rfc1337 == 0) { |
190 | kill: | 210 | kill: |
191 | tcp_tw_deschedule(tw); | 211 | inet_twsk_deschedule(tw, &tcp_death_row); |
192 | inet_twsk_put(tw); | 212 | inet_twsk_put(tw); |
193 | return TCP_TW_SUCCESS; | 213 | return TCP_TW_SUCCESS; |
194 | } | 214 | } |
195 | } | 215 | } |
196 | tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); | 216 | inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); |
197 | 217 | ||
198 | if (tmp_opt.saw_tstamp) { | 218 | if (tmp_opt.saw_tstamp) { |
199 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; | 219 | tcptw->tw_ts_recent = tmp_opt.rcv_tsval; |
@@ -243,7 +263,7 @@ kill: | |||
243 | * Do not reschedule in the last case. | 263 | * Do not reschedule in the last case. |
244 | */ | 264 | */ |
245 | if (paws_reject || th->ack) | 265 | if (paws_reject || th->ack) |
246 | tcp_tw_schedule(tw, TCP_TIMEWAIT_LEN); | 266 | inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN); |
247 | 267 | ||
248 | /* Send ACK. Note, we do not put the bucket, | 268 | /* Send ACK. Note, we do not put the bucket, |
249 | * it will be released by caller. | 269 | * it will be released by caller. |
@@ -263,10 +283,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
263 | const struct tcp_sock *tp = tcp_sk(sk); | 283 | const struct tcp_sock *tp = tcp_sk(sk); |
264 | int recycle_ok = 0; | 284 | int recycle_ok = 0; |
265 | 285 | ||
266 | if (sysctl_tcp_tw_recycle && tp->rx_opt.ts_recent_stamp) | 286 | if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) |
267 | recycle_ok = tp->af_specific->remember_stamp(sk); | 287 | recycle_ok = tp->af_specific->remember_stamp(sk); |
268 | 288 | ||
269 | if (tcp_tw_count < sysctl_tcp_max_tw_buckets) | 289 | if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) |
270 | tw = inet_twsk_alloc(sk, state); | 290 | tw = inet_twsk_alloc(sk, state); |
271 | 291 | ||
272 | if (tw != NULL) { | 292 | if (tw != NULL) { |
@@ -306,7 +326,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
306 | timeo = TCP_TIMEWAIT_LEN; | 326 | timeo = TCP_TIMEWAIT_LEN; |
307 | } | 327 | } |
308 | 328 | ||
309 | tcp_tw_schedule(tw, timeo); | 329 | inet_twsk_schedule(tw, &tcp_death_row, timeo); |
310 | inet_twsk_put(tw); | 330 | inet_twsk_put(tw); |
311 | } else { | 331 | } else { |
312 | /* Sorry, if we're out of memory, just CLOSE this | 332 | /* Sorry, if we're out of memory, just CLOSE this |
@@ -321,26 +341,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) | |||
321 | tcp_done(sk); | 341 | tcp_done(sk); |
322 | } | 342 | } |
323 | 343 | ||
324 | /* Kill off TIME_WAIT sockets once their lifetime has expired. */ | ||
325 | static int tcp_tw_death_row_slot; | ||
326 | |||
327 | static void tcp_twkill(unsigned long); | ||
328 | |||
329 | /* TIME_WAIT reaping mechanism. */ | ||
330 | #define TCP_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ | ||
331 | #define TCP_TWKILL_PERIOD (TCP_TIMEWAIT_LEN/TCP_TWKILL_SLOTS) | ||
332 | |||
333 | #define TCP_TWKILL_QUOTA 100 | ||
334 | |||
335 | static struct hlist_head tcp_tw_death_row[TCP_TWKILL_SLOTS]; | ||
336 | static DEFINE_SPINLOCK(tw_death_lock); | ||
337 | static struct timer_list tcp_tw_timer = TIMER_INITIALIZER(tcp_twkill, 0, 0); | ||
338 | static void twkill_work(void *); | ||
339 | static DECLARE_WORK(tcp_twkill_work, twkill_work, NULL); | ||
340 | static u32 twkill_thread_slots; | ||
341 | |||
342 | /* Returns non-zero if quota exceeded. */ | 344 | /* Returns non-zero if quota exceeded. */ |
343 | static int tcp_do_twkill_work(int slot, unsigned int quota) | 345 | static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, |
346 | const int slot) | ||
344 | { | 347 | { |
345 | struct inet_timewait_sock *tw; | 348 | struct inet_timewait_sock *tw; |
346 | struct hlist_node *node; | 349 | struct hlist_node *node; |
@@ -356,19 +359,19 @@ static int tcp_do_twkill_work(int slot, unsigned int quota) | |||
356 | killed = 0; | 359 | killed = 0; |
357 | ret = 0; | 360 | ret = 0; |
358 | rescan: | 361 | rescan: |
359 | inet_twsk_for_each_inmate(tw, node, &tcp_tw_death_row[slot]) { | 362 | inet_twsk_for_each_inmate(tw, node, &twdr->cells[slot]) { |
360 | __inet_twsk_del_dead_node(tw); | 363 | __inet_twsk_del_dead_node(tw); |
361 | spin_unlock(&tw_death_lock); | 364 | spin_unlock(&twdr->death_lock); |
362 | __inet_twsk_kill(tw, &tcp_hashinfo); | 365 | __inet_twsk_kill(tw, twdr->hashinfo); |
363 | inet_twsk_put(tw); | 366 | inet_twsk_put(tw); |
364 | killed++; | 367 | killed++; |
365 | spin_lock(&tw_death_lock); | 368 | spin_lock(&twdr->death_lock); |
366 | if (killed > quota) { | 369 | if (killed > INET_TWDR_TWKILL_QUOTA) { |
367 | ret = 1; | 370 | ret = 1; |
368 | break; | 371 | break; |
369 | } | 372 | } |
370 | 373 | ||
371 | /* While we dropped tw_death_lock, another cpu may have | 374 | /* While we dropped twdr->death_lock, another cpu may have |
372 | * killed off the next TW bucket in the list, therefore | 375 | * killed off the next TW bucket in the list, therefore |
373 | * do a fresh re-read of the hlist head node with the | 376 | * do a fresh re-read of the hlist head node with the |
374 | * lock reacquired. We still use the hlist traversal | 377 | * lock reacquired. We still use the hlist traversal |
@@ -377,67 +380,68 @@ rescan: | |||
377 | goto rescan; | 380 | goto rescan; |
378 | } | 381 | } |
379 | 382 | ||
380 | tcp_tw_count -= killed; | 383 | twdr->tw_count -= killed; |
381 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); | 384 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITED, killed); |
382 | 385 | ||
383 | return ret; | 386 | return ret; |
384 | } | 387 | } |
385 | 388 | ||
386 | static void tcp_twkill(unsigned long dummy) | 389 | static void inet_twdr_hangman(unsigned long data) |
387 | { | 390 | { |
388 | int need_timer, ret; | 391 | struct inet_timewait_death_row *twdr; |
392 | int unsigned need_timer; | ||
389 | 393 | ||
390 | spin_lock(&tw_death_lock); | 394 | twdr = (struct inet_timewait_death_row *)data; |
395 | spin_lock(&twdr->death_lock); | ||
391 | 396 | ||
392 | if (tcp_tw_count == 0) | 397 | if (twdr->tw_count == 0) |
393 | goto out; | 398 | goto out; |
394 | 399 | ||
395 | need_timer = 0; | 400 | need_timer = 0; |
396 | ret = tcp_do_twkill_work(tcp_tw_death_row_slot, TCP_TWKILL_QUOTA); | 401 | if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { |
397 | if (ret) { | 402 | twdr->thread_slots |= (1 << twdr->slot); |
398 | twkill_thread_slots |= (1 << tcp_tw_death_row_slot); | ||
399 | mb(); | 403 | mb(); |
400 | schedule_work(&tcp_twkill_work); | 404 | schedule_work(&twdr->twkill_work); |
401 | need_timer = 1; | 405 | need_timer = 1; |
402 | } else { | 406 | } else { |
403 | /* We purged the entire slot, anything left? */ | 407 | /* We purged the entire slot, anything left? */ |
404 | if (tcp_tw_count) | 408 | if (twdr->tw_count) |
405 | need_timer = 1; | 409 | need_timer = 1; |
406 | } | 410 | } |
407 | tcp_tw_death_row_slot = | 411 | twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); |
408 | ((tcp_tw_death_row_slot + 1) & (TCP_TWKILL_SLOTS - 1)); | ||
409 | if (need_timer) | 412 | if (need_timer) |
410 | mod_timer(&tcp_tw_timer, jiffies + TCP_TWKILL_PERIOD); | 413 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); |
411 | out: | 414 | out: |
412 | spin_unlock(&tw_death_lock); | 415 | spin_unlock(&twdr->death_lock); |
413 | } | 416 | } |
414 | 417 | ||
415 | extern void twkill_slots_invalid(void); | 418 | extern void twkill_slots_invalid(void); |
416 | 419 | ||
417 | static void twkill_work(void *dummy) | 420 | static void inet_twdr_twkill_work(void *data) |
418 | { | 421 | { |
422 | struct inet_timewait_death_row *twdr = data; | ||
419 | int i; | 423 | int i; |
420 | 424 | ||
421 | if ((TCP_TWKILL_SLOTS - 1) > (sizeof(twkill_thread_slots) * 8)) | 425 | if ((INET_TWDR_TWKILL_SLOTS - 1) > (sizeof(twdr->thread_slots) * 8)) |
422 | twkill_slots_invalid(); | 426 | twkill_slots_invalid(); |
423 | 427 | ||
424 | while (twkill_thread_slots) { | 428 | while (twdr->thread_slots) { |
425 | spin_lock_bh(&tw_death_lock); | 429 | spin_lock_bh(&twdr->death_lock); |
426 | for (i = 0; i < TCP_TWKILL_SLOTS; i++) { | 430 | for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { |
427 | if (!(twkill_thread_slots & (1 << i))) | 431 | if (!(twdr->thread_slots & (1 << i))) |
428 | continue; | 432 | continue; |
429 | 433 | ||
430 | while (tcp_do_twkill_work(i, TCP_TWKILL_QUOTA) != 0) { | 434 | while (inet_twdr_do_twkill_work(twdr, i) != 0) { |
431 | if (need_resched()) { | 435 | if (need_resched()) { |
432 | spin_unlock_bh(&tw_death_lock); | 436 | spin_unlock_bh(&twdr->death_lock); |
433 | schedule(); | 437 | schedule(); |
434 | spin_lock_bh(&tw_death_lock); | 438 | spin_lock_bh(&twdr->death_lock); |
435 | } | 439 | } |
436 | } | 440 | } |
437 | 441 | ||
438 | twkill_thread_slots &= ~(1 << i); | 442 | twdr->thread_slots &= ~(1 << i); |
439 | } | 443 | } |
440 | spin_unlock_bh(&tw_death_lock); | 444 | spin_unlock_bh(&twdr->death_lock); |
441 | } | 445 | } |
442 | } | 446 | } |
443 | 447 | ||
@@ -446,28 +450,22 @@ static void twkill_work(void *dummy) | |||
446 | */ | 450 | */ |
447 | 451 | ||
448 | /* This is for handling early-kills of TIME_WAIT sockets. */ | 452 | /* This is for handling early-kills of TIME_WAIT sockets. */ |
449 | void tcp_tw_deschedule(struct inet_timewait_sock *tw) | 453 | void inet_twsk_deschedule(struct inet_timewait_sock *tw, |
454 | struct inet_timewait_death_row *twdr) | ||
450 | { | 455 | { |
451 | spin_lock(&tw_death_lock); | 456 | spin_lock(&twdr->death_lock); |
452 | if (inet_twsk_del_dead_node(tw)) { | 457 | if (inet_twsk_del_dead_node(tw)) { |
453 | inet_twsk_put(tw); | 458 | inet_twsk_put(tw); |
454 | if (--tcp_tw_count == 0) | 459 | if (--twdr->tw_count == 0) |
455 | del_timer(&tcp_tw_timer); | 460 | del_timer(&twdr->tw_timer); |
456 | } | 461 | } |
457 | spin_unlock(&tw_death_lock); | 462 | spin_unlock(&twdr->death_lock); |
458 | __inet_twsk_kill(tw, &tcp_hashinfo); | 463 | __inet_twsk_kill(tw, twdr->hashinfo); |
459 | } | 464 | } |
460 | 465 | ||
461 | /* Short-time timewait calendar */ | 466 | static void inet_twsk_schedule(struct inet_timewait_sock *tw, |
462 | 467 | struct inet_timewait_death_row *twdr, | |
463 | static int tcp_twcal_hand = -1; | 468 | const int timeo) |
464 | static int tcp_twcal_jiffie; | ||
465 | static void tcp_twcal_tick(unsigned long); | ||
466 | static struct timer_list tcp_twcal_timer = | ||
467 | TIMER_INITIALIZER(tcp_twcal_tick, 0, 0); | ||
468 | static struct hlist_head tcp_twcal_row[TCP_TW_RECYCLE_SLOTS]; | ||
469 | |||
470 | static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) | ||
471 | { | 469 | { |
472 | struct hlist_head *list; | 470 | struct hlist_head *list; |
473 | int slot; | 471 | int slot; |
@@ -496,100 +494,106 @@ static void tcp_tw_schedule(struct inet_timewait_sock *tw, const int timeo) | |||
496 | * is greater than TS tick!) and detect old duplicates with help | 494 | * is greater than TS tick!) and detect old duplicates with help |
497 | * of PAWS. | 495 | * of PAWS. |
498 | */ | 496 | */ |
499 | slot = (timeo + (1<<TCP_TW_RECYCLE_TICK) - 1) >> TCP_TW_RECYCLE_TICK; | 497 | slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; |
500 | 498 | ||
501 | spin_lock(&tw_death_lock); | 499 | spin_lock(&twdr->death_lock); |
502 | 500 | ||
503 | /* Unlink it, if it was scheduled */ | 501 | /* Unlink it, if it was scheduled */ |
504 | if (inet_twsk_del_dead_node(tw)) | 502 | if (inet_twsk_del_dead_node(tw)) |
505 | tcp_tw_count--; | 503 | twdr->tw_count--; |
506 | else | 504 | else |
507 | atomic_inc(&tw->tw_refcnt); | 505 | atomic_inc(&tw->tw_refcnt); |
508 | 506 | ||
509 | if (slot >= TCP_TW_RECYCLE_SLOTS) { | 507 | if (slot >= INET_TWDR_RECYCLE_SLOTS) { |
510 | /* Schedule to slow timer */ | 508 | /* Schedule to slow timer */ |
511 | if (timeo >= TCP_TIMEWAIT_LEN) { | 509 | if (timeo >= TCP_TIMEWAIT_LEN) { |
512 | slot = TCP_TWKILL_SLOTS-1; | 510 | slot = INET_TWDR_TWKILL_SLOTS - 1; |
513 | } else { | 511 | } else { |
514 | slot = (timeo + TCP_TWKILL_PERIOD-1) / TCP_TWKILL_PERIOD; | 512 | slot = (timeo + twdr->period - 1) / twdr->period; |
515 | if (slot >= TCP_TWKILL_SLOTS) | 513 | if (slot >= INET_TWDR_TWKILL_SLOTS) |
516 | slot = TCP_TWKILL_SLOTS-1; | 514 | slot = INET_TWDR_TWKILL_SLOTS - 1; |
517 | } | 515 | } |
518 | tw->tw_ttd = jiffies + timeo; | 516 | tw->tw_ttd = jiffies + timeo; |
519 | slot = (tcp_tw_death_row_slot + slot) & (TCP_TWKILL_SLOTS - 1); | 517 | slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); |
520 | list = &tcp_tw_death_row[slot]; | 518 | list = &twdr->cells[slot]; |
521 | } else { | 519 | } else { |
522 | tw->tw_ttd = jiffies + (slot << TCP_TW_RECYCLE_TICK); | 520 | tw->tw_ttd = jiffies + (slot << INET_TWDR_RECYCLE_TICK); |
523 | 521 | ||
524 | if (tcp_twcal_hand < 0) { | 522 | if (twdr->twcal_hand < 0) { |
525 | tcp_twcal_hand = 0; | 523 | twdr->twcal_hand = 0; |
526 | tcp_twcal_jiffie = jiffies; | 524 | twdr->twcal_jiffie = jiffies; |
527 | tcp_twcal_timer.expires = tcp_twcal_jiffie + (slot<<TCP_TW_RECYCLE_TICK); | 525 | twdr->twcal_timer.expires = twdr->twcal_jiffie + |
528 | add_timer(&tcp_twcal_timer); | 526 | (slot << INET_TWDR_RECYCLE_TICK); |
527 | add_timer(&twdr->twcal_timer); | ||
529 | } else { | 528 | } else { |
530 | if (time_after(tcp_twcal_timer.expires, jiffies + (slot<<TCP_TW_RECYCLE_TICK))) | 529 | if (time_after(twdr->twcal_timer.expires, |
531 | mod_timer(&tcp_twcal_timer, jiffies + (slot<<TCP_TW_RECYCLE_TICK)); | 530 | jiffies + (slot << INET_TWDR_RECYCLE_TICK))) |
532 | slot = (tcp_twcal_hand + slot)&(TCP_TW_RECYCLE_SLOTS-1); | 531 | mod_timer(&twdr->twcal_timer, |
532 | jiffies + (slot << INET_TWDR_RECYCLE_TICK)); | ||
533 | slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); | ||
533 | } | 534 | } |
534 | list = &tcp_twcal_row[slot]; | 535 | list = &twdr->twcal_row[slot]; |
535 | } | 536 | } |
536 | 537 | ||
537 | hlist_add_head(&tw->tw_death_node, list); | 538 | hlist_add_head(&tw->tw_death_node, list); |
538 | 539 | ||
539 | if (tcp_tw_count++ == 0) | 540 | if (twdr->tw_count++ == 0) |
540 | mod_timer(&tcp_tw_timer, jiffies+TCP_TWKILL_PERIOD); | 541 | mod_timer(&twdr->tw_timer, jiffies + twdr->period); |
541 | spin_unlock(&tw_death_lock); | 542 | spin_unlock(&twdr->death_lock); |
542 | } | 543 | } |
543 | 544 | ||
544 | void tcp_twcal_tick(unsigned long dummy) | 545 | void inet_twdr_twcal_tick(unsigned long data) |
545 | { | 546 | { |
547 | struct inet_timewait_death_row *twdr; | ||
546 | int n, slot; | 548 | int n, slot; |
547 | unsigned long j; | 549 | unsigned long j; |
548 | unsigned long now = jiffies; | 550 | unsigned long now = jiffies; |
549 | int killed = 0; | 551 | int killed = 0; |
550 | int adv = 0; | 552 | int adv = 0; |
551 | 553 | ||
552 | spin_lock(&tw_death_lock); | 554 | twdr = (struct inet_timewait_death_row *)data; |
553 | if (tcp_twcal_hand < 0) | 555 | |
556 | spin_lock(&twdr->death_lock); | ||
557 | if (twdr->twcal_hand < 0) | ||
554 | goto out; | 558 | goto out; |
555 | 559 | ||
556 | slot = tcp_twcal_hand; | 560 | slot = twdr->twcal_hand; |
557 | j = tcp_twcal_jiffie; | 561 | j = twdr->twcal_jiffie; |
558 | 562 | ||
559 | for (n=0; n<TCP_TW_RECYCLE_SLOTS; n++) { | 563 | for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { |
560 | if (time_before_eq(j, now)) { | 564 | if (time_before_eq(j, now)) { |
561 | struct hlist_node *node, *safe; | 565 | struct hlist_node *node, *safe; |
562 | struct inet_timewait_sock *tw; | 566 | struct inet_timewait_sock *tw; |
563 | 567 | ||
564 | inet_twsk_for_each_inmate_safe(tw, node, safe, | 568 | inet_twsk_for_each_inmate_safe(tw, node, safe, |
565 | &tcp_twcal_row[slot]) { | 569 | &twdr->twcal_row[slot]) { |
566 | __inet_twsk_del_dead_node(tw); | 570 | __inet_twsk_del_dead_node(tw); |
567 | __inet_twsk_kill(tw, &tcp_hashinfo); | 571 | __inet_twsk_kill(tw, twdr->hashinfo); |
568 | inet_twsk_put(tw); | 572 | inet_twsk_put(tw); |
569 | killed++; | 573 | killed++; |
570 | } | 574 | } |
571 | } else { | 575 | } else { |
572 | if (!adv) { | 576 | if (!adv) { |
573 | adv = 1; | 577 | adv = 1; |
574 | tcp_twcal_jiffie = j; | 578 | twdr->twcal_jiffie = j; |
575 | tcp_twcal_hand = slot; | 579 | twdr->twcal_hand = slot; |
576 | } | 580 | } |
577 | 581 | ||
578 | if (!hlist_empty(&tcp_twcal_row[slot])) { | 582 | if (!hlist_empty(&twdr->twcal_row[slot])) { |
579 | mod_timer(&tcp_twcal_timer, j); | 583 | mod_timer(&twdr->twcal_timer, j); |
580 | goto out; | 584 | goto out; |
581 | } | 585 | } |
582 | } | 586 | } |
583 | j += (1<<TCP_TW_RECYCLE_TICK); | 587 | j += 1 << INET_TWDR_RECYCLE_TICK; |
584 | slot = (slot+1)&(TCP_TW_RECYCLE_SLOTS-1); | 588 | slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); |
585 | } | 589 | } |
586 | tcp_twcal_hand = -1; | 590 | twdr->twcal_hand = -1; |
587 | 591 | ||
588 | out: | 592 | out: |
589 | if ((tcp_tw_count -= killed) == 0) | 593 | if ((twdr->tw_count -= killed) == 0) |
590 | del_timer(&tcp_tw_timer); | 594 | del_timer(&twdr->tw_timer); |
591 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); | 595 | NET_ADD_STATS_BH(LINUX_MIB_TIMEWAITKILLED, killed); |
592 | spin_unlock(&tw_death_lock); | 596 | spin_unlock(&twdr->death_lock); |
593 | } | 597 | } |
594 | 598 | ||
595 | /* This is not only more efficient than what we used to do, it eliminates | 599 | /* This is not only more efficient than what we used to do, it eliminates |
@@ -929,4 +933,4 @@ EXPORT_SYMBOL(tcp_check_req); | |||
929 | EXPORT_SYMBOL(tcp_child_process); | 933 | EXPORT_SYMBOL(tcp_child_process); |
930 | EXPORT_SYMBOL(tcp_create_openreq_child); | 934 | EXPORT_SYMBOL(tcp_create_openreq_child); |
931 | EXPORT_SYMBOL(tcp_timewait_state_process); | 935 | EXPORT_SYMBOL(tcp_timewait_state_process); |
932 | EXPORT_SYMBOL(tcp_tw_deschedule); | 936 | EXPORT_SYMBOL(inet_twsk_deschedule); |