aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorArnaldo Carvalho de Melo <acme@mandriva.com>2005-08-10 03:03:31 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2005-08-29 18:56:18 -0400
commit6687e988d9aeaccad6774e6a8304f681f3ec0a03 (patch)
treeecd3d28f9989847aa1dcde4782de0210aeadc290 /include
parent64ce207306debd7157f47282be94770407bec01c (diff)
[ICSK]: Move TCP congestion avoidance members to icsk
This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(), minimal renaming/moving done in this changeset to ease review. Most of it is just changes of struct tcp_sock * to struct sock * parameters. With this we move to a state closer to two interesting goals: 1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used for any INET transport protocol that has struct inet_hashinfo and are derived from struct inet_connection_sock. Keeps the userspace API, that will just not display DCCP sockets, while newer versions of tools can support DCCP. 2. INET generic transport pluggable Congestion Avoidance infrastructure, using the current TCP CA infrastructure with DCCP. Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/tcp.h21
-rw-r--r--include/net/inet_connection_sock.h15
-rw-r--r--include/net/tcp.h74
3 files changed, 60 insertions, 50 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 620096840744..ac4ca44c75ca 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -258,19 +258,15 @@ struct tcp_sock {
258 __u32 mss_cache; /* Cached effective mss, not including SACKS */ 258 __u32 mss_cache; /* Cached effective mss, not including SACKS */
259 __u16 xmit_size_goal; /* Goal for segmenting output packets */ 259 __u16 xmit_size_goal; /* Goal for segmenting output packets */
260 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ 260 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
261 __u8 ca_state; /* State of fast-retransmit machine */
262 261
263 __u8 keepalive_probes; /* num of allowed keep alive probes */
264 __u16 advmss; /* Advertised MSS */
265 __u32 window_clamp; /* Maximal window to advertise */ 262 __u32 window_clamp; /* Maximal window to advertise */
266 __u32 rcv_ssthresh; /* Current window clamp */ 263 __u32 rcv_ssthresh; /* Current window clamp */
267 264
268 __u32 frto_highmark; /* snd_nxt when RTO occurred */ 265 __u32 frto_highmark; /* snd_nxt when RTO occurred */
269 __u8 reordering; /* Packet reordering metric. */ 266 __u8 reordering; /* Packet reordering metric. */
270 __u8 frto_counter; /* Number of new acks after RTO */ 267 __u8 frto_counter; /* Number of new acks after RTO */
271
272 __u8 nonagle; /* Disable Nagle algorithm? */ 268 __u8 nonagle; /* Disable Nagle algorithm? */
273 /* ONE BYTE HOLE, TRY TO PACK */ 269 __u8 keepalive_probes; /* num of allowed keep alive probes */
274 270
275/* RTT measurement */ 271/* RTT measurement */
276 __u32 srtt; /* smoothed round trip time << 3 */ 272 __u32 srtt; /* smoothed round trip time << 3 */
@@ -311,8 +307,7 @@ struct tcp_sock {
311 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ 307 struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */
312 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ 308 struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/
313 309
314 __u8 probes_out; /* unanswered 0 window probes */ 310 __u16 advmss; /* Advertised MSS */
315 __u8 ecn_flags; /* ECN status bits. */
316 __u16 prior_ssthresh; /* ssthresh saved at recovery start */ 311 __u16 prior_ssthresh; /* ssthresh saved at recovery start */
317 __u32 lost_out; /* Lost packets */ 312 __u32 lost_out; /* Lost packets */
318 __u32 sacked_out; /* SACK'd packets */ 313 __u32 sacked_out; /* SACK'd packets */
@@ -327,7 +322,7 @@ struct tcp_sock {
327 __u32 urg_seq; /* Seq of received urgent pointer */ 322 __u32 urg_seq; /* Seq of received urgent pointer */
328 __u16 urg_data; /* Saved octet of OOB data and control flags */ 323 __u16 urg_data; /* Saved octet of OOB data and control flags */
329 __u8 urg_mode; /* In urgent mode */ 324 __u8 urg_mode; /* In urgent mode */
330 /* ONE BYTE HOLE, TRY TO PACK! */ 325 __u8 ecn_flags; /* ECN status bits. */
331 __u32 snd_up; /* Urgent pointer */ 326 __u32 snd_up; /* Urgent pointer */
332 327
333 __u32 total_retrans; /* Total retransmits for entire connection */ 328 __u32 total_retrans; /* Total retransmits for entire connection */
@@ -351,11 +346,6 @@ struct tcp_sock {
351 __u32 seq; 346 __u32 seq;
352 __u32 time; 347 __u32 time;
353 } rcvq_space; 348 } rcvq_space;
354
355 /* Pluggable TCP congestion control hook */
356 struct tcp_congestion_ops *ca_ops;
357 u32 ca_priv[16];
358#define TCP_CA_PRIV_SIZE (16*sizeof(u32))
359}; 349};
360 350
361static inline struct tcp_sock *tcp_sk(const struct sock *sk) 351static inline struct tcp_sock *tcp_sk(const struct sock *sk)
@@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk)
377 return (struct tcp_timewait_sock *)sk; 367 return (struct tcp_timewait_sock *)sk;
378} 368}
379 369
380static inline void *tcp_ca(const struct tcp_sock *tp)
381{
382 return (void *) tp->ca_priv;
383}
384
385#endif 370#endif
386 371
387#endif /* _LINUX_TCP_H */ 372#endif /* _LINUX_TCP_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index bec19d5cff26..4d7e708c07d1 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -27,6 +27,7 @@
27 27
28struct inet_bind_bucket; 28struct inet_bind_bucket;
29struct inet_hashinfo; 29struct inet_hashinfo;
30struct tcp_congestion_ops;
30 31
31/** inet_connection_sock - INET connection oriented sock 32/** inet_connection_sock - INET connection oriented sock
32 * 33 *
@@ -35,10 +36,13 @@ struct inet_hashinfo;
35 * @icsk_timeout: Timeout 36 * @icsk_timeout: Timeout
36 * @icsk_retransmit_timer: Resend (no ack) 37 * @icsk_retransmit_timer: Resend (no ack)
37 * @icsk_rto: Retransmit timeout 38 * @icsk_rto: Retransmit timeout
 39 * @icsk_ca_ops: Pluggable congestion control hook
40 * @icsk_ca_state: Congestion control state
38 * @icsk_retransmits: Number of unrecovered [RTO] timeouts 41 * @icsk_retransmits: Number of unrecovered [RTO] timeouts
39 * @icsk_pending: Scheduled timer event 42 * @icsk_pending: Scheduled timer event
40 * @icsk_backoff: Backoff 43 * @icsk_backoff: Backoff
41 * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries 44 * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries
45 * @icsk_probes_out: unanswered 0 window probes
42 * @icsk_ack: Delayed ACK control data 46 * @icsk_ack: Delayed ACK control data
43 */ 47 */
44struct inet_connection_sock { 48struct inet_connection_sock {
@@ -50,10 +54,14 @@ struct inet_connection_sock {
50 struct timer_list icsk_retransmit_timer; 54 struct timer_list icsk_retransmit_timer;
51 struct timer_list icsk_delack_timer; 55 struct timer_list icsk_delack_timer;
52 __u32 icsk_rto; 56 __u32 icsk_rto;
57 struct tcp_congestion_ops *icsk_ca_ops;
58 __u8 icsk_ca_state;
53 __u8 icsk_retransmits; 59 __u8 icsk_retransmits;
54 __u8 icsk_pending; 60 __u8 icsk_pending;
55 __u8 icsk_backoff; 61 __u8 icsk_backoff;
56 __u8 icsk_syn_retries; 62 __u8 icsk_syn_retries;
63 __u8 icsk_probes_out;
64 /* 2 BYTES HOLE, TRY TO PACK! */
57 struct { 65 struct {
58 __u8 pending; /* ACK is pending */ 66 __u8 pending; /* ACK is pending */
59 __u8 quick; /* Scheduled number of quick acks */ 67 __u8 quick; /* Scheduled number of quick acks */
@@ -65,6 +73,8 @@ struct inet_connection_sock {
65 __u16 last_seg_size; /* Size of last incoming segment */ 73 __u16 last_seg_size; /* Size of last incoming segment */
66 __u16 rcv_mss; /* MSS used for delayed ACK decisions */ 74 __u16 rcv_mss; /* MSS used for delayed ACK decisions */
67 } icsk_ack; 75 } icsk_ack;
76 u32 icsk_ca_priv[16];
77#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32))
68}; 78};
69 79
70#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ 80#define ICSK_TIME_RETRANS 1 /* Retransmit timer */
@@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
77 return (struct inet_connection_sock *)sk; 87 return (struct inet_connection_sock *)sk;
78} 88}
79 89
90static inline void *inet_csk_ca(const struct sock *sk)
91{
92 return (void *)inet_csk(sk)->icsk_ca_priv;
93}
94
80extern struct sock *inet_csk_clone(struct sock *sk, 95extern struct sock *inet_csk_clone(struct sock *sk,
81 const struct request_sock *req, 96 const struct request_sock *req,
82 const unsigned int __nocast priority); 97 const unsigned int __nocast priority);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d489ac548e4b..0b3f7294c5c7 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -669,29 +669,29 @@ struct tcp_congestion_ops {
669 struct list_head list; 669 struct list_head list;
670 670
671 /* initialize private data (optional) */ 671 /* initialize private data (optional) */
672 void (*init)(struct tcp_sock *tp); 672 void (*init)(struct sock *sk);
673 /* cleanup private data (optional) */ 673 /* cleanup private data (optional) */
674 void (*release)(struct tcp_sock *tp); 674 void (*release)(struct sock *sk);
675 675
676 /* return slow start threshold (required) */ 676 /* return slow start threshold (required) */
677 u32 (*ssthresh)(struct tcp_sock *tp); 677 u32 (*ssthresh)(struct sock *sk);
678 /* lower bound for congestion window (optional) */ 678 /* lower bound for congestion window (optional) */
679 u32 (*min_cwnd)(struct tcp_sock *tp); 679 u32 (*min_cwnd)(struct sock *sk);
680 /* do new cwnd calculation (required) */ 680 /* do new cwnd calculation (required) */
681 void (*cong_avoid)(struct tcp_sock *tp, u32 ack, 681 void (*cong_avoid)(struct sock *sk, u32 ack,
682 u32 rtt, u32 in_flight, int good_ack); 682 u32 rtt, u32 in_flight, int good_ack);
683 /* round trip time sample per acked packet (optional) */ 683 /* round trip time sample per acked packet (optional) */
684 void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); 684 void (*rtt_sample)(struct sock *sk, u32 usrtt);
685 /* call before changing ca_state (optional) */ 685 /* call before changing ca_state (optional) */
686 void (*set_state)(struct tcp_sock *tp, u8 new_state); 686 void (*set_state)(struct sock *sk, u8 new_state);
687 /* call when cwnd event occurs (optional) */ 687 /* call when cwnd event occurs (optional) */
688 void (*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); 688 void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
689 /* new value of cwnd after loss (optional) */ 689 /* new value of cwnd after loss (optional) */
690 u32 (*undo_cwnd)(struct tcp_sock *tp); 690 u32 (*undo_cwnd)(struct sock *sk);
691 /* hook for packet ack accounting (optional) */ 691 /* hook for packet ack accounting (optional) */
692 void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); 692 void (*pkts_acked)(struct sock *sk, u32 num_acked);
693 /* get info for tcp_diag (optional) */ 693 /* get info for tcp_diag (optional) */
694 void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); 694 void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
695 695
696 char name[TCP_CA_NAME_MAX]; 696 char name[TCP_CA_NAME_MAX];
697 struct module *owner; 697 struct module *owner;
@@ -700,30 +700,34 @@ struct tcp_congestion_ops {
700extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); 700extern int tcp_register_congestion_control(struct tcp_congestion_ops *type);
701extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); 701extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type);
702 702
703extern void tcp_init_congestion_control(struct tcp_sock *tp); 703extern void tcp_init_congestion_control(struct sock *sk);
704extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); 704extern void tcp_cleanup_congestion_control(struct sock *sk);
705extern int tcp_set_default_congestion_control(const char *name); 705extern int tcp_set_default_congestion_control(const char *name);
706extern void tcp_get_default_congestion_control(char *name); 706extern void tcp_get_default_congestion_control(char *name);
707extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); 707extern int tcp_set_congestion_control(struct sock *sk, const char *name);
708 708
709extern struct tcp_congestion_ops tcp_init_congestion_ops; 709extern struct tcp_congestion_ops tcp_init_congestion_ops;
710extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); 710extern u32 tcp_reno_ssthresh(struct sock *sk);
711extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, 711extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
712 u32 rtt, u32 in_flight, int flag); 712 u32 rtt, u32 in_flight, int flag);
713extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); 713extern u32 tcp_reno_min_cwnd(struct sock *sk);
714extern struct tcp_congestion_ops tcp_reno; 714extern struct tcp_congestion_ops tcp_reno;
715 715
716static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) 716static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state)
717{ 717{
718 if (tp->ca_ops->set_state) 718 struct inet_connection_sock *icsk = inet_csk(sk);
719 tp->ca_ops->set_state(tp, ca_state); 719
720 tp->ca_state = ca_state; 720 if (icsk->icsk_ca_ops->set_state)
721 icsk->icsk_ca_ops->set_state(sk, ca_state);
722 icsk->icsk_ca_state = ca_state;
721} 723}
722 724
723static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) 725static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
724{ 726{
725 if (tp->ca_ops->cwnd_event) 727 const struct inet_connection_sock *icsk = inet_csk(sk);
726 tp->ca_ops->cwnd_event(tp, event); 728
729 if (icsk->icsk_ca_ops->cwnd_event)
730 icsk->icsk_ca_ops->cwnd_event(sk, event);
727} 731}
728 732
729/* This determines how many packets are "in the network" to the best 733/* This determines how many packets are "in the network" to the best
@@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
749 * The exception is rate halving phase, when cwnd is decreasing towards 753 * The exception is rate halving phase, when cwnd is decreasing towards
750 * ssthresh. 754 * ssthresh.
751 */ 755 */
752static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) 756static inline __u32 tcp_current_ssthresh(const struct sock *sk)
753{ 757{
754 if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) 758 const struct tcp_sock *tp = tcp_sk(sk);
759 if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery))
755 return tp->snd_ssthresh; 760 return tp->snd_ssthresh;
756 else 761 else
757 return max(tp->snd_ssthresh, 762 return max(tp->snd_ssthresh,
@@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
768} 773}
769 774
770/* Set slow start threshold and cwnd not falling to slow start */ 775/* Set slow start threshold and cwnd not falling to slow start */
771static inline void __tcp_enter_cwr(struct tcp_sock *tp) 776static inline void __tcp_enter_cwr(struct sock *sk)
772{ 777{
778 const struct inet_connection_sock *icsk = inet_csk(sk);
779 struct tcp_sock *tp = tcp_sk(sk);
780
773 tp->undo_marker = 0; 781 tp->undo_marker = 0;
774 tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); 782 tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
775 tp->snd_cwnd = min(tp->snd_cwnd, 783 tp->snd_cwnd = min(tp->snd_cwnd,
776 tcp_packets_in_flight(tp) + 1U); 784 tcp_packets_in_flight(tp) + 1U);
777 tp->snd_cwnd_cnt = 0; 785 tp->snd_cwnd_cnt = 0;
@@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp)
780 TCP_ECN_queue_cwr(tp); 788 TCP_ECN_queue_cwr(tp);
781} 789}
782 790
783static inline void tcp_enter_cwr(struct tcp_sock *tp) 791static inline void tcp_enter_cwr(struct sock *sk)
784{ 792{
793 struct tcp_sock *tp = tcp_sk(sk);
794
785 tp->prior_ssthresh = 0; 795 tp->prior_ssthresh = 0;
786 if (tp->ca_state < TCP_CA_CWR) { 796 if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
787 __tcp_enter_cwr(tp); 797 __tcp_enter_cwr(sk);
788 tcp_set_ca_state(tp, TCP_CA_CWR); 798 tcp_set_ca_state(sk, TCP_CA_CWR);
789 } 799 }
790} 800}
791 801