/*
 * TCP Veno congestion control
 *
 * This is based on the congestion detection/avoidance scheme described in
 *    C. P. Fu, S. C. Liew.
 *    "TCP Veno: TCP Enhancement for Transmission over Wireless Access Networks."
 *    IEEE Journal on Selected Areas in Communications,
 *    Feb. 2003.
 * 	See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf
 */

#include <linux/mm.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/inet_diag.h>

#include <net/tcp.h>

/* Default values of the Veno variables, in fixed-point representation
 * with V_PARAM_SHIFT bits to the right of the binary point.
 */
#define V_PARAM_SHIFT 1
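/* beta is the threshold on the estimated backlog (veno->diff below),
 * roughly three packets queued inside the network: below it the
 * connection is treated as non-congestive, at or above it as congestive.
 */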
static const int beta = 3 << V_PARAM_SHIFT;

/* Veno variables */
struct veno {
	u8 doing_veno_now;	/* if true, do veno for this rtt */
	u16 cntrtt;		/* # of rtts measured within last rtt */
	u32 minrtt;		/* min of rtts measured within last rtt (in usec) */
	u32 basertt;		/* the min of all Veno rtt measurements seen (in usec) */
	u32 inc;		/* decide whether to increase cwnd */
	u32 diff;		/* calculate the diff rate */
};
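/* This private state lives in the socket's congestion-control area;
 * tcp_veno_register() checks at build time that it fits within
 * ICSK_CA_PRIV_SIZE.
 */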

/* There are several situations when we must "re-start" Veno:
 *
 *  o when a connection is established
 *  o after an RTO
 *  o after fast recovery
 *  o when we send a packet and there is no outstanding
 *    unacknowledged data (restarting an idle connection)
 *
 */
static inline void veno_enable(struct sock *sk)
{
	struct veno *veno = inet_csk_ca(sk);

	/* turn on Veno */
	veno->doing_veno_now = 1;

	veno->minrtt = 0x7fffffff;
}

static inline void veno_disable(struct sock *sk)
{
	struct veno *veno = inet_csk_ca(sk);

	/* turn off Veno */
	veno->doing_veno_now = 0;
}

static void tcp_veno_init(struct sock *sk)
{
	struct veno *veno = inet_csk_ca(sk);

	veno->basertt = 0x7fffffff;
	veno->inc = 1;
	veno_enable(sk);
}

/* Do rtt sampling needed for Veno. */
static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us)
{
	struct veno *veno = inet_csk_ca(sk);
	u32 vrtt;

	if (rtt_us < 0)
		return;

	/* Never allow zero rtt or baseRTT */
	vrtt = rtt_us + 1;

	/* Filter to find propagation delay: */
	if (vrtt < veno->basertt)
		veno->basertt = vrtt;

	/* Find the min rtt during the last rtt to find
	 * the current prop. delay + queuing delay:
	 */
	veno->minrtt = min(veno->minrtt, vrtt);
	veno->cntrtt++;
}

static void tcp_veno_state(struct sock *sk, u8 ca_state)
{
	if (ca_state == TCP_CA_Open)
		veno_enable(sk);
	else
		veno_disable(sk);
}

/*
 * If the connection is idle and we are restarting,
 * then we don't want to do any Veno calculations
 * until we get fresh rtt samples.  So when we
 * restart, we reset our Veno state to a clean
 * state. After we get acks for this flight of
 * packets, _then_ we can make Veno calculations
 * again.
 */
static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
	if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START)
		tcp_veno_init(sk);
}

static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct veno *veno = inet_csk_ca(sk);

	if (!veno->doing_veno_now)
		return tcp_reno_cong_avoid(sk, ack, in_flight);

	/* limited by applications */
	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	/* We do the Veno calculations only if we got enough rtt samples */
	if (veno->cntrtt <= 2) {
		/* We don't have enough rtt samples to do the Veno
		 * calculation, so we'll behave like Reno.
		 */
		tcp_reno_cong_avoid(sk, ack, in_flight);
	} else {
		u64 target_cwnd;
		u32 rtt;

		/* We have enough rtt samples, so, using the Veno
		 * algorithm, we determine the state of the network.
		 */
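		/* target_cwnd is the window that the path's propagation delay
		 * alone would sustain at the current rate:
		 *    target_cwnd = cwnd * basertt / rtt    (fixed point)
		 * diff = cwnd - target_cwnd = cwnd * (rtt - basertt) / rtt is
		 * then an estimate of how many of this connection's packets
		 * are queued inside the network.
		 */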

		rtt = veno->minrtt;

		/* Do the multiply and shift in 64 bits: snd_cwnd * basertt
		 * (basertt is in usec) can overflow a u32 for large windows.
		 */
		target_cwnd = (u64)tp->snd_cwnd * veno->basertt;
		target_cwnd <<= V_PARAM_SHIFT;
		do_div(target_cwnd, rtt);

		veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;

		if (tp->snd_cwnd <= tp->snd_ssthresh) {
			/* Slow start.  */
			tcp_slow_start(tp);
		} else {
			/* Congestion avoidance. */
			if (veno->diff < beta) {
				/* In the "non-congestive state", increase cwnd
				 *  every rtt.
				 */
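				/* snd_cwnd_cnt is bumped on each ack; after a
				 * full window of acks (roughly one rtt) cwnd
				 * grows by one segment, i.e. Reno's additive
				 * increase.
				 */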
				if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
					if (tp->snd_cwnd < tp->snd_cwnd_clamp)
						tp->snd_cwnd++;
					tp->snd_cwnd_cnt = 0;
				} else
					tp->snd_cwnd_cnt++;
			} else {
				/* In the "congestive state", increase cwnd
				 * every other rtt.
				 */
				if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
				if (veno->inc &&
				    tp->snd_cwnd < tp->snd_cwnd_clamp) {
						tp->snd_cwnd++;
						veno->inc = 0;
					} else
						veno->inc = 1;
					tp->snd_cwnd_cnt = 0;
				} else
					tp->snd_cwnd_cnt++;
			}

		}
		if (tp->snd_cwnd < 2)
			tp->snd_cwnd = 2;
		else if (tp->snd_cwnd > tp->snd_cwnd_clamp)
			tp->snd_cwnd = tp->snd_cwnd_clamp;
	}
	/* Wipe the slate clean for the next rtt. */
	/* veno->cntrtt = 0; */
	veno->minrtt = 0x7fffffff;
}

/* Veno MD phase */
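/* When a loss occurs while the estimated backlog is small (diff < beta),
 * Veno assumes the loss was random (e.g. a wireless bit error) rather than
 * congestion and backs off gently; otherwise it halves cwnd like Reno.
 */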
static u32 tcp_veno_ssthresh(struct sock *sk)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	struct veno *veno = inet_csk_ca(sk);

	if (veno->diff < beta)
		/* in "non-congestive state", cut cwnd by 1/5 */
		return max(tp->snd_cwnd * 4 / 5, 2U);
	else
		/* in "congestive state", cut cwnd by 1/2 */
		return max(tp->snd_cwnd >> 1U, 2U);
}

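/* Once registered, the algorithm can be selected by its name "veno", either
 * system-wide via the net.ipv4.tcp_congestion_control sysctl or per socket
 * with setsockopt(TCP_CONGESTION).
 */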
static struct tcp_congestion_ops tcp_veno = {
	.flags		= TCP_CONG_RTT_STAMP,
	.init		= tcp_veno_init,
	.ssthresh	= tcp_veno_ssthresh,
	.cong_avoid	= tcp_veno_cong_avoid,
	.pkts_acked	= tcp_veno_pkts_acked,
	.set_state	= tcp_veno_state,
	.cwnd_event	= tcp_veno_cwnd_event,

	.owner		= THIS_MODULE,
	.name		= "veno",
};

static int __init tcp_veno_register(void)
{
	BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE);
	tcp_register_congestion_control(&tcp_veno);
	return 0;
}

static void __exit tcp_veno_unregister(void)
{
	tcp_unregister_congestion_control(&tcp_veno);
}

module_init(tcp_veno_register);
module_exit(tcp_veno_unregister);

MODULE_AUTHOR("Bin Zhou, Cheng Peng Fu");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("TCP Veno");
">#define __NR_fchmod 94 #define __NR_fchown 95 #define __NR_getpriority 96 #define __NR_setpriority 97 #define __NR_profil 98 #define __NR_statfs 99 #define __NR_fstatfs 100 #define __NR_ioperm 101 #define __NR_socketcall 102 #define __NR_syslog 103 #define __NR_setitimer 104 #define __NR_getitimer 105 #define __NR_stat 106 #define __NR_lstat 107 #define __NR_fstat 108 #define __NR_olduname 109 #define __NR_iopl 110 #define __NR_vhangup 111 #define __NR_idle 112 #define __NR_vm86old 113 #define __NR_wait4 114 #define __NR_swapoff 115 #define __NR_sysinfo 116 #define __NR_ipc 117 #define __NR_fsync 118 #define __NR_sigreturn 119 #define __NR_clone 120 #define __NR_setdomainname 121 #define __NR_uname 122 #define __NR_modify_ldt 123 #define __NR_adjtimex 124 #define __NR_mprotect 125 #define __NR_sigprocmask 126 #define __NR_create_module 127 #define __NR_init_module 128 #define __NR_delete_module 129 #define __NR_get_kernel_syms 130 #define __NR_quotactl 131 #define __NR_getpgid 132 #define __NR_fchdir 133 #define __NR_bdflush 134 #define __NR_sysfs 135 #define __NR_personality 136 #define __NR_afs_syscall 137 /* Syscall for Andrew File System */ #define __NR_setfsuid 138 #define __NR_setfsgid 139 #define __NR__llseek 140 #define __NR_getdents 141 #define __NR__newselect 142 #define __NR_flock 143 #define __NR_msync 144 #define __NR_readv 145 #define __NR_writev 146 #define __NR_getsid 147 #define __NR_fdatasync 148 #define __NR__sysctl 149 #define __NR_mlock 150 #define __NR_munlock 151 #define __NR_mlockall 152 #define __NR_munlockall 153 #define __NR_sched_setparam 154 #define __NR_sched_getparam 155 #define __NR_sched_setscheduler 156 #define __NR_sched_getscheduler 157 #define __NR_sched_yield 158 #define __NR_sched_get_priority_max 159 #define __NR_sched_get_priority_min 160 #define __NR_sched_rr_get_interval 161 #define __NR_nanosleep 162 #define __NR_mremap 163 #define __NR_setresuid 164 #define __NR_getresuid 165 #define __NR_vm86 166 #define __NR_query_module 167 #define __NR_poll 168 #define __NR_nfsservctl 169 #define __NR_setresgid 170 #define __NR_getresgid 171 #define __NR_prctl 172 #define __NR_rt_sigreturn 173 #define __NR_rt_sigaction 174 #define __NR_rt_sigprocmask 175 #define __NR_rt_sigpending 176 #define __NR_rt_sigtimedwait 177 #define __NR_rt_sigqueueinfo 178 #define __NR_rt_sigsuspend 179 #define __NR_pread64 180 #define __NR_pwrite64 181 #define __NR_chown 182 #define __NR_getcwd 183 #define __NR_capget 184 #define __NR_capset 185 #define __NR_sigaltstack 186 #define __NR_sendfile 187 #define __NR_streams1 188 /* some people actually want it */ #define __NR_streams2 189 /* some people actually want it */ #define __NR_vfork 190 #define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ #define __NR_mmap2 192 #define __NR_truncate64 193 #define __NR_ftruncate64 194 #define __NR_stat64 195 #define __NR_lstat64 196 #define __NR_fstat64 197 #define __NR_lchown32 198 #define __NR_getuid32 199 #define __NR_getgid32 200 #define __NR_geteuid32 201 #define __NR_getegid32 202 #define __NR_setreuid32 203 #define __NR_setregid32 204 #define __NR_getgroups32 205 #define __NR_setgroups32 206 #define __NR_fchown32 207 #define __NR_setresuid32 208 #define __NR_getresuid32 209 #define __NR_setresgid32 210 #define __NR_getresgid32 211 #define __NR_chown32 212 #define __NR_setuid32 213 #define __NR_setgid32 214 #define __NR_setfsuid32 215 #define __NR_setfsgid32 216 #define __NR_pivot_root 217 #define __NR_mincore 218 #define __NR_madvise 219 #define __NR_getdents64 220 #define 
__NR_fcntl64 221 /* 223 is unused */ #define __NR_gettid 224 #define __NR_setxattr 226 #define __NR_lsetxattr 227 #define __NR_fsetxattr 228 #define __NR_getxattr 229 #define __NR_lgetxattr 230 #define __NR_fgetxattr 231 #define __NR_listxattr 232 #define __NR_llistxattr 233 #define __NR_flistxattr 234 #define __NR_removexattr 235 #define __NR_lremovexattr 236 #define __NR_fremovexattr 237 #define __NR_tkill 238 #define __NR_sendfile64 239 #define __NR_futex 240 #define __NR_sched_setaffinity 241 #define __NR_sched_getaffinity 242 #define __NR_set_thread_area 243 #define __NR_get_thread_area 244 #define __NR_io_setup 245 #define __NR_io_destroy 246 #define __NR_io_getevents 247 #define __NR_io_submit 248 #define __NR_io_cancel 249 #define __NR_fadvise64 250 #define __NR_exit_group 252 #define __NR_lookup_dcookie 253 #define __NR_epoll_create 254 #define __NR_epoll_ctl 255 #define __NR_epoll_wait 256 #define __NR_remap_file_pages 257 #define __NR_set_tid_address 258 #define __NR_timer_create 259 #define __NR_timer_settime (__NR_timer_create+1) #define __NR_timer_gettime (__NR_timer_create+2) #define __NR_timer_getoverrun (__NR_timer_create+3) #define __NR_timer_delete (__NR_timer_create+4) #define __NR_clock_settime (__NR_timer_create+5) #define __NR_clock_gettime (__NR_timer_create+6) #define __NR_clock_getres (__NR_timer_create+7) #define __NR_clock_nanosleep (__NR_timer_create+8) #define __NR_statfs64 268 #define __NR_fstatfs64 269 #define __NR_tgkill 270 #define __NR_utimes 271 #define __NR_fadvise64_64 272 #define __NR_vserver 273 #define __NR_mbind 274 #define __NR_get_mempolicy 275 #define __NR_set_mempolicy 276 #define __NR_mq_open 277 #define __NR_mq_unlink (__NR_mq_open+1) #define __NR_mq_timedsend (__NR_mq_open+2) #define __NR_mq_timedreceive (__NR_mq_open+3) #define __NR_mq_notify (__NR_mq_open+4) #define __NR_mq_getsetattr (__NR_mq_open+5) #define __NR_kexec_load 283 #define __NR_waitid 284 #define __NR_add_key 285 #define __NR_request_key 286 #define __NR_keyctl 287 #define __NR_ioprio_set 288 #define __NR_ioprio_get 289 #define __NR_inotify_init 290 #define __NR_inotify_add_watch 291 #define __NR_inotify_rm_watch 292 #define NR_syscalls 293 /* user-visible error numbers are in the range -1 - -124: see <asm-sh/errno.h> */ #define __syscall_return(type, res) \ do { \ if ((unsigned long)(res) >= (unsigned long)(-124)) { \ /* Avoid using "res" which is declared to be in register r0; \ errno might expand to a function call and clobber it. */ \ int __err = -(res); \ errno = __err; \ res = -1; \ } \ return (type) (res); \ } while (0) /* XXX - _foo needs to be __foo, while __NR_bar could be _NR_bar. 
*/ #define _syscall0(type,name) \ type name(void) \ { \ register long __sc0 __asm__ ("r3") = __NR_##name; \ __asm__ __volatile__ ("trapa #0x10" \ : "=z" (__sc0) \ : "0" (__sc0) \ : "memory" ); \ __syscall_return(type,__sc0); \ } #define _syscall1(type,name,type1,arg1) \ type name(type1 arg1) \ { \ register long __sc0 __asm__ ("r3") = __NR_##name; \ register long __sc4 __asm__ ("r4") = (long) arg1; \ __asm__ __volatile__ ("trapa #0x11" \ : "=z" (__sc0) \ : "0" (__sc0), "r" (__sc4) \ : "memory"); \ __syscall_return(type,__sc0); \ } #define _syscall2(type,name,type1,arg1,type2,arg2) \ type name(type1 arg1,type2 arg2) \ { \ register long __sc0 __asm__ ("r3") = __NR_##name; \ register long __sc4 __asm__ ("r4") = (long) arg1; \ register long __sc5 __asm__ ("r5") = (long) arg2; \ __asm__ __volatile__ ("trapa #0x12" \ : "=z" (__sc0) \ : "0" (__sc0), "r" (__sc4), "r" (__sc5) \ : "memory"); \ __syscall_return(type,__sc0); \ } #define _syscall3(type,name,type1,arg1,type2,arg2,type3,arg3) \ type name(type1 arg1,type2 arg2,type3 arg3) \ { \ register long __sc0 __asm__ ("r3") = __NR_##name; \ register long __sc4 __asm__ ("r4") = (long) arg1; \ register long __sc5 __asm__ ("r5") = (long) arg2; \ register long __sc6 __asm__ ("r6") = (long) arg3; \ __asm__ __volatile__ ("trapa #0x13" \ : "=z" (__sc0) \ : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6) \ : "memory"); \ __syscall_return(type,__sc0); \ } #define _syscall4(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4) \ type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4) \ { \ register long __sc0 __asm__ ("r3") = __NR_##name; \ register long __sc4 __asm__ ("r4") = (long) arg1; \ register long __sc5 __asm__ ("r5") = (long) arg2; \ register long __sc6 __asm__ ("r6") = (long) arg3; \ register long __sc7 __asm__ ("r7") = (long) arg4; \ __asm__ __volatile__ ("trapa #0x14" \ : "=z" (__sc0) \ : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), \ "r" (__sc7) \ : "memory" ); \ __syscall_return(type,__sc0); \ } #define _syscall5(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5) \ type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5) \ { \ register long __sc3 __asm__ ("r3") = __NR_##name; \ register long __sc4 __asm__ ("r4") = (long) arg1; \ register long __sc5 __asm__ ("r5") = (long) arg2; \ register long __sc6 __asm__ ("r6") = (long) arg3; \ register long __sc7 __asm__ ("r7") = (long) arg4; \ register long __sc0 __asm__ ("r0") = (long) arg5; \ __asm__ __volatile__ ("trapa #0x15" \ : "=z" (__sc0) \ : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7), \ "r" (__sc3) \ : "memory" ); \ __syscall_return(type,__sc0); \ } #define _syscall6(type,name,type1,arg1,type2,arg2,type3,arg3,type4,arg4,type5,arg5,type6,arg6) \ type name (type1 arg1, type2 arg2, type3 arg3, type4 arg4, type5 arg5, type6 arg6) \ { \ register long __sc3 __asm__ ("r3") = __NR_##name; \ register long __sc4 __asm__ ("r4") = (long) arg1; \ register long __sc5 __asm__ ("r5") = (long) arg2; \ register long __sc6 __asm__ ("r6") = (long) arg3; \ register long __sc7 __asm__ ("r7") = (long) arg4; \ register long __sc0 __asm__ ("r0") = (long) arg5; \ register long __sc1 __asm__ ("r1") = (long) arg6; \ __asm__ __volatile__ ("trapa #0x16" \ : "=z" (__sc0) \ : "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6), "r" (__sc7), \ "r" (__sc3), "r" (__sc1) \ : "memory" ); \ __syscall_return(type,__sc0); \ } #ifdef __KERNEL__ #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_OLD_STAT #define __ARCH_WANT_STAT64 #define 
__ARCH_WANT_SYS_ALARM #define __ARCH_WANT_SYS_GETHOSTNAME #define __ARCH_WANT_SYS_PAUSE #define __ARCH_WANT_SYS_SGETMASK #define __ARCH_WANT_SYS_SIGNAL #define __ARCH_WANT_SYS_TIME #define __ARCH_WANT_SYS_UTIME #define __ARCH_WANT_SYS_WAITPID #define __ARCH_WANT_SYS_SOCKETCALL #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP #define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLDUMOUNT #define __ARCH_WANT_SYS_SIGPENDING #define __ARCH_WANT_SYS_SIGPROCMASK #define __ARCH_WANT_SYS_RT_SIGACTION #endif #ifdef __KERNEL_SYSCALLS__ #include <linux/compiler.h> #include <linux/types.h> #include <linux/linkage.h> #include <asm/ptrace.h> /* * we need this inline - forking from kernel space will result * in NO COPY ON WRITE (!!!), until an execve is executed. This * is no problem, but for the stack. This is handled by not letting