diff options
Diffstat (limited to 'Documentation/networking/ip-sysctl.txt')
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 347 |
1 files changed, 179 insertions, 168 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index fd3c0c012351..a0f6842368c3 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -101,6 +101,11 @@ inet_peer_gc_maxtime - INTEGER | |||
101 | 101 | ||
102 | TCP variables: | 102 | TCP variables: |
103 | 103 | ||
104 | somaxconn - INTEGER | ||
105 | Limit of socket listen() backlog, known in userspace as SOMAXCONN. | ||
106 | Defaults to 128. See also tcp_max_syn_backlog for additional tuning | ||
107 | for TCP sockets. | ||
108 | |||
104 | tcp_abc - INTEGER | 109 | tcp_abc - INTEGER |
105 | Controls Appropriate Byte Count (ABC) defined in RFC3465. | 110 | Controls Appropriate Byte Count (ABC) defined in RFC3465. |
106 | ABC is a way of increasing congestion window (cwnd) more slowly | 111 | ABC is a way of increasing congestion window (cwnd) more slowly |
@@ -112,48 +117,51 @@ tcp_abc - INTEGER | |||
112 | of two segments to compensate for delayed acknowledgments. | 117 | of two segments to compensate for delayed acknowledgments. |
113 | Default: 0 (off) | 118 | Default: 0 (off) |
114 | 119 | ||
115 | tcp_syn_retries - INTEGER | 120 | tcp_abort_on_overflow - BOOLEAN |
116 | Number of times initial SYNs for an active TCP connection attempt | 121 | If listening service is too slow to accept new connections, |
117 | will be retransmitted. Should not be higher than 255. Default value | 122 | reset them. Default state is FALSE. It means that if overflow |
118 | is 5, which corresponds to ~180seconds. | 123 | occurred due to a burst, connection will recover. Enable this |
124 | option _only_ if you are really sure that listening daemon | ||
125 | cannot be tuned to accept connections faster. Enabling this | ||
126 | option can harm clients of your server. | ||
119 | 127 | ||
120 | tcp_synack_retries - INTEGER | 128 | tcp_adv_win_scale - INTEGER |
121 | Number of times SYNACKs for a passive TCP connection attempt will | 129 | Count buffering overhead as bytes/2^tcp_adv_win_scale |
122 | be retransmitted. Should not be higher than 255. Default value | 130 | (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale), |
123 | is 5, which corresponds to ~180seconds. | 131 | if it is <= 0. |
132 | Default: 2 | ||
124 | 133 | ||
125 | tcp_keepalive_time - INTEGER | 134 | tcp_allowed_congestion_control - STRING |
126 | How often TCP sends out keepalive messages when keepalive is enabled. | 135 | Show/set the congestion control choices available to non-privileged |
127 | Default: 2hours. | 136 | processes. The list is a subset of those listed in |
137 | tcp_available_congestion_control. | ||
138 | Default is "reno" and the default setting (tcp_congestion_control). | ||
128 | 139 | ||
129 | tcp_keepalive_probes - INTEGER | 140 | tcp_app_win - INTEGER |
130 | How many keepalive probes TCP sends out, until it decides that the | 141 | Reserve max(window/2^tcp_app_win, mss) of window for application |
131 | connection is broken. Default value: 9. | 142 | buffer. Value 0 is special, it means that nothing is reserved. |
143 | Default: 31 | ||
132 | 144 | ||
133 | tcp_keepalive_intvl - INTEGER | 145 | tcp_available_congestion_control - STRING |
134 | How frequently the probes are sent out. Multiplied by | 146 | Shows the available congestion control choices that are registered. |
135 | tcp_keepalive_probes it is time to kill not responding connection, | 147 | More congestion control algorithms may be available as modules, |
136 | after probes started. Default value: 75sec i.e. connection | 148 | but not loaded. |
137 | will be aborted after ~11 minutes of retries. | ||
138 | 149 | ||
139 | tcp_retries1 - INTEGER | 150 | tcp_congestion_control - STRING |
140 | How many times to retry before deciding that something is wrong | 151 | Set the congestion control algorithm to be used for new |
141 | and it is necessary to report this suspicion to network layer. | 152 | connections. The algorithm "reno" is always available, but |
142 | Minimal RFC value is 3, it is default, which corresponds | 153 | additional choices may be available based on kernel configuration. |
143 | to ~3sec-8min depending on RTO. | 154 | Default is set as part of kernel configuration. |
144 | 155 | ||
145 | tcp_retries2 - INTEGER | 156 | tcp_dsack - BOOLEAN |
146 | How many times to retry before killing alive TCP connection. | 157 | Allows TCP to send "duplicate" SACKs. |
147 | RFC1122 says that the limit should be longer than 100 sec. | ||
148 | It is too small number. Default value 15 corresponds to ~13-30min | ||
149 | depending on RTO. | ||
150 | 158 | ||
151 | tcp_orphan_retries - INTEGER | 159 | tcp_ecn - BOOLEAN |
152 | How many times to retry before killing TCP connection, closed | 160 | Enable Explicit Congestion Notification in TCP. |
153 | by our side. Default value 7 corresponds to ~50sec-16min | 161 | |
154 | depending on RTO. If your machine is a loaded WEB server, | 162 | tcp_fack - BOOLEAN |
155 | you should think about lowering this value, such sockets | 163 | Enable FACK congestion avoidance and fast retransmission. |
156 | may consume significant resources. Cf. tcp_max_orphans. | 164 | The value is not used, if tcp_sack is not enabled. |
157 | 165 | ||
158 | tcp_fin_timeout - INTEGER | 166 | tcp_fin_timeout - INTEGER |
159 | Time to hold socket in state FIN-WAIT-2, if it was closed | 167 | Time to hold socket in state FIN-WAIT-2, if it was closed |
@@ -166,24 +174,33 @@ tcp_fin_timeout - INTEGER | |||
166 | because they eat maximum 1.5K of memory, but they tend | 174 | because they eat maximum 1.5K of memory, but they tend |
167 | to live longer. Cf. tcp_max_orphans. | 175 | to live longer. Cf. tcp_max_orphans. |
168 | 176 | ||
169 | tcp_max_tw_buckets - INTEGER | 177 | tcp_frto - BOOLEAN |
170 | Maximal number of timewait sockets held by system simultaneously. | 178 | Enables F-RTO, an enhanced recovery algorithm for TCP retransmission |
171 | If this number is exceeded time-wait socket is immediately destroyed | 179 | timeouts. It is particularly beneficial in wireless environments |
172 | and warning is printed. This limit exists only to prevent | 180 | where packet loss is typically due to random radio interference |
173 | simple DoS attacks, you _must_ not lower the limit artificially, | 181 | rather than intermediate router congestion. |
174 | but rather increase it (probably, after increasing installed memory), | ||
175 | if network conditions require more than default value. | ||
176 | 182 | ||
177 | tcp_tw_recycle - BOOLEAN | 183 | tcp_keepalive_time - INTEGER |
178 | Enable fast recycling TIME-WAIT sockets. Default value is 0. | 184 | How often TCP sends out keepalive messages when keepalive is enabled. |
179 | It should not be changed without advice/request of technical | 185 | Default: 2hours. |
180 | experts. | ||
181 | 186 | ||
182 | tcp_tw_reuse - BOOLEAN | 187 | tcp_keepalive_probes - INTEGER |
183 | Allow to reuse TIME-WAIT sockets for new connections when it is | 188 | How many keepalive probes TCP sends out, until it decides that the |
184 | safe from protocol viewpoint. Default value is 0. | 189 | connection is broken. Default value: 9. |
185 | It should not be changed without advice/request of technical | 190 | |
186 | experts. | 191 | tcp_keepalive_intvl - INTEGER |
192 | How frequently the probes are sent out. Multiplied by | ||
193 | tcp_keepalive_probes it is time to kill not responding connection, | ||
194 | after probes started. Default value: 75sec i.e. connection | ||
195 | will be aborted after ~11 minutes of retries. | ||
196 | |||
197 | tcp_low_latency - BOOLEAN | ||
198 | If set, the TCP stack makes decisions that prefer lower | ||
199 | latency as opposed to higher throughput. By default, this | ||
200 | option is not set meaning that higher throughput is preferred. | ||
201 | An example of an application where this default should be | ||
202 | changed would be a Beowulf compute cluster. | ||
203 | Default: 0 | ||
187 | 204 | ||
188 | tcp_max_orphans - INTEGER | 205 | tcp_max_orphans - INTEGER |
189 | Maximal number of TCP sockets not attached to any user file handle, | 206 | Maximal number of TCP sockets not attached to any user file handle, |
@@ -197,41 +214,6 @@ tcp_max_orphans - INTEGER | |||
197 | more aggressively. Let me remind you again: each orphan eats | 214 | more aggressively. Let me remind you again: each orphan eats |
198 | up to ~64K of unswappable memory. | 215 | up to ~64K of unswappable memory. |
199 | 216 | ||
200 | tcp_abort_on_overflow - BOOLEAN | ||
201 | If listening service is too slow to accept new connections, | ||
202 | reset them. Default state is FALSE. It means that if overflow | ||
203 | occurred due to a burst, connection will recover. Enable this | ||
204 | option _only_ if you are really sure that listening daemon | ||
205 | cannot be tuned to accept connections faster. Enabling this | ||
206 | option can harm clients of your server. | ||
207 | |||
208 | tcp_syncookies - BOOLEAN | ||
209 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES | ||
210 | Send out syncookies when the syn backlog queue of a socket | ||
211 | overflows. This is to prevent against the common 'syn flood attack' | ||
212 | Default: FALSE | ||
213 | |||
214 | Note, that syncookies is fallback facility. | ||
215 | It MUST NOT be used to help highly loaded servers to stand | ||
216 | against legal connection rate. If you see synflood warnings | ||
217 | in your logs, but investigation shows that they occur | ||
218 | because of overload with legal connections, you should tune | ||
219 | other parameters until this warning disappears. | ||
220 | See: tcp_max_syn_backlog, tcp_synack_retries, tcp_abort_on_overflow. | ||
221 | |||
222 | syncookies seriously violate TCP protocol, do not allow | ||
223 | to use TCP extensions, can result in serious degradation | ||
224 | of some services (f.e. SMTP relaying), visible not by you, | ||
225 | but your clients and relays, contacting you. While you see | ||
226 | synflood warnings in logs not being really flooded, your server | ||
227 | is seriously misconfigured. | ||
228 | |||
229 | tcp_stdurg - BOOLEAN | ||
230 | Use the Host requirements interpretation of the TCP urg pointer field. | ||
231 | Most hosts use the older BSD interpretation, so if you turn this on | ||
232 | Linux might not communicate correctly with them. | ||
233 | Default: FALSE | ||
234 | |||
235 | tcp_max_syn_backlog - INTEGER | 217 | tcp_max_syn_backlog - INTEGER |
236 | Maximal number of remembered connection requests, which have | 218 | Maximal number of remembered connection requests, which have |
237 | still not received an acknowledgment from the connecting client. | 219 | still not received an acknowledgment from the connecting client. |
@@ -239,24 +221,34 @@ tcp_max_syn_backlog - INTEGER | |||
239 | and 128 for low memory machines. If the server suffers from overload, | 221 | and 128 for low memory machines. If the server suffers from overload, |
240 | try to increase this number. | 222 | try to increase this number. |
241 | 223 | ||
242 | tcp_window_scaling - BOOLEAN | 224 | tcp_max_tw_buckets - INTEGER |
243 | Enable window scaling as defined in RFC1323. | 225 | Maximal number of timewait sockets held by system simultaneously. |
226 | If this number is exceeded time-wait socket is immediately destroyed | ||
227 | and warning is printed. This limit exists only to prevent | ||
228 | simple DoS attacks, you _must_ not lower the limit artificially, | ||
229 | but rather increase it (probably, after increasing installed memory), | ||
230 | if network conditions require more than default value. | ||
244 | 231 | ||
245 | tcp_timestamps - BOOLEAN | 232 | tcp_mem - vector of 3 INTEGERs: min, pressure, max |
246 | Enable timestamps as defined in RFC1323. | 233 | min: below this number of pages TCP is not bothered about its |
234 | memory appetite. | ||
247 | 235 | ||
248 | tcp_sack - BOOLEAN | 236 | pressure: when amount of memory allocated by TCP exceeds this number |
249 | Enable select acknowledgments (SACKS). | 237 | of pages, TCP moderates its memory consumption and enters memory |
238 | pressure mode, which is exited when memory consumption falls | ||
239 | under "min". | ||
250 | 240 | ||
251 | tcp_fack - BOOLEAN | 241 | max: number of pages allowed for queueing by all TCP sockets. |
252 | Enable FACK congestion avoidance and fast retransmission. | ||
253 | The value is not used, if tcp_sack is not enabled. | ||
254 | 242 | ||
255 | tcp_dsack - BOOLEAN | 243 | Defaults are calculated at boot time from amount of available |
256 | Allows TCP to send "duplicate" SACKs. | 244 | memory. |
257 | 245 | ||
258 | tcp_ecn - BOOLEAN | 246 | tcp_orphan_retries - INTEGER |
259 | Enable Explicit Congestion Notification in TCP. | 247 | How many times to retry before killing TCP connection, closed |
248 | by our side. Default value 7 corresponds to ~50sec-16min | ||
249 | depending on RTO. If your machine is a loaded WEB server, | ||
250 | you should think about lowering this value, such sockets | ||
251 | may consume significant resources. Cf. tcp_max_orphans. | ||
260 | 252 | ||
261 | tcp_reordering - INTEGER | 253 | tcp_reordering - INTEGER |
262 | Maximal reordering of packets in a TCP stream. | 254 | Maximal reordering of packets in a TCP stream. |
@@ -267,20 +259,23 @@ tcp_retrans_collapse - BOOLEAN | |||
267 | On retransmit try to send bigger packets to work around bugs in | 259 | On retransmit try to send bigger packets to work around bugs in |
268 | certain TCP stacks. | 260 | certain TCP stacks. |
269 | 261 | ||
270 | tcp_wmem - vector of 3 INTEGERs: min, default, max | 262 | tcp_retries1 - INTEGER |
271 | min: Amount of memory reserved for send buffers for TCP socket. | 263 | How many times to retry before deciding that something is wrong |
272 | Each TCP socket has rights to use it due to fact of its birth. | 264 | and it is necessary to report this suspicion to network layer. |
273 | Default: 4K | 265 | Minimal RFC value is 3, it is default, which corresponds |
266 | to ~3sec-8min depending on RTO. | ||
274 | 267 | ||
275 | default: Amount of memory allowed for send buffers for TCP socket | 268 | tcp_retries2 - INTEGER |
276 | by default. This value overrides net.core.wmem_default used | 269 | How many times to retry before killing alive TCP connection. |
277 | by other protocols, it is usually lower than net.core.wmem_default. | 270 | RFC1122 says that the limit should be longer than 100 sec. |
278 | Default: 16K | 271 | It is too small number. Default value 15 corresponds to ~13-30min |
272 | depending on RTO. | ||
279 | 273 | ||
280 | max: Maximal amount of memory allowed for automatically selected | 274 | tcp_rfc1337 - BOOLEAN |
281 | send buffers for TCP socket. This value does not override | 275 | If set, the TCP stack behaves conforming to RFC1337. If unset, |
282 | net.core.wmem_max, "static" selection via SO_SNDBUF does not use this. | 276 | we are not conforming to RFC, but prevent TCP TIME_WAIT |
283 | Default: 128K | 277 | assassination. |
278 | Default: 0 | ||
284 | 279 | ||
285 | tcp_rmem - vector of 3 INTEGERs: min, default, max | 280 | tcp_rmem - vector of 3 INTEGERs: min, default, max |
286 | min: Minimal size of receive buffer used by TCP sockets. | 281 | min: Minimal size of receive buffer used by TCP sockets. |
@@ -299,67 +294,91 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max | |||
299 | net.core.rmem_max, "static" selection via SO_RCVBUF does not use this. | 294 | net.core.rmem_max, "static" selection via SO_RCVBUF does not use this. |
300 | Default: 87380*2 bytes. | 295 | Default: 87380*2 bytes. |
301 | 296 | ||
302 | tcp_mem - vector of 3 INTEGERs: min, pressure, max | 297 | tcp_sack - BOOLEAN |
303 | min: below this number of pages TCP is not bothered about its | 298 | Enable select acknowledgments (SACKS). |
304 | memory appetite. | ||
305 | 299 | ||
306 | pressure: when amount of memory allocated by TCP exceeds this number | 300 | tcp_slow_start_after_idle - BOOLEAN |
307 | of pages, TCP moderates its memory consumption and enters memory | 301 | If set, provide RFC2861 behavior and time out the congestion |
308 | pressure mode, which is exited when memory consumption falls | 302 | window after an idle period. An idle period is defined at |
309 | under "min". | 303 | the current RTO. If unset, the congestion window will not |
304 | be timed out after an idle period. | ||
305 | Default: 1 | ||
310 | 306 | ||
311 | max: number of pages allowed for queueing by all TCP sockets. | 307 | tcp_stdurg - BOOLEAN |
308 | Use the Host requirements interpretation of the TCP urg pointer field. | ||
309 | Most hosts use the older BSD interpretation, so if you turn this on | ||
310 | Linux might not communicate correctly with them. | ||
311 | Default: FALSE | ||
312 | 312 | ||
313 | Defaults are calculated at boot time from amount of available | 313 | tcp_synack_retries - INTEGER |
314 | memory. | 314 | Number of times SYNACKs for a passive TCP connection attempt will |
315 | be retransmitted. Should not be higher than 255. Default value | ||
316 | is 5, which corresponds to ~180seconds. | ||
315 | 317 | ||
316 | tcp_app_win - INTEGER | 318 | tcp_syncookies - BOOLEAN |
317 | Reserve max(window/2^tcp_app_win, mss) of window for application | 319 | Only valid when the kernel was compiled with CONFIG_SYNCOOKIES |
318 | buffer. Value 0 is special, it means that nothing is reserved. | 320 | Send out syncookies when the syn backlog queue of a socket |
319 | Default: 31 | 321 | overflows. This is to prevent against the common 'syn flood attack' |
322 | Default: FALSE | ||
320 | 323 | ||
321 | tcp_adv_win_scale - INTEGER | 324 | Note, that syncookies is fallback facility. |
322 | Count buffering overhead as bytes/2^tcp_adv_win_scale | 325 | It MUST NOT be used to help highly loaded servers to stand |
323 | (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale), | 326 | against legal connection rate. If you see synflood warnings |
324 | if it is <= 0. | 327 | in your logs, but investigation shows that they occur |
325 | Default: 2 | 328 | because of overload with legal connections, you should tune |
329 | other parameters until this warning disappears. | ||
330 | See: tcp_max_syn_backlog, tcp_synack_retries, tcp_abort_on_overflow. | ||
326 | 331 | ||
327 | tcp_rfc1337 - BOOLEAN | 332 | syncookies seriously violate TCP protocol, do not allow |
328 | If set, the TCP stack behaves conforming to RFC1337. If unset, | 333 | to use TCP extensions, can result in serious degradation |
329 | we are not conforming to RFC, but prevent TCP TIME_WAIT | 334 | of some services (f.e. SMTP relaying), visible not by you, |
330 | assassination. | 335 | but your clients and relays, contacting you. While you see |
331 | Default: 0 | 336 | synflood warnings in logs not being really flooded, your server |
337 | is seriously misconfigured. | ||
332 | 338 | ||
333 | tcp_low_latency - BOOLEAN | 339 | tcp_syn_retries - INTEGER |
334 | If set, the TCP stack makes decisions that prefer lower | 340 | Number of times initial SYNs for an active TCP connection attempt |
335 | latency as opposed to higher throughput. By default, this | 341 | will be retransmitted. Should not be higher than 255. Default value |
336 | option is not set meaning that higher throughput is preferred. | 342 | is 5, which corresponds to ~180seconds. |
337 | An example of an application where this default should be | 343 | |
338 | changed would be a Beowulf compute cluster. | 344 | tcp_timestamps - BOOLEAN |
339 | Default: 0 | 345 | Enable timestamps as defined in RFC1323. |
340 | 346 | ||
341 | tcp_tso_win_divisor - INTEGER | 347 | tcp_tso_win_divisor - INTEGER |
342 | This allows control over what percentage of the congestion window | 348 | This allows control over what percentage of the congestion window |
343 | can be consumed by a single TSO frame. | 349 | can be consumed by a single TSO frame. |
344 | The setting of this parameter is a choice between burstiness and | 350 | The setting of this parameter is a choice between burstiness and |
345 | building larger TSO frames. | 351 | building larger TSO frames. |
346 | Default: 3 | 352 | Default: 3 |
347 | 353 | ||
348 | tcp_frto - BOOLEAN | 354 | tcp_tw_recycle - BOOLEAN |
349 | Enables F-RTO, an enhanced recovery algorithm for TCP retransmission | 355 | Enable fast recycling TIME-WAIT sockets. Default value is 0. |
350 | timeouts. It is particularly beneficial in wireless environments | 356 | It should not be changed without advice/request of technical |
351 | where packet loss is typically due to random radio interference | 357 | experts. |
352 | rather than intermediate router congestion. | ||
353 | 358 | ||
354 | tcp_congestion_control - STRING | 359 | tcp_tw_reuse - BOOLEAN |
355 | Set the congestion control algorithm to be used for new | 360 | Allow to reuse TIME-WAIT sockets for new connections when it is |
356 | connections. The algorithm "reno" is always available, but | 361 | safe from protocol viewpoint. Default value is 0. |
357 | additional choices may be available based on kernel configuration. | 362 | It should not be changed without advice/request of technical |
363 | experts. | ||
358 | 364 | ||
359 | somaxconn - INTEGER | 365 | tcp_window_scaling - BOOLEAN |
360 | Limit of socket listen() backlog, known in userspace as SOMAXCONN. | 366 | Enable window scaling as defined in RFC1323. |
361 | Defaults to 128. See also tcp_max_syn_backlog for additional tuning | 367 | |
362 | for TCP sockets. | 368 | tcp_wmem - vector of 3 INTEGERs: min, default, max |
369 | min: Amount of memory reserved for send buffers for TCP socket. | ||
370 | Each TCP socket has rights to use it due to fact of its birth. | ||
371 | Default: 4K | ||
372 | |||
373 | default: Amount of memory allowed for send buffers for TCP socket | ||
374 | by default. This value overrides net.core.wmem_default used | ||
375 | by other protocols, it is usually lower than net.core.wmem_default. | ||
376 | Default: 16K | ||
377 | |||
378 | max: Maximal amount of memory allowed for automatically selected | ||
379 | send buffers for TCP socket. This value does not override | ||
380 | net.core.wmem_max, "static" selection via SO_SNDBUF does not use this. | ||
381 | Default: 128K | ||
363 | 382 | ||
364 | tcp_workaround_signed_windows - BOOLEAN | 383 | tcp_workaround_signed_windows - BOOLEAN |
365 | If set, assume no receipt of a window scaling option means the | 384 | If set, assume no receipt of a window scaling option means the |
@@ -368,13 +387,6 @@ tcp_workaround_signed_windows - BOOLEAN | |||
368 | not receive a window scaling option from them. | 387 | not receive a window scaling option from them. |
369 | Default: 0 | 388 | Default: 0 |
370 | 389 | ||
371 | tcp_slow_start_after_idle - BOOLEAN | ||
372 | If set, provide RFC2861 behavior and time out the congestion | ||
373 | window after an idle period. An idle period is defined at | ||
374 | the current RTO. If unset, the congestion window will not | ||
375 | be timed out after an idle period. | ||
376 | Default: 1 | ||
377 | |||
378 | CIPSOv4 Variables: | 390 | CIPSOv4 Variables: |
379 | 391 | ||
380 | cipso_cache_enable - BOOLEAN | 392 | cipso_cache_enable - BOOLEAN |
@@ -974,4 +986,3 @@ no_cong_thresh FIXME | |||
974 | slot_timeout FIXME | 986 | slot_timeout FIXME |
975 | warn_noreply_time FIXME | 987 | warn_noreply_time FIXME |
976 | 988 | ||
977 | $Id: ip-sysctl.txt,v 1.20 2001/12/13 09:00:18 davem Exp $ | ||