aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorJeff Garzik <jgarzik@pobox.com>2005-07-13 16:23:51 -0400
committerJeff Garzik <jgarzik@pobox.com>2005-07-13 16:23:51 -0400
commit327309e899662b482c58cf25f574513d38b5788c (patch)
tree069de438aa0e92dd9b6ba28e6b207e2cd07151a5 /net
parent0c168775709faa74c1b87f1e61046e0c51ade7f3 (diff)
parentc32511e2718618f0b53479eb36e07439aa363a74 (diff)
Merge upstream 2.6.13-rc3 into ieee80211 branch of netdev-2.6.
Diffstat (limited to 'net')
-rw-r--r--net/802/fddi.c4
-rw-r--r--net/8021q/Kconfig19
-rw-r--r--net/8021q/vlan.c8
-rw-r--r--net/Kconfig457
-rw-r--r--net/atm/Kconfig74
-rw-r--r--net/atm/br2684.c3
-rw-r--r--net/bluetooth/cmtp/core.c6
-rw-r--r--net/bluetooth/hidp/core.c5
-rw-r--r--net/bluetooth/rfcomm/sock.c7
-rw-r--r--net/bluetooth/rfcomm/tty.c2
-rw-r--r--net/bridge/Kconfig31
-rw-r--r--net/core/dev.c7
-rw-r--r--net/core/filter.c104
-rw-r--r--net/core/skbuff.c19
-rw-r--r--net/core/sock.c11
-rw-r--r--net/decnet/Kconfig23
-rw-r--r--net/decnet/af_decnet.c10
-rw-r--r--net/decnet/dn_fib.c3
-rw-r--r--net/decnet/dn_nsp_out.c3
-rw-r--r--net/econet/Kconfig36
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ipv4/Kconfig25
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/fib_trie.c202
-rw-r--r--net/ipv4/icmp.c3
-rw-r--r--net/ipv4/igmp.c96
-rw-r--r--net/ipv4/ip_output.c25
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ipvs/Kconfig4
-rw-r--r--net/ipv4/ipvs/ip_vs_conn.c6
-rw-r--r--net/ipv4/ipvs/ip_vs_ctl.c9
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c7
-rw-r--r--net/ipv4/route.c128
-rw-r--r--net/ipv4/tcp.c52
-rw-r--r--net/ipv4/tcp_input.c87
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_output.c546
-rw-r--r--net/ipv4/tcp_timer.c5
-rw-r--r--net/ipv6/Kconfig22
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/ip6_output.c1
-rw-r--r--net/ipv6/mcast.c29
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipx/Kconfig33
-rw-r--r--net/irda/irlap.c3
-rw-r--r--net/irda/irlap_event.c14
-rw-r--r--net/irda/irlap_frame.c8
-rw-r--r--net/irda/irttp.c2
-rw-r--r--net/lapb/Kconfig22
-rw-r--r--net/llc/llc_c_ev.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/packet/Kconfig26
-rw-r--r--net/packet/af_packet.c6
-rw-r--r--net/sched/Kconfig37
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/em_meta.c6
-rw-r--r--net/sched/sch_api.c63
-rw-r--r--net/sched/sch_blackhole.c54
-rw-r--r--net/sched/sch_generic.c35
-rw-r--r--net/sched/sch_red.c2
-rw-r--r--net/sctp/associola.c15
-rw-r--r--net/sctp/bind_addr.c16
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/endpointola.c6
-rw-r--r--net/sctp/input.c26
-rw-r--r--net/sctp/inqueue.c18
-rw-r--r--net/sctp/output.c22
-rw-r--r--net/sctp/outqueue.c50
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/sm_make_chunk.c27
-rw-r--r--net/sctp/sm_sideeffect.c13
-rw-r--r--net/sctp/socket.c2
-rw-r--r--net/sctp/ssnmap.c3
-rw-r--r--net/sctp/transport.c5
-rw-r--r--net/sctp/ulpevent.c19
-rw-r--r--net/sctp/ulpqueue.c9
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/unix/Kconfig21
-rw-r--r--net/unix/af_unix.c4
-rw-r--r--net/wanrouter/Kconfig29
-rw-r--r--net/wanrouter/wanmain.c6
-rw-r--r--net/x25/Kconfig36
-rw-r--r--net/xfrm/Kconfig15
83 files changed, 1669 insertions, 1102 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c
index ebcf4830d6f1..5ce24c4bb840 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -122,10 +122,10 @@ static int fddi_rebuild_header(struct sk_buff *skb)
122 * the proper pointer to the start of packet data (skb->data). 122 * the proper pointer to the start of packet data (skb->data).
123 */ 123 */
124 124
125unsigned short fddi_type_trans(struct sk_buff *skb, struct net_device *dev) 125__be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
126{ 126{
127 struct fddihdr *fddi = (struct fddihdr *)skb->data; 127 struct fddihdr *fddi = (struct fddihdr *)skb->data;
128 unsigned short type; 128 __be16 type;
129 129
130 /* 130 /*
131 * Set mac.raw field to point to FC byte, set data field to point 131 * Set mac.raw field to point to FC byte, set data field to point
diff --git a/net/8021q/Kconfig b/net/8021q/Kconfig
new file mode 100644
index 000000000000..c4a382e450e2
--- /dev/null
+++ b/net/8021q/Kconfig
@@ -0,0 +1,19 @@
1#
2# Configuration for 802.1Q VLAN support
3#
4
5config VLAN_8021Q
6 tristate "802.1Q VLAN Support"
7 ---help---
8 Select this and you will be able to create 802.1Q VLAN interfaces
9 on your ethernet interfaces. 802.1Q VLAN supports almost
10 everything a regular ethernet interface does, including
11 firewalling, bridging, and of course IP traffic. You will need
12 the 'vconfig' tool from the VLAN project in order to effectively
13 use VLANs. See the VLAN web page for more information:
14 <http://www.candelatech.com/~greear/vlan.html>
15
16 To compile this code as a module, choose M here: the module
17 will be called 8021q.
18
19 If unsure, say N.
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 1f6d31670bc7..91e412b0ab00 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -578,6 +578,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
578 if (!vlandev) 578 if (!vlandev)
579 continue; 579 continue;
580 580
581 if (netif_carrier_ok(dev)) {
582 if (!netif_carrier_ok(vlandev))
583 netif_carrier_on(vlandev);
584 } else {
585 if (netif_carrier_ok(vlandev))
586 netif_carrier_off(vlandev);
587 }
588
581 if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) { 589 if ((vlandev->state & VLAN_LINK_STATE_MASK) != flgs) {
582 vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK) 590 vlandev->state = (vlandev->state &~ VLAN_LINK_STATE_MASK)
583 | flgs; 591 | flgs;
diff --git a/net/Kconfig b/net/Kconfig
index 8a12ea8f0c05..3ae4a47e60ae 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -2,7 +2,7 @@
2# Network configuration 2# Network configuration
3# 3#
4 4
5menu "Networking support" 5menu "Networking"
6 6
7config NET 7config NET
8 bool "Networking support" 8 bool "Networking support"
@@ -10,7 +10,9 @@ config NET
10 Unless you really know what you are doing, you should say Y here. 10 Unless you really know what you are doing, you should say Y here.
11 The reason is that some programs need kernel networking support even 11 The reason is that some programs need kernel networking support even
12 when running on a stand-alone machine that isn't connected to any 12 when running on a stand-alone machine that isn't connected to any
13 other computer. If you are upgrading from an older kernel, you 13 other computer.
14
15 If you are upgrading from an older kernel, you
14 should consider updating your networking tools too because changes 16 should consider updating your networking tools too because changes
15 in the kernel and the tools often go hand in hand. The tools are 17 in the kernel and the tools often go hand in hand. The tools are
16 contained in the package net-tools, the location and version number 18 contained in the package net-tools, the location and version number
@@ -20,57 +22,14 @@ config NET
20 recommended to read the NET-HOWTO, available from 22 recommended to read the NET-HOWTO, available from
21 <http://www.tldp.org/docs.html#howto>. 23 <http://www.tldp.org/docs.html#howto>.
22 24
23menu "Networking options" 25# Make sure that all config symbols are dependent on NET
24 depends on NET 26if NET
25
26config PACKET
27 tristate "Packet socket"
28 ---help---
29 The Packet protocol is used by applications which communicate
30 directly with network devices without an intermediate network
31 protocol implemented in the kernel, e.g. tcpdump. If you want them
32 to work, choose Y.
33 27
34 To compile this driver as a module, choose M here: the module will 28menu "Networking options"
35 be called af_packet.
36
37 If unsure, say Y.
38
39config PACKET_MMAP
40 bool "Packet socket: mmapped IO"
41 depends on PACKET
42 help
43 If you say Y here, the Packet protocol driver will use an IO
44 mechanism that results in faster communication.
45
46 If unsure, say N.
47
48config UNIX
49 tristate "Unix domain sockets"
50 ---help---
51 If you say Y here, you will include support for Unix domain sockets;
52 sockets are the standard Unix mechanism for establishing and
53 accessing network connections. Many commonly used programs such as
54 the X Window system and syslog use these sockets even if your
55 machine is not connected to any network. Unless you are working on
56 an embedded system or something similar, you therefore definitely
57 want to say Y here.
58
59 To compile this driver as a module, choose M here: the module will be
60 called unix. Note that several important services won't work
61 correctly if you say M here and then neglect to load the module.
62
63 Say Y unless you know what you are doing.
64
65config NET_KEY
66 tristate "PF_KEY sockets"
67 select XFRM
68 ---help---
69 PF_KEYv2 socket family, compatible to KAME ones.
70 They are required if you are going to use IPsec tools ported
71 from KAME.
72 29
73 Say Y unless you know what you are doing. 30source "net/packet/Kconfig"
31source "net/unix/Kconfig"
32source "net/xfrm/Kconfig"
74 33
75config INET 34config INET
76 bool "TCP/IP networking" 35 bool "TCP/IP networking"
@@ -94,30 +53,12 @@ config INET
94 53
95 Short answer: say Y. 54 Short answer: say Y.
96 55
56if INET
97source "net/ipv4/Kconfig" 57source "net/ipv4/Kconfig"
98
99# IPv6 as module will cause a CRASH if you try to unload it
100config IPV6
101 tristate "The IPv6 protocol"
102 depends on INET
103 default m
104 select CRYPTO if IPV6_PRIVACY
105 select CRYPTO_MD5 if IPV6_PRIVACY
106 ---help---
107 This is complemental support for the IP version 6.
108 You will still be able to do traditional IPv4 networking as well.
109
110 For general information about IPv6, see
111 <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
112 For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
113 For specific information about IPv6 under Linux, read the HOWTO at
114 <http://www.bieringer.de/linux/IPv6/>.
115
116 To compile this protocol support as a module, choose M here: the
117 module will be called ipv6.
118
119source "net/ipv6/Kconfig" 58source "net/ipv6/Kconfig"
120 59
60endif # if INET
61
121menuconfig NETFILTER 62menuconfig NETFILTER
122 bool "Network packet filtering (replaces ipchains)" 63 bool "Network packet filtering (replaces ipchains)"
123 ---help--- 64 ---help---
@@ -206,269 +147,16 @@ source "net/bridge/netfilter/Kconfig"
206 147
207endif 148endif
208 149
209config XFRM
210 bool
211 depends on NET
212
213source "net/xfrm/Kconfig"
214
215source "net/sctp/Kconfig" 150source "net/sctp/Kconfig"
216 151source "net/atm/Kconfig"
217config ATM 152source "net/bridge/Kconfig"
218 tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)" 153source "net/8021q/Kconfig"
219 depends on EXPERIMENTAL
220 ---help---
221 ATM is a high-speed networking technology for Local Area Networks
222 and Wide Area Networks. It uses a fixed packet size and is
223 connection oriented, allowing for the negotiation of minimum
224 bandwidth requirements.
225
226 In order to participate in an ATM network, your Linux box needs an
227 ATM networking card. If you have that, say Y here and to the driver
228 of your ATM card below.
229
230 Note that you need a set of user-space programs to actually make use
231 of ATM. See the file <file:Documentation/networking/atm.txt> for
232 further details.
233
234config ATM_CLIP
235 tristate "Classical IP over ATM (EXPERIMENTAL)"
236 depends on ATM && INET
237 help
238 Classical IP over ATM for PVCs and SVCs, supporting InARP and
239 ATMARP. If you want to communication with other IP hosts on your ATM
240 network, you will typically either say Y here or to "LAN Emulation
241 (LANE)" below.
242
243config ATM_CLIP_NO_ICMP
244 bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)"
245 depends on ATM_CLIP
246 help
247 Normally, an "ICMP host unreachable" message is sent if a neighbour
248 cannot be reached because there is no VC to it in the kernel's
249 ATMARP table. This may cause problems when ATMARP table entries are
250 briefly removed during revalidation. If you say Y here, packets to
251 such neighbours are silently discarded instead.
252
253config ATM_LANE
254 tristate "LAN Emulation (LANE) support (EXPERIMENTAL)"
255 depends on ATM
256 help
257 LAN Emulation emulates services of existing LANs across an ATM
258 network. Besides operating as a normal ATM end station client, Linux
259 LANE client can also act as an proxy client bridging packets between
260 ELAN and Ethernet segments. You need LANE if you want to try MPOA.
261
262config ATM_MPOA
263 tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)"
264 depends on ATM && INET && ATM_LANE!=n
265 help
266 Multi-Protocol Over ATM allows ATM edge devices such as routers,
267 bridges and ATM attached hosts establish direct ATM VCs across
268 subnetwork boundaries. These shortcut connections bypass routers
269 enhancing overall network performance.
270
271config ATM_BR2684
272 tristate "RFC1483/2684 Bridged protocols"
273 depends on ATM && INET
274 help
275 ATM PVCs can carry ethernet PDUs according to rfc2684 (formerly 1483)
276 This device will act like an ethernet from the kernels point of view,
277 with the traffic being carried by ATM PVCs (currently 1 PVC/device).
278 This is sometimes used over DSL lines. If in doubt, say N.
279
280config ATM_BR2684_IPFILTER
281 bool "Per-VC IP filter kludge"
282 depends on ATM_BR2684
283 help
284 This is an experimental mechanism for users who need to terminating a
285 large number of IP-only vcc's. Do not enable this unless you are sure
286 you know what you are doing.
287
288config BRIDGE
289 tristate "802.1d Ethernet Bridging"
290 ---help---
291 If you say Y here, then your Linux box will be able to act as an
292 Ethernet bridge, which means that the different Ethernet segments it
293 is connected to will appear as one Ethernet to the participants.
294 Several such bridges can work together to create even larger
295 networks of Ethernets using the IEEE 802.1 spanning tree algorithm.
296 As this is a standard, Linux bridges will cooperate properly with
297 other third party bridge products.
298
299 In order to use the Ethernet bridge, you'll need the bridge
300 configuration tools; see <file:Documentation/networking/bridge.txt>
301 for location. Please read the Bridge mini-HOWTO for more
302 information.
303
304 If you enable iptables support along with the bridge support then you
305 turn your bridge into a bridging IP firewall.
306 iptables will then see the IP packets being bridged, so you need to
307 take this into account when setting up your firewall rules.
308 Enabling arptables support when bridging will let arptables see
309 bridged ARP traffic in the arptables FORWARD chain.
310
311 To compile this code as a module, choose M here: the module
312 will be called bridge.
313
314 If unsure, say N.
315
316config VLAN_8021Q
317 tristate "802.1Q VLAN Support"
318 ---help---
319 Select this and you will be able to create 802.1Q VLAN interfaces
320 on your ethernet interfaces. 802.1Q VLAN supports almost
321 everything a regular ethernet interface does, including
322 firewalling, bridging, and of course IP traffic. You will need
323 the 'vconfig' tool from the VLAN project in order to effectively
324 use VLANs. See the VLAN web page for more information:
325 <http://www.candelatech.com/~greear/vlan.html>
326
327 To compile this code as a module, choose M here: the module
328 will be called 8021q.
329
330 If unsure, say N.
331
332config DECNET
333 tristate "DECnet Support"
334 ---help---
335 The DECnet networking protocol was used in many products made by
336 Digital (now Compaq). It provides reliable stream and sequenced
337 packet communications over which run a variety of services similar
338 to those which run over TCP/IP.
339
340 To find some tools to use with the kernel layer support, please
341 look at Patrick Caulfield's web site:
342 <http://linux-decnet.sourceforge.net/>.
343
344 More detailed documentation is available in
345 <file:Documentation/networking/decnet.txt>.
346
347 Be sure to say Y to "/proc file system support" and "Sysctl support"
348 below when using DECnet, since you will need sysctl support to aid
349 in configuration at run time.
350
351 The DECnet code is also available as a module ( = code which can be
352 inserted in and removed from the running kernel whenever you want).
353 The module is called decnet.
354
355source "net/decnet/Kconfig" 154source "net/decnet/Kconfig"
356
357source "net/llc/Kconfig" 155source "net/llc/Kconfig"
358
359config IPX
360 tristate "The IPX protocol"
361 select LLC
362 ---help---
363 This is support for the Novell networking protocol, IPX, commonly
364 used for local networks of Windows machines. You need it if you
365 want to access Novell NetWare file or print servers using the Linux
366 Novell client ncpfs (available from
367 <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
368 within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
369 available from <http://www.tldp.org/docs.html#howto>). In order
370 to do the former, you'll also have to say Y to "NCP file system
371 support", below.
372
373 IPX is similar in scope to IP, while SPX, which runs on top of IPX,
374 is similar to TCP. There is also experimental support for SPX in
375 Linux (see "SPX networking", below).
376
377 To turn your Linux box into a fully featured NetWare file server and
378 IPX router, say Y here and fetch either lwared from
379 <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
380 mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
381 information, read the IPX-HOWTO available from
382 <http://www.tldp.org/docs.html#howto>.
383
384 General information about how to connect Linux, Windows machines and
385 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
386
387 The IPX driver would enlarge your kernel by about 16 KB. To compile
388 this driver as a module, choose M here: the module will be called ipx.
389 Unless you want to integrate your Linux box with a local Novell
390 network, say N.
391
392source "net/ipx/Kconfig" 156source "net/ipx/Kconfig"
393
394config ATALK
395 tristate "Appletalk protocol support"
396 select LLC
397 ---help---
398 AppleTalk is the protocol that Apple computers can use to communicate
399 on a network. If your Linux box is connected to such a network and you
400 wish to connect to it, say Y. You will need to use the netatalk package
401 so that your Linux box can act as a print and file server for Macs as
402 well as access AppleTalk printers. Check out
403 <http://www.zettabyte.net/netatalk/> on the WWW for details.
404 EtherTalk is the name used for AppleTalk over Ethernet and the
405 cheaper and slower LocalTalk is AppleTalk over a proprietary Apple
406 network using serial links. EtherTalk and LocalTalk are fully
407 supported by Linux.
408
409 General information about how to connect Linux, Windows machines and
410 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>. The
411 NET-3-HOWTO, available from
412 <http://www.tldp.org/docs.html#howto>, contains valuable
413 information as well.
414
415 To compile this driver as a module, choose M here: the module will be
416 called appletalk. You almost certainly want to compile it as a
417 module so you can restart your AppleTalk stack without rebooting
418 your machine. I hear that the GNU boycott of Apple is over, so
419 even politically correct people are allowed to say Y here.
420
421source "drivers/net/appletalk/Kconfig" 157source "drivers/net/appletalk/Kconfig"
422 158source "net/x25/Kconfig"
423config X25 159source "net/lapb/Kconfig"
424 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
425 depends on EXPERIMENTAL
426 ---help---
427 X.25 is a set of standardized network protocols, similar in scope to
428 frame relay; the one physical line from your box to the X.25 network
429 entry point can carry several logical point-to-point connections
430 (called "virtual circuits") to other computers connected to the X.25
431 network. Governments, banks, and other organizations tend to use it
432 to connect to each other or to form Wide Area Networks (WANs). Many
433 countries have public X.25 networks. X.25 consists of two
434 protocols: the higher level Packet Layer Protocol (PLP) (say Y here
435 if you want that) and the lower level data link layer protocol LAPB
436 (say Y to "LAPB Data Link Driver" below if you want that).
437
438 You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
439 <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
440 Information about X.25 for Linux is contained in the files
441 <file:Documentation/networking/x25.txt> and
442 <file:Documentation/networking/x25-iface.txt>.
443
444 One connects to an X.25 network either with a dedicated network card
445 using the X.21 protocol (not yet supported by Linux) or one can do
446 X.25 over a standard telephone line using an ordinary modem (say Y
447 to "X.25 async driver" below) or over Ethernet using an ordinary
448 Ethernet card and the LAPB over Ethernet (say Y to "LAPB Data Link
449 Driver" and "LAPB over Ethernet driver" below).
450
451 To compile this driver as a module, choose M here: the module
452 will be called x25. If unsure, say N.
453
454config LAPB
455 tristate "LAPB Data Link Driver (EXPERIMENTAL)"
456 depends on EXPERIMENTAL
457 ---help---
458 Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
459 the lower) part of the X.25 protocol. It offers a reliable
460 connection service to exchange data frames with one other host, and
461 it is used to transport higher level protocols (mostly X.25 Packet
462 Layer, the higher part of X.25, but others are possible as well).
463 Usually, LAPB is used with specialized X.21 network cards, but Linux
464 currently supports LAPB only over Ethernet connections. If you want
465 to use LAPB connections over Ethernet, say Y here and to "LAPB over
466 Ethernet driver" below. Read
467 <file:Documentation/networking/lapb-module.txt> for technical
468 details.
469
470 To compile this driver as a module, choose M here: the
471 module will be called lapb. If unsure, say N.
472 160
473config NET_DIVERT 161config NET_DIVERT
474 bool "Frame Diverter (EXPERIMENTAL)" 162 bool "Frame Diverter (EXPERIMENTAL)"
@@ -496,107 +184,10 @@ config NET_DIVERT
496 184
497 If unsure, say N. 185 If unsure, say N.
498 186
499config ECONET 187source "net/econet/Kconfig"
500 tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)" 188source "net/wanrouter/Kconfig"
501 depends on EXPERIMENTAL && INET
502 ---help---
503 Econet is a fairly old and slow networking protocol mainly used by
504 Acorn computers to access file and print servers. It uses native
505 Econet network cards. AUN is an implementation of the higher level
506 parts of Econet that runs over ordinary Ethernet connections, on
507 top of the UDP packet protocol, which in turn runs on top of the
508 Internet protocol IP.
509
510 If you say Y here, you can choose with the next two options whether
511 to send Econet/AUN traffic over a UDP Ethernet connection or over
512 a native Econet network card.
513
514 To compile this driver as a module, choose M here: the module
515 will be called econet.
516
517config ECONET_AUNUDP
518 bool "AUN over UDP"
519 depends on ECONET
520 help
521 Say Y here if you want to send Econet/AUN traffic over a UDP
522 connection (UDP is a packet based protocol that runs on top of the
523 Internet protocol IP) using an ordinary Ethernet network card.
524
525config ECONET_NATIVE
526 bool "Native Econet"
527 depends on ECONET
528 help
529 Say Y here if you have a native Econet network card installed in
530 your computer.
531
532config WAN_ROUTER
533 tristate "WAN router"
534 depends on EXPERIMENTAL
535 ---help---
536 Wide Area Networks (WANs), such as X.25, frame relay and leased
537 lines, are used to interconnect Local Area Networks (LANs) over vast
538 distances with data transfer rates significantly higher than those
539 achievable with commonly used asynchronous modem connections.
540 Usually, a quite expensive external device called a `WAN router' is
541 needed to connect to a WAN.
542
543 As an alternative, WAN routing can be built into the Linux kernel.
544 With relatively inexpensive WAN interface cards available on the
545 market, a perfectly usable router can be built for less than half
546 the price of an external router. If you have one of those cards and
547 wish to use your Linux box as a WAN router, say Y here and also to
548 the WAN driver for your card, below. You will then need the
549 wan-tools package which is available from <ftp://ftp.sangoma.com/>.
550 Read <file:Documentation/networking/wan-router.txt> for more
551 information.
552
553 To compile WAN routing support as a module, choose M here: the
554 module will be called wanrouter.
555
556 If unsure, say N.
557
558menu "QoS and/or fair queueing"
559
560config NET_SCHED
561 bool "QoS and/or fair queueing"
562 ---help---
563 When the kernel has several packets to send out over a network
564 device, it has to decide which ones to send first, which ones to
565 delay, and which ones to drop. This is the job of the packet
566 scheduler, and several different algorithms for how to do this
567 "fairly" have been proposed.
568
569 If you say N here, you will get the standard packet scheduler, which
570 is a FIFO (first come, first served). If you say Y here, you will be
571 able to choose from among several alternative algorithms which can
572 then be attached to different network devices. This is useful for
573 example if some of your network devices are real time devices that
574 need a certain minimum data flow rate, or if you need to limit the
575 maximum data flow rate for traffic which matches specified criteria.
576 This code is considered to be experimental.
577
578 To administer these schedulers, you'll need the user-level utilities
579 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
580 That package also contains some documentation; for more, check out
581 <http://snafu.freedom.org/linux2.2/iproute-notes.html>.
582
583 This Quality of Service (QoS) support will enable you to use
584 Differentiated Services (diffserv) and Resource Reservation Protocol
585 (RSVP) on your Linux router if you also say Y to "QoS support",
586 "Packet classifier API" and to some classifiers below. Documentation
587 and software is at <http://diffserv.sourceforge.net/>.
588
589 If you say Y here and to "/proc file system" below, you will be able
590 to read status information about packet schedulers from the file
591 /proc/net/psched.
592
593 The available schedulers are listed in the following questions; you
594 can say Y to as many as you like. If unsure, say N now.
595
596source "net/sched/Kconfig" 189source "net/sched/Kconfig"
597 190
598endmenu
599
600menu "Network testing" 191menu "Network testing"
601 192
602config NET_PKTGEN 193config NET_PKTGEN
@@ -635,14 +226,10 @@ config NET_POLL_CONTROLLER
635 def_bool NETPOLL 226 def_bool NETPOLL
636 227
637source "net/ax25/Kconfig" 228source "net/ax25/Kconfig"
638
639source "net/irda/Kconfig" 229source "net/irda/Kconfig"
640
641source "net/bluetooth/Kconfig" 230source "net/bluetooth/Kconfig"
642
643source "net/ieee80211/Kconfig" 231source "net/ieee80211/Kconfig"
644 232
645source "drivers/net/Kconfig" 233endif # if NET
646 234endmenu # Networking
647endmenu
648 235
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
new file mode 100644
index 000000000000..bea2426229b1
--- /dev/null
+++ b/net/atm/Kconfig
@@ -0,0 +1,74 @@
1#
2# Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)
3#
4
5config ATM
6 tristate "Asynchronous Transfer Mode (ATM) (EXPERIMENTAL)"
7 depends on EXPERIMENTAL
8 ---help---
9 ATM is a high-speed networking technology for Local Area Networks
10 and Wide Area Networks. It uses a fixed packet size and is
11 connection oriented, allowing for the negotiation of minimum
12 bandwidth requirements.
13
14 In order to participate in an ATM network, your Linux box needs an
15 ATM networking card. If you have that, say Y here and to the driver
16 of your ATM card below.
17
18 Note that you need a set of user-space programs to actually make use
19 of ATM. See the file <file:Documentation/networking/atm.txt> for
20 further details.
21
22config ATM_CLIP
23 tristate "Classical IP over ATM (EXPERIMENTAL)"
24 depends on ATM && INET
25 help
26 Classical IP over ATM for PVCs and SVCs, supporting InARP and
27 ATMARP. If you want to communication with other IP hosts on your ATM
28 network, you will typically either say Y here or to "LAN Emulation
29 (LANE)" below.
30
31config ATM_CLIP_NO_ICMP
32 bool "Do NOT send ICMP if no neighbour (EXPERIMENTAL)"
33 depends on ATM_CLIP
34 help
35 Normally, an "ICMP host unreachable" message is sent if a neighbour
36 cannot be reached because there is no VC to it in the kernel's
37 ATMARP table. This may cause problems when ATMARP table entries are
38 briefly removed during revalidation. If you say Y here, packets to
39 such neighbours are silently discarded instead.
40
41config ATM_LANE
42 tristate "LAN Emulation (LANE) support (EXPERIMENTAL)"
43 depends on ATM
44 help
45 LAN Emulation emulates services of existing LANs across an ATM
46 network. Besides operating as a normal ATM end station client, Linux
47 LANE client can also act as an proxy client bridging packets between
48 ELAN and Ethernet segments. You need LANE if you want to try MPOA.
49
50config ATM_MPOA
51 tristate "Multi-Protocol Over ATM (MPOA) support (EXPERIMENTAL)"
52 depends on ATM && INET && ATM_LANE!=n
53 help
54 Multi-Protocol Over ATM allows ATM edge devices such as routers,
55 bridges and ATM attached hosts establish direct ATM VCs across
56 subnetwork boundaries. These shortcut connections bypass routers
57 enhancing overall network performance.
58
59config ATM_BR2684
60 tristate "RFC1483/2684 Bridged protocols"
61 depends on ATM && INET
62 help
63 ATM PVCs can carry ethernet PDUs according to rfc2684 (formerly 1483)
64 This device will act like an ethernet from the kernels point of view,
65 with the traffic being carried by ATM PVCs (currently 1 PVC/device).
66 This is sometimes used over DSL lines. If in doubt, say N.
67
68config ATM_BR2684_IPFILTER
69 bool "Per-VC IP filter kludge"
70 depends on ATM_BR2684
71 help
72 This is an experimental mechanism for users who need to terminating a
73 large number of IP-only vcc's. Do not enable this unless you are sure
74 you know what you are doing.
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index e6954cf1459d..289956c4dd3e 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -289,8 +289,7 @@ xmit will add the additional header part in that case */
289 * This is similar to eth_type_trans, which cannot be used because of 289 * This is similar to eth_type_trans, which cannot be used because of
290 * our dev->hard_header_len 290 * our dev->hard_header_len
291 */ 291 */
292static inline unsigned short br_type_trans(struct sk_buff *skb, 292static inline __be16 br_type_trans(struct sk_buff *skb, struct net_device *dev)
293 struct net_device *dev)
294{ 293{
295 struct ethhdr *eth; 294 struct ethhdr *eth;
296 unsigned char *rawp; 295 unsigned char *rawp;
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 2e341de3e763..901eff7ebe74 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -213,7 +213,7 @@ static int cmtp_send_frame(struct cmtp_session *session, unsigned char *data, in
213 return kernel_sendmsg(sock, &msg, &iv, 1, len); 213 return kernel_sendmsg(sock, &msg, &iv, 1, len);
214} 214}
215 215
216static int cmtp_process_transmit(struct cmtp_session *session) 216static void cmtp_process_transmit(struct cmtp_session *session)
217{ 217{
218 struct sk_buff *skb, *nskb; 218 struct sk_buff *skb, *nskb;
219 unsigned char *hdr; 219 unsigned char *hdr;
@@ -223,7 +223,7 @@ static int cmtp_process_transmit(struct cmtp_session *session)
223 223
224 if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) { 224 if (!(nskb = alloc_skb(session->mtu, GFP_ATOMIC))) {
225 BT_ERR("Can't allocate memory for new frame"); 225 BT_ERR("Can't allocate memory for new frame");
226 return -ENOMEM; 226 return;
227 } 227 }
228 228
229 while ((skb = skb_dequeue(&session->transmit))) { 229 while ((skb = skb_dequeue(&session->transmit))) {
@@ -275,8 +275,6 @@ static int cmtp_process_transmit(struct cmtp_session *session)
275 cmtp_send_frame(session, nskb->data, nskb->len); 275 cmtp_send_frame(session, nskb->data, nskb->len);
276 276
277 kfree_skb(nskb); 277 kfree_skb(nskb);
278
279 return skb_queue_len(&session->transmit);
280} 278}
281 279
282static int cmtp_session(void *arg) 280static int cmtp_session(void *arg)
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index affbc55462e8..de8af5f42394 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -428,7 +428,7 @@ static int hidp_send_frame(struct socket *sock, unsigned char *data, int len)
428 return kernel_sendmsg(sock, &msg, &iv, 1, len); 428 return kernel_sendmsg(sock, &msg, &iv, 1, len);
429} 429}
430 430
431static int hidp_process_transmit(struct hidp_session *session) 431static void hidp_process_transmit(struct hidp_session *session)
432{ 432{
433 struct sk_buff *skb; 433 struct sk_buff *skb;
434 434
@@ -453,9 +453,6 @@ static int hidp_process_transmit(struct hidp_session *session)
453 hidp_set_timer(session); 453 hidp_set_timer(session);
454 kfree_skb(skb); 454 kfree_skb(skb);
455 } 455 }
456
457 return skb_queue_len(&session->ctrl_transmit) +
458 skb_queue_len(&session->intr_transmit);
459} 456}
460 457
461static int hidp_session(void *arg) 458static int hidp_session(void *arg)
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index f3f6355a2786..63a123c5c41b 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -590,8 +590,11 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
590 for (;;) { 590 for (;;) {
591 set_current_state(TASK_INTERRUPTIBLE); 591 set_current_state(TASK_INTERRUPTIBLE);
592 592
593 if (skb_queue_len(&sk->sk_receive_queue) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || 593 if (!skb_queue_empty(&sk->sk_receive_queue) ||
594 signal_pending(current) || !timeo) 594 sk->sk_err ||
595 (sk->sk_shutdown & RCV_SHUTDOWN) ||
596 signal_pending(current) ||
597 !timeo)
595 break; 598 break;
596 599
597 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); 600 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 6d689200bcf3..6304590fd36a 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -781,7 +781,7 @@ static int rfcomm_tty_chars_in_buffer(struct tty_struct *tty)
781 781
782 BT_DBG("tty %p dev %p", tty, dev); 782 BT_DBG("tty %p dev %p", tty, dev);
783 783
784 if (skb_queue_len(&dlc->tx_queue)) 784 if (!skb_queue_empty(&dlc->tx_queue))
785 return dlc->mtu; 785 return dlc->mtu;
786 786
787 return 0; 787 return 0;
diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
new file mode 100644
index 000000000000..db23d59746cf
--- /dev/null
+++ b/net/bridge/Kconfig
@@ -0,0 +1,31 @@
1#
2# 802.1d Ethernet Bridging
3#
4
5config BRIDGE
6 tristate "802.1d Ethernet Bridging"
7 ---help---
8 If you say Y here, then your Linux box will be able to act as an
9 Ethernet bridge, which means that the different Ethernet segments it
10 is connected to will appear as one Ethernet to the participants.
11 Several such bridges can work together to create even larger
12 networks of Ethernets using the IEEE 802.1 spanning tree algorithm.
13 As this is a standard, Linux bridges will cooperate properly with
14 other third party bridge products.
15
16 In order to use the Ethernet bridge, you'll need the bridge
17 configuration tools; see <file:Documentation/networking/bridge.txt>
18 for location. Please read the Bridge mini-HOWTO for more
19 information.
20
21 If you enable iptables support along with the bridge support then you
22 turn your bridge into a bridging IP firewall.
23 iptables will then see the IP packets being bridged, so you need to
24 take this into account when setting up your firewall rules.
25 Enabling arptables support when bridging will let arptables see
26 bridged ARP traffic in the arptables FORWARD chain.
27
28 To compile this code as a module, choose M here: the module
29 will be called bridge.
30
31 If unsure, say N.
diff --git a/net/core/dev.c b/net/core/dev.c
index 7016e0c36b3d..ff9dc029233a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1127,7 +1127,7 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1127extern void skb_release_data(struct sk_buff *); 1127extern void skb_release_data(struct sk_buff *);
1128 1128
1129/* Keep head the same: replace data */ 1129/* Keep head the same: replace data */
1130int __skb_linearize(struct sk_buff *skb, int gfp_mask) 1130int __skb_linearize(struct sk_buff *skb, unsigned int __nocast gfp_mask)
1131{ 1131{
1132 unsigned int size; 1132 unsigned int size;
1133 u8 *data; 1133 u8 *data;
@@ -2089,10 +2089,11 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
2089{ 2089{
2090 unsigned short old_flags = dev->flags; 2090 unsigned short old_flags = dev->flags;
2091 2091
2092 dev->flags |= IFF_PROMISC;
2093 if ((dev->promiscuity += inc) == 0) 2092 if ((dev->promiscuity += inc) == 0)
2094 dev->flags &= ~IFF_PROMISC; 2093 dev->flags &= ~IFF_PROMISC;
2095 if (dev->flags ^ old_flags) { 2094 else
2095 dev->flags |= IFF_PROMISC;
2096 if (dev->flags != old_flags) {
2096 dev_mc_upload(dev); 2097 dev_mc_upload(dev);
2097 printk(KERN_INFO "device %s %s promiscuous mode\n", 2098 printk(KERN_INFO "device %s %s promiscuous mode\n",
2098 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : 2099 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
diff --git a/net/core/filter.c b/net/core/filter.c
index f3b88205ace2..cd91a24f9720 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,7 @@
36#include <linux/filter.h> 36#include <linux/filter.h>
37 37
38/* No hurry in this branch */ 38/* No hurry in this branch */
39static u8 *load_pointer(struct sk_buff *skb, int k) 39static void *__load_pointer(struct sk_buff *skb, int k)
40{ 40{
41 u8 *ptr = NULL; 41 u8 *ptr = NULL;
42 42
@@ -50,6 +50,18 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
50 return NULL; 50 return NULL;
51} 51}
52 52
53static inline void *load_pointer(struct sk_buff *skb, int k,
54 unsigned int size, void *buffer)
55{
56 if (k >= 0)
57 return skb_header_pointer(skb, k, size, buffer);
58 else {
59 if (k >= SKF_AD_OFF)
60 return NULL;
61 return __load_pointer(skb, k);
62 }
63}
64
53/** 65/**
54 * sk_run_filter - run a filter on a socket 66 * sk_run_filter - run a filter on a socket
55 * @skb: buffer to run the filter on 67 * @skb: buffer to run the filter on
@@ -64,15 +76,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
64 76
65int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 77int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
66{ 78{
67 unsigned char *data = skb->data;
68 /* len is UNSIGNED. Byte wide insns relies only on implicit
69 type casts to prevent reading arbitrary memory locations.
70 */
71 unsigned int len = skb->len-skb->data_len;
72 struct sock_filter *fentry; /* We walk down these */ 79 struct sock_filter *fentry; /* We walk down these */
80 void *ptr;
73 u32 A = 0; /* Accumulator */ 81 u32 A = 0; /* Accumulator */
74 u32 X = 0; /* Index Register */ 82 u32 X = 0; /* Index Register */
75 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 83 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
84 u32 tmp;
76 int k; 85 int k;
77 int pc; 86 int pc;
78 87
@@ -168,86 +177,35 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
168 case BPF_LD|BPF_W|BPF_ABS: 177 case BPF_LD|BPF_W|BPF_ABS:
169 k = fentry->k; 178 k = fentry->k;
170 load_w: 179 load_w:
171 if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) { 180 ptr = load_pointer(skb, k, 4, &tmp);
172 A = ntohl(*(u32*)&data[k]); 181 if (ptr != NULL) {
182 A = ntohl(*(u32 *)ptr);
173 continue; 183 continue;
174 } 184 }
175 if (k < 0) {
176 u8 *ptr;
177
178 if (k >= SKF_AD_OFF)
179 break;
180 ptr = load_pointer(skb, k);
181 if (ptr) {
182 A = ntohl(*(u32*)ptr);
183 continue;
184 }
185 } else {
186 u32 _tmp, *p;
187 p = skb_header_pointer(skb, k, 4, &_tmp);
188 if (p != NULL) {
189 A = ntohl(*p);
190 continue;
191 }
192 }
193 return 0; 185 return 0;
194 case BPF_LD|BPF_H|BPF_ABS: 186 case BPF_LD|BPF_H|BPF_ABS:
195 k = fentry->k; 187 k = fentry->k;
196 load_h: 188 load_h:
197 if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) { 189 ptr = load_pointer(skb, k, 2, &tmp);
198 A = ntohs(*(u16*)&data[k]); 190 if (ptr != NULL) {
191 A = ntohs(*(u16 *)ptr);
199 continue; 192 continue;
200 } 193 }
201 if (k < 0) {
202 u8 *ptr;
203
204 if (k >= SKF_AD_OFF)
205 break;
206 ptr = load_pointer(skb, k);
207 if (ptr) {
208 A = ntohs(*(u16*)ptr);
209 continue;
210 }
211 } else {
212 u16 _tmp, *p;
213 p = skb_header_pointer(skb, k, 2, &_tmp);
214 if (p != NULL) {
215 A = ntohs(*p);
216 continue;
217 }
218 }
219 return 0; 194 return 0;
220 case BPF_LD|BPF_B|BPF_ABS: 195 case BPF_LD|BPF_B|BPF_ABS:
221 k = fentry->k; 196 k = fentry->k;
222load_b: 197load_b:
223 if (k >= 0 && (unsigned int)k < len) { 198 ptr = load_pointer(skb, k, 1, &tmp);
224 A = data[k]; 199 if (ptr != NULL) {
200 A = *(u8 *)ptr;
225 continue; 201 continue;
226 } 202 }
227 if (k < 0) {
228 u8 *ptr;
229
230 if (k >= SKF_AD_OFF)
231 break;
232 ptr = load_pointer(skb, k);
233 if (ptr) {
234 A = *ptr;
235 continue;
236 }
237 } else {
238 u8 _tmp, *p;
239 p = skb_header_pointer(skb, k, 1, &_tmp);
240 if (p != NULL) {
241 A = *p;
242 continue;
243 }
244 }
245 return 0; 203 return 0;
246 case BPF_LD|BPF_W|BPF_LEN: 204 case BPF_LD|BPF_W|BPF_LEN:
247 A = len; 205 A = skb->len;
248 continue; 206 continue;
249 case BPF_LDX|BPF_W|BPF_LEN: 207 case BPF_LDX|BPF_W|BPF_LEN:
250 X = len; 208 X = skb->len;
251 continue; 209 continue;
252 case BPF_LD|BPF_W|BPF_IND: 210 case BPF_LD|BPF_W|BPF_IND:
253 k = X + fentry->k; 211 k = X + fentry->k;
@@ -259,10 +217,12 @@ load_b:
259 k = X + fentry->k; 217 k = X + fentry->k;
260 goto load_b; 218 goto load_b;
261 case BPF_LDX|BPF_B|BPF_MSH: 219 case BPF_LDX|BPF_B|BPF_MSH:
262 if (fentry->k >= len) 220 ptr = load_pointer(skb, fentry->k, 1, &tmp);
263 return 0; 221 if (ptr != NULL) {
264 X = (data[fentry->k] & 0xf) << 2; 222 X = (*(u8 *)ptr & 0xf) << 2;
265 continue; 223 continue;
224 }
225 return 0;
266 case BPF_LD|BPF_IMM: 226 case BPF_LD|BPF_IMM:
267 A = fentry->k; 227 A = fentry->k;
268 continue; 228 continue;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb73b2190ec7..d9f7b06fe886 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -129,7 +129,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
129 * Buffers may only be allocated from interrupts using a @gfp_mask of 129 * Buffers may only be allocated from interrupts using a @gfp_mask of
130 * %GFP_ATOMIC. 130 * %GFP_ATOMIC.
131 */ 131 */
132struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) 132struct sk_buff *alloc_skb(unsigned int size, unsigned int __nocast gfp_mask)
133{ 133{
134 struct sk_buff *skb; 134 struct sk_buff *skb;
135 u8 *data; 135 u8 *data;
@@ -182,7 +182,8 @@ nodata:
182 * %GFP_ATOMIC. 182 * %GFP_ATOMIC.
183 */ 183 */
184struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, 184struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
185 unsigned int size, int gfp_mask) 185 unsigned int size,
186 unsigned int __nocast gfp_mask)
186{ 187{
187 struct sk_buff *skb; 188 struct sk_buff *skb;
188 u8 *data; 189 u8 *data;
@@ -322,7 +323,7 @@ void __kfree_skb(struct sk_buff *skb)
322 * %GFP_ATOMIC. 323 * %GFP_ATOMIC.
323 */ 324 */
324 325
325struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) 326struct sk_buff *skb_clone(struct sk_buff *skb, unsigned int __nocast gfp_mask)
326{ 327{
327 struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); 328 struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
328 329
@@ -357,7 +358,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
357 C(ip_summed); 358 C(ip_summed);
358 C(priority); 359 C(priority);
359 C(protocol); 360 C(protocol);
360 C(security);
361 n->destructor = NULL; 361 n->destructor = NULL;
362#ifdef CONFIG_NETFILTER 362#ifdef CONFIG_NETFILTER
363 C(nfmark); 363 C(nfmark);
@@ -422,7 +422,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
422 new->pkt_type = old->pkt_type; 422 new->pkt_type = old->pkt_type;
423 new->stamp = old->stamp; 423 new->stamp = old->stamp;
424 new->destructor = NULL; 424 new->destructor = NULL;
425 new->security = old->security;
426#ifdef CONFIG_NETFILTER 425#ifdef CONFIG_NETFILTER
427 new->nfmark = old->nfmark; 426 new->nfmark = old->nfmark;
428 new->nfcache = old->nfcache; 427 new->nfcache = old->nfcache;
@@ -462,7 +461,7 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
462 * header is going to be modified. Use pskb_copy() instead. 461 * header is going to be modified. Use pskb_copy() instead.
463 */ 462 */
464 463
465struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) 464struct sk_buff *skb_copy(const struct sk_buff *skb, unsigned int __nocast gfp_mask)
466{ 465{
467 int headerlen = skb->data - skb->head; 466 int headerlen = skb->data - skb->head;
468 /* 467 /*
@@ -501,7 +500,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask)
501 * The returned buffer has a reference count of 1. 500 * The returned buffer has a reference count of 1.
502 */ 501 */
503 502
504struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) 503struct sk_buff *pskb_copy(struct sk_buff *skb, unsigned int __nocast gfp_mask)
505{ 504{
506 /* 505 /*
507 * Allocate the copy buffer 506 * Allocate the copy buffer
@@ -559,7 +558,8 @@ out:
559 * reloaded after call to this function. 558 * reloaded after call to this function.
560 */ 559 */
561 560
562int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) 561int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
562 unsigned int __nocast gfp_mask)
563{ 563{
564 int i; 564 int i;
565 u8 *data; 565 u8 *data;
@@ -649,7 +649,8 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
649 * only by netfilter in the cases when checksum is recalculated? --ANK 649 * only by netfilter in the cases when checksum is recalculated? --ANK
650 */ 650 */
651struct sk_buff *skb_copy_expand(const struct sk_buff *skb, 651struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
652 int newheadroom, int newtailroom, int gfp_mask) 652 int newheadroom, int newtailroom,
653 unsigned int __nocast gfp_mask)
653{ 654{
654 /* 655 /*
655 * Allocate the copy buffer 656 * Allocate the copy buffer
diff --git a/net/core/sock.c b/net/core/sock.c
index a6ec3ada7f9e..8b35ccdc2b3b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -622,7 +622,8 @@ lenout:
622 * @prot: struct proto associated with this new sock instance 622 * @prot: struct proto associated with this new sock instance
623 * @zero_it: if we should zero the newly allocated sock 623 * @zero_it: if we should zero the newly allocated sock
624 */ 624 */
625struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) 625struct sock *sk_alloc(int family, unsigned int __nocast priority,
626 struct proto *prot, int zero_it)
626{ 627{
627 struct sock *sk = NULL; 628 struct sock *sk = NULL;
628 kmem_cache_t *slab = prot->slab; 629 kmem_cache_t *slab = prot->slab;
@@ -750,7 +751,8 @@ unsigned long sock_i_ino(struct sock *sk)
750/* 751/*
751 * Allocate a skb from the socket's send buffer. 752 * Allocate a skb from the socket's send buffer.
752 */ 753 */
753struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int priority) 754struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
755 unsigned int __nocast priority)
754{ 756{
755 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) { 757 if (force || atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf) {
756 struct sk_buff * skb = alloc_skb(size, priority); 758 struct sk_buff * skb = alloc_skb(size, priority);
@@ -765,7 +767,8 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, int
765/* 767/*
766 * Allocate a skb from the socket's receive buffer. 768 * Allocate a skb from the socket's receive buffer.
767 */ 769 */
768struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int priority) 770struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force,
771 unsigned int __nocast priority)
769{ 772{
770 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { 773 if (force || atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
771 struct sk_buff *skb = alloc_skb(size, priority); 774 struct sk_buff *skb = alloc_skb(size, priority);
@@ -780,7 +783,7 @@ struct sk_buff *sock_rmalloc(struct sock *sk, unsigned long size, int force, int
780/* 783/*
781 * Allocate a memory block from the socket's option memory buffer. 784 * Allocate a memory block from the socket's option memory buffer.
782 */ 785 */
783void *sock_kmalloc(struct sock *sk, int size, int priority) 786void *sock_kmalloc(struct sock *sk, int size, unsigned int __nocast priority)
784{ 787{
785 if ((unsigned)size <= sysctl_optmem_max && 788 if ((unsigned)size <= sysctl_optmem_max &&
786 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) { 789 atomic_read(&sk->sk_omem_alloc) + size < sysctl_optmem_max) {
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 2101da542ba8..92f2ec46fd22 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -1,6 +1,29 @@
1# 1#
2# DECnet configuration 2# DECnet configuration
3# 3#
4config DECNET
5 tristate "DECnet Support"
6 ---help---
7 The DECnet networking protocol was used in many products made by
8 Digital (now Compaq). It provides reliable stream and sequenced
9 packet communications over which run a variety of services similar
10 to those which run over TCP/IP.
11
12 To find some tools to use with the kernel layer support, please
13 look at Patrick Caulfield's web site:
14 <http://linux-decnet.sourceforge.net/>.
15
16 More detailed documentation is available in
17 <file:Documentation/networking/decnet.txt>.
18
19 Be sure to say Y to "/proc file system support" and "Sysctl support"
20 below when using DECnet, since you will need sysctl support to aid
21 in configuration at run time.
22
23 The DECnet code is also available as a module ( = code which can be
24 inserted in and removed from the running kernel whenever you want).
25 The module is called decnet.
26
4config DECNET_ROUTER 27config DECNET_ROUTER
5 bool "DECnet: router support (EXPERIMENTAL)" 28 bool "DECnet: router support (EXPERIMENTAL)"
6 depends on DECNET && EXPERIMENTAL 29 depends on DECNET && EXPERIMENTAL
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 29bb3cd21965..96a02800cd28 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -536,7 +536,7 @@ static void dn_keepalive(struct sock *sk)
536 * we are double checking that we are not sending too 536 * we are double checking that we are not sending too
537 * many of these keepalive frames. 537 * many of these keepalive frames.
538 */ 538 */
539 if (skb_queue_len(&scp->other_xmit_queue) == 0) 539 if (skb_queue_empty(&scp->other_xmit_queue))
540 dn_nsp_send_link(sk, DN_NOCHANGE, 0); 540 dn_nsp_send_link(sk, DN_NOCHANGE, 0);
541} 541}
542 542
@@ -1191,7 +1191,7 @@ static unsigned int dn_poll(struct file *file, struct socket *sock, poll_table
1191 struct dn_scp *scp = DN_SK(sk); 1191 struct dn_scp *scp = DN_SK(sk);
1192 int mask = datagram_poll(file, sock, wait); 1192 int mask = datagram_poll(file, sock, wait);
1193 1193
1194 if (skb_queue_len(&scp->other_receive_queue)) 1194 if (!skb_queue_empty(&scp->other_receive_queue))
1195 mask |= POLLRDBAND; 1195 mask |= POLLRDBAND;
1196 1196
1197 return mask; 1197 return mask;
@@ -1214,7 +1214,7 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1214 1214
1215 case SIOCATMARK: 1215 case SIOCATMARK:
1216 lock_sock(sk); 1216 lock_sock(sk);
1217 val = (skb_queue_len(&scp->other_receive_queue) != 0); 1217 val = !skb_queue_empty(&scp->other_receive_queue);
1218 if (scp->state != DN_RUN) 1218 if (scp->state != DN_RUN)
1219 val = -ENOTCONN; 1219 val = -ENOTCONN;
1220 release_sock(sk); 1220 release_sock(sk);
@@ -1630,7 +1630,7 @@ static int dn_data_ready(struct sock *sk, struct sk_buff_head *q, int flags, int
1630 int len = 0; 1630 int len = 0;
1631 1631
1632 if (flags & MSG_OOB) 1632 if (flags & MSG_OOB)
1633 return skb_queue_len(q) ? 1 : 0; 1633 return !skb_queue_empty(q) ? 1 : 0;
1634 1634
1635 while(skb != (struct sk_buff *)q) { 1635 while(skb != (struct sk_buff *)q) {
1636 struct dn_skb_cb *cb = DN_SKB_CB(skb); 1636 struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -1707,7 +1707,7 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
1707 if (sk->sk_err) 1707 if (sk->sk_err)
1708 goto out; 1708 goto out;
1709 1709
1710 if (skb_queue_len(&scp->other_receive_queue)) { 1710 if (!skb_queue_empty(&scp->other_receive_queue)) {
1711 if (!(flags & MSG_OOB)) { 1711 if (!(flags & MSG_OOB)) {
1712 msg->msg_flags |= MSG_OOB; 1712 msg->msg_flags |= MSG_OOB;
1713 if (!scp->other_report) { 1713 if (!scp->other_report) {
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 9934b25720e4..99bc061759c3 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -551,7 +551,8 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
551 if (t < s_t) 551 if (t < s_t)
552 continue; 552 continue;
553 if (t > s_t) 553 if (t > s_t)
554 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); 554 memset(&cb->args[1], 0,
555 sizeof(cb->args) - sizeof(cb->args[0]));
555 tb = dn_fib_get_table(t, 0); 556 tb = dn_fib_get_table(t, 0);
556 if (tb == NULL) 557 if (tb == NULL)
557 continue; 558 continue;
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 42abbf3f524f..8cce1fdbda90 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -342,7 +342,8 @@ int dn_nsp_xmit_timeout(struct sock *sk)
342 342
343 dn_nsp_output(sk); 343 dn_nsp_output(sk);
344 344
345 if (skb_queue_len(&scp->data_xmit_queue) || skb_queue_len(&scp->other_xmit_queue)) 345 if (!skb_queue_empty(&scp->data_xmit_queue) ||
346 !skb_queue_empty(&scp->other_xmit_queue))
346 scp->persist = dn_nsp_persist(sk); 347 scp->persist = dn_nsp_persist(sk);
347 348
348 return 0; 349 return 0;
diff --git a/net/econet/Kconfig b/net/econet/Kconfig
new file mode 100644
index 000000000000..39a2d2975e0e
--- /dev/null
+++ b/net/econet/Kconfig
@@ -0,0 +1,36 @@
1#
2# Acorn Econet/AUN protocols
3#
4
5config ECONET
6 tristate "Acorn Econet/AUN protocols (EXPERIMENTAL)"
7 depends on EXPERIMENTAL && INET
8 ---help---
9 Econet is a fairly old and slow networking protocol mainly used by
10 Acorn computers to access file and print servers. It uses native
11 Econet network cards. AUN is an implementation of the higher level
12 parts of Econet that runs over ordinary Ethernet connections, on
13 top of the UDP packet protocol, which in turn runs on top of the
14 Internet protocol IP.
15
16 If you say Y here, you can choose with the next two options whether
17 to send Econet/AUN traffic over a UDP Ethernet connection or over
18 a native Econet network card.
19
20 To compile this driver as a module, choose M here: the module
21 will be called econet.
22
23config ECONET_AUNUDP
24 bool "AUN over UDP"
25 depends on ECONET
26 help
27 Say Y here if you want to send Econet/AUN traffic over a UDP
28 connection (UDP is a packet based protocol that runs on top of the
29 Internet protocol IP) using an ordinary Ethernet network card.
30
31config ECONET_NATIVE
32 bool "Native Econet"
33 depends on ECONET
34 help
35 Say Y here if you have a native Econet network card installed in
36 your computer.
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index ab60ea63688e..f6dbfb99b14d 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -155,7 +155,7 @@ int eth_rebuild_header(struct sk_buff *skb)
155 * This is normal practice and works for any 'now in use' protocol. 155 * This is normal practice and works for any 'now in use' protocol.
156 */ 156 */
157 157
158unsigned short eth_type_trans(struct sk_buff *skb, struct net_device *dev) 158__be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
159{ 159{
160 struct ethhdr *eth; 160 struct ethhdr *eth;
161 unsigned char *rawp; 161 unsigned char *rawp;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 3e63123f7bbd..df5386885a90 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -3,7 +3,6 @@
3# 3#
4config IP_MULTICAST 4config IP_MULTICAST
5 bool "IP: multicasting" 5 bool "IP: multicasting"
6 depends on INET
7 help 6 help
8 This is code for addressing several networked computers at once, 7 This is code for addressing several networked computers at once,
9 enlarging your kernel by about 2 KB. You need multicasting if you 8 enlarging your kernel by about 2 KB. You need multicasting if you
@@ -17,7 +16,6 @@ config IP_MULTICAST
17 16
18config IP_ADVANCED_ROUTER 17config IP_ADVANCED_ROUTER
19 bool "IP: advanced router" 18 bool "IP: advanced router"
20 depends on INET
21 ---help--- 19 ---help---
22 If you intend to run your Linux box mostly as a router, i.e. as a 20 If you intend to run your Linux box mostly as a router, i.e. as a
23 computer that forwards and redistributes network packets, say Y; you 21 computer that forwards and redistributes network packets, say Y; you
@@ -183,7 +181,6 @@ config IP_ROUTE_VERBOSE
183 181
184config IP_PNP 182config IP_PNP
185 bool "IP: kernel level autoconfiguration" 183 bool "IP: kernel level autoconfiguration"
186 depends on INET
187 help 184 help
188 This enables automatic configuration of IP addresses of devices and 185 This enables automatic configuration of IP addresses of devices and
189 of the routing table during kernel boot, based on either information 186 of the routing table during kernel boot, based on either information
@@ -242,7 +239,6 @@ config IP_PNP_RARP
242# bool ' IP: ARP support' CONFIG_IP_PNP_ARP 239# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
243config NET_IPIP 240config NET_IPIP
244 tristate "IP: tunneling" 241 tristate "IP: tunneling"
245 depends on INET
246 select INET_TUNNEL 242 select INET_TUNNEL
247 ---help--- 243 ---help---
248 Tunneling means encapsulating data of one protocol type within 244 Tunneling means encapsulating data of one protocol type within
@@ -260,7 +256,6 @@ config NET_IPIP
260 256
261config NET_IPGRE 257config NET_IPGRE
262 tristate "IP: GRE tunnels over IP" 258 tristate "IP: GRE tunnels over IP"
263 depends on INET
264 select XFRM 259 select XFRM
265 help 260 help
266 Tunneling means encapsulating data of one protocol type within 261 Tunneling means encapsulating data of one protocol type within
@@ -319,7 +314,7 @@ config IP_PIMSM_V2
319 314
320config ARPD 315config ARPD
321 bool "IP: ARP daemon support (EXPERIMENTAL)" 316 bool "IP: ARP daemon support (EXPERIMENTAL)"
322 depends on INET && EXPERIMENTAL 317 depends on EXPERIMENTAL
323 ---help--- 318 ---help---
324 Normally, the kernel maintains an internal cache which maps IP 319 Normally, the kernel maintains an internal cache which maps IP
325 addresses to hardware addresses on the local network, so that 320 addresses to hardware addresses on the local network, so that
@@ -344,7 +339,6 @@ config ARPD
344 339
345config SYN_COOKIES 340config SYN_COOKIES
346 bool "IP: TCP syncookie support (disabled per default)" 341 bool "IP: TCP syncookie support (disabled per default)"
347 depends on INET
348 ---help--- 342 ---help---
349 Normal TCP/IP networking is open to an attack known as "SYN 343 Normal TCP/IP networking is open to an attack known as "SYN
350 flooding". This denial-of-service attack prevents legitimate remote 344 flooding". This denial-of-service attack prevents legitimate remote
@@ -381,7 +375,6 @@ config SYN_COOKIES
381 375
382config INET_AH 376config INET_AH
383 tristate "IP: AH transformation" 377 tristate "IP: AH transformation"
384 depends on INET
385 select XFRM 378 select XFRM
386 select CRYPTO 379 select CRYPTO
387 select CRYPTO_HMAC 380 select CRYPTO_HMAC
@@ -394,7 +387,6 @@ config INET_AH
394 387
395config INET_ESP 388config INET_ESP
396 tristate "IP: ESP transformation" 389 tristate "IP: ESP transformation"
397 depends on INET
398 select XFRM 390 select XFRM
399 select CRYPTO 391 select CRYPTO
400 select CRYPTO_HMAC 392 select CRYPTO_HMAC
@@ -408,7 +400,6 @@ config INET_ESP
408 400
409config INET_IPCOMP 401config INET_IPCOMP
410 tristate "IP: IPComp transformation" 402 tristate "IP: IPComp transformation"
411 depends on INET
412 select XFRM 403 select XFRM
413 select INET_TUNNEL 404 select INET_TUNNEL
414 select CRYPTO 405 select CRYPTO
@@ -421,7 +412,6 @@ config INET_IPCOMP
421 412
422config INET_TUNNEL 413config INET_TUNNEL
423 tristate "IP: tunnel transformation" 414 tristate "IP: tunnel transformation"
424 depends on INET
425 select XFRM 415 select XFRM
426 ---help--- 416 ---help---
427 Support for generic IP tunnel transformation, which is required by 417 Support for generic IP tunnel transformation, which is required by
@@ -431,7 +421,6 @@ config INET_TUNNEL
431 421
432config IP_TCPDIAG 422config IP_TCPDIAG
433 tristate "IP: TCP socket monitoring interface" 423 tristate "IP: TCP socket monitoring interface"
434 depends on INET
435 default y 424 default y
436 ---help--- 425 ---help---
437 Support for TCP socket monitoring interface used by native Linux 426 Support for TCP socket monitoring interface used by native Linux
@@ -447,7 +436,6 @@ config IP_TCPDIAG_IPV6
447 436
448config TCP_CONG_ADVANCED 437config TCP_CONG_ADVANCED
449 bool "TCP: advanced congestion control" 438 bool "TCP: advanced congestion control"
450 depends on INET
451 ---help--- 439 ---help---
452 Support for selection of various TCP congestion control 440 Support for selection of various TCP congestion control
453 modules. 441 modules.
@@ -463,7 +451,6 @@ menu "TCP congestion control"
463 451
464config TCP_CONG_BIC 452config TCP_CONG_BIC
465 tristate "Binary Increase Congestion (BIC) control" 453 tristate "Binary Increase Congestion (BIC) control"
466 depends on INET
467 default y 454 default y
468 ---help--- 455 ---help---
469 BIC-TCP is a sender-side only change that ensures a linear RTT 456 BIC-TCP is a sender-side only change that ensures a linear RTT
@@ -478,7 +465,6 @@ config TCP_CONG_BIC
478 465
479config TCP_CONG_WESTWOOD 466config TCP_CONG_WESTWOOD
480 tristate "TCP Westwood+" 467 tristate "TCP Westwood+"
481 depends on INET
482 default m 468 default m
483 ---help--- 469 ---help---
484 TCP Westwood+ is a sender-side only modification of the TCP Reno 470 TCP Westwood+ is a sender-side only modification of the TCP Reno
@@ -493,7 +479,6 @@ config TCP_CONG_WESTWOOD
493 479
494config TCP_CONG_HTCP 480config TCP_CONG_HTCP
495 tristate "H-TCP" 481 tristate "H-TCP"
496 depends on INET
497 default m 482 default m
498 ---help--- 483 ---help---
499 H-TCP is a send-side only modifications of the TCP Reno 484 H-TCP is a send-side only modifications of the TCP Reno
@@ -505,7 +490,7 @@ config TCP_CONG_HTCP
505 490
506config TCP_CONG_HSTCP 491config TCP_CONG_HSTCP
507 tristate "High Speed TCP" 492 tristate "High Speed TCP"
508 depends on INET && EXPERIMENTAL 493 depends on EXPERIMENTAL
509 default n 494 default n
510 ---help--- 495 ---help---
511 Sally Floyd's High Speed TCP (RFC 3649) congestion control. 496 Sally Floyd's High Speed TCP (RFC 3649) congestion control.
@@ -516,7 +501,7 @@ config TCP_CONG_HSTCP
516 501
517config TCP_CONG_HYBLA 502config TCP_CONG_HYBLA
518 tristate "TCP-Hybla congestion control algorithm" 503 tristate "TCP-Hybla congestion control algorithm"
519 depends on INET && EXPERIMENTAL 504 depends on EXPERIMENTAL
520 default n 505 default n
521 ---help--- 506 ---help---
522 TCP-Hybla is a sender-side only change that eliminates penalization of 507 TCP-Hybla is a sender-side only change that eliminates penalization of
@@ -526,7 +511,7 @@ config TCP_CONG_HYBLA
526 511
527config TCP_CONG_VEGAS 512config TCP_CONG_VEGAS
528 tristate "TCP Vegas" 513 tristate "TCP Vegas"
529 depends on INET && EXPERIMENTAL 514 depends on EXPERIMENTAL
530 default n 515 default n
531 ---help--- 516 ---help---
532 TCP Vegas is a sender-side only change to TCP that anticipates 517 TCP Vegas is a sender-side only change to TCP that anticipates
@@ -537,7 +522,7 @@ config TCP_CONG_VEGAS
537 522
538config TCP_CONG_SCALABLE 523config TCP_CONG_SCALABLE
539 tristate "Scalable TCP" 524 tristate "Scalable TCP"
540 depends on INET && EXPERIMENTAL 525 depends on EXPERIMENTAL
541 default n 526 default n
542 ---help--- 527 ---help---
543 Scalable TCP is a sender-side only change to TCP which uses a 528 Scalable TCP is a sender-side only change to TCP which uses a
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 658e7977924d..ef7468376ae6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1009,6 +1009,15 @@ static int __init init_ipv4_mibs(void)
1009static int ipv4_proc_init(void); 1009static int ipv4_proc_init(void);
1010extern void ipfrag_init(void); 1010extern void ipfrag_init(void);
1011 1011
1012/*
1013 * IP protocol layer initialiser
1014 */
1015
1016static struct packet_type ip_packet_type = {
1017 .type = __constant_htons(ETH_P_IP),
1018 .func = ip_rcv,
1019};
1020
1012static int __init inet_init(void) 1021static int __init inet_init(void)
1013{ 1022{
1014 struct sk_buff *dummy_skb; 1023 struct sk_buff *dummy_skb;
@@ -1102,6 +1111,8 @@ static int __init inet_init(void)
1102 1111
1103 ipfrag_init(); 1112 ipfrag_init();
1104 1113
1114 dev_add_pack(&ip_packet_type);
1115
1105 rc = 0; 1116 rc = 0;
1106out: 1117out:
1107 return rc; 1118 return rc;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b56e88edf1b3..4be234c7d8c3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.324" 46#define VERSION "0.325"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -136,6 +136,7 @@ struct trie_use_stats {
136 unsigned int semantic_match_passed; 136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss; 137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit; 138 unsigned int null_node_hit;
139 unsigned int resize_node_skipped;
139}; 140};
140#endif 141#endif
141 142
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 165static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165static int tnode_child_length(struct tnode *tn); 166static int tnode_child_length(struct tnode *tn);
166static struct node *resize(struct trie *t, struct tnode *tn); 167static struct node *resize(struct trie *t, struct tnode *tn);
167static struct tnode *inflate(struct trie *t, struct tnode *tn); 168static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
168static struct tnode *halve(struct trie *t, struct tnode *tn); 169static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
169static void tnode_free(struct tnode *tn); 170static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t); 171static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); 172extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
@@ -358,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li)
358 kfree(li); 359 kfree(li);
359} 360}
360 361
362static struct tnode *tnode_alloc(unsigned int size)
363{
364 if (size <= PAGE_SIZE) {
365 return kmalloc(size, GFP_KERNEL);
366 } else {
367 return (struct tnode *)
368 __get_free_pages(GFP_KERNEL, get_order(size));
369 }
370}
371
372static void __tnode_free(struct tnode *tn)
373{
374 unsigned int size = sizeof(struct tnode) +
375 (1<<tn->bits) * sizeof(struct node *);
376
377 if (size <= PAGE_SIZE)
378 kfree(tn);
379 else
380 free_pages((unsigned long)tn, get_order(size));
381}
382
361static struct tnode* tnode_new(t_key key, int pos, int bits) 383static struct tnode* tnode_new(t_key key, int pos, int bits)
362{ 384{
363 int nchildren = 1<<bits; 385 int nchildren = 1<<bits;
364 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 386 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
365 struct tnode *tn = kmalloc(sz, GFP_KERNEL); 387 struct tnode *tn = tnode_alloc(sz);
366 388
367 if(tn) { 389 if(tn) {
368 memset(tn, 0, sz); 390 memset(tn, 0, sz);
@@ -390,7 +412,7 @@ static void tnode_free(struct tnode *tn)
390 printk("FL %p \n", tn); 412 printk("FL %p \n", tn);
391 } 413 }
392 else if(IS_TNODE(tn)) { 414 else if(IS_TNODE(tn)) {
393 kfree(tn); 415 __tnode_free(tn);
394 if(trie_debug > 0 ) 416 if(trie_debug > 0 )
395 printk("FT %p \n", tn); 417 printk("FT %p \n", tn);
396 } 418 }
@@ -460,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
460static struct node *resize(struct trie *t, struct tnode *tn) 482static struct node *resize(struct trie *t, struct tnode *tn)
461{ 483{
462 int i; 484 int i;
485 int err = 0;
463 486
464 if (!tn) 487 if (!tn)
465 return NULL; 488 return NULL;
@@ -556,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn)
556 */ 579 */
557 580
558 check_tnode(tn); 581 check_tnode(tn);
559 582
583 err = 0;
560 while ((tn->full_children > 0 && 584 while ((tn->full_children > 0 &&
561 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 585 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
562 inflate_threshold * tnode_child_length(tn))) { 586 inflate_threshold * tnode_child_length(tn))) {
563 587
564 tn = inflate(t, tn); 588 tn = inflate(t, tn, &err);
589
590 if(err) {
591#ifdef CONFIG_IP_FIB_TRIE_STATS
592 t->stats.resize_node_skipped++;
593#endif
594 break;
595 }
565 } 596 }
566 597
567 check_tnode(tn); 598 check_tnode(tn);
@@ -570,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn)
570 * Halve as long as the number of empty children in this 601 * Halve as long as the number of empty children in this
571 * node is above threshold. 602 * node is above threshold.
572 */ 603 */
604
605 err = 0;
573 while (tn->bits > 1 && 606 while (tn->bits > 1 &&
574 100 * (tnode_child_length(tn) - tn->empty_children) < 607 100 * (tnode_child_length(tn) - tn->empty_children) <
575 halve_threshold * tnode_child_length(tn)) 608 halve_threshold * tnode_child_length(tn)) {
609
610 tn = halve(t, tn, &err);
611
612 if(err) {
613#ifdef CONFIG_IP_FIB_TRIE_STATS
614 t->stats.resize_node_skipped++;
615#endif
616 break;
617 }
618 }
576 619
577 tn = halve(t, tn);
578 620
579 /* Only one child remains */ 621 /* Only one child remains */
580 622
@@ -599,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
599 return (struct node *) tn; 641 return (struct node *) tn;
600} 642}
601 643
602static struct tnode *inflate(struct trie *t, struct tnode *tn) 644static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
603{ 645{
604 struct tnode *inode; 646 struct tnode *inode;
605 struct tnode *oldtnode = tn; 647 struct tnode *oldtnode = tn;
@@ -611,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
611 653
612 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 654 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
613 655
614 if (!tn) 656 if (!tn) {
615 trie_bug("tnode_new failed"); 657 *err = -ENOMEM;
658 return oldtnode;
659 }
660
661 /*
662 * Preallocate and store tnodes before the actual work so we
663 * don't get into an inconsistent state if memory allocation
664 * fails. In case of failure we return the oldnode and inflate
665 * of tnode is ignored.
666 */
667
668 for(i = 0; i < olen; i++) {
669 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
670
671 if (inode &&
672 IS_TNODE(inode) &&
673 inode->pos == oldtnode->pos + oldtnode->bits &&
674 inode->bits > 1) {
675 struct tnode *left, *right;
676
677 t_key m = TKEY_GET_MASK(inode->pos, 1);
678
679 left = tnode_new(inode->key&(~m), inode->pos + 1,
680 inode->bits - 1);
681
682 if(!left) {
683 *err = -ENOMEM;
684 break;
685 }
686
687 right = tnode_new(inode->key|m, inode->pos + 1,
688 inode->bits - 1);
689
690 if(!right) {
691 *err = -ENOMEM;
692 break;
693 }
694
695 put_child(t, tn, 2*i, (struct node *) left);
696 put_child(t, tn, 2*i+1, (struct node *) right);
697 }
698 }
699
700 if(*err) {
701 int size = tnode_child_length(tn);
702 int j;
703
704 for(j = 0; j < size; j++)
705 if( tn->child[j])
706 tnode_free((struct tnode *)tn->child[j]);
707
708 tnode_free(tn);
709
710 *err = -ENOMEM;
711 return oldtnode;
712 }
616 713
617 for(i = 0; i < olen; i++) { 714 for(i = 0; i < olen; i++) {
618 struct node *node = tnode_get_child(oldtnode, i); 715 struct node *node = tnode_get_child(oldtnode, i);
@@ -625,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
625 722
626 if(IS_LEAF(node) || ((struct tnode *) node)->pos > 723 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
627 tn->pos + tn->bits - 1) { 724 tn->pos + tn->bits - 1) {
628 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, 725 if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
629 1) == 0) 726 1) == 0)
630 put_child(t, tn, 2*i, node); 727 put_child(t, tn, 2*i, node);
631 else 728 else
@@ -665,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
665 * the position (inode->pos) 762 * the position (inode->pos)
666 */ 763 */
667 764
668 t_key m = TKEY_GET_MASK(inode->pos, 1);
669
670 /* Use the old key, but set the new significant 765 /* Use the old key, but set the new significant
671 * bit to zero. 766 * bit to zero.
672 */ 767 */
673 left = tnode_new(inode->key&(~m), inode->pos + 1,
674 inode->bits - 1);
675 768
676 if(!left) 769 left = (struct tnode *) tnode_get_child(tn, 2*i);
677 trie_bug("tnode_new failed"); 770 put_child(t, tn, 2*i, NULL);
678 771
679 772 if(!left)
680 /* Use the old key, but set the new significant 773 BUG();
681 * bit to one. 774
682 */ 775 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
683 right = tnode_new(inode->key|m, inode->pos + 1, 776 put_child(t, tn, 2*i+1, NULL);
684 inode->bits - 1); 777
778 if(!right)
779 BUG();
685 780
686 if(!right)
687 trie_bug("tnode_new failed");
688
689 size = tnode_child_length(left); 781 size = tnode_child_length(left);
690 for(j = 0; j < size; j++) { 782 for(j = 0; j < size; j++) {
691 put_child(t, left, j, inode->child[j]); 783 put_child(t, left, j, inode->child[j]);
@@ -701,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
701 return tn; 793 return tn;
702} 794}
703 795
704static struct tnode *halve(struct trie *t, struct tnode *tn) 796static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
705{ 797{
706 struct tnode *oldtnode = tn; 798 struct tnode *oldtnode = tn;
707 struct node *left, *right; 799 struct node *left, *right;
@@ -712,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
712 804
713 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 805 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
714 806
715 if(!tn) 807 if (!tn) {
716 trie_bug("tnode_new failed"); 808 *err = -ENOMEM;
809 return oldtnode;
810 }
811
812 /*
813 * Preallocate and store tnodes before the actual work so we
814 * don't get into an inconsistent state if memory allocation
815 * fails. In case of failure we return the oldnode and halve
816 * of tnode is ignored.
817 */
818
819 for(i = 0; i < olen; i += 2) {
820 left = tnode_get_child(oldtnode, i);
821 right = tnode_get_child(oldtnode, i+1);
822
823 /* Two nonempty children */
824 if( left && right) {
825 struct tnode *newBinNode =
826 tnode_new(left->key, tn->pos + tn->bits, 1);
827
828 if(!newBinNode) {
829 *err = -ENOMEM;
830 break;
831 }
832 put_child(t, tn, i/2, (struct node *)newBinNode);
833 }
834 }
835
836 if(*err) {
837 int size = tnode_child_length(tn);
838 int j;
839
840 for(j = 0; j < size; j++)
841 if( tn->child[j])
842 tnode_free((struct tnode *)tn->child[j]);
843
844 tnode_free(tn);
845
846 *err = -ENOMEM;
847 return oldtnode;
848 }
717 849
718 for(i = 0; i < olen; i += 2) { 850 for(i = 0; i < olen; i += 2) {
719 left = tnode_get_child(oldtnode, i); 851 left = tnode_get_child(oldtnode, i);
@@ -730,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
730 /* Two nonempty children */ 862 /* Two nonempty children */
731 else { 863 else {
732 struct tnode *newBinNode = 864 struct tnode *newBinNode =
733 tnode_new(left->key, tn->pos + tn->bits, 1); 865 (struct tnode *) tnode_get_child(tn, i/2);
866 put_child(t, tn, i/2, NULL);
734 867
735 if(!newBinNode) 868 if(!newBinNode)
736 trie_bug("tnode_new failed"); 869 BUG();
737 870
738 put_child(t, newBinNode, 0, left); 871 put_child(t, newBinNode, 0, left);
739 put_child(t, newBinNode, 1, right); 872 put_child(t, newBinNode, 1, right);
@@ -2301,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
2301 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2434 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2302 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2435 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2303 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2436 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2437 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2304#ifdef CLEAR_STATS 2438#ifdef CLEAR_STATS
2305 memset(&(t->stats), 0, sizeof(t->stats)); 2439 memset(&(t->stats), 0, sizeof(t->stats));
2306#endif 2440#endif
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index cb759484979d..279f57abfecb 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -970,7 +970,8 @@ int icmp_rcv(struct sk_buff *skb)
970 * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently 970 * RFC 1122: 3.2.2.8 An ICMP_TIMESTAMP MAY be silently
971 * discarded if to broadcast/multicast. 971 * discarded if to broadcast/multicast.
972 */ 972 */
973 if (icmph->type == ICMP_ECHO && 973 if ((icmph->type == ICMP_ECHO ||
974 icmph->type == ICMP_TIMESTAMP) &&
974 sysctl_icmp_echo_ignore_broadcasts) { 975 sysctl_icmp_echo_ignore_broadcasts) {
975 goto error; 976 goto error;
976 } 977 }
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 1f3183168a90..5088f90835ae 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1615,9 +1615,10 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1615{ 1615{
1616 int err; 1616 int err;
1617 u32 addr = imr->imr_multiaddr.s_addr; 1617 u32 addr = imr->imr_multiaddr.s_addr;
1618 struct ip_mc_socklist *iml, *i; 1618 struct ip_mc_socklist *iml=NULL, *i;
1619 struct in_device *in_dev; 1619 struct in_device *in_dev;
1620 struct inet_sock *inet = inet_sk(sk); 1620 struct inet_sock *inet = inet_sk(sk);
1621 int ifindex;
1621 int count = 0; 1622 int count = 0;
1622 1623
1623 if (!MULTICAST(addr)) 1624 if (!MULTICAST(addr))
@@ -1633,37 +1634,30 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr)
1633 goto done; 1634 goto done;
1634 } 1635 }
1635 1636
1636 iml = (struct ip_mc_socklist *)sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
1637
1638 err = -EADDRINUSE; 1637 err = -EADDRINUSE;
1638 ifindex = imr->imr_ifindex;
1639 for (i = inet->mc_list; i; i = i->next) { 1639 for (i = inet->mc_list; i; i = i->next) {
1640 if (memcmp(&i->multi, imr, sizeof(*imr)) == 0) { 1640 if (i->multi.imr_multiaddr.s_addr == addr &&
1641 /* New style additions are reference counted */ 1641 i->multi.imr_ifindex == ifindex)
1642 if (imr->imr_address.s_addr == 0) {
1643 i->count++;
1644 err = 0;
1645 }
1646 goto done; 1642 goto done;
1647 }
1648 count++; 1643 count++;
1649 } 1644 }
1650 err = -ENOBUFS; 1645 err = -ENOBUFS;
1651 if (iml == NULL || count >= sysctl_igmp_max_memberships) 1646 if (count >= sysctl_igmp_max_memberships)
1647 goto done;
1648 iml = (struct ip_mc_socklist *)sock_kmalloc(sk,sizeof(*iml),GFP_KERNEL);
1649 if (iml == NULL)
1652 goto done; 1650 goto done;
1651
1653 memcpy(&iml->multi, imr, sizeof(*imr)); 1652 memcpy(&iml->multi, imr, sizeof(*imr));
1654 iml->next = inet->mc_list; 1653 iml->next = inet->mc_list;
1655 iml->count = 1;
1656 iml->sflist = NULL; 1654 iml->sflist = NULL;
1657 iml->sfmode = MCAST_EXCLUDE; 1655 iml->sfmode = MCAST_EXCLUDE;
1658 inet->mc_list = iml; 1656 inet->mc_list = iml;
1659 ip_mc_inc_group(in_dev, addr); 1657 ip_mc_inc_group(in_dev, addr);
1660 iml = NULL;
1661 err = 0; 1658 err = 0;
1662
1663done: 1659done:
1664 rtnl_shunlock(); 1660 rtnl_shunlock();
1665 if (iml)
1666 sock_kfree_s(sk, iml, sizeof(*iml));
1667 return err; 1661 return err;
1668} 1662}
1669 1663
@@ -1693,30 +1687,25 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr)
1693{ 1687{
1694 struct inet_sock *inet = inet_sk(sk); 1688 struct inet_sock *inet = inet_sk(sk);
1695 struct ip_mc_socklist *iml, **imlp; 1689 struct ip_mc_socklist *iml, **imlp;
1690 struct in_device *in_dev;
1691 u32 group = imr->imr_multiaddr.s_addr;
1692 u32 ifindex;
1696 1693
1697 rtnl_lock(); 1694 rtnl_lock();
1695 in_dev = ip_mc_find_dev(imr);
1696 if (!in_dev) {
1697 rtnl_unlock();
1698 return -ENODEV;
1699 }
1700 ifindex = imr->imr_ifindex;
1698 for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) { 1701 for (imlp = &inet->mc_list; (iml = *imlp) != NULL; imlp = &iml->next) {
1699 if (iml->multi.imr_multiaddr.s_addr==imr->imr_multiaddr.s_addr && 1702 if (iml->multi.imr_multiaddr.s_addr == group &&
1700 iml->multi.imr_address.s_addr==imr->imr_address.s_addr && 1703 iml->multi.imr_ifindex == ifindex) {
1701 (!imr->imr_ifindex || iml->multi.imr_ifindex==imr->imr_ifindex)) { 1704 (void) ip_mc_leave_src(sk, iml, in_dev);
1702 struct in_device *in_dev;
1703
1704 in_dev = inetdev_by_index(iml->multi.imr_ifindex);
1705 if (in_dev)
1706 (void) ip_mc_leave_src(sk, iml, in_dev);
1707 if (--iml->count) {
1708 rtnl_unlock();
1709 if (in_dev)
1710 in_dev_put(in_dev);
1711 return 0;
1712 }
1713 1705
1714 *imlp = iml->next; 1706 *imlp = iml->next;
1715 1707
1716 if (in_dev) { 1708 ip_mc_dec_group(in_dev, group);
1717 ip_mc_dec_group(in_dev, imr->imr_multiaddr.s_addr);
1718 in_dev_put(in_dev);
1719 }
1720 rtnl_unlock(); 1709 rtnl_unlock();
1721 sock_kfree_s(sk, iml, sizeof(*iml)); 1710 sock_kfree_s(sk, iml, sizeof(*iml));
1722 return 0; 1711 return 0;
@@ -1736,6 +1725,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1736 struct in_device *in_dev = NULL; 1725 struct in_device *in_dev = NULL;
1737 struct inet_sock *inet = inet_sk(sk); 1726 struct inet_sock *inet = inet_sk(sk);
1738 struct ip_sf_socklist *psl; 1727 struct ip_sf_socklist *psl;
1728 int leavegroup = 0;
1739 int i, j, rv; 1729 int i, j, rv;
1740 1730
1741 if (!MULTICAST(addr)) 1731 if (!MULTICAST(addr))
@@ -1755,15 +1745,20 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1755 err = -EADDRNOTAVAIL; 1745 err = -EADDRNOTAVAIL;
1756 1746
1757 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 1747 for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
1758 if (memcmp(&pmc->multi, mreqs, 2*sizeof(__u32)) == 0) 1748 if (pmc->multi.imr_multiaddr.s_addr == imr.imr_multiaddr.s_addr
1749 && pmc->multi.imr_ifindex == imr.imr_ifindex)
1759 break; 1750 break;
1760 } 1751 }
1761 if (!pmc) /* must have a prior join */ 1752 if (!pmc) { /* must have a prior join */
1753 err = -EINVAL;
1762 goto done; 1754 goto done;
1755 }
1763 /* if a source filter was set, must be the same mode as before */ 1756 /* if a source filter was set, must be the same mode as before */
1764 if (pmc->sflist) { 1757 if (pmc->sflist) {
1765 if (pmc->sfmode != omode) 1758 if (pmc->sfmode != omode) {
1759 err = -EINVAL;
1766 goto done; 1760 goto done;
1761 }
1767 } else if (pmc->sfmode != omode) { 1762 } else if (pmc->sfmode != omode) {
1768 /* allow mode switches for empty-set filters */ 1763 /* allow mode switches for empty-set filters */
1769 ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 0, NULL, 0); 1764 ip_mc_add_src(in_dev, &mreqs->imr_multiaddr, omode, 0, NULL, 0);
@@ -1775,7 +1770,7 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1775 psl = pmc->sflist; 1770 psl = pmc->sflist;
1776 if (!add) { 1771 if (!add) {
1777 if (!psl) 1772 if (!psl)
1778 goto done; 1773 goto done; /* err = -EADDRNOTAVAIL */
1779 rv = !0; 1774 rv = !0;
1780 for (i=0; i<psl->sl_count; i++) { 1775 for (i=0; i<psl->sl_count; i++) {
1781 rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr, 1776 rv = memcmp(&psl->sl_addr[i], &mreqs->imr_sourceaddr,
@@ -1784,7 +1779,13 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1784 break; 1779 break;
1785 } 1780 }
1786 if (rv) /* source not found */ 1781 if (rv) /* source not found */
1782 goto done; /* err = -EADDRNOTAVAIL */
1783
1784 /* special case - (INCLUDE, empty) == LEAVE_GROUP */
1785 if (psl->sl_count == 1 && omode == MCAST_INCLUDE) {
1786 leavegroup = 1;
1787 goto done; 1787 goto done;
1788 }
1788 1789
1789 /* update the interface filter */ 1790 /* update the interface filter */
1790 ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1, 1791 ip_mc_del_src(in_dev, &mreqs->imr_multiaddr, omode, 1,
@@ -1842,18 +1843,21 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct
1842 &mreqs->imr_sourceaddr, 1); 1843 &mreqs->imr_sourceaddr, 1);
1843done: 1844done:
1844 rtnl_shunlock(); 1845 rtnl_shunlock();
1846 if (leavegroup)
1847 return ip_mc_leave_group(sk, &imr);
1845 return err; 1848 return err;
1846} 1849}
1847 1850
1848int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) 1851int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1849{ 1852{
1850 int err; 1853 int err = 0;
1851 struct ip_mreqn imr; 1854 struct ip_mreqn imr;
1852 u32 addr = msf->imsf_multiaddr; 1855 u32 addr = msf->imsf_multiaddr;
1853 struct ip_mc_socklist *pmc; 1856 struct ip_mc_socklist *pmc;
1854 struct in_device *in_dev; 1857 struct in_device *in_dev;
1855 struct inet_sock *inet = inet_sk(sk); 1858 struct inet_sock *inet = inet_sk(sk);
1856 struct ip_sf_socklist *newpsl, *psl; 1859 struct ip_sf_socklist *newpsl, *psl;
1860 int leavegroup = 0;
1857 1861
1858 if (!MULTICAST(addr)) 1862 if (!MULTICAST(addr))
1859 return -EINVAL; 1863 return -EINVAL;
@@ -1872,15 +1876,22 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1872 err = -ENODEV; 1876 err = -ENODEV;
1873 goto done; 1877 goto done;
1874 } 1878 }
1875 err = -EADDRNOTAVAIL; 1879
1880 /* special case - (INCLUDE, empty) == LEAVE_GROUP */
1881 if (msf->imsf_fmode == MCAST_INCLUDE && msf->imsf_numsrc == 0) {
1882 leavegroup = 1;
1883 goto done;
1884 }
1876 1885
1877 for (pmc=inet->mc_list; pmc; pmc=pmc->next) { 1886 for (pmc=inet->mc_list; pmc; pmc=pmc->next) {
1878 if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr && 1887 if (pmc->multi.imr_multiaddr.s_addr == msf->imsf_multiaddr &&
1879 pmc->multi.imr_ifindex == imr.imr_ifindex) 1888 pmc->multi.imr_ifindex == imr.imr_ifindex)
1880 break; 1889 break;
1881 } 1890 }
1882 if (!pmc) /* must have a prior join */ 1891 if (!pmc) { /* must have a prior join */
1892 err = -EINVAL;
1883 goto done; 1893 goto done;
1894 }
1884 if (msf->imsf_numsrc) { 1895 if (msf->imsf_numsrc) {
1885 newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk, 1896 newpsl = (struct ip_sf_socklist *)sock_kmalloc(sk,
1886 IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL); 1897 IP_SFLSIZE(msf->imsf_numsrc), GFP_KERNEL);
@@ -1909,8 +1920,11 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex)
1909 0, NULL, 0); 1920 0, NULL, 0);
1910 pmc->sflist = newpsl; 1921 pmc->sflist = newpsl;
1911 pmc->sfmode = msf->imsf_fmode; 1922 pmc->sfmode = msf->imsf_fmode;
1923 err = 0;
1912done: 1924done:
1913 rtnl_shunlock(); 1925 rtnl_shunlock();
1926 if (leavegroup)
1927 err = ip_mc_leave_group(sk, &imr);
1914 return err; 1928 return err;
1915} 1929}
1916 1930
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ce5c3292f9f..80d13103b2b0 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -107,7 +107,6 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
107 newskb->pkt_type = PACKET_LOOPBACK; 107 newskb->pkt_type = PACKET_LOOPBACK;
108 newskb->ip_summed = CHECKSUM_UNNECESSARY; 108 newskb->ip_summed = CHECKSUM_UNNECESSARY;
109 BUG_TRAP(newskb->dst); 109 BUG_TRAP(newskb->dst);
110 nf_reset(newskb);
111 netif_rx(newskb); 110 netif_rx(newskb);
112 return 0; 111 return 0;
113} 112}
@@ -188,14 +187,6 @@ static inline int ip_finish_output2(struct sk_buff *skb)
188 skb = skb2; 187 skb = skb2;
189 } 188 }
190 189
191#ifdef CONFIG_BRIDGE_NETFILTER
192 /* bridge-netfilter defers calling some IP hooks to the bridge layer
193 * and still needs the conntrack reference.
194 */
195 if (skb->nf_bridge == NULL)
196#endif
197 nf_reset(skb);
198
199 if (hh) { 190 if (hh) {
200 int hh_alen; 191 int hh_alen;
201 192
@@ -389,7 +380,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
389 to->pkt_type = from->pkt_type; 380 to->pkt_type = from->pkt_type;
390 to->priority = from->priority; 381 to->priority = from->priority;
391 to->protocol = from->protocol; 382 to->protocol = from->protocol;
392 to->security = from->security;
393 dst_release(to->dst); 383 dst_release(to->dst);
394 to->dst = dst_clone(from->dst); 384 to->dst = dst_clone(from->dst);
395 to->dev = from->dev; 385 to->dev = from->dev;
@@ -1329,23 +1319,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1329 ip_rt_put(rt); 1319 ip_rt_put(rt);
1330} 1320}
1331 1321
1332/*
1333 * IP protocol layer initialiser
1334 */
1335
1336static struct packet_type ip_packet_type = {
1337 .type = __constant_htons(ETH_P_IP),
1338 .func = ip_rcv,
1339};
1340
1341/*
1342 * IP registers the packet type and then calls the subprotocol initialisers
1343 */
1344
1345void __init ip_init(void) 1322void __init ip_init(void)
1346{ 1323{
1347 dev_add_pack(&ip_packet_type);
1348
1349 ip_rt_init(); 1324 ip_rt_init();
1350 inet_initpeers(); 1325 inet_initpeers();
1351 1326
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f8b172f89811..fc7c481d0d79 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -677,11 +677,11 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
677 mreq.imr_address.s_addr = mreqs.imr_interface; 677 mreq.imr_address.s_addr = mreqs.imr_interface;
678 mreq.imr_ifindex = 0; 678 mreq.imr_ifindex = 0;
679 err = ip_mc_join_group(sk, &mreq); 679 err = ip_mc_join_group(sk, &mreq);
680 if (err) 680 if (err && err != -EADDRINUSE)
681 break; 681 break;
682 omode = MCAST_INCLUDE; 682 omode = MCAST_INCLUDE;
683 add = 1; 683 add = 1;
684 } else /*IP_DROP_SOURCE_MEMBERSHIP */ { 684 } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
685 omode = MCAST_INCLUDE; 685 omode = MCAST_INCLUDE;
686 add = 0; 686 add = 0;
687 } 687 }
@@ -754,7 +754,7 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
754 mreq.imr_address.s_addr = 0; 754 mreq.imr_address.s_addr = 0;
755 mreq.imr_ifindex = greqs.gsr_interface; 755 mreq.imr_ifindex = greqs.gsr_interface;
756 err = ip_mc_join_group(sk, &mreq); 756 err = ip_mc_join_group(sk, &mreq);
757 if (err) 757 if (err && err != -EADDRINUSE)
758 break; 758 break;
759 greqs.gsr_interface = mreq.imr_ifindex; 759 greqs.gsr_interface = mreq.imr_ifindex;
760 omode = MCAST_INCLUDE; 760 omode = MCAST_INCLUDE;
diff --git a/net/ipv4/ipvs/Kconfig b/net/ipv4/ipvs/Kconfig
index 63a82b4b64bb..c9820bfc493a 100644
--- a/net/ipv4/ipvs/Kconfig
+++ b/net/ipv4/ipvs/Kconfig
@@ -2,11 +2,11 @@
2# IP Virtual Server configuration 2# IP Virtual Server configuration
3# 3#
4menu "IP: Virtual Server Configuration" 4menu "IP: Virtual Server Configuration"
5 depends on INET && NETFILTER 5 depends on NETFILTER
6 6
7config IP_VS 7config IP_VS
8 tristate "IP virtual server support (EXPERIMENTAL)" 8 tristate "IP virtual server support (EXPERIMENTAL)"
9 depends on INET && NETFILTER 9 depends on NETFILTER
10 ---help--- 10 ---help---
11 IP Virtual Server support will let you build a high-performance 11 IP Virtual Server support will let you build a high-performance
12 virtual server based on cluster of two or more real servers. This 12 virtual server based on cluster of two or more real servers. This
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 9f16ab309106..d0145a8b1551 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -758,7 +758,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
758 return 1; 758 return 1;
759} 759}
760 760
761 761/* Called from keventd and must protect itself from softirqs */
762void ip_vs_random_dropentry(void) 762void ip_vs_random_dropentry(void)
763{ 763{
764 int idx; 764 int idx;
@@ -773,7 +773,7 @@ void ip_vs_random_dropentry(void)
773 /* 773 /*
774 * Lock is actually needed in this loop. 774 * Lock is actually needed in this loop.
775 */ 775 */
776 ct_write_lock(hash); 776 ct_write_lock_bh(hash);
777 777
778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 778 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
779 if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT)) 779 if (!cp->cport && !(cp->flags & IP_VS_CONN_F_NO_CPORT))
@@ -806,7 +806,7 @@ void ip_vs_random_dropentry(void)
806 ip_vs_conn_expire_now(cp->control); 806 ip_vs_conn_expire_now(cp->control);
807 } 807 }
808 } 808 }
809 ct_write_unlock(hash); 809 ct_write_unlock_bh(hash);
810 } 810 }
811} 811}
812 812
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 12a82e91d22a..7d99ede2ef79 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -90,7 +90,8 @@ int ip_vs_get_debug_level(void)
90#endif 90#endif
91 91
92/* 92/*
93 * update_defense_level is called from keventd and from sysctl. 93 * update_defense_level is called from keventd and from sysctl,
94 * so it needs to protect itself from softirqs
94 */ 95 */
95static void update_defense_level(void) 96static void update_defense_level(void)
96{ 97{
@@ -110,6 +111,8 @@ static void update_defense_level(void)
110 111
111 nomem = (availmem < sysctl_ip_vs_amemthresh); 112 nomem = (availmem < sysctl_ip_vs_amemthresh);
112 113
114 local_bh_disable();
115
113 /* drop_entry */ 116 /* drop_entry */
114 spin_lock(&__ip_vs_dropentry_lock); 117 spin_lock(&__ip_vs_dropentry_lock);
115 switch (sysctl_ip_vs_drop_entry) { 118 switch (sysctl_ip_vs_drop_entry) {
@@ -206,6 +209,8 @@ static void update_defense_level(void)
206 if (to_change >= 0) 209 if (to_change >= 0)
207 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 210 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
208 write_unlock(&__ip_vs_securetcp_lock); 211 write_unlock(&__ip_vs_securetcp_lock);
212
213 local_bh_enable();
209} 214}
210 215
211 216
@@ -1360,9 +1365,7 @@ proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1360 /* Restore the correct value */ 1365 /* Restore the correct value */
1361 *valp = val; 1366 *valp = val;
1362 } else { 1367 } else {
1363 local_bh_disable();
1364 update_defense_level(); 1368 update_defense_level();
1365 local_bh_enable();
1366 } 1369 }
1367 } 1370 }
1368 return rc; 1371 return rc;
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index 42dc95102873..1dd824f3cf0a 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -432,6 +432,13 @@ static unsigned int ip_conntrack_defrag(unsigned int hooknum,
432 const struct net_device *out, 432 const struct net_device *out,
433 int (*okfn)(struct sk_buff *)) 433 int (*okfn)(struct sk_buff *))
434{ 434{
435#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
436 /* Previously seen (loopback)? Ignore. Do this before
437 fragment check. */
438 if ((*pskb)->nfct)
439 return NF_ACCEPT;
440#endif
441
435 /* Gather fragments. */ 442 /* Gather fragments. */
436 if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { 443 if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
437 *pskb = ip_ct_gather_frags(*pskb, 444 *pskb = ip_ct_gather_frags(*pskb,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 12a1cf306f67..d675ff80b04d 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -54,6 +54,7 @@
54 * Marc Boucher : routing by fwmark 54 * Marc Boucher : routing by fwmark
55 * Robert Olsson : Added rt_cache statistics 55 * Robert Olsson : Added rt_cache statistics
56 * Arnaldo C. Melo : Convert proc stuff to seq_file 56 * Arnaldo C. Melo : Convert proc stuff to seq_file
57 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
57 * 58 *
58 * This program is free software; you can redistribute it and/or 59 * This program is free software; you can redistribute it and/or
59 * modify it under the terms of the GNU General Public License 60 * modify it under the terms of the GNU General Public License
@@ -70,6 +71,7 @@
70#include <linux/kernel.h> 71#include <linux/kernel.h>
71#include <linux/sched.h> 72#include <linux/sched.h>
72#include <linux/mm.h> 73#include <linux/mm.h>
74#include <linux/bootmem.h>
73#include <linux/string.h> 75#include <linux/string.h>
74#include <linux/socket.h> 76#include <linux/socket.h>
75#include <linux/sockios.h> 77#include <linux/sockios.h>
@@ -201,8 +203,37 @@ __u8 ip_tos2prio[16] = {
201 203
202struct rt_hash_bucket { 204struct rt_hash_bucket {
203 struct rtable *chain; 205 struct rtable *chain;
204 spinlock_t lock; 206};
205} __attribute__((__aligned__(8))); 207#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
208/*
209 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
210 * The size of this table is a power of two and depends on the number of CPUS.
211 */
212#if NR_CPUS >= 32
213#define RT_HASH_LOCK_SZ 4096
214#elif NR_CPUS >= 16
215#define RT_HASH_LOCK_SZ 2048
216#elif NR_CPUS >= 8
217#define RT_HASH_LOCK_SZ 1024
218#elif NR_CPUS >= 4
219#define RT_HASH_LOCK_SZ 512
220#else
221#define RT_HASH_LOCK_SZ 256
222#endif
223
224static spinlock_t *rt_hash_locks;
225# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
226# define rt_hash_lock_init() { \
227 int i; \
228 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
229 if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
230 for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
231 spin_lock_init(&rt_hash_locks[i]); \
232 }
233#else
234# define rt_hash_lock_addr(slot) NULL
235# define rt_hash_lock_init()
236#endif
206 237
207static struct rt_hash_bucket *rt_hash_table; 238static struct rt_hash_bucket *rt_hash_table;
208static unsigned rt_hash_mask; 239static unsigned rt_hash_mask;
@@ -575,19 +606,26 @@ static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
575/* This runs via a timer and thus is always in BH context. */ 606/* This runs via a timer and thus is always in BH context. */
576static void rt_check_expire(unsigned long dummy) 607static void rt_check_expire(unsigned long dummy)
577{ 608{
578 static int rover; 609 static unsigned int rover;
579 int i = rover, t; 610 unsigned int i = rover, goal;
580 struct rtable *rth, **rthp; 611 struct rtable *rth, **rthp;
581 unsigned long now = jiffies; 612 unsigned long now = jiffies;
582 613 u64 mult;
583 for (t = ip_rt_gc_interval << rt_hash_log; t >= 0; 614
584 t -= ip_rt_gc_timeout) { 615 mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
616 if (ip_rt_gc_timeout > 1)
617 do_div(mult, ip_rt_gc_timeout);
618 goal = (unsigned int)mult;
619 if (goal > rt_hash_mask) goal = rt_hash_mask + 1;
620 for (; goal > 0; goal--) {
585 unsigned long tmo = ip_rt_gc_timeout; 621 unsigned long tmo = ip_rt_gc_timeout;
586 622
587 i = (i + 1) & rt_hash_mask; 623 i = (i + 1) & rt_hash_mask;
588 rthp = &rt_hash_table[i].chain; 624 rthp = &rt_hash_table[i].chain;
589 625
590 spin_lock(&rt_hash_table[i].lock); 626 if (*rthp == 0)
627 continue;
628 spin_lock(rt_hash_lock_addr(i));
591 while ((rth = *rthp) != NULL) { 629 while ((rth = *rthp) != NULL) {
592 if (rth->u.dst.expires) { 630 if (rth->u.dst.expires) {
593 /* Entry is expired even if it is in use */ 631 /* Entry is expired even if it is in use */
@@ -620,14 +658,14 @@ static void rt_check_expire(unsigned long dummy)
620 rt_free(rth); 658 rt_free(rth);
621#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 659#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
622 } 660 }
623 spin_unlock(&rt_hash_table[i].lock); 661 spin_unlock(rt_hash_lock_addr(i));
624 662
625 /* Fallback loop breaker. */ 663 /* Fallback loop breaker. */
626 if (time_after(jiffies, now)) 664 if (time_after(jiffies, now))
627 break; 665 break;
628 } 666 }
629 rover = i; 667 rover = i;
630 mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval); 668 mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
631} 669}
632 670
633/* This can run from both BH and non-BH contexts, the latter 671/* This can run from both BH and non-BH contexts, the latter
@@ -643,11 +681,11 @@ static void rt_run_flush(unsigned long dummy)
643 get_random_bytes(&rt_hash_rnd, 4); 681 get_random_bytes(&rt_hash_rnd, 4);
644 682
645 for (i = rt_hash_mask; i >= 0; i--) { 683 for (i = rt_hash_mask; i >= 0; i--) {
646 spin_lock_bh(&rt_hash_table[i].lock); 684 spin_lock_bh(rt_hash_lock_addr(i));
647 rth = rt_hash_table[i].chain; 685 rth = rt_hash_table[i].chain;
648 if (rth) 686 if (rth)
649 rt_hash_table[i].chain = NULL; 687 rt_hash_table[i].chain = NULL;
650 spin_unlock_bh(&rt_hash_table[i].lock); 688 spin_unlock_bh(rt_hash_lock_addr(i));
651 689
652 for (; rth; rth = next) { 690 for (; rth; rth = next) {
653 next = rth->u.rt_next; 691 next = rth->u.rt_next;
@@ -780,7 +818,7 @@ static int rt_garbage_collect(void)
780 818
781 k = (k + 1) & rt_hash_mask; 819 k = (k + 1) & rt_hash_mask;
782 rthp = &rt_hash_table[k].chain; 820 rthp = &rt_hash_table[k].chain;
783 spin_lock_bh(&rt_hash_table[k].lock); 821 spin_lock_bh(rt_hash_lock_addr(k));
784 while ((rth = *rthp) != NULL) { 822 while ((rth = *rthp) != NULL) {
785 if (!rt_may_expire(rth, tmo, expire)) { 823 if (!rt_may_expire(rth, tmo, expire)) {
786 tmo >>= 1; 824 tmo >>= 1;
@@ -812,7 +850,7 @@ static int rt_garbage_collect(void)
812 goal--; 850 goal--;
813#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 851#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
814 } 852 }
815 spin_unlock_bh(&rt_hash_table[k].lock); 853 spin_unlock_bh(rt_hash_lock_addr(k));
816 if (goal <= 0) 854 if (goal <= 0)
817 break; 855 break;
818 } 856 }
@@ -882,7 +920,7 @@ restart:
882 920
883 rthp = &rt_hash_table[hash].chain; 921 rthp = &rt_hash_table[hash].chain;
884 922
885 spin_lock_bh(&rt_hash_table[hash].lock); 923 spin_lock_bh(rt_hash_lock_addr(hash));
886 while ((rth = *rthp) != NULL) { 924 while ((rth = *rthp) != NULL) {
887#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 925#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
888 if (!(rth->u.dst.flags & DST_BALANCED) && 926 if (!(rth->u.dst.flags & DST_BALANCED) &&
@@ -908,7 +946,7 @@ restart:
908 rth->u.dst.__use++; 946 rth->u.dst.__use++;
909 dst_hold(&rth->u.dst); 947 dst_hold(&rth->u.dst);
910 rth->u.dst.lastuse = now; 948 rth->u.dst.lastuse = now;
911 spin_unlock_bh(&rt_hash_table[hash].lock); 949 spin_unlock_bh(rt_hash_lock_addr(hash));
912 950
913 rt_drop(rt); 951 rt_drop(rt);
914 *rp = rth; 952 *rp = rth;
@@ -949,7 +987,7 @@ restart:
949 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 987 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
950 int err = arp_bind_neighbour(&rt->u.dst); 988 int err = arp_bind_neighbour(&rt->u.dst);
951 if (err) { 989 if (err) {
952 spin_unlock_bh(&rt_hash_table[hash].lock); 990 spin_unlock_bh(rt_hash_lock_addr(hash));
953 991
954 if (err != -ENOBUFS) { 992 if (err != -ENOBUFS) {
955 rt_drop(rt); 993 rt_drop(rt);
@@ -990,7 +1028,7 @@ restart:
990 } 1028 }
991#endif 1029#endif
992 rt_hash_table[hash].chain = rt; 1030 rt_hash_table[hash].chain = rt;
993 spin_unlock_bh(&rt_hash_table[hash].lock); 1031 spin_unlock_bh(rt_hash_lock_addr(hash));
994 *rp = rt; 1032 *rp = rt;
995 return 0; 1033 return 0;
996} 1034}
@@ -1058,7 +1096,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
1058{ 1096{
1059 struct rtable **rthp; 1097 struct rtable **rthp;
1060 1098
1061 spin_lock_bh(&rt_hash_table[hash].lock); 1099 spin_lock_bh(rt_hash_lock_addr(hash));
1062 ip_rt_put(rt); 1100 ip_rt_put(rt);
1063 for (rthp = &rt_hash_table[hash].chain; *rthp; 1101 for (rthp = &rt_hash_table[hash].chain; *rthp;
1064 rthp = &(*rthp)->u.rt_next) 1102 rthp = &(*rthp)->u.rt_next)
@@ -1067,7 +1105,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
1067 rt_free(rt); 1105 rt_free(rt);
1068 break; 1106 break;
1069 } 1107 }
1070 spin_unlock_bh(&rt_hash_table[hash].lock); 1108 spin_unlock_bh(rt_hash_lock_addr(hash));
1071} 1109}
1072 1110
1073void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, 1111void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
@@ -1647,7 +1685,7 @@ static void ip_handle_martian_source(struct net_device *dev,
1647 printk(KERN_WARNING "martian source %u.%u.%u.%u from " 1685 printk(KERN_WARNING "martian source %u.%u.%u.%u from "
1648 "%u.%u.%u.%u, on dev %s\n", 1686 "%u.%u.%u.%u, on dev %s\n",
1649 NIPQUAD(daddr), NIPQUAD(saddr), dev->name); 1687 NIPQUAD(daddr), NIPQUAD(saddr), dev->name);
1650 if (dev->hard_header_len) { 1688 if (dev->hard_header_len && skb->mac.raw) {
1651 int i; 1689 int i;
1652 unsigned char *p = skb->mac.raw; 1690 unsigned char *p = skb->mac.raw;
1653 printk(KERN_WARNING "ll header: "); 1691 printk(KERN_WARNING "ll header: ");
@@ -3073,12 +3111,14 @@ __setup("rhash_entries=", set_rhash_entries);
3073 3111
3074int __init ip_rt_init(void) 3112int __init ip_rt_init(void)
3075{ 3113{
3076 int i, order, goal, rc = 0; 3114 int rc = 0;
3077 3115
3078 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ 3116 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
3079 (jiffies ^ (jiffies >> 7))); 3117 (jiffies ^ (jiffies >> 7)));
3080 3118
3081#ifdef CONFIG_NET_CLS_ROUTE 3119#ifdef CONFIG_NET_CLS_ROUTE
3120 {
3121 int order;
3082 for (order = 0; 3122 for (order = 0;
3083 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++) 3123 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
3084 /* NOTHING */; 3124 /* NOTHING */;
@@ -3086,6 +3126,7 @@ int __init ip_rt_init(void)
3086 if (!ip_rt_acct) 3126 if (!ip_rt_acct)
3087 panic("IP: failed to allocate ip_rt_acct\n"); 3127 panic("IP: failed to allocate ip_rt_acct\n");
3088 memset(ip_rt_acct, 0, PAGE_SIZE << order); 3128 memset(ip_rt_acct, 0, PAGE_SIZE << order);
3129 }
3089#endif 3130#endif
3090 3131
3091 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", 3132 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
@@ -3096,36 +3137,19 @@ int __init ip_rt_init(void)
3096 if (!ipv4_dst_ops.kmem_cachep) 3137 if (!ipv4_dst_ops.kmem_cachep)
3097 panic("IP: failed to allocate ip_dst_cache\n"); 3138 panic("IP: failed to allocate ip_dst_cache\n");
3098 3139
3099 goal = num_physpages >> (26 - PAGE_SHIFT); 3140 rt_hash_table = (struct rt_hash_bucket *)
3100 if (rhash_entries) 3141 alloc_large_system_hash("IP route cache",
3101 goal = (rhash_entries * sizeof(struct rt_hash_bucket)) >> PAGE_SHIFT; 3142 sizeof(struct rt_hash_bucket),
3102 for (order = 0; (1UL << order) < goal; order++) 3143 rhash_entries,
3103 /* NOTHING */; 3144 (num_physpages >= 128 * 1024) ?
3104 3145 (27 - PAGE_SHIFT) :
3105 do { 3146 (29 - PAGE_SHIFT),
3106 rt_hash_mask = (1UL << order) * PAGE_SIZE / 3147 HASH_HIGHMEM,
3107 sizeof(struct rt_hash_bucket); 3148 &rt_hash_log,
3108 while (rt_hash_mask & (rt_hash_mask - 1)) 3149 &rt_hash_mask,
3109 rt_hash_mask--; 3150 0);
3110 rt_hash_table = (struct rt_hash_bucket *) 3151 memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
3111 __get_free_pages(GFP_ATOMIC, order); 3152 rt_hash_lock_init();
3112 } while (rt_hash_table == NULL && --order > 0);
3113
3114 if (!rt_hash_table)
3115 panic("Failed to allocate IP route cache hash table\n");
3116
3117 printk(KERN_INFO "IP: routing cache hash table of %u buckets, %ldKbytes\n",
3118 rt_hash_mask,
3119 (long) (rt_hash_mask * sizeof(struct rt_hash_bucket)) / 1024);
3120
3121 for (rt_hash_log = 0; (1 << rt_hash_log) != rt_hash_mask; rt_hash_log++)
3122 /* NOTHING */;
3123
3124 rt_hash_mask--;
3125 for (i = 0; i <= rt_hash_mask; i++) {
3126 spin_lock_init(&rt_hash_table[i].lock);
3127 rt_hash_table[i].chain = NULL;
3128 }
3129 3153
3130 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); 3154 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
3131 ip_rt_max_size = (rt_hash_mask + 1) * 16; 3155 ip_rt_max_size = (rt_hash_mask + 1) * 16;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 882436da9a3a..ddb6ce4ecff2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
615 size_t psize, int flags) 615 size_t psize, int flags)
616{ 616{
617 struct tcp_sock *tp = tcp_sk(sk); 617 struct tcp_sock *tp = tcp_sk(sk);
618 int mss_now; 618 int mss_now, size_goal;
619 int err; 619 int err;
620 ssize_t copied; 620 ssize_t copied;
621 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 621 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
628 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 628 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
629 629
630 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 630 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
631 size_goal = tp->xmit_size_goal;
631 copied = 0; 632 copied = 0;
632 633
633 err = -EPIPE; 634 err = -EPIPE;
@@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
641 int offset = poffset % PAGE_SIZE; 642 int offset = poffset % PAGE_SIZE;
642 int size = min_t(size_t, psize, PAGE_SIZE - offset); 643 int size = min_t(size_t, psize, PAGE_SIZE - offset);
643 644
644 if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) { 645 if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
645new_segment: 646new_segment:
646 if (!sk_stream_memory_free(sk)) 647 if (!sk_stream_memory_free(sk))
647 goto wait_for_sndbuf; 648 goto wait_for_sndbuf;
@@ -652,7 +653,7 @@ new_segment:
652 goto wait_for_memory; 653 goto wait_for_memory;
653 654
654 skb_entail(sk, tp, skb); 655 skb_entail(sk, tp, skb);
655 copy = mss_now; 656 copy = size_goal;
656 } 657 }
657 658
658 if (copy > size) 659 if (copy > size)
@@ -693,7 +694,7 @@ new_segment:
693 if (!(psize -= copy)) 694 if (!(psize -= copy))
694 goto out; 695 goto out;
695 696
696 if (skb->len != mss_now || (flags & MSG_OOB)) 697 if (skb->len < mss_now || (flags & MSG_OOB))
697 continue; 698 continue;
698 699
699 if (forced_push(tp)) { 700 if (forced_push(tp)) {
@@ -713,6 +714,7 @@ wait_for_memory:
713 goto do_error; 714 goto do_error;
714 715
715 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 716 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
717 size_goal = tp->xmit_size_goal;
716 } 718 }
717 719
718out: 720out:
@@ -754,15 +756,20 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
754 756
755static inline int select_size(struct sock *sk, struct tcp_sock *tp) 757static inline int select_size(struct sock *sk, struct tcp_sock *tp)
756{ 758{
757 int tmp = tp->mss_cache_std; 759 int tmp = tp->mss_cache;
758 760
759 if (sk->sk_route_caps & NETIF_F_SG) { 761 if (sk->sk_route_caps & NETIF_F_SG) {
760 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); 762 if (sk->sk_route_caps & NETIF_F_TSO)
763 tmp = 0;
764 else {
765 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
761 766
762 if (tmp >= pgbreak && 767 if (tmp >= pgbreak &&
763 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE) 768 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
764 tmp = pgbreak; 769 tmp = pgbreak;
770 }
765 } 771 }
772
766 return tmp; 773 return tmp;
767} 774}
768 775
@@ -773,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
773 struct tcp_sock *tp = tcp_sk(sk); 780 struct tcp_sock *tp = tcp_sk(sk);
774 struct sk_buff *skb; 781 struct sk_buff *skb;
775 int iovlen, flags; 782 int iovlen, flags;
776 int mss_now; 783 int mss_now, size_goal;
777 int err, copied; 784 int err, copied;
778 long timeo; 785 long timeo;
779 786
@@ -792,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
792 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 799 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
793 800
794 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 801 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
802 size_goal = tp->xmit_size_goal;
795 803
796 /* Ok commence sending. */ 804 /* Ok commence sending. */
797 iovlen = msg->msg_iovlen; 805 iovlen = msg->msg_iovlen;
@@ -814,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
814 skb = sk->sk_write_queue.prev; 822 skb = sk->sk_write_queue.prev;
815 823
816 if (!sk->sk_send_head || 824 if (!sk->sk_send_head ||
817 (copy = mss_now - skb->len) <= 0) { 825 (copy = size_goal - skb->len) <= 0) {
818 826
819new_segment: 827new_segment:
820 /* Allocate new segment. If the interface is SG, 828 /* Allocate new segment. If the interface is SG,
@@ -837,7 +845,7 @@ new_segment:
837 skb->ip_summed = CHECKSUM_HW; 845 skb->ip_summed = CHECKSUM_HW;
838 846
839 skb_entail(sk, tp, skb); 847 skb_entail(sk, tp, skb);
840 copy = mss_now; 848 copy = size_goal;
841 } 849 }
842 850
843 /* Try to append data to the end of skb. */ 851 /* Try to append data to the end of skb. */
@@ -872,11 +880,6 @@ new_segment:
872 tcp_mark_push(tp, skb); 880 tcp_mark_push(tp, skb);
873 goto new_segment; 881 goto new_segment;
874 } else if (page) { 882 } else if (page) {
875 /* If page is cached, align
876 * offset to L1 cache boundary
877 */
878 off = (off + L1_CACHE_BYTES - 1) &
879 ~(L1_CACHE_BYTES - 1);
880 if (off == PAGE_SIZE) { 883 if (off == PAGE_SIZE) {
881 put_page(page); 884 put_page(page);
882 TCP_PAGE(sk) = page = NULL; 885 TCP_PAGE(sk) = page = NULL;
@@ -937,7 +940,7 @@ new_segment:
937 if ((seglen -= copy) == 0 && iovlen == 0) 940 if ((seglen -= copy) == 0 && iovlen == 0)
938 goto out; 941 goto out;
939 942
940 if (skb->len != mss_now || (flags & MSG_OOB)) 943 if (skb->len < mss_now || (flags & MSG_OOB))
941 continue; 944 continue;
942 945
943 if (forced_push(tp)) { 946 if (forced_push(tp)) {
@@ -957,6 +960,7 @@ wait_for_memory:
957 goto do_error; 960 goto do_error;
958 961
959 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 962 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
963 size_goal = tp->xmit_size_goal;
960 } 964 }
961 } 965 }
962 966
@@ -1101,7 +1105,7 @@ static void tcp_prequeue_process(struct sock *sk)
1101 struct sk_buff *skb; 1105 struct sk_buff *skb;
1102 struct tcp_sock *tp = tcp_sk(sk); 1106 struct tcp_sock *tp = tcp_sk(sk);
1103 1107
1104 NET_ADD_STATS_USER(LINUX_MIB_TCPPREQUEUED, skb_queue_len(&tp->ucopy.prequeue)); 1108 NET_INC_STATS_USER(LINUX_MIB_TCPPREQUEUED);
1105 1109
1106 /* RX process wants to run with disabled BHs, though it is not 1110 /* RX process wants to run with disabled BHs, though it is not
1107 * necessary */ 1111 * necessary */
@@ -1365,7 +1369,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1365 * is not empty. It is more elegant, but eats cycles, 1369 * is not empty. It is more elegant, but eats cycles,
1366 * unfortunately. 1370 * unfortunately.
1367 */ 1371 */
1368 if (skb_queue_len(&tp->ucopy.prequeue)) 1372 if (!skb_queue_empty(&tp->ucopy.prequeue))
1369 goto do_prequeue; 1373 goto do_prequeue;
1370 1374
1371 /* __ Set realtime policy in scheduler __ */ 1375 /* __ Set realtime policy in scheduler __ */
@@ -1390,7 +1394,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
1390 } 1394 }
1391 1395
1392 if (tp->rcv_nxt == tp->copied_seq && 1396 if (tp->rcv_nxt == tp->copied_seq &&
1393 skb_queue_len(&tp->ucopy.prequeue)) { 1397 !skb_queue_empty(&tp->ucopy.prequeue)) {
1394do_prequeue: 1398do_prequeue:
1395 tcp_prequeue_process(sk); 1399 tcp_prequeue_process(sk);
1396 1400
@@ -1472,7 +1476,7 @@ skip_copy:
1472 } while (len > 0); 1476 } while (len > 0);
1473 1477
1474 if (user_recv) { 1478 if (user_recv) {
1475 if (skb_queue_len(&tp->ucopy.prequeue)) { 1479 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
1476 int chunk; 1480 int chunk;
1477 1481
1478 tp->ucopy.len = copied > 0 ? len : 0; 1482 tp->ucopy.len = copied > 0 ? len : 0;
@@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2128 2132
2129 info->tcpi_rto = jiffies_to_usecs(tp->rto); 2133 info->tcpi_rto = jiffies_to_usecs(tp->rto);
2130 info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); 2134 info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
2131 info->tcpi_snd_mss = tp->mss_cache_std; 2135 info->tcpi_snd_mss = tp->mss_cache;
2132 info->tcpi_rcv_mss = tp->ack.rcv_mss; 2136 info->tcpi_rcv_mss = tp->ack.rcv_mss;
2133 2137
2134 info->tcpi_unacked = tp->packets_out; 2138 info->tcpi_unacked = tp->packets_out;
@@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2178 2182
2179 switch (optname) { 2183 switch (optname) {
2180 case TCP_MAXSEG: 2184 case TCP_MAXSEG:
2181 val = tp->mss_cache_std; 2185 val = tp->mss_cache;
2182 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) 2186 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2183 val = tp->rx_opt.user_mss; 2187 val = tp->rx_opt.user_mss;
2184 break; 2188 break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7bbbbc33eb4b..53a8a5399f1e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -740,10 +740,10 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
740 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 740 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
741 741
742 if (!cwnd) { 742 if (!cwnd) {
743 if (tp->mss_cache_std > 1460) 743 if (tp->mss_cache > 1460)
744 cwnd = 2; 744 cwnd = 2;
745 else 745 else
746 cwnd = (tp->mss_cache_std > 1095) ? 3 : 4; 746 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
747 } 747 }
748 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 748 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
749} 749}
@@ -914,7 +914,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
914 if (sk->sk_route_caps & NETIF_F_TSO) { 914 if (sk->sk_route_caps & NETIF_F_TSO) {
915 sk->sk_route_caps &= ~NETIF_F_TSO; 915 sk->sk_route_caps &= ~NETIF_F_TSO;
916 sock_set_flag(sk, SOCK_NO_LARGESEND); 916 sock_set_flag(sk, SOCK_NO_LARGESEND);
917 tp->mss_cache = tp->mss_cache_std; 917 tp->mss_cache = tp->mss_cache;
918 } 918 }
919 919
920 if (!tp->sacked_out) 920 if (!tp->sacked_out)
@@ -1077,7 +1077,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1077 (IsFack(tp) || 1077 (IsFack(tp) ||
1078 !before(lost_retrans, 1078 !before(lost_retrans,
1079 TCP_SKB_CB(skb)->ack_seq + tp->reordering * 1079 TCP_SKB_CB(skb)->ack_seq + tp->reordering *
1080 tp->mss_cache_std))) { 1080 tp->mss_cache))) {
1081 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1081 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1082 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083 1083
@@ -1957,15 +1957,6 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
1957 } 1957 }
1958} 1958}
1959 1959
1960/* There is one downside to this scheme. Although we keep the
1961 * ACK clock ticking, adjusting packet counters and advancing
1962 * congestion window, we do not liberate socket send buffer
1963 * space.
1964 *
1965 * Mucking with skb->truesize and sk->sk_wmem_alloc et al.
1966 * then making a write space wakeup callback is a possible
1967 * future enhancement. WARNING: it is not trivial to make.
1968 */
1969static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, 1960static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
1970 __u32 now, __s32 *seq_rtt) 1961 __u32 now, __s32 *seq_rtt)
1971{ 1962{
@@ -2047,7 +2038,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2047 * the other end. 2038 * the other end.
2048 */ 2039 */
2049 if (after(scb->end_seq, tp->snd_una)) { 2040 if (after(scb->end_seq, tp->snd_una)) {
2050 if (tcp_skb_pcount(skb) > 1) 2041 if (tcp_skb_pcount(skb) > 1 &&
2042 after(tp->snd_una, scb->seq))
2051 acked |= tcp_tso_acked(sk, skb, 2043 acked |= tcp_tso_acked(sk, skb,
2052 now, &seq_rtt); 2044 now, &seq_rtt);
2053 break; 2045 break;
@@ -2810,7 +2802,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
2810 int this_sack; 2802 int this_sack;
2811 2803
2812 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ 2804 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
2813 if (skb_queue_len(&tp->out_of_order_queue) == 0) { 2805 if (skb_queue_empty(&tp->out_of_order_queue)) {
2814 tp->rx_opt.num_sacks = 0; 2806 tp->rx_opt.num_sacks = 0;
2815 tp->rx_opt.eff_sacks = tp->rx_opt.dsack; 2807 tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
2816 return; 2808 return;
@@ -2943,13 +2935,13 @@ queue_and_out:
2943 if(th->fin) 2935 if(th->fin)
2944 tcp_fin(skb, sk, th); 2936 tcp_fin(skb, sk, th);
2945 2937
2946 if (skb_queue_len(&tp->out_of_order_queue)) { 2938 if (!skb_queue_empty(&tp->out_of_order_queue)) {
2947 tcp_ofo_queue(sk); 2939 tcp_ofo_queue(sk);
2948 2940
2949 /* RFC2581. 4.2. SHOULD send immediate ACK, when 2941 /* RFC2581. 4.2. SHOULD send immediate ACK, when
2950 * gap in queue is filled. 2942 * gap in queue is filled.
2951 */ 2943 */
2952 if (!skb_queue_len(&tp->out_of_order_queue)) 2944 if (skb_queue_empty(&tp->out_of_order_queue))
2953 tp->ack.pingpong = 0; 2945 tp->ack.pingpong = 0;
2954 } 2946 }
2955 2947
@@ -3257,9 +3249,8 @@ static int tcp_prune_queue(struct sock *sk)
3257 * This must not ever occur. */ 3249 * This must not ever occur. */
3258 3250
3259 /* First, purge the out_of_order queue. */ 3251 /* First, purge the out_of_order queue. */
3260 if (skb_queue_len(&tp->out_of_order_queue)) { 3252 if (!skb_queue_empty(&tp->out_of_order_queue)) {
3261 NET_ADD_STATS_BH(LINUX_MIB_OFOPRUNED, 3253 NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
3262 skb_queue_len(&tp->out_of_order_queue));
3263 __skb_queue_purge(&tp->out_of_order_queue); 3254 __skb_queue_purge(&tp->out_of_order_queue);
3264 3255
3265 /* Reset SACK state. A conforming SACK implementation will 3256 /* Reset SACK state. A conforming SACK implementation will
@@ -3308,6 +3299,28 @@ void tcp_cwnd_application_limited(struct sock *sk)
3308 tp->snd_cwnd_stamp = tcp_time_stamp; 3299 tp->snd_cwnd_stamp = tcp_time_stamp;
3309} 3300}
3310 3301
3302static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
3303{
3304 /* If the user specified a specific send buffer setting, do
3305 * not modify it.
3306 */
3307 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
3308 return 0;
3309
3310 /* If we are under global TCP memory pressure, do not expand. */
3311 if (tcp_memory_pressure)
3312 return 0;
3313
3314 /* If we are under soft global TCP memory pressure, do not expand. */
3315 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
3316 return 0;
3317
3318 /* If we filled the congestion window, do not expand. */
3319 if (tp->packets_out >= tp->snd_cwnd)
3320 return 0;
3321
3322 return 1;
3323}
3311 3324
3312/* When incoming ACK allowed to free some skb from write_queue, 3325/* When incoming ACK allowed to free some skb from write_queue,
3313 * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket 3326 * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
@@ -3319,11 +3332,8 @@ static void tcp_new_space(struct sock *sk)
3319{ 3332{
3320 struct tcp_sock *tp = tcp_sk(sk); 3333 struct tcp_sock *tp = tcp_sk(sk);
3321 3334
3322 if (tp->packets_out < tp->snd_cwnd && 3335 if (tcp_should_expand_sndbuf(sk, tp)) {
3323 !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && 3336 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
3324 !tcp_memory_pressure &&
3325 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
3326 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) +
3327 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), 3337 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
3328 demanded = max_t(unsigned int, tp->snd_cwnd, 3338 demanded = max_t(unsigned int, tp->snd_cwnd,
3329 tp->reordering + 1); 3339 tp->reordering + 1);
@@ -3346,22 +3356,9 @@ static inline void tcp_check_space(struct sock *sk)
3346 } 3356 }
3347} 3357}
3348 3358
3349static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb) 3359static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
3350{
3351 struct tcp_sock *tp = tcp_sk(sk);
3352
3353 if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) ||
3354 tcp_packets_in_flight(tp) >= tp->snd_cwnd ||
3355 tcp_write_xmit(sk, tp->nonagle))
3356 tcp_check_probe_timer(sk, tp);
3357}
3358
3359static __inline__ void tcp_data_snd_check(struct sock *sk)
3360{ 3360{
3361 struct sk_buff *skb = sk->sk_send_head; 3361 tcp_push_pending_frames(sk, tp);
3362
3363 if (skb != NULL)
3364 __tcp_data_snd_check(sk, skb);
3365 tcp_check_space(sk); 3362 tcp_check_space(sk);
3366} 3363}
3367 3364
@@ -3655,7 +3652,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3655 */ 3652 */
3656 tcp_ack(sk, skb, 0); 3653 tcp_ack(sk, skb, 0);
3657 __kfree_skb(skb); 3654 __kfree_skb(skb);
3658 tcp_data_snd_check(sk); 3655 tcp_data_snd_check(sk, tp);
3659 return 0; 3656 return 0;
3660 } else { /* Header too small */ 3657 } else { /* Header too small */
3661 TCP_INC_STATS_BH(TCP_MIB_INERRS); 3658 TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -3721,7 +3718,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3721 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { 3718 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
3722 /* Well, only one small jumplet in fast path... */ 3719 /* Well, only one small jumplet in fast path... */
3723 tcp_ack(sk, skb, FLAG_DATA); 3720 tcp_ack(sk, skb, FLAG_DATA);
3724 tcp_data_snd_check(sk); 3721 tcp_data_snd_check(sk, tp);
3725 if (!tcp_ack_scheduled(tp)) 3722 if (!tcp_ack_scheduled(tp))
3726 goto no_ack; 3723 goto no_ack;
3727 } 3724 }
@@ -3799,7 +3796,7 @@ step5:
3799 /* step 7: process the segment text */ 3796 /* step 7: process the segment text */
3800 tcp_data_queue(sk, skb); 3797 tcp_data_queue(sk, skb);
3801 3798
3802 tcp_data_snd_check(sk); 3799 tcp_data_snd_check(sk, tp);
3803 tcp_ack_snd_check(sk); 3800 tcp_ack_snd_check(sk);
3804 return 0; 3801 return 0;
3805 3802
@@ -4109,7 +4106,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4109 /* Do step6 onward by hand. */ 4106 /* Do step6 onward by hand. */
4110 tcp_urg(sk, skb, th); 4107 tcp_urg(sk, skb, th);
4111 __kfree_skb(skb); 4108 __kfree_skb(skb);
4112 tcp_data_snd_check(sk); 4109 tcp_data_snd_check(sk, tp);
4113 return 0; 4110 return 0;
4114 } 4111 }
4115 4112
@@ -4300,7 +4297,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4300 4297
4301 /* tcp_data could move socket to TIME-WAIT */ 4298 /* tcp_data could move socket to TIME-WAIT */
4302 if (sk->sk_state != TCP_CLOSE) { 4299 if (sk->sk_state != TCP_CLOSE) {
4303 tcp_data_snd_check(sk); 4300 tcp_data_snd_check(sk, tp);
4304 tcp_ack_snd_check(sk); 4301 tcp_ack_snd_check(sk);
4305 } 4302 }
4306 4303
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ebf112347a97..62f62bb05c2a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2045,7 +2045,7 @@ static int tcp_v4_init_sock(struct sock *sk)
2045 */ 2045 */
2046 tp->snd_ssthresh = 0x7fffffff; /* Infinity */ 2046 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
2047 tp->snd_cwnd_clamp = ~0; 2047 tp->snd_cwnd_clamp = ~0;
2048 tp->mss_cache_std = tp->mss_cache = 536; 2048 tp->mss_cache = 536;
2049 2049
2050 tp->reordering = sysctl_tcp_reordering; 2050 tp->reordering = sysctl_tcp_reordering;
2051 tp->ca_ops = &tcp_init_congestion_ops; 2051 tp->ca_ops = &tcp_init_congestion_ops;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e17c244875c..e3f8ea1bfa9c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -49,7 +49,7 @@ int sysctl_tcp_retrans_collapse = 1;
49 * will allow a single TSO frame to consume. Building TSO frames 49 * will allow a single TSO frame to consume. Building TSO frames
50 * which are too large can cause TCP streams to be bursty. 50 * which are too large can cause TCP streams to be bursty.
51 */ 51 */
52int sysctl_tcp_tso_win_divisor = 8; 52int sysctl_tcp_tso_win_divisor = 3;
53 53
54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, 54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
55 struct sk_buff *skb) 55 struct sk_buff *skb)
@@ -140,11 +140,11 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
140 tp->ack.pingpong = 1; 140 tp->ack.pingpong = 1;
141} 141}
142 142
143static __inline__ void tcp_event_ack_sent(struct sock *sk) 143static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
144{ 144{
145 struct tcp_sock *tp = tcp_sk(sk); 145 struct tcp_sock *tp = tcp_sk(sk);
146 146
147 tcp_dec_quickack_mode(tp); 147 tcp_dec_quickack_mode(tp, pkts);
148 tcp_clear_xmit_timer(sk, TCP_TIME_DACK); 148 tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
149} 149}
150 150
@@ -355,7 +355,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
355 tp->af_specific->send_check(sk, th, skb->len, skb); 355 tp->af_specific->send_check(sk, th, skb->len, skb);
356 356
357 if (tcb->flags & TCPCB_FLAG_ACK) 357 if (tcb->flags & TCPCB_FLAG_ACK)
358 tcp_event_ack_sent(sk); 358 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
359 359
360 if (skb->len != tcp_header_size) 360 if (skb->len != tcp_header_size)
361 tcp_event_data_sent(tp, skb, sk); 361 tcp_event_data_sent(tp, skb, sk);
@@ -403,42 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
403 sk->sk_send_head = skb; 403 sk->sk_send_head = skb;
404} 404}
405 405
406static inline void tcp_tso_set_push(struct sk_buff *skb) 406static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
407{
408 /* Force push to be on for any TSO frames to workaround
409 * problems with busted implementations like Mac OS-X that
410 * hold off socket receive wakeups until push is seen.
411 */
412 if (tcp_skb_pcount(skb) > 1)
413 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
414}
415
416/* Send _single_ skb sitting at the send head. This function requires
417 * true push pending frames to setup probe timer etc.
418 */
419void tcp_push_one(struct sock *sk, unsigned cur_mss)
420{ 407{
421 struct tcp_sock *tp = tcp_sk(sk); 408 struct tcp_sock *tp = tcp_sk(sk);
422 struct sk_buff *skb = sk->sk_send_head;
423 409
424 if (tcp_snd_test(sk, skb, cur_mss, TCP_NAGLE_PUSH)) { 410 if (skb->len <= tp->mss_cache ||
425 /* Send it out now. */
426 TCP_SKB_CB(skb)->when = tcp_time_stamp;
427 tcp_tso_set_push(skb);
428 if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
429 sk->sk_send_head = NULL;
430 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
431 tcp_packets_out_inc(sk, tp, skb);
432 return;
433 }
434 }
435}
436
437void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
438{
439 struct tcp_sock *tp = tcp_sk(sk);
440
441 if (skb->len <= tp->mss_cache_std ||
442 !(sk->sk_route_caps & NETIF_F_TSO)) { 411 !(sk->sk_route_caps & NETIF_F_TSO)) {
443 /* Avoid the costly divide in the normal 412 /* Avoid the costly divide in the normal
444 * non-TSO case. 413 * non-TSO case.
@@ -448,10 +417,10 @@ void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
448 } else { 417 } else {
449 unsigned int factor; 418 unsigned int factor;
450 419
451 factor = skb->len + (tp->mss_cache_std - 1); 420 factor = skb->len + (tp->mss_cache - 1);
452 factor /= tp->mss_cache_std; 421 factor /= tp->mss_cache;
453 skb_shinfo(skb)->tso_segs = factor; 422 skb_shinfo(skb)->tso_segs = factor;
454 skb_shinfo(skb)->tso_size = tp->mss_cache_std; 423 skb_shinfo(skb)->tso_size = tp->mss_cache;
455 } 424 }
456} 425}
457 426
@@ -537,6 +506,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
537 } 506 }
538 507
539 /* Link BUFF into the send queue. */ 508 /* Link BUFF into the send queue. */
509 skb_header_release(buff);
540 __skb_append(skb, buff); 510 __skb_append(skb, buff);
541 511
542 return 0; 512 return 0;
@@ -657,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
657 627
658 /* And store cached results */ 628 /* And store cached results */
659 tp->pmtu_cookie = pmtu; 629 tp->pmtu_cookie = pmtu;
660 tp->mss_cache = tp->mss_cache_std = mss_now; 630 tp->mss_cache = mss_now;
661 631
662 return mss_now; 632 return mss_now;
663} 633}
@@ -669,57 +639,316 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
669 * cannot be large. However, taking into account rare use of URG, this 639 * cannot be large. However, taking into account rare use of URG, this
670 * is not a big flaw. 640 * is not a big flaw.
671 */ 641 */
672 642unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
673unsigned int tcp_current_mss(struct sock *sk, int large)
674{ 643{
675 struct tcp_sock *tp = tcp_sk(sk); 644 struct tcp_sock *tp = tcp_sk(sk);
676 struct dst_entry *dst = __sk_dst_get(sk); 645 struct dst_entry *dst = __sk_dst_get(sk);
677 unsigned int do_large, mss_now; 646 u32 mss_now;
647 u16 xmit_size_goal;
648 int doing_tso = 0;
649
650 mss_now = tp->mss_cache;
651
652 if (large_allowed &&
653 (sk->sk_route_caps & NETIF_F_TSO) &&
654 !tp->urg_mode)
655 doing_tso = 1;
678 656
679 mss_now = tp->mss_cache_std;
680 if (dst) { 657 if (dst) {
681 u32 mtu = dst_mtu(dst); 658 u32 mtu = dst_mtu(dst);
682 if (mtu != tp->pmtu_cookie) 659 if (mtu != tp->pmtu_cookie)
683 mss_now = tcp_sync_mss(sk, mtu); 660 mss_now = tcp_sync_mss(sk, mtu);
684 } 661 }
685 662
686 do_large = (large && 663 if (tp->rx_opt.eff_sacks)
687 (sk->sk_route_caps & NETIF_F_TSO) && 664 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
688 !tp->urg_mode); 665 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
689 666
690 if (do_large) { 667 xmit_size_goal = mss_now;
691 unsigned int large_mss, factor, limit;
692 668
693 large_mss = 65535 - tp->af_specific->net_header_len - 669 if (doing_tso) {
670 xmit_size_goal = 65535 -
671 tp->af_specific->net_header_len -
694 tp->ext_header_len - tp->tcp_header_len; 672 tp->ext_header_len - tp->tcp_header_len;
695 673
696 if (tp->max_window && large_mss > (tp->max_window>>1)) 674 if (tp->max_window &&
697 large_mss = max((tp->max_window>>1), 675 (xmit_size_goal > (tp->max_window >> 1)))
698 68U - tp->tcp_header_len); 676 xmit_size_goal = max((tp->max_window >> 1),
677 68U - tp->tcp_header_len);
678
679 xmit_size_goal -= (xmit_size_goal % mss_now);
680 }
681 tp->xmit_size_goal = xmit_size_goal;
699 682
700 factor = large_mss / mss_now; 683 return mss_now;
684}
701 685
702 /* Always keep large mss multiple of real mss, but 686/* Congestion window validation. (RFC2861) */
703 * do not exceed 1/tso_win_divisor of the congestion window
704 * so we can keep the ACK clock ticking and minimize
705 * bursting.
706 */
707 limit = tp->snd_cwnd;
708 if (sysctl_tcp_tso_win_divisor)
709 limit /= sysctl_tcp_tso_win_divisor;
710 limit = max(1U, limit);
711 if (factor > limit)
712 factor = limit;
713 687
714 tp->mss_cache = mss_now * factor; 688static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
689{
690 __u32 packets_out = tp->packets_out;
691
692 if (packets_out >= tp->snd_cwnd) {
693 /* Network is feed fully. */
694 tp->snd_cwnd_used = 0;
695 tp->snd_cwnd_stamp = tcp_time_stamp;
696 } else {
697 /* Network starves. */
698 if (tp->packets_out > tp->snd_cwnd_used)
699 tp->snd_cwnd_used = tp->packets_out;
715 700
716 mss_now = tp->mss_cache; 701 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
702 tcp_cwnd_application_limited(sk);
717 } 703 }
704}
718 705
719 if (tp->rx_opt.eff_sacks) 706static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
720 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + 707{
721 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); 708 u32 window, cwnd_len;
722 return mss_now; 709
710 window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
711 cwnd_len = mss_now * cwnd;
712 return min(window, cwnd_len);
713}
714
715/* Can at least one segment of SKB be sent right now, according to the
716 * congestion window rules? If so, return how many segments are allowed.
717 */
718static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
719{
720 u32 in_flight, cwnd;
721
722 /* Don't be strict about the congestion window for the final FIN. */
723 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
724 return 1;
725
726 in_flight = tcp_packets_in_flight(tp);
727 cwnd = tp->snd_cwnd;
728 if (in_flight < cwnd)
729 return (cwnd - in_flight);
730
731 return 0;
732}
733
734/* This must be invoked the first time we consider transmitting
735 * SKB onto the wire.
736 */
737static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
738{
739 int tso_segs = tcp_skb_pcount(skb);
740
741 if (!tso_segs) {
742 tcp_set_skb_tso_segs(sk, skb);
743 tso_segs = tcp_skb_pcount(skb);
744 }
745 return tso_segs;
746}
747
748static inline int tcp_minshall_check(const struct tcp_sock *tp)
749{
750 return after(tp->snd_sml,tp->snd_una) &&
751 !after(tp->snd_sml, tp->snd_nxt);
752}
753
754/* Return 0, if packet can be sent now without violation Nagle's rules:
755 * 1. It is full sized.
756 * 2. Or it contains FIN. (already checked by caller)
757 * 3. Or TCP_NODELAY was set.
758 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
759 * With Minshall's modification: all sent small packets are ACKed.
760 */
761
762static inline int tcp_nagle_check(const struct tcp_sock *tp,
763 const struct sk_buff *skb,
764 unsigned mss_now, int nonagle)
765{
766 return (skb->len < mss_now &&
767 ((nonagle&TCP_NAGLE_CORK) ||
768 (!nonagle &&
769 tp->packets_out &&
770 tcp_minshall_check(tp))));
771}
772
773/* Return non-zero if the Nagle test allows this packet to be
774 * sent now.
775 */
776static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
777 unsigned int cur_mss, int nonagle)
778{
779 /* Nagle rule does not apply to frames, which sit in the middle of the
780 * write_queue (they have no chances to get new data).
781 *
782 * This is implemented in the callers, where they modify the 'nonagle'
783 * argument based upon the location of SKB in the send queue.
784 */
785 if (nonagle & TCP_NAGLE_PUSH)
786 return 1;
787
788 /* Don't use the nagle rule for urgent data (or for the final FIN). */
789 if (tp->urg_mode ||
790 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
791 return 1;
792
793 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
794 return 1;
795
796 return 0;
797}
798
799/* Does at least the first segment of SKB fit into the send window? */
800static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
801{
802 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
803
804 if (skb->len > cur_mss)
805 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
806
807 return !after(end_seq, tp->snd_una + tp->snd_wnd);
808}
809
810/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
811 * should be put on the wire right now. If so, it returns the number of
812 * packets allowed by the congestion window.
813 */
814static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
815 unsigned int cur_mss, int nonagle)
816{
817 struct tcp_sock *tp = tcp_sk(sk);
818 unsigned int cwnd_quota;
819
820 tcp_init_tso_segs(sk, skb);
821
822 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
823 return 0;
824
825 cwnd_quota = tcp_cwnd_test(tp, skb);
826 if (cwnd_quota &&
827 !tcp_snd_wnd_test(tp, skb, cur_mss))
828 cwnd_quota = 0;
829
830 return cwnd_quota;
831}
832
833static inline int tcp_skb_is_last(const struct sock *sk,
834 const struct sk_buff *skb)
835{
836 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
837}
838
839int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
840{
841 struct sk_buff *skb = sk->sk_send_head;
842
843 return (skb &&
844 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
845 (tcp_skb_is_last(sk, skb) ?
846 TCP_NAGLE_PUSH :
847 tp->nonagle)));
848}
849
850/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
851 * which is put after SKB on the list. It is very much like
852 * tcp_fragment() except that it may make several kinds of assumptions
853 * in order to speed up the splitting operation. In particular, we
854 * know that all the data is in scatter-gather pages, and that the
855 * packet has never been sent out before (and thus is not cloned).
856 */
857static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
858{
859 struct sk_buff *buff;
860 int nlen = skb->len - len;
861 u16 flags;
862
863 /* All of a TSO frame must be composed of paged data. */
864 BUG_ON(skb->len != skb->data_len);
865
866 buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
867 if (unlikely(buff == NULL))
868 return -ENOMEM;
869
870 buff->truesize = nlen;
871 skb->truesize -= nlen;
872
873 /* Correct the sequence numbers. */
874 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
875 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
876 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
877
878 /* PSH and FIN should only be set in the second packet. */
879 flags = TCP_SKB_CB(skb)->flags;
880 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
881 TCP_SKB_CB(buff)->flags = flags;
882
883 /* This packet was never sent out yet, so no SACK bits. */
884 TCP_SKB_CB(buff)->sacked = 0;
885
886 buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
887 skb_split(skb, buff, len);
888
889 /* Fix up tso_factor for both original and new SKB. */
890 tcp_set_skb_tso_segs(sk, skb);
891 tcp_set_skb_tso_segs(sk, buff);
892
893 /* Link BUFF into the send queue. */
894 skb_header_release(buff);
895 __skb_append(skb, buff);
896
897 return 0;
898}
899
900/* Try to defer sending, if possible, in order to minimize the amount
901 * of TSO splitting we do. View it as a kind of TSO Nagle test.
902 *
903 * This algorithm is from John Heffner.
904 */
905static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
906{
907 u32 send_win, cong_win, limit, in_flight;
908
909 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
910 return 0;
911
912 if (tp->ca_state != TCP_CA_Open)
913 return 0;
914
915 in_flight = tcp_packets_in_flight(tp);
916
917 BUG_ON(tcp_skb_pcount(skb) <= 1 ||
918 (tp->snd_cwnd <= in_flight));
919
920 send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
921
922 /* From in_flight test above, we know that cwnd > in_flight. */
923 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
924
925 limit = min(send_win, cong_win);
926
927 /* If sk_send_head can be sent fully now, just do it. */
928 if (skb->len <= limit)
929 return 0;
930
931 if (sysctl_tcp_tso_win_divisor) {
932 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
933
934 /* If at least some fraction of a window is available,
935 * just use it.
936 */
937 chunk /= sysctl_tcp_tso_win_divisor;
938 if (limit >= chunk)
939 return 0;
940 } else {
941 /* Different approach, try not to defer past a single
942 * ACK. Receiver should ACK every other full sized
943 * frame, so if we have space for more than 3 frames
944 * then send now.
945 */
946 if (limit > tcp_max_burst(tp) * tp->mss_cache)
947 return 0;
948 }
949
950 /* Ok, it looks like it is advisable to defer. */
951 return 1;
723} 952}
724 953
725/* This routine writes packets to the network. It advances the 954/* This routine writes packets to the network. It advances the
@@ -729,57 +958,158 @@ unsigned int tcp_current_mss(struct sock *sk, int large)
729 * Returns 1, if no segments are in flight and we have queued segments, but 958 * Returns 1, if no segments are in flight and we have queued segments, but
730 * cannot send anything now because of SWS or another problem. 959 * cannot send anything now because of SWS or another problem.
731 */ 960 */
732int tcp_write_xmit(struct sock *sk, int nonagle) 961static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
733{ 962{
734 struct tcp_sock *tp = tcp_sk(sk); 963 struct tcp_sock *tp = tcp_sk(sk);
735 unsigned int mss_now; 964 struct sk_buff *skb;
965 unsigned int tso_segs, sent_pkts;
966 int cwnd_quota;
736 967
737 /* If we are closed, the bytes will have to remain here. 968 /* If we are closed, the bytes will have to remain here.
738 * In time closedown will finish, we empty the write queue and all 969 * In time closedown will finish, we empty the write queue and all
739 * will be happy. 970 * will be happy.
740 */ 971 */
741 if (sk->sk_state != TCP_CLOSE) { 972 if (unlikely(sk->sk_state == TCP_CLOSE))
742 struct sk_buff *skb; 973 return 0;
743 int sent_pkts = 0; 974
975 skb = sk->sk_send_head;
976 if (unlikely(!skb))
977 return 0;
978
979 tso_segs = tcp_init_tso_segs(sk, skb);
980 cwnd_quota = tcp_cwnd_test(tp, skb);
981 if (unlikely(!cwnd_quota))
982 goto out;
983
984 sent_pkts = 0;
985 while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
986 BUG_ON(!tso_segs);
987
988 if (tso_segs == 1) {
989 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
990 (tcp_skb_is_last(sk, skb) ?
991 nonagle : TCP_NAGLE_PUSH))))
992 break;
993 } else {
994 if (tcp_tso_should_defer(sk, tp, skb))
995 break;
996 }
744 997
745 /* Account for SACKS, we may need to fragment due to this. 998 if (tso_segs > 1) {
746 * It is just like the real MSS changing on us midstream. 999 u32 limit = tcp_window_allows(tp, skb,
747 * We also handle things correctly when the user adds some 1000 mss_now, cwnd_quota);
748 * IP options mid-stream. Silly to do, but cover it. 1001
749 */ 1002 if (skb->len < limit) {
750 mss_now = tcp_current_mss(sk, 1); 1003 unsigned int trim = skb->len % mss_now;
751 1004
752 while ((skb = sk->sk_send_head) && 1005 if (trim)
753 tcp_snd_test(sk, skb, mss_now, 1006 limit = skb->len - trim;
754 tcp_skb_is_last(sk, skb) ? nonagle : 1007 }
755 TCP_NAGLE_PUSH)) { 1008 if (skb->len > limit) {
756 if (skb->len > mss_now) { 1009 if (tso_fragment(sk, skb, limit))
757 if (tcp_fragment(sk, skb, mss_now))
758 break; 1010 break;
759 } 1011 }
760 1012 } else if (unlikely(skb->len > mss_now)) {
761 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1013 if (unlikely(tcp_fragment(sk, skb, mss_now)))
762 tcp_tso_set_push(skb);
763 if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
764 break; 1014 break;
1015 }
765 1016
766 /* Advance the send_head. This one is sent out. 1017 TCP_SKB_CB(skb)->when = tcp_time_stamp;
767 * This call will increment packets_out. 1018
768 */ 1019 if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
769 update_send_head(sk, tp, skb); 1020 break;
1021
1022 /* Advance the send_head. This one is sent out.
1023 * This call will increment packets_out.
1024 */
1025 update_send_head(sk, tp, skb);
1026
1027 tcp_minshall_update(tp, mss_now, skb);
1028 sent_pkts++;
1029
1030 /* Do not optimize this to use tso_segs. If we chopped up
1031 * the packet above, tso_segs will no longer be valid.
1032 */
1033 cwnd_quota -= tcp_skb_pcount(skb);
1034
1035 BUG_ON(cwnd_quota < 0);
1036 if (!cwnd_quota)
1037 break;
1038
1039 skb = sk->sk_send_head;
1040 if (!skb)
1041 break;
1042 tso_segs = tcp_init_tso_segs(sk, skb);
1043 }
1044
1045 if (likely(sent_pkts)) {
1046 tcp_cwnd_validate(sk, tp);
1047 return 0;
1048 }
1049out:
1050 return !tp->packets_out && sk->sk_send_head;
1051}
1052
1053/* Push out any pending frames which were held back due to
1054 * TCP_CORK or attempt at coalescing tiny packets.
1055 * The socket must be locked by the caller.
1056 */
1057void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
1058 unsigned int cur_mss, int nonagle)
1059{
1060 struct sk_buff *skb = sk->sk_send_head;
770 1061
771 tcp_minshall_update(tp, mss_now, skb); 1062 if (skb) {
772 sent_pkts = 1; 1063 if (tcp_write_xmit(sk, cur_mss, nonagle))
1064 tcp_check_probe_timer(sk, tp);
1065 }
1066}
1067
1068/* Send _single_ skb sitting at the send head. This function requires
1069 * true push pending frames to setup probe timer etc.
1070 */
1071void tcp_push_one(struct sock *sk, unsigned int mss_now)
1072{
1073 struct tcp_sock *tp = tcp_sk(sk);
1074 struct sk_buff *skb = sk->sk_send_head;
1075 unsigned int tso_segs, cwnd_quota;
1076
1077 BUG_ON(!skb || skb->len < mss_now);
1078
1079 tso_segs = tcp_init_tso_segs(sk, skb);
1080 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
1081
1082 if (likely(cwnd_quota)) {
1083 BUG_ON(!tso_segs);
1084
1085 if (tso_segs > 1) {
1086 u32 limit = tcp_window_allows(tp, skb,
1087 mss_now, cwnd_quota);
1088
1089 if (skb->len < limit) {
1090 unsigned int trim = skb->len % mss_now;
1091
1092 if (trim)
1093 limit = skb->len - trim;
1094 }
1095 if (skb->len > limit) {
1096 if (unlikely(tso_fragment(sk, skb, limit)))
1097 return;
1098 }
1099 } else if (unlikely(skb->len > mss_now)) {
1100 if (unlikely(tcp_fragment(sk, skb, mss_now)))
1101 return;
773 } 1102 }
774 1103
775 if (sent_pkts) { 1104 /* Send it out now. */
1105 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1106
1107 if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
1108 update_send_head(sk, tp, skb);
776 tcp_cwnd_validate(sk, tp); 1109 tcp_cwnd_validate(sk, tp);
777 return 0; 1110 return;
778 } 1111 }
779
780 return !tp->packets_out && sk->sk_send_head;
781 } 1112 }
782 return 0;
783} 1113}
784 1114
785/* This function returns the amount that we can raise the 1115/* This function returns the amount that we can raise the
@@ -1039,7 +1369,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1039 if (sk->sk_route_caps & NETIF_F_TSO) { 1369 if (sk->sk_route_caps & NETIF_F_TSO) {
1040 sk->sk_route_caps &= ~NETIF_F_TSO; 1370 sk->sk_route_caps &= ~NETIF_F_TSO;
1041 sock_set_flag(sk, SOCK_NO_LARGESEND); 1371 sock_set_flag(sk, SOCK_NO_LARGESEND);
1042 tp->mss_cache = tp->mss_cache_std;
1043 } 1372 }
1044 1373
1045 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) 1374 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
@@ -1101,7 +1430,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1101 * is still in somebody's hands, else make a clone. 1430 * is still in somebody's hands, else make a clone.
1102 */ 1431 */
1103 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1432 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1104 tcp_tso_set_push(skb);
1105 1433
1106 err = tcp_transmit_skb(sk, (skb_cloned(skb) ? 1434 err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
1107 pskb_copy(skb, GFP_ATOMIC): 1435 pskb_copy(skb, GFP_ATOMIC):
@@ -1285,7 +1613,7 @@ void tcp_send_fin(struct sock *sk)
1285 * was unread data in the receive queue. This behavior is recommended 1613 * was unread data in the receive queue. This behavior is recommended
1286 * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM 1614 * by draft-ietf-tcpimpl-prob-03.txt section 3.10. -DaveM
1287 */ 1615 */
1288void tcp_send_active_reset(struct sock *sk, int priority) 1616void tcp_send_active_reset(struct sock *sk, unsigned int __nocast priority)
1289{ 1617{
1290 struct tcp_sock *tp = tcp_sk(sk); 1618 struct tcp_sock *tp = tcp_sk(sk);
1291 struct sk_buff *skb; 1619 struct sk_buff *skb;
@@ -1670,14 +1998,12 @@ int tcp_write_wakeup(struct sock *sk)
1670 if (sk->sk_route_caps & NETIF_F_TSO) { 1998 if (sk->sk_route_caps & NETIF_F_TSO) {
1671 sock_set_flag(sk, SOCK_NO_LARGESEND); 1999 sock_set_flag(sk, SOCK_NO_LARGESEND);
1672 sk->sk_route_caps &= ~NETIF_F_TSO; 2000 sk->sk_route_caps &= ~NETIF_F_TSO;
1673 tp->mss_cache = tp->mss_cache_std;
1674 } 2001 }
1675 } else if (!tcp_skb_pcount(skb)) 2002 } else if (!tcp_skb_pcount(skb))
1676 tcp_set_skb_tso_segs(sk, skb); 2003 tcp_set_skb_tso_segs(sk, skb);
1677 2004
1678 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2005 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
1679 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2006 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1680 tcp_tso_set_push(skb);
1681 err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); 2007 err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
1682 if (!err) { 2008 if (!err) {
1683 update_send_head(sk, tp, skb); 2009 update_send_head(sk, tp, skb);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b127b4498565..0084227438c2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -231,11 +231,10 @@ static void tcp_delack_timer(unsigned long data)
231 } 231 }
232 tp->ack.pending &= ~TCP_ACK_TIMER; 232 tp->ack.pending &= ~TCP_ACK_TIMER;
233 233
234 if (skb_queue_len(&tp->ucopy.prequeue)) { 234 if (!skb_queue_empty(&tp->ucopy.prequeue)) {
235 struct sk_buff *skb; 235 struct sk_buff *skb;
236 236
237 NET_ADD_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED, 237 NET_INC_STATS_BH(LINUX_MIB_TCPSCHEDULERFAILED);
238 skb_queue_len(&tp->ucopy.prequeue));
239 238
240 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) 239 while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
241 sk->sk_backlog_rcv(sk, skb); 240 sk->sk_backlog_rcv(sk, skb);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index e66ca9381cfd..95163cd52ae0 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -1,6 +1,26 @@
1# 1#
2# IPv6 configuration 2# IPv6 configuration
3# 3#
4
5# IPv6 as module will cause a CRASH if you try to unload it
6config IPV6
7 tristate "The IPv6 protocol"
8 default m
9 select CRYPTO if IPV6_PRIVACY
10 select CRYPTO_MD5 if IPV6_PRIVACY
11 ---help---
12 This is complemental support for the IP version 6.
13 You will still be able to do traditional IPv4 networking as well.
14
15 For general information about IPv6, see
16 <http://playground.sun.com/pub/ipng/html/ipng-main.html>.
17 For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
18 For specific information about IPv6 under Linux, read the HOWTO at
19 <http://www.bieringer.de/linux/IPv6/>.
20
21 To compile this protocol support as a module, choose M here: the
22 module will be called ipv6.
23
4config IPV6_PRIVACY 24config IPV6_PRIVACY
5 bool "IPv6: Privacy Extensions (RFC 3041) support" 25 bool "IPv6: Privacy Extensions (RFC 3041) support"
6 depends on IPV6 26 depends on IPV6
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2b193e3df49a..28d9bcab0970 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -774,7 +774,6 @@ static int __init inet6_init(void)
774 if (if6_proc_init()) 774 if (if6_proc_init())
775 goto proc_if6_fail; 775 goto proc_if6_fail;
776#endif 776#endif
777 ipv6_packet_init();
778 ip6_route_init(); 777 ip6_route_init();
779 ip6_flowlabel_init(); 778 ip6_flowlabel_init();
780 err = addrconf_init(); 779 err = addrconf_init();
@@ -791,6 +790,8 @@ static int __init inet6_init(void)
791 /* Init v6 transport protocols. */ 790 /* Init v6 transport protocols. */
792 udpv6_init(); 791 udpv6_init();
793 tcpv6_init(); 792 tcpv6_init();
793
794 ipv6_packet_init();
794 err = 0; 795 err = 0;
795out: 796out:
796 return err; 797 return err;
@@ -798,7 +799,6 @@ out:
798addrconf_fail: 799addrconf_fail:
799 ip6_flowlabel_cleanup(); 800 ip6_flowlabel_cleanup();
800 ip6_route_cleanup(); 801 ip6_route_cleanup();
801 ipv6_packet_cleanup();
802#ifdef CONFIG_PROC_FS 802#ifdef CONFIG_PROC_FS
803 if6_proc_exit(); 803 if6_proc_exit();
804proc_if6_fail: 804proc_if6_fail:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 06e7cdaeedc5..1f2c2f9e353f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -465,7 +465,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
465 to->pkt_type = from->pkt_type; 465 to->pkt_type = from->pkt_type;
466 to->priority = from->priority; 466 to->priority = from->priority;
467 to->protocol = from->protocol; 467 to->protocol = from->protocol;
468 to->security = from->security;
469 dst_release(to->dst); 468 dst_release(to->dst);
470 to->dst = dst_clone(from->dst); 469 to->dst = dst_clone(from->dst);
471 to->dev = from->dev; 470 to->dev = from->dev;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 562fcd14fdea..29fed6e58d0a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -281,7 +281,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr)
281 } 281 }
282 write_unlock_bh(&ipv6_sk_mc_lock); 282 write_unlock_bh(&ipv6_sk_mc_lock);
283 283
284 return -ENOENT; 284 return -EADDRNOTAVAIL;
285} 285}
286 286
287static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex) 287static struct inet6_dev *ip6_mc_find_dev(struct in6_addr *group, int ifindex)
@@ -386,12 +386,16 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
386 if (ipv6_addr_equal(&pmc->addr, group)) 386 if (ipv6_addr_equal(&pmc->addr, group))
387 break; 387 break;
388 } 388 }
389 if (!pmc) /* must have a prior join */ 389 if (!pmc) { /* must have a prior join */
390 err = -EINVAL;
390 goto done; 391 goto done;
392 }
391 /* if a source filter was set, must be the same mode as before */ 393 /* if a source filter was set, must be the same mode as before */
392 if (pmc->sflist) { 394 if (pmc->sflist) {
393 if (pmc->sfmode != omode) 395 if (pmc->sfmode != omode) {
396 err = -EINVAL;
394 goto done; 397 goto done;
398 }
395 } else if (pmc->sfmode != omode) { 399 } else if (pmc->sfmode != omode) {
396 /* allow mode switches for empty-set filters */ 400 /* allow mode switches for empty-set filters */
397 ip6_mc_add_src(idev, group, omode, 0, NULL, 0); 401 ip6_mc_add_src(idev, group, omode, 0, NULL, 0);
@@ -402,7 +406,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
402 psl = pmc->sflist; 406 psl = pmc->sflist;
403 if (!add) { 407 if (!add) {
404 if (!psl) 408 if (!psl)
405 goto done; 409 goto done; /* err = -EADDRNOTAVAIL */
406 rv = !0; 410 rv = !0;
407 for (i=0; i<psl->sl_count; i++) { 411 for (i=0; i<psl->sl_count; i++) {
408 rv = memcmp(&psl->sl_addr[i], source, 412 rv = memcmp(&psl->sl_addr[i], source,
@@ -411,7 +415,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
411 break; 415 break;
412 } 416 }
413 if (rv) /* source not found */ 417 if (rv) /* source not found */
414 goto done; 418 goto done; /* err = -EADDRNOTAVAIL */
415 419
416 /* special case - (INCLUDE, empty) == LEAVE_GROUP */ 420 /* special case - (INCLUDE, empty) == LEAVE_GROUP */
417 if (psl->sl_count == 1 && omode == MCAST_INCLUDE) { 421 if (psl->sl_count == 1 && omode == MCAST_INCLUDE) {
@@ -488,6 +492,7 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
488 struct inet6_dev *idev; 492 struct inet6_dev *idev;
489 struct ipv6_pinfo *inet6 = inet6_sk(sk); 493 struct ipv6_pinfo *inet6 = inet6_sk(sk);
490 struct ip6_sf_socklist *newpsl, *psl; 494 struct ip6_sf_socklist *newpsl, *psl;
495 int leavegroup = 0;
491 int i, err; 496 int i, err;
492 497
493 group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr; 498 group = &((struct sockaddr_in6 *)&gsf->gf_group)->sin6_addr;
@@ -503,7 +508,12 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
503 if (!idev) 508 if (!idev)
504 return -ENODEV; 509 return -ENODEV;
505 dev = idev->dev; 510 dev = idev->dev;
506 err = -EADDRNOTAVAIL; 511
512 err = 0;
513 if (gsf->gf_fmode == MCAST_INCLUDE && gsf->gf_numsrc == 0) {
514 leavegroup = 1;
515 goto done;
516 }
507 517
508 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) { 518 for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
509 if (pmc->ifindex != gsf->gf_interface) 519 if (pmc->ifindex != gsf->gf_interface)
@@ -511,8 +521,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
511 if (ipv6_addr_equal(&pmc->addr, group)) 521 if (ipv6_addr_equal(&pmc->addr, group))
512 break; 522 break;
513 } 523 }
514 if (!pmc) /* must have a prior join */ 524 if (!pmc) { /* must have a prior join */
525 err = -EINVAL;
515 goto done; 526 goto done;
527 }
516 if (gsf->gf_numsrc) { 528 if (gsf->gf_numsrc) {
517 newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk, 529 newpsl = (struct ip6_sf_socklist *)sock_kmalloc(sk,
518 IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC); 530 IP6_SFLSIZE(gsf->gf_numsrc), GFP_ATOMIC);
@@ -544,10 +556,13 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
544 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0); 556 (void) ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
545 pmc->sflist = newpsl; 557 pmc->sflist = newpsl;
546 pmc->sfmode = gsf->gf_fmode; 558 pmc->sfmode = gsf->gf_fmode;
559 err = 0;
547done: 560done:
548 read_unlock_bh(&idev->lock); 561 read_unlock_bh(&idev->lock);
549 in6_dev_put(idev); 562 in6_dev_put(idev);
550 dev_put(dev); 563 dev_put(dev);
564 if (leavegroup)
565 err = ipv6_sock_mc_drop(sk, gsf->gf_interface, group);
551 return err; 566 return err;
552} 567}
553 568
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9dac7fdf4726..f6e288dc116e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2018,7 +2018,7 @@ static int tcp_v6_init_sock(struct sock *sk)
2018 */ 2018 */
2019 tp->snd_ssthresh = 0x7fffffff; 2019 tp->snd_ssthresh = 0x7fffffff;
2020 tp->snd_cwnd_clamp = ~0; 2020 tp->snd_cwnd_clamp = ~0;
2021 tp->mss_cache_std = tp->mss_cache = 536; 2021 tp->mss_cache = 536;
2022 2022
2023 tp->reordering = sysctl_tcp_reordering; 2023 tp->reordering = sysctl_tcp_reordering;
2024 2024
diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig
index a16237c0e783..980a826f5d02 100644
--- a/net/ipx/Kconfig
+++ b/net/ipx/Kconfig
@@ -1,6 +1,39 @@
1# 1#
2# IPX configuration 2# IPX configuration
3# 3#
4config IPX
5 tristate "The IPX protocol"
6 select LLC
7 ---help---
8 This is support for the Novell networking protocol, IPX, commonly
9 used for local networks of Windows machines. You need it if you
10 want to access Novell NetWare file or print servers using the Linux
11 Novell client ncpfs (available from
12 <ftp://platan.vc.cvut.cz/pub/linux/ncpfs/>) or from
13 within the Linux DOS emulator DOSEMU (read the DOSEMU-HOWTO,
14 available from <http://www.tldp.org/docs.html#howto>). In order
15 to do the former, you'll also have to say Y to "NCP file system
16 support", below.
17
18 IPX is similar in scope to IP, while SPX, which runs on top of IPX,
19 is similar to TCP. There is also experimental support for SPX in
20 Linux (see "SPX networking", below).
21
22 To turn your Linux box into a fully featured NetWare file server and
23 IPX router, say Y here and fetch either lwared from
24 <ftp://ibiblio.org/pub/Linux/system/network/daemons/> or
25 mars_nwe from <ftp://www.compu-art.de/mars_nwe/>. For more
26 information, read the IPX-HOWTO available from
27 <http://www.tldp.org/docs.html#howto>.
28
29 General information about how to connect Linux, Windows machines and
30 Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
31
32 The IPX driver would enlarge your kernel by about 16 KB. To compile
33 this driver as a module, choose M here: the module will be called ipx.
34 Unless you want to integrate your Linux box with a local Novell
35 network, say N.
36
4config IPX_INTERN 37config IPX_INTERN
5 bool "IPX: Full internal IPX network" 38 bool "IPX: Full internal IPX network"
6 depends on IPX 39 depends on IPX
diff --git a/net/irda/irlap.c b/net/irda/irlap.c
index 046ad0750e48..7029618f5719 100644
--- a/net/irda/irlap.c
+++ b/net/irda/irlap.c
@@ -445,9 +445,8 @@ void irlap_disconnect_request(struct irlap_cb *self)
445 IRDA_ASSERT(self->magic == LAP_MAGIC, return;); 445 IRDA_ASSERT(self->magic == LAP_MAGIC, return;);
446 446
447 /* Don't disconnect until all data frames are successfully sent */ 447 /* Don't disconnect until all data frames are successfully sent */
448 if (skb_queue_len(&self->txq) > 0) { 448 if (!skb_queue_empty(&self->txq)) {
449 self->disconnect_pending = TRUE; 449 self->disconnect_pending = TRUE;
450
451 return; 450 return;
452 } 451 }
453 452
diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c
index 1cd89f5f3b75..a505b5457608 100644
--- a/net/irda/irlap_event.c
+++ b/net/irda/irlap_event.c
@@ -191,7 +191,7 @@ static void irlap_start_poll_timer(struct irlap_cb *self, int timeout)
191 * Send out the RR frames faster if our own transmit queue is empty, or 191 * Send out the RR frames faster if our own transmit queue is empty, or
192 * if the peer is busy. The effect is a much faster conversation 192 * if the peer is busy. The effect is a much faster conversation
193 */ 193 */
194 if ((skb_queue_len(&self->txq) == 0) || (self->remote_busy)) { 194 if (skb_queue_empty(&self->txq) || self->remote_busy) {
195 if (self->fast_RR == TRUE) { 195 if (self->fast_RR == TRUE) {
196 /* 196 /*
197 * Assert that the fast poll timer has not reached the 197 * Assert that the fast poll timer has not reached the
@@ -263,7 +263,7 @@ void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event,
263 IRDA_DEBUG(2, "%s() : queue len = %d\n", __FUNCTION__, 263 IRDA_DEBUG(2, "%s() : queue len = %d\n", __FUNCTION__,
264 skb_queue_len(&self->txq)); 264 skb_queue_len(&self->txq));
265 265
266 if (skb_queue_len(&self->txq)) { 266 if (!skb_queue_empty(&self->txq)) {
267 /* Prevent race conditions with irlap_data_request() */ 267 /* Prevent race conditions with irlap_data_request() */
268 self->local_busy = TRUE; 268 self->local_busy = TRUE;
269 269
@@ -1074,7 +1074,7 @@ static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event,
1074#else /* CONFIG_IRDA_DYNAMIC_WINDOW */ 1074#else /* CONFIG_IRDA_DYNAMIC_WINDOW */
1075 /* Window has been adjusted for the max packet 1075 /* Window has been adjusted for the max packet
1076 * size, so much simpler... - Jean II */ 1076 * size, so much simpler... - Jean II */
1077 nextfit = (skb_queue_len(&self->txq) > 0); 1077 nextfit = !skb_queue_empty(&self->txq);
1078#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ 1078#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
1079 /* 1079 /*
1080 * Send data with poll bit cleared only if window > 1 1080 * Send data with poll bit cleared only if window > 1
@@ -1814,7 +1814,7 @@ static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event,
1814#else /* CONFIG_IRDA_DYNAMIC_WINDOW */ 1814#else /* CONFIG_IRDA_DYNAMIC_WINDOW */
1815 /* Window has been adjusted for the max packet 1815 /* Window has been adjusted for the max packet
1816 * size, so much simpler... - Jean II */ 1816 * size, so much simpler... - Jean II */
1817 nextfit = (skb_queue_len(&self->txq) > 0); 1817 nextfit = !skb_queue_empty(&self->txq);
1818#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ 1818#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */
1819 /* 1819 /*
1820 * Send data with final bit cleared only if window > 1 1820 * Send data with final bit cleared only if window > 1
@@ -1937,7 +1937,7 @@ static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
1937 irlap_data_indication(self, skb, FALSE); 1937 irlap_data_indication(self, skb, FALSE);
1938 1938
1939 /* Any pending data requests? */ 1939 /* Any pending data requests? */
1940 if ((skb_queue_len(&self->txq) > 0) && 1940 if (!skb_queue_empty(&self->txq) &&
1941 (self->window > 0)) 1941 (self->window > 0))
1942 { 1942 {
1943 self->ack_required = TRUE; 1943 self->ack_required = TRUE;
@@ -2038,7 +2038,7 @@ static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
2038 /* 2038 /*
2039 * Any pending data requests? 2039 * Any pending data requests?
2040 */ 2040 */
2041 if ((skb_queue_len(&self->txq) > 0) && 2041 if (!skb_queue_empty(&self->txq) &&
2042 (self->window > 0) && !self->remote_busy) 2042 (self->window > 0) && !self->remote_busy)
2043 { 2043 {
2044 irlap_data_indication(self, skb, TRUE); 2044 irlap_data_indication(self, skb, TRUE);
@@ -2069,7 +2069,7 @@ static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event,
2069 */ 2069 */
2070 nr_status = irlap_validate_nr_received(self, info->nr); 2070 nr_status = irlap_validate_nr_received(self, info->nr);
2071 if (nr_status == NR_EXPECTED) { 2071 if (nr_status == NR_EXPECTED) {
2072 if ((skb_queue_len( &self->txq) > 0) && 2072 if (!skb_queue_empty(&self->txq) &&
2073 (self->window > 0)) { 2073 (self->window > 0)) {
2074 self->remote_busy = FALSE; 2074 self->remote_busy = FALSE;
2075 2075
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 040abe714aa3..6dafbb43b529 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -1018,11 +1018,10 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
1018 /* 1018 /*
1019 * We can now fill the window with additional data frames 1019 * We can now fill the window with additional data frames
1020 */ 1020 */
1021 while (skb_queue_len( &self->txq) > 0) { 1021 while (!skb_queue_empty(&self->txq)) {
1022 1022
1023 IRDA_DEBUG(0, "%s(), sending additional frames!\n", __FUNCTION__); 1023 IRDA_DEBUG(0, "%s(), sending additional frames!\n", __FUNCTION__);
1024 if ((skb_queue_len( &self->txq) > 0) && 1024 if (self->window > 0) {
1025 (self->window > 0)) {
1026 skb = skb_dequeue( &self->txq); 1025 skb = skb_dequeue( &self->txq);
1027 IRDA_ASSERT(skb != NULL, return;); 1026 IRDA_ASSERT(skb != NULL, return;);
1028 1027
@@ -1031,8 +1030,7 @@ void irlap_resend_rejected_frames(struct irlap_cb *self, int command)
1031 * bit cleared 1030 * bit cleared
1032 */ 1031 */
1033 if ((self->window > 1) && 1032 if ((self->window > 1) &&
1034 skb_queue_len(&self->txq) > 0) 1033 !skb_queue_empty(&self->txq)) {
1035 {
1036 irlap_send_data_primary(self, skb); 1034 irlap_send_data_primary(self, skb);
1037 } else { 1035 } else {
1038 irlap_send_data_primary_poll(self, skb); 1036 irlap_send_data_primary_poll(self, skb);
diff --git a/net/irda/irttp.c b/net/irda/irttp.c
index d091ccf773b3..6602d901f8b1 100644
--- a/net/irda/irttp.c
+++ b/net/irda/irttp.c
@@ -1513,7 +1513,7 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata,
1513 /* 1513 /*
1514 * Check if there is still data segments in the transmit queue 1514 * Check if there is still data segments in the transmit queue
1515 */ 1515 */
1516 if (skb_queue_len(&self->tx_queue) > 0) { 1516 if (!skb_queue_empty(&self->tx_queue)) {
1517 if (priority == P_HIGH) { 1517 if (priority == P_HIGH) {
1518 /* 1518 /*
1519 * No need to send the queued data, if we are 1519 * No need to send the queued data, if we are
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
new file mode 100644
index 000000000000..f0b5efb31a00
--- /dev/null
+++ b/net/lapb/Kconfig
@@ -0,0 +1,22 @@
1#
2# LAPB Data Link Drive
3#
4
5config LAPB
6 tristate "LAPB Data Link Driver (EXPERIMENTAL)"
7 depends on EXPERIMENTAL
8 ---help---
9 Link Access Procedure, Balanced (LAPB) is the data link layer (i.e.
10 the lower) part of the X.25 protocol. It offers a reliable
11 connection service to exchange data frames with one other host, and
12 it is used to transport higher level protocols (mostly X.25 Packet
13 Layer, the higher part of X.25, but others are possible as well).
14 Usually, LAPB is used with specialized X.21 network cards, but Linux
15 currently supports LAPB only over Ethernet connections. If you want
16 to use LAPB connections over Ethernet, say Y here and to "LAPB over
17 Ethernet driver" below. Read
18 <file:Documentation/networking/lapb-module.txt> for technical
19 details.
20
21 To compile this driver as a module, choose M here: the
22 module will be called lapb. If unsure, say N.
diff --git a/net/llc/llc_c_ev.c b/net/llc/llc_c_ev.c
index cd130c3b72bc..d5bdb53a348f 100644
--- a/net/llc/llc_c_ev.c
+++ b/net/llc/llc_c_ev.c
@@ -84,7 +84,7 @@ static u16 llc_util_nr_inside_tx_window(struct sock *sk, u8 nr)
84 if (llc->dev->flags & IFF_LOOPBACK) 84 if (llc->dev->flags & IFF_LOOPBACK)
85 goto out; 85 goto out;
86 rc = 1; 86 rc = 1;
87 if (!skb_queue_len(&llc->pdu_unack_q)) 87 if (skb_queue_empty(&llc->pdu_unack_q))
88 goto out; 88 goto out;
89 skb = skb_peek(&llc->pdu_unack_q); 89 skb = skb_peek(&llc->pdu_unack_q);
90 pdu = llc_pdu_sn_hdr(skb); 90 pdu = llc_pdu_sn_hdr(skb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fc456a7aaec3..3405fdf41b93 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -858,7 +858,7 @@ static inline void netlink_rcv_wake(struct sock *sk)
858{ 858{
859 struct netlink_sock *nlk = nlk_sk(sk); 859 struct netlink_sock *nlk = nlk_sk(sk);
860 860
861 if (!skb_queue_len(&sk->sk_receive_queue)) 861 if (skb_queue_empty(&sk->sk_receive_queue))
862 clear_bit(0, &nlk->state); 862 clear_bit(0, &nlk->state);
863 if (!test_bit(0, &nlk->state)) 863 if (!test_bit(0, &nlk->state))
864 wake_up_interruptible(&nlk->wait); 864 wake_up_interruptible(&nlk->wait);
diff --git a/net/packet/Kconfig b/net/packet/Kconfig
new file mode 100644
index 000000000000..34ff93ff894d
--- /dev/null
+++ b/net/packet/Kconfig
@@ -0,0 +1,26 @@
1#
2# Packet configuration
3#
4
5config PACKET
6 tristate "Packet socket"
7 ---help---
8 The Packet protocol is used by applications which communicate
9 directly with network devices without an intermediate network
10 protocol implemented in the kernel, e.g. tcpdump. If you want them
11 to work, choose Y.
12
13 To compile this driver as a module, choose M here: the module will
14 be called af_packet.
15
16 If unsure, say Y.
17
18config PACKET_MMAP
19 bool "Packet socket: mmapped IO"
20 depends on PACKET
21 help
22 If you say Y here, the Packet protocol driver will use an IO
23 mechanism that results in faster communication.
24
25 If unsure, say N.
26
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 0269616e75a1..c9d5980aa4de 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -274,6 +274,9 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct
274 dst_release(skb->dst); 274 dst_release(skb->dst);
275 skb->dst = NULL; 275 skb->dst = NULL;
276 276
277 /* drop conntrack reference */
278 nf_reset(skb);
279
277 spkt = (struct sockaddr_pkt*)skb->cb; 280 spkt = (struct sockaddr_pkt*)skb->cb;
278 281
279 skb_push(skb, skb->data-skb->mac.raw); 282 skb_push(skb, skb->data-skb->mac.raw);
@@ -517,6 +520,9 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
517 dst_release(skb->dst); 520 dst_release(skb->dst);
518 skb->dst = NULL; 521 skb->dst = NULL;
519 522
523 /* drop conntrack reference */
524 nf_reset(skb);
525
520 spin_lock(&sk->sk_receive_queue.lock); 526 spin_lock(&sk->sk_receive_queue.lock);
521 po->stats.tp_packets++; 527 po->stats.tp_packets++;
522 __skb_queue_tail(&sk->sk_receive_queue, skb); 528 __skb_queue_tail(&sk->sk_receive_queue, skb);
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 7bac249258e3..59d3e71f8b85 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -1,6 +1,43 @@
1# 1#
2# Traffic control configuration. 2# Traffic control configuration.
3# 3#
4
5menuconfig NET_SCHED
6 bool "QoS and/or fair queueing"
7 ---help---
8 When the kernel has several packets to send out over a network
9 device, it has to decide which ones to send first, which ones to
10 delay, and which ones to drop. This is the job of the packet
11 scheduler, and several different algorithms for how to do this
12 "fairly" have been proposed.
13
14 If you say N here, you will get the standard packet scheduler, which
15 is a FIFO (first come, first served). If you say Y here, you will be
16 able to choose from among several alternative algorithms which can
17 then be attached to different network devices. This is useful for
18 example if some of your network devices are real time devices that
19 need a certain minimum data flow rate, or if you need to limit the
20 maximum data flow rate for traffic which matches specified criteria.
21 This code is considered to be experimental.
22
23 To administer these schedulers, you'll need the user-level utilities
24 from the package iproute2+tc at <ftp://ftp.tux.org/pub/net/ip-routing/>.
25 That package also contains some documentation; for more, check out
26 <http://snafu.freedom.org/linux2.2/iproute-notes.html>.
27
28 This Quality of Service (QoS) support will enable you to use
29 Differentiated Services (diffserv) and Resource Reservation Protocol
30 (RSVP) on your Linux router if you also say Y to "QoS support",
31 "Packet classifier API" and to some classifiers below. Documentation
32 and software is at <http://diffserv.sourceforge.net/>.
33
34 If you say Y here and to "/proc file system" below, you will be able
35 to read status information about packet schedulers from the file
36 /proc/net/psched.
37
38 The available schedulers are listed in the following questions; you
39 can say Y to as many as you like. If unsure, say N now.
40
4choice 41choice
5 prompt "Packet scheduler clock source" 42 prompt "Packet scheduler clock source"
6 depends on NET_SCHED 43 depends on NET_SCHED
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8f58cecd6266..e48d0d456b3e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-y := sch_generic.o 5obj-y := sch_generic.o
6 6
7obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o 7obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o
8obj-$(CONFIG_NET_CLS) += cls_api.o 8obj-$(CONFIG_NET_CLS) += cls_api.o
9obj-$(CONFIG_NET_CLS_ACT) += act_api.o 9obj-$(CONFIG_NET_CLS_ACT) += act_api.o
10obj-$(CONFIG_NET_ACT_POLICE) += police.o 10obj-$(CONFIG_NET_ACT_POLICE) += police.o
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 48bb23c2a35a..53d98f8d3d80 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -205,11 +205,6 @@ META_COLLECTOR(int_protocol)
205 dst->value = skb->protocol; 205 dst->value = skb->protocol;
206} 206}
207 207
208META_COLLECTOR(int_security)
209{
210 dst->value = skb->security;
211}
212
213META_COLLECTOR(int_pkttype) 208META_COLLECTOR(int_pkttype)
214{ 209{
215 dst->value = skb->pkt_type; 210 dst->value = skb->pkt_type;
@@ -524,7 +519,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
524 [META_ID(REALDEV)] = META_FUNC(int_realdev), 519 [META_ID(REALDEV)] = META_FUNC(int_realdev),
525 [META_ID(PRIORITY)] = META_FUNC(int_priority), 520 [META_ID(PRIORITY)] = META_FUNC(int_priority),
526 [META_ID(PROTOCOL)] = META_FUNC(int_protocol), 521 [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
527 [META_ID(SECURITY)] = META_FUNC(int_security),
528 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), 522 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
529 [META_ID(PKTLEN)] = META_FUNC(int_pktlen), 523 [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
530 [META_ID(DATALEN)] = META_FUNC(int_datalen), 524 [META_ID(DATALEN)] = META_FUNC(int_datalen),
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 05e6e0a799da..b9a069af4a02 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -399,10 +399,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
399{ 399{
400 int err; 400 int err;
401 struct rtattr *kind = tca[TCA_KIND-1]; 401 struct rtattr *kind = tca[TCA_KIND-1];
402 void *p = NULL;
403 struct Qdisc *sch; 402 struct Qdisc *sch;
404 struct Qdisc_ops *ops; 403 struct Qdisc_ops *ops;
405 int size;
406 404
407 ops = qdisc_lookup_ops(kind); 405 ops = qdisc_lookup_ops(kind);
408#ifdef CONFIG_KMOD 406#ifdef CONFIG_KMOD
@@ -437,64 +435,55 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
437 if (ops == NULL) 435 if (ops == NULL)
438 goto err_out; 436 goto err_out;
439 437
440 /* ensure that the Qdisc and the private data are 32-byte aligned */ 438 sch = qdisc_alloc(dev, ops);
441 size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); 439 if (IS_ERR(sch)) {
442 size += ops->priv_size + QDISC_ALIGN_CONST; 440 err = PTR_ERR(sch);
443
444 p = kmalloc(size, GFP_KERNEL);
445 err = -ENOBUFS;
446 if (!p)
447 goto err_out2; 441 goto err_out2;
448 memset(p, 0, size); 442 }
449 sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
450 & ~QDISC_ALIGN_CONST);
451 sch->padded = (char *)sch - (char *)p;
452
453 INIT_LIST_HEAD(&sch->list);
454 skb_queue_head_init(&sch->q);
455 443
456 if (handle == TC_H_INGRESS) 444 if (handle == TC_H_INGRESS) {
457 sch->flags |= TCQ_F_INGRESS; 445 sch->flags |= TCQ_F_INGRESS;
458 446 handle = TC_H_MAKE(TC_H_INGRESS, 0);
459 sch->ops = ops; 447 } else if (handle == 0) {
460 sch->enqueue = ops->enqueue;
461 sch->dequeue = ops->dequeue;
462 sch->dev = dev;
463 dev_hold(dev);
464 atomic_set(&sch->refcnt, 1);
465 sch->stats_lock = &dev->queue_lock;
466 if (handle == 0) {
467 handle = qdisc_alloc_handle(dev); 448 handle = qdisc_alloc_handle(dev);
468 err = -ENOMEM; 449 err = -ENOMEM;
469 if (handle == 0) 450 if (handle == 0)
470 goto err_out3; 451 goto err_out3;
471 } 452 }
472 453
473 if (handle == TC_H_INGRESS) 454 sch->handle = handle;
474 sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
475 else
476 sch->handle = handle;
477 455
478 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { 456 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
457#ifdef CONFIG_NET_ESTIMATOR
458 if (tca[TCA_RATE-1]) {
459 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
460 sch->stats_lock,
461 tca[TCA_RATE-1]);
462 if (err) {
463 /*
464 * Any broken qdiscs that would require
465 * a ops->reset() here? The qdisc was never
466 * in action so it shouldn't be necessary.
467 */
468 if (ops->destroy)
469 ops->destroy(sch);
470 goto err_out3;
471 }
472 }
473#endif
479 qdisc_lock_tree(dev); 474 qdisc_lock_tree(dev);
480 list_add_tail(&sch->list, &dev->qdisc_list); 475 list_add_tail(&sch->list, &dev->qdisc_list);
481 qdisc_unlock_tree(dev); 476 qdisc_unlock_tree(dev);
482 477
483#ifdef CONFIG_NET_ESTIMATOR
484 if (tca[TCA_RATE-1])
485 gen_new_estimator(&sch->bstats, &sch->rate_est,
486 sch->stats_lock, tca[TCA_RATE-1]);
487#endif
488 return sch; 478 return sch;
489 } 479 }
490err_out3: 480err_out3:
491 dev_put(dev); 481 dev_put(dev);
482 kfree((char *) sch - sch->padded);
492err_out2: 483err_out2:
493 module_put(ops->owner); 484 module_put(ops->owner);
494err_out: 485err_out:
495 *errp = err; 486 *errp = err;
496 if (p)
497 kfree(p);
498 return NULL; 487 return NULL;
499} 488}
500 489
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
new file mode 100644
index 000000000000..81f0b8346d17
--- /dev/null
+++ b/net/sched/sch_blackhole.c
@@ -0,0 +1,54 @@
1/*
2 * net/sched/sch_blackhole.c Black hole queue
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 *
11 * Note: Quantum tunneling is not supported.
12 */
13
14#include <linux/config.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/netdevice.h>
19#include <linux/skbuff.h>
20#include <net/pkt_sched.h>
21
22static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
23{
24 qdisc_drop(skb, sch);
25 return NET_XMIT_SUCCESS;
26}
27
28static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
29{
30 return NULL;
31}
32
33static struct Qdisc_ops blackhole_qdisc_ops = {
34 .id = "blackhole",
35 .priv_size = 0,
36 .enqueue = blackhole_enqueue,
37 .dequeue = blackhole_dequeue,
38 .owner = THIS_MODULE,
39};
40
41static int __init blackhole_module_init(void)
42{
43 return register_qdisc(&blackhole_qdisc_ops);
44}
45
46static void __exit blackhole_module_exit(void)
47{
48 unregister_qdisc(&blackhole_qdisc_ops);
49}
50
51module_init(blackhole_module_init)
52module_exit(blackhole_module_exit)
53
54MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7683b34dc6a9..73e218e646ac 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -395,24 +395,23 @@ static struct Qdisc_ops pfifo_fast_ops = {
395 .owner = THIS_MODULE, 395 .owner = THIS_MODULE,
396}; 396};
397 397
398struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) 398struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
399{ 399{
400 void *p; 400 void *p;
401 struct Qdisc *sch; 401 struct Qdisc *sch;
402 int size; 402 unsigned int size;
403 int err = -ENOBUFS;
403 404
404 /* ensure that the Qdisc and the private data are 32-byte aligned */ 405 /* ensure that the Qdisc and the private data are 32-byte aligned */
405 size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); 406 size = QDISC_ALIGN(sizeof(*sch));
406 size += ops->priv_size + QDISC_ALIGN_CONST; 407 size += ops->priv_size + (QDISC_ALIGNTO - 1);
407 408
408 p = kmalloc(size, GFP_KERNEL); 409 p = kmalloc(size, GFP_KERNEL);
409 if (!p) 410 if (!p)
410 return NULL; 411 goto errout;
411 memset(p, 0, size); 412 memset(p, 0, size);
412 413 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
413 sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) 414 sch->padded = (char *) sch - (char *) p;
414 & ~QDISC_ALIGN_CONST);
415 sch->padded = (char *)sch - (char *)p;
416 415
417 INIT_LIST_HEAD(&sch->list); 416 INIT_LIST_HEAD(&sch->list);
418 skb_queue_head_init(&sch->q); 417 skb_queue_head_init(&sch->q);
@@ -423,11 +422,24 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
423 dev_hold(dev); 422 dev_hold(dev);
424 sch->stats_lock = &dev->queue_lock; 423 sch->stats_lock = &dev->queue_lock;
425 atomic_set(&sch->refcnt, 1); 424 atomic_set(&sch->refcnt, 1);
425
426 return sch;
427errout:
428 return ERR_PTR(-err);
429}
430
431struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
432{
433 struct Qdisc *sch;
434
435 sch = qdisc_alloc(dev, ops);
436 if (IS_ERR(sch))
437 goto errout;
438
426 if (!ops->init || ops->init(sch, NULL) == 0) 439 if (!ops->init || ops->init(sch, NULL) == 0)
427 return sch; 440 return sch;
428 441
429 dev_put(dev); 442errout:
430 kfree(p);
431 return NULL; 443 return NULL;
432} 444}
433 445
@@ -591,6 +603,7 @@ EXPORT_SYMBOL(__netdev_watchdog_up);
591EXPORT_SYMBOL(noop_qdisc); 603EXPORT_SYMBOL(noop_qdisc);
592EXPORT_SYMBOL(noop_qdisc_ops); 604EXPORT_SYMBOL(noop_qdisc_ops);
593EXPORT_SYMBOL(qdisc_create_dflt); 605EXPORT_SYMBOL(qdisc_create_dflt);
606EXPORT_SYMBOL(qdisc_alloc);
594EXPORT_SYMBOL(qdisc_destroy); 607EXPORT_SYMBOL(qdisc_destroy);
595EXPORT_SYMBOL(qdisc_reset); 608EXPORT_SYMBOL(qdisc_reset);
596EXPORT_SYMBOL(qdisc_restart); 609EXPORT_SYMBOL(qdisc_restart);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 664d0e47374f..7845d045eec4 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -385,7 +385,7 @@ static int red_change(struct Qdisc *sch, struct rtattr *opt)
385 memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256); 385 memcpy(q->Stab, RTA_DATA(tb[TCA_RED_STAB-1]), 256);
386 386
387 q->qcount = -1; 387 q->qcount = -1;
388 if (skb_queue_len(&sch->q) == 0) 388 if (skb_queue_empty(&sch->q))
389 PSCHED_SET_PASTPERFECT(q->qidlestart); 389 PSCHED_SET_PASTPERFECT(q->qidlestart);
390 sch_tree_unlock(sch); 390 sch_tree_unlock(sch);
391 return 0; 391 return 0;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 7ae6aa772dab..5b24ae0650d3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -71,7 +71,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
71 const struct sctp_endpoint *ep, 71 const struct sctp_endpoint *ep,
72 const struct sock *sk, 72 const struct sock *sk,
73 sctp_scope_t scope, 73 sctp_scope_t scope,
74 int gfp) 74 unsigned int __nocast gfp)
75{ 75{
76 struct sctp_sock *sp; 76 struct sctp_sock *sp;
77 int i; 77 int i;
@@ -203,7 +203,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
203 */ 203 */
204 asoc->addip_serial = asoc->c.initial_tsn; 204 asoc->addip_serial = asoc->c.initial_tsn;
205 205
206 skb_queue_head_init(&asoc->addip_chunks); 206 INIT_LIST_HEAD(&asoc->addip_chunk_list);
207 207
208 /* Make an empty list of remote transport addresses. */ 208 /* Make an empty list of remote transport addresses. */
209 INIT_LIST_HEAD(&asoc->peer.transport_addr_list); 209 INIT_LIST_HEAD(&asoc->peer.transport_addr_list);
@@ -272,7 +272,8 @@ fail_init:
272/* Allocate and initialize a new association */ 272/* Allocate and initialize a new association */
273struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, 273struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep,
274 const struct sock *sk, 274 const struct sock *sk,
275 sctp_scope_t scope, int gfp) 275 sctp_scope_t scope,
276 unsigned int __nocast gfp)
276{ 277{
277 struct sctp_association *asoc; 278 struct sctp_association *asoc;
278 279
@@ -478,7 +479,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
478/* Add a transport address to an association. */ 479/* Add a transport address to an association. */
479struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, 480struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
480 const union sctp_addr *addr, 481 const union sctp_addr *addr,
481 const int gfp, 482 const unsigned int __nocast gfp,
482 const int peer_state) 483 const int peer_state)
483{ 484{
484 struct sctp_transport *peer; 485 struct sctp_transport *peer;
@@ -1229,7 +1230,8 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
1229/* Build the bind address list for the association based on info from the 1230/* Build the bind address list for the association based on info from the
1230 * local endpoint and the remote peer. 1231 * local endpoint and the remote peer.
1231 */ 1232 */
1232int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, int gfp) 1233int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
1234 unsigned int __nocast gfp)
1233{ 1235{
1234 sctp_scope_t scope; 1236 sctp_scope_t scope;
1235 int flags; 1237 int flags;
@@ -1251,7 +1253,8 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, int gfp)
1251 1253
1252/* Build the association's bind address list from the cookie. */ 1254/* Build the association's bind address list from the cookie. */
1253int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, 1255int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc,
1254 struct sctp_cookie *cookie, int gfp) 1256 struct sctp_cookie *cookie,
1257 unsigned int __nocast gfp)
1255{ 1258{
1256 int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length); 1259 int var_size2 = ntohs(cookie->peer_init->chunk_hdr.length);
1257 int var_size3 = cookie->raw_addr_list_len; 1260 int var_size3 = cookie->raw_addr_list_len;
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index f90eadfb60a2..f71549710f2e 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -53,7 +53,8 @@
53 53
54/* Forward declarations for internal helpers. */ 54/* Forward declarations for internal helpers. */
55static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *, 55static int sctp_copy_one_addr(struct sctp_bind_addr *, union sctp_addr *,
56 sctp_scope_t scope, int gfp, int flags); 56 sctp_scope_t scope, unsigned int __nocast gfp,
57 int flags);
57static void sctp_bind_addr_clean(struct sctp_bind_addr *); 58static void sctp_bind_addr_clean(struct sctp_bind_addr *);
58 59
59/* First Level Abstractions. */ 60/* First Level Abstractions. */
@@ -63,7 +64,8 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *);
63 */ 64 */
64int sctp_bind_addr_copy(struct sctp_bind_addr *dest, 65int sctp_bind_addr_copy(struct sctp_bind_addr *dest,
65 const struct sctp_bind_addr *src, 66 const struct sctp_bind_addr *src,
66 sctp_scope_t scope, int gfp, int flags) 67 sctp_scope_t scope, unsigned int __nocast gfp,
68 int flags)
67{ 69{
68 struct sctp_sockaddr_entry *addr; 70 struct sctp_sockaddr_entry *addr;
69 struct list_head *pos; 71 struct list_head *pos;
@@ -144,7 +146,7 @@ void sctp_bind_addr_free(struct sctp_bind_addr *bp)
144 146
145/* Add an address to the bind address list in the SCTP_bind_addr structure. */ 147/* Add an address to the bind address list in the SCTP_bind_addr structure. */
146int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, 148int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new,
147 int gfp) 149 unsigned int __nocast gfp)
148{ 150{
149 struct sctp_sockaddr_entry *addr; 151 struct sctp_sockaddr_entry *addr;
150 152
@@ -197,7 +199,8 @@ int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr)
197 * The second argument is the return value for the length. 199 * The second argument is the return value for the length.
198 */ 200 */
199union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, 201union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
200 int *addrs_len, int gfp) 202 int *addrs_len,
203 unsigned int __nocast gfp)
201{ 204{
202 union sctp_params addrparms; 205 union sctp_params addrparms;
203 union sctp_params retval; 206 union sctp_params retval;
@@ -249,7 +252,7 @@ end_raw:
249 * address parameters). 252 * address parameters).
250 */ 253 */
251int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, 254int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
252 int addrs_len, __u16 port, int gfp) 255 int addrs_len, __u16 port, unsigned int __nocast gfp)
253{ 256{
254 union sctp_addr_param *rawaddr; 257 union sctp_addr_param *rawaddr;
255 struct sctp_paramhdr *param; 258 struct sctp_paramhdr *param;
@@ -347,7 +350,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
347/* Copy out addresses from the global local address list. */ 350/* Copy out addresses from the global local address list. */
348static int sctp_copy_one_addr(struct sctp_bind_addr *dest, 351static int sctp_copy_one_addr(struct sctp_bind_addr *dest,
349 union sctp_addr *addr, 352 union sctp_addr *addr,
350 sctp_scope_t scope, int gfp, int flags) 353 sctp_scope_t scope, unsigned int __nocast gfp,
354 int flags)
351{ 355{
352 int error = 0; 356 int error = 0;
353 357
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 0c2ab7885058..61da2937e641 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -62,7 +62,7 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
62} 62}
63 63
64/* Allocate and initialize datamsg. */ 64/* Allocate and initialize datamsg. */
65SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(int gfp) 65SCTP_STATIC struct sctp_datamsg *sctp_datamsg_new(unsigned int __nocast gfp)
66{ 66{
67 struct sctp_datamsg *msg; 67 struct sctp_datamsg *msg;
68 msg = kmalloc(sizeof(struct sctp_datamsg), gfp); 68 msg = kmalloc(sizeof(struct sctp_datamsg), gfp);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index c44bf4165c6e..e47ac0d1a6d6 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -67,7 +67,8 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep);
67 * Initialize the base fields of the endpoint structure. 67 * Initialize the base fields of the endpoint structure.
68 */ 68 */
69static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, 69static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
70 struct sock *sk, int gfp) 70 struct sock *sk,
71 unsigned int __nocast gfp)
71{ 72{
72 struct sctp_sock *sp = sctp_sk(sk); 73 struct sctp_sock *sp = sctp_sk(sk);
73 memset(ep, 0, sizeof(struct sctp_endpoint)); 74 memset(ep, 0, sizeof(struct sctp_endpoint));
@@ -137,7 +138,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
137/* Create a sctp_endpoint with all that boring stuff initialized. 138/* Create a sctp_endpoint with all that boring stuff initialized.
138 * Returns NULL if there isn't enough memory. 139 * Returns NULL if there isn't enough memory.
139 */ 140 */
140struct sctp_endpoint *sctp_endpoint_new(struct sock *sk, int gfp) 141struct sctp_endpoint *sctp_endpoint_new(struct sock *sk,
142 unsigned int __nocast gfp)
141{ 143{
142 struct sctp_endpoint *ep; 144 struct sctp_endpoint *ep;
143 145
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 339f7acfdb64..5e085e041a6e 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -115,6 +115,17 @@ static void sctp_rcv_set_owner_r(struct sk_buff *skb, struct sock *sk)
115 atomic_add(sizeof(struct sctp_chunk),&sk->sk_rmem_alloc); 115 atomic_add(sizeof(struct sctp_chunk),&sk->sk_rmem_alloc);
116} 116}
117 117
118struct sctp_input_cb {
119 union {
120 struct inet_skb_parm h4;
121#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
122 struct inet6_skb_parm h6;
123#endif
124 } header;
125 struct sctp_chunk *chunk;
126};
127#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0]))
128
118/* 129/*
119 * This is the routine which IP calls when receiving an SCTP packet. 130 * This is the routine which IP calls when receiving an SCTP packet.
120 */ 131 */
@@ -243,6 +254,7 @@ int sctp_rcv(struct sk_buff *skb)
243 ret = -ENOMEM; 254 ret = -ENOMEM;
244 goto discard_release; 255 goto discard_release;
245 } 256 }
257 SCTP_INPUT_CB(skb)->chunk = chunk;
246 258
247 sctp_rcv_set_owner_r(skb,sk); 259 sctp_rcv_set_owner_r(skb,sk);
248 260
@@ -265,9 +277,9 @@ int sctp_rcv(struct sk_buff *skb)
265 sctp_bh_lock_sock(sk); 277 sctp_bh_lock_sock(sk);
266 278
267 if (sock_owned_by_user(sk)) 279 if (sock_owned_by_user(sk))
268 sk_add_backlog(sk, (struct sk_buff *) chunk); 280 sk_add_backlog(sk, skb);
269 else 281 else
270 sctp_backlog_rcv(sk, (struct sk_buff *) chunk); 282 sctp_backlog_rcv(sk, skb);
271 283
272 /* Release the sock and any reference counts we took in the 284 /* Release the sock and any reference counts we took in the
273 * lookup calls. 285 * lookup calls.
@@ -302,14 +314,8 @@ discard_release:
302 */ 314 */
303int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb) 315int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
304{ 316{
305 struct sctp_chunk *chunk; 317 struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk;
306 struct sctp_inq *inqueue; 318 struct sctp_inq *inqueue = &chunk->rcvr->inqueue;
307
308 /* One day chunk will live inside the skb, but for
309 * now this works.
310 */
311 chunk = (struct sctp_chunk *) skb;
312 inqueue = &chunk->rcvr->inqueue;
313 319
314 sctp_inq_push(inqueue, chunk); 320 sctp_inq_push(inqueue, chunk);
315 return 0; 321 return 0;
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index cedf4351556c..2d33922c044b 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -50,7 +50,7 @@
50/* Initialize an SCTP inqueue. */ 50/* Initialize an SCTP inqueue. */
51void sctp_inq_init(struct sctp_inq *queue) 51void sctp_inq_init(struct sctp_inq *queue)
52{ 52{
53 skb_queue_head_init(&queue->in); 53 INIT_LIST_HEAD(&queue->in_chunk_list);
54 queue->in_progress = NULL; 54 queue->in_progress = NULL;
55 55
56 /* Create a task for delivering data. */ 56 /* Create a task for delivering data. */
@@ -62,11 +62,13 @@ void sctp_inq_init(struct sctp_inq *queue)
62/* Release the memory associated with an SCTP inqueue. */ 62/* Release the memory associated with an SCTP inqueue. */
63void sctp_inq_free(struct sctp_inq *queue) 63void sctp_inq_free(struct sctp_inq *queue)
64{ 64{
65 struct sctp_chunk *chunk; 65 struct sctp_chunk *chunk, *tmp;
66 66
67 /* Empty the queue. */ 67 /* Empty the queue. */
68 while ((chunk = (struct sctp_chunk *) skb_dequeue(&queue->in)) != NULL) 68 list_for_each_entry_safe(chunk, tmp, &queue->in_chunk_list, list) {
69 list_del_init(&chunk->list);
69 sctp_chunk_free(chunk); 70 sctp_chunk_free(chunk);
71 }
70 72
71 /* If there is a packet which is currently being worked on, 73 /* If there is a packet which is currently being worked on,
72 * free it as well. 74 * free it as well.
@@ -92,7 +94,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet)
92 * Eventually, we should clean up inqueue to not rely 94 * Eventually, we should clean up inqueue to not rely
93 * on the BH related data structures. 95 * on the BH related data structures.
94 */ 96 */
95 skb_queue_tail(&(q->in), (struct sk_buff *) packet); 97 list_add_tail(&packet->list, &q->in_chunk_list);
96 q->immediate.func(q->immediate.data); 98 q->immediate.func(q->immediate.data);
97} 99}
98 100
@@ -131,12 +133,16 @@ struct sctp_chunk *sctp_inq_pop(struct sctp_inq *queue)
131 133
132 /* Do we need to take the next packet out of the queue to process? */ 134 /* Do we need to take the next packet out of the queue to process? */
133 if (!chunk) { 135 if (!chunk) {
136 struct list_head *entry;
137
134 /* Is the queue empty? */ 138 /* Is the queue empty? */
135 if (skb_queue_empty(&queue->in)) 139 if (list_empty(&queue->in_chunk_list))
136 return NULL; 140 return NULL;
137 141
142 entry = queue->in_chunk_list.next;
138 chunk = queue->in_progress = 143 chunk = queue->in_progress =
139 (struct sctp_chunk *) skb_dequeue(&queue->in); 144 list_entry(entry, struct sctp_chunk, list);
145 list_del_init(entry);
140 146
141 /* This is the first chunk in the packet. */ 147 /* This is the first chunk in the packet. */
142 chunk->singleton = 1; 148 chunk->singleton = 1;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 84b5b370b09d..931371633464 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -108,7 +108,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
108 packet->transport = transport; 108 packet->transport = transport;
109 packet->source_port = sport; 109 packet->source_port = sport;
110 packet->destination_port = dport; 110 packet->destination_port = dport;
111 skb_queue_head_init(&packet->chunks); 111 INIT_LIST_HEAD(&packet->chunk_list);
112 if (asoc) { 112 if (asoc) {
113 struct sctp_sock *sp = sctp_sk(asoc->base.sk); 113 struct sctp_sock *sp = sctp_sk(asoc->base.sk);
114 overhead = sp->pf->af->net_header_len; 114 overhead = sp->pf->af->net_header_len;
@@ -129,12 +129,14 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
129/* Free a packet. */ 129/* Free a packet. */
130void sctp_packet_free(struct sctp_packet *packet) 130void sctp_packet_free(struct sctp_packet *packet)
131{ 131{
132 struct sctp_chunk *chunk; 132 struct sctp_chunk *chunk, *tmp;
133 133
134 SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet); 134 SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet);
135 135
136 while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)) != NULL) 136 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
137 list_del_init(&chunk->list);
137 sctp_chunk_free(chunk); 138 sctp_chunk_free(chunk);
139 }
138 140
139 if (packet->malloced) 141 if (packet->malloced)
140 kfree(packet); 142 kfree(packet);
@@ -276,7 +278,7 @@ append:
276 packet->has_sack = 1; 278 packet->has_sack = 1;
277 279
278 /* It is OK to send this chunk. */ 280 /* It is OK to send this chunk. */
279 __skb_queue_tail(&packet->chunks, (struct sk_buff *)chunk); 281 list_add_tail(&chunk->list, &packet->chunk_list);
280 packet->size += chunk_len; 282 packet->size += chunk_len;
281 chunk->transport = packet->transport; 283 chunk->transport = packet->transport;
282finish: 284finish:
@@ -295,7 +297,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
295 struct sctphdr *sh; 297 struct sctphdr *sh;
296 __u32 crc32; 298 __u32 crc32;
297 struct sk_buff *nskb; 299 struct sk_buff *nskb;
298 struct sctp_chunk *chunk; 300 struct sctp_chunk *chunk, *tmp;
299 struct sock *sk; 301 struct sock *sk;
300 int err = 0; 302 int err = 0;
301 int padding; /* How much padding do we need? */ 303 int padding; /* How much padding do we need? */
@@ -305,11 +307,11 @@ int sctp_packet_transmit(struct sctp_packet *packet)
305 SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet); 307 SCTP_DEBUG_PRINTK("%s: packet:%p\n", __FUNCTION__, packet);
306 308
307 /* Do NOT generate a chunkless packet. */ 309 /* Do NOT generate a chunkless packet. */
308 chunk = (struct sctp_chunk *)skb_peek(&packet->chunks); 310 if (list_empty(&packet->chunk_list))
309 if (unlikely(!chunk))
310 return err; 311 return err;
311 312
312 /* Set up convenience variables... */ 313 /* Set up convenience variables... */
314 chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
313 sk = chunk->skb->sk; 315 sk = chunk->skb->sk;
314 316
315 /* Allocate the new skb. */ 317 /* Allocate the new skb. */
@@ -370,7 +372,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
370 * [This whole comment explains WORD_ROUND() below.] 372 * [This whole comment explains WORD_ROUND() below.]
371 */ 373 */
372 SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n"); 374 SCTP_DEBUG_PRINTK("***sctp_transmit_packet***\n");
373 while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)) != NULL) { 375 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
376 list_del_init(&chunk->list);
374 if (sctp_chunk_is_data(chunk)) { 377 if (sctp_chunk_is_data(chunk)) {
375 378
376 if (!chunk->has_tsn) { 379 if (!chunk->has_tsn) {
@@ -511,7 +514,8 @@ err:
511 * will get resent or dropped later. 514 * will get resent or dropped later.
512 */ 515 */
513 516
514 while ((chunk = (struct sctp_chunk *)__skb_dequeue(&packet->chunks)) != NULL) { 517 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
518 list_del_init(&chunk->list);
515 if (!sctp_chunk_is_data(chunk)) 519 if (!sctp_chunk_is_data(chunk))
516 sctp_chunk_free(chunk); 520 sctp_chunk_free(chunk);
517 } 521 }
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4eb81a1407b7..efb72faba20c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -75,7 +75,7 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn);
75static inline void sctp_outq_head_data(struct sctp_outq *q, 75static inline void sctp_outq_head_data(struct sctp_outq *q,
76 struct sctp_chunk *ch) 76 struct sctp_chunk *ch)
77{ 77{
78 __skb_queue_head(&q->out, (struct sk_buff *)ch); 78 list_add(&ch->list, &q->out_chunk_list);
79 q->out_qlen += ch->skb->len; 79 q->out_qlen += ch->skb->len;
80 return; 80 return;
81} 81}
@@ -83,17 +83,22 @@ static inline void sctp_outq_head_data(struct sctp_outq *q,
83/* Take data from the front of the queue. */ 83/* Take data from the front of the queue. */
84static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) 84static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q)
85{ 85{
86 struct sctp_chunk *ch; 86 struct sctp_chunk *ch = NULL;
87 ch = (struct sctp_chunk *)__skb_dequeue(&q->out); 87
88 if (ch) 88 if (!list_empty(&q->out_chunk_list)) {
89 struct list_head *entry = q->out_chunk_list.next;
90
91 ch = list_entry(entry, struct sctp_chunk, list);
92 list_del_init(entry);
89 q->out_qlen -= ch->skb->len; 93 q->out_qlen -= ch->skb->len;
94 }
90 return ch; 95 return ch;
91} 96}
92/* Add data chunk to the end of the queue. */ 97/* Add data chunk to the end of the queue. */
93static inline void sctp_outq_tail_data(struct sctp_outq *q, 98static inline void sctp_outq_tail_data(struct sctp_outq *q,
94 struct sctp_chunk *ch) 99 struct sctp_chunk *ch)
95{ 100{
96 __skb_queue_tail(&q->out, (struct sk_buff *)ch); 101 list_add_tail(&ch->list, &q->out_chunk_list);
97 q->out_qlen += ch->skb->len; 102 q->out_qlen += ch->skb->len;
98 return; 103 return;
99} 104}
@@ -197,8 +202,8 @@ static inline int sctp_cacc_skip(struct sctp_transport *primary,
197void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) 202void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
198{ 203{
199 q->asoc = asoc; 204 q->asoc = asoc;
200 skb_queue_head_init(&q->out); 205 INIT_LIST_HEAD(&q->out_chunk_list);
201 skb_queue_head_init(&q->control); 206 INIT_LIST_HEAD(&q->control_chunk_list);
202 INIT_LIST_HEAD(&q->retransmit); 207 INIT_LIST_HEAD(&q->retransmit);
203 INIT_LIST_HEAD(&q->sacked); 208 INIT_LIST_HEAD(&q->sacked);
204 INIT_LIST_HEAD(&q->abandoned); 209 INIT_LIST_HEAD(&q->abandoned);
@@ -217,7 +222,7 @@ void sctp_outq_teardown(struct sctp_outq *q)
217{ 222{
218 struct sctp_transport *transport; 223 struct sctp_transport *transport;
219 struct list_head *lchunk, *pos, *temp; 224 struct list_head *lchunk, *pos, *temp;
220 struct sctp_chunk *chunk; 225 struct sctp_chunk *chunk, *tmp;
221 226
222 /* Throw away unacknowledged chunks. */ 227 /* Throw away unacknowledged chunks. */
223 list_for_each(pos, &q->asoc->peer.transport_addr_list) { 228 list_for_each(pos, &q->asoc->peer.transport_addr_list) {
@@ -269,8 +274,10 @@ void sctp_outq_teardown(struct sctp_outq *q)
269 q->error = 0; 274 q->error = 0;
270 275
271 /* Throw away any leftover control chunks. */ 276 /* Throw away any leftover control chunks. */
272 while ((chunk = (struct sctp_chunk *) skb_dequeue(&q->control)) != NULL) 277 list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
278 list_del_init(&chunk->list);
273 sctp_chunk_free(chunk); 279 sctp_chunk_free(chunk);
280 }
274} 281}
275 282
276/* Free the outqueue structure and any related pending chunks. */ 283/* Free the outqueue structure and any related pending chunks. */
@@ -333,7 +340,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
333 break; 340 break;
334 }; 341 };
335 } else { 342 } else {
336 __skb_queue_tail(&q->control, (struct sk_buff *) chunk); 343 list_add_tail(&chunk->list, &q->control_chunk_list);
337 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); 344 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
338 } 345 }
339 346
@@ -650,10 +657,9 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
650 __u16 sport = asoc->base.bind_addr.port; 657 __u16 sport = asoc->base.bind_addr.port;
651 __u16 dport = asoc->peer.port; 658 __u16 dport = asoc->peer.port;
652 __u32 vtag = asoc->peer.i.init_tag; 659 __u32 vtag = asoc->peer.i.init_tag;
653 struct sk_buff_head *queue;
654 struct sctp_transport *transport = NULL; 660 struct sctp_transport *transport = NULL;
655 struct sctp_transport *new_transport; 661 struct sctp_transport *new_transport;
656 struct sctp_chunk *chunk; 662 struct sctp_chunk *chunk, *tmp;
657 sctp_xmit_t status; 663 sctp_xmit_t status;
658 int error = 0; 664 int error = 0;
659 int start_timer = 0; 665 int start_timer = 0;
@@ -675,8 +681,9 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
675 * ... 681 * ...
676 */ 682 */
677 683
678 queue = &q->control; 684 list_for_each_entry_safe(chunk, tmp, &q->control_chunk_list, list) {
679 while ((chunk = (struct sctp_chunk *)skb_dequeue(queue)) != NULL) { 685 list_del_init(&chunk->list);
686
680 /* Pick the right transport to use. */ 687 /* Pick the right transport to use. */
681 new_transport = chunk->transport; 688 new_transport = chunk->transport;
682 689
@@ -814,8 +821,6 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
814 821
815 /* Finally, transmit new packets. */ 822 /* Finally, transmit new packets. */
816 start_timer = 0; 823 start_timer = 0;
817 queue = &q->out;
818
819 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { 824 while ((chunk = sctp_outq_dequeue_data(q)) != NULL) {
820 /* RFC 2960 6.5 Every DATA chunk MUST carry a valid 825 /* RFC 2960 6.5 Every DATA chunk MUST carry a valid
821 * stream identifier. 826 * stream identifier.
@@ -1149,8 +1154,9 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
1149 /* See if all chunks are acked. 1154 /* See if all chunks are acked.
1150 * Make sure the empty queue handler will get run later. 1155 * Make sure the empty queue handler will get run later.
1151 */ 1156 */
1152 q->empty = skb_queue_empty(&q->out) && skb_queue_empty(&q->control) && 1157 q->empty = (list_empty(&q->out_chunk_list) &&
1153 list_empty(&q->retransmit); 1158 list_empty(&q->control_chunk_list) &&
1159 list_empty(&q->retransmit));
1154 if (!q->empty) 1160 if (!q->empty)
1155 goto finish; 1161 goto finish;
1156 1162
@@ -1679,9 +1685,9 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
1679 if (TSN_lte(tsn, ctsn)) { 1685 if (TSN_lte(tsn, ctsn)) {
1680 list_del_init(lchunk); 1686 list_del_init(lchunk);
1681 if (!chunk->tsn_gap_acked) { 1687 if (!chunk->tsn_gap_acked) {
1682 chunk->transport->flight_size -= 1688 chunk->transport->flight_size -=
1683 sctp_data_size(chunk); 1689 sctp_data_size(chunk);
1684 q->outstanding_bytes -= sctp_data_size(chunk); 1690 q->outstanding_bytes -= sctp_data_size(chunk);
1685 } 1691 }
1686 sctp_chunk_free(chunk); 1692 sctp_chunk_free(chunk);
1687 } else { 1693 } else {
@@ -1729,7 +1735,7 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
1729 nskips, &ftsn_skip_arr[0]); 1735 nskips, &ftsn_skip_arr[0]);
1730 1736
1731 if (ftsn_chunk) { 1737 if (ftsn_chunk) {
1732 __skb_queue_tail(&q->control, (struct sk_buff *)ftsn_chunk); 1738 list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
1733 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); 1739 SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS);
1734 } 1740 }
1735} 1741}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index e7f37faba7c0..ce9245e71fca 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -219,7 +219,7 @@ static void sctp_free_local_addr_list(void)
219 219
220/* Copy the local addresses which are valid for 'scope' into 'bp'. */ 220/* Copy the local addresses which are valid for 'scope' into 'bp'. */
221int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, 221int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope,
222 int gfp, int copy_flags) 222 unsigned int __nocast gfp, int copy_flags)
223{ 223{
224 struct sctp_sockaddr_entry *addr; 224 struct sctp_sockaddr_entry *addr;
225 int error = 0; 225 int error = 0;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 5baed9bb7de5..00d32b7c8266 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -78,7 +78,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
78static int sctp_process_param(struct sctp_association *asoc, 78static int sctp_process_param(struct sctp_association *asoc,
79 union sctp_params param, 79 union sctp_params param,
80 const union sctp_addr *peer_addr, 80 const union sctp_addr *peer_addr,
81 int gfp); 81 unsigned int __nocast gfp);
82 82
83/* What was the inbound interface for this chunk? */ 83/* What was the inbound interface for this chunk? */
84int sctp_chunk_iif(const struct sctp_chunk *chunk) 84int sctp_chunk_iif(const struct sctp_chunk *chunk)
@@ -174,7 +174,7 @@ void sctp_init_cause(struct sctp_chunk *chunk, __u16 cause_code,
174 */ 174 */
175struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, 175struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
176 const struct sctp_bind_addr *bp, 176 const struct sctp_bind_addr *bp,
177 int gfp, int vparam_len) 177 unsigned int __nocast gfp, int vparam_len)
178{ 178{
179 sctp_inithdr_t init; 179 sctp_inithdr_t init;
180 union sctp_params addrs; 180 union sctp_params addrs;
@@ -261,7 +261,7 @@ nodata:
261 261
262struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, 262struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
263 const struct sctp_chunk *chunk, 263 const struct sctp_chunk *chunk,
264 int gfp, int unkparam_len) 264 unsigned int __nocast gfp, int unkparam_len)
265{ 265{
266 sctp_inithdr_t initack; 266 sctp_inithdr_t initack;
267 struct sctp_chunk *retval; 267 struct sctp_chunk *retval;
@@ -1003,6 +1003,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
1003 SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb); 1003 SCTP_DEBUG_PRINTK("chunkifying skb %p w/o an sk\n", skb);
1004 } 1004 }
1005 1005
1006 INIT_LIST_HEAD(&retval->list);
1006 retval->skb = skb; 1007 retval->skb = skb;
1007 retval->asoc = (struct sctp_association *)asoc; 1008 retval->asoc = (struct sctp_association *)asoc;
1008 retval->resent = 0; 1009 retval->resent = 0;
@@ -1116,8 +1117,7 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
1116/* Possibly, free the chunk. */ 1117/* Possibly, free the chunk. */
1117void sctp_chunk_free(struct sctp_chunk *chunk) 1118void sctp_chunk_free(struct sctp_chunk *chunk)
1118{ 1119{
1119 /* Make sure that we are not on any list. */ 1120 BUG_ON(!list_empty(&chunk->list));
1120 skb_unlink((struct sk_buff *) chunk);
1121 list_del_init(&chunk->transmitted_list); 1121 list_del_init(&chunk->transmitted_list);
1122 1122
1123 /* Release our reference on the message tracker. */ 1123 /* Release our reference on the message tracker. */
@@ -1233,7 +1233,8 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk)
1233 1233
1234/* Create a CLOSED association to use with an incoming packet. */ 1234/* Create a CLOSED association to use with an incoming packet. */
1235struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, 1235struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep,
1236 struct sctp_chunk *chunk, int gfp) 1236 struct sctp_chunk *chunk,
1237 unsigned int __nocast gfp)
1237{ 1238{
1238 struct sctp_association *asoc; 1239 struct sctp_association *asoc;
1239 struct sk_buff *skb; 1240 struct sk_buff *skb;
@@ -1348,7 +1349,7 @@ nodata:
1348struct sctp_association *sctp_unpack_cookie( 1349struct sctp_association *sctp_unpack_cookie(
1349 const struct sctp_endpoint *ep, 1350 const struct sctp_endpoint *ep,
1350 const struct sctp_association *asoc, 1351 const struct sctp_association *asoc,
1351 struct sctp_chunk *chunk, int gfp, 1352 struct sctp_chunk *chunk, unsigned int __nocast gfp,
1352 int *error, struct sctp_chunk **errp) 1353 int *error, struct sctp_chunk **errp)
1353{ 1354{
1354 struct sctp_association *retval = NULL; 1355 struct sctp_association *retval = NULL;
@@ -1812,7 +1813,7 @@ int sctp_verify_init(const struct sctp_association *asoc,
1812 */ 1813 */
1813int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, 1814int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid,
1814 const union sctp_addr *peer_addr, 1815 const union sctp_addr *peer_addr,
1815 sctp_init_chunk_t *peer_init, int gfp) 1816 sctp_init_chunk_t *peer_init, unsigned int __nocast gfp)
1816{ 1817{
1817 union sctp_params param; 1818 union sctp_params param;
1818 struct sctp_transport *transport; 1819 struct sctp_transport *transport;
@@ -1983,7 +1984,7 @@ nomem:
1983static int sctp_process_param(struct sctp_association *asoc, 1984static int sctp_process_param(struct sctp_association *asoc,
1984 union sctp_params param, 1985 union sctp_params param,
1985 const union sctp_addr *peer_addr, 1986 const union sctp_addr *peer_addr,
1986 int gfp) 1987 unsigned int __nocast gfp)
1987{ 1988{
1988 union sctp_addr addr; 1989 union sctp_addr addr;
1989 int i; 1990 int i;
@@ -2739,8 +2740,12 @@ int sctp_process_asconf_ack(struct sctp_association *asoc,
2739 asoc->addip_last_asconf = NULL; 2740 asoc->addip_last_asconf = NULL;
2740 2741
2741 /* Send the next asconf chunk from the addip chunk queue. */ 2742 /* Send the next asconf chunk from the addip chunk queue. */
2742 asconf = (struct sctp_chunk *)__skb_dequeue(&asoc->addip_chunks); 2743 if (!list_empty(&asoc->addip_chunk_list)) {
2743 if (asconf) { 2744 struct list_head *entry = asoc->addip_chunk_list.next;
2745 asconf = list_entry(entry, struct sctp_chunk, list);
2746
2747 list_del_init(entry);
2748
2744 /* Hold the chunk until an ASCONF_ACK is received. */ 2749 /* Hold the chunk until an ASCONF_ACK is received. */
2745 sctp_chunk_hold(asconf); 2750 sctp_chunk_hold(asconf);
2746 if (sctp_primitive_ASCONF(asoc, asconf)) 2751 if (sctp_primitive_ASCONF(asoc, asconf))
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 778639db125a..39c970b5b198 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -63,7 +63,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
63 void *event_arg, 63 void *event_arg,
64 sctp_disposition_t status, 64 sctp_disposition_t status,
65 sctp_cmd_seq_t *commands, 65 sctp_cmd_seq_t *commands,
66 int gfp); 66 unsigned int __nocast gfp);
67static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, 67static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
68 sctp_state_t state, 68 sctp_state_t state,
69 struct sctp_endpoint *ep, 69 struct sctp_endpoint *ep,
@@ -71,7 +71,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
71 void *event_arg, 71 void *event_arg,
72 sctp_disposition_t status, 72 sctp_disposition_t status,
73 sctp_cmd_seq_t *commands, 73 sctp_cmd_seq_t *commands,
74 int gfp); 74 unsigned int __nocast gfp);
75 75
76/******************************************************************** 76/********************************************************************
77 * Helper functions 77 * Helper functions
@@ -497,7 +497,8 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands,
497static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, 497static int sctp_cmd_process_init(sctp_cmd_seq_t *commands,
498 struct sctp_association *asoc, 498 struct sctp_association *asoc,
499 struct sctp_chunk *chunk, 499 struct sctp_chunk *chunk,
500 sctp_init_chunk_t *peer_init, int gfp) 500 sctp_init_chunk_t *peer_init,
501 unsigned int __nocast gfp)
501{ 502{
502 int error; 503 int error;
503 504
@@ -852,7 +853,7 @@ int sctp_do_sm(sctp_event_t event_type, sctp_subtype_t subtype,
852 struct sctp_endpoint *ep, 853 struct sctp_endpoint *ep,
853 struct sctp_association *asoc, 854 struct sctp_association *asoc,
854 void *event_arg, 855 void *event_arg,
855 int gfp) 856 unsigned int __nocast gfp)
856{ 857{
857 sctp_cmd_seq_t commands; 858 sctp_cmd_seq_t commands;
858 const sctp_sm_table_entry_t *state_fn; 859 const sctp_sm_table_entry_t *state_fn;
@@ -897,7 +898,7 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
897 void *event_arg, 898 void *event_arg,
898 sctp_disposition_t status, 899 sctp_disposition_t status,
899 sctp_cmd_seq_t *commands, 900 sctp_cmd_seq_t *commands,
900 int gfp) 901 unsigned int __nocast gfp)
901{ 902{
902 int error; 903 int error;
903 904
@@ -985,7 +986,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
985 void *event_arg, 986 void *event_arg,
986 sctp_disposition_t status, 987 sctp_disposition_t status,
987 sctp_cmd_seq_t *commands, 988 sctp_cmd_seq_t *commands,
988 int gfp) 989 unsigned int __nocast gfp)
989{ 990{
990 int error = 0; 991 int error = 0;
991 int force; 992 int force;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aad55dc3792b..091a66f06a35 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -406,7 +406,7 @@ static int sctp_send_asconf(struct sctp_association *asoc,
406 * transmission. 406 * transmission.
407 */ 407 */
408 if (asoc->addip_last_asconf) { 408 if (asoc->addip_last_asconf) {
409 __skb_queue_tail(&asoc->addip_chunks, (struct sk_buff *)chunk); 409 list_add_tail(&chunk->list, &asoc->addip_chunk_list);
410 goto out; 410 goto out;
411 } 411 }
412 412
diff --git a/net/sctp/ssnmap.c b/net/sctp/ssnmap.c
index e627d2b451b6..25037daf3fa0 100644
--- a/net/sctp/ssnmap.c
+++ b/net/sctp/ssnmap.c
@@ -57,7 +57,8 @@ static inline size_t sctp_ssnmap_size(__u16 in, __u16 out)
57/* Create a new sctp_ssnmap. 57/* Create a new sctp_ssnmap.
58 * Allocate room to store at least 'len' contiguous TSNs. 58 * Allocate room to store at least 'len' contiguous TSNs.
59 */ 59 */
60struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out, int gfp) 60struct sctp_ssnmap *sctp_ssnmap_new(__u16 in, __u16 out,
61 unsigned int __nocast gfp)
61{ 62{
62 struct sctp_ssnmap *retval; 63 struct sctp_ssnmap *retval;
63 int size; 64 int size;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index a63b69179607..d2f04ebe5081 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -57,7 +57,7 @@
57/* Initialize a new transport from provided memory. */ 57/* Initialize a new transport from provided memory. */
58static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, 58static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
59 const union sctp_addr *addr, 59 const union sctp_addr *addr,
60 int gfp) 60 unsigned int __nocast gfp)
61{ 61{
62 /* Copy in the address. */ 62 /* Copy in the address. */
63 peer->ipaddr = *addr; 63 peer->ipaddr = *addr;
@@ -121,7 +121,8 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
121} 121}
122 122
123/* Allocate and initialize a new transport. */ 123/* Allocate and initialize a new transport. */
124struct sctp_transport *sctp_transport_new(const union sctp_addr *addr, int gfp) 124struct sctp_transport *sctp_transport_new(const union sctp_addr *addr,
125 unsigned int __nocast gfp)
125{ 126{
126 struct sctp_transport *transport; 127 struct sctp_transport *transport;
127 128
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 17d0ff534735..0abd5101107c 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -74,7 +74,7 @@ SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags)
74 74
75/* Create a new sctp_ulpevent. */ 75/* Create a new sctp_ulpevent. */
76SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, 76SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags,
77 int gfp) 77 unsigned int __nocast gfp)
78{ 78{
79 struct sctp_ulpevent *event; 79 struct sctp_ulpevent *event;
80 struct sk_buff *skb; 80 struct sk_buff *skb;
@@ -136,7 +136,7 @@ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event)
136struct sctp_ulpevent *sctp_ulpevent_make_assoc_change( 136struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
137 const struct sctp_association *asoc, 137 const struct sctp_association *asoc,
138 __u16 flags, __u16 state, __u16 error, __u16 outbound, 138 __u16 flags, __u16 state, __u16 error, __u16 outbound,
139 __u16 inbound, int gfp) 139 __u16 inbound, unsigned int __nocast gfp)
140{ 140{
141 struct sctp_ulpevent *event; 141 struct sctp_ulpevent *event;
142 struct sctp_assoc_change *sac; 142 struct sctp_assoc_change *sac;
@@ -237,7 +237,7 @@ fail:
237struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change( 237struct sctp_ulpevent *sctp_ulpevent_make_peer_addr_change(
238 const struct sctp_association *asoc, 238 const struct sctp_association *asoc,
239 const struct sockaddr_storage *aaddr, 239 const struct sockaddr_storage *aaddr,
240 int flags, int state, int error, int gfp) 240 int flags, int state, int error, unsigned int __nocast gfp)
241{ 241{
242 struct sctp_ulpevent *event; 242 struct sctp_ulpevent *event;
243 struct sctp_paddr_change *spc; 243 struct sctp_paddr_change *spc;
@@ -350,7 +350,7 @@ fail:
350 */ 350 */
351struct sctp_ulpevent *sctp_ulpevent_make_remote_error( 351struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
352 const struct sctp_association *asoc, struct sctp_chunk *chunk, 352 const struct sctp_association *asoc, struct sctp_chunk *chunk,
353 __u16 flags, int gfp) 353 __u16 flags, unsigned int __nocast gfp)
354{ 354{
355 struct sctp_ulpevent *event; 355 struct sctp_ulpevent *event;
356 struct sctp_remote_error *sre; 356 struct sctp_remote_error *sre;
@@ -448,7 +448,7 @@ fail:
448 */ 448 */
449struct sctp_ulpevent *sctp_ulpevent_make_send_failed( 449struct sctp_ulpevent *sctp_ulpevent_make_send_failed(
450 const struct sctp_association *asoc, struct sctp_chunk *chunk, 450 const struct sctp_association *asoc, struct sctp_chunk *chunk,
451 __u16 flags, __u32 error, int gfp) 451 __u16 flags, __u32 error, unsigned int __nocast gfp)
452{ 452{
453 struct sctp_ulpevent *event; 453 struct sctp_ulpevent *event;
454 struct sctp_send_failed *ssf; 454 struct sctp_send_failed *ssf;
@@ -557,7 +557,7 @@ fail:
557 */ 557 */
558struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event( 558struct sctp_ulpevent *sctp_ulpevent_make_shutdown_event(
559 const struct sctp_association *asoc, 559 const struct sctp_association *asoc,
560 __u16 flags, int gfp) 560 __u16 flags, unsigned int __nocast gfp)
561{ 561{
562 struct sctp_ulpevent *event; 562 struct sctp_ulpevent *event;
563 struct sctp_shutdown_event *sse; 563 struct sctp_shutdown_event *sse;
@@ -620,7 +620,7 @@ fail:
620 * 5.3.1.6 SCTP_ADAPTION_INDICATION 620 * 5.3.1.6 SCTP_ADAPTION_INDICATION
621 */ 621 */
622struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication( 622struct sctp_ulpevent *sctp_ulpevent_make_adaption_indication(
623 const struct sctp_association *asoc, int gfp) 623 const struct sctp_association *asoc, unsigned int __nocast gfp)
624{ 624{
625 struct sctp_ulpevent *event; 625 struct sctp_ulpevent *event;
626 struct sctp_adaption_event *sai; 626 struct sctp_adaption_event *sai;
@@ -657,7 +657,7 @@ fail:
657 */ 657 */
658struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, 658struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc,
659 struct sctp_chunk *chunk, 659 struct sctp_chunk *chunk,
660 int gfp) 660 unsigned int __nocast gfp)
661{ 661{
662 struct sctp_ulpevent *event = NULL; 662 struct sctp_ulpevent *event = NULL;
663 struct sk_buff *skb; 663 struct sk_buff *skb;
@@ -718,7 +718,8 @@ fail:
718 * various events. 718 * various events.
719 */ 719 */
720struct sctp_ulpevent *sctp_ulpevent_make_pdapi( 720struct sctp_ulpevent *sctp_ulpevent_make_pdapi(
721 const struct sctp_association *asoc, __u32 indication, int gfp) 721 const struct sctp_association *asoc, __u32 indication,
722 unsigned int __nocast gfp)
722{ 723{
723 struct sctp_ulpevent *event; 724 struct sctp_ulpevent *event;
724 struct sctp_pdapi_event *pd; 725 struct sctp_pdapi_event *pd;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index d5dd2cf7ac4a..8bbc279d6c99 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -100,7 +100,7 @@ void sctp_ulpq_free(struct sctp_ulpq *ulpq)
100 100
101/* Process an incoming DATA chunk. */ 101/* Process an incoming DATA chunk. */
102int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 102int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
103 int gfp) 103 unsigned int __nocast gfp)
104{ 104{
105 struct sk_buff_head temp; 105 struct sk_buff_head temp;
106 sctp_data_chunk_t *hdr; 106 sctp_data_chunk_t *hdr;
@@ -778,7 +778,8 @@ static __u16 sctp_ulpq_renege_frags(struct sctp_ulpq *ulpq, __u16 needed)
778 778
779/* Partial deliver the first message as there is pressure on rwnd. */ 779/* Partial deliver the first message as there is pressure on rwnd. */
780void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq, 780void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
781 struct sctp_chunk *chunk, int gfp) 781 struct sctp_chunk *chunk,
782 unsigned int __nocast gfp)
782{ 783{
783 struct sctp_ulpevent *event; 784 struct sctp_ulpevent *event;
784 struct sctp_association *asoc; 785 struct sctp_association *asoc;
@@ -802,7 +803,7 @@ void sctp_ulpq_partial_delivery(struct sctp_ulpq *ulpq,
802 803
803/* Renege some packets to make room for an incoming chunk. */ 804/* Renege some packets to make room for an incoming chunk. */
804void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, 805void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
805 int gfp) 806 unsigned int __nocast gfp)
806{ 807{
807 struct sctp_association *asoc; 808 struct sctp_association *asoc;
808 __u16 needed, freed; 809 __u16 needed, freed;
@@ -841,7 +842,7 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
841/* Notify the application if an association is aborted and in 842/* Notify the application if an association is aborted and in
842 * partial delivery mode. Send up any pending received messages. 843 * partial delivery mode. Send up any pending received messages.
843 */ 844 */
844void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, int gfp) 845void sctp_ulpq_abort_pd(struct sctp_ulpq *ulpq, unsigned int __nocast gfp)
845{ 846{
846 struct sctp_ulpevent *ev = NULL; 847 struct sctp_ulpevent *ev = NULL;
847 struct sock *sk; 848 struct sock *sk;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 269f217918a3..3c654e06b084 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -145,8 +145,6 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
145 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { 145 if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) {
146 if (task == xprt->snd_task) 146 if (task == xprt->snd_task)
147 return 1; 147 return 1;
148 if (task == NULL)
149 return 0;
150 goto out_sleep; 148 goto out_sleep;
151 } 149 }
152 if (xprt->nocong || __xprt_get_cong(xprt, task)) { 150 if (xprt->nocong || __xprt_get_cong(xprt, task)) {
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
new file mode 100644
index 000000000000..5a69733bcdad
--- /dev/null
+++ b/net/unix/Kconfig
@@ -0,0 +1,21 @@
1#
2# Unix Domain Sockets
3#
4
5config UNIX
6 tristate "Unix domain sockets"
7 ---help---
8 If you say Y here, you will include support for Unix domain sockets;
9 sockets are the standard Unix mechanism for establishing and
10 accessing network connections. Many commonly used programs such as
11 the X Window system and syslog use these sockets even if your
12 machine is not connected to any network. Unless you are working on
13 an embedded system or something similar, you therefore definitely
14 want to say Y here.
15
16 To compile this driver as a module, choose M here: the module will be
17 called unix. Note that several important services won't work
18 correctly if you say M here and then neglect to load the module.
19
20 Say Y unless you know what you are doing.
21
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c420eba4876b..d403e34088ad 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -302,7 +302,7 @@ static void unix_write_space(struct sock *sk)
302 * may receive messages only from that peer. */ 302 * may receive messages only from that peer. */
303static void unix_dgram_disconnected(struct sock *sk, struct sock *other) 303static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
304{ 304{
305 if (skb_queue_len(&sk->sk_receive_queue)) { 305 if (!skb_queue_empty(&sk->sk_receive_queue)) {
306 skb_queue_purge(&sk->sk_receive_queue); 306 skb_queue_purge(&sk->sk_receive_queue);
307 wake_up_interruptible_all(&unix_sk(sk)->peer_wait); 307 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
308 308
@@ -1619,7 +1619,7 @@ static long unix_stream_data_wait(struct sock * sk, long timeo)
1619 for (;;) { 1619 for (;;) {
1620 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); 1620 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1621 1621
1622 if (skb_queue_len(&sk->sk_receive_queue) || 1622 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1623 sk->sk_err || 1623 sk->sk_err ||
1624 (sk->sk_shutdown & RCV_SHUTDOWN) || 1624 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1625 signal_pending(current) || 1625 signal_pending(current) ||
diff --git a/net/wanrouter/Kconfig b/net/wanrouter/Kconfig
new file mode 100644
index 000000000000..1debe1cb054e
--- /dev/null
+++ b/net/wanrouter/Kconfig
@@ -0,0 +1,29 @@
1#
2# Configuration for WAN router
3#
4
5config WAN_ROUTER
6 tristate "WAN router"
7 depends on EXPERIMENTAL
8 ---help---
9 Wide Area Networks (WANs), such as X.25, frame relay and leased
10 lines, are used to interconnect Local Area Networks (LANs) over vast
11 distances with data transfer rates significantly higher than those
12 achievable with commonly used asynchronous modem connections.
13 Usually, a quite expensive external device called a `WAN router' is
14 needed to connect to a WAN.
15
16 As an alternative, WAN routing can be built into the Linux kernel.
17 With relatively inexpensive WAN interface cards available on the
18 market, a perfectly usable router can be built for less than half
19 the price of an external router. If you have one of those cards and
20 wish to use your Linux box as a WAN router, say Y here and also to
21 the WAN driver for your card, below. You will then need the
22 wan-tools package which is available from <ftp://ftp.sangoma.com/>.
23 Read <file:Documentation/networking/wan-router.txt> for more
24 information.
25
26 To compile WAN routing support as a module, choose M here: the
27 module will be called wanrouter.
28
29 If unsure, say N.
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index d6844ac226f5..13b650ad22e2 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -358,10 +358,10 @@ int wanrouter_encapsulate(struct sk_buff *skb, struct net_device *dev,
358 */ 358 */
359 359
360 360
361unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) 361__be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
362{ 362{
363 int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */ 363 int cnt = skb->data[0] ? 0 : 1; /* there may be a pad present */
364 unsigned short ethertype; 364 __be16 ethertype;
365 365
366 switch (skb->data[cnt]) { 366 switch (skb->data[cnt]) {
367 case NLPID_IP: /* IP datagramm */ 367 case NLPID_IP: /* IP datagramm */
@@ -379,7 +379,7 @@ unsigned short wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev)
379 skb->data[cnt+3], dev->name); 379 skb->data[cnt+3], dev->name);
380 return 0; 380 return 0;
381 } 381 }
382 ethertype = *((unsigned short*)&skb->data[cnt+4]); 382 ethertype = *((__be16*)&skb->data[cnt+4]);
383 cnt += 6; 383 cnt += 6;
384 break; 384 break;
385 385
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
new file mode 100644
index 000000000000..e6759c9660bb
--- /dev/null
+++ b/net/x25/Kconfig
@@ -0,0 +1,36 @@
1#
2# CCITT X.25 Packet Layer
3#
4
5config X25
6 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)"
7 depends on EXPERIMENTAL
8 ---help---
9 X.25 is a set of standardized network protocols, similar in scope to
10 frame relay; the one physical line from your box to the X.25 network
11 entry point can carry several logical point-to-point connections
12 (called "virtual circuits") to other computers connected to the X.25
13 network. Governments, banks, and other organizations tend to use it
14 to connect to each other or to form Wide Area Networks (WANs). Many
15 countries have public X.25 networks. X.25 consists of two
16 protocols: the higher level Packet Layer Protocol (PLP) (say Y here
17 if you want that) and the lower level data link layer protocol LAPB
18 (say Y to "LAPB Data Link Driver" below if you want that).
19
20 You can read more about X.25 at <http://www.sangoma.com/x25.htm> and
21 <http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/cbook/cx25.htm>.
22 Information about X.25 for Linux is contained in the files
23 <file:Documentation/networking/x25.txt> and
24 <file:Documentation/networking/x25-iface.txt>.
25
26 One connects to an X.25 network either with a dedicated network card
27 using the X.21 protocol (not yet supported by Linux) or one can do
28 X.25 over a standard telephone line using an ordinary modem (say Y
29 to "X.25 async driver" below) or over Ethernet using an ordinary
30 Ethernet card and the LAPB over Ethernet (say Y to "LAPB Data Link
31 Driver" and "LAPB over Ethernet driver" below).
32
33 To compile this driver as a module, choose M here: the module
34 will be called x25. If unsure, say N.
35
36
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 58ca6a972c48..0c1c04322baf 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -1,6 +1,10 @@
1# 1#
2# XFRM configuration 2# XFRM configuration
3# 3#
4config XFRM
5 bool
6 depends on NET
7
4config XFRM_USER 8config XFRM_USER
5 tristate "IPsec user configuration interface" 9 tristate "IPsec user configuration interface"
6 depends on INET && XFRM 10 depends on INET && XFRM
@@ -10,3 +14,14 @@ config XFRM_USER
10 14
11 If unsure, say Y. 15 If unsure, say Y.
12 16
17config NET_KEY
18 tristate "PF_KEY sockets"
19 select XFRM
20 ---help---
21 PF_KEYv2 socket family, compatible to KAME ones.
22 They are required if you are going to use IPsec tools ported
23 from KAME.
24
25 Say Y unless you know what you are doing.
26
27