aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/fib_trie.txt145
-rw-r--r--arch/i386/kernel/kprobes.c2
-rw-r--r--arch/ia64/kernel/kprobes.c2
-rw-r--r--arch/ppc/8xx_io/enet.c52
-rw-r--r--arch/ppc/Kconfig22
-rw-r--r--arch/ppc/configs/mpc86x_ads_defconfig633
-rw-r--r--arch/ppc/configs/mpc885ads_defconfig622
-rw-r--r--arch/ppc/kernel/time.c13
-rw-r--r--arch/ppc/platforms/fads.h109
-rw-r--r--arch/ppc/platforms/mpc885ads.h92
-rw-r--r--arch/ppc/syslib/ppc4xx_pic.c4
-rw-r--r--arch/ppc64/kernel/kprobes.c2
-rw-r--r--arch/sparc64/kernel/kprobes.c5
-rw-r--r--arch/x86_64/kernel/kprobes.c2
-rw-r--r--drivers/net/shaper.c42
-rw-r--r--drivers/net/skge.h1
-rw-r--r--drivers/net/tg3.c69
-rw-r--r--drivers/net/tg3.h10
-rw-r--r--drivers/serial/cpm_uart/cpm_uart_cpm1.c32
-rw-r--r--include/asm-ppc/mpc8xx.h4
-rw-r--r--include/linux/if_shaper.h2
-rw-r--r--include/linux/kprobes.h2
-rw-r--r--include/linux/skbuff.h19
-rw-r--r--include/linux/tc_ematch/tc_em_meta.h2
-rw-r--r--include/linux/tcp.h2
-rw-r--r--include/net/pkt_sched.h17
-rw-r--r--include/net/sch_generic.h13
-rw-r--r--include/net/slhc_vj.h21
-rw-r--r--include/net/sock.h7
-rw-r--r--include/net/tcp.h156
-rw-r--r--kernel/kprobes.c2
-rw-r--r--net/core/dev.c5
-rw-r--r--net/core/filter.c104
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/decnet/dn_fib.c3
-rw-r--r--net/ipv4/af_inet.c11
-rw-r--r--net/ipv4/fib_trie.c202
-rw-r--r--net/ipv4/ip_output.c16
-rw-r--r--net/ipv4/route.c126
-rw-r--r--net/ipv4/tcp.c44
-rw-r--r--net/ipv4/tcp_input.c76
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_output.c544
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/ip6_output.c1
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/em_meta.c6
-rw-r--r--net/sched/sch_api.c63
-rw-r--r--net/sched/sch_blackhole.c54
-rw-r--r--net/sched/sch_generic.c35
51 files changed, 2764 insertions, 644 deletions
diff --git a/Documentation/networking/fib_trie.txt b/Documentation/networking/fib_trie.txt
new file mode 100644
index 000000000000..f50d0c673c57
--- /dev/null
+++ b/Documentation/networking/fib_trie.txt
@@ -0,0 +1,145 @@
1 LC-trie implementation notes.
2
3Node types
4----------
5leaf
6 An end node with data. This has a copy of the relevant key, along
7 with 'hlist' with routing table entries sorted by prefix length.
8 See struct leaf and struct leaf_info.
9
10trie node or tnode
11 An internal node, holding an array of child (leaf or tnode) pointers,
12 indexed through a subset of the key. See Level Compression.
13
14A few concepts explained
15------------------------
16Bits (tnode)
17 The number of bits in the key segment used for indexing into the
18 child array - the "child index". See Level Compression.
19
20Pos (tnode)
21 The position (in the key) of the key segment used for indexing into
22 the child array. See Path Compression.
23
24Path Compression / skipped bits
25 Any given tnode is linked to from the child array of its parent, using
26 a segment of the key specified by the parent's "pos" and "bits"
27 In certain cases, this tnode's own "pos" will not be immediately
28 adjacent to the parent (pos+bits), but there will be some bits
29 in the key skipped over because they represent a single path with no
30 deviations. These "skipped bits" constitute Path Compression.
31 Note that the search algorithm will simply skip over these bits when
32 searching, making it necessary to save the keys in the leaves to
33 verify that they actually do match the key we are searching for.
34
35Level Compression / child arrays
36 the trie is kept level balanced moving, under certain conditions, the
37 children of a full child (see "full_children") up one level, so that
38 instead of a pure binary tree, each internal node ("tnode") may
39 contain an arbitrarily large array of links to several children.
40 Conversely, a tnode with a mostly empty child array (see empty_children)
41 may be "halved", having some of its children moved downwards one level,
42 in order to avoid ever-increasing child arrays.
43
44empty_children
45 the number of positions in the child array of a given tnode that are
46 NULL.
47
48full_children
49 the number of children of a given tnode that aren't path compressed.
50 (in other words, they aren't NULL or leaves and their "pos" is equal
51 to this tnode's "pos"+"bits").
52
53 (The word "full" here is used more in the sense of "complete" than
54 as the opposite of "empty", which might be a tad confusing.)
55
56Comments
57---------
58
59We have tried to keep the structure of the code as close to fib_hash as
60possible to allow verification and help up reviewing.
61
62fib_find_node()
63 A good start for understanding this code. This function implements a
64 straightforward trie lookup.
65
66fib_insert_node()
67 Inserts a new leaf node in the trie. This is bit more complicated than
68 fib_find_node(). Inserting a new node means we might have to run the
69 level compression algorithm on part of the trie.
70
71trie_leaf_remove()
72 Looks up a key, deletes it and runs the level compression algorithm.
73
74trie_rebalance()
75 The key function for the dynamic trie after any change in the trie
76 it is run to optimize and reorganize. Tt will walk the trie upwards
77 towards the root from a given tnode, doing a resize() at each step
78 to implement level compression.
79
80resize()
81 Analyzes a tnode and optimizes the child array size by either inflating
82 or shrinking it repeatedly until it fullfills the criteria for optimal
83 level compression. This part follows the original paper pretty closely
84 and there may be some room for experimentation here.
85
86inflate()
87 Doubles the size of the child array within a tnode. Used by resize().
88
89halve()
90 Halves the size of the child array within a tnode - the inverse of
91 inflate(). Used by resize();
92
93fn_trie_insert(), fn_trie_delete(), fn_trie_select_default()
94 The route manipulation functions. Should conform pretty closely to the
95 corresponding functions in fib_hash.
96
97fn_trie_flush()
98 This walks the full trie (using nextleaf()) and searches for empty
99 leaves which have to be removed.
100
101fn_trie_dump()
102 Dumps the routing table ordered by prefix length. This is somewhat
103 slower than the corresponding fib_hash function, as we have to walk the
104 entire trie for each prefix length. In comparison, fib_hash is organized
105 as one "zone"/hash per prefix length.
106
107Locking
108-------
109
110fib_lock is used for an RW-lock in the same way that this is done in fib_hash.
111However, the functions are somewhat separated for other possible locking
112scenarios. It might conceivably be possible to run trie_rebalance via RCU
113to avoid read_lock in the fn_trie_lookup() function.
114
115Main lookup mechanism
116---------------------
117fn_trie_lookup() is the main lookup function.
118
119The lookup is in its simplest form just like fib_find_node(). We descend the
120trie, key segment by key segment, until we find a leaf. check_leaf() does
121the fib_semantic_match in the leaf's sorted prefix hlist.
122
123If we find a match, we are done.
124
125If we don't find a match, we enter prefix matching mode. The prefix length,
126starting out at the same as the key length, is reduced one step at a time,
127and we backtrack upwards through the trie trying to find a longest matching
128prefix. The goal is always to reach a leaf and get a positive result from the
129fib_semantic_match mechanism.
130
131Inside each tnode, the search for longest matching prefix consists of searching
132through the child array, chopping off (zeroing) the least significant "1" of
133the child index until we find a match or the child index consists of nothing but
134zeros.
135
136At this point we backtrack (t->stats.backtrack++) up the trie, continuing to
137chop off part of the key in order to find the longest matching prefix.
138
139At this point we will repeatedly descend subtries to look for a match, and there
140are some optimizations available that can provide us with "shortcuts" to avoid
141descending into dead ends. Look for "HL_OPTIMIZE" sections in the code.
142
143To alleviate any doubts about the correctness of the route selection process,
144a new netlink operation has been added. Look for NETLINK_FIB_LOOKUP, which
145gives userland access to fib_lookup().
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index fc8b17521761..a6d8c45961d3 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -537,7 +537,7 @@ static struct kprobe trampoline_p = {
537 .pre_handler = trampoline_probe_handler 537 .pre_handler = trampoline_probe_handler
538}; 538};
539 539
540int __init arch_init(void) 540int __init arch_init_kprobes(void)
541{ 541{
542 return register_kprobe(&trampoline_p); 542 return register_kprobe(&trampoline_p);
543} 543}
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 3aa3167edbec..884f5cd27d8a 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -713,7 +713,7 @@ static struct kprobe trampoline_p = {
713 .pre_handler = trampoline_probe_handler 713 .pre_handler = trampoline_probe_handler
714}; 714};
715 715
716int __init arch_init(void) 716int __init arch_init_kprobes(void)
717{ 717{
718 trampoline_p.addr = 718 trampoline_p.addr =
719 (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip; 719 (kprobe_opcode_t *)((struct fnptr *)kretprobe_trampoline)->ip;
diff --git a/arch/ppc/8xx_io/enet.c b/arch/ppc/8xx_io/enet.c
index 4ea7158e5062..ece6a9fbe09b 100644
--- a/arch/ppc/8xx_io/enet.c
+++ b/arch/ppc/8xx_io/enet.c
@@ -714,16 +714,24 @@ static int __init scc_enet_init(void)
714 immap->im_ioport.iop_pcdat &= ~PC_ENET_LBK; /* Disable Loopback */ 714 immap->im_ioport.iop_pcdat &= ~PC_ENET_LBK; /* Disable Loopback */
715#endif /* PC_ENET_LBK */ 715#endif /* PC_ENET_LBK */
716 716
717 /* Configure port C pins to enable CLSN and RENA. 717#ifdef PE_ENET_TCLK
718 /* Configure port E for TCLK and RCLK.
718 */ 719 */
719 immap->im_ioport.iop_pcpar &= ~(PC_ENET_CLSN | PC_ENET_RENA); 720 cp->cp_pepar |= (PE_ENET_TCLK | PE_ENET_RCLK);
720 immap->im_ioport.iop_pcdir &= ~(PC_ENET_CLSN | PC_ENET_RENA); 721 cp->cp_pedir &= ~(PE_ENET_TCLK | PE_ENET_RCLK);
721 immap->im_ioport.iop_pcso |= (PC_ENET_CLSN | PC_ENET_RENA); 722 cp->cp_peso &= ~(PE_ENET_TCLK | PE_ENET_RCLK);
722 723#else
723 /* Configure port A for TCLK and RCLK. 724 /* Configure port A for TCLK and RCLK.
724 */ 725 */
725 immap->im_ioport.iop_papar |= (PA_ENET_TCLK | PA_ENET_RCLK); 726 immap->im_ioport.iop_papar |= (PA_ENET_TCLK | PA_ENET_RCLK);
726 immap->im_ioport.iop_padir &= ~(PA_ENET_TCLK | PA_ENET_RCLK); 727 immap->im_ioport.iop_padir &= ~(PA_ENET_TCLK | PA_ENET_RCLK);
728#endif
729
730 /* Configure port C pins to enable CLSN and RENA.
731 */
732 immap->im_ioport.iop_pcpar &= ~(PC_ENET_CLSN | PC_ENET_RENA);
733 immap->im_ioport.iop_pcdir &= ~(PC_ENET_CLSN | PC_ENET_RENA);
734 immap->im_ioport.iop_pcso |= (PC_ENET_CLSN | PC_ENET_RENA);
727 735
728 /* Configure Serial Interface clock routing. 736 /* Configure Serial Interface clock routing.
729 * First, clear all SCC bits to zero, then set the ones we want. 737 * First, clear all SCC bits to zero, then set the ones we want.
@@ -896,14 +904,18 @@ static int __init scc_enet_init(void)
896 /* It is now OK to enable the Ethernet transmitter. 904 /* It is now OK to enable the Ethernet transmitter.
897 * Unfortunately, there are board implementation differences here. 905 * Unfortunately, there are board implementation differences here.
898 */ 906 */
899#if (!defined (PB_ENET_TENA) && defined (PC_ENET_TENA)) 907#if (!defined (PB_ENET_TENA) && defined (PC_ENET_TENA) && !defined (PE_ENET_TENA))
900 immap->im_ioport.iop_pcpar |= PC_ENET_TENA; 908 immap->im_ioport.iop_pcpar |= PC_ENET_TENA;
901 immap->im_ioport.iop_pcdir &= ~PC_ENET_TENA; 909 immap->im_ioport.iop_pcdir &= ~PC_ENET_TENA;
902#elif ( defined (PB_ENET_TENA) && !defined (PC_ENET_TENA)) 910#elif ( defined (PB_ENET_TENA) && !defined (PC_ENET_TENA) && !defined (PE_ENET_TENA))
903 cp->cp_pbpar |= PB_ENET_TENA; 911 cp->cp_pbpar |= PB_ENET_TENA;
904 cp->cp_pbdir |= PB_ENET_TENA; 912 cp->cp_pbdir |= PB_ENET_TENA;
913#elif ( !defined (PB_ENET_TENA) && !defined (PC_ENET_TENA) && defined (PE_ENET_TENA))
914 cp->cp_pepar |= PE_ENET_TENA;
915 cp->cp_pedir &= ~PE_ENET_TENA;
916 cp->cp_peso |= PE_ENET_TENA;
905#else 917#else
906#error Configuration Error: define exactly ONE of PB_ENET_TENA, PC_ENET_TENA 918#error Configuration Error: define exactly ONE of PB_ENET_TENA, PC_ENET_TENA, PE_ENET_TENA
907#endif 919#endif
908 920
909#if defined(CONFIG_RPXLITE) || defined(CONFIG_RPXCLASSIC) 921#if defined(CONFIG_RPXLITE) || defined(CONFIG_RPXCLASSIC)
@@ -936,6 +948,29 @@ static int __init scc_enet_init(void)
936 *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN; 948 *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN;
937#endif 949#endif
938 950
951#ifdef CONFIG_MPC885ADS
952
953 /* Deassert PHY reset and enable the PHY.
954 */
955 {
956 volatile uint __iomem *bcsr = ioremap(BCSR_ADDR, BCSR_SIZE);
957 uint tmp;
958
959 tmp = in_be32(bcsr + 1 /* BCSR1 */);
960 tmp |= BCSR1_ETHEN;
961 out_be32(bcsr + 1, tmp);
962 tmp = in_be32(bcsr + 4 /* BCSR4 */);
963 tmp |= BCSR4_ETH10_RST;
964 out_be32(bcsr + 4, tmp);
965 iounmap(bcsr);
966 }
967
968 /* On MPC885ADS SCC ethernet PHY defaults to the full duplex mode
969 * upon reset. SCC is set to half duplex by default. So this
970 * inconsistency should be better fixed by the software.
971 */
972#endif
973
939 dev->base_addr = (unsigned long)ep; 974 dev->base_addr = (unsigned long)ep;
940#if 0 975#if 0
941 dev->name = "CPM_ENET"; 976 dev->name = "CPM_ENET";
@@ -969,3 +1004,4 @@ static int __init scc_enet_init(void)
969} 1004}
970 1005
971module_init(scc_enet_init); 1006module_init(scc_enet_init);
1007
diff --git a/arch/ppc/Kconfig b/arch/ppc/Kconfig
index a7835cd3f51f..23b0d2f662c5 100644
--- a/arch/ppc/Kconfig
+++ b/arch/ppc/Kconfig
@@ -284,6 +284,9 @@ endmenu
284 284
285menu "Platform options" 285menu "Platform options"
286 286
287config FADS
288 bool
289
287choice 290choice
288 prompt "8xx Machine Type" 291 prompt "8xx Machine Type"
289 depends on 8xx 292 depends on 8xx
@@ -399,8 +402,25 @@ config BSEIP
399 26MB DRAM, 4MB flash, Ethernet, a 16K-gate FPGA, USB, an LCD/video 402 26MB DRAM, 4MB flash, Ethernet, a 16K-gate FPGA, USB, an LCD/video
400 controller, and two RS232 ports. 403 controller, and two RS232 ports.
401 404
402config FADS 405config MPC8XXFADS
403 bool "FADS" 406 bool "FADS"
407 select FADS
408
409config MPC86XADS
410 bool "MPC86XADS"
411 help
412 MPC86x Application Development System by Freescale Semiconductor.
413 The MPC86xADS is meant to serve as a platform for s/w and h/w
414 development around the MPC86X processor families.
415 select FADS
416
417config MPC885ADS
418 bool "MPC885ADS"
419 help
420 Freescale Semiconductor MPC885 Application Development System (ADS).
421 Also known as DUET.
422 The MPC885ADS is meant to serve as a platform for s/w and h/w
423 development around the MPC885 processor family.
404 424
405config TQM823L 425config TQM823L
406 bool "TQM823L" 426 bool "TQM823L"
diff --git a/arch/ppc/configs/mpc86x_ads_defconfig b/arch/ppc/configs/mpc86x_ads_defconfig
new file mode 100644
index 000000000000..f63c6f59d68a
--- /dev/null
+++ b/arch/ppc/configs/mpc86x_ads_defconfig
@@ -0,0 +1,633 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.12-rc4
4# Tue Jun 14 13:36:35 2005
5#
6CONFIG_MMU=y
7CONFIG_GENERIC_HARDIRQS=y
8CONFIG_RWSEM_XCHGADD_ALGORITHM=y
9CONFIG_GENERIC_CALIBRATE_DELAY=y
10CONFIG_HAVE_DEC_LOCK=y
11CONFIG_PPC=y
12CONFIG_PPC32=y
13CONFIG_GENERIC_NVRAM=y
14CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
15
16#
17# Code maturity level options
18#
19CONFIG_EXPERIMENTAL=y
20# CONFIG_CLEAN_COMPILE is not set
21CONFIG_BROKEN=y
22CONFIG_BROKEN_ON_SMP=y
23CONFIG_INIT_ENV_ARG_LIMIT=32
24
25#
26# General setup
27#
28CONFIG_LOCALVERSION=""
29# CONFIG_SWAP is not set
30CONFIG_SYSVIPC=y
31# CONFIG_POSIX_MQUEUE is not set
32# CONFIG_BSD_PROCESS_ACCT is not set
33CONFIG_SYSCTL=y
34# CONFIG_AUDIT is not set
35# CONFIG_HOTPLUG is not set
36CONFIG_KOBJECT_UEVENT=y
37# CONFIG_IKCONFIG is not set
38CONFIG_EMBEDDED=y
39# CONFIG_KALLSYMS is not set
40CONFIG_PRINTK=y
41CONFIG_BUG=y
42# CONFIG_BASE_FULL is not set
43CONFIG_FUTEX=y
44# CONFIG_EPOLL is not set
45# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
46# CONFIG_SHMEM is not set
47CONFIG_CC_ALIGN_FUNCTIONS=0
48CONFIG_CC_ALIGN_LABELS=0
49CONFIG_CC_ALIGN_LOOPS=0
50CONFIG_CC_ALIGN_JUMPS=0
51CONFIG_TINY_SHMEM=y
52CONFIG_BASE_SMALL=1
53
54#
55# Loadable module support
56#
57CONFIG_MODULES=y
58# CONFIG_MODULE_UNLOAD is not set
59CONFIG_OBSOLETE_MODPARM=y
60# CONFIG_MODVERSIONS is not set
61# CONFIG_MODULE_SRCVERSION_ALL is not set
62# CONFIG_KMOD is not set
63
64#
65# Processor
66#
67# CONFIG_6xx is not set
68# CONFIG_40x is not set
69# CONFIG_44x is not set
70# CONFIG_POWER3 is not set
71# CONFIG_POWER4 is not set
72CONFIG_8xx=y
73# CONFIG_E500 is not set
74# CONFIG_MATH_EMULATION is not set
75# CONFIG_CPU_FREQ is not set
76CONFIG_EMBEDDEDBOOT=y
77# CONFIG_PM is not set
78CONFIG_NOT_COHERENT_CACHE=y
79
80#
81# Platform options
82#
83CONFIG_FADS=y
84# CONFIG_RPXLITE is not set
85# CONFIG_RPXCLASSIC is not set
86# CONFIG_BSEIP is not set
87# CONFIG_MPC8XXFADS is not set
88CONFIG_MPC86XADS=y
89# CONFIG_TQM823L is not set
90# CONFIG_TQM850L is not set
91# CONFIG_TQM855L is not set
92# CONFIG_TQM860L is not set
93# CONFIG_FPS850L is not set
94# CONFIG_SPD823TS is not set
95# CONFIG_IVMS8 is not set
96# CONFIG_IVML24 is not set
97# CONFIG_SM850 is not set
98# CONFIG_HERMES_PRO is not set
99# CONFIG_IP860 is not set
100# CONFIG_LWMON is not set
101# CONFIG_PCU_E is not set
102# CONFIG_CCM is not set
103# CONFIG_LANTEC is not set
104# CONFIG_MBX is not set
105# CONFIG_WINCEPT is not set
106# CONFIG_SMP is not set
107# CONFIG_PREEMPT is not set
108# CONFIG_HIGHMEM is not set
109CONFIG_BINFMT_ELF=y
110# CONFIG_BINFMT_MISC is not set
111# CONFIG_CMDLINE_BOOL is not set
112CONFIG_ISA_DMA_API=y
113
114#
115# Bus options
116#
117# CONFIG_PCI is not set
118# CONFIG_PCI_DOMAINS is not set
119# CONFIG_PCI_QSPAN is not set
120
121#
122# PCCARD (PCMCIA/CardBus) support
123#
124# CONFIG_PCCARD is not set
125
126#
127# Advanced setup
128#
129# CONFIG_ADVANCED_OPTIONS is not set
130
131#
132# Default settings for advanced configuration options are used
133#
134CONFIG_HIGHMEM_START=0xfe000000
135CONFIG_LOWMEM_SIZE=0x30000000
136CONFIG_KERNEL_START=0xc0000000
137CONFIG_TASK_SIZE=0x80000000
138CONFIG_CONSISTENT_START=0xff100000
139CONFIG_CONSISTENT_SIZE=0x00200000
140CONFIG_BOOT_LOAD=0x00400000
141
142#
143# Device Drivers
144#
145
146#
147# Generic Driver Options
148#
149# CONFIG_STANDALONE is not set
150CONFIG_PREVENT_FIRMWARE_BUILD=y
151# CONFIG_FW_LOADER is not set
152
153#
154# Memory Technology Devices (MTD)
155#
156# CONFIG_MTD is not set
157
158#
159# Parallel port support
160#
161# CONFIG_PARPORT is not set
162
163#
164# Plug and Play support
165#
166
167#
168# Block devices
169#
170# CONFIG_BLK_DEV_FD is not set
171# CONFIG_BLK_DEV_COW_COMMON is not set
172CONFIG_BLK_DEV_LOOP=y
173# CONFIG_BLK_DEV_CRYPTOLOOP is not set
174# CONFIG_BLK_DEV_NBD is not set
175# CONFIG_BLK_DEV_RAM is not set
176CONFIG_BLK_DEV_RAM_COUNT=16
177CONFIG_INITRAMFS_SOURCE=""
178# CONFIG_LBD is not set
179# CONFIG_CDROM_PKTCDVD is not set
180
181#
182# IO Schedulers
183#
184CONFIG_IOSCHED_NOOP=y
185CONFIG_IOSCHED_AS=y
186CONFIG_IOSCHED_DEADLINE=y
187CONFIG_IOSCHED_CFQ=y
188# CONFIG_ATA_OVER_ETH is not set
189
190#
191# ATA/ATAPI/MFM/RLL support
192#
193# CONFIG_IDE is not set
194
195#
196# SCSI device support
197#
198# CONFIG_SCSI is not set
199
200#
201# Multi-device support (RAID and LVM)
202#
203# CONFIG_MD is not set
204
205#
206# Fusion MPT device support
207#
208
209#
210# IEEE 1394 (FireWire) support
211#
212# CONFIG_IEEE1394 is not set
213
214#
215# I2O device support
216#
217
218#
219# Macintosh device drivers
220#
221
222#
223# Networking support
224#
225CONFIG_NET=y
226
227#
228# Networking options
229#
230CONFIG_PACKET=y
231# CONFIG_PACKET_MMAP is not set
232CONFIG_UNIX=y
233# CONFIG_NET_KEY is not set
234CONFIG_INET=y
235# CONFIG_IP_MULTICAST is not set
236# CONFIG_IP_ADVANCED_ROUTER is not set
237CONFIG_IP_PNP=y
238CONFIG_IP_PNP_DHCP=y
239# CONFIG_IP_PNP_BOOTP is not set
240# CONFIG_IP_PNP_RARP is not set
241# CONFIG_NET_IPIP is not set
242# CONFIG_NET_IPGRE is not set
243# CONFIG_ARPD is not set
244# CONFIG_SYN_COOKIES is not set
245# CONFIG_INET_AH is not set
246# CONFIG_INET_ESP is not set
247# CONFIG_INET_IPCOMP is not set
248# CONFIG_INET_TUNNEL is not set
249CONFIG_IP_TCPDIAG=y
250# CONFIG_IP_TCPDIAG_IPV6 is not set
251CONFIG_IPV6=m
252# CONFIG_IPV6_PRIVACY is not set
253# CONFIG_INET6_AH is not set
254# CONFIG_INET6_ESP is not set
255# CONFIG_INET6_IPCOMP is not set
256# CONFIG_INET6_TUNNEL is not set
257# CONFIG_IPV6_TUNNEL is not set
258# CONFIG_NETFILTER is not set
259
260#
261# SCTP Configuration (EXPERIMENTAL)
262#
263# CONFIG_IP_SCTP is not set
264# CONFIG_ATM is not set
265# CONFIG_BRIDGE is not set
266# CONFIG_VLAN_8021Q is not set
267# CONFIG_DECNET is not set
268# CONFIG_LLC2 is not set
269# CONFIG_IPX is not set
270# CONFIG_ATALK is not set
271# CONFIG_X25 is not set
272# CONFIG_LAPB is not set
273# CONFIG_NET_DIVERT is not set
274# CONFIG_ECONET is not set
275# CONFIG_WAN_ROUTER is not set
276
277#
278# QoS and/or fair queueing
279#
280# CONFIG_NET_SCHED is not set
281# CONFIG_NET_CLS_ROUTE is not set
282
283#
284# Network testing
285#
286# CONFIG_NET_PKTGEN is not set
287# CONFIG_NETPOLL is not set
288# CONFIG_NET_POLL_CONTROLLER is not set
289# CONFIG_HAMRADIO is not set
290# CONFIG_IRDA is not set
291# CONFIG_BT is not set
292CONFIG_NETDEVICES=y
293# CONFIG_DUMMY is not set
294# CONFIG_BONDING is not set
295# CONFIG_EQUALIZER is not set
296# CONFIG_TUN is not set
297
298#
299# Ethernet (10 or 100Mbit)
300#
301CONFIG_NET_ETHERNET=y
302# CONFIG_MII is not set
303# CONFIG_OAKNET is not set
304
305#
306# Ethernet (1000 Mbit)
307#
308
309#
310# Ethernet (10000 Mbit)
311#
312
313#
314# Token Ring devices
315#
316
317#
318# Wireless LAN (non-hamradio)
319#
320# CONFIG_NET_RADIO is not set
321
322#
323# Wan interfaces
324#
325# CONFIG_WAN is not set
326# CONFIG_PPP is not set
327# CONFIG_SLIP is not set
328# CONFIG_SHAPER is not set
329# CONFIG_NETCONSOLE is not set
330
331#
332# ISDN subsystem
333#
334# CONFIG_ISDN is not set
335
336#
337# Telephony Support
338#
339# CONFIG_PHONE is not set
340
341#
342# Input device support
343#
344# CONFIG_INPUT is not set
345
346#
347# Hardware I/O ports
348#
349# CONFIG_SERIO is not set
350# CONFIG_GAMEPORT is not set
351CONFIG_SOUND_GAMEPORT=y
352
353#
354# Character devices
355#
356# CONFIG_VT is not set
357# CONFIG_SERIAL_NONSTANDARD is not set
358
359#
360# Serial drivers
361#
362# CONFIG_SERIAL_8250 is not set
363
364#
365# Non-8250 serial port support
366#
367CONFIG_SERIAL_CORE=y
368CONFIG_SERIAL_CORE_CONSOLE=y
369CONFIG_SERIAL_CPM=y
370CONFIG_SERIAL_CPM_CONSOLE=y
371# CONFIG_SERIAL_CPM_SCC1 is not set
372# CONFIG_SERIAL_CPM_SCC2 is not set
373# CONFIG_SERIAL_CPM_SCC3 is not set
374# CONFIG_SERIAL_CPM_SCC4 is not set
375CONFIG_SERIAL_CPM_SMC1=y
376# CONFIG_SERIAL_CPM_SMC2 is not set
377CONFIG_UNIX98_PTYS=y
378# CONFIG_LEGACY_PTYS is not set
379
380#
381# IPMI
382#
383# CONFIG_IPMI_HANDLER is not set
384
385#
386# Watchdog Cards
387#
388# CONFIG_WATCHDOG is not set
389# CONFIG_NVRAM is not set
390# CONFIG_GEN_RTC is not set
391# CONFIG_DTLK is not set
392# CONFIG_R3964 is not set
393
394#
395# Ftape, the floppy tape device driver
396#
397# CONFIG_AGP is not set
398# CONFIG_DRM is not set
399# CONFIG_RAW_DRIVER is not set
400
401#
402# TPM devices
403#
404
405#
406# I2C support
407#
408# CONFIG_I2C is not set
409
410#
411# Dallas's 1-wire bus
412#
413# CONFIG_W1 is not set
414
415#
416# Misc devices
417#
418
419#
420# Multimedia devices
421#
422# CONFIG_VIDEO_DEV is not set
423
424#
425# Digital Video Broadcasting Devices
426#
427# CONFIG_DVB is not set
428
429#
430# Graphics support
431#
432# CONFIG_FB is not set
433
434#
435# Sound
436#
437# CONFIG_SOUND is not set
438
439#
440# USB support
441#
442# CONFIG_USB_ARCH_HAS_HCD is not set
443# CONFIG_USB_ARCH_HAS_OHCI is not set
444
445#
446# USB Gadget Support
447#
448# CONFIG_USB_GADGET is not set
449
450#
451# MMC/SD Card support
452#
453# CONFIG_MMC is not set
454
455#
456# InfiniBand support
457#
458# CONFIG_INFINIBAND is not set
459
460#
461# File systems
462#
463# CONFIG_EXT2_FS is not set
464CONFIG_EXT3_FS=y
465# CONFIG_EXT3_FS_XATTR is not set
466CONFIG_JBD=y
467# CONFIG_JBD_DEBUG is not set
468# CONFIG_REISERFS_FS is not set
469# CONFIG_JFS_FS is not set
470
471#
472# XFS support
473#
474# CONFIG_XFS_FS is not set
475# CONFIG_MINIX_FS is not set
476# CONFIG_ROMFS_FS is not set
477# CONFIG_QUOTA is not set
478# CONFIG_DNOTIFY is not set
479# CONFIG_AUTOFS_FS is not set
480# CONFIG_AUTOFS4_FS is not set
481
482#
483# CD-ROM/DVD Filesystems
484#
485# CONFIG_ISO9660_FS is not set
486# CONFIG_UDF_FS is not set
487
488#
489# DOS/FAT/NT Filesystems
490#
491# CONFIG_MSDOS_FS is not set
492# CONFIG_VFAT_FS is not set
493# CONFIG_NTFS_FS is not set
494
495#
496# Pseudo filesystems
497#
498CONFIG_PROC_FS=y
499CONFIG_PROC_KCORE=y
500CONFIG_SYSFS=y
501# CONFIG_DEVFS_FS is not set
502# CONFIG_DEVPTS_FS_XATTR is not set
503# CONFIG_TMPFS is not set
504# CONFIG_HUGETLBFS is not set
505# CONFIG_HUGETLB_PAGE is not set
506CONFIG_RAMFS=y
507
508#
509# Miscellaneous filesystems
510#
511# CONFIG_ADFS_FS is not set
512# CONFIG_AFFS_FS is not set
513# CONFIG_HFS_FS is not set
514# CONFIG_HFSPLUS_FS is not set
515# CONFIG_BEFS_FS is not set
516# CONFIG_BFS_FS is not set
517# CONFIG_EFS_FS is not set
518# CONFIG_CRAMFS is not set
519# CONFIG_VXFS_FS is not set
520# CONFIG_HPFS_FS is not set
521# CONFIG_QNX4FS_FS is not set
522# CONFIG_SYSV_FS is not set
523# CONFIG_UFS_FS is not set
524
525#
526# Network File Systems
527#
528CONFIG_NFS_FS=y
529CONFIG_NFS_V3=y
530CONFIG_NFS_V4=y
531# CONFIG_NFS_DIRECTIO is not set
532# CONFIG_NFSD is not set
533CONFIG_ROOT_NFS=y
534CONFIG_LOCKD=y
535CONFIG_LOCKD_V4=y
536CONFIG_SUNRPC=y
537CONFIG_SUNRPC_GSS=y
538CONFIG_RPCSEC_GSS_KRB5=y
539# CONFIG_RPCSEC_GSS_SPKM3 is not set
540# CONFIG_SMB_FS is not set
541# CONFIG_CIFS is not set
542# CONFIG_NCP_FS is not set
543# CONFIG_CODA_FS is not set
544# CONFIG_AFS_FS is not set
545
546#
547# Partition Types
548#
549# CONFIG_PARTITION_ADVANCED is not set
550CONFIG_MSDOS_PARTITION=y
551
552#
553# Native Language Support
554#
555# CONFIG_NLS is not set
556
557#
558# MPC8xx CPM Options
559#
560CONFIG_SCC_ENET=y
561CONFIG_SCC1_ENET=y
562# CONFIG_SCC2_ENET is not set
563# CONFIG_SCC3_ENET is not set
564# CONFIG_FEC_ENET is not set
565# CONFIG_ENET_BIG_BUFFERS is not set
566
567#
568# Generic MPC8xx Options
569#
570# CONFIG_8xx_COPYBACK is not set
571# CONFIG_8xx_CPU6 is not set
572CONFIG_NO_UCODE_PATCH=y
573# CONFIG_USB_SOF_UCODE_PATCH is not set
574# CONFIG_I2C_SPI_UCODE_PATCH is not set
575# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set
576
577#
578# Library routines
579#
580# CONFIG_CRC_CCITT is not set
581# CONFIG_CRC32 is not set
582# CONFIG_LIBCRC32C is not set
583
584#
585# Profiling support
586#
587# CONFIG_PROFILING is not set
588
589#
590# Kernel hacking
591#
592# CONFIG_PRINTK_TIME is not set
593# CONFIG_DEBUG_KERNEL is not set
594CONFIG_LOG_BUF_SHIFT=14
595
596#
597# Security options
598#
599# CONFIG_KEYS is not set
600# CONFIG_SECURITY is not set
601
602#
603# Cryptographic options
604#
605CONFIG_CRYPTO=y
606# CONFIG_CRYPTO_HMAC is not set
607# CONFIG_CRYPTO_NULL is not set
608# CONFIG_CRYPTO_MD4 is not set
609CONFIG_CRYPTO_MD5=y
610# CONFIG_CRYPTO_SHA1 is not set
611# CONFIG_CRYPTO_SHA256 is not set
612# CONFIG_CRYPTO_SHA512 is not set
613# CONFIG_CRYPTO_WP512 is not set
614# CONFIG_CRYPTO_TGR192 is not set
615CONFIG_CRYPTO_DES=y
616# CONFIG_CRYPTO_BLOWFISH is not set
617# CONFIG_CRYPTO_TWOFISH is not set
618# CONFIG_CRYPTO_SERPENT is not set
619# CONFIG_CRYPTO_AES is not set
620# CONFIG_CRYPTO_CAST5 is not set
621# CONFIG_CRYPTO_CAST6 is not set
622# CONFIG_CRYPTO_TEA is not set
623# CONFIG_CRYPTO_ARC4 is not set
624# CONFIG_CRYPTO_KHAZAD is not set
625# CONFIG_CRYPTO_ANUBIS is not set
626# CONFIG_CRYPTO_DEFLATE is not set
627# CONFIG_CRYPTO_MICHAEL_MIC is not set
628# CONFIG_CRYPTO_CRC32C is not set
629# CONFIG_CRYPTO_TEST is not set
630
631#
632# Hardware crypto devices
633#
diff --git a/arch/ppc/configs/mpc885ads_defconfig b/arch/ppc/configs/mpc885ads_defconfig
new file mode 100644
index 000000000000..016f94d9325f
--- /dev/null
+++ b/arch/ppc/configs/mpc885ads_defconfig
@@ -0,0 +1,622 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.12-rc6
4# Thu Jun 9 21:17:29 2005
5#
6CONFIG_MMU=y
7CONFIG_GENERIC_HARDIRQS=y
8CONFIG_RWSEM_XCHGADD_ALGORITHM=y
9CONFIG_GENERIC_CALIBRATE_DELAY=y
10CONFIG_HAVE_DEC_LOCK=y
11CONFIG_PPC=y
12CONFIG_PPC32=y
13CONFIG_GENERIC_NVRAM=y
14CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y
15
16#
17# Code maturity level options
18#
19CONFIG_EXPERIMENTAL=y
20# CONFIG_CLEAN_COMPILE is not set
21CONFIG_BROKEN=y
22CONFIG_BROKEN_ON_SMP=y
23CONFIG_INIT_ENV_ARG_LIMIT=32
24
25#
26# General setup
27#
28CONFIG_LOCALVERSION=""
29# CONFIG_SWAP is not set
30CONFIG_SYSVIPC=y
31# CONFIG_POSIX_MQUEUE is not set
32# CONFIG_BSD_PROCESS_ACCT is not set
33CONFIG_SYSCTL=y
34# CONFIG_AUDIT is not set
35CONFIG_HOTPLUG=y
36CONFIG_KOBJECT_UEVENT=y
37# CONFIG_IKCONFIG is not set
38CONFIG_EMBEDDED=y
39# CONFIG_KALLSYMS is not set
40CONFIG_PRINTK=y
41CONFIG_BUG=y
42CONFIG_BASE_FULL=y
43CONFIG_FUTEX=y
44# CONFIG_EPOLL is not set
45# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
46CONFIG_SHMEM=y
47CONFIG_CC_ALIGN_FUNCTIONS=0
48CONFIG_CC_ALIGN_LABELS=0
49CONFIG_CC_ALIGN_LOOPS=0
50CONFIG_CC_ALIGN_JUMPS=0
51# CONFIG_TINY_SHMEM is not set
52CONFIG_BASE_SMALL=0
53
54#
55# Loadable module support
56#
57# CONFIG_MODULES is not set
58
59#
60# Processor
61#
62# CONFIG_6xx is not set
63# CONFIG_40x is not set
64# CONFIG_44x is not set
65# CONFIG_POWER3 is not set
66# CONFIG_POWER4 is not set
67CONFIG_8xx=y
68# CONFIG_E500 is not set
69# CONFIG_MATH_EMULATION is not set
70# CONFIG_CPU_FREQ is not set
71CONFIG_EMBEDDEDBOOT=y
72# CONFIG_PM is not set
73CONFIG_NOT_COHERENT_CACHE=y
74
75#
76# Platform options
77#
78# CONFIG_RPXLITE is not set
79# CONFIG_RPXCLASSIC is not set
80# CONFIG_BSEIP is not set
81# CONFIG_FADS is not set
82CONFIG_MPC885ADS=y
83# CONFIG_TQM823L is not set
84# CONFIG_TQM850L is not set
85# CONFIG_TQM855L is not set
86# CONFIG_TQM860L is not set
87# CONFIG_FPS850L is not set
88# CONFIG_SPD823TS is not set
89# CONFIG_IVMS8 is not set
90# CONFIG_IVML24 is not set
91# CONFIG_SM850 is not set
92# CONFIG_HERMES_PRO is not set
93# CONFIG_IP860 is not set
94# CONFIG_LWMON is not set
95# CONFIG_PCU_E is not set
96# CONFIG_CCM is not set
97# CONFIG_LANTEC is not set
98# CONFIG_MBX is not set
99# CONFIG_WINCEPT is not set
100# CONFIG_SMP is not set
101# CONFIG_PREEMPT is not set
102# CONFIG_HIGHMEM is not set
103CONFIG_BINFMT_ELF=y
104# CONFIG_BINFMT_MISC is not set
105# CONFIG_CMDLINE_BOOL is not set
106CONFIG_ISA_DMA_API=y
107
108#
109# Bus options
110#
111# CONFIG_PCI is not set
112# CONFIG_PCI_DOMAINS is not set
113# CONFIG_PCI_QSPAN is not set
114
115#
116# PCCARD (PCMCIA/CardBus) support
117#
118# CONFIG_PCCARD is not set
119
120#
121# Advanced setup
122#
123# CONFIG_ADVANCED_OPTIONS is not set
124
125#
126# Default settings for advanced configuration options are used
127#
128CONFIG_HIGHMEM_START=0xfe000000
129CONFIG_LOWMEM_SIZE=0x30000000
130CONFIG_KERNEL_START=0xc0000000
131CONFIG_TASK_SIZE=0x80000000
132CONFIG_CONSISTENT_START=0xff100000
133CONFIG_CONSISTENT_SIZE=0x00200000
134CONFIG_BOOT_LOAD=0x00400000
135
136#
137# Device Drivers
138#
139
140#
141# Generic Driver Options
142#
143CONFIG_STANDALONE=y
144CONFIG_PREVENT_FIRMWARE_BUILD=y
145# CONFIG_FW_LOADER is not set
146
147#
148# Memory Technology Devices (MTD)
149#
150# CONFIG_MTD is not set
151
152#
153# Parallel port support
154#
155# CONFIG_PARPORT is not set
156
157#
158# Plug and Play support
159#
160
161#
162# Block devices
163#
164# CONFIG_BLK_DEV_FD is not set
165# CONFIG_BLK_DEV_COW_COMMON is not set
166# CONFIG_BLK_DEV_LOOP is not set
167# CONFIG_BLK_DEV_NBD is not set
168# CONFIG_BLK_DEV_RAM is not set
169CONFIG_BLK_DEV_RAM_COUNT=16
170CONFIG_INITRAMFS_SOURCE=""
171# CONFIG_LBD is not set
172# CONFIG_CDROM_PKTCDVD is not set
173
174#
175# IO Schedulers
176#
177CONFIG_IOSCHED_NOOP=y
178# CONFIG_IOSCHED_AS is not set
179# CONFIG_IOSCHED_DEADLINE is not set
180# CONFIG_IOSCHED_CFQ is not set
181# CONFIG_ATA_OVER_ETH is not set
182
183#
184# ATA/ATAPI/MFM/RLL support
185#
186# CONFIG_IDE is not set
187
188#
189# SCSI device support
190#
191# CONFIG_SCSI is not set
192
193#
194# Multi-device support (RAID and LVM)
195#
196# CONFIG_MD is not set
197
198#
199# Fusion MPT device support
200#
201
202#
203# IEEE 1394 (FireWire) support
204#
205# CONFIG_IEEE1394 is not set
206
207#
208# I2O device support
209#
210
211#
212# Macintosh device drivers
213#
214
215#
216# Networking support
217#
218CONFIG_NET=y
219
220#
221# Networking options
222#
223CONFIG_PACKET=y
224# CONFIG_PACKET_MMAP is not set
225CONFIG_UNIX=y
226# CONFIG_NET_KEY is not set
227CONFIG_INET=y
228# CONFIG_IP_MULTICAST is not set
229# CONFIG_IP_ADVANCED_ROUTER is not set
230CONFIG_IP_PNP=y
231CONFIG_IP_PNP_DHCP=y
232CONFIG_IP_PNP_BOOTP=y
233# CONFIG_IP_PNP_RARP is not set
234# CONFIG_NET_IPIP is not set
235# CONFIG_NET_IPGRE is not set
236# CONFIG_ARPD is not set
237# CONFIG_SYN_COOKIES is not set
238# CONFIG_INET_AH is not set
239# CONFIG_INET_ESP is not set
240# CONFIG_INET_IPCOMP is not set
241# CONFIG_INET_TUNNEL is not set
242CONFIG_IP_TCPDIAG=y
243# CONFIG_IP_TCPDIAG_IPV6 is not set
244# CONFIG_IPV6 is not set
245# CONFIG_NETFILTER is not set
246
247#
248# SCTP Configuration (EXPERIMENTAL)
249#
250# CONFIG_IP_SCTP is not set
251# CONFIG_ATM is not set
252# CONFIG_BRIDGE is not set
253# CONFIG_VLAN_8021Q is not set
254# CONFIG_DECNET is not set
255# CONFIG_LLC2 is not set
256# CONFIG_IPX is not set
257# CONFIG_ATALK is not set
258# CONFIG_X25 is not set
259# CONFIG_LAPB is not set
260# CONFIG_NET_DIVERT is not set
261# CONFIG_ECONET is not set
262# CONFIG_WAN_ROUTER is not set
263
264#
265# QoS and/or fair queueing
266#
267# CONFIG_NET_SCHED is not set
268# CONFIG_NET_CLS_ROUTE is not set
269
270#
271# Network testing
272#
273# CONFIG_NET_PKTGEN is not set
274# CONFIG_NETPOLL is not set
275# CONFIG_NET_POLL_CONTROLLER is not set
276# CONFIG_HAMRADIO is not set
277# CONFIG_IRDA is not set
278# CONFIG_BT is not set
279CONFIG_NETDEVICES=y
280# CONFIG_DUMMY is not set
281# CONFIG_BONDING is not set
282# CONFIG_EQUALIZER is not set
283# CONFIG_TUN is not set
284
285#
286# Ethernet (10 or 100Mbit)
287#
288CONFIG_NET_ETHERNET=y
289CONFIG_MII=y
290# CONFIG_OAKNET is not set
291
292#
293# Ethernet (1000 Mbit)
294#
295
296#
297# Ethernet (10000 Mbit)
298#
299
300#
301# Token Ring devices
302#
303
304#
305# Wireless LAN (non-hamradio)
306#
307# CONFIG_NET_RADIO is not set
308
309#
310# Wan interfaces
311#
312# CONFIG_WAN is not set
313CONFIG_PPP=y
314# CONFIG_PPP_MULTILINK is not set
315# CONFIG_PPP_FILTER is not set
316CONFIG_PPP_ASYNC=y
317CONFIG_PPP_SYNC_TTY=y
318CONFIG_PPP_DEFLATE=y
319# CONFIG_PPP_BSDCOMP is not set
320# CONFIG_PPPOE is not set
321# CONFIG_SLIP is not set
322# CONFIG_SHAPER is not set
323# CONFIG_NETCONSOLE is not set
324
325#
326# ISDN subsystem
327#
328# CONFIG_ISDN is not set
329
330#
331# Telephony Support
332#
333# CONFIG_PHONE is not set
334
335#
336# Input device support
337#
338# CONFIG_INPUT is not set
339
340#
341# Hardware I/O ports
342#
343# CONFIG_SERIO is not set
344# CONFIG_GAMEPORT is not set
345
346#
347# Character devices
348#
349# CONFIG_VT is not set
350# CONFIG_SERIAL_NONSTANDARD is not set
351
352#
353# Serial drivers
354#
355# CONFIG_SERIAL_8250 is not set
356
357#
358# Non-8250 serial port support
359#
360CONFIG_SERIAL_CORE=y
361CONFIG_SERIAL_CORE_CONSOLE=y
362CONFIG_SERIAL_CPM=y
363CONFIG_SERIAL_CPM_CONSOLE=y
364# CONFIG_SERIAL_CPM_SCC1 is not set
365# CONFIG_SERIAL_CPM_SCC2 is not set
366# CONFIG_SERIAL_CPM_SCC3 is not set
367# CONFIG_SERIAL_CPM_SCC4 is not set
368CONFIG_SERIAL_CPM_SMC1=y
369CONFIG_SERIAL_CPM_SMC2=y
370CONFIG_UNIX98_PTYS=y
371# CONFIG_LEGACY_PTYS is not set
372
373#
374# IPMI
375#
376# CONFIG_IPMI_HANDLER is not set
377
378#
379# Watchdog Cards
380#
381# CONFIG_WATCHDOG is not set
382# CONFIG_NVRAM is not set
383# CONFIG_GEN_RTC is not set
384# CONFIG_DTLK is not set
385# CONFIG_R3964 is not set
386
387#
388# Ftape, the floppy tape device driver
389#
390# CONFIG_AGP is not set
391# CONFIG_DRM is not set
392# CONFIG_RAW_DRIVER is not set
393
394#
395# TPM devices
396#
397
398#
399# I2C support
400#
401# CONFIG_I2C is not set
402
403#
404# Dallas's 1-wire bus
405#
406# CONFIG_W1 is not set
407
408#
409# Misc devices
410#
411
412#
413# Multimedia devices
414#
415# CONFIG_VIDEO_DEV is not set
416
417#
418# Digital Video Broadcasting Devices
419#
420# CONFIG_DVB is not set
421
422#
423# Graphics support
424#
425# CONFIG_FB is not set
426
427#
428# Sound
429#
430# CONFIG_SOUND is not set
431
432#
433# USB support
434#
435# CONFIG_USB_ARCH_HAS_HCD is not set
436# CONFIG_USB_ARCH_HAS_OHCI is not set
437
438#
439# USB Gadget Support
440#
441# CONFIG_USB_GADGET is not set
442
443#
444# MMC/SD Card support
445#
446# CONFIG_MMC is not set
447
448#
449# InfiniBand support
450#
451# CONFIG_INFINIBAND is not set
452
453#
454# File systems
455#
456CONFIG_EXT2_FS=y
457CONFIG_EXT2_FS_XATTR=y
458# CONFIG_EXT2_FS_POSIX_ACL is not set
459# CONFIG_EXT2_FS_SECURITY is not set
460CONFIG_EXT3_FS=y
461CONFIG_EXT3_FS_XATTR=y
462# CONFIG_EXT3_FS_POSIX_ACL is not set
463# CONFIG_EXT3_FS_SECURITY is not set
464CONFIG_JBD=y
465# CONFIG_JBD_DEBUG is not set
466CONFIG_FS_MBCACHE=y
467# CONFIG_REISERFS_FS is not set
468# CONFIG_JFS_FS is not set
469
470#
471# XFS support
472#
473# CONFIG_XFS_FS is not set
474# CONFIG_MINIX_FS is not set
475# CONFIG_ROMFS_FS is not set
476# CONFIG_QUOTA is not set
477# CONFIG_DNOTIFY is not set
478# CONFIG_AUTOFS_FS is not set
479# CONFIG_AUTOFS4_FS is not set
480
481#
482# CD-ROM/DVD Filesystems
483#
484# CONFIG_ISO9660_FS is not set
485# CONFIG_UDF_FS is not set
486
487#
488# DOS/FAT/NT Filesystems
489#
490# CONFIG_MSDOS_FS is not set
491# CONFIG_VFAT_FS is not set
492# CONFIG_NTFS_FS is not set
493
494#
495# Pseudo filesystems
496#
497CONFIG_PROC_FS=y
498# CONFIG_PROC_KCORE is not set
499CONFIG_SYSFS=y
500# CONFIG_DEVFS_FS is not set
501# CONFIG_DEVPTS_FS_XATTR is not set
502# CONFIG_TMPFS is not set
503# CONFIG_HUGETLBFS is not set
504# CONFIG_HUGETLB_PAGE is not set
505CONFIG_RAMFS=y
506
507#
508# Miscellaneous filesystems
509#
510# CONFIG_ADFS_FS is not set
511# CONFIG_AFFS_FS is not set
512# CONFIG_HFS_FS is not set
513# CONFIG_HFSPLUS_FS is not set
514# CONFIG_BEFS_FS is not set
515# CONFIG_BFS_FS is not set
516# CONFIG_EFS_FS is not set
517# CONFIG_CRAMFS is not set
518# CONFIG_VXFS_FS is not set
519# CONFIG_HPFS_FS is not set
520# CONFIG_QNX4FS_FS is not set
521# CONFIG_SYSV_FS is not set
522# CONFIG_UFS_FS is not set
523
524#
525# Network File Systems
526#
527CONFIG_NFS_FS=y
528# CONFIG_NFS_V3 is not set
529# CONFIG_NFS_V4 is not set
530# CONFIG_NFS_DIRECTIO is not set
531# CONFIG_NFSD is not set
532CONFIG_ROOT_NFS=y
533CONFIG_LOCKD=y
534CONFIG_SUNRPC=y
535# CONFIG_RPCSEC_GSS_KRB5 is not set
536# CONFIG_RPCSEC_GSS_SPKM3 is not set
537# CONFIG_SMB_FS is not set
538# CONFIG_CIFS is not set
539# CONFIG_NCP_FS is not set
540# CONFIG_CODA_FS is not set
541# CONFIG_AFS_FS is not set
542
543#
544# Partition Types
545#
546CONFIG_PARTITION_ADVANCED=y
547# CONFIG_ACORN_PARTITION is not set
548# CONFIG_OSF_PARTITION is not set
549# CONFIG_AMIGA_PARTITION is not set
550# CONFIG_ATARI_PARTITION is not set
551# CONFIG_MAC_PARTITION is not set
552CONFIG_MSDOS_PARTITION=y
553# CONFIG_BSD_DISKLABEL is not set
554# CONFIG_MINIX_SUBPARTITION is not set
555# CONFIG_SOLARIS_X86_PARTITION is not set
556# CONFIG_UNIXWARE_DISKLABEL is not set
557# CONFIG_LDM_PARTITION is not set
558# CONFIG_SGI_PARTITION is not set
559# CONFIG_ULTRIX_PARTITION is not set
560# CONFIG_SUN_PARTITION is not set
561# CONFIG_EFI_PARTITION is not set
562
563#
564# Native Language Support
565#
566# CONFIG_NLS is not set
567
568#
569# MPC8xx CPM Options
570#
571CONFIG_SCC_ENET=y
572# CONFIG_SCC1_ENET is not set
573# CONFIG_SCC2_ENET is not set
574CONFIG_SCC3_ENET=y
575# CONFIG_FEC_ENET is not set
576# CONFIG_ENET_BIG_BUFFERS is not set
577
578#
579# Generic MPC8xx Options
580#
581CONFIG_8xx_COPYBACK=y
582CONFIG_8xx_CPU6=y
583CONFIG_NO_UCODE_PATCH=y
584# CONFIG_USB_SOF_UCODE_PATCH is not set
585# CONFIG_I2C_SPI_UCODE_PATCH is not set
586# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set
587
588#
589# Library routines
590#
591CONFIG_CRC_CCITT=y
592# CONFIG_CRC32 is not set
593# CONFIG_LIBCRC32C is not set
594CONFIG_ZLIB_INFLATE=y
595CONFIG_ZLIB_DEFLATE=y
596
597#
598# Profiling support
599#
600# CONFIG_PROFILING is not set
601
602#
603# Kernel hacking
604#
605# CONFIG_PRINTK_TIME is not set
606# CONFIG_DEBUG_KERNEL is not set
607CONFIG_LOG_BUF_SHIFT=14
608
609#
610# Security options
611#
612# CONFIG_KEYS is not set
613# CONFIG_SECURITY is not set
614
615#
616# Cryptographic options
617#
618# CONFIG_CRYPTO is not set
619
620#
621# Hardware crypto devices
622#
diff --git a/arch/ppc/kernel/time.c b/arch/ppc/kernel/time.c
index 735866559199..bf4ddca5e853 100644
--- a/arch/ppc/kernel/time.c
+++ b/arch/ppc/kernel/time.c
@@ -89,6 +89,9 @@ unsigned long tb_to_ns_scale;
89 89
90extern unsigned long wall_jiffies; 90extern unsigned long wall_jiffies;
91 91
92/* used for timezone offset */
93static long timezone_offset;
94
92DEFINE_SPINLOCK(rtc_lock); 95DEFINE_SPINLOCK(rtc_lock);
93 96
94EXPORT_SYMBOL(rtc_lock); 97EXPORT_SYMBOL(rtc_lock);
@@ -170,7 +173,7 @@ void timer_interrupt(struct pt_regs * regs)
170 xtime.tv_sec - last_rtc_update >= 659 && 173 xtime.tv_sec - last_rtc_update >= 659 &&
171 abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ && 174 abs((xtime.tv_nsec / 1000) - (1000000-1000000/HZ)) < 500000/HZ &&
172 jiffies - wall_jiffies == 1) { 175 jiffies - wall_jiffies == 1) {
173 if (ppc_md.set_rtc_time(xtime.tv_sec+1 + time_offset) == 0) 176 if (ppc_md.set_rtc_time(xtime.tv_sec+1 + timezone_offset) == 0)
174 last_rtc_update = xtime.tv_sec+1; 177 last_rtc_update = xtime.tv_sec+1;
175 else 178 else
176 /* Try again one minute later */ 179 /* Try again one minute later */
@@ -286,7 +289,7 @@ void __init time_init(void)
286 unsigned old_stamp, stamp, elapsed; 289 unsigned old_stamp, stamp, elapsed;
287 290
288 if (ppc_md.time_init != NULL) 291 if (ppc_md.time_init != NULL)
289 time_offset = ppc_md.time_init(); 292 timezone_offset = ppc_md.time_init();
290 293
291 if (__USE_RTC()) { 294 if (__USE_RTC()) {
292 /* 601 processor: dec counts down by 128 every 128ns */ 295 /* 601 processor: dec counts down by 128 every 128ns */
@@ -331,10 +334,10 @@ void __init time_init(void)
331 set_dec(tb_ticks_per_jiffy); 334 set_dec(tb_ticks_per_jiffy);
332 335
333 /* If platform provided a timezone (pmac), we correct the time */ 336 /* If platform provided a timezone (pmac), we correct the time */
334 if (time_offset) { 337 if (timezone_offset) {
335 sys_tz.tz_minuteswest = -time_offset / 60; 338 sys_tz.tz_minuteswest = -timezone_offset / 60;
336 sys_tz.tz_dsttime = 0; 339 sys_tz.tz_dsttime = 0;
337 xtime.tv_sec -= time_offset; 340 xtime.tv_sec -= timezone_offset;
338 } 341 }
339 set_normalized_timespec(&wall_to_monotonic, 342 set_normalized_timespec(&wall_to_monotonic,
340 -xtime.tv_sec, -xtime.tv_nsec); 343 -xtime.tv_sec, -xtime.tv_nsec);
diff --git a/arch/ppc/platforms/fads.h b/arch/ppc/platforms/fads.h
index 632b8178ce66..b60c56450b67 100644
--- a/arch/ppc/platforms/fads.h
+++ b/arch/ppc/platforms/fads.h
@@ -3,7 +3,18 @@
3 * the Motorola 860T FADS board. Copied from the MBX stuff. 3 * the Motorola 860T FADS board. Copied from the MBX stuff.
4 * 4 *
5 * Copyright (c) 1998 Dan Malek (dmalek@jlc.net) 5 * Copyright (c) 1998 Dan Malek (dmalek@jlc.net)
6 *
7 * Added MPC86XADS support.
8 * The MPC86xADS manual says the board "is compatible with the MPC8xxFADS
9 * for SW point of view". This is 99% correct.
10 *
11 * Author: MontaVista Software, Inc.
12 * source@mvista.com
13 * 2005 (c) MontaVista Software, Inc. This file is licensed under the
14 * terms of the GNU General Public License version 2. This program is licensed
15 * "as is" without any warranty of any kind, whether express or implied.
6 */ 16 */
17
7#ifdef __KERNEL__ 18#ifdef __KERNEL__
8#ifndef __ASM_FADS_H__ 19#ifndef __ASM_FADS_H__
9#define __ASM_FADS_H__ 20#define __ASM_FADS_H__
@@ -12,18 +23,45 @@
12 23
13#include <asm/ppcboot.h> 24#include <asm/ppcboot.h>
14 25
26#if defined(CONFIG_MPC86XADS)
27
28/* U-Boot maps BCSR to 0xff080000 */
29#define BCSR_ADDR ((uint)0xff080000)
30
31/* MPC86XADS has one more CPLD and an additional BCSR.
32 */
33#define CFG_PHYDEV_ADDR ((uint)0xff0a0000)
34#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300))
35
36#define BCSR5_T1_RST 0x10
37#define BCSR5_ATM155_RST 0x08
38#define BCSR5_ATM25_RST 0x04
39#define BCSR5_MII1_EN 0x02
40#define BCSR5_MII1_RST 0x01
41
42/* There is no PHY link change interrupt */
43#define PHY_INTERRUPT (-1)
44
45#else /* FADS */
46
15/* Memory map is configured by the PROM startup. 47/* Memory map is configured by the PROM startup.
16 * I tried to follow the FADS manual, although the startup PROM 48 * I tried to follow the FADS manual, although the startup PROM
17 * dictates this and we simply have to move some of the physical 49 * dictates this and we simply have to move some of the physical
18 * addresses for Linux. 50 * addresses for Linux.
19 */ 51 */
20#define BCSR_ADDR ((uint)0xff010000) 52#define BCSR_ADDR ((uint)0xff010000)
53
54/* PHY link change interrupt */
55#define PHY_INTERRUPT SIU_IRQ2
56
57#endif /* CONFIG_MPC86XADS */
58
21#define BCSR_SIZE ((uint)(64 * 1024)) 59#define BCSR_SIZE ((uint)(64 * 1024))
22#define BCSR0 ((uint)0xff010000) 60#define BCSR0 ((uint)(BCSR_ADDR + 0x00))
23#define BCSR1 ((uint)0xff010004) 61#define BCSR1 ((uint)(BCSR_ADDR + 0x04))
24#define BCSR2 ((uint)0xff010008) 62#define BCSR2 ((uint)(BCSR_ADDR + 0x08))
25#define BCSR3 ((uint)0xff01000c) 63#define BCSR3 ((uint)(BCSR_ADDR + 0x0c))
26#define BCSR4 ((uint)0xff010010) 64#define BCSR4 ((uint)(BCSR_ADDR + 0x10))
27 65
28#define IMAP_ADDR ((uint)0xff000000) 66#define IMAP_ADDR ((uint)0xff000000)
29#define IMAP_SIZE ((uint)(64 * 1024)) 67#define IMAP_SIZE ((uint)(64 * 1024))
@@ -34,8 +72,17 @@
34/* Bits of interest in the BCSRs. 72/* Bits of interest in the BCSRs.
35 */ 73 */
36#define BCSR1_ETHEN ((uint)0x20000000) 74#define BCSR1_ETHEN ((uint)0x20000000)
75#define BCSR1_IRDAEN ((uint)0x10000000)
37#define BCSR1_RS232EN_1 ((uint)0x01000000) 76#define BCSR1_RS232EN_1 ((uint)0x01000000)
77#define BCSR1_PCCEN ((uint)0x00800000)
78#define BCSR1_PCCVCC0 ((uint)0x00400000)
79#define BCSR1_PCCVPP0 ((uint)0x00200000)
80#define BCSR1_PCCVPP1 ((uint)0x00100000)
81#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
38#define BCSR1_RS232EN_2 ((uint)0x00040000) 82#define BCSR1_RS232EN_2 ((uint)0x00040000)
83#define BCSR1_PCCVCC1 ((uint)0x00010000)
84#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
85
39#define BCSR4_ETHLOOP ((uint)0x80000000) /* EEST Loopback */ 86#define BCSR4_ETHLOOP ((uint)0x80000000) /* EEST Loopback */
40#define BCSR4_EEFDX ((uint)0x40000000) /* EEST FDX enable */ 87#define BCSR4_EEFDX ((uint)0x40000000) /* EEST FDX enable */
41#define BCSR4_FETH_EN ((uint)0x08000000) /* PHY enable */ 88#define BCSR4_FETH_EN ((uint)0x08000000) /* PHY enable */
@@ -44,14 +91,64 @@
44#define BCSR4_FETHFDE ((uint)0x02000000) /* PHY FDX advertise */ 91#define BCSR4_FETHFDE ((uint)0x02000000) /* PHY FDX advertise */
45#define BCSR4_FETHRST ((uint)0x00200000) /* PHY Reset */ 92#define BCSR4_FETHRST ((uint)0x00200000) /* PHY Reset */
46 93
94/* IO_BASE definition for pcmcia.
95 */
96#define _IO_BASE 0x80000000
97#define _IO_BASE_SIZE 0x1000
98
99#ifdef CONFIG_IDE
100#define MAX_HWIFS 1
101#endif
102
47/* Interrupt level assignments. 103/* Interrupt level assignments.
48 */ 104 */
49#define FEC_INTERRUPT SIU_LEVEL1 /* FEC interrupt */ 105#define FEC_INTERRUPT SIU_LEVEL1 /* FEC interrupt */
50#define PHY_INTERRUPT SIU_IRQ2 /* PHY link change interrupt */
51 106
52/* We don't use the 8259. 107/* We don't use the 8259.
53 */ 108 */
54#define NR_8259_INTS 0 109#define NR_8259_INTS 0
55 110
111/* CPM Ethernet through SCC1 or SCC2 */
112
113#ifdef CONFIG_SCC1_ENET /* Probably 860 variant */
114/* Bits in parallel I/O port registers that have to be set/cleared
115 * to configure the pins for SCC1 use.
116 * TCLK - CLK1, RCLK - CLK2.
117 */
118#define PA_ENET_RXD ((ushort)0x0001)
119#define PA_ENET_TXD ((ushort)0x0002)
120#define PA_ENET_TCLK ((ushort)0x0100)
121#define PA_ENET_RCLK ((ushort)0x0200)
122#define PB_ENET_TENA ((uint)0x00001000)
123#define PC_ENET_CLSN ((ushort)0x0010)
124#define PC_ENET_RENA ((ushort)0x0020)
125
126/* Control bits in the SICR to route TCLK (CLK1) and RCLK (CLK2) to
127 * SCC1. Also, make sure GR1 (bit 24) and SC1 (bit 25) are zero.
128 */
129#define SICR_ENET_MASK ((uint)0x000000ff)
130#define SICR_ENET_CLKRT ((uint)0x0000002c)
131#endif /* CONFIG_SCC1_ENET */
132
133#ifdef CONFIG_SCC2_ENET /* Probably 823/850 variant */
134/* Bits in parallel I/O port registers that have to be set/cleared
135 * to configure the pins for SCC1 use.
136 * TCLK - CLK1, RCLK - CLK2.
137 */
138#define PA_ENET_RXD ((ushort)0x0004)
139#define PA_ENET_TXD ((ushort)0x0008)
140#define PA_ENET_TCLK ((ushort)0x0400)
141#define PA_ENET_RCLK ((ushort)0x0200)
142#define PB_ENET_TENA ((uint)0x00002000)
143#define PC_ENET_CLSN ((ushort)0x0040)
144#define PC_ENET_RENA ((ushort)0x0080)
145
146/* Control bits in the SICR to route TCLK and RCLK to
147 * SCC2. Also, make sure GR1 (bit 24) and SC1 (bit 25) are zero.
148 */
149#define SICR_ENET_MASK ((uint)0x0000ff00)
150#define SICR_ENET_CLKRT ((uint)0x00002e00)
151#endif /* CONFIG_SCC2_ENET */
152
56#endif /* __ASM_FADS_H__ */ 153#endif /* __ASM_FADS_H__ */
57#endif /* __KERNEL__ */ 154#endif /* __KERNEL__ */
diff --git a/arch/ppc/platforms/mpc885ads.h b/arch/ppc/platforms/mpc885ads.h
new file mode 100644
index 000000000000..eb386635b0fd
--- /dev/null
+++ b/arch/ppc/platforms/mpc885ads.h
@@ -0,0 +1,92 @@
1/*
2 * A collection of structures, addresses, and values associated with
3 * the Freescale MPC885ADS board.
4 * Copied from the FADS stuff.
5 *
6 * Author: MontaVista Software, Inc.
7 * source@mvista.com
8 *
9 * 2005 (c) MontaVista Software, Inc. This file is licensed under the
10 * terms of the GNU General Public License version 2. This program is licensed
11 * "as is" without any warranty of any kind, whether express or implied.
12 */
13
14#ifdef __KERNEL__
15#ifndef __ASM_MPC885ADS_H__
16#define __ASM_MPC885ADS_H__
17
18#include <linux/config.h>
19
20#include <asm/ppcboot.h>
21
22/* U-Boot maps BCSR to 0xff080000 */
23#define BCSR_ADDR ((uint)0xff080000)
24#define BCSR_SIZE ((uint)32)
25#define BCSR0 ((uint)(BCSR_ADDR + 0x00))
26#define BCSR1 ((uint)(BCSR_ADDR + 0x04))
27#define BCSR2 ((uint)(BCSR_ADDR + 0x08))
28#define BCSR3 ((uint)(BCSR_ADDR + 0x0c))
29#define BCSR4 ((uint)(BCSR_ADDR + 0x10))
30
31#define CFG_PHYDEV_ADDR ((uint)0xff0a0000)
32#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300))
33
34#define IMAP_ADDR ((uint)0xff000000)
35#define IMAP_SIZE ((uint)(64 * 1024))
36
37#define PCMCIA_MEM_ADDR ((uint)0xff020000)
38#define PCMCIA_MEM_SIZE ((uint)(64 * 1024))
39
40/* Bits of interest in the BCSRs.
41 */
42#define BCSR1_ETHEN ((uint)0x20000000)
43#define BCSR1_IRDAEN ((uint)0x10000000)
44#define BCSR1_RS232EN_1 ((uint)0x01000000)
45#define BCSR1_PCCEN ((uint)0x00800000)
46#define BCSR1_PCCVCC0 ((uint)0x00400000)
47#define BCSR1_PCCVPP0 ((uint)0x00200000)
48#define BCSR1_PCCVPP1 ((uint)0x00100000)
49#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1)
50#define BCSR1_RS232EN_2 ((uint)0x00040000)
51#define BCSR1_PCCVCC1 ((uint)0x00010000)
52#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1)
53
54#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/
55#define BCSR4_USB_LO_SPD ((uint)0x04000000)
56#define BCSR4_USB_VCC ((uint)0x02000000)
57#define BCSR4_USB_FULL_SPD ((uint)0x00040000)
58#define BCSR4_USB_EN ((uint)0x00020000)
59
60#define BCSR5_MII2_EN 0x40
61#define BCSR5_MII2_RST 0x20
62#define BCSR5_T1_RST 0x10
63#define BCSR5_ATM155_RST 0x08
64#define BCSR5_ATM25_RST 0x04
65#define BCSR5_MII1_EN 0x02
66#define BCSR5_MII1_RST 0x01
67
68/* Interrupt level assignments */
69#define PHY_INTERRUPT SIU_IRQ7 /* PHY link change interrupt */
70#define SIU_INT_FEC1 SIU_LEVEL1 /* FEC1 interrupt */
71#define SIU_INT_FEC2 SIU_LEVEL3 /* FEC2 interrupt */
72#define FEC_INTERRUPT SIU_INT_FEC1 /* FEC interrupt */
73
74/* We don't use the 8259 */
75#define NR_8259_INTS 0
76
77/* CPM Ethernet through SCC3 */
78#define PA_ENET_RXD ((ushort)0x0040)
79#define PA_ENET_TXD ((ushort)0x0080)
80#define PE_ENET_TCLK ((uint)0x00004000)
81#define PE_ENET_RCLK ((uint)0x00008000)
82#define PE_ENET_TENA ((uint)0x00000010)
83#define PC_ENET_CLSN ((ushort)0x0400)
84#define PC_ENET_RENA ((ushort)0x0800)
85
86/* Control bits in the SICR to route TCLK (CLK5) and RCLK (CLK6) to
87 * SCC3. Also, make sure GR3 (bit 8) and SC3 (bit 9) are zero */
88#define SICR_ENET_MASK ((uint)0x00ff0000)
89#define SICR_ENET_CLKRT ((uint)0x002c0000)
90
91#endif /* __ASM_MPC885ADS_H__ */
92#endif /* __KERNEL__ */
diff --git a/arch/ppc/syslib/ppc4xx_pic.c b/arch/ppc/syslib/ppc4xx_pic.c
index 05686fa73545..40086212b9c3 100644
--- a/arch/ppc/syslib/ppc4xx_pic.c
+++ b/arch/ppc/syslib/ppc4xx_pic.c
@@ -110,6 +110,10 @@ static int ppc4xx_pic_get_irq(struct pt_regs *regs)
110 110
111static void __init ppc4xx_pic_impl_init(void) 111static void __init ppc4xx_pic_impl_init(void)
112{ 112{
113#if defined(CONFIG_440GX)
114 /* Disable 440GP compatibility mode if it was enabled in firmware */
115 SDR_WRITE(DCRN_SDR_MFR, SDR_READ(DCRN_SDR_MFR) & ~DCRN_SDR_MFR_PCM);
116#endif
113 /* Configure Base UIC */ 117 /* Configure Base UIC */
114 mtdcr(DCRN_UIC_CR(UICB), 0); 118 mtdcr(DCRN_UIC_CR(UICB), 0);
115 mtdcr(DCRN_UIC_TR(UICB), 0); 119 mtdcr(DCRN_UIC_TR(UICB), 0);
diff --git a/arch/ppc64/kernel/kprobes.c b/arch/ppc64/kernel/kprobes.c
index 1d2ff6d6b0b3..a3d519518fb8 100644
--- a/arch/ppc64/kernel/kprobes.c
+++ b/arch/ppc64/kernel/kprobes.c
@@ -444,7 +444,7 @@ static struct kprobe trampoline_p = {
444 .pre_handler = trampoline_probe_handler 444 .pre_handler = trampoline_probe_handler
445}; 445};
446 446
447int __init arch_init(void) 447int __init arch_init_kprobes(void)
448{ 448{
449 return register_kprobe(&trampoline_p); 449 return register_kprobe(&trampoline_p);
450} 450}
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index bdac631cf011..bbf11f85dab1 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -433,3 +433,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
433 return 0; 433 return 0;
434} 434}
435 435
436/* architecture specific initialization */
437int arch_init_kprobes(void)
438{
439 return 0;
440}
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index acd2a778ebe6..5c6dc7051482 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -682,7 +682,7 @@ static struct kprobe trampoline_p = {
682 .pre_handler = trampoline_probe_handler 682 .pre_handler = trampoline_probe_handler
683}; 683};
684 684
685int __init arch_init(void) 685int __init arch_init_kprobes(void)
686{ 686{
687 return register_kprobe(&trampoline_p); 687 return register_kprobe(&trampoline_p);
688} 688}
diff --git a/drivers/net/shaper.c b/drivers/net/shaper.c
index 20edeb345792..3ad0b6751f6f 100644
--- a/drivers/net/shaper.c
+++ b/drivers/net/shaper.c
@@ -135,10 +135,8 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
135{ 135{
136 struct shaper *shaper = dev->priv; 136 struct shaper *shaper = dev->priv;
137 struct sk_buff *ptr; 137 struct sk_buff *ptr;
138 138
139 if (down_trylock(&shaper->sem)) 139 spin_lock(&shaper->lock);
140 return -1;
141
142 ptr=shaper->sendq.prev; 140 ptr=shaper->sendq.prev;
143 141
144 /* 142 /*
@@ -232,7 +230,7 @@ static int shaper_start_xmit(struct sk_buff *skb, struct net_device *dev)
232 shaper->stats.collisions++; 230 shaper->stats.collisions++;
233 } 231 }
234 shaper_kick(shaper); 232 shaper_kick(shaper);
235 up(&shaper->sem); 233 spin_unlock(&shaper->lock);
236 return 0; 234 return 0;
237} 235}
238 236
@@ -271,11 +269,9 @@ static void shaper_timer(unsigned long data)
271{ 269{
272 struct shaper *shaper = (struct shaper *)data; 270 struct shaper *shaper = (struct shaper *)data;
273 271
274 if (!down_trylock(&shaper->sem)) { 272 spin_lock(&shaper->lock);
275 shaper_kick(shaper); 273 shaper_kick(shaper);
276 up(&shaper->sem); 274 spin_unlock(&shaper->lock);
277 } else
278 mod_timer(&shaper->timer, jiffies);
279} 275}
280 276
281/* 277/*
@@ -332,21 +328,6 @@ static void shaper_kick(struct shaper *shaper)
332 328
333 329
334/* 330/*
335 * Flush the shaper queues on a closedown
336 */
337
338static void shaper_flush(struct shaper *shaper)
339{
340 struct sk_buff *skb;
341
342 down(&shaper->sem);
343 while((skb=skb_dequeue(&shaper->sendq))!=NULL)
344 dev_kfree_skb(skb);
345 shaper_kick(shaper);
346 up(&shaper->sem);
347}
348
349/*
350 * Bring the interface up. We just disallow this until a 331 * Bring the interface up. We just disallow this until a
351 * bind. 332 * bind.
352 */ 333 */
@@ -375,7 +356,15 @@ static int shaper_open(struct net_device *dev)
375static int shaper_close(struct net_device *dev) 356static int shaper_close(struct net_device *dev)
376{ 357{
377 struct shaper *shaper=dev->priv; 358 struct shaper *shaper=dev->priv;
378 shaper_flush(shaper); 359 struct sk_buff *skb;
360
361 while ((skb = skb_dequeue(&shaper->sendq)) != NULL)
362 dev_kfree_skb(skb);
363
364 spin_lock_bh(&shaper->lock);
365 shaper_kick(shaper);
366 spin_unlock_bh(&shaper->lock);
367
379 del_timer_sync(&shaper->timer); 368 del_timer_sync(&shaper->timer);
380 return 0; 369 return 0;
381} 370}
@@ -576,6 +565,7 @@ static void shaper_init_priv(struct net_device *dev)
576 init_timer(&sh->timer); 565 init_timer(&sh->timer);
577 sh->timer.function=shaper_timer; 566 sh->timer.function=shaper_timer;
578 sh->timer.data=(unsigned long)sh; 567 sh->timer.data=(unsigned long)sh;
568 spin_lock_init(&sh->lock);
579} 569}
580 570
581/* 571/*
diff --git a/drivers/net/skge.h b/drivers/net/skge.h
index 14d0cc01fb9a..fced3d2bc072 100644
--- a/drivers/net/skge.h
+++ b/drivers/net/skge.h
@@ -7,6 +7,7 @@
7/* PCI config registers */ 7/* PCI config registers */
8#define PCI_DEV_REG1 0x40 8#define PCI_DEV_REG1 0x40
9#define PCI_DEV_REG2 0x44 9#define PCI_DEV_REG2 0x44
10#define PCI_REV_DESC 0x4
10 11
11#define PCI_STATUS_ERROR_BITS (PCI_STATUS_DETECTED_PARITY | \ 12#define PCI_STATUS_ERROR_BITS (PCI_STATUS_DETECTED_PARITY | \
12 PCI_STATUS_SIG_SYSTEM_ERROR | \ 13 PCI_STATUS_SIG_SYSTEM_ERROR | \
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 7e371b1209a1..54640686e983 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -66,8 +66,8 @@
66 66
67#define DRV_MODULE_NAME "tg3" 67#define DRV_MODULE_NAME "tg3"
68#define PFX DRV_MODULE_NAME ": " 68#define PFX DRV_MODULE_NAME ": "
69#define DRV_MODULE_VERSION "3.32" 69#define DRV_MODULE_VERSION "3.33"
70#define DRV_MODULE_RELDATE "June 24, 2005" 70#define DRV_MODULE_RELDATE "July 5, 2005"
71 71
72#define TG3_DEF_MAC_MODE 0 72#define TG3_DEF_MAC_MODE 0
73#define TG3_DEF_RX_MODE 0 73#define TG3_DEF_RX_MODE 0
@@ -5117,7 +5117,7 @@ static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr,
5117} 5117}
5118 5118
5119static void __tg3_set_rx_mode(struct net_device *); 5119static void __tg3_set_rx_mode(struct net_device *);
5120static void tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec) 5120static void __tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
5121{ 5121{
5122 tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs); 5122 tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs);
5123 tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs); 5123 tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs);
@@ -5460,7 +5460,7 @@ static int tg3_reset_hw(struct tg3 *tp)
5460 udelay(10); 5460 udelay(10);
5461 } 5461 }
5462 5462
5463 tg3_set_coalesce(tp, &tp->coal); 5463 __tg3_set_coalesce(tp, &tp->coal);
5464 5464
5465 /* set status block DMA address */ 5465 /* set status block DMA address */
5466 tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH, 5466 tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
@@ -7821,6 +7821,60 @@ static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
7821 return 0; 7821 return 0;
7822} 7822}
7823 7823
7824static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
7825{
7826 struct tg3 *tp = netdev_priv(dev);
7827 u32 max_rxcoal_tick_int = 0, max_txcoal_tick_int = 0;
7828 u32 max_stat_coal_ticks = 0, min_stat_coal_ticks = 0;
7829
7830 if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
7831 max_rxcoal_tick_int = MAX_RXCOAL_TICK_INT;
7832 max_txcoal_tick_int = MAX_TXCOAL_TICK_INT;
7833 max_stat_coal_ticks = MAX_STAT_COAL_TICKS;
7834 min_stat_coal_ticks = MIN_STAT_COAL_TICKS;
7835 }
7836
7837 if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) ||
7838 (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) ||
7839 (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) ||
7840 (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) ||
7841 (ec->rx_coalesce_usecs_irq > max_rxcoal_tick_int) ||
7842 (ec->tx_coalesce_usecs_irq > max_txcoal_tick_int) ||
7843 (ec->rx_max_coalesced_frames_irq > MAX_RXCOAL_MAXF_INT) ||
7844 (ec->tx_max_coalesced_frames_irq > MAX_TXCOAL_MAXF_INT) ||
7845 (ec->stats_block_coalesce_usecs > max_stat_coal_ticks) ||
7846 (ec->stats_block_coalesce_usecs < min_stat_coal_ticks))
7847 return -EINVAL;
7848
7849 /* No rx interrupts will be generated if both are zero */
7850 if ((ec->rx_coalesce_usecs == 0) &&
7851 (ec->rx_max_coalesced_frames == 0))
7852 return -EINVAL;
7853
7854 /* No tx interrupts will be generated if both are zero */
7855 if ((ec->tx_coalesce_usecs == 0) &&
7856 (ec->tx_max_coalesced_frames == 0))
7857 return -EINVAL;
7858
7859 /* Only copy relevant parameters, ignore all others. */
7860 tp->coal.rx_coalesce_usecs = ec->rx_coalesce_usecs;
7861 tp->coal.tx_coalesce_usecs = ec->tx_coalesce_usecs;
7862 tp->coal.rx_max_coalesced_frames = ec->rx_max_coalesced_frames;
7863 tp->coal.tx_max_coalesced_frames = ec->tx_max_coalesced_frames;
7864 tp->coal.rx_coalesce_usecs_irq = ec->rx_coalesce_usecs_irq;
7865 tp->coal.tx_coalesce_usecs_irq = ec->tx_coalesce_usecs_irq;
7866 tp->coal.rx_max_coalesced_frames_irq = ec->rx_max_coalesced_frames_irq;
7867 tp->coal.tx_max_coalesced_frames_irq = ec->tx_max_coalesced_frames_irq;
7868 tp->coal.stats_block_coalesce_usecs = ec->stats_block_coalesce_usecs;
7869
7870 if (netif_running(dev)) {
7871 tg3_full_lock(tp, 0);
7872 __tg3_set_coalesce(tp, &tp->coal);
7873 tg3_full_unlock(tp);
7874 }
7875 return 0;
7876}
7877
7824static struct ethtool_ops tg3_ethtool_ops = { 7878static struct ethtool_ops tg3_ethtool_ops = {
7825 .get_settings = tg3_get_settings, 7879 .get_settings = tg3_get_settings,
7826 .set_settings = tg3_set_settings, 7880 .set_settings = tg3_set_settings,
@@ -7856,6 +7910,7 @@ static struct ethtool_ops tg3_ethtool_ops = {
7856 .get_stats_count = tg3_get_stats_count, 7910 .get_stats_count = tg3_get_stats_count,
7857 .get_ethtool_stats = tg3_get_ethtool_stats, 7911 .get_ethtool_stats = tg3_get_ethtool_stats,
7858 .get_coalesce = tg3_get_coalesce, 7912 .get_coalesce = tg3_get_coalesce,
7913 .set_coalesce = tg3_set_coalesce,
7859}; 7914};
7860 7915
7861static void __devinit tg3_get_eeprom_size(struct tg3 *tp) 7916static void __devinit tg3_get_eeprom_size(struct tg3 *tp)
@@ -9800,6 +9855,12 @@ static void __devinit tg3_init_coal(struct tg3 *tp)
9800 ec->tx_coalesce_usecs = LOW_TXCOL_TICKS_CLRTCKS; 9855 ec->tx_coalesce_usecs = LOW_TXCOL_TICKS_CLRTCKS;
9801 ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT_CLRTCKS; 9856 ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT_CLRTCKS;
9802 } 9857 }
9858
9859 if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS) {
9860 ec->rx_coalesce_usecs_irq = 0;
9861 ec->tx_coalesce_usecs_irq = 0;
9862 ec->stats_block_coalesce_usecs = 0;
9863 }
9803} 9864}
9804 9865
9805static int __devinit tg3_init_one(struct pci_dev *pdev, 9866static int __devinit tg3_init_one(struct pci_dev *pdev,
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 99c5f9675a56..70ad450733e6 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -879,31 +879,41 @@
879#define LOW_RXCOL_TICKS_CLRTCKS 0x00000014 879#define LOW_RXCOL_TICKS_CLRTCKS 0x00000014
880#define DEFAULT_RXCOL_TICKS 0x00000048 880#define DEFAULT_RXCOL_TICKS 0x00000048
881#define HIGH_RXCOL_TICKS 0x00000096 881#define HIGH_RXCOL_TICKS 0x00000096
882#define MAX_RXCOL_TICKS 0x000003ff
882#define HOSTCC_TXCOL_TICKS 0x00003c0c 883#define HOSTCC_TXCOL_TICKS 0x00003c0c
883#define LOW_TXCOL_TICKS 0x00000096 884#define LOW_TXCOL_TICKS 0x00000096
884#define LOW_TXCOL_TICKS_CLRTCKS 0x00000048 885#define LOW_TXCOL_TICKS_CLRTCKS 0x00000048
885#define DEFAULT_TXCOL_TICKS 0x0000012c 886#define DEFAULT_TXCOL_TICKS 0x0000012c
886#define HIGH_TXCOL_TICKS 0x00000145 887#define HIGH_TXCOL_TICKS 0x00000145
888#define MAX_TXCOL_TICKS 0x000003ff
887#define HOSTCC_RXMAX_FRAMES 0x00003c10 889#define HOSTCC_RXMAX_FRAMES 0x00003c10
888#define LOW_RXMAX_FRAMES 0x00000005 890#define LOW_RXMAX_FRAMES 0x00000005
889#define DEFAULT_RXMAX_FRAMES 0x00000008 891#define DEFAULT_RXMAX_FRAMES 0x00000008
890#define HIGH_RXMAX_FRAMES 0x00000012 892#define HIGH_RXMAX_FRAMES 0x00000012
893#define MAX_RXMAX_FRAMES 0x000000ff
891#define HOSTCC_TXMAX_FRAMES 0x00003c14 894#define HOSTCC_TXMAX_FRAMES 0x00003c14
892#define LOW_TXMAX_FRAMES 0x00000035 895#define LOW_TXMAX_FRAMES 0x00000035
893#define DEFAULT_TXMAX_FRAMES 0x0000004b 896#define DEFAULT_TXMAX_FRAMES 0x0000004b
894#define HIGH_TXMAX_FRAMES 0x00000052 897#define HIGH_TXMAX_FRAMES 0x00000052
898#define MAX_TXMAX_FRAMES 0x000000ff
895#define HOSTCC_RXCOAL_TICK_INT 0x00003c18 899#define HOSTCC_RXCOAL_TICK_INT 0x00003c18
896#define DEFAULT_RXCOAL_TICK_INT 0x00000019 900#define DEFAULT_RXCOAL_TICK_INT 0x00000019
897#define DEFAULT_RXCOAL_TICK_INT_CLRTCKS 0x00000014 901#define DEFAULT_RXCOAL_TICK_INT_CLRTCKS 0x00000014
902#define MAX_RXCOAL_TICK_INT 0x000003ff
898#define HOSTCC_TXCOAL_TICK_INT 0x00003c1c 903#define HOSTCC_TXCOAL_TICK_INT 0x00003c1c
899#define DEFAULT_TXCOAL_TICK_INT 0x00000019 904#define DEFAULT_TXCOAL_TICK_INT 0x00000019
900#define DEFAULT_TXCOAL_TICK_INT_CLRTCKS 0x00000014 905#define DEFAULT_TXCOAL_TICK_INT_CLRTCKS 0x00000014
906#define MAX_TXCOAL_TICK_INT 0x000003ff
901#define HOSTCC_RXCOAL_MAXF_INT 0x00003c20 907#define HOSTCC_RXCOAL_MAXF_INT 0x00003c20
902#define DEFAULT_RXCOAL_MAXF_INT 0x00000005 908#define DEFAULT_RXCOAL_MAXF_INT 0x00000005
909#define MAX_RXCOAL_MAXF_INT 0x000000ff
903#define HOSTCC_TXCOAL_MAXF_INT 0x00003c24 910#define HOSTCC_TXCOAL_MAXF_INT 0x00003c24
904#define DEFAULT_TXCOAL_MAXF_INT 0x00000005 911#define DEFAULT_TXCOAL_MAXF_INT 0x00000005
912#define MAX_TXCOAL_MAXF_INT 0x000000ff
905#define HOSTCC_STAT_COAL_TICKS 0x00003c28 913#define HOSTCC_STAT_COAL_TICKS 0x00003c28
906#define DEFAULT_STAT_COAL_TICKS 0x000f4240 914#define DEFAULT_STAT_COAL_TICKS 0x000f4240
915#define MAX_STAT_COAL_TICKS 0xd693d400
916#define MIN_STAT_COAL_TICKS 0x00000064
907/* 0x3c2c --> 0x3c30 unused */ 917/* 0x3c2c --> 0x3c30 unused */
908#define HOSTCC_STATS_BLK_HOST_ADDR 0x00003c30 /* 64-bit */ 918#define HOSTCC_STATS_BLK_HOST_ADDR 0x00003c30 /* 64-bit */
909#define HOSTCC_STATUS_BLK_HOST_ADDR 0x00003c38 /* 64-bit */ 919#define HOSTCC_STATUS_BLK_HOST_ADDR 0x00003c38 /* 64-bit */
diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
index de26cf7b003c..7911912f50c7 100644
--- a/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
@@ -94,12 +94,42 @@ void smc1_lineif(struct uart_cpm_port *pinfo)
94 ((immap_t *)IMAP_ADDR)->im_ioport.iop_paodr &= ~iobits; 94 ((immap_t *)IMAP_ADDR)->im_ioport.iop_paodr &= ~iobits;
95 } 95 }
96 96
97#ifdef CONFIG_MPC885ADS
98 /* Enable SMC1 transceivers */
99 {
100 volatile uint __iomem *bcsr1 = ioremap(BCSR1, 4);
101 uint tmp;
102
103 tmp = in_be32(bcsr1);
104 tmp &= ~BCSR1_RS232EN_1;
105 out_be32(bcsr1, tmp);
106 iounmap(bcsr1);
107 }
108#endif
109
97 pinfo->brg = 1; 110 pinfo->brg = 1;
98} 111}
99 112
100void smc2_lineif(struct uart_cpm_port *pinfo) 113void smc2_lineif(struct uart_cpm_port *pinfo)
101{ 114{
102 /* XXX SMC2: insert port configuration here */ 115#ifdef CONFIG_MPC885ADS
116 volatile cpm8xx_t *cp = cpmp;
117 volatile uint __iomem *bcsr1;
118 uint tmp;
119
120 cp->cp_pepar |= 0x00000c00;
121 cp->cp_pedir &= ~0x00000c00;
122 cp->cp_peso &= ~0x00000400;
123 cp->cp_peso |= 0x00000800;
124
125 /* Enable SMC2 transceivers */
126 bcsr1 = ioremap(BCSR1, 4);
127 tmp = in_be32(bcsr1);
128 tmp &= ~BCSR1_RS232EN_2;
129 out_be32(bcsr1, tmp);
130 iounmap(bcsr1);
131#endif
132
103 pinfo->brg = 2; 133 pinfo->brg = 2;
104} 134}
105 135
diff --git a/include/asm-ppc/mpc8xx.h b/include/asm-ppc/mpc8xx.h
index 714d69c819d3..7c31f2d564a1 100644
--- a/include/asm-ppc/mpc8xx.h
+++ b/include/asm-ppc/mpc8xx.h
@@ -68,6 +68,10 @@
68#include <platforms/lantec.h> 68#include <platforms/lantec.h>
69#endif 69#endif
70 70
71#if defined(CONFIG_MPC885ADS)
72#include <platforms/mpc885ads.h>
73#endif
74
71/* Currently, all 8xx boards that support a processor to PCI/ISA bridge 75/* Currently, all 8xx boards that support a processor to PCI/ISA bridge
72 * use the same memory map. 76 * use the same memory map.
73 */ 77 */
diff --git a/include/linux/if_shaper.h b/include/linux/if_shaper.h
index 004e6f09a6e2..68c896a36a34 100644
--- a/include/linux/if_shaper.h
+++ b/include/linux/if_shaper.h
@@ -23,7 +23,7 @@ struct shaper
23 __u32 shapeclock; 23 __u32 shapeclock;
24 unsigned long recovery; /* Time we can next clock a packet out on 24 unsigned long recovery; /* Time we can next clock a packet out on
25 an empty queue */ 25 an empty queue */
26 struct semaphore sem; 26 spinlock_t lock;
27 struct net_device_stats stats; 27 struct net_device_stats stats;
28 struct net_device *dev; 28 struct net_device *dev;
29 int (*hard_start_xmit) (struct sk_buff *skb, 29 int (*hard_start_xmit) (struct sk_buff *skb,
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index b7a194c4362a..e050fc2d4c26 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -155,7 +155,7 @@ extern void arch_copy_kprobe(struct kprobe *p);
155extern void arch_arm_kprobe(struct kprobe *p); 155extern void arch_arm_kprobe(struct kprobe *p);
156extern void arch_disarm_kprobe(struct kprobe *p); 156extern void arch_disarm_kprobe(struct kprobe *p);
157extern void arch_remove_kprobe(struct kprobe *p); 157extern void arch_remove_kprobe(struct kprobe *p);
158extern int arch_init(void); 158extern int arch_init_kprobes(void);
159extern void show_registers(struct pt_regs *regs); 159extern void show_registers(struct pt_regs *regs);
160extern kprobe_opcode_t *get_insn_slot(void); 160extern kprobe_opcode_t *get_insn_slot(void);
161extern void free_insn_slot(kprobe_opcode_t *slot); 161extern void free_insn_slot(kprobe_opcode_t *slot);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 416a2e4024b2..14b950413495 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -183,7 +183,6 @@ struct skb_shared_info {
183 * @priority: Packet queueing priority 183 * @priority: Packet queueing priority
184 * @users: User count - see {datagram,tcp}.c 184 * @users: User count - see {datagram,tcp}.c
185 * @protocol: Packet protocol from driver 185 * @protocol: Packet protocol from driver
186 * @security: Security level of packet
187 * @truesize: Buffer size 186 * @truesize: Buffer size
188 * @head: Head of buffer 187 * @head: Head of buffer
189 * @data: Data head pointer 188 * @data: Data head pointer
@@ -249,18 +248,18 @@ struct sk_buff {
249 data_len, 248 data_len,
250 mac_len, 249 mac_len,
251 csum; 250 csum;
252 unsigned char local_df,
253 cloned:1,
254 nohdr:1,
255 pkt_type,
256 ip_summed;
257 __u32 priority; 251 __u32 priority;
258 unsigned short protocol, 252 __u8 local_df:1,
259 security; 253 cloned:1,
254 ip_summed:2,
255 nohdr:1;
256 /* 3 bits spare */
257 __u8 pkt_type;
258 __u16 protocol;
260 259
261 void (*destructor)(struct sk_buff *skb); 260 void (*destructor)(struct sk_buff *skb);
262#ifdef CONFIG_NETFILTER 261#ifdef CONFIG_NETFILTER
263 unsigned long nfmark; 262 unsigned long nfmark;
264 __u32 nfcache; 263 __u32 nfcache;
265 __u32 nfctinfo; 264 __u32 nfctinfo;
266 struct nf_conntrack *nfct; 265 struct nf_conntrack *nfct;
@@ -1211,7 +1210,7 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
1211{ 1210{
1212 int hlen = skb_headlen(skb); 1211 int hlen = skb_headlen(skb);
1213 1212
1214 if (offset + len <= hlen) 1213 if (hlen - offset >= len)
1215 return skb->data + offset; 1214 return skb->data + offset;
1216 1215
1217 if (skb_copy_bits(skb, offset, buffer, len) < 0) 1216 if (skb_copy_bits(skb, offset, buffer, len) < 0)
diff --git a/include/linux/tc_ematch/tc_em_meta.h b/include/linux/tc_ematch/tc_em_meta.h
index a6b2cc530af5..bcb762d93123 100644
--- a/include/linux/tc_ematch/tc_em_meta.h
+++ b/include/linux/tc_ematch/tc_em_meta.h
@@ -45,7 +45,7 @@ enum
45 TCF_META_ID_REALDEV, 45 TCF_META_ID_REALDEV,
46 TCF_META_ID_PRIORITY, 46 TCF_META_ID_PRIORITY,
47 TCF_META_ID_PROTOCOL, 47 TCF_META_ID_PROTOCOL,
48 TCF_META_ID_SECURITY, 48 TCF_META_ID_SECURITY, /* obsolete */
49 TCF_META_ID_PKTTYPE, 49 TCF_META_ID_PKTTYPE,
50 TCF_META_ID_PKTLEN, 50 TCF_META_ID_PKTLEN,
51 TCF_META_ID_DATALEN, 51 TCF_META_ID_DATALEN,
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index dfd93d03f5d2..e4fd82e42104 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -286,7 +286,7 @@ struct tcp_sock {
286 __u32 max_window; /* Maximal window ever seen from peer */ 286 __u32 max_window; /* Maximal window ever seen from peer */
287 __u32 pmtu_cookie; /* Last pmtu seen by socket */ 287 __u32 pmtu_cookie; /* Last pmtu seen by socket */
288 __u32 mss_cache; /* Cached effective mss, not including SACKS */ 288 __u32 mss_cache; /* Cached effective mss, not including SACKS */
289 __u16 mss_cache_std; /* Like mss_cache, but without TSO */ 289 __u16 xmit_size_goal; /* Goal for segmenting output packets */
290 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ 290 __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */
291 __u8 ca_state; /* State of fast-retransmit machine */ 291 __u8 ca_state; /* State of fast-retransmit machine */
292 __u8 retransmits; /* Number of unrecovered RTO timeouts. */ 292 __u8 retransmits; /* Number of unrecovered RTO timeouts. */
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index fcb05a387dbe..6492e7363d84 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -13,13 +13,12 @@ struct qdisc_walker
13 13
14extern rwlock_t qdisc_tree_lock; 14extern rwlock_t qdisc_tree_lock;
15 15
16#define QDISC_ALIGN 32 16#define QDISC_ALIGNTO 32
17#define QDISC_ALIGN_CONST (QDISC_ALIGN - 1) 17#define QDISC_ALIGN(len) (((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
18 18
19static inline void *qdisc_priv(struct Qdisc *q) 19static inline void *qdisc_priv(struct Qdisc *q)
20{ 20{
21 return (char *)q + ((sizeof(struct Qdisc) + QDISC_ALIGN_CONST) 21 return (char *) q + QDISC_ALIGN(sizeof(struct Qdisc));
22 & ~QDISC_ALIGN_CONST);
23} 22}
24 23
25/* 24/*
@@ -207,8 +206,6 @@ psched_tod_diff(int delta_sec, int bound)
207 206
208#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */ 207#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
209 208
210extern struct Qdisc noop_qdisc;
211extern struct Qdisc_ops noop_qdisc_ops;
212extern struct Qdisc_ops pfifo_qdisc_ops; 209extern struct Qdisc_ops pfifo_qdisc_ops;
213extern struct Qdisc_ops bfifo_qdisc_ops; 210extern struct Qdisc_ops bfifo_qdisc_ops;
214 211
@@ -216,14 +213,6 @@ extern int register_qdisc(struct Qdisc_ops *qops);
216extern int unregister_qdisc(struct Qdisc_ops *qops); 213extern int unregister_qdisc(struct Qdisc_ops *qops);
217extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); 214extern struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle);
218extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); 215extern struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle);
219extern void dev_init_scheduler(struct net_device *dev);
220extern void dev_shutdown(struct net_device *dev);
221extern void dev_activate(struct net_device *dev);
222extern void dev_deactivate(struct net_device *dev);
223extern void qdisc_reset(struct Qdisc *qdisc);
224extern void qdisc_destroy(struct Qdisc *qdisc);
225extern struct Qdisc * qdisc_create_dflt(struct net_device *dev,
226 struct Qdisc_ops *ops);
227extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, 216extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r,
228 struct rtattr *tab); 217 struct rtattr *tab);
229extern void qdisc_put_rtab(struct qdisc_rate_table *tab); 218extern void qdisc_put_rtab(struct qdisc_rate_table *tab);
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7b97405e2dbf..7b6ec9986715 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -164,6 +164,19 @@ extern void qdisc_unlock_tree(struct net_device *dev);
164#define tcf_tree_lock(tp) qdisc_lock_tree((tp)->q->dev) 164#define tcf_tree_lock(tp) qdisc_lock_tree((tp)->q->dev)
165#define tcf_tree_unlock(tp) qdisc_unlock_tree((tp)->q->dev) 165#define tcf_tree_unlock(tp) qdisc_unlock_tree((tp)->q->dev)
166 166
167extern struct Qdisc noop_qdisc;
168extern struct Qdisc_ops noop_qdisc_ops;
169
170extern void dev_init_scheduler(struct net_device *dev);
171extern void dev_shutdown(struct net_device *dev);
172extern void dev_activate(struct net_device *dev);
173extern void dev_deactivate(struct net_device *dev);
174extern void qdisc_reset(struct Qdisc *qdisc);
175extern void qdisc_destroy(struct Qdisc *qdisc);
176extern struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops);
177extern struct Qdisc *qdisc_create_dflt(struct net_device *dev,
178 struct Qdisc_ops *ops);
179
167static inline void 180static inline void
168tcf_destroy(struct tcf_proto *tp) 181tcf_destroy(struct tcf_proto *tp)
169{ 182{
diff --git a/include/net/slhc_vj.h b/include/net/slhc_vj.h
index 0b2c2784f333..8716d5942b65 100644
--- a/include/net/slhc_vj.h
+++ b/include/net/slhc_vj.h
@@ -170,19 +170,14 @@ struct slcompress {
170}; 170};
171#define NULLSLCOMPR (struct slcompress *)0 171#define NULLSLCOMPR (struct slcompress *)0
172 172
173#define __ARGS(x) x
174
175/* In slhc.c: */ 173/* In slhc.c: */
176struct slcompress *slhc_init __ARGS((int rslots, int tslots)); 174struct slcompress *slhc_init(int rslots, int tslots);
177void slhc_free __ARGS((struct slcompress *comp)); 175void slhc_free(struct slcompress *comp);
178 176
179int slhc_compress __ARGS((struct slcompress *comp, unsigned char *icp, 177int slhc_compress(struct slcompress *comp, unsigned char *icp, int isize,
180 int isize, unsigned char *ocp, unsigned char **cpp, 178 unsigned char *ocp, unsigned char **cpp, int compress_cid);
181 int compress_cid)); 179int slhc_uncompress(struct slcompress *comp, unsigned char *icp, int isize);
182int slhc_uncompress __ARGS((struct slcompress *comp, unsigned char *icp, 180int slhc_remember(struct slcompress *comp, unsigned char *icp, int isize);
183 int isize)); 181int slhc_toss(struct slcompress *comp);
184int slhc_remember __ARGS((struct slcompress *comp, unsigned char *icp,
185 int isize));
186int slhc_toss __ARGS((struct slcompress *comp));
187 182
188#endif /* _SLHC_H */ 183#endif /* _SLHC_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index e593af5b1ecc..7b76f891ae2d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1134,13 +1134,16 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
1134static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, 1134static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk,
1135 int size, int mem, int gfp) 1135 int size, int mem, int gfp)
1136{ 1136{
1137 struct sk_buff *skb = alloc_skb(size + sk->sk_prot->max_header, gfp); 1137 struct sk_buff *skb;
1138 int hdr_len;
1138 1139
1140 hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header);
1141 skb = alloc_skb(size + hdr_len, gfp);
1139 if (skb) { 1142 if (skb) {
1140 skb->truesize += mem; 1143 skb->truesize += mem;
1141 if (sk->sk_forward_alloc >= (int)skb->truesize || 1144 if (sk->sk_forward_alloc >= (int)skb->truesize ||
1142 sk_stream_mem_schedule(sk, skb->truesize, 0)) { 1145 sk_stream_mem_schedule(sk, skb->truesize, 0)) {
1143 skb_reserve(skb, sk->sk_prot->max_header); 1146 skb_reserve(skb, hdr_len);
1144 return skb; 1147 return skb;
1145 } 1148 }
1146 __kfree_skb(skb); 1149 __kfree_skb(skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index ec9e20c27179..a166918ca56d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -721,11 +721,16 @@ static inline int tcp_ack_scheduled(struct tcp_sock *tp)
721 return tp->ack.pending&TCP_ACK_SCHED; 721 return tp->ack.pending&TCP_ACK_SCHED;
722} 722}
723 723
724static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp) 724static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts)
725{ 725{
726 if (tp->ack.quick && --tp->ack.quick == 0) { 726 if (tp->ack.quick) {
727 /* Leaving quickack mode we deflate ATO. */ 727 if (pkts >= tp->ack.quick) {
728 tp->ack.ato = TCP_ATO_MIN; 728 tp->ack.quick = 0;
729
730 /* Leaving quickack mode we deflate ATO. */
731 tp->ack.ato = TCP_ATO_MIN;
732 } else
733 tp->ack.quick -= pkts;
729 } 734 }
730} 735}
731 736
@@ -843,7 +848,9 @@ extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
843 848
844/* tcp_output.c */ 849/* tcp_output.c */
845 850
846extern int tcp_write_xmit(struct sock *, int nonagle); 851extern void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
852 unsigned int cur_mss, int nonagle);
853extern int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp);
847extern int tcp_retransmit_skb(struct sock *, struct sk_buff *); 854extern int tcp_retransmit_skb(struct sock *, struct sk_buff *);
848extern void tcp_xmit_retransmit_queue(struct sock *); 855extern void tcp_xmit_retransmit_queue(struct sock *);
849extern void tcp_simple_retransmit(struct sock *); 856extern void tcp_simple_retransmit(struct sock *);
@@ -855,10 +862,13 @@ extern int tcp_write_wakeup(struct sock *);
855extern void tcp_send_fin(struct sock *sk); 862extern void tcp_send_fin(struct sock *sk);
856extern void tcp_send_active_reset(struct sock *sk, int priority); 863extern void tcp_send_active_reset(struct sock *sk, int priority);
857extern int tcp_send_synack(struct sock *); 864extern int tcp_send_synack(struct sock *);
858extern void tcp_push_one(struct sock *, unsigned mss_now); 865extern void tcp_push_one(struct sock *, unsigned int mss_now);
859extern void tcp_send_ack(struct sock *sk); 866extern void tcp_send_ack(struct sock *sk);
860extern void tcp_send_delayed_ack(struct sock *sk); 867extern void tcp_send_delayed_ack(struct sock *sk);
861 868
869/* tcp_input.c */
870extern void tcp_cwnd_application_limited(struct sock *sk);
871
862/* tcp_timer.c */ 872/* tcp_timer.c */
863extern void tcp_init_xmit_timers(struct sock *); 873extern void tcp_init_xmit_timers(struct sock *);
864extern void tcp_clear_xmit_timers(struct sock *); 874extern void tcp_clear_xmit_timers(struct sock *);
@@ -958,7 +968,7 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
958static inline void tcp_initialize_rcv_mss(struct sock *sk) 968static inline void tcp_initialize_rcv_mss(struct sock *sk)
959{ 969{
960 struct tcp_sock *tp = tcp_sk(sk); 970 struct tcp_sock *tp = tcp_sk(sk);
961 unsigned int hint = min(tp->advmss, tp->mss_cache_std); 971 unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
962 972
963 hint = min(hint, tp->rcv_wnd/2); 973 hint = min(hint, tp->rcv_wnd/2);
964 hint = min(hint, TCP_MIN_RCVMSS); 974 hint = min(hint, TCP_MIN_RCVMSS);
@@ -1225,28 +1235,6 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp)
1225 tp->left_out = tp->sacked_out + tp->lost_out; 1235 tp->left_out = tp->sacked_out + tp->lost_out;
1226} 1236}
1227 1237
1228extern void tcp_cwnd_application_limited(struct sock *sk);
1229
1230/* Congestion window validation. (RFC2861) */
1231
1232static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
1233{
1234 __u32 packets_out = tp->packets_out;
1235
1236 if (packets_out >= tp->snd_cwnd) {
1237 /* Network is feed fully. */
1238 tp->snd_cwnd_used = 0;
1239 tp->snd_cwnd_stamp = tcp_time_stamp;
1240 } else {
1241 /* Network starves. */
1242 if (tp->packets_out > tp->snd_cwnd_used)
1243 tp->snd_cwnd_used = tp->packets_out;
1244
1245 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
1246 tcp_cwnd_application_limited(sk);
1247 }
1248}
1249
1250/* Set slow start threshould and cwnd not falling to slow start */ 1238/* Set slow start threshould and cwnd not falling to slow start */
1251static inline void __tcp_enter_cwr(struct tcp_sock *tp) 1239static inline void __tcp_enter_cwr(struct tcp_sock *tp)
1252{ 1240{
@@ -1279,12 +1267,6 @@ static __inline__ __u32 tcp_max_burst(const struct tcp_sock *tp)
1279 return 3; 1267 return 3;
1280} 1268}
1281 1269
1282static __inline__ int tcp_minshall_check(const struct tcp_sock *tp)
1283{
1284 return after(tp->snd_sml,tp->snd_una) &&
1285 !after(tp->snd_sml, tp->snd_nxt);
1286}
1287
1288static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, 1270static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
1289 const struct sk_buff *skb) 1271 const struct sk_buff *skb)
1290{ 1272{
@@ -1292,122 +1274,18 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss,
1292 tp->snd_sml = TCP_SKB_CB(skb)->end_seq; 1274 tp->snd_sml = TCP_SKB_CB(skb)->end_seq;
1293} 1275}
1294 1276
1295/* Return 0, if packet can be sent now without violation Nagle's rules:
1296 1. It is full sized.
1297 2. Or it contains FIN.
1298 3. Or TCP_NODELAY was set.
1299 4. Or TCP_CORK is not set, and all sent packets are ACKed.
1300 With Minshall's modification: all sent small packets are ACKed.
1301 */
1302
1303static __inline__ int
1304tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb,
1305 unsigned mss_now, int nonagle)
1306{
1307 return (skb->len < mss_now &&
1308 !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
1309 ((nonagle&TCP_NAGLE_CORK) ||
1310 (!nonagle &&
1311 tp->packets_out &&
1312 tcp_minshall_check(tp))));
1313}
1314
1315extern void tcp_set_skb_tso_segs(struct sock *, struct sk_buff *);
1316
1317/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
1318 * should be put on the wire right now.
1319 */
1320static __inline__ int tcp_snd_test(struct sock *sk,
1321 struct sk_buff *skb,
1322 unsigned cur_mss, int nonagle)
1323{
1324 struct tcp_sock *tp = tcp_sk(sk);
1325 int pkts = tcp_skb_pcount(skb);
1326
1327 if (!pkts) {
1328 tcp_set_skb_tso_segs(sk, skb);
1329 pkts = tcp_skb_pcount(skb);
1330 }
1331
1332 /* RFC 1122 - section 4.2.3.4
1333 *
1334 * We must queue if
1335 *
1336 * a) The right edge of this frame exceeds the window
1337 * b) There are packets in flight and we have a small segment
1338 * [SWS avoidance and Nagle algorithm]
1339 * (part of SWS is done on packetization)
1340 * Minshall version sounds: there are no _small_
1341 * segments in flight. (tcp_nagle_check)
1342 * c) We have too many packets 'in flight'
1343 *
1344 * Don't use the nagle rule for urgent data (or
1345 * for the final FIN -DaveM).
1346 *
1347 * Also, Nagle rule does not apply to frames, which
1348 * sit in the middle of queue (they have no chances
1349 * to get new data) and if room at tail of skb is
1350 * not enough to save something seriously (<32 for now).
1351 */
1352
1353 /* Don't be strict about the congestion window for the
1354 * final FIN frame. -DaveM
1355 */
1356 return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
1357 || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
1358 (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
1359 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
1360 !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
1361}
1362
1363static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) 1277static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
1364{ 1278{
1365 if (!tp->packets_out && !tp->pending) 1279 if (!tp->packets_out && !tp->pending)
1366 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); 1280 tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
1367} 1281}
1368 1282
1369static __inline__ int tcp_skb_is_last(const struct sock *sk,
1370 const struct sk_buff *skb)
1371{
1372 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
1373}
1374
1375/* Push out any pending frames which were held back due to
1376 * TCP_CORK or attempt at coalescing tiny packets.
1377 * The socket must be locked by the caller.
1378 */
1379static __inline__ void __tcp_push_pending_frames(struct sock *sk,
1380 struct tcp_sock *tp,
1381 unsigned cur_mss,
1382 int nonagle)
1383{
1384 struct sk_buff *skb = sk->sk_send_head;
1385
1386 if (skb) {
1387 if (!tcp_skb_is_last(sk, skb))
1388 nonagle = TCP_NAGLE_PUSH;
1389 if (!tcp_snd_test(sk, skb, cur_mss, nonagle) ||
1390 tcp_write_xmit(sk, nonagle))
1391 tcp_check_probe_timer(sk, tp);
1392 }
1393 tcp_cwnd_validate(sk, tp);
1394}
1395
1396static __inline__ void tcp_push_pending_frames(struct sock *sk, 1283static __inline__ void tcp_push_pending_frames(struct sock *sk,
1397 struct tcp_sock *tp) 1284 struct tcp_sock *tp)
1398{ 1285{
1399 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle); 1286 __tcp_push_pending_frames(sk, tp, tcp_current_mss(sk, 1), tp->nonagle);
1400} 1287}
1401 1288
1402static __inline__ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
1403{
1404 struct sk_buff *skb = sk->sk_send_head;
1405
1406 return (skb &&
1407 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
1408 tcp_skb_is_last(sk, skb) ? TCP_NAGLE_PUSH : tp->nonagle));
1409}
1410
1411static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq) 1289static __inline__ void tcp_init_wl(struct tcp_sock *tp, u32 ack, u32 seq)
1412{ 1290{
1413 tp->snd_wl1 = seq; 1291 tp->snd_wl1 = seq;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 90c0e82b650c..b0237122b24e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -574,7 +574,7 @@ static int __init init_kprobes(void)
574 INIT_HLIST_HEAD(&kretprobe_inst_table[i]); 574 INIT_HLIST_HEAD(&kretprobe_inst_table[i]);
575 } 575 }
576 576
577 err = arch_init(); 577 err = arch_init_kprobes();
578 if (!err) 578 if (!err)
579 err = register_die_notifier(&kprobe_exceptions_nb); 579 err = register_die_notifier(&kprobe_exceptions_nb);
580 580
diff --git a/net/core/dev.c b/net/core/dev.c
index 7016e0c36b3d..7f5f62c65115 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2089,10 +2089,11 @@ void dev_set_promiscuity(struct net_device *dev, int inc)
2089{ 2089{
2090 unsigned short old_flags = dev->flags; 2090 unsigned short old_flags = dev->flags;
2091 2091
2092 dev->flags |= IFF_PROMISC;
2093 if ((dev->promiscuity += inc) == 0) 2092 if ((dev->promiscuity += inc) == 0)
2094 dev->flags &= ~IFF_PROMISC; 2093 dev->flags &= ~IFF_PROMISC;
2095 if (dev->flags ^ old_flags) { 2094 else
2095 dev->flags |= IFF_PROMISC;
2096 if (dev->flags != old_flags) {
2096 dev_mc_upload(dev); 2097 dev_mc_upload(dev);
2097 printk(KERN_INFO "device %s %s promiscuous mode\n", 2098 printk(KERN_INFO "device %s %s promiscuous mode\n",
2098 dev->name, (dev->flags & IFF_PROMISC) ? "entered" : 2099 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
diff --git a/net/core/filter.c b/net/core/filter.c
index f3b88205ace2..cd91a24f9720 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,7 @@
36#include <linux/filter.h> 36#include <linux/filter.h>
37 37
38/* No hurry in this branch */ 38/* No hurry in this branch */
39static u8 *load_pointer(struct sk_buff *skb, int k) 39static void *__load_pointer(struct sk_buff *skb, int k)
40{ 40{
41 u8 *ptr = NULL; 41 u8 *ptr = NULL;
42 42
@@ -50,6 +50,18 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
50 return NULL; 50 return NULL;
51} 51}
52 52
53static inline void *load_pointer(struct sk_buff *skb, int k,
54 unsigned int size, void *buffer)
55{
56 if (k >= 0)
57 return skb_header_pointer(skb, k, size, buffer);
58 else {
59 if (k >= SKF_AD_OFF)
60 return NULL;
61 return __load_pointer(skb, k);
62 }
63}
64
53/** 65/**
54 * sk_run_filter - run a filter on a socket 66 * sk_run_filter - run a filter on a socket
55 * @skb: buffer to run the filter on 67 * @skb: buffer to run the filter on
@@ -64,15 +76,12 @@ static u8 *load_pointer(struct sk_buff *skb, int k)
64 76
65int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen) 77int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
66{ 78{
67 unsigned char *data = skb->data;
68 /* len is UNSIGNED. Byte wide insns relies only on implicit
69 type casts to prevent reading arbitrary memory locations.
70 */
71 unsigned int len = skb->len-skb->data_len;
72 struct sock_filter *fentry; /* We walk down these */ 79 struct sock_filter *fentry; /* We walk down these */
80 void *ptr;
73 u32 A = 0; /* Accumulator */ 81 u32 A = 0; /* Accumulator */
74 u32 X = 0; /* Index Register */ 82 u32 X = 0; /* Index Register */
75 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */ 83 u32 mem[BPF_MEMWORDS]; /* Scratch Memory Store */
84 u32 tmp;
76 int k; 85 int k;
77 int pc; 86 int pc;
78 87
@@ -168,86 +177,35 @@ int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
168 case BPF_LD|BPF_W|BPF_ABS: 177 case BPF_LD|BPF_W|BPF_ABS:
169 k = fentry->k; 178 k = fentry->k;
170 load_w: 179 load_w:
171 if (k >= 0 && (unsigned int)(k+sizeof(u32)) <= len) { 180 ptr = load_pointer(skb, k, 4, &tmp);
172 A = ntohl(*(u32*)&data[k]); 181 if (ptr != NULL) {
182 A = ntohl(*(u32 *)ptr);
173 continue; 183 continue;
174 } 184 }
175 if (k < 0) {
176 u8 *ptr;
177
178 if (k >= SKF_AD_OFF)
179 break;
180 ptr = load_pointer(skb, k);
181 if (ptr) {
182 A = ntohl(*(u32*)ptr);
183 continue;
184 }
185 } else {
186 u32 _tmp, *p;
187 p = skb_header_pointer(skb, k, 4, &_tmp);
188 if (p != NULL) {
189 A = ntohl(*p);
190 continue;
191 }
192 }
193 return 0; 185 return 0;
194 case BPF_LD|BPF_H|BPF_ABS: 186 case BPF_LD|BPF_H|BPF_ABS:
195 k = fentry->k; 187 k = fentry->k;
196 load_h: 188 load_h:
197 if (k >= 0 && (unsigned int)(k + sizeof(u16)) <= len) { 189 ptr = load_pointer(skb, k, 2, &tmp);
198 A = ntohs(*(u16*)&data[k]); 190 if (ptr != NULL) {
191 A = ntohs(*(u16 *)ptr);
199 continue; 192 continue;
200 } 193 }
201 if (k < 0) {
202 u8 *ptr;
203
204 if (k >= SKF_AD_OFF)
205 break;
206 ptr = load_pointer(skb, k);
207 if (ptr) {
208 A = ntohs(*(u16*)ptr);
209 continue;
210 }
211 } else {
212 u16 _tmp, *p;
213 p = skb_header_pointer(skb, k, 2, &_tmp);
214 if (p != NULL) {
215 A = ntohs(*p);
216 continue;
217 }
218 }
219 return 0; 194 return 0;
220 case BPF_LD|BPF_B|BPF_ABS: 195 case BPF_LD|BPF_B|BPF_ABS:
221 k = fentry->k; 196 k = fentry->k;
222load_b: 197load_b:
223 if (k >= 0 && (unsigned int)k < len) { 198 ptr = load_pointer(skb, k, 1, &tmp);
224 A = data[k]; 199 if (ptr != NULL) {
200 A = *(u8 *)ptr;
225 continue; 201 continue;
226 } 202 }
227 if (k < 0) {
228 u8 *ptr;
229
230 if (k >= SKF_AD_OFF)
231 break;
232 ptr = load_pointer(skb, k);
233 if (ptr) {
234 A = *ptr;
235 continue;
236 }
237 } else {
238 u8 _tmp, *p;
239 p = skb_header_pointer(skb, k, 1, &_tmp);
240 if (p != NULL) {
241 A = *p;
242 continue;
243 }
244 }
245 return 0; 203 return 0;
246 case BPF_LD|BPF_W|BPF_LEN: 204 case BPF_LD|BPF_W|BPF_LEN:
247 A = len; 205 A = skb->len;
248 continue; 206 continue;
249 case BPF_LDX|BPF_W|BPF_LEN: 207 case BPF_LDX|BPF_W|BPF_LEN:
250 X = len; 208 X = skb->len;
251 continue; 209 continue;
252 case BPF_LD|BPF_W|BPF_IND: 210 case BPF_LD|BPF_W|BPF_IND:
253 k = X + fentry->k; 211 k = X + fentry->k;
@@ -259,10 +217,12 @@ load_b:
259 k = X + fentry->k; 217 k = X + fentry->k;
260 goto load_b; 218 goto load_b;
261 case BPF_LDX|BPF_B|BPF_MSH: 219 case BPF_LDX|BPF_B|BPF_MSH:
262 if (fentry->k >= len) 220 ptr = load_pointer(skb, fentry->k, 1, &tmp);
263 return 0; 221 if (ptr != NULL) {
264 X = (data[fentry->k] & 0xf) << 2; 222 X = (*(u8 *)ptr & 0xf) << 2;
265 continue; 223 continue;
224 }
225 return 0;
266 case BPF_LD|BPF_IMM: 226 case BPF_LD|BPF_IMM:
267 A = fentry->k; 227 A = fentry->k;
268 continue; 228 continue;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bb73b2190ec7..733deee24b9f 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -357,7 +357,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
357 C(ip_summed); 357 C(ip_summed);
358 C(priority); 358 C(priority);
359 C(protocol); 359 C(protocol);
360 C(security);
361 n->destructor = NULL; 360 n->destructor = NULL;
362#ifdef CONFIG_NETFILTER 361#ifdef CONFIG_NETFILTER
363 C(nfmark); 362 C(nfmark);
@@ -422,7 +421,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
422 new->pkt_type = old->pkt_type; 421 new->pkt_type = old->pkt_type;
423 new->stamp = old->stamp; 422 new->stamp = old->stamp;
424 new->destructor = NULL; 423 new->destructor = NULL;
425 new->security = old->security;
426#ifdef CONFIG_NETFILTER 424#ifdef CONFIG_NETFILTER
427 new->nfmark = old->nfmark; 425 new->nfmark = old->nfmark;
428 new->nfcache = old->nfcache; 426 new->nfcache = old->nfcache;
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index 9934b25720e4..99bc061759c3 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -551,7 +551,8 @@ int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb)
551 if (t < s_t) 551 if (t < s_t)
552 continue; 552 continue;
553 if (t > s_t) 553 if (t > s_t)
554 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(int)); 554 memset(&cb->args[1], 0,
555 sizeof(cb->args) - sizeof(cb->args[0]));
555 tb = dn_fib_get_table(t, 0); 556 tb = dn_fib_get_table(t, 0);
556 if (tb == NULL) 557 if (tb == NULL)
557 continue; 558 continue;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 658e7977924d..ef7468376ae6 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1009,6 +1009,15 @@ static int __init init_ipv4_mibs(void)
1009static int ipv4_proc_init(void); 1009static int ipv4_proc_init(void);
1010extern void ipfrag_init(void); 1010extern void ipfrag_init(void);
1011 1011
1012/*
1013 * IP protocol layer initialiser
1014 */
1015
1016static struct packet_type ip_packet_type = {
1017 .type = __constant_htons(ETH_P_IP),
1018 .func = ip_rcv,
1019};
1020
1012static int __init inet_init(void) 1021static int __init inet_init(void)
1013{ 1022{
1014 struct sk_buff *dummy_skb; 1023 struct sk_buff *dummy_skb;
@@ -1102,6 +1111,8 @@ static int __init inet_init(void)
1102 1111
1103 ipfrag_init(); 1112 ipfrag_init();
1104 1113
1114 dev_add_pack(&ip_packet_type);
1115
1105 rc = 0; 1116 rc = 0;
1106out: 1117out:
1107 return rc; 1118 return rc;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b56e88edf1b3..4be234c7d8c3 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
43 * 2 of the License, or (at your option) any later version. 43 * 2 of the License, or (at your option) any later version.
44 */ 44 */
45 45
46#define VERSION "0.324" 46#define VERSION "0.325"
47 47
48#include <linux/config.h> 48#include <linux/config.h>
49#include <asm/uaccess.h> 49#include <asm/uaccess.h>
@@ -136,6 +136,7 @@ struct trie_use_stats {
136 unsigned int semantic_match_passed; 136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss; 137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit; 138 unsigned int null_node_hit;
139 unsigned int resize_node_skipped;
139}; 140};
140#endif 141#endif
141 142
@@ -164,8 +165,8 @@ static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull); 165static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165static int tnode_child_length(struct tnode *tn); 166static int tnode_child_length(struct tnode *tn);
166static struct node *resize(struct trie *t, struct tnode *tn); 167static struct node *resize(struct trie *t, struct tnode *tn);
167static struct tnode *inflate(struct trie *t, struct tnode *tn); 168static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
168static struct tnode *halve(struct trie *t, struct tnode *tn); 169static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
169static void tnode_free(struct tnode *tn); 170static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t); 171static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); 172extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
@@ -358,11 +359,32 @@ static inline void free_leaf_info(struct leaf_info *li)
358 kfree(li); 359 kfree(li);
359} 360}
360 361
362static struct tnode *tnode_alloc(unsigned int size)
363{
364 if (size <= PAGE_SIZE) {
365 return kmalloc(size, GFP_KERNEL);
366 } else {
367 return (struct tnode *)
368 __get_free_pages(GFP_KERNEL, get_order(size));
369 }
370}
371
372static void __tnode_free(struct tnode *tn)
373{
374 unsigned int size = sizeof(struct tnode) +
375 (1<<tn->bits) * sizeof(struct node *);
376
377 if (size <= PAGE_SIZE)
378 kfree(tn);
379 else
380 free_pages((unsigned long)tn, get_order(size));
381}
382
361static struct tnode* tnode_new(t_key key, int pos, int bits) 383static struct tnode* tnode_new(t_key key, int pos, int bits)
362{ 384{
363 int nchildren = 1<<bits; 385 int nchildren = 1<<bits;
364 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *); 386 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
365 struct tnode *tn = kmalloc(sz, GFP_KERNEL); 387 struct tnode *tn = tnode_alloc(sz);
366 388
367 if(tn) { 389 if(tn) {
368 memset(tn, 0, sz); 390 memset(tn, 0, sz);
@@ -390,7 +412,7 @@ static void tnode_free(struct tnode *tn)
390 printk("FL %p \n", tn); 412 printk("FL %p \n", tn);
391 } 413 }
392 else if(IS_TNODE(tn)) { 414 else if(IS_TNODE(tn)) {
393 kfree(tn); 415 __tnode_free(tn);
394 if(trie_debug > 0 ) 416 if(trie_debug > 0 )
395 printk("FT %p \n", tn); 417 printk("FT %p \n", tn);
396 } 418 }
@@ -460,6 +482,7 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int w
460static struct node *resize(struct trie *t, struct tnode *tn) 482static struct node *resize(struct trie *t, struct tnode *tn)
461{ 483{
462 int i; 484 int i;
485 int err = 0;
463 486
464 if (!tn) 487 if (!tn)
465 return NULL; 488 return NULL;
@@ -556,12 +579,20 @@ static struct node *resize(struct trie *t, struct tnode *tn)
556 */ 579 */
557 580
558 check_tnode(tn); 581 check_tnode(tn);
559 582
583 err = 0;
560 while ((tn->full_children > 0 && 584 while ((tn->full_children > 0 &&
561 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= 585 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
562 inflate_threshold * tnode_child_length(tn))) { 586 inflate_threshold * tnode_child_length(tn))) {
563 587
564 tn = inflate(t, tn); 588 tn = inflate(t, tn, &err);
589
590 if(err) {
591#ifdef CONFIG_IP_FIB_TRIE_STATS
592 t->stats.resize_node_skipped++;
593#endif
594 break;
595 }
565 } 596 }
566 597
567 check_tnode(tn); 598 check_tnode(tn);
@@ -570,11 +601,22 @@ static struct node *resize(struct trie *t, struct tnode *tn)
570 * Halve as long as the number of empty children in this 601 * Halve as long as the number of empty children in this
571 * node is above threshold. 602 * node is above threshold.
572 */ 603 */
604
605 err = 0;
573 while (tn->bits > 1 && 606 while (tn->bits > 1 &&
574 100 * (tnode_child_length(tn) - tn->empty_children) < 607 100 * (tnode_child_length(tn) - tn->empty_children) <
575 halve_threshold * tnode_child_length(tn)) 608 halve_threshold * tnode_child_length(tn)) {
609
610 tn = halve(t, tn, &err);
611
612 if(err) {
613#ifdef CONFIG_IP_FIB_TRIE_STATS
614 t->stats.resize_node_skipped++;
615#endif
616 break;
617 }
618 }
576 619
577 tn = halve(t, tn);
578 620
579 /* Only one child remains */ 621 /* Only one child remains */
580 622
@@ -599,7 +641,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
599 return (struct node *) tn; 641 return (struct node *) tn;
600} 642}
601 643
602static struct tnode *inflate(struct trie *t, struct tnode *tn) 644static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
603{ 645{
604 struct tnode *inode; 646 struct tnode *inode;
605 struct tnode *oldtnode = tn; 647 struct tnode *oldtnode = tn;
@@ -611,8 +653,63 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
611 653
612 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1); 654 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
613 655
614 if (!tn) 656 if (!tn) {
615 trie_bug("tnode_new failed"); 657 *err = -ENOMEM;
658 return oldtnode;
659 }
660
661 /*
662 * Preallocate and store tnodes before the actual work so we
663 * don't get into an inconsistent state if memory allocation
664 * fails. In case of failure we return the oldnode and inflate
665 * of tnode is ignored.
666 */
667
668 for(i = 0; i < olen; i++) {
669 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
670
671 if (inode &&
672 IS_TNODE(inode) &&
673 inode->pos == oldtnode->pos + oldtnode->bits &&
674 inode->bits > 1) {
675 struct tnode *left, *right;
676
677 t_key m = TKEY_GET_MASK(inode->pos, 1);
678
679 left = tnode_new(inode->key&(~m), inode->pos + 1,
680 inode->bits - 1);
681
682 if(!left) {
683 *err = -ENOMEM;
684 break;
685 }
686
687 right = tnode_new(inode->key|m, inode->pos + 1,
688 inode->bits - 1);
689
690 if(!right) {
691 *err = -ENOMEM;
692 break;
693 }
694
695 put_child(t, tn, 2*i, (struct node *) left);
696 put_child(t, tn, 2*i+1, (struct node *) right);
697 }
698 }
699
700 if(*err) {
701 int size = tnode_child_length(tn);
702 int j;
703
704 for(j = 0; j < size; j++)
705 if( tn->child[j])
706 tnode_free((struct tnode *)tn->child[j]);
707
708 tnode_free(tn);
709
710 *err = -ENOMEM;
711 return oldtnode;
712 }
616 713
617 for(i = 0; i < olen; i++) { 714 for(i = 0; i < olen; i++) {
618 struct node *node = tnode_get_child(oldtnode, i); 715 struct node *node = tnode_get_child(oldtnode, i);
@@ -625,7 +722,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
625 722
626 if(IS_LEAF(node) || ((struct tnode *) node)->pos > 723 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
627 tn->pos + tn->bits - 1) { 724 tn->pos + tn->bits - 1) {
628 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1, 725 if(tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
629 1) == 0) 726 1) == 0)
630 put_child(t, tn, 2*i, node); 727 put_child(t, tn, 2*i, node);
631 else 728 else
@@ -665,27 +762,22 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
665 * the position (inode->pos) 762 * the position (inode->pos)
666 */ 763 */
667 764
668 t_key m = TKEY_GET_MASK(inode->pos, 1);
669
670 /* Use the old key, but set the new significant 765 /* Use the old key, but set the new significant
671 * bit to zero. 766 * bit to zero.
672 */ 767 */
673 left = tnode_new(inode->key&(~m), inode->pos + 1,
674 inode->bits - 1);
675 768
676 if(!left) 769 left = (struct tnode *) tnode_get_child(tn, 2*i);
677 trie_bug("tnode_new failed"); 770 put_child(t, tn, 2*i, NULL);
678 771
679 772 if(!left)
680 /* Use the old key, but set the new significant 773 BUG();
681 * bit to one. 774
682 */ 775 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
683 right = tnode_new(inode->key|m, inode->pos + 1, 776 put_child(t, tn, 2*i+1, NULL);
684 inode->bits - 1); 777
778 if(!right)
779 BUG();
685 780
686 if(!right)
687 trie_bug("tnode_new failed");
688
689 size = tnode_child_length(left); 781 size = tnode_child_length(left);
690 for(j = 0; j < size; j++) { 782 for(j = 0; j < size; j++) {
691 put_child(t, left, j, inode->child[j]); 783 put_child(t, left, j, inode->child[j]);
@@ -701,7 +793,7 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn)
701 return tn; 793 return tn;
702} 794}
703 795
704static struct tnode *halve(struct trie *t, struct tnode *tn) 796static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
705{ 797{
706 struct tnode *oldtnode = tn; 798 struct tnode *oldtnode = tn;
707 struct node *left, *right; 799 struct node *left, *right;
@@ -712,8 +804,48 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
712 804
713 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1); 805 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
714 806
715 if(!tn) 807 if (!tn) {
716 trie_bug("tnode_new failed"); 808 *err = -ENOMEM;
809 return oldtnode;
810 }
811
812 /*
813 * Preallocate and store tnodes before the actual work so we
814 * don't get into an inconsistent state if memory allocation
815 * fails. In case of failure we return the oldnode and halve
816 * of tnode is ignored.
817 */
818
819 for(i = 0; i < olen; i += 2) {
820 left = tnode_get_child(oldtnode, i);
821 right = tnode_get_child(oldtnode, i+1);
822
823 /* Two nonempty children */
824 if( left && right) {
825 struct tnode *newBinNode =
826 tnode_new(left->key, tn->pos + tn->bits, 1);
827
828 if(!newBinNode) {
829 *err = -ENOMEM;
830 break;
831 }
832 put_child(t, tn, i/2, (struct node *)newBinNode);
833 }
834 }
835
836 if(*err) {
837 int size = tnode_child_length(tn);
838 int j;
839
840 for(j = 0; j < size; j++)
841 if( tn->child[j])
842 tnode_free((struct tnode *)tn->child[j]);
843
844 tnode_free(tn);
845
846 *err = -ENOMEM;
847 return oldtnode;
848 }
717 849
718 for(i = 0; i < olen; i += 2) { 850 for(i = 0; i < olen; i += 2) {
719 left = tnode_get_child(oldtnode, i); 851 left = tnode_get_child(oldtnode, i);
@@ -730,10 +862,11 @@ static struct tnode *halve(struct trie *t, struct tnode *tn)
730 /* Two nonempty children */ 862 /* Two nonempty children */
731 else { 863 else {
732 struct tnode *newBinNode = 864 struct tnode *newBinNode =
733 tnode_new(left->key, tn->pos + tn->bits, 1); 865 (struct tnode *) tnode_get_child(tn, i/2);
866 put_child(t, tn, i/2, NULL);
734 867
735 if(!newBinNode) 868 if(!newBinNode)
736 trie_bug("tnode_new failed"); 869 BUG();
737 870
738 put_child(t, newBinNode, 0, left); 871 put_child(t, newBinNode, 0, left);
739 put_child(t, newBinNode, 1, right); 872 put_child(t, newBinNode, 1, right);
@@ -2301,6 +2434,7 @@ static void collect_and_show(struct trie *t, struct seq_file *seq)
2301 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed); 2434 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2302 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss); 2435 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2303 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit); 2436 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2437 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
2304#ifdef CLEAR_STATS 2438#ifdef CLEAR_STATS
2305 memset(&(t->stats), 0, sizeof(t->stats)); 2439 memset(&(t->stats), 0, sizeof(t->stats));
2306#endif 2440#endif
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ce5c3292f9f..9de83e6e0f1d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -389,7 +389,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
389 to->pkt_type = from->pkt_type; 389 to->pkt_type = from->pkt_type;
390 to->priority = from->priority; 390 to->priority = from->priority;
391 to->protocol = from->protocol; 391 to->protocol = from->protocol;
392 to->security = from->security;
393 dst_release(to->dst); 392 dst_release(to->dst);
394 to->dst = dst_clone(from->dst); 393 to->dst = dst_clone(from->dst);
395 to->dev = from->dev; 394 to->dev = from->dev;
@@ -1329,23 +1328,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1329 ip_rt_put(rt); 1328 ip_rt_put(rt);
1330} 1329}
1331 1330
1332/*
1333 * IP protocol layer initialiser
1334 */
1335
1336static struct packet_type ip_packet_type = {
1337 .type = __constant_htons(ETH_P_IP),
1338 .func = ip_rcv,
1339};
1340
1341/*
1342 * IP registers the packet type and then calls the subprotocol initialisers
1343 */
1344
1345void __init ip_init(void) 1331void __init ip_init(void)
1346{ 1332{
1347 dev_add_pack(&ip_packet_type);
1348
1349 ip_rt_init(); 1333 ip_rt_init();
1350 inet_initpeers(); 1334 inet_initpeers();
1351 1335
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 12a1cf306f67..726ea5e8180a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -54,6 +54,7 @@
54 * Marc Boucher : routing by fwmark 54 * Marc Boucher : routing by fwmark
55 * Robert Olsson : Added rt_cache statistics 55 * Robert Olsson : Added rt_cache statistics
56 * Arnaldo C. Melo : Convert proc stuff to seq_file 56 * Arnaldo C. Melo : Convert proc stuff to seq_file
57 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
57 * 58 *
58 * This program is free software; you can redistribute it and/or 59 * This program is free software; you can redistribute it and/or
59 * modify it under the terms of the GNU General Public License 60 * modify it under the terms of the GNU General Public License
@@ -70,6 +71,7 @@
70#include <linux/kernel.h> 71#include <linux/kernel.h>
71#include <linux/sched.h> 72#include <linux/sched.h>
72#include <linux/mm.h> 73#include <linux/mm.h>
74#include <linux/bootmem.h>
73#include <linux/string.h> 75#include <linux/string.h>
74#include <linux/socket.h> 76#include <linux/socket.h>
75#include <linux/sockios.h> 77#include <linux/sockios.h>
@@ -201,8 +203,37 @@ __u8 ip_tos2prio[16] = {
201 203
202struct rt_hash_bucket { 204struct rt_hash_bucket {
203 struct rtable *chain; 205 struct rtable *chain;
204 spinlock_t lock; 206};
205} __attribute__((__aligned__(8))); 207#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
208/*
209 * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks
210 * The size of this table is a power of two and depends on the number of CPUS.
211 */
212#if NR_CPUS >= 32
213#define RT_HASH_LOCK_SZ 4096
214#elif NR_CPUS >= 16
215#define RT_HASH_LOCK_SZ 2048
216#elif NR_CPUS >= 8
217#define RT_HASH_LOCK_SZ 1024
218#elif NR_CPUS >= 4
219#define RT_HASH_LOCK_SZ 512
220#else
221#define RT_HASH_LOCK_SZ 256
222#endif
223
224static spinlock_t *rt_hash_locks;
225# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
226# define rt_hash_lock_init() { \
227 int i; \
228 rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
229 if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
230 for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
231 spin_lock_init(&rt_hash_locks[i]); \
232 }
233#else
234# define rt_hash_lock_addr(slot) NULL
235# define rt_hash_lock_init()
236#endif
206 237
207static struct rt_hash_bucket *rt_hash_table; 238static struct rt_hash_bucket *rt_hash_table;
208static unsigned rt_hash_mask; 239static unsigned rt_hash_mask;
@@ -575,19 +606,26 @@ static struct rtable **rt_remove_balanced_route(struct rtable **chain_head,
575/* This runs via a timer and thus is always in BH context. */ 606/* This runs via a timer and thus is always in BH context. */
576static void rt_check_expire(unsigned long dummy) 607static void rt_check_expire(unsigned long dummy)
577{ 608{
578 static int rover; 609 static unsigned int rover;
579 int i = rover, t; 610 unsigned int i = rover, goal;
580 struct rtable *rth, **rthp; 611 struct rtable *rth, **rthp;
581 unsigned long now = jiffies; 612 unsigned long now = jiffies;
582 613 u64 mult;
583 for (t = ip_rt_gc_interval << rt_hash_log; t >= 0; 614
584 t -= ip_rt_gc_timeout) { 615 mult = ((u64)ip_rt_gc_interval) << rt_hash_log;
616 if (ip_rt_gc_timeout > 1)
617 do_div(mult, ip_rt_gc_timeout);
618 goal = (unsigned int)mult;
619 if (goal > rt_hash_mask) goal = rt_hash_mask + 1;
620 for (; goal > 0; goal--) {
585 unsigned long tmo = ip_rt_gc_timeout; 621 unsigned long tmo = ip_rt_gc_timeout;
586 622
587 i = (i + 1) & rt_hash_mask; 623 i = (i + 1) & rt_hash_mask;
588 rthp = &rt_hash_table[i].chain; 624 rthp = &rt_hash_table[i].chain;
589 625
590 spin_lock(&rt_hash_table[i].lock); 626 if (*rthp == 0)
627 continue;
628 spin_lock(rt_hash_lock_addr(i));
591 while ((rth = *rthp) != NULL) { 629 while ((rth = *rthp) != NULL) {
592 if (rth->u.dst.expires) { 630 if (rth->u.dst.expires) {
593 /* Entry is expired even if it is in use */ 631 /* Entry is expired even if it is in use */
@@ -620,14 +658,14 @@ static void rt_check_expire(unsigned long dummy)
620 rt_free(rth); 658 rt_free(rth);
621#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 659#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
622 } 660 }
623 spin_unlock(&rt_hash_table[i].lock); 661 spin_unlock(rt_hash_lock_addr(i));
624 662
625 /* Fallback loop breaker. */ 663 /* Fallback loop breaker. */
626 if (time_after(jiffies, now)) 664 if (time_after(jiffies, now))
627 break; 665 break;
628 } 666 }
629 rover = i; 667 rover = i;
630 mod_timer(&rt_periodic_timer, now + ip_rt_gc_interval); 668 mod_timer(&rt_periodic_timer, jiffies + ip_rt_gc_interval);
631} 669}
632 670
633/* This can run from both BH and non-BH contexts, the latter 671/* This can run from both BH and non-BH contexts, the latter
@@ -643,11 +681,11 @@ static void rt_run_flush(unsigned long dummy)
643 get_random_bytes(&rt_hash_rnd, 4); 681 get_random_bytes(&rt_hash_rnd, 4);
644 682
645 for (i = rt_hash_mask; i >= 0; i--) { 683 for (i = rt_hash_mask; i >= 0; i--) {
646 spin_lock_bh(&rt_hash_table[i].lock); 684 spin_lock_bh(rt_hash_lock_addr(i));
647 rth = rt_hash_table[i].chain; 685 rth = rt_hash_table[i].chain;
648 if (rth) 686 if (rth)
649 rt_hash_table[i].chain = NULL; 687 rt_hash_table[i].chain = NULL;
650 spin_unlock_bh(&rt_hash_table[i].lock); 688 spin_unlock_bh(rt_hash_lock_addr(i));
651 689
652 for (; rth; rth = next) { 690 for (; rth; rth = next) {
653 next = rth->u.rt_next; 691 next = rth->u.rt_next;
@@ -780,7 +818,7 @@ static int rt_garbage_collect(void)
780 818
781 k = (k + 1) & rt_hash_mask; 819 k = (k + 1) & rt_hash_mask;
782 rthp = &rt_hash_table[k].chain; 820 rthp = &rt_hash_table[k].chain;
783 spin_lock_bh(&rt_hash_table[k].lock); 821 spin_lock_bh(rt_hash_lock_addr(k));
784 while ((rth = *rthp) != NULL) { 822 while ((rth = *rthp) != NULL) {
785 if (!rt_may_expire(rth, tmo, expire)) { 823 if (!rt_may_expire(rth, tmo, expire)) {
786 tmo >>= 1; 824 tmo >>= 1;
@@ -812,7 +850,7 @@ static int rt_garbage_collect(void)
812 goal--; 850 goal--;
813#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */ 851#endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
814 } 852 }
815 spin_unlock_bh(&rt_hash_table[k].lock); 853 spin_unlock_bh(rt_hash_lock_addr(k));
816 if (goal <= 0) 854 if (goal <= 0)
817 break; 855 break;
818 } 856 }
@@ -882,7 +920,7 @@ restart:
882 920
883 rthp = &rt_hash_table[hash].chain; 921 rthp = &rt_hash_table[hash].chain;
884 922
885 spin_lock_bh(&rt_hash_table[hash].lock); 923 spin_lock_bh(rt_hash_lock_addr(hash));
886 while ((rth = *rthp) != NULL) { 924 while ((rth = *rthp) != NULL) {
887#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED 925#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
888 if (!(rth->u.dst.flags & DST_BALANCED) && 926 if (!(rth->u.dst.flags & DST_BALANCED) &&
@@ -908,7 +946,7 @@ restart:
908 rth->u.dst.__use++; 946 rth->u.dst.__use++;
909 dst_hold(&rth->u.dst); 947 dst_hold(&rth->u.dst);
910 rth->u.dst.lastuse = now; 948 rth->u.dst.lastuse = now;
911 spin_unlock_bh(&rt_hash_table[hash].lock); 949 spin_unlock_bh(rt_hash_lock_addr(hash));
912 950
913 rt_drop(rt); 951 rt_drop(rt);
914 *rp = rth; 952 *rp = rth;
@@ -949,7 +987,7 @@ restart:
949 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 987 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
950 int err = arp_bind_neighbour(&rt->u.dst); 988 int err = arp_bind_neighbour(&rt->u.dst);
951 if (err) { 989 if (err) {
952 spin_unlock_bh(&rt_hash_table[hash].lock); 990 spin_unlock_bh(rt_hash_lock_addr(hash));
953 991
954 if (err != -ENOBUFS) { 992 if (err != -ENOBUFS) {
955 rt_drop(rt); 993 rt_drop(rt);
@@ -990,7 +1028,7 @@ restart:
990 } 1028 }
991#endif 1029#endif
992 rt_hash_table[hash].chain = rt; 1030 rt_hash_table[hash].chain = rt;
993 spin_unlock_bh(&rt_hash_table[hash].lock); 1031 spin_unlock_bh(rt_hash_lock_addr(hash));
994 *rp = rt; 1032 *rp = rt;
995 return 0; 1033 return 0;
996} 1034}
@@ -1058,7 +1096,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
1058{ 1096{
1059 struct rtable **rthp; 1097 struct rtable **rthp;
1060 1098
1061 spin_lock_bh(&rt_hash_table[hash].lock); 1099 spin_lock_bh(rt_hash_lock_addr(hash));
1062 ip_rt_put(rt); 1100 ip_rt_put(rt);
1063 for (rthp = &rt_hash_table[hash].chain; *rthp; 1101 for (rthp = &rt_hash_table[hash].chain; *rthp;
1064 rthp = &(*rthp)->u.rt_next) 1102 rthp = &(*rthp)->u.rt_next)
@@ -1067,7 +1105,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
1067 rt_free(rt); 1105 rt_free(rt);
1068 break; 1106 break;
1069 } 1107 }
1070 spin_unlock_bh(&rt_hash_table[hash].lock); 1108 spin_unlock_bh(rt_hash_lock_addr(hash));
1071} 1109}
1072 1110
1073void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, 1111void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
@@ -3073,12 +3111,14 @@ __setup("rhash_entries=", set_rhash_entries);
3073 3111
3074int __init ip_rt_init(void) 3112int __init ip_rt_init(void)
3075{ 3113{
3076 int i, order, goal, rc = 0; 3114 int rc = 0;
3077 3115
3078 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^ 3116 rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
3079 (jiffies ^ (jiffies >> 7))); 3117 (jiffies ^ (jiffies >> 7)));
3080 3118
3081#ifdef CONFIG_NET_CLS_ROUTE 3119#ifdef CONFIG_NET_CLS_ROUTE
3120 {
3121 int order;
3082 for (order = 0; 3122 for (order = 0;
3083 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++) 3123 (PAGE_SIZE << order) < 256 * sizeof(struct ip_rt_acct) * NR_CPUS; order++)
3084 /* NOTHING */; 3124 /* NOTHING */;
@@ -3086,6 +3126,7 @@ int __init ip_rt_init(void)
3086 if (!ip_rt_acct) 3126 if (!ip_rt_acct)
3087 panic("IP: failed to allocate ip_rt_acct\n"); 3127 panic("IP: failed to allocate ip_rt_acct\n");
3088 memset(ip_rt_acct, 0, PAGE_SIZE << order); 3128 memset(ip_rt_acct, 0, PAGE_SIZE << order);
3129 }
3089#endif 3130#endif
3090 3131
3091 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", 3132 ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache",
@@ -3096,36 +3137,19 @@ int __init ip_rt_init(void)
3096 if (!ipv4_dst_ops.kmem_cachep) 3137 if (!ipv4_dst_ops.kmem_cachep)
3097 panic("IP: failed to allocate ip_dst_cache\n"); 3138 panic("IP: failed to allocate ip_dst_cache\n");
3098 3139
3099 goal = num_physpages >> (26 - PAGE_SHIFT); 3140 rt_hash_table = (struct rt_hash_bucket *)
3100 if (rhash_entries) 3141 alloc_large_system_hash("IP route cache",
3101 goal = (rhash_entries * sizeof(struct rt_hash_bucket)) >> PAGE_SHIFT; 3142 sizeof(struct rt_hash_bucket),
3102 for (order = 0; (1UL << order) < goal; order++) 3143 rhash_entries,
3103 /* NOTHING */; 3144 (num_physpages >= 128 * 1024) ?
3104 3145 (27 - PAGE_SHIFT) :
3105 do { 3146 (29 - PAGE_SHIFT),
3106 rt_hash_mask = (1UL << order) * PAGE_SIZE / 3147 HASH_HIGHMEM,
3107 sizeof(struct rt_hash_bucket); 3148 &rt_hash_log,
3108 while (rt_hash_mask & (rt_hash_mask - 1)) 3149 &rt_hash_mask,
3109 rt_hash_mask--; 3150 0);
3110 rt_hash_table = (struct rt_hash_bucket *) 3151 memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
3111 __get_free_pages(GFP_ATOMIC, order); 3152 rt_hash_lock_init();
3112 } while (rt_hash_table == NULL && --order > 0);
3113
3114 if (!rt_hash_table)
3115 panic("Failed to allocate IP route cache hash table\n");
3116
3117 printk(KERN_INFO "IP: routing cache hash table of %u buckets, %ldKbytes\n",
3118 rt_hash_mask,
3119 (long) (rt_hash_mask * sizeof(struct rt_hash_bucket)) / 1024);
3120
3121 for (rt_hash_log = 0; (1 << rt_hash_log) != rt_hash_mask; rt_hash_log++)
3122 /* NOTHING */;
3123
3124 rt_hash_mask--;
3125 for (i = 0; i <= rt_hash_mask; i++) {
3126 spin_lock_init(&rt_hash_table[i].lock);
3127 rt_hash_table[i].chain = NULL;
3128 }
3129 3153
3130 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1); 3154 ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
3131 ip_rt_max_size = (rt_hash_mask + 1) * 16; 3155 ip_rt_max_size = (rt_hash_mask + 1) * 16;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 882436da9a3a..29894c749163 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -615,7 +615,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
615 size_t psize, int flags) 615 size_t psize, int flags)
616{ 616{
617 struct tcp_sock *tp = tcp_sk(sk); 617 struct tcp_sock *tp = tcp_sk(sk);
618 int mss_now; 618 int mss_now, size_goal;
619 int err; 619 int err;
620 ssize_t copied; 620 ssize_t copied;
621 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); 621 long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
@@ -628,6 +628,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
628 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 628 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
629 629
630 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 630 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
631 size_goal = tp->xmit_size_goal;
631 copied = 0; 632 copied = 0;
632 633
633 err = -EPIPE; 634 err = -EPIPE;
@@ -641,7 +642,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
641 int offset = poffset % PAGE_SIZE; 642 int offset = poffset % PAGE_SIZE;
642 int size = min_t(size_t, psize, PAGE_SIZE - offset); 643 int size = min_t(size_t, psize, PAGE_SIZE - offset);
643 644
644 if (!sk->sk_send_head || (copy = mss_now - skb->len) <= 0) { 645 if (!sk->sk_send_head || (copy = size_goal - skb->len) <= 0) {
645new_segment: 646new_segment:
646 if (!sk_stream_memory_free(sk)) 647 if (!sk_stream_memory_free(sk))
647 goto wait_for_sndbuf; 648 goto wait_for_sndbuf;
@@ -652,7 +653,7 @@ new_segment:
652 goto wait_for_memory; 653 goto wait_for_memory;
653 654
654 skb_entail(sk, tp, skb); 655 skb_entail(sk, tp, skb);
655 copy = mss_now; 656 copy = size_goal;
656 } 657 }
657 658
658 if (copy > size) 659 if (copy > size)
@@ -693,7 +694,7 @@ new_segment:
693 if (!(psize -= copy)) 694 if (!(psize -= copy))
694 goto out; 695 goto out;
695 696
696 if (skb->len != mss_now || (flags & MSG_OOB)) 697 if (skb->len < mss_now || (flags & MSG_OOB))
697 continue; 698 continue;
698 699
699 if (forced_push(tp)) { 700 if (forced_push(tp)) {
@@ -713,6 +714,7 @@ wait_for_memory:
713 goto do_error; 714 goto do_error;
714 715
715 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 716 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
717 size_goal = tp->xmit_size_goal;
716 } 718 }
717 719
718out: 720out:
@@ -754,15 +756,20 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
754 756
755static inline int select_size(struct sock *sk, struct tcp_sock *tp) 757static inline int select_size(struct sock *sk, struct tcp_sock *tp)
756{ 758{
757 int tmp = tp->mss_cache_std; 759 int tmp = tp->mss_cache;
758 760
759 if (sk->sk_route_caps & NETIF_F_SG) { 761 if (sk->sk_route_caps & NETIF_F_SG) {
760 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); 762 if (sk->sk_route_caps & NETIF_F_TSO)
763 tmp = 0;
764 else {
765 int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
761 766
762 if (tmp >= pgbreak && 767 if (tmp >= pgbreak &&
763 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE) 768 tmp <= pgbreak + (MAX_SKB_FRAGS - 1) * PAGE_SIZE)
764 tmp = pgbreak; 769 tmp = pgbreak;
770 }
765 } 771 }
772
766 return tmp; 773 return tmp;
767} 774}
768 775
@@ -773,7 +780,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
773 struct tcp_sock *tp = tcp_sk(sk); 780 struct tcp_sock *tp = tcp_sk(sk);
774 struct sk_buff *skb; 781 struct sk_buff *skb;
775 int iovlen, flags; 782 int iovlen, flags;
776 int mss_now; 783 int mss_now, size_goal;
777 int err, copied; 784 int err, copied;
778 long timeo; 785 long timeo;
779 786
@@ -792,6 +799,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
792 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 799 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
793 800
794 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 801 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
802 size_goal = tp->xmit_size_goal;
795 803
796 /* Ok commence sending. */ 804 /* Ok commence sending. */
797 iovlen = msg->msg_iovlen; 805 iovlen = msg->msg_iovlen;
@@ -814,7 +822,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
814 skb = sk->sk_write_queue.prev; 822 skb = sk->sk_write_queue.prev;
815 823
816 if (!sk->sk_send_head || 824 if (!sk->sk_send_head ||
817 (copy = mss_now - skb->len) <= 0) { 825 (copy = size_goal - skb->len) <= 0) {
818 826
819new_segment: 827new_segment:
820 /* Allocate new segment. If the interface is SG, 828 /* Allocate new segment. If the interface is SG,
@@ -837,7 +845,7 @@ new_segment:
837 skb->ip_summed = CHECKSUM_HW; 845 skb->ip_summed = CHECKSUM_HW;
838 846
839 skb_entail(sk, tp, skb); 847 skb_entail(sk, tp, skb);
840 copy = mss_now; 848 copy = size_goal;
841 } 849 }
842 850
843 /* Try to append data to the end of skb. */ 851 /* Try to append data to the end of skb. */
@@ -872,11 +880,6 @@ new_segment:
872 tcp_mark_push(tp, skb); 880 tcp_mark_push(tp, skb);
873 goto new_segment; 881 goto new_segment;
874 } else if (page) { 882 } else if (page) {
875 /* If page is cached, align
876 * offset to L1 cache boundary
877 */
878 off = (off + L1_CACHE_BYTES - 1) &
879 ~(L1_CACHE_BYTES - 1);
880 if (off == PAGE_SIZE) { 883 if (off == PAGE_SIZE) {
881 put_page(page); 884 put_page(page);
882 TCP_PAGE(sk) = page = NULL; 885 TCP_PAGE(sk) = page = NULL;
@@ -937,7 +940,7 @@ new_segment:
937 if ((seglen -= copy) == 0 && iovlen == 0) 940 if ((seglen -= copy) == 0 && iovlen == 0)
938 goto out; 941 goto out;
939 942
940 if (skb->len != mss_now || (flags & MSG_OOB)) 943 if (skb->len < mss_now || (flags & MSG_OOB))
941 continue; 944 continue;
942 945
943 if (forced_push(tp)) { 946 if (forced_push(tp)) {
@@ -957,6 +960,7 @@ wait_for_memory:
957 goto do_error; 960 goto do_error;
958 961
959 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 962 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB));
963 size_goal = tp->xmit_size_goal;
960 } 964 }
961 } 965 }
962 966
@@ -2128,7 +2132,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
2128 2132
2129 info->tcpi_rto = jiffies_to_usecs(tp->rto); 2133 info->tcpi_rto = jiffies_to_usecs(tp->rto);
2130 info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); 2134 info->tcpi_ato = jiffies_to_usecs(tp->ack.ato);
2131 info->tcpi_snd_mss = tp->mss_cache_std; 2135 info->tcpi_snd_mss = tp->mss_cache;
2132 info->tcpi_rcv_mss = tp->ack.rcv_mss; 2136 info->tcpi_rcv_mss = tp->ack.rcv_mss;
2133 2137
2134 info->tcpi_unacked = tp->packets_out; 2138 info->tcpi_unacked = tp->packets_out;
@@ -2178,7 +2182,7 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
2178 2182
2179 switch (optname) { 2183 switch (optname) {
2180 case TCP_MAXSEG: 2184 case TCP_MAXSEG:
2181 val = tp->mss_cache_std; 2185 val = tp->mss_cache;
2182 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) 2186 if (!val && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)))
2183 val = tp->rx_opt.user_mss; 2187 val = tp->rx_opt.user_mss;
2184 break; 2188 break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7bbbbc33eb4b..8de2f1071c2b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -740,10 +740,10 @@ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
740 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); 740 __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
741 741
742 if (!cwnd) { 742 if (!cwnd) {
743 if (tp->mss_cache_std > 1460) 743 if (tp->mss_cache > 1460)
744 cwnd = 2; 744 cwnd = 2;
745 else 745 else
746 cwnd = (tp->mss_cache_std > 1095) ? 3 : 4; 746 cwnd = (tp->mss_cache > 1095) ? 3 : 4;
747 } 747 }
748 return min_t(__u32, cwnd, tp->snd_cwnd_clamp); 748 return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
749} 749}
@@ -914,7 +914,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
914 if (sk->sk_route_caps & NETIF_F_TSO) { 914 if (sk->sk_route_caps & NETIF_F_TSO) {
915 sk->sk_route_caps &= ~NETIF_F_TSO; 915 sk->sk_route_caps &= ~NETIF_F_TSO;
916 sock_set_flag(sk, SOCK_NO_LARGESEND); 916 sock_set_flag(sk, SOCK_NO_LARGESEND);
917 tp->mss_cache = tp->mss_cache_std; 917 tp->mss_cache = tp->mss_cache;
918 } 918 }
919 919
920 if (!tp->sacked_out) 920 if (!tp->sacked_out)
@@ -1077,7 +1077,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
1077 (IsFack(tp) || 1077 (IsFack(tp) ||
1078 !before(lost_retrans, 1078 !before(lost_retrans,
1079 TCP_SKB_CB(skb)->ack_seq + tp->reordering * 1079 TCP_SKB_CB(skb)->ack_seq + tp->reordering *
1080 tp->mss_cache_std))) { 1080 tp->mss_cache))) {
1081 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1081 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1082 tp->retrans_out -= tcp_skb_pcount(skb); 1082 tp->retrans_out -= tcp_skb_pcount(skb);
1083 1083
@@ -1957,15 +1957,6 @@ static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
1957 } 1957 }
1958} 1958}
1959 1959
1960/* There is one downside to this scheme. Although we keep the
1961 * ACK clock ticking, adjusting packet counters and advancing
1962 * congestion window, we do not liberate socket send buffer
1963 * space.
1964 *
1965 * Mucking with skb->truesize and sk->sk_wmem_alloc et al.
1966 * then making a write space wakeup callback is a possible
1967 * future enhancement. WARNING: it is not trivial to make.
1968 */
1969static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, 1960static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
1970 __u32 now, __s32 *seq_rtt) 1961 __u32 now, __s32 *seq_rtt)
1971{ 1962{
@@ -2047,7 +2038,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, s32 *seq_usrtt
2047 * the other end. 2038 * the other end.
2048 */ 2039 */
2049 if (after(scb->end_seq, tp->snd_una)) { 2040 if (after(scb->end_seq, tp->snd_una)) {
2050 if (tcp_skb_pcount(skb) > 1) 2041 if (tcp_skb_pcount(skb) > 1 &&
2042 after(tp->snd_una, scb->seq))
2051 acked |= tcp_tso_acked(sk, skb, 2043 acked |= tcp_tso_acked(sk, skb,
2052 now, &seq_rtt); 2044 now, &seq_rtt);
2053 break; 2045 break;
@@ -3308,6 +3300,28 @@ void tcp_cwnd_application_limited(struct sock *sk)
3308 tp->snd_cwnd_stamp = tcp_time_stamp; 3300 tp->snd_cwnd_stamp = tcp_time_stamp;
3309} 3301}
3310 3302
3303static inline int tcp_should_expand_sndbuf(struct sock *sk, struct tcp_sock *tp)
3304{
3305 /* If the user specified a specific send buffer setting, do
3306 * not modify it.
3307 */
3308 if (sk->sk_userlocks & SOCK_SNDBUF_LOCK)
3309 return 0;
3310
3311 /* If we are under global TCP memory pressure, do not expand. */
3312 if (tcp_memory_pressure)
3313 return 0;
3314
3315 /* If we are under soft global TCP memory pressure, do not expand. */
3316 if (atomic_read(&tcp_memory_allocated) >= sysctl_tcp_mem[0])
3317 return 0;
3318
3319 /* If we filled the congestion window, do not expand. */
3320 if (tp->packets_out >= tp->snd_cwnd)
3321 return 0;
3322
3323 return 1;
3324}
3311 3325
3312/* When incoming ACK allowed to free some skb from write_queue, 3326/* When incoming ACK allowed to free some skb from write_queue,
3313 * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket 3327 * we remember this event in flag SOCK_QUEUE_SHRUNK and wake up socket
@@ -3319,11 +3333,8 @@ static void tcp_new_space(struct sock *sk)
3319{ 3333{
3320 struct tcp_sock *tp = tcp_sk(sk); 3334 struct tcp_sock *tp = tcp_sk(sk);
3321 3335
3322 if (tp->packets_out < tp->snd_cwnd && 3336 if (tcp_should_expand_sndbuf(sk, tp)) {
3323 !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && 3337 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
3324 !tcp_memory_pressure &&
3325 atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
3326 int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache_std) +
3327 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), 3338 MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
3328 demanded = max_t(unsigned int, tp->snd_cwnd, 3339 demanded = max_t(unsigned int, tp->snd_cwnd,
3329 tp->reordering + 1); 3340 tp->reordering + 1);
@@ -3346,22 +3357,9 @@ static inline void tcp_check_space(struct sock *sk)
3346 } 3357 }
3347} 3358}
3348 3359
3349static void __tcp_data_snd_check(struct sock *sk, struct sk_buff *skb) 3360static __inline__ void tcp_data_snd_check(struct sock *sk, struct tcp_sock *tp)
3350{
3351 struct tcp_sock *tp = tcp_sk(sk);
3352
3353 if (after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd) ||
3354 tcp_packets_in_flight(tp) >= tp->snd_cwnd ||
3355 tcp_write_xmit(sk, tp->nonagle))
3356 tcp_check_probe_timer(sk, tp);
3357}
3358
3359static __inline__ void tcp_data_snd_check(struct sock *sk)
3360{ 3361{
3361 struct sk_buff *skb = sk->sk_send_head; 3362 tcp_push_pending_frames(sk, tp);
3362
3363 if (skb != NULL)
3364 __tcp_data_snd_check(sk, skb);
3365 tcp_check_space(sk); 3363 tcp_check_space(sk);
3366} 3364}
3367 3365
@@ -3655,7 +3653,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3655 */ 3653 */
3656 tcp_ack(sk, skb, 0); 3654 tcp_ack(sk, skb, 0);
3657 __kfree_skb(skb); 3655 __kfree_skb(skb);
3658 tcp_data_snd_check(sk); 3656 tcp_data_snd_check(sk, tp);
3659 return 0; 3657 return 0;
3660 } else { /* Header too small */ 3658 } else { /* Header too small */
3661 TCP_INC_STATS_BH(TCP_MIB_INERRS); 3659 TCP_INC_STATS_BH(TCP_MIB_INERRS);
@@ -3721,7 +3719,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
3721 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { 3719 if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
3722 /* Well, only one small jumplet in fast path... */ 3720 /* Well, only one small jumplet in fast path... */
3723 tcp_ack(sk, skb, FLAG_DATA); 3721 tcp_ack(sk, skb, FLAG_DATA);
3724 tcp_data_snd_check(sk); 3722 tcp_data_snd_check(sk, tp);
3725 if (!tcp_ack_scheduled(tp)) 3723 if (!tcp_ack_scheduled(tp))
3726 goto no_ack; 3724 goto no_ack;
3727 } 3725 }
@@ -3799,7 +3797,7 @@ step5:
3799 /* step 7: process the segment text */ 3797 /* step 7: process the segment text */
3800 tcp_data_queue(sk, skb); 3798 tcp_data_queue(sk, skb);
3801 3799
3802 tcp_data_snd_check(sk); 3800 tcp_data_snd_check(sk, tp);
3803 tcp_ack_snd_check(sk); 3801 tcp_ack_snd_check(sk);
3804 return 0; 3802 return 0;
3805 3803
@@ -4109,7 +4107,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4109 /* Do step6 onward by hand. */ 4107 /* Do step6 onward by hand. */
4110 tcp_urg(sk, skb, th); 4108 tcp_urg(sk, skb, th);
4111 __kfree_skb(skb); 4109 __kfree_skb(skb);
4112 tcp_data_snd_check(sk); 4110 tcp_data_snd_check(sk, tp);
4113 return 0; 4111 return 0;
4114 } 4112 }
4115 4113
@@ -4300,7 +4298,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
4300 4298
4301 /* tcp_data could move socket to TIME-WAIT */ 4299 /* tcp_data could move socket to TIME-WAIT */
4302 if (sk->sk_state != TCP_CLOSE) { 4300 if (sk->sk_state != TCP_CLOSE) {
4303 tcp_data_snd_check(sk); 4301 tcp_data_snd_check(sk, tp);
4304 tcp_ack_snd_check(sk); 4302 tcp_ack_snd_check(sk);
4305 } 4303 }
4306 4304
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ebf112347a97..62f62bb05c2a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2045,7 +2045,7 @@ static int tcp_v4_init_sock(struct sock *sk)
2045 */ 2045 */
2046 tp->snd_ssthresh = 0x7fffffff; /* Infinity */ 2046 tp->snd_ssthresh = 0x7fffffff; /* Infinity */
2047 tp->snd_cwnd_clamp = ~0; 2047 tp->snd_cwnd_clamp = ~0;
2048 tp->mss_cache_std = tp->mss_cache = 536; 2048 tp->mss_cache = 536;
2049 2049
2050 tp->reordering = sysctl_tcp_reordering; 2050 tp->reordering = sysctl_tcp_reordering;
2051 tp->ca_ops = &tcp_init_congestion_ops; 2051 tp->ca_ops = &tcp_init_congestion_ops;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0e17c244875c..e041d057ec86 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -49,7 +49,7 @@ int sysctl_tcp_retrans_collapse = 1;
49 * will allow a single TSO frame to consume. Building TSO frames 49 * will allow a single TSO frame to consume. Building TSO frames
50 * which are too large can cause TCP streams to be bursty. 50 * which are too large can cause TCP streams to be bursty.
51 */ 51 */
52int sysctl_tcp_tso_win_divisor = 8; 52int sysctl_tcp_tso_win_divisor = 3;
53 53
54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp, 54static inline void update_send_head(struct sock *sk, struct tcp_sock *tp,
55 struct sk_buff *skb) 55 struct sk_buff *skb)
@@ -140,11 +140,11 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
140 tp->ack.pingpong = 1; 140 tp->ack.pingpong = 1;
141} 141}
142 142
143static __inline__ void tcp_event_ack_sent(struct sock *sk) 143static __inline__ void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
144{ 144{
145 struct tcp_sock *tp = tcp_sk(sk); 145 struct tcp_sock *tp = tcp_sk(sk);
146 146
147 tcp_dec_quickack_mode(tp); 147 tcp_dec_quickack_mode(tp, pkts);
148 tcp_clear_xmit_timer(sk, TCP_TIME_DACK); 148 tcp_clear_xmit_timer(sk, TCP_TIME_DACK);
149} 149}
150 150
@@ -355,7 +355,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
355 tp->af_specific->send_check(sk, th, skb->len, skb); 355 tp->af_specific->send_check(sk, th, skb->len, skb);
356 356
357 if (tcb->flags & TCPCB_FLAG_ACK) 357 if (tcb->flags & TCPCB_FLAG_ACK)
358 tcp_event_ack_sent(sk); 358 tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
359 359
360 if (skb->len != tcp_header_size) 360 if (skb->len != tcp_header_size)
361 tcp_event_data_sent(tp, skb, sk); 361 tcp_event_data_sent(tp, skb, sk);
@@ -403,42 +403,11 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
403 sk->sk_send_head = skb; 403 sk->sk_send_head = skb;
404} 404}
405 405
406static inline void tcp_tso_set_push(struct sk_buff *skb) 406static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
407{
408 /* Force push to be on for any TSO frames to workaround
409 * problems with busted implementations like Mac OS-X that
410 * hold off socket receive wakeups until push is seen.
411 */
412 if (tcp_skb_pcount(skb) > 1)
413 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
414}
415
416/* Send _single_ skb sitting at the send head. This function requires
417 * true push pending frames to setup probe timer etc.
418 */
419void tcp_push_one(struct sock *sk, unsigned cur_mss)
420{ 407{
421 struct tcp_sock *tp = tcp_sk(sk); 408 struct tcp_sock *tp = tcp_sk(sk);
422 struct sk_buff *skb = sk->sk_send_head;
423 409
424 if (tcp_snd_test(sk, skb, cur_mss, TCP_NAGLE_PUSH)) { 410 if (skb->len <= tp->mss_cache ||
425 /* Send it out now. */
426 TCP_SKB_CB(skb)->when = tcp_time_stamp;
427 tcp_tso_set_push(skb);
428 if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
429 sk->sk_send_head = NULL;
430 tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
431 tcp_packets_out_inc(sk, tp, skb);
432 return;
433 }
434 }
435}
436
437void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
438{
439 struct tcp_sock *tp = tcp_sk(sk);
440
441 if (skb->len <= tp->mss_cache_std ||
442 !(sk->sk_route_caps & NETIF_F_TSO)) { 411 !(sk->sk_route_caps & NETIF_F_TSO)) {
443 /* Avoid the costly divide in the normal 412 /* Avoid the costly divide in the normal
444 * non-TSO case. 413 * non-TSO case.
@@ -448,10 +417,10 @@ void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
448 } else { 417 } else {
449 unsigned int factor; 418 unsigned int factor;
450 419
451 factor = skb->len + (tp->mss_cache_std - 1); 420 factor = skb->len + (tp->mss_cache - 1);
452 factor /= tp->mss_cache_std; 421 factor /= tp->mss_cache;
453 skb_shinfo(skb)->tso_segs = factor; 422 skb_shinfo(skb)->tso_segs = factor;
454 skb_shinfo(skb)->tso_size = tp->mss_cache_std; 423 skb_shinfo(skb)->tso_size = tp->mss_cache;
455 } 424 }
456} 425}
457 426
@@ -537,6 +506,7 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
537 } 506 }
538 507
539 /* Link BUFF into the send queue. */ 508 /* Link BUFF into the send queue. */
509 skb_header_release(buff);
540 __skb_append(skb, buff); 510 __skb_append(skb, buff);
541 511
542 return 0; 512 return 0;
@@ -657,7 +627,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
657 627
658 /* And store cached results */ 628 /* And store cached results */
659 tp->pmtu_cookie = pmtu; 629 tp->pmtu_cookie = pmtu;
660 tp->mss_cache = tp->mss_cache_std = mss_now; 630 tp->mss_cache = mss_now;
661 631
662 return mss_now; 632 return mss_now;
663} 633}
@@ -669,57 +639,316 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
669 * cannot be large. However, taking into account rare use of URG, this 639 * cannot be large. However, taking into account rare use of URG, this
670 * is not a big flaw. 640 * is not a big flaw.
671 */ 641 */
672 642unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
673unsigned int tcp_current_mss(struct sock *sk, int large)
674{ 643{
675 struct tcp_sock *tp = tcp_sk(sk); 644 struct tcp_sock *tp = tcp_sk(sk);
676 struct dst_entry *dst = __sk_dst_get(sk); 645 struct dst_entry *dst = __sk_dst_get(sk);
677 unsigned int do_large, mss_now; 646 u32 mss_now;
647 u16 xmit_size_goal;
648 int doing_tso = 0;
649
650 mss_now = tp->mss_cache;
651
652 if (large_allowed &&
653 (sk->sk_route_caps & NETIF_F_TSO) &&
654 !tp->urg_mode)
655 doing_tso = 1;
678 656
679 mss_now = tp->mss_cache_std;
680 if (dst) { 657 if (dst) {
681 u32 mtu = dst_mtu(dst); 658 u32 mtu = dst_mtu(dst);
682 if (mtu != tp->pmtu_cookie) 659 if (mtu != tp->pmtu_cookie)
683 mss_now = tcp_sync_mss(sk, mtu); 660 mss_now = tcp_sync_mss(sk, mtu);
684 } 661 }
685 662
686 do_large = (large && 663 if (tp->rx_opt.eff_sacks)
687 (sk->sk_route_caps & NETIF_F_TSO) && 664 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
688 !tp->urg_mode); 665 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK));
689 666
690 if (do_large) { 667 xmit_size_goal = mss_now;
691 unsigned int large_mss, factor, limit;
692 668
693 large_mss = 65535 - tp->af_specific->net_header_len - 669 if (doing_tso) {
670 xmit_size_goal = 65535 -
671 tp->af_specific->net_header_len -
694 tp->ext_header_len - tp->tcp_header_len; 672 tp->ext_header_len - tp->tcp_header_len;
695 673
696 if (tp->max_window && large_mss > (tp->max_window>>1)) 674 if (tp->max_window &&
697 large_mss = max((tp->max_window>>1), 675 (xmit_size_goal > (tp->max_window >> 1)))
698 68U - tp->tcp_header_len); 676 xmit_size_goal = max((tp->max_window >> 1),
677 68U - tp->tcp_header_len);
678
679 xmit_size_goal -= (xmit_size_goal % mss_now);
680 }
681 tp->xmit_size_goal = xmit_size_goal;
699 682
700 factor = large_mss / mss_now; 683 return mss_now;
684}
701 685
702 /* Always keep large mss multiple of real mss, but 686/* Congestion window validation. (RFC2861) */
703 * do not exceed 1/tso_win_divisor of the congestion window
704 * so we can keep the ACK clock ticking and minimize
705 * bursting.
706 */
707 limit = tp->snd_cwnd;
708 if (sysctl_tcp_tso_win_divisor)
709 limit /= sysctl_tcp_tso_win_divisor;
710 limit = max(1U, limit);
711 if (factor > limit)
712 factor = limit;
713 687
714 tp->mss_cache = mss_now * factor; 688static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
689{
690 __u32 packets_out = tp->packets_out;
691
692 if (packets_out >= tp->snd_cwnd) {
693 /* Network is feed fully. */
694 tp->snd_cwnd_used = 0;
695 tp->snd_cwnd_stamp = tcp_time_stamp;
696 } else {
697 /* Network starves. */
698 if (tp->packets_out > tp->snd_cwnd_used)
699 tp->snd_cwnd_used = tp->packets_out;
715 700
716 mss_now = tp->mss_cache; 701 if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
702 tcp_cwnd_application_limited(sk);
717 } 703 }
704}
718 705
719 if (tp->rx_opt.eff_sacks) 706static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
720 mss_now -= (TCPOLEN_SACK_BASE_ALIGNED + 707{
721 (tp->rx_opt.eff_sacks * TCPOLEN_SACK_PERBLOCK)); 708 u32 window, cwnd_len;
722 return mss_now; 709
710 window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
711 cwnd_len = mss_now * cwnd;
712 return min(window, cwnd_len);
713}
714
715/* Can at least one segment of SKB be sent right now, according to the
716 * congestion window rules? If so, return how many segments are allowed.
717 */
718static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *skb)
719{
720 u32 in_flight, cwnd;
721
722 /* Don't be strict about the congestion window for the final FIN. */
723 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
724 return 1;
725
726 in_flight = tcp_packets_in_flight(tp);
727 cwnd = tp->snd_cwnd;
728 if (in_flight < cwnd)
729 return (cwnd - in_flight);
730
731 return 0;
732}
733
734/* This must be invoked the first time we consider transmitting
735 * SKB onto the wire.
736 */
737static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
738{
739 int tso_segs = tcp_skb_pcount(skb);
740
741 if (!tso_segs) {
742 tcp_set_skb_tso_segs(sk, skb);
743 tso_segs = tcp_skb_pcount(skb);
744 }
745 return tso_segs;
746}
747
748static inline int tcp_minshall_check(const struct tcp_sock *tp)
749{
750 return after(tp->snd_sml,tp->snd_una) &&
751 !after(tp->snd_sml, tp->snd_nxt);
752}
753
754/* Return 0, if packet can be sent now without violation Nagle's rules:
755 * 1. It is full sized.
756 * 2. Or it contains FIN. (already checked by caller)
757 * 3. Or TCP_NODELAY was set.
758 * 4. Or TCP_CORK is not set, and all sent packets are ACKed.
759 * With Minshall's modification: all sent small packets are ACKed.
760 */
761
762static inline int tcp_nagle_check(const struct tcp_sock *tp,
763 const struct sk_buff *skb,
764 unsigned mss_now, int nonagle)
765{
766 return (skb->len < mss_now &&
767 ((nonagle&TCP_NAGLE_CORK) ||
768 (!nonagle &&
769 tp->packets_out &&
770 tcp_minshall_check(tp))));
771}
772
773/* Return non-zero if the Nagle test allows this packet to be
774 * sent now.
775 */
776static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb,
777 unsigned int cur_mss, int nonagle)
778{
779 /* Nagle rule does not apply to frames, which sit in the middle of the
780 * write_queue (they have no chances to get new data).
781 *
782 * This is implemented in the callers, where they modify the 'nonagle'
783 * argument based upon the location of SKB in the send queue.
784 */
785 if (nonagle & TCP_NAGLE_PUSH)
786 return 1;
787
788 /* Don't use the nagle rule for urgent data (or for the final FIN). */
789 if (tp->urg_mode ||
790 (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN))
791 return 1;
792
793 if (!tcp_nagle_check(tp, skb, cur_mss, nonagle))
794 return 1;
795
796 return 0;
797}
798
799/* Does at least the first segment of SKB fit into the send window? */
800static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, unsigned int cur_mss)
801{
802 u32 end_seq = TCP_SKB_CB(skb)->end_seq;
803
804 if (skb->len > cur_mss)
805 end_seq = TCP_SKB_CB(skb)->seq + cur_mss;
806
807 return !after(end_seq, tp->snd_una + tp->snd_wnd);
808}
809
810/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
811 * should be put on the wire right now. If so, it returns the number of
812 * packets allowed by the congestion window.
813 */
814static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
815 unsigned int cur_mss, int nonagle)
816{
817 struct tcp_sock *tp = tcp_sk(sk);
818 unsigned int cwnd_quota;
819
820 tcp_init_tso_segs(sk, skb);
821
822 if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
823 return 0;
824
825 cwnd_quota = tcp_cwnd_test(tp, skb);
826 if (cwnd_quota &&
827 !tcp_snd_wnd_test(tp, skb, cur_mss))
828 cwnd_quota = 0;
829
830 return cwnd_quota;
831}
832
833static inline int tcp_skb_is_last(const struct sock *sk,
834 const struct sk_buff *skb)
835{
836 return skb->next == (struct sk_buff *)&sk->sk_write_queue;
837}
838
839int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
840{
841 struct sk_buff *skb = sk->sk_send_head;
842
843 return (skb &&
844 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1),
845 (tcp_skb_is_last(sk, skb) ?
846 TCP_NAGLE_PUSH :
847 tp->nonagle)));
848}
849
850/* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
851 * which is put after SKB on the list. It is very much like
852 * tcp_fragment() except that it may make several kinds of assumptions
853 * in order to speed up the splitting operation. In particular, we
854 * know that all the data is in scatter-gather pages, and that the
855 * packet has never been sent out before (and thus is not cloned).
856 */
857static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
858{
859 struct sk_buff *buff;
860 int nlen = skb->len - len;
861 u16 flags;
862
863 /* All of a TSO frame must be composed of paged data. */
864 BUG_ON(skb->len != skb->data_len);
865
866 buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
867 if (unlikely(buff == NULL))
868 return -ENOMEM;
869
870 buff->truesize = nlen;
871 skb->truesize -= nlen;
872
873 /* Correct the sequence numbers. */
874 TCP_SKB_CB(buff)->seq = TCP_SKB_CB(skb)->seq + len;
875 TCP_SKB_CB(buff)->end_seq = TCP_SKB_CB(skb)->end_seq;
876 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(buff)->seq;
877
878 /* PSH and FIN should only be set in the second packet. */
879 flags = TCP_SKB_CB(skb)->flags;
880 TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
881 TCP_SKB_CB(buff)->flags = flags;
882
883 /* This packet was never sent out yet, so no SACK bits. */
884 TCP_SKB_CB(buff)->sacked = 0;
885
886 buff->ip_summed = skb->ip_summed = CHECKSUM_HW;
887 skb_split(skb, buff, len);
888
889 /* Fix up tso_factor for both original and new SKB. */
890 tcp_set_skb_tso_segs(sk, skb);
891 tcp_set_skb_tso_segs(sk, buff);
892
893 /* Link BUFF into the send queue. */
894 skb_header_release(buff);
895 __skb_append(skb, buff);
896
897 return 0;
898}
899
900/* Try to defer sending, if possible, in order to minimize the amount
901 * of TSO splitting we do. View it as a kind of TSO Nagle test.
902 *
903 * This algorithm is from John Heffner.
904 */
905static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
906{
907 u32 send_win, cong_win, limit, in_flight;
908
909 if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)
910 return 0;
911
912 if (tp->ca_state != TCP_CA_Open)
913 return 0;
914
915 in_flight = tcp_packets_in_flight(tp);
916
917 BUG_ON(tcp_skb_pcount(skb) <= 1 ||
918 (tp->snd_cwnd <= in_flight));
919
920 send_win = (tp->snd_una + tp->snd_wnd) - TCP_SKB_CB(skb)->seq;
921
922 /* From in_flight test above, we know that cwnd > in_flight. */
923 cong_win = (tp->snd_cwnd - in_flight) * tp->mss_cache;
924
925 limit = min(send_win, cong_win);
926
927 /* If sk_send_head can be sent fully now, just do it. */
928 if (skb->len <= limit)
929 return 0;
930
931 if (sysctl_tcp_tso_win_divisor) {
932 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
933
934 /* If at least some fraction of a window is available,
935 * just use it.
936 */
937 chunk /= sysctl_tcp_tso_win_divisor;
938 if (limit >= chunk)
939 return 0;
940 } else {
941 /* Different approach, try not to defer past a single
942 * ACK. Receiver should ACK every other full sized
943 * frame, so if we have space for more than 3 frames
944 * then send now.
945 */
946 if (limit > tcp_max_burst(tp) * tp->mss_cache)
947 return 0;
948 }
949
950 /* Ok, it looks like it is advisable to defer. */
951 return 1;
723} 952}
724 953
725/* This routine writes packets to the network. It advances the 954/* This routine writes packets to the network. It advances the
@@ -729,57 +958,158 @@ unsigned int tcp_current_mss(struct sock *sk, int large)
729 * Returns 1, if no segments are in flight and we have queued segments, but 958 * Returns 1, if no segments are in flight and we have queued segments, but
730 * cannot send anything now because of SWS or another problem. 959 * cannot send anything now because of SWS or another problem.
731 */ 960 */
732int tcp_write_xmit(struct sock *sk, int nonagle) 961static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
733{ 962{
734 struct tcp_sock *tp = tcp_sk(sk); 963 struct tcp_sock *tp = tcp_sk(sk);
735 unsigned int mss_now; 964 struct sk_buff *skb;
965 unsigned int tso_segs, sent_pkts;
966 int cwnd_quota;
736 967
737 /* If we are closed, the bytes will have to remain here. 968 /* If we are closed, the bytes will have to remain here.
738 * In time closedown will finish, we empty the write queue and all 969 * In time closedown will finish, we empty the write queue and all
739 * will be happy. 970 * will be happy.
740 */ 971 */
741 if (sk->sk_state != TCP_CLOSE) { 972 if (unlikely(sk->sk_state == TCP_CLOSE))
742 struct sk_buff *skb; 973 return 0;
743 int sent_pkts = 0; 974
975 skb = sk->sk_send_head;
976 if (unlikely(!skb))
977 return 0;
978
979 tso_segs = tcp_init_tso_segs(sk, skb);
980 cwnd_quota = tcp_cwnd_test(tp, skb);
981 if (unlikely(!cwnd_quota))
982 goto out;
983
984 sent_pkts = 0;
985 while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
986 BUG_ON(!tso_segs);
987
988 if (tso_segs == 1) {
989 if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
990 (tcp_skb_is_last(sk, skb) ?
991 nonagle : TCP_NAGLE_PUSH))))
992 break;
993 } else {
994 if (tcp_tso_should_defer(sk, tp, skb))
995 break;
996 }
744 997
745 /* Account for SACKS, we may need to fragment due to this. 998 if (tso_segs > 1) {
746 * It is just like the real MSS changing on us midstream. 999 u32 limit = tcp_window_allows(tp, skb,
747 * We also handle things correctly when the user adds some 1000 mss_now, cwnd_quota);
748 * IP options mid-stream. Silly to do, but cover it. 1001
749 */ 1002 if (skb->len < limit) {
750 mss_now = tcp_current_mss(sk, 1); 1003 unsigned int trim = skb->len % mss_now;
751 1004
752 while ((skb = sk->sk_send_head) && 1005 if (trim)
753 tcp_snd_test(sk, skb, mss_now, 1006 limit = skb->len - trim;
754 tcp_skb_is_last(sk, skb) ? nonagle : 1007 }
755 TCP_NAGLE_PUSH)) { 1008 if (skb->len > limit) {
756 if (skb->len > mss_now) { 1009 if (tso_fragment(sk, skb, limit))
757 if (tcp_fragment(sk, skb, mss_now))
758 break; 1010 break;
759 } 1011 }
760 1012 } else if (unlikely(skb->len > mss_now)) {
761 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1013 if (unlikely(tcp_fragment(sk, skb, mss_now)))
762 tcp_tso_set_push(skb);
763 if (tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))
764 break; 1014 break;
1015 }
765 1016
766 /* Advance the send_head. This one is sent out. 1017 TCP_SKB_CB(skb)->when = tcp_time_stamp;
767 * This call will increment packets_out. 1018
768 */ 1019 if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
769 update_send_head(sk, tp, skb); 1020 break;
1021
1022 /* Advance the send_head. This one is sent out.
1023 * This call will increment packets_out.
1024 */
1025 update_send_head(sk, tp, skb);
1026
1027 tcp_minshall_update(tp, mss_now, skb);
1028 sent_pkts++;
1029
1030 /* Do not optimize this to use tso_segs. If we chopped up
1031 * the packet above, tso_segs will no longer be valid.
1032 */
1033 cwnd_quota -= tcp_skb_pcount(skb);
1034
1035 BUG_ON(cwnd_quota < 0);
1036 if (!cwnd_quota)
1037 break;
1038
1039 skb = sk->sk_send_head;
1040 if (!skb)
1041 break;
1042 tso_segs = tcp_init_tso_segs(sk, skb);
1043 }
1044
1045 if (likely(sent_pkts)) {
1046 tcp_cwnd_validate(sk, tp);
1047 return 0;
1048 }
1049out:
1050 return !tp->packets_out && sk->sk_send_head;
1051}
1052
1053/* Push out any pending frames which were held back due to
1054 * TCP_CORK or attempt at coalescing tiny packets.
1055 * The socket must be locked by the caller.
1056 */
1057void __tcp_push_pending_frames(struct sock *sk, struct tcp_sock *tp,
1058 unsigned int cur_mss, int nonagle)
1059{
1060 struct sk_buff *skb = sk->sk_send_head;
770 1061
771 tcp_minshall_update(tp, mss_now, skb); 1062 if (skb) {
772 sent_pkts = 1; 1063 if (tcp_write_xmit(sk, cur_mss, nonagle))
1064 tcp_check_probe_timer(sk, tp);
1065 }
1066}
1067
1068/* Send _single_ skb sitting at the send head. This function requires
1069 * true push pending frames to setup probe timer etc.
1070 */
1071void tcp_push_one(struct sock *sk, unsigned int mss_now)
1072{
1073 struct tcp_sock *tp = tcp_sk(sk);
1074 struct sk_buff *skb = sk->sk_send_head;
1075 unsigned int tso_segs, cwnd_quota;
1076
1077 BUG_ON(!skb || skb->len < mss_now);
1078
1079 tso_segs = tcp_init_tso_segs(sk, skb);
1080 cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
1081
1082 if (likely(cwnd_quota)) {
1083 BUG_ON(!tso_segs);
1084
1085 if (tso_segs > 1) {
1086 u32 limit = tcp_window_allows(tp, skb,
1087 mss_now, cwnd_quota);
1088
1089 if (skb->len < limit) {
1090 unsigned int trim = skb->len % mss_now;
1091
1092 if (trim)
1093 limit = skb->len - trim;
1094 }
1095 if (skb->len > limit) {
1096 if (unlikely(tso_fragment(sk, skb, limit)))
1097 return;
1098 }
1099 } else if (unlikely(skb->len > mss_now)) {
1100 if (unlikely(tcp_fragment(sk, skb, mss_now)))
1101 return;
773 } 1102 }
774 1103
775 if (sent_pkts) { 1104 /* Send it out now. */
1105 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1106
1107 if (likely(!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation)))) {
1108 update_send_head(sk, tp, skb);
776 tcp_cwnd_validate(sk, tp); 1109 tcp_cwnd_validate(sk, tp);
777 return 0; 1110 return;
778 } 1111 }
779
780 return !tp->packets_out && sk->sk_send_head;
781 } 1112 }
782 return 0;
783} 1113}
784 1114
785/* This function returns the amount that we can raise the 1115/* This function returns the amount that we can raise the
@@ -1039,7 +1369,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1039 if (sk->sk_route_caps & NETIF_F_TSO) { 1369 if (sk->sk_route_caps & NETIF_F_TSO) {
1040 sk->sk_route_caps &= ~NETIF_F_TSO; 1370 sk->sk_route_caps &= ~NETIF_F_TSO;
1041 sock_set_flag(sk, SOCK_NO_LARGESEND); 1371 sock_set_flag(sk, SOCK_NO_LARGESEND);
1042 tp->mss_cache = tp->mss_cache_std;
1043 } 1372 }
1044 1373
1045 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) 1374 if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq))
@@ -1101,7 +1430,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1101 * is still in somebody's hands, else make a clone. 1430 * is still in somebody's hands, else make a clone.
1102 */ 1431 */
1103 TCP_SKB_CB(skb)->when = tcp_time_stamp; 1432 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1104 tcp_tso_set_push(skb);
1105 1433
1106 err = tcp_transmit_skb(sk, (skb_cloned(skb) ? 1434 err = tcp_transmit_skb(sk, (skb_cloned(skb) ?
1107 pskb_copy(skb, GFP_ATOMIC): 1435 pskb_copy(skb, GFP_ATOMIC):
@@ -1670,14 +1998,12 @@ int tcp_write_wakeup(struct sock *sk)
1670 if (sk->sk_route_caps & NETIF_F_TSO) { 1998 if (sk->sk_route_caps & NETIF_F_TSO) {
1671 sock_set_flag(sk, SOCK_NO_LARGESEND); 1999 sock_set_flag(sk, SOCK_NO_LARGESEND);
1672 sk->sk_route_caps &= ~NETIF_F_TSO; 2000 sk->sk_route_caps &= ~NETIF_F_TSO;
1673 tp->mss_cache = tp->mss_cache_std;
1674 } 2001 }
1675 } else if (!tcp_skb_pcount(skb)) 2002 } else if (!tcp_skb_pcount(skb))
1676 tcp_set_skb_tso_segs(sk, skb); 2003 tcp_set_skb_tso_segs(sk, skb);
1677 2004
1678 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; 2005 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
1679 TCP_SKB_CB(skb)->when = tcp_time_stamp; 2006 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1680 tcp_tso_set_push(skb);
1681 err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); 2007 err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
1682 if (!err) { 2008 if (!err) {
1683 update_send_head(sk, tp, skb); 2009 update_send_head(sk, tp, skb);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2b193e3df49a..28d9bcab0970 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -774,7 +774,6 @@ static int __init inet6_init(void)
774 if (if6_proc_init()) 774 if (if6_proc_init())
775 goto proc_if6_fail; 775 goto proc_if6_fail;
776#endif 776#endif
777 ipv6_packet_init();
778 ip6_route_init(); 777 ip6_route_init();
779 ip6_flowlabel_init(); 778 ip6_flowlabel_init();
780 err = addrconf_init(); 779 err = addrconf_init();
@@ -791,6 +790,8 @@ static int __init inet6_init(void)
791 /* Init v6 transport protocols. */ 790 /* Init v6 transport protocols. */
792 udpv6_init(); 791 udpv6_init();
793 tcpv6_init(); 792 tcpv6_init();
793
794 ipv6_packet_init();
794 err = 0; 795 err = 0;
795out: 796out:
796 return err; 797 return err;
@@ -798,7 +799,6 @@ out:
798addrconf_fail: 799addrconf_fail:
799 ip6_flowlabel_cleanup(); 800 ip6_flowlabel_cleanup();
800 ip6_route_cleanup(); 801 ip6_route_cleanup();
801 ipv6_packet_cleanup();
802#ifdef CONFIG_PROC_FS 802#ifdef CONFIG_PROC_FS
803 if6_proc_exit(); 803 if6_proc_exit();
804proc_if6_fail: 804proc_if6_fail:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 06e7cdaeedc5..1f2c2f9e353f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -465,7 +465,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
465 to->pkt_type = from->pkt_type; 465 to->pkt_type = from->pkt_type;
466 to->priority = from->priority; 466 to->priority = from->priority;
467 to->protocol = from->protocol; 467 to->protocol = from->protocol;
468 to->security = from->security;
469 dst_release(to->dst); 468 dst_release(to->dst);
470 to->dst = dst_clone(from->dst); 469 to->dst = dst_clone(from->dst);
471 to->dev = from->dev; 470 to->dev = from->dev;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 9dac7fdf4726..f6e288dc116e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2018,7 +2018,7 @@ static int tcp_v6_init_sock(struct sock *sk)
2018 */ 2018 */
2019 tp->snd_ssthresh = 0x7fffffff; 2019 tp->snd_ssthresh = 0x7fffffff;
2020 tp->snd_cwnd_clamp = ~0; 2020 tp->snd_cwnd_clamp = ~0;
2021 tp->mss_cache_std = tp->mss_cache = 536; 2021 tp->mss_cache = 536;
2022 2022
2023 tp->reordering = sysctl_tcp_reordering; 2023 tp->reordering = sysctl_tcp_reordering;
2024 2024
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 8f58cecd6266..e48d0d456b3e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -4,7 +4,7 @@
4 4
5obj-y := sch_generic.o 5obj-y := sch_generic.o
6 6
7obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o 7obj-$(CONFIG_NET_SCHED) += sch_api.o sch_fifo.o sch_blackhole.o
8obj-$(CONFIG_NET_CLS) += cls_api.o 8obj-$(CONFIG_NET_CLS) += cls_api.o
9obj-$(CONFIG_NET_CLS_ACT) += act_api.o 9obj-$(CONFIG_NET_CLS_ACT) += act_api.o
10obj-$(CONFIG_NET_ACT_POLICE) += police.o 10obj-$(CONFIG_NET_ACT_POLICE) += police.o
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 48bb23c2a35a..53d98f8d3d80 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -205,11 +205,6 @@ META_COLLECTOR(int_protocol)
205 dst->value = skb->protocol; 205 dst->value = skb->protocol;
206} 206}
207 207
208META_COLLECTOR(int_security)
209{
210 dst->value = skb->security;
211}
212
213META_COLLECTOR(int_pkttype) 208META_COLLECTOR(int_pkttype)
214{ 209{
215 dst->value = skb->pkt_type; 210 dst->value = skb->pkt_type;
@@ -524,7 +519,6 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
524 [META_ID(REALDEV)] = META_FUNC(int_realdev), 519 [META_ID(REALDEV)] = META_FUNC(int_realdev),
525 [META_ID(PRIORITY)] = META_FUNC(int_priority), 520 [META_ID(PRIORITY)] = META_FUNC(int_priority),
526 [META_ID(PROTOCOL)] = META_FUNC(int_protocol), 521 [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
527 [META_ID(SECURITY)] = META_FUNC(int_security),
528 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), 522 [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
529 [META_ID(PKTLEN)] = META_FUNC(int_pktlen), 523 [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
530 [META_ID(DATALEN)] = META_FUNC(int_datalen), 524 [META_ID(DATALEN)] = META_FUNC(int_datalen),
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 05e6e0a799da..b9a069af4a02 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -399,10 +399,8 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
399{ 399{
400 int err; 400 int err;
401 struct rtattr *kind = tca[TCA_KIND-1]; 401 struct rtattr *kind = tca[TCA_KIND-1];
402 void *p = NULL;
403 struct Qdisc *sch; 402 struct Qdisc *sch;
404 struct Qdisc_ops *ops; 403 struct Qdisc_ops *ops;
405 int size;
406 404
407 ops = qdisc_lookup_ops(kind); 405 ops = qdisc_lookup_ops(kind);
408#ifdef CONFIG_KMOD 406#ifdef CONFIG_KMOD
@@ -437,64 +435,55 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
437 if (ops == NULL) 435 if (ops == NULL)
438 goto err_out; 436 goto err_out;
439 437
440 /* ensure that the Qdisc and the private data are 32-byte aligned */ 438 sch = qdisc_alloc(dev, ops);
441 size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); 439 if (IS_ERR(sch)) {
442 size += ops->priv_size + QDISC_ALIGN_CONST; 440 err = PTR_ERR(sch);
443
444 p = kmalloc(size, GFP_KERNEL);
445 err = -ENOBUFS;
446 if (!p)
447 goto err_out2; 441 goto err_out2;
448 memset(p, 0, size); 442 }
449 sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST)
450 & ~QDISC_ALIGN_CONST);
451 sch->padded = (char *)sch - (char *)p;
452
453 INIT_LIST_HEAD(&sch->list);
454 skb_queue_head_init(&sch->q);
455 443
456 if (handle == TC_H_INGRESS) 444 if (handle == TC_H_INGRESS) {
457 sch->flags |= TCQ_F_INGRESS; 445 sch->flags |= TCQ_F_INGRESS;
458 446 handle = TC_H_MAKE(TC_H_INGRESS, 0);
459 sch->ops = ops; 447 } else if (handle == 0) {
460 sch->enqueue = ops->enqueue;
461 sch->dequeue = ops->dequeue;
462 sch->dev = dev;
463 dev_hold(dev);
464 atomic_set(&sch->refcnt, 1);
465 sch->stats_lock = &dev->queue_lock;
466 if (handle == 0) {
467 handle = qdisc_alloc_handle(dev); 448 handle = qdisc_alloc_handle(dev);
468 err = -ENOMEM; 449 err = -ENOMEM;
469 if (handle == 0) 450 if (handle == 0)
470 goto err_out3; 451 goto err_out3;
471 } 452 }
472 453
473 if (handle == TC_H_INGRESS) 454 sch->handle = handle;
474 sch->handle =TC_H_MAKE(TC_H_INGRESS, 0);
475 else
476 sch->handle = handle;
477 455
478 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { 456 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
457#ifdef CONFIG_NET_ESTIMATOR
458 if (tca[TCA_RATE-1]) {
459 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
460 sch->stats_lock,
461 tca[TCA_RATE-1]);
462 if (err) {
463 /*
464 * Any broken qdiscs that would require
465 * a ops->reset() here? The qdisc was never
466 * in action so it shouldn't be necessary.
467 */
468 if (ops->destroy)
469 ops->destroy(sch);
470 goto err_out3;
471 }
472 }
473#endif
479 qdisc_lock_tree(dev); 474 qdisc_lock_tree(dev);
480 list_add_tail(&sch->list, &dev->qdisc_list); 475 list_add_tail(&sch->list, &dev->qdisc_list);
481 qdisc_unlock_tree(dev); 476 qdisc_unlock_tree(dev);
482 477
483#ifdef CONFIG_NET_ESTIMATOR
484 if (tca[TCA_RATE-1])
485 gen_new_estimator(&sch->bstats, &sch->rate_est,
486 sch->stats_lock, tca[TCA_RATE-1]);
487#endif
488 return sch; 478 return sch;
489 } 479 }
490err_out3: 480err_out3:
491 dev_put(dev); 481 dev_put(dev);
482 kfree((char *) sch - sch->padded);
492err_out2: 483err_out2:
493 module_put(ops->owner); 484 module_put(ops->owner);
494err_out: 485err_out:
495 *errp = err; 486 *errp = err;
496 if (p)
497 kfree(p);
498 return NULL; 487 return NULL;
499} 488}
500 489
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
new file mode 100644
index 000000000000..81f0b8346d17
--- /dev/null
+++ b/net/sched/sch_blackhole.c
@@ -0,0 +1,54 @@
1/*
2 * net/sched/sch_blackhole.c Black hole queue
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Thomas Graf <tgraf@suug.ch>
10 *
11 * Note: Quantum tunneling is not supported.
12 */
13
14#include <linux/config.h>
15#include <linux/module.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/netdevice.h>
19#include <linux/skbuff.h>
20#include <net/pkt_sched.h>
21
22static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
23{
24 qdisc_drop(skb, sch);
25 return NET_XMIT_SUCCESS;
26}
27
28static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
29{
30 return NULL;
31}
32
33static struct Qdisc_ops blackhole_qdisc_ops = {
34 .id = "blackhole",
35 .priv_size = 0,
36 .enqueue = blackhole_enqueue,
37 .dequeue = blackhole_dequeue,
38 .owner = THIS_MODULE,
39};
40
41static int __init blackhole_module_init(void)
42{
43 return register_qdisc(&blackhole_qdisc_ops);
44}
45
46static void __exit blackhole_module_exit(void)
47{
48 unregister_qdisc(&blackhole_qdisc_ops);
49}
50
51module_init(blackhole_module_init)
52module_exit(blackhole_module_exit)
53
54MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7683b34dc6a9..73e218e646ac 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -395,24 +395,23 @@ static struct Qdisc_ops pfifo_fast_ops = {
395 .owner = THIS_MODULE, 395 .owner = THIS_MODULE,
396}; 396};
397 397
398struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops) 398struct Qdisc *qdisc_alloc(struct net_device *dev, struct Qdisc_ops *ops)
399{ 399{
400 void *p; 400 void *p;
401 struct Qdisc *sch; 401 struct Qdisc *sch;
402 int size; 402 unsigned int size;
403 int err = -ENOBUFS;
403 404
404 /* ensure that the Qdisc and the private data are 32-byte aligned */ 405 /* ensure that the Qdisc and the private data are 32-byte aligned */
405 size = ((sizeof(*sch) + QDISC_ALIGN_CONST) & ~QDISC_ALIGN_CONST); 406 size = QDISC_ALIGN(sizeof(*sch));
406 size += ops->priv_size + QDISC_ALIGN_CONST; 407 size += ops->priv_size + (QDISC_ALIGNTO - 1);
407 408
408 p = kmalloc(size, GFP_KERNEL); 409 p = kmalloc(size, GFP_KERNEL);
409 if (!p) 410 if (!p)
410 return NULL; 411 goto errout;
411 memset(p, 0, size); 412 memset(p, 0, size);
412 413 sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
413 sch = (struct Qdisc *)(((unsigned long)p + QDISC_ALIGN_CONST) 414 sch->padded = (char *) sch - (char *) p;
414 & ~QDISC_ALIGN_CONST);
415 sch->padded = (char *)sch - (char *)p;
416 415
417 INIT_LIST_HEAD(&sch->list); 416 INIT_LIST_HEAD(&sch->list);
418 skb_queue_head_init(&sch->q); 417 skb_queue_head_init(&sch->q);
@@ -423,11 +422,24 @@ struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
423 dev_hold(dev); 422 dev_hold(dev);
424 sch->stats_lock = &dev->queue_lock; 423 sch->stats_lock = &dev->queue_lock;
425 atomic_set(&sch->refcnt, 1); 424 atomic_set(&sch->refcnt, 1);
425
426 return sch;
427errout:
428 return ERR_PTR(-err);
429}
430
431struct Qdisc * qdisc_create_dflt(struct net_device *dev, struct Qdisc_ops *ops)
432{
433 struct Qdisc *sch;
434
435 sch = qdisc_alloc(dev, ops);
436 if (IS_ERR(sch))
437 goto errout;
438
426 if (!ops->init || ops->init(sch, NULL) == 0) 439 if (!ops->init || ops->init(sch, NULL) == 0)
427 return sch; 440 return sch;
428 441
429 dev_put(dev); 442errout:
430 kfree(p);
431 return NULL; 443 return NULL;
432} 444}
433 445
@@ -591,6 +603,7 @@ EXPORT_SYMBOL(__netdev_watchdog_up);
591EXPORT_SYMBOL(noop_qdisc); 603EXPORT_SYMBOL(noop_qdisc);
592EXPORT_SYMBOL(noop_qdisc_ops); 604EXPORT_SYMBOL(noop_qdisc_ops);
593EXPORT_SYMBOL(qdisc_create_dflt); 605EXPORT_SYMBOL(qdisc_create_dflt);
606EXPORT_SYMBOL(qdisc_alloc);
594EXPORT_SYMBOL(qdisc_destroy); 607EXPORT_SYMBOL(qdisc_destroy);
595EXPORT_SYMBOL(qdisc_reset); 608EXPORT_SYMBOL(qdisc_reset);
596EXPORT_SYMBOL(qdisc_restart); 609EXPORT_SYMBOL(qdisc_restart);