-rw-r--r--  Documentation/DocBook/uio-howto.tmpl | 4
-rw-r--r--  Documentation/fb/pvr2fb.txt | 22
-rw-r--r--  Documentation/i386/zero-page.txt | 10
-rw-r--r--  Documentation/kbuild/kconfig-language.txt | 9
-rw-r--r--  Documentation/kernel-parameters.txt | 2
-rw-r--r--  Documentation/lguest/Makefile | 4
-rw-r--r--  Documentation/memory-hotplug.txt | 322
-rw-r--r--  Documentation/sched-design-CFS.txt | 2
-rw-r--r--  Documentation/sched-nice-design.txt | 108
-rw-r--r--  Documentation/sysrq.txt | 4
-rw-r--r--  Documentation/vm/slabinfo.c | 2
-rw-r--r--  MAINTAINERS | 2
-rw-r--r--  arch/alpha/kernel/sys_titan.c | 27
-rw-r--r--  arch/blackfin/kernel/init_task.c | 1
-rw-r--r--  arch/blackfin/kernel/process.c | 2
-rw-r--r--  arch/blackfin/kernel/sys_bfin.c | 1
-rw-r--r--  arch/blackfin/kernel/traps.c | 1
-rw-r--r--  arch/cris/Kconfig | 2
-rw-r--r--  arch/frv/kernel/entry.S | 1
-rw-r--r--  arch/i386/kernel/alternative.c | 37
-rw-r--r--  arch/i386/kernel/apic.c | 10
-rw-r--r--  arch/i386/kernel/cpu/amd.c | 7
-rw-r--r--  arch/i386/kernel/doublefault.c | 13
-rw-r--r--  arch/i386/kernel/head.S | 4
-rw-r--r--  arch/i386/kernel/paravirt.c | 52
-rw-r--r--  arch/i386/kernel/vmi.c | 35
-rw-r--r--  arch/i386/mm/pageattr.c | 2
-rw-r--r--  arch/i386/pci/common.c | 23
-rw-r--r--  arch/i386/pci/fixup.c | 6
-rw-r--r--  arch/i386/pci/irq.c | 5
-rw-r--r--  arch/i386/pci/legacy.c | 2
-rw-r--r--  arch/i386/pci/mmconfig.c | 14
-rw-r--r--  arch/i386/pci/numa.c | 15
-rw-r--r--  arch/i386/pci/pci.h | 43
-rw-r--r--  arch/i386/pci/visws.c | 4
-rw-r--r--  arch/i386/xen/enlighten.c | 12
-rw-r--r--  arch/sparc/kernel/prom.c | 15
-rw-r--r--  arch/sparc64/kernel/cpu.c | 36
-rw-r--r--  arch/sparc64/kernel/head.S | 111
-rw-r--r--  arch/sparc64/kernel/hvtramp.S | 7
-rw-r--r--  arch/sparc64/kernel/irq.c | 74
-rw-r--r--  arch/sparc64/kernel/mdesc.c | 24
-rw-r--r--  arch/sparc64/kernel/prom.c | 15
-rw-r--r--  arch/sparc64/kernel/smp.c | 5
-rw-r--r--  arch/sparc64/kernel/sparc64_ksyms.c | 1
-rw-r--r--  arch/sparc64/kernel/trampoline.S | 7
-rw-r--r--  arch/sparc64/kernel/vio.c | 19
-rw-r--r--  arch/sparc64/lib/GENbzero.S | 160
-rw-r--r--  arch/sparc64/lib/GENcopy_from_user.S | 34
-rw-r--r--  arch/sparc64/lib/GENcopy_to_user.S | 38
-rw-r--r--  arch/sparc64/lib/GENmemcpy.S | 121
-rw-r--r--  arch/sparc64/lib/GENpage.S | 77
-rw-r--r--  arch/sparc64/lib/GENpatch.S | 33
-rw-r--r--  arch/sparc64/lib/Makefile | 4
-rw-r--r--  arch/x86_64/boot/compressed/head.S | 7
-rw-r--r--  arch/x86_64/kernel/apic.c | 6
-rw-r--r--  arch/x86_64/kernel/pci-calgary.c | 13
-rw-r--r--  arch/x86_64/mm/pageattr.c | 3
-rw-r--r--  arch/x86_64/pci/mmconfig.c | 12
-rw-r--r--  arch/x86_64/vdso/.gitignore | 1
-rw-r--r--  block/ll_rw_blk.c | 4
-rw-r--r--  drivers/acpi/resources/rsxface.c | 2
-rw-r--r--  drivers/block/cciss.c | 16
-rw-r--r--  drivers/block/cpqarray.c | 78
-rw-r--r--  drivers/block/viodasd.c | 1
-rw-r--r--  drivers/block/xsysace.c | 29
-rw-r--r--  drivers/char/hvc_lguest.c | 1
-rw-r--r--  drivers/char/pcmcia/cm4000_cs.c | 5
-rw-r--r--  drivers/char/pcmcia/cm4040_cs.c | 2
-rw-r--r--  drivers/char/tty_io.c | 56
-rw-r--r--  drivers/lguest/core.c | 5
-rw-r--r--  drivers/lguest/interrupts_and_traps.c | 9
-rw-r--r--  drivers/lguest/lguest.c | 18
-rw-r--r--  drivers/lguest/lguest_bus.c | 1
-rw-r--r--  drivers/lguest/segments.c | 62
-rw-r--r--  drivers/lguest/switcher.S | 15
-rw-r--r--  drivers/md/dm.c | 4
-rw-r--r--  drivers/mmc/card/queue.c | 14
-rw-r--r--  drivers/mmc/host/at91_mci.c | 8
-rw-r--r--  drivers/mmc/host/wbsd.c | 10
-rw-r--r--  drivers/mtd/mtdchar.c | 1
-rw-r--r--  drivers/net/atl1/atl1_main.c | 4
-rw-r--r--  drivers/net/ehea/ehea.h | 2
-rw-r--r--  drivers/net/ehea/ehea_main.c | 44
-rw-r--r--  drivers/net/ibmveth.c | 27
-rw-r--r--  drivers/net/ibmveth.h | 3
-rw-r--r--  drivers/net/phy/phy.c | 4
-rw-r--r--  drivers/net/r8169.c | 24
-rw-r--r--  drivers/net/sis190.c | 3
-rw-r--r--  drivers/net/smc91x.h | 4
-rw-r--r--  drivers/net/ucc_geth_ethtool.c | 1
-rw-r--r--  drivers/net/ucc_geth_mii.c | 3
-rw-r--r--  drivers/net/wireless/bcm43xx/bcm43xx_phy.c | 2
-rw-r--r--  drivers/net/wireless/rtl8187_dev.c | 2
-rw-r--r--  drivers/net/wireless/zd1211rw/zd_mac.c | 2
-rw-r--r--  drivers/spi/spi_mpc83xx.c | 25
-rw-r--r--  drivers/spi/spidev.c | 4
-rw-r--r--  drivers/video/console/fbcon.c | 14
-rw-r--r--  drivers/video/matrox/g450_pll.c | 23
-rw-r--r--  drivers/video/matrox/matroxfb_DAC1064.h | 15
-rw-r--r--  drivers/video/matrox/matroxfb_base.h | 1
-rw-r--r--  drivers/video/matrox/matroxfb_misc.c | 1
-rw-r--r--  drivers/video/pvr2fb.c | 16
-rw-r--r--  drivers/video/stifb.c | 19
-rw-r--r--  fs/compat_ioctl.c | 3
-rw-r--r--  fs/direct-io.c | 1
-rw-r--r--  fs/ecryptfs/inode.c | 4
-rw-r--r--  fs/ecryptfs/main.c | 18
-rw-r--r--  fs/nfs/delegation.c | 21
-rw-r--r--  fs/nfs/inode.c | 24
-rw-r--r--  fs/nfs/namespace.c | 6
-rw-r--r--  fs/nfs/nfs4proc.c | 16
-rw-r--r--  fs/nfs/nfs4renewd.c | 5
-rw-r--r--  fs/nfs/nfs4state.c | 5
-rw-r--r--  fs/ocfs2/alloc.c | 4
-rw-r--r--  fs/ocfs2/cluster/tcp.c | 24
-rw-r--r--  fs/ocfs2/file.c | 28
-rw-r--r--  fs/ocfs2/namei.c | 16
-rw-r--r--  fs/ocfs2/ocfs2.h | 8
-rw-r--r--  fs/ocfs2/super.c | 69
-rw-r--r--  fs/ocfs2/super.h | 2
-rw-r--r--  include/asm-alpha/fcntl.h | 1
-rw-r--r--  include/asm-frv/unistd.h | 3
-rw-r--r--  include/asm-generic/pgtable.h | 73
-rw-r--r--  include/asm-i386/apic.h | 2
-rw-r--r--  include/asm-i386/cpufeature.h | 2
-rw-r--r--  include/asm-i386/paravirt.h | 16
-rw-r--r--  include/asm-i386/pci.h | 3
-rw-r--r--  include/asm-sparc/prom.h | 1
-rw-r--r--  include/asm-sparc64/oplib.h | 7
-rw-r--r--  include/asm-sparc64/prom.h | 1
-rw-r--r--  include/asm-sparc64/spitfire.h | 7
-rw-r--r--  include/asm-sparc64/xor.h | 6
-rw-r--r--  include/asm-x86_64/pci.h | 2
-rw-r--r--  include/linux/bio.h | 3
-rw-r--r--  include/linux/blktrace_api.h | 3
-rw-r--r--  include/linux/clockchips.h | 1
-rw-r--r--  include/linux/fs.h | 1
-rw-r--r--  include/linux/init.h | 2
-rw-r--r--  include/linux/kernel.h | 4
-rw-r--r--  include/linux/nfs_fs.h | 2
-rw-r--r--  include/linux/proc_fs.h | 1
-rw-r--r--  include/linux/rcupdate.h | 1
-rw-r--r--  include/linux/sched.h | 20
-rw-r--r--  include/linux/tty.h | 2
-rw-r--r--  include/net/netfilter/ipv4/nf_conntrack_ipv4.h | 2
-rw-r--r--  kernel/auditsc.c | 22
-rw-r--r--  kernel/irq/resend.c | 9
-rw-r--r--  kernel/kprobes.c | 5
-rw-r--r--  kernel/power/snapshot.c | 3
-rw-r--r--  kernel/profile.c | 4
-rw-r--r--  kernel/sched.c | 354
-rw-r--r--  kernel/sched_debug.c | 18
-rw-r--r--  kernel/sched_fair.c | 213
-rw-r--r--  kernel/sched_idletask.c | 10
-rw-r--r--  kernel/sched_rt.c | 48
-rw-r--r--  kernel/sysctl.c | 2
-rw-r--r--  kernel/time/clockevents.c | 10
-rw-r--r--  lib/hexdump.c | 6
-rw-r--r--  mm/filemap.c | 22
-rw-r--r--  mm/slub.c | 68
-rw-r--r--  net/core/utils.c | 1
-rw-r--r--  net/ieee80211/softmac/ieee80211softmac_wx.c | 11
-rw-r--r--  net/ipv4/netfilter/ipt_recent.c | 7
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6
-rw-r--r--  net/ipv4/netfilter/nf_nat_standalone.c | 2
-rw-r--r--  net/ipv4/tcp_htcp.c | 4
-rw-r--r--  net/mac80211/debugfs_netdev.c | 8
-rw-r--r--  net/mac80211/debugfs_sta.c | 2
-rw-r--r--  net/mac80211/ieee80211.c | 1
-rw-r--r--  net/mac80211/ieee80211_ioctl.c | 19
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 17
-rw-r--r--  net/netlabel/netlabel_domainhash.c | 10
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 9
-rw-r--r--  net/sunrpc/cache.c | 3
-rw-r--r--  net/sunrpc/rpc_pipe.c | 3
-rwxr-xr-x  scripts/checkpatch.pl | 102
-rw-r--r--  net/sunrpc/sched.c | 57
178 files changed, 2480 insertions(+), 1213 deletions(-)
diff --git a/Documentation/DocBook/uio-howto.tmpl b/Documentation/DocBook/uio-howto.tmpl
index e3bb29a8d8dd..c119484258b8 100644
--- a/Documentation/DocBook/uio-howto.tmpl
+++ b/Documentation/DocBook/uio-howto.tmpl
@@ -133,10 +133,6 @@ interested in translating it, please email me
 <para>updates of your driver can take place without recompiling
 the kernel.</para>
 </listitem>
-<listitem>
- <para>if you need to keep some parts of your driver closed source,
- you can do so without violating the GPL license on the kernel.</para>
-</listitem>
 </itemizedlist>

 <sect1 id="how_uio_works">
diff --git a/Documentation/fb/pvr2fb.txt b/Documentation/fb/pvr2fb.txt
index 2bf6c2321c2d..36bdeff585e2 100644
--- a/Documentation/fb/pvr2fb.txt
+++ b/Documentation/fb/pvr2fb.txt
@@ -9,14 +9,13 @@ one found in the Dreamcast.
 Advantages:

  * It provides a nice large console (128 cols + 48 lines with 1024x768)
-   without using tiny, unreadable fonts.
+   without using tiny, unreadable fonts (NOT on the Dreamcast)
  * You can run XF86_FBDev on top of /dev/fb0
  * Most important: boot logo :-)

 Disadvantages:

- * Driver is currently limited to the Dreamcast PowerVR 2 implementation
-   at the time of this writing.
+ * Driver is largely untested on non-Dreamcast systems.

 Configuration
 =============
@@ -29,11 +28,16 @@ Accepted options:
 font:X    - default font to use. All fonts are supported, including the
             SUN12x22 font which is very nice at high resolutions.

-mode:X    - default video mode. The following video modes are supported:
-            640x240-60, 640x480-60.

+mode:X    - default video mode with format [xres]x[yres]-<bpp>@<refresh rate>
+            The following video modes are supported:
+            640x480-16@60, 640x480-24@60, 640x480-32@60. The Dreamcast
+            defaults to 640x480-16@60. At the time of writing the
+            24bpp and 32bpp modes function poorly. Work to fix that is
+            ongoing.
+
 Note: the 640x240 mode is currently broken, and should not be
-      used for any reason. It is only mentioned as a reference.
+      used for any reason. It is only mentioned here as a reference.

 inverse   - invert colors on screen (for LCD displays)

@@ -52,10 +56,10 @@ output:X - output type. This can be any of the following: pal, ntsc, and
 X11
 ===

-XF86_FBDev should work, in theory. At the time of this writing it is
-totally untested and may or may not even portray the beginnings of
-working. If you end up testing this, please let me know!
+XF86_FBDev has been shown to work on the Dreamcast in the past - though not yet
+on any 2.6 series kernel.

 --
 Paul Mundt <lethal@linuxdc.org>
+Updated by Adrian McMenamin <adrian@mcmen.demon.co.uk>

diff --git a/Documentation/i386/zero-page.txt b/Documentation/i386/zero-page.txt
index 75b3680c41eb..6c0817c45683 100644
--- a/Documentation/i386/zero-page.txt
+++ b/Documentation/i386/zero-page.txt
@@ -1,3 +1,13 @@
+---------------------------------------------------------------------------
+!!!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!
+The zero page is a kernel internal data structure, not a stable ABI. It might
+change without warning, and the kernel has no way to detect old versions of it.
+If you're writing some external code like a boot loader, you should only use
+the stable, versioned real mode boot protocol described in boot.txt. Otherwise
+the kernel might break you at any time.
+!!!!!!!!!!!!!!! WARNING !!!!!!!!!!!!!!!
+----------------------------------------------------------------------------
+
 Summary of boot_params layout (kernel point of view)
  ( collected by Hans Lermen and Martin Mares )

diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
index 536d5bfbdb8d..fe8b0c4892cf 100644
--- a/Documentation/kbuild/kconfig-language.txt
+++ b/Documentation/kbuild/kconfig-language.txt
@@ -98,6 +98,15 @@ applicable everywhere (see syntax).
 times, the limit is set to the largest selection.
 Reverse dependencies can only be used with boolean or tristate
 symbols.
+Note:
+	select is evil.... select will by brute force set a symbol
+	equal to 'y' without visiting its dependencies. So by abusing
+	select you are able to select a symbol FOO even if FOO depends
+	on BAR, which is not set. In general, use select only for
+	non-visible symbols (no prompts anywhere) and for symbols with
+	no dependencies. That will limit its usefulness but, on the
+	other hand, avoid illegal configurations all over. kconfig
+	should one day warn about such things.

 - numerical ranges: "range" <symbol> <symbol> ["if" <expr>]
   This allows to limit the range of possible input values for int
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index efdb42fd3fb8..a326487a3ab5 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1922,7 +1922,7 @@ and is between 256 and 4096 characters. It is defined in the file
 		See header of drivers/scsi/wd7000.c.

 	wdt=	[WDT] Watchdog
-		See Documentation/watchdog/watchdog.txt.
+		See Documentation/watchdog/wdt.txt.

 	xd=	[HW,XT] Original XT pre-IDE (RLL encoded) disks.
 	xd_geo=	See header of drivers/block/xd.c.
diff --git a/Documentation/lguest/Makefile b/Documentation/lguest/Makefile
index 31e794ef5f98..c0b7a4556390 100644
--- a/Documentation/lguest/Makefile
+++ b/Documentation/lguest/Makefile
@@ -13,7 +13,9 @@ LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000)

 CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds
 LDLIBS:=-lz
-
+# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and
+# not others (eg. FC7).
+LDFLAGS+=-static
 all: lguest.lds lguest

 # The linker script on x86 is so complex the only way of creating one
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
new file mode 100644
index 000000000000..5fbcc22c98e9
--- /dev/null
+++ b/Documentation/memory-hotplug.txt
@@ -0,0 +1,322 @@
+==============
+Memory Hotplug
+==============
+
+Last Updated: Jul 28 2007
+
+This document is about memory hotplug, including how to use it and its current
+status. Because Memory Hotplug is still under development, the contents of this
+text will change often.
+
+1. Introduction
+  1.1 Purpose of memory hotplug
+  1.2. Phases of memory hotplug
+  1.3. Unit of memory online/offline operation
+2. Kernel Configuration
+3. sysfs files for memory hotplug
+4. Physical memory hot-add phase
+  4.1 Hardware(Firmware) Support
+  4.2 Notify memory hot-add event by hand
+5. Logical Memory hot-add phase
+  5.1. State of memory
+  5.2. How to online memory
+6. Logical memory remove
+  6.1 Memory offline and ZONE_MOVABLE
+  6.2. How to offline memory
+7. Physical memory remove
+8. Future Work List
+
+Note(1): x86_64 has a special implementation for memory hotplug.
+         This text does not describe it.
+Note(2): This text assumes that sysfs is mounted at /sys.
+
+
+---------------
+1. Introduction
+---------------
+
+1.1 Purpose of memory hotplug
+------------
+Memory Hotplug allows users to increase/decrease the amount of memory.
+Generally, there are two purposes.
+
+(A) For changing the amount of memory.
+    This is to allow a feature like capacity on demand.
+(B) For installing/removing DIMMs or NUMA-nodes physically.
+    This is to exchange DIMMs/NUMA-nodes, reduce power consumption, etc.
+
+(A) is required by highly virtualized environments and (B) is required by
+hardware which supports memory power management.
+
+Linux memory hotplug is designed for both purposes.
+
+
+1.2. Phases of memory hotplug
+---------------
+There are 2 phases in Memory Hotplug.
+  1) Physical Memory Hotplug phase
+  2) Logical Memory Hotplug phase.
+
+The first phase is to communicate with hardware/firmware and make/erase the
+environment for hotplugged memory. Basically, this phase is necessary
+for purpose (B), but it is also a good phase for communication in
+highly virtualized environments.
+
+When memory is hotplugged, the kernel recognizes the new memory, makes new
+memory management tables, and makes sysfs files for the new memory's operation.
+
+If firmware supports notification of connection of new memory to the OS,
+this phase is triggered automatically. ACPI can notify this event. If not,
+the "probe" operation by the system administrator is used instead.
+(see Section 4.).
+
+The Logical Memory Hotplug phase is to change memory state into
+available/unavailable for users. The amount of memory from the user's view is
+changed by this phase. The kernel exposes all memory in it as free pages
+when a memory range is made available.
+
+In this document, this phase is described as online/offline.
+
+The Logical Memory Hotplug phase is triggered by a write to a sysfs file by
+the system administrator. For the hot-add case, it must be executed after the
+Physical Hotplug phase by hand.
+(However, if you write udev hotplug scripts for memory hotplug, these
+ phases can be executed in a seamless way.)
+
+
+1.3. Unit of memory online/offline operation
+------------
+Memory hotplug uses the SPARSEMEM memory model. SPARSEMEM divides the whole
+memory into chunks of the same size. A chunk is called a "section". The size
+of a section is architecture dependent. For example, power uses 16MiB, ia64
+uses 1GiB. The unit of online/offline operation is "one section". (see
+Section 3.)
+
+To determine the size of sections, please read this file:
+
+/sys/devices/system/memory/block_size_bytes
+
+This file shows the size of sections in bytes.
+
+-----------------------
+2. Kernel Configuration
+-----------------------
+To use the memory hotplug feature, the kernel must be compiled with the
+following config options.
+
+- For all memory hotplug
+    Memory model -> Sparse Memory  (CONFIG_SPARSEMEM)
+    Allow for memory hot-add       (CONFIG_MEMORY_HOTPLUG)
+
+- To enable memory removal, the following are also necessary
+    Allow for memory hot remove    (CONFIG_MEMORY_HOTREMOVE)
+    Page Migration                 (CONFIG_MIGRATION)
+
+- For ACPI memory hotplug, the following are also necessary
+    Memory hotplug (under ACPI Support menu) (CONFIG_ACPI_HOTPLUG_MEMORY)
+    This option can be a kernel module.
+
+- As a related configuration, if your box has a feature of NUMA-node hotplug
+  via ACPI, then this option is necessary too.
+    ACPI0004,PNP0A05 and PNP0A06 Container Driver (under ACPI Support menu)
+    (CONFIG_ACPI_CONTAINER).
+    This option can be a kernel module too.
+
+---------------------------------
+3. sysfs files for memory hotplug
+---------------------------------
+All sections have their device information under /sys/devices/system/memory as
+
+/sys/devices/system/memory/memoryXXX
+(XXX is the section id.)
+
+Now, XXX is defined as start_address_of_section / section_size.
+
+For example, assume a 1GiB section size. A device for memory starting at
+0x100000000 is /sys/devices/system/memory/memory4
+(0x100000000 / 1GiB = 4)
+This device covers address range [0x100000000 ... 0x140000000)
+
+Under each section, you can see 3 files.
+
+/sys/devices/system/memory/memoryXXX/phys_index
+/sys/devices/system/memory/memoryXXX/phys_device
+/sys/devices/system/memory/memoryXXX/state
+
+'phys_index'  : read-only and contains the section id, same as XXX.
+'state'       : read-write
+                at read:  contains the online/offline state of memory.
+                at write: user can specify an "online" or "offline" command.
+'phys_device' : read-only: designed to show the name of the physical memory
+                device. This is not well implemented now.
+
+NOTE:
+  These directories/files appear after the physical memory hotplug phase.
+
+
+--------------------------------
+4. Physical memory hot-add phase
+--------------------------------
+
+4.1 Hardware(Firmware) Support
+------------
+On x86_64/ia64 platforms, memory hotplug by ACPI is supported.
+
+In general, firmware (ACPI) which supports memory hotplug defines a
+memory class object of _HID "PNP0C80". When a notify is asserted to PNP0C80,
+Linux's ACPI handler hot-adds the memory to the system and calls a hotplug
+udev script. This will be done automatically.
+
+But scripts for memory hotplug are not contained in the generic udev package
+(now). You may have to write them yourself, or online/offline memory by hand.
+Please see "How to online memory" and "How to offline memory" in this text.
+
+If firmware supports NUMA-node hotplug, and defines an object of _HID
+"ACPI0004", "PNP0A05", or "PNP0A06", notification is asserted to it, and the
+ACPI handler calls hotplug code for all objects which are defined in it.
+If a memory device is found, the memory hotplug code will be called.
+
+
+4.2 Notify memory hot-add event by hand
+------------
+In some environments, especially virtualized environments, the firmware will
+not notify the kernel of memory hotplug events. For such environments, a
+"probe" interface is supported. This interface depends on
+CONFIG_ARCH_MEMORY_PROBE.
+
+Now, CONFIG_ARCH_MEMORY_PROBE is supported only by powerpc, but it does not
+contain highly architecture-specific code. Please add the config option if
+you need the "probe" interface.
+
+The probe interface is located at
+/sys/devices/system/memory/probe
+
+You can tell the physical address of new memory to the kernel by
+
+% echo start_address_of_new_memory > /sys/devices/system/memory/probe
+
+Then, the [start_address_of_new_memory, start_address_of_new_memory +
+section_size) memory range is hot-added. In this case, the hotplug script is
+not called (in the current implementation). You'll have to online the memory
+yourself. Please see "How to online memory" in this text.
+
+
+
+-------------------------------
+5. Logical Memory hot-add phase
+-------------------------------
+
+5.1. State of memory
+------------
+To see the (online/offline) state of a memory section, read the 'state' file.
+
+% cat /sys/devices/system/memory/memoryXXX/state
+
+
+If the memory section is online, you'll read "online".
+If the memory section is offline, you'll read "offline".
+
+
+5.2. How to online memory
+------------
+Even if the memory is hot-added, it is not in a ready-to-use state.
+To use newly added memory, you have to "online" the memory section.
+
+For onlining, you have to write "online" to the section's state file as:
+
+% echo online > /sys/devices/system/memory/memoryXXX/state
+
+After this, section memoryXXX's state will be 'online' and the amount of
+available memory will be increased.
+
+Currently, newly added memory is added as ZONE_NORMAL (for powerpc, ZONE_DMA).
+This may be changed in the future.
+
+
+
+------------------------
+6. Logical memory remove
+------------------------
+
+6.1 Memory offline and ZONE_MOVABLE
+------------
+Memory offlining is more complicated than memory onlining. Because memory
+offlining has to make the whole memory section unused, it can fail if
+the section includes memory which cannot be freed.
+
+In general, memory offlining can use 2 techniques.
+
+(1) reclaim and free all memory in the section.
+(2) migrate all pages in the section.
+
+In the current implementation, Linux's memory offlining uses method (2),
+freeing all pages in the section by page migration. But not all pages are
+migratable. Under current Linux, migratable pages are anonymous pages and
+page caches. For offlining a section by migration, the kernel has to
+guarantee that the section contains only migratable pages.
+
+Now, a boot option for making a section which consists of migratable pages
+is supported. By specifying the "kernelcore=" or "movablecore=" boot option,
+you can create ZONE_MOVABLE...a zone which is just used for movable pages.
+(See also Documentation/kernel-parameters.txt)
+
+Assume the system has a "TOTAL" amount of memory at boot time; these boot
+options create ZONE_MOVABLE as follows.
+
+1) When the kernelcore=YYYY boot option is used,
+   the size of memory not for movable pages (not for offlining) is YYYY.
+   The size of memory for movable pages (for offlining) is TOTAL-YYYY.
+
+2) When the movablecore=ZZZZ boot option is used,
+   the size of memory not for movable pages (not for offlining) is
+   TOTAL - ZZZZ. The size of memory for movable pages (for offlining) is ZZZZ.
+
+
+Note) Unfortunately, there is no information to show which section belongs
+to ZONE_MOVABLE. This is TBD.
+
+
+6.2. How to offline memory
+------------
+You can offline a section by using the same sysfs interface that was used in
+memory onlining.
+
+% echo offline > /sys/devices/system/memory/memoryXXX/state
+
+If offlining succeeds, the state of the memory section is changed to "offline".
+If it fails, some error code (like -EBUSY) will be returned by the kernel.
+Even if a section does not belong to ZONE_MOVABLE, you can try to offline it.
+If it doesn't contain 'unmovable' memory, you'll succeed.
+
+A section under ZONE_MOVABLE can usually be offlined easily.
+But under some busy state, it may return -EBUSY. Even if a memory section
+cannot be offlined due to -EBUSY, you can retry offlining it and may be able
+to offline it (or not).
+(For example, a page may be referred to by some kernel internal call and
+ released soon.)
+
+Consideration:
+Memory hotplug's design direction is to make the possibility of memory
+offlining higher and to guarantee unplugging memory under any situation. But
+it needs more work. Returning -EBUSY under some situations may be good,
+because the user can decide whether to retry by himself. Currently, the
+memory offlining code does some amount of retrying with a 120 second timeout.
+
+-------------------------
+7. Physical memory remove
+-------------------------
+Needs more implementation yet....
+ - Notification of completion of remove work by the OS to firmware.
+ - Guarding against removal if not yet safe.
+
+--------------
+8. Future Work
+--------------
+ - allowing memory hot-add to ZONE_MOVABLE. Maybe we need some switch like a
+   sysctl or a new control file.
+ - showing memory section and physical device relationship.
+ - showing memory section and node relationship (maybe good for NUMA)
+ - showing whether a memory section is under ZONE_MOVABLE or not
+ - testing and improving memory offlining.
+ - supporting HugeTLB page migration and offlining.
+ - memmap removal at memory offline.
+ - physically removing memory.
+
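The memoryXXX numbering defined in Section 3 of the new document (XXX =
start_address_of_section / section_size, with the section size exported in
block_size_bytes) is easy to compute from user space. Here is a minimal,
illustrative sketch; this helper is not part of the patch, and it assumes
block_size_bytes reports the section size as a hex string:

#include <stdio.h>
#include <stdlib.h>

/* Illustrative user-space helper (not part of this patch): derive the
 * /sys/devices/system/memory/memoryXXX section id for a physical address.
 * Assumes /sys is mounted and block_size_bytes prints a hex number. */
int main(int argc, char **argv)
{
        unsigned long long phys, block_size;
        FILE *f = fopen("/sys/devices/system/memory/block_size_bytes", "r");

        if (argc < 2 || !f || fscanf(f, "%llx", &block_size) != 1) {
                fprintf(stderr, "usage: %s <physical address>\n", argv[0]);
                return 1;
        }
        fclose(f);

        phys = strtoull(argv[1], NULL, 0);      /* e.g. 0x100000000 */
        printf("/sys/devices/system/memory/memory%llu\n", phys / block_size);
        return 0;
}

With a 1GiB section size, passing 0x100000000 prints memory4, matching the
worked example in Section 3.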
diff --git a/Documentation/sched-design-CFS.txt b/Documentation/sched-design-CFS.txt
index 16feebb7bdc0..84901e7c0508 100644
--- a/Documentation/sched-design-CFS.txt
+++ b/Documentation/sched-design-CFS.txt
@@ -83,7 +83,7 @@ Some implementation details:
   CFS uses nanosecond granularity accounting and does not rely on any
   jiffies or other HZ detail. Thus the CFS scheduler has no notion of
   'timeslices' and has no heuristics whatsoever. There is only one
-  central tunable:
+  central tunable (you have to switch on CONFIG_SCHED_DEBUG):

      /proc/sys/kernel/sched_granularity_ns

diff --git a/Documentation/sched-nice-design.txt b/Documentation/sched-nice-design.txt
new file mode 100644
index 000000000000..e2bae5a577e3
--- /dev/null
+++ b/Documentation/sched-nice-design.txt
@@ -0,0 +1,108 @@
+This document explains the thinking about the revamped and streamlined
+nice-levels implementation in the new Linux scheduler.
+
+Nice levels were always pretty weak under Linux and people continuously
+pestered us to make nice +19 tasks use up much less CPU time.
+
+Unfortunately that was not that easy to implement under the old
+scheduler, (otherwise we'd have done it long ago) because nice level
+support was historically coupled to timeslice length, and timeslice
+units were driven by the HZ tick, so the smallest timeslice was 1/HZ.
+
+In the O(1) scheduler (in 2003) we changed negative nice levels to be
+much stronger than they were before in 2.4 (and people were happy about
+that change), and we also intentionally calibrated the linear timeslice
+rule so that the nice +19 level would be _exactly_ 1 jiffy. To better
+understand it, the timeslice graph went like this (cheesy ASCII art
+alert!):
+
+
+                  A
+            \     | [timeslice length]
+             \    |
+              \   |
+               \  |
+                \ |
+                 \|___100msecs
+                  |^ . _
+                  |      ^ . _
+                  |            ^ . _
+ -*----------------------------------*-----> [nice level]
+ -20              |                 +19
+                  |
+                  |
+
+So that if someone wanted to really renice tasks, +19 would give a much
+bigger hit than the normal linear rule would do. (The solution of
+changing the ABI to extend priorities was discarded early on.)
+
+This approach worked to some degree for some time, but later on with
+HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which
+we felt to be a bit excessive. Excessive _not_ because it's too small of
+a CPU utilization, but because it causes too frequent (once per
+millisec) rescheduling. (and would thus trash the cache, etc. Remember,
+this was long ago when hardware was weaker and caches were smaller, and
+people were running number crunching apps at nice +19.)
+
+So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the
+right minimal granularity - and this translates to 5% CPU utilization.
+But the fundamental HZ-sensitive property for nice +19 still remained,
+and we never got a single complaint about nice +19 being too _weak_ in
+terms of CPU utilization, we only got complaints about it (still) being
+too _strong_ :-)
+
+To sum it up: we always wanted to make nice levels more consistent, but
+within the constraints of HZ and jiffies and their nasty design level
+coupling to timeslices and granularity it was not really viable.
+
+The second (less frequent but still periodically occurring) complaint
+about Linux's nice level support was its asymmetry around the origin
+(which you can see demonstrated in the picture above), or more
+accurately: the fact that nice level behavior depended on the _absolute_
+nice level as well, while the nice API itself is fundamentally
+"relative":
+
+   int nice(int inc);
+
+   asmlinkage long sys_nice(int increment)
+
+(the first one is the glibc API, the second one is the syscall API.)
+Note that the 'inc' is relative to the current nice level. Tools like
+bash's "nice" command mirror this relative API.
+
+With the old scheduler, if you for example started a niced task with +1
+and another task with +2, the CPU split between the two tasks would
+depend on the nice level of the parent shell - if it was at nice -10 the
+CPU split was different than if it was at +5 or +10.
+
+A third complaint against Linux's nice level support was that negative
+nice levels were not 'punchy enough', so lots of people had to resort to
+running audio (and other multimedia) apps under RT priorities such as
+SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation
+proof, and a buggy SCHED_FIFO app can also lock up the system for good.
+
+The new scheduler in v2.6.23 addresses all three types of complaints:
+
+To address the first complaint (of nice levels being not "punchy"
+enough), the scheduler was decoupled from 'time slice' and HZ concepts
+(and granularity was made a separate concept from nice levels) and thus
+it was possible to implement better and more consistent nice +19
+support: with the new scheduler nice +19 tasks get a HZ-independent
+1.5%, instead of the variable 3%-5%-9% range they got in the old
+scheduler.
+
+To address the second complaint (of nice levels not being consistent),
+the new scheduler makes nice(1) have the same CPU utilization effect on
+tasks, regardless of their absolute nice levels. So on the new
+scheduler, running a nice +10 and a nice +11 task has the same CPU
+utilization "split" between them as running a nice -5 and a nice -4
+task. (one will get 55% of the CPU, the other 45%.) That is why nice
+levels were changed to be "multiplicative" (or exponential) - that way
+it does not matter which nice level you start out from, the 'relative
+result' will always be the same.
+
+The third complaint (of negative nice levels not being "punchy" enough
+and forcing audio apps to run under the more dangerous SCHED_FIFO
+scheduling policy) is addressed by the new scheduler almost
+automatically: stronger negative nice levels are an automatic
+side-effect of the recalibrated dynamic range of nice levels.
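The "multiplicative" rule described above can be checked with a small
sketch. The constant weight ratio of about 1.25 between adjacent nice levels
is an assumption for illustration (the scheduler's actual weight table may
differ), but it reproduces the roughly 55%/45% split the text mentions for
any two tasks one nice level apart:

#include <math.h>
#include <stdio.h>

/* Illustrative sketch of multiplicative nice levels: with a constant
 * per-level weight ratio, only the distance between two nice values
 * matters, not their absolute position. Build with -lm. */
static void split(int nice_a, int nice_b)
{
        double ratio = 1.25;                    /* assumed per-level ratio */
        double wa = pow(ratio, -nice_a);        /* lower nice => more weight */
        double wb = pow(ratio, -nice_b);

        printf("nice %+3d vs nice %+3d -> %.1f%% / %.1f%%\n",
               nice_a, nice_b,
               100.0 * wa / (wa + wb), 100.0 * wb / (wa + wb));
}

int main(void)
{
        split(10, 11);  /* ~55% / ~45% */
        split(-5, -4);  /* same split, as the text describes */
        return 0;
}

Both calls print the same split, which is exactly the property the new
scheduler is after: only the nice distance matters.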
diff --git a/Documentation/sysrq.txt b/Documentation/sysrq.txt
index ba328f255417..ef19142896ca 100644
--- a/Documentation/sysrq.txt
+++ b/Documentation/sysrq.txt
@@ -1,6 +1,6 @@
 Linux Magic System Request Key Hacks
 Documentation for sysrq.c
-Last update: 2007-MAR-14
+Last update: 2007-AUG-04

 * What is the magic SysRq key?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -78,7 +78,7 @@ On all - write a character to /proc/sysrq-trigger. e.g.:
 'g'	- Used by kgdb on ppc and sh platforms.

 'h'	- Will display help (actually any other key than those listed
-	  above will display help. but 'h' is easy to remember :-)
+	  here will display help. but 'h' is easy to remember :-)

 'i'	- Send a SIGKILL to all processes, except for init.

diff --git a/Documentation/vm/slabinfo.c b/Documentation/vm/slabinfo.c
index d4f21ffd1404..1af7bd5a2183 100644
--- a/Documentation/vm/slabinfo.c
+++ b/Documentation/vm/slabinfo.c
@@ -396,7 +396,7 @@ void report(struct slabinfo *s)
 	if (strcmp(s->name, "*") == 0)
 		return;

-	printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %d\n",
+	printf("\nSlabcache: %-20s Aliases: %2d Order : %2d Objects: %lu\n",
 		s->name, s->aliases, s->order, s->objects);
 	if (s->hwcache_align)
 		printf("** Hardware cacheline aligned\n");
diff --git a/MAINTAINERS b/MAINTAINERS
index fa52a307d894..d3a0684945b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -673,7 +673,7 @@ S: Maintained
 AUDIT SUBSYSTEM
 P:	David Woodhouse
 M:	dwmw2@infradead.org
-L:	linux-audit@redhat.com
+L:	linux-audit@redhat.com (subscribers-only)
 W:	http://people.redhat.com/sgrubb/audit/
 T:	git kernel.org:/pub/scm/linux/kernel/git/dwmw2/audit-2.6.git
 S:	Maintained
diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c
index 1d3c1398c428..52c91ccc1648 100644
--- a/arch/alpha/kernel/sys_titan.c
+++ b/arch/alpha/kernel/sys_titan.c
@@ -271,6 +271,19 @@ titan_dispatch_irqs(u64 mask)
  * Titan Family
  */
 static void __init
+titan_request_irq(unsigned int irq, irq_handler_t handler,
+                  unsigned long irqflags, const char *devname,
+                  void *dev_id)
+{
+        int err;
+        err = request_irq(irq, handler, irqflags, devname, dev_id);
+        if (err) {
+                printk("titan_request_irq for IRQ %d returned %d; ignoring\n",
+                       irq, err);
+        }
+}
+
+static void __init
 titan_late_init(void)
 {
         /*
@@ -278,15 +291,15 @@ titan_late_init(void)
          * all reported to the kernel as machine checks, so the handler
          * is a nop so it can be called to count the individual events.
          */
-        request_irq(63+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(63+16, titan_intr_nop, IRQF_DISABLED,
                     "CChip Error", NULL);
-        request_irq(62+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(62+16, titan_intr_nop, IRQF_DISABLED,
                     "PChip 0 H_Error", NULL);
-        request_irq(61+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(61+16, titan_intr_nop, IRQF_DISABLED,
                     "PChip 1 H_Error", NULL);
-        request_irq(60+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(60+16, titan_intr_nop, IRQF_DISABLED,
                     "PChip 0 C_Error", NULL);
-        request_irq(59+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(59+16, titan_intr_nop, IRQF_DISABLED,
                     "PChip 1 C_Error", NULL);

         /*
@@ -345,9 +358,9 @@ privateer_init_pci(void)
          * Hook a couple of extra err interrupts that the
          * common titan code won't.
          */
-        request_irq(53+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(53+16, titan_intr_nop, IRQF_DISABLED,
                     "NMI", NULL);
-        request_irq(50+16, titan_intr_nop, IRQF_DISABLED,
+        titan_request_irq(50+16, titan_intr_nop, IRQF_DISABLED,
                     "Temperature Warning", NULL);

         /*
diff --git a/arch/blackfin/kernel/init_task.c b/arch/blackfin/kernel/init_task.c
index b45188f8512e..673c860ffc23 100644
--- a/arch/blackfin/kernel/init_task.c
+++ b/arch/blackfin/kernel/init_task.c
@@ -31,6 +31,7 @@
 #include <linux/module.h>
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
+#include <linux/fs.h>

 static struct fs_struct init_fs = INIT_FS;
 static struct files_struct init_files = INIT_FILES;
diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 5a51dd6ab280..6a7aefe48346 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -33,6 +33,8 @@
 #include <linux/user.h>
 #include <linux/a.out.h>
 #include <linux/uaccess.h>
+#include <linux/fs.h>
+#include <linux/err.h>

 #include <asm/blackfin.h>
 #include <asm/fixed_code.h>
diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c
index f5e1ae3d1705..abcd14817d0e 100644
--- a/arch/blackfin/kernel/sys_bfin.c
+++ b/arch/blackfin/kernel/sys_bfin.c
@@ -37,6 +37,7 @@
 #include <linux/syscalls.h>
 #include <linux/mman.h>
 #include <linux/file.h>
+#include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/ipc.h>
 #include <linux/unistd.h>
diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c
index 8766bd612b47..792a8416fe10 100644
--- a/arch/blackfin/kernel/traps.c
+++ b/arch/blackfin/kernel/traps.c
@@ -31,6 +31,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
+#include <linux/fs.h>
 #include <asm/traps.h>
 #include <asm/cacheflush.h>
 #include <asm/blackfin.h>
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 4b41248b61ad..6b4d026a00a1 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -180,8 +180,6 @@ source "drivers/isdn/Kconfig"

 source "drivers/telephony/Kconfig"

-source "drivers/cdrom/Kconfig"
-
 #
 # input before char - char/joystick depends on it. As does USB.
 #
diff --git a/arch/frv/kernel/entry.S b/arch/frv/kernel/entry.S
index 275673c192aa..1e74f3c5cee2 100644
--- a/arch/frv/kernel/entry.S
+++ b/arch/frv/kernel/entry.S
@@ -1496,6 +1496,7 @@ sys_call_table:
 	.long sys_signalfd
 	.long sys_timerfd
 	.long sys_eventfd
+	.long sys_fallocate


 syscall_table_size = (. - sys_call_table)
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index c85598acb8fd..1b66d5c70eaf 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -11,6 +11,8 @@
 #include <asm/mce.h>
 #include <asm/nmi.h>

+#define MAX_PATCH_LEN (255-1)
+
 #ifdef CONFIG_HOTPLUG_CPU
 static int smp_alt_once;

@@ -148,7 +150,8 @@ static unsigned char** find_nop_table(void)

 #endif /* CONFIG_X86_64 */

-static void nop_out(void *insns, unsigned int len)
+/* Use this to add nops to a buffer, then text_poke the whole buffer. */
+static void add_nops(void *insns, unsigned int len)
 {
         unsigned char **noptable = find_nop_table();

@@ -156,7 +159,7 @@ static void nop_out(void *insns, unsigned int len)
         unsigned int noplen = len;
         if (noplen > ASM_NOP_MAX)
                 noplen = ASM_NOP_MAX;
-        text_poke(insns, noptable[noplen], noplen);
+        memcpy(insns, noptable[noplen], noplen);
         insns += noplen;
         len -= noplen;
 }
@@ -174,15 +177,15 @@ extern u8 *__smp_locks[], *__smp_locks_end[];
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
         struct alt_instr *a;
-        u8 *instr;
-        int diff;
+        char insnbuf[MAX_PATCH_LEN];

         DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
         for (a = start; a < end; a++) {
+                u8 *instr = a->instr;
                 BUG_ON(a->replacementlen > a->instrlen);
+                BUG_ON(a->instrlen > sizeof(insnbuf));
                 if (!boot_cpu_has(a->cpuid))
                         continue;
-                instr = a->instr;
 #ifdef CONFIG_X86_64
                 /* vsyscall code is not mapped yet. resolve it manually. */
                 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
@@ -191,9 +194,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
                                 __FUNCTION__, a->instr, instr);
                 }
 #endif
-                memcpy(instr, a->replacement, a->replacementlen);
-                diff = a->instrlen - a->replacementlen;
-                nop_out(instr + a->replacementlen, diff);
+                memcpy(insnbuf, a->replacement, a->replacementlen);
+                add_nops(insnbuf + a->replacementlen,
+                         a->instrlen - a->replacementlen);
+                text_poke(instr, insnbuf, a->instrlen);
         }
 }

@@ -215,16 +219,18 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
 {
         u8 **ptr;
+        char insn[1];

         if (noreplace_smp)
                 return;

+        add_nops(insn, 1);
         for (ptr = start; ptr < end; ptr++) {
                 if (*ptr < text)
                         continue;
                 if (*ptr > text_end)
                         continue;
-                nop_out(*ptr, 1);
+                text_poke(*ptr, insn, 1);
         };
 }

@@ -351,6 +357,7 @@ void apply_paravirt(struct paravirt_patch_site *start,
                     struct paravirt_patch_site *end)
 {
         struct paravirt_patch_site *p;
+        char insnbuf[MAX_PATCH_LEN];

         if (noreplace_paravirt)
                 return;
@@ -358,13 +365,15 @@ void apply_paravirt(struct paravirt_patch_site *start,
         for (p = start; p < end; p++) {
                 unsigned int used;

-                used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
-                                          p->len);
+                BUG_ON(p->len > MAX_PATCH_LEN);
+                used = paravirt_ops.patch(p->instrtype, p->clobbers, insnbuf,
+                                          (unsigned long)p->instr, p->len);

                 BUG_ON(used > p->len);

                 /* Pad the rest with nops */
-                nop_out(p->instr + used, p->len - used);
+                add_nops(insnbuf + used, p->len - used);
+                text_poke(p->instr, insnbuf, p->len);
         }
 }
 extern struct paravirt_patch_site __start_parainstructions[],
@@ -379,7 +388,7 @@ void __init alternative_instructions(void)
            that might execute the to be patched code.
            Other CPUs are not running. */
         stop_nmi();
-#ifdef CONFIG_MCE
+#ifdef CONFIG_X86_MCE
         stop_mce();
 #endif

@@ -417,7 +426,7 @@ void __init alternative_instructions(void)
         local_irq_restore(flags);

         restart_nmi();
-#ifdef CONFIG_MCE
+#ifdef CONFIG_X86_MCE
         restart_mce();
 #endif
 }
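The alternative.c changes above all follow one pattern: assemble the complete
replacement (payload plus nop padding) in a local buffer, then write it over
the live instructions with a single text_poke() call, so a patch site is
never observed half-patched. A minimal sketch of that pattern in plain C
(memcpy() and 0x90 stand in for the kernel's text_poke() and nop table; this
is not kernel code):

#include <string.h>

#define MAX_PATCH_LEN (255 - 1)

/* Sketch of the buffer-then-poke pattern: build payload + nop padding
 * off to the side, then copy it over the patch site in one step. */
void patch_site(unsigned char *site, unsigned sitelen,
                const unsigned char *repl, unsigned repllen)
{
        unsigned char buf[MAX_PATCH_LEN];

        if (sitelen > sizeof(buf) || repllen > sitelen)
                return;                         /* mirrors the BUG_ON checks */
        memcpy(buf, repl, repllen);             /* the replacement payload */
        memset(buf + repllen, 0x90, sitelen - repllen); /* pad with nops */
        memcpy(site, buf, sitelen);             /* one copy to the live site */
}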
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index bfc6cb7df7e7..f9fff29e01a9 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -61,8 +61,9 @@ static int enable_local_apic __initdata = 0;

 /* Local APIC timer verification ok */
 static int local_apic_timer_verify_ok;
-/* Disable local APIC timer from the kernel commandline or via dmi quirk */
-static int local_apic_timer_disabled;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk
+   or using CPU MSR check */
+int local_apic_timer_disabled;
 /* Local APIC timer works in C2 */
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -370,12 +371,9 @@ void __init setup_boot_APIC_clock(void)
         long delta, deltapm;
         int pm_referenced = 0;

-        if (boot_cpu_has(X86_FEATURE_LAPIC_TIMER_BROKEN))
-                local_apic_timer_disabled = 1;
-
         /*
          * The local apic timer can be disabled via the kernel
-         * commandline or from the test above. Register the lapic
+         * commandline or from the CPU detection code. Register the lapic
          * timer as a dummy clock event source on SMP systems, so the
          * broadcast mechanism is used. On UP systems simply ignore it.
          */
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index c7ba455d5ac7..dcf6bbb1c7c0 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -3,6 +3,7 @@
 #include <linux/mm.h>
 #include <asm/io.h>
 #include <asm/processor.h>
+#include <asm/apic.h>

 #include "cpu.h"

@@ -22,6 +23,7 @@
 extern void vide(void);
 __asm__(".align 4\nvide: ret");

+#ifdef CONFIG_X86_LOCAL_APIC
 #define ENABLE_C1E_MASK 0x18000000
 #define CPUID_PROCESSOR_SIGNATURE 1
 #define CPUID_XFAM 0x0ff00000
@@ -52,6 +54,7 @@ static __cpuinit int amd_apic_timer_broken(void)
         }
         return 0;
 }
+#endif

 int force_mwait __cpuinitdata;

@@ -282,8 +285,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
                 num_cache_leaves = 3;
         }

+#ifdef CONFIG_X86_LOCAL_APIC
         if (amd_apic_timer_broken())
-                set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
+                local_apic_timer_disabled = 1;
+#endif

         if (c->x86 == 0x10 && !force_mwait)
                 clear_bit(X86_FEATURE_MWAIT, c->x86_capability);
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c
index 265c5597efb0..40978af630e7 100644
--- a/arch/i386/kernel/doublefault.c
+++ b/arch/i386/kernel/doublefault.c
@@ -13,7 +13,7 @@
 static unsigned long doublefault_stack[DOUBLEFAULT_STACKSIZE];
 #define STACK_START (unsigned long)(doublefault_stack+DOUBLEFAULT_STACKSIZE)

-#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + 0x1000000)
+#define ptr_ok(x) ((x) > PAGE_OFFSET && (x) < PAGE_OFFSET + MAXMEM)

 static void doublefault_fn(void)
 {
@@ -23,23 +23,23 @@ static void doublefault_fn(void)
         store_gdt(&gdt_desc);
         gdt = gdt_desc.address;

-        printk("double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);
+        printk(KERN_EMERG "PANIC: double fault, gdt at %08lx [%d bytes]\n", gdt, gdt_desc.size);

         if (ptr_ok(gdt)) {
                 gdt += GDT_ENTRY_TSS << 3;
                 tss = *(u16 *)(gdt+2);
                 tss += *(u8 *)(gdt+4) << 16;
                 tss += *(u8 *)(gdt+7) << 24;
-                printk("double fault, tss at %08lx\n", tss);
+                printk(KERN_EMERG "double fault, tss at %08lx\n", tss);

                 if (ptr_ok(tss)) {
                         struct i386_hw_tss *t = (struct i386_hw_tss *)tss;

-                        printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp);
+                        printk(KERN_EMERG "eip = %08lx, esp = %08lx\n", t->eip, t->esp);

-                        printk("eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
+                        printk(KERN_EMERG "eax = %08lx, ebx = %08lx, ecx = %08lx, edx = %08lx\n",
                                 t->eax, t->ebx, t->ecx, t->edx);
-                        printk("esi = %08lx, edi = %08lx\n",
+                        printk(KERN_EMERG "esi = %08lx, edi = %08lx\n",
                                 t->esi, t->edi);
                 }
         }
@@ -63,6 +63,7 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
                 .cs = __KERNEL_CS,
                 .ss = __KERNEL_DS,
                 .ds = __USER_DS,
+                .fs = __KERNEL_PERCPU,

                 .__cr3 = __pa(swapper_pg_dir)
         }
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index 7c52b222207e..8f0382161c91 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -162,9 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
  * which will be freed later
  */

-#ifdef CONFIG_HOTPLUG_CPU
-.section .text,"ax",@progbits
-#else
+#ifndef CONFIG_HOTPLUG_CPU
 .section .init.text,"ax",@progbits
 #endif

diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c
index ea962c0667d5..739cfb207dd7 100644
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -69,7 +69,8 @@ DEF_NATIVE(read_tsc, "rdtsc");
69 69
70DEF_NATIVE(ud2a, "ud2a"); 70DEF_NATIVE(ud2a, "ud2a");
71 71
72static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) 72static unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
73 unsigned long addr, unsigned len)
73{ 74{
74 const unsigned char *start, *end; 75 const unsigned char *start, *end;
75 unsigned ret; 76 unsigned ret;
@@ -90,7 +91,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
90#undef SITE 91#undef SITE
91 92
92 patch_site: 93 patch_site:
93 ret = paravirt_patch_insns(insns, len, start, end); 94 ret = paravirt_patch_insns(ibuf, len, start, end);
94 break; 95 break;
95 96
96 case PARAVIRT_PATCH(make_pgd): 97 case PARAVIRT_PATCH(make_pgd):
@@ -107,7 +108,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len)
107 break; 108 break;
108 109
109 default: 110 default:
110 ret = paravirt_patch_default(type, clobbers, insns, len); 111 ret = paravirt_patch_default(type, clobbers, ibuf, addr, len);
111 break; 112 break;
112 } 113 }
113 114
@@ -129,68 +130,67 @@ struct branch {
129 u32 delta; 130 u32 delta;
130} __attribute__((packed)); 131} __attribute__((packed));
131 132
132unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, 133unsigned paravirt_patch_call(void *insnbuf,
133 void *site, u16 site_clobbers, 134 const void *target, u16 tgt_clobbers,
135 unsigned long addr, u16 site_clobbers,
134 unsigned len) 136 unsigned len)
135{ 137{
136 unsigned char *call = site; 138 struct branch *b = insnbuf;
137 unsigned long delta = (unsigned long)target - (unsigned long)(call+5); 139 unsigned long delta = (unsigned long)target - (addr+5);
138 struct branch b;
139 140
140 if (tgt_clobbers & ~site_clobbers) 141 if (tgt_clobbers & ~site_clobbers)
141 return len; /* target would clobber too much for this site */ 142 return len; /* target would clobber too much for this site */
142 if (len < 5) 143 if (len < 5)
143 return len; /* call too long for patch site */ 144 return len; /* call too long for patch site */
144 145
145 b.opcode = 0xe8; /* call */ 146 b->opcode = 0xe8; /* call */
146 b.delta = delta; 147 b->delta = delta;
147 BUILD_BUG_ON(sizeof(b) != 5); 148 BUILD_BUG_ON(sizeof(*b) != 5);
148 text_poke(call, (unsigned char *)&b, 5);
149 149
150 return 5; 150 return 5;
151} 151}
152 152
153unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) 153unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
154 unsigned long addr, unsigned len)
154{ 155{
155 unsigned char *jmp = site; 156 struct branch *b = insnbuf;
156 unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); 157 unsigned long delta = (unsigned long)target - (addr+5);
157 struct branch b;
158 158
159 if (len < 5) 159 if (len < 5)
160 return len; /* call too long for patch site */ 160 return len; /* call too long for patch site */
161 161
162 b.opcode = 0xe9; /* jmp */ 162 b->opcode = 0xe9; /* jmp */
163 b.delta = delta; 163 b->delta = delta;
164 text_poke(jmp, (unsigned char *)&b, 5);
165 164
166 return 5; 165 return 5;
167} 166}
168 167
169unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) 168unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
169 unsigned long addr, unsigned len)
170{ 170{
171 void *opfunc = *((void **)&paravirt_ops + type); 171 void *opfunc = *((void **)&paravirt_ops + type);
172 unsigned ret; 172 unsigned ret;
173 173
174 if (opfunc == NULL) 174 if (opfunc == NULL)
175 /* If there's no function, patch it with a ud2a (BUG) */ 175 /* If there's no function, patch it with a ud2a (BUG) */
176 ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); 176 ret = paravirt_patch_insns(insnbuf, len, start_ud2a, end_ud2a);
177 else if (opfunc == paravirt_nop) 177 else if (opfunc == paravirt_nop)
178 /* If the operation is a nop, then nop the callsite */ 178 /* If the operation is a nop, then nop the callsite */
179 ret = paravirt_patch_nop(); 179 ret = paravirt_patch_nop();
180 else if (type == PARAVIRT_PATCH(iret) || 180 else if (type == PARAVIRT_PATCH(iret) ||
181 type == PARAVIRT_PATCH(irq_enable_sysexit)) 181 type == PARAVIRT_PATCH(irq_enable_sysexit))
182 /* If operation requires a jmp, then jmp */ 182 /* If operation requires a jmp, then jmp */
183 ret = paravirt_patch_jmp(opfunc, site, len); 183 ret = paravirt_patch_jmp(opfunc, insnbuf, addr, len);
184 else 184 else
185 /* Otherwise call the function; assume target could 185 /* Otherwise call the function; assume target could
186 clobber any caller-save reg */ 186 clobber any caller-save reg */
187 ret = paravirt_patch_call(opfunc, CLBR_ANY, 187 ret = paravirt_patch_call(insnbuf, opfunc, CLBR_ANY,
188 site, clobbers, len); 188 addr, clobbers, len);
189 189
190 return ret; 190 return ret;
191} 191}
192 192
193unsigned paravirt_patch_insns(void *site, unsigned len, 193unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
194 const char *start, const char *end) 194 const char *start, const char *end)
195{ 195{
196 unsigned insn_len = end - start; 196 unsigned insn_len = end - start;
@@ -198,7 +198,7 @@ unsigned paravirt_patch_insns(void *site, unsigned len,
198 if (insn_len > len || start == NULL) 198 if (insn_len > len || start == NULL)
199 insn_len = len; 199 insn_len = len;
200 else 200 else
201 memcpy(site, start, insn_len); 201 memcpy(insnbuf, start, insn_len);
202 202
203 return insn_len; 203 return insn_len;
204} 204}
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c
index 72042bb7ec94..18673e0f193b 100644
--- a/arch/i386/kernel/vmi.c
+++ b/arch/i386/kernel/vmi.c
@@ -87,12 +87,14 @@ struct vmi_timer_ops vmi_timer_ops;
87#define IRQ_PATCH_INT_MASK 0 87#define IRQ_PATCH_INT_MASK 0
88#define IRQ_PATCH_DISABLE 5 88#define IRQ_PATCH_DISABLE 5
89 89
90static inline void patch_offset(unsigned char *eip, unsigned char *dest) 90static inline void patch_offset(void *insnbuf,
91 unsigned long eip, unsigned long dest)
91{ 92{
92 *(unsigned long *)(eip+1) = dest-eip-5; 93 *(unsigned long *)(insnbuf+1) = dest-eip-5;
93} 94}
94 95
95static unsigned patch_internal(int call, unsigned len, void *insns) 96static unsigned patch_internal(int call, unsigned len, void *insnbuf,
97 unsigned long eip)
96{ 98{
97 u64 reloc; 99 u64 reloc;
98 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; 100 struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc;
@@ -100,14 +102,14 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
100 switch(rel->type) { 102 switch(rel->type) {
101 case VMI_RELOCATION_CALL_REL: 103 case VMI_RELOCATION_CALL_REL:
102 BUG_ON(len < 5); 104 BUG_ON(len < 5);
103 *(char *)insns = MNEM_CALL; 105 *(char *)insnbuf = MNEM_CALL;
104 patch_offset(insns, rel->eip); 106 patch_offset(insnbuf, eip, (unsigned long)rel->eip);
105 return 5; 107 return 5;
106 108
107 case VMI_RELOCATION_JUMP_REL: 109 case VMI_RELOCATION_JUMP_REL:
108 BUG_ON(len < 5); 110 BUG_ON(len < 5);
109 *(char *)insns = MNEM_JMP; 111 *(char *)insnbuf = MNEM_JMP;
110 patch_offset(insns, rel->eip); 112 patch_offset(insnbuf, eip, (unsigned long)rel->eip);
111 return 5; 113 return 5;
112 114
113 case VMI_RELOCATION_NOP: 115 case VMI_RELOCATION_NOP:
@@ -128,21 +130,26 @@ static unsigned patch_internal(int call, unsigned len, void *insns)
128 * Apply patch if appropriate, return length of new instruction 130 * Apply patch if appropriate, return length of new instruction
129 * sequence. The callee does nop padding for us. 131 * sequence. The callee does nop padding for us.
130 */ 132 */
131static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) 133static unsigned vmi_patch(u8 type, u16 clobbers, void *insns,
134 unsigned long eip, unsigned len)
132{ 135{
133 switch (type) { 136 switch (type) {
134 case PARAVIRT_PATCH(irq_disable): 137 case PARAVIRT_PATCH(irq_disable):
135 return patch_internal(VMI_CALL_DisableInterrupts, len, insns); 138 return patch_internal(VMI_CALL_DisableInterrupts, len,
139 insns, eip);
136 case PARAVIRT_PATCH(irq_enable): 140 case PARAVIRT_PATCH(irq_enable):
137 return patch_internal(VMI_CALL_EnableInterrupts, len, insns); 141 return patch_internal(VMI_CALL_EnableInterrupts, len,
142 insns, eip);
138 case PARAVIRT_PATCH(restore_fl): 143 case PARAVIRT_PATCH(restore_fl):
139 return patch_internal(VMI_CALL_SetInterruptMask, len, insns); 144 return patch_internal(VMI_CALL_SetInterruptMask, len,
145 insns, eip);
140 case PARAVIRT_PATCH(save_fl): 146 case PARAVIRT_PATCH(save_fl):
141 return patch_internal(VMI_CALL_GetInterruptMask, len, insns); 147 return patch_internal(VMI_CALL_GetInterruptMask, len,
148 insns, eip);
142 case PARAVIRT_PATCH(iret): 149 case PARAVIRT_PATCH(iret):
143 return patch_internal(VMI_CALL_IRET, len, insns); 150 return patch_internal(VMI_CALL_IRET, len, insns, eip);
144 case PARAVIRT_PATCH(irq_enable_sysexit): 151 case PARAVIRT_PATCH(irq_enable_sysexit):
145 return patch_internal(VMI_CALL_SYSEXIT, len, insns); 152 return patch_internal(VMI_CALL_SYSEXIT, len, insns, eip);
146 default: 153 default:
147 break; 154 break;
148 } 155 }
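The VMI patcher follows the same buffer-plus-address scheme. For concreteness, with illustrative numbers: if the patch site will live at eip 0xc0100000 and the relocation target rel->eip is 0xc0200000, patch_offset() stores 0xc0200000 - 0xc0100000 - 5 = 0x000ffffb into bytes 1-4 of the buffer, right after the MNEM_CALL or MNEM_JMP opcode. Because the displacement is computed against eip rather than the buffer address, the instruction is already correct when it is later copied to the real site.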
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 8927222b3ab2..4241a74d16c8 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -82,7 +82,7 @@ static void flush_kernel_map(void *arg)
82 struct page *p; 82 struct page *p;
83 83
84 /* High level code is not ready for clflush yet */ 84 /* High level code is not ready for clflush yet */
85 if (cpu_has_clflush) { 85 if (0 && cpu_has_clflush) {
86 list_for_each_entry (p, lh, lru) 86 list_for_each_entry (p, lh, lru)
87 cache_flush_page(p); 87 cache_flush_page(p);
88 } else if (boot_cpu_data.x86_model >= 4) 88 } else if (boot_cpu_data.x86_model >= 4)
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 85503deeda46..ebc6f3c66340 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -455,3 +455,26 @@ void pcibios_disable_device (struct pci_dev *dev)
455 if (!dev->msi_enabled && pcibios_disable_irq) 455 if (!dev->msi_enabled && pcibios_disable_irq)
456 pcibios_disable_irq(dev); 456 pcibios_disable_irq(dev);
457} 457}
458
459struct pci_bus *pci_scan_bus_with_sysdata(int busno)
460{
461 struct pci_bus *bus = NULL;
462 struct pci_sysdata *sd;
463
464 /*
465 * Allocate per-root-bus (not per bus) arch-specific data.
466 * TODO: leak; this memory is never freed.
467 * It's arguable whether it's worth the trouble to care.
468 */
469 sd = kzalloc(sizeof(*sd), GFP_KERNEL);
470 if (!sd) {
471 printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno);
472 return NULL;
473 }
474 sd->node = -1;
475 bus = pci_scan_bus(busno, &pci_root_ops, sd);
476 if (!bus)
477 kfree(sd);
478
479 return bus;
480}
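Every caller that used to pass a NULL sysdata pointer to pci_scan_bus() is converted below to this wrapper, so each root bus carries a struct pci_sysdata with node = -1 (no known NUMA affinity). A usage sketch, with an illustrative bus number:

	struct pci_bus *bus;

	bus = pci_scan_bus_with_sysdata(0x40);	/* illustrative busno */
	if (!bus)
		printk(KERN_WARNING "PCI: failed to scan bus 0x40\n");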
diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c
index e7306dbf6c42..c82cbf4c7226 100644
--- a/arch/i386/pci/fixup.c
+++ b/arch/i386/pci/fixup.c
@@ -25,9 +25,9 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
25 pci_read_config_byte(d, reg++, &subb); 25 pci_read_config_byte(d, reg++, &subb);
26 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); 26 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
27 if (busno) 27 if (busno)
28 pci_scan_bus(busno, &pci_root_ops, NULL); /* Bus A */ 28 pci_scan_bus_with_sysdata(busno); /* Bus A */
29 if (suba < subb) 29 if (suba < subb)
30 pci_scan_bus(suba+1, &pci_root_ops, NULL); /* Bus B */ 30 pci_scan_bus_with_sysdata(suba+1); /* Bus B */
31 } 31 }
32 pcibios_last_bus = -1; 32 pcibios_last_bus = -1;
33} 33}
@@ -42,7 +42,7 @@ static void __devinit pci_fixup_i450gx(struct pci_dev *d)
42 u8 busno; 42 u8 busno;
43 pci_read_config_byte(d, 0x4a, &busno); 43 pci_read_config_byte(d, 0x4a, &busno);
44 printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno); 44 printk(KERN_INFO "PCI: i440KX/GX host bridge %s: secondary bus %02x\n", pci_name(d), busno);
45 pci_scan_bus(busno, &pci_root_ops, NULL); 45 pci_scan_bus_with_sysdata(busno);
46 pcibios_last_bus = -1; 46 pcibios_last_bus = -1;
47} 47}
48DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); 48DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx);
diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c
index f2cb942f8281..665db063a40a 100644
--- a/arch/i386/pci/irq.c
+++ b/arch/i386/pci/irq.c
@@ -138,8 +138,9 @@ static void __init pirq_peer_trick(void)
138 for(i = 1; i < 256; i++) { 138 for(i = 1; i < 256; i++) {
139 if (!busmap[i] || pci_find_bus(0, i)) 139 if (!busmap[i] || pci_find_bus(0, i))
140 continue; 140 continue;
141 if (pci_scan_bus(i, &pci_root_ops, NULL)) 141 if (pci_scan_bus_with_sysdata(i))
142 printk(KERN_INFO "PCI: Discovered primary peer bus %02x [IRQ]\n", i); 142 printk(KERN_INFO "PCI: Discovered primary peer "
143 "bus %02x [IRQ]\n", i);
143 } 144 }
144 pcibios_last_bus = -1; 145 pcibios_last_bus = -1;
145} 146}
diff --git a/arch/i386/pci/legacy.c b/arch/i386/pci/legacy.c
index 149a9588c256..5565d7016b75 100644
--- a/arch/i386/pci/legacy.c
+++ b/arch/i386/pci/legacy.c
@@ -26,7 +26,7 @@ static void __devinit pcibios_fixup_peer_bridges(void)
26 l != 0x0000 && l != 0xffff) { 26 l != 0x0000 && l != 0xffff) {
27 DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l); 27 DBG("Found device at %02x:%02x [%04x]\n", n, devfn, l);
28 printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n); 28 printk(KERN_INFO "PCI: Discovered peer bus %02x\n", n);
29 pci_scan_bus(n, &pci_root_ops, NULL); 29 pci_scan_bus_with_sysdata(n);
30 break; 30 break;
31 } 31 }
32 } 32 }
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c
index bb1afd9e589d..0d46b7a88b3b 100644
--- a/arch/i386/pci/mmconfig.c
+++ b/arch/i386/pci/mmconfig.c
@@ -82,16 +82,15 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
82 82
83 switch (len) { 83 switch (len) {
84 case 1: 84 case 1:
85 *value = readb(mmcfg_virt_addr + reg); 85 *value = mmio_config_readb(mmcfg_virt_addr + reg);
86 break; 86 break;
87 case 2: 87 case 2:
88 *value = readw(mmcfg_virt_addr + reg); 88 *value = mmio_config_readw(mmcfg_virt_addr + reg);
89 break; 89 break;
90 case 4: 90 case 4:
91 *value = readl(mmcfg_virt_addr + reg); 91 *value = mmio_config_readl(mmcfg_virt_addr + reg);
92 break; 92 break;
93 } 93 }
94
95 spin_unlock_irqrestore(&pci_config_lock, flags); 94 spin_unlock_irqrestore(&pci_config_lock, flags);
96 95
97 return 0; 96 return 0;
@@ -116,16 +115,15 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
116 115
117 switch (len) { 116 switch (len) {
118 case 1: 117 case 1:
 119 writeb(value, mmcfg_virt_addr + reg); 118 mmio_config_writeb(mmcfg_virt_addr + reg, value);
 120 break; 119 break;
 121 case 2: 120 case 2:
 122 writew(value, mmcfg_virt_addr + reg); 121 mmio_config_writew(mmcfg_virt_addr + reg, value);
 123 break; 122 break;
 124 case 4: 123 case 4:
 125 writel(value, mmcfg_virt_addr + reg); 124 mmio_config_writel(mmcfg_virt_addr + reg, value);
126 break; 125 break;
127 } 126 }
128
129 spin_unlock_irqrestore(&pci_config_lock, flags); 127 spin_unlock_irqrestore(&pci_config_lock, flags);
130 128
131 return 0; 129 return 0;
diff --git a/arch/i386/pci/numa.c b/arch/i386/pci/numa.c
index adbe17a38f6f..f5f165f69e0c 100644
--- a/arch/i386/pci/numa.c
+++ b/arch/i386/pci/numa.c
@@ -96,10 +96,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d)
96 pci_read_config_byte(d, reg++, &suba); 96 pci_read_config_byte(d, reg++, &suba);
97 pci_read_config_byte(d, reg++, &subb); 97 pci_read_config_byte(d, reg++, &subb);
98 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); 98 DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb);
99 if (busno) 99 if (busno) {
100 pci_scan_bus(QUADLOCAL2BUS(quad,busno), &pci_root_ops, NULL); /* Bus A */ 100 /* Bus A */
101 if (suba < subb) 101 pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno));
102 pci_scan_bus(QUADLOCAL2BUS(quad,suba+1), &pci_root_ops, NULL); /* Bus B */ 102 }
103 if (suba < subb) {
104 /* Bus B */
105 pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1));
106 }
103 } 107 }
104 pcibios_last_bus = -1; 108 pcibios_last_bus = -1;
105} 109}
@@ -123,8 +127,7 @@ static int __init pci_numa_init(void)
123 continue; 127 continue;
124 printk("Scanning PCI bus %d for quad %d\n", 128 printk("Scanning PCI bus %d for quad %d\n",
125 QUADLOCAL2BUS(quad,0), quad); 129 QUADLOCAL2BUS(quad,0), quad);
126 pci_scan_bus(QUADLOCAL2BUS(quad,0), 130 pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0));
127 &pci_root_ops, NULL);
128 } 131 }
129 return 0; 132 return 0;
130} 133}
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h
index e58bae2076ad..8c66f275756f 100644
--- a/arch/i386/pci/pci.h
+++ b/arch/i386/pci/pci.h
@@ -104,3 +104,46 @@ extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
104extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, 104extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus,
105 unsigned int devfn); 105 unsigned int devfn);
106extern int __init pci_mmcfg_arch_init(void); 106extern int __init pci_mmcfg_arch_init(void);
107
108/*
109 * AMD Fam10h CPUs are buggy, and cannot access MMIO config space
 110 * on their northbridge except through the %eax register. As such, you MUST
 111 * NOT use normal IOMEM accesses; use only the magic mmio-config
 112 * accessor functions.
 113 * In fact, just use pci_config_*, nothing else please.
114 */
115static inline unsigned char mmio_config_readb(void __iomem *pos)
116{
117 u8 val;
118 asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
119 return val;
120}
121
122static inline unsigned short mmio_config_readw(void __iomem *pos)
123{
124 u16 val;
125 asm volatile("movw (%1),%%ax" : "=a" (val) : "r" (pos));
126 return val;
127}
128
129static inline unsigned int mmio_config_readl(void __iomem *pos)
130{
131 u32 val;
132 asm volatile("movl (%1),%%eax" : "=a" (val) : "r" (pos));
133 return val;
134}
135
136static inline void mmio_config_writeb(void __iomem *pos, u8 val)
137{
138 asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
139}
140
141static inline void mmio_config_writew(void __iomem *pos, u16 val)
142{
143 asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
144}
145
146static inline void mmio_config_writel(void __iomem *pos, u32 val)
147{
148 asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
149}
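The "=a" and "a" constraints pin the data operand to %al/%ax/%eax, which is exactly the restriction the Fam10h northbridge imposes; an ordinary readl()/writel() leaves the register choice to the compiler. A sketch of a config-space read built on these accessors, assuming the usual ECAM layout (the base pointer and address math here are illustrative):

static u32 cfg_read32(void __iomem *mmcfg_base, unsigned int bus,
		      unsigned int devfn, unsigned int reg)
{
	/* ECAM: one 4K page per function, 1M per bus */
	void __iomem *addr = mmcfg_base +
		((bus << 20) | (devfn << 12)) + reg;

	return mmio_config_readl(addr);
}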
diff --git a/arch/i386/pci/visws.c b/arch/i386/pci/visws.c
index f1b486d4190b..8ecb1c722594 100644
--- a/arch/i386/pci/visws.c
+++ b/arch/i386/pci/visws.c
@@ -101,8 +101,8 @@ static int __init pcibios_init(void)
101 "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0); 101 "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0);
102 102
103 raw_pci_ops = &pci_direct_conf1; 103 raw_pci_ops = &pci_direct_conf1;
104 pci_scan_bus(pci_bus0, &pci_root_ops, NULL); 104 pci_scan_bus_with_sysdata(pci_bus0);
105 pci_scan_bus(pci_bus1, &pci_root_ops, NULL); 105 pci_scan_bus_with_sysdata(pci_bus1);
106 pci_fixup_irqs(visws_swizzle, visws_map_irq); 106 pci_fixup_irqs(visws_swizzle, visws_map_irq);
107 pcibios_resource_survey(); 107 pcibios_resource_survey();
108 return 0; 108 return 0;
diff --git a/arch/i386/xen/enlighten.c b/arch/i386/xen/enlighten.c
index 9a8c1181c001..f0c37511d8da 100644
--- a/arch/i386/xen/enlighten.c
+++ b/arch/i386/xen/enlighten.c
@@ -842,7 +842,8 @@ void __init xen_setup_vcpu_info_placement(void)
842 } 842 }
843} 843}
844 844
845static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len) 845static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf,
846 unsigned long addr, unsigned len)
846{ 847{
847 char *start, *end, *reloc; 848 char *start, *end, *reloc;
848 unsigned ret; 849 unsigned ret;
@@ -869,7 +870,7 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
869 if (start == NULL || (end-start) > len) 870 if (start == NULL || (end-start) > len)
870 goto default_patch; 871 goto default_patch;
871 872
872 ret = paravirt_patch_insns(insns, len, start, end); 873 ret = paravirt_patch_insns(insnbuf, len, start, end);
873 874
874 /* Note: because reloc is assigned from something that 875 /* Note: because reloc is assigned from something that
875 appears to be an array, gcc assumes it's non-null, 876 appears to be an array, gcc assumes it's non-null,
@@ -877,8 +878,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
877 end. */ 878 end. */
878 if (reloc > start && reloc < end) { 879 if (reloc > start && reloc < end) {
879 int reloc_off = reloc - start; 880 int reloc_off = reloc - start;
880 long *relocp = (long *)(insns + reloc_off); 881 long *relocp = (long *)(insnbuf + reloc_off);
881 long delta = start - (char *)insns; 882 long delta = start - (char *)addr;
882 883
883 *relocp += delta; 884 *relocp += delta;
884 } 885 }
@@ -886,7 +887,8 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insns, unsigned len)
886 887
887 default_patch: 888 default_patch:
888 default: 889 default:
889 ret = paravirt_patch_default(type, clobbers, insns, len); 890 ret = paravirt_patch_default(type, clobbers, insnbuf,
891 addr, len);
890 break; 892 break;
891 } 893 }
892 894
diff --git a/arch/sparc/kernel/prom.c b/arch/sparc/kernel/prom.c
index 39fbd3c8ab0b..cd4fb79aa3a8 100644
--- a/arch/sparc/kernel/prom.c
+++ b/arch/sparc/kernel/prom.c
@@ -102,6 +102,21 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
102} 102}
103EXPORT_SYMBOL(of_set_property); 103EXPORT_SYMBOL(of_set_property);
104 104
105int of_find_in_proplist(const char *list, const char *match, int len)
106{
107 while (len > 0) {
108 int l;
109
110 if (!strcmp(list, match))
111 return 1;
112 l = strlen(list) + 1;
113 list += l;
114 len -= l;
115 }
116 return 0;
117}
118EXPORT_SYMBOL(of_find_in_proplist);
119
105static unsigned int prom_early_allocated; 120static unsigned int prom_early_allocated;
106 121
107static void * __init prom_early_alloc(unsigned long size) 122static void * __init prom_early_alloc(unsigned long size)
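A "compatible" property is a sequence of NUL-terminated strings packed into one buffer, with len counting every byte including the terminators; of_find_in_proplist() steps from entry to entry with strlen() + 1. A sketch, using an illustrative packed value:

	/* Two entries: "SUNW,UltraSPARC-T2", then "sun4v". */
	static const char compat[] = "SUNW,UltraSPARC-T2\0sun4v";

	/* sizeof(compat) includes the final NUL, covering both strings */
	if (of_find_in_proplist(compat, "sun4v", sizeof(compat)))
		/* matched the second entry */;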
diff --git a/arch/sparc64/kernel/cpu.c b/arch/sparc64/kernel/cpu.c
index 7eb81d3954d9..e43db73f2b91 100644
--- a/arch/sparc64/kernel/cpu.c
+++ b/arch/sparc64/kernel/cpu.c
@@ -1,7 +1,7 @@
1/* cpu.c: Dinky routines to look for the kind of Sparc cpu 1/* cpu.c: Dinky routines to look for the kind of Sparc cpu
2 * we are on. 2 * we are on.
3 * 3 *
4 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 4 * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
5 */ 5 */
6 6
7#include <linux/kernel.h> 7#include <linux/kernel.h>
@@ -13,6 +13,7 @@
13#include <asm/fpumacro.h> 13#include <asm/fpumacro.h>
14#include <asm/cpudata.h> 14#include <asm/cpudata.h>
15#include <asm/spitfire.h> 15#include <asm/spitfire.h>
16#include <asm/oplib.h>
16 17
17DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; 18DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 };
18 19
@@ -61,21 +62,40 @@ struct cpu_iu_info linux_sparc_chips[] = {
61 62
62#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips) 63#define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips)
63 64
64char *sparc_cpu_type = "cpu-oops"; 65char *sparc_cpu_type;
65char *sparc_fpu_type = "fpu-oops"; 66char *sparc_fpu_type;
66 67
67unsigned int fsr_storage; 68unsigned int fsr_storage;
68 69
70static void __init sun4v_cpu_probe(void)
71{
72 switch (sun4v_chip_type) {
73 case SUN4V_CHIP_NIAGARA1:
74 sparc_cpu_type = "UltraSparc T1 (Niagara)";
75 sparc_fpu_type = "UltraSparc T1 integrated FPU";
76 break;
77
78 case SUN4V_CHIP_NIAGARA2:
79 sparc_cpu_type = "UltraSparc T2 (Niagara2)";
80 sparc_fpu_type = "UltraSparc T2 integrated FPU";
81 break;
82
83 default:
84 printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n",
85 prom_cpu_compatible);
86 sparc_cpu_type = "Unknown SUN4V CPU";
87 sparc_fpu_type = "Unknown SUN4V FPU";
88 break;
89 }
90}
91
69void __init cpu_probe(void) 92void __init cpu_probe(void)
70{ 93{
71 unsigned long ver, fpu_vers, manuf, impl, fprs; 94 unsigned long ver, fpu_vers, manuf, impl, fprs;
72 int i; 95 int i;
73 96
74 if (tlb_type == hypervisor) { 97 if (tlb_type == hypervisor)
75 sparc_cpu_type = "UltraSparc T1 (Niagara)"; 98 return sun4v_cpu_probe();
76 sparc_fpu_type = "UltraSparc T1 integrated FPU";
77 return;
78 }
79 99
80 fprs = fprs_read(); 100 fprs = fprs_read();
81 fprs_write(FPRS_FEF); 101 fprs_write(FPRS_FEF);
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index 9dbd833d79d6..ac18bd8e273f 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -97,7 +97,8 @@ sparc64_boot:
97 .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache 97 .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache
98 .globl prom_boot_mapped_pc, prom_boot_mapping_mode 98 .globl prom_boot_mapped_pc, prom_boot_mapping_mode
99 .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low 99 .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low
100 .globl is_sun4v 100 .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible
101 .globl is_sun4v, sun4v_chip_type
101prom_peer_name: 102prom_peer_name:
102 .asciz "peer" 103 .asciz "peer"
103prom_compatible_name: 104prom_compatible_name:
@@ -106,6 +107,8 @@ prom_finddev_name:
106 .asciz "finddevice" 107 .asciz "finddevice"
107prom_chosen_path: 108prom_chosen_path:
108 .asciz "/chosen" 109 .asciz "/chosen"
110prom_cpu_path:
111 .asciz "/cpu"
109prom_getprop_name: 112prom_getprop_name:
110 .asciz "getprop" 113 .asciz "getprop"
111prom_mmu_name: 114prom_mmu_name:
@@ -120,9 +123,13 @@ prom_unmap_name:
120 .asciz "unmap" 123 .asciz "unmap"
121prom_sun4v_name: 124prom_sun4v_name:
122 .asciz "sun4v" 125 .asciz "sun4v"
126prom_niagara_prefix:
127 .asciz "SUNW,UltraSPARC-T"
123 .align 4 128 .align 4
124prom_root_compatible: 129prom_root_compatible:
125 .skip 64 130 .skip 64
131prom_cpu_compatible:
132 .skip 64
126prom_root_node: 133prom_root_node:
127 .word 0 134 .word 0
128prom_mmu_ihandle_cache: 135prom_mmu_ihandle_cache:
@@ -138,6 +145,8 @@ prom_boot_mapping_phys_low:
138 .xword 0 145 .xword 0
139is_sun4v: 146is_sun4v:
140 .word 0 147 .word 0
148sun4v_chip_type:
149 .word SUN4V_CHIP_INVALID
1411: 1501:
142 rd %pc, %l0 151 rd %pc, %l0
143 152
@@ -296,13 +305,13 @@ is_sun4v:
296 sethi %hi(prom_sun4v_name), %g7 305 sethi %hi(prom_sun4v_name), %g7
297 or %g7, %lo(prom_sun4v_name), %g7 306 or %g7, %lo(prom_sun4v_name), %g7
298 mov 5, %g3 307 mov 5, %g3
2991: ldub [%g7], %g2 30890: ldub [%g7], %g2
300 ldub [%g1], %g4 309 ldub [%g1], %g4
301 cmp %g2, %g4 310 cmp %g2, %g4
302 bne,pn %icc, 2f 311 bne,pn %icc, 80f
303 add %g7, 1, %g7 312 add %g7, 1, %g7
304 subcc %g3, 1, %g3 313 subcc %g3, 1, %g3
305 bne,pt %xcc, 1b 314 bne,pt %xcc, 90b
306 add %g1, 1, %g1 315 add %g1, 1, %g1
307 316
308 sethi %hi(is_sun4v), %g1 317 sethi %hi(is_sun4v), %g1
@@ -310,7 +319,80 @@ is_sun4v:
310 mov 1, %g7 319 mov 1, %g7
311 stw %g7, [%g1] 320 stw %g7, [%g1]
312 321
3132: 322 /* cpu_node = prom_finddevice("/cpu") */
323 mov (1b - prom_finddev_name), %l1
324 mov (1b - prom_cpu_path), %l2
325 sub %l0, %l1, %l1
326 sub %l0, %l2, %l2
327 sub %sp, (192 + 128), %sp
328
329 stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice"
330 mov 1, %l3
331 stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1
332 stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
333 stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu"
334 stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1
335 call %l7
336 add %sp, (2047 + 128), %o0 ! argument array
337
338 ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node
339
340 mov (1b - prom_getprop_name), %l1
341 mov (1b - prom_compatible_name), %l2
342 mov (1b - prom_cpu_compatible), %l5
343 sub %l0, %l1, %l1
344 sub %l0, %l2, %l2
345 sub %l0, %l5, %l5
346
347 /* prom_getproperty(cpu_node, "compatible",
348 * &prom_cpu_compatible, 64)
349 */
350 stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop"
351 mov 4, %l3
352 stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4
353 mov 1, %l3
354 stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1
355 stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node
356 stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible"
357 stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible
358 mov 64, %l3
359 stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size
360 stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1
361 call %l7
362 add %sp, (2047 + 128), %o0 ! argument array
363
364 add %sp, (192 + 128), %sp
365
366 sethi %hi(prom_cpu_compatible), %g1
367 or %g1, %lo(prom_cpu_compatible), %g1
368 sethi %hi(prom_niagara_prefix), %g7
369 or %g7, %lo(prom_niagara_prefix), %g7
370 mov 17, %g3
37190: ldub [%g7], %g2
372 ldub [%g1], %g4
373 cmp %g2, %g4
374 bne,pn %icc, 4f
375 add %g7, 1, %g7
376 subcc %g3, 1, %g3
377 bne,pt %xcc, 90b
378 add %g1, 1, %g1
379
380 sethi %hi(prom_cpu_compatible), %g1
381 or %g1, %lo(prom_cpu_compatible), %g1
382 ldub [%g1 + 17], %g2
383 cmp %g2, '1'
384 be,pt %xcc, 5f
385 mov SUN4V_CHIP_NIAGARA1, %g4
386 cmp %g2, '2'
387 be,pt %xcc, 5f
388 mov SUN4V_CHIP_NIAGARA2, %g4
3894:
390 mov SUN4V_CHIP_UNKNOWN, %g4
3915: sethi %hi(sun4v_chip_type), %g2
392 or %g2, %lo(sun4v_chip_type), %g2
393 stw %g4, [%g2]
394
39580:
314 BRANCH_IF_SUN4V(g1, jump_to_sun4u_init) 396 BRANCH_IF_SUN4V(g1, jump_to_sun4u_init)
315 BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) 397 BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot)
316 BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) 398 BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot)
@@ -414,6 +496,24 @@ niagara_tlb_fixup:
414 stw %g2, [%g1 + %lo(tlb_type)] 496 stw %g2, [%g1 + %lo(tlb_type)]
415 497
416 /* Patch copy/clear ops. */ 498 /* Patch copy/clear ops. */
499 sethi %hi(sun4v_chip_type), %g1
500 lduw [%g1 + %lo(sun4v_chip_type)], %g1
501 cmp %g1, SUN4V_CHIP_NIAGARA1
502 be,pt %xcc, niagara_patch
503 cmp %g1, SUN4V_CHIP_NIAGARA2
504 be,pt %xcc, niagara_patch
505 nop
506
507 call generic_patch_copyops
508 nop
509 call generic_patch_bzero
510 nop
511 call generic_patch_pageops
512 nop
513
514 ba,a,pt %xcc, 80f
515
516niagara_patch:
417 call niagara_patch_copyops 517 call niagara_patch_copyops
418 nop 518 nop
419 call niagara_patch_bzero 519 call niagara_patch_bzero
@@ -421,6 +521,7 @@ niagara_tlb_fixup:
421 call niagara_patch_pageops 521 call niagara_patch_pageops
422 nop 522 nop
423 523
52480:
424 /* Patch TLB/cache ops. */ 525 /* Patch TLB/cache ops. */
425 call hypervisor_patch_cachetlbops 526 call hypervisor_patch_cachetlbops
426 nop 527 nop
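The new boot-time probe in head.S calls the PROM the same way the earlier sun4v detection does: a client-interface argument array is built on the stack at %sp + 2047 + 128 (2047 being the 64-bit stack bias), and the PROM entry cached in %l7 is called with the array's address in %o0. A C-shaped view of the "getprop" array the assembly stores, with illustrative field names:

struct obp_getprop_args {
	unsigned long service;	/* +0x00: ptr to "getprop" */
	unsigned long nargs;	/* +0x08: 4 */
	unsigned long nrets;	/* +0x10: 1 */
	unsigned long node;	/* +0x18: cpu device node */
	unsigned long name;	/* +0x20: ptr to "compatible" */
	unsigned long buf;	/* +0x28: &prom_cpu_compatible */
	unsigned long buflen;	/* +0x30: 64 */
	unsigned long ret1;	/* +0x38: property length, or -1 */
};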
diff --git a/arch/sparc64/kernel/hvtramp.S b/arch/sparc64/kernel/hvtramp.S
index a55c252e18cc..b692e044a463 100644
--- a/arch/sparc64/kernel/hvtramp.S
+++ b/arch/sparc64/kernel/hvtramp.S
@@ -115,11 +115,8 @@ hv_cpu_startup:
115 call hard_smp_processor_id 115 call hard_smp_processor_id
116 nop 116 nop
117 117
118 mov %o0, %o1 118 call sun4v_register_mondo_queues
119 mov 0, %o0 119 nop
120 mov 0, %o2
121 call sun4v_init_mondo_queues
122 mov 1, %o3
123 120
124 call init_cur_cpu_trap 121 call init_cur_cpu_trap
125 mov %g6, %o0 122 mov %g6, %o0
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index db31bf6b42db..384abf410cf0 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -929,7 +929,7 @@ static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type
929 } 929 }
930} 930}
931 931
932static void __cpuinit sun4v_register_mondo_queues(int this_cpu) 932void __cpuinit sun4v_register_mondo_queues(int this_cpu)
933{ 933{
934 struct trap_per_cpu *tb = &trap_block[this_cpu]; 934 struct trap_per_cpu *tb = &trap_block[this_cpu];
935 935
@@ -943,20 +943,10 @@ static void __cpuinit sun4v_register_mondo_queues(int this_cpu)
943 tb->nonresum_qmask); 943 tb->nonresum_qmask);
944} 944}
945 945
946static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem) 946static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask)
947{ 947{
948 unsigned long size = PAGE_ALIGN(qmask + 1); 948 unsigned long size = PAGE_ALIGN(qmask + 1);
949 unsigned long order = get_order(size); 949 void *p = __alloc_bootmem_low(size, size, 0);
950 void *p = NULL;
951
952 if (use_bootmem) {
953 p = __alloc_bootmem_low(size, size, 0);
954 } else {
955 struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
956 if (page)
957 p = page_address(page);
958 }
959
960 if (!p) { 950 if (!p) {
961 prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); 951 prom_printf("SUN4V: Error, cannot allocate mondo queue.\n");
962 prom_halt(); 952 prom_halt();
@@ -965,19 +955,10 @@ static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask
965 *pa_ptr = __pa(p); 955 *pa_ptr = __pa(p);
966} 956}
967 957
968static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem) 958static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask)
969{ 959{
970 unsigned long size = PAGE_ALIGN(qmask + 1); 960 unsigned long size = PAGE_ALIGN(qmask + 1);
971 unsigned long order = get_order(size); 961 void *p = __alloc_bootmem_low(size, size, 0);
972 void *p = NULL;
973
974 if (use_bootmem) {
975 p = __alloc_bootmem_low(size, size, 0);
976 } else {
977 struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order);
978 if (page)
979 p = page_address(page);
980 }
981 962
982 if (!p) { 963 if (!p) {
983 prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); 964 prom_printf("SUN4V: Error, cannot allocate kbuf page.\n");
@@ -987,18 +968,14 @@ static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask,
987 *pa_ptr = __pa(p); 968 *pa_ptr = __pa(p);
988} 969}
989 970
990static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem) 971static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb)
991{ 972{
992#ifdef CONFIG_SMP 973#ifdef CONFIG_SMP
993 void *page; 974 void *page;
994 975
995 BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); 976 BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64));
996 977
997 if (use_bootmem) 978 page = alloc_bootmem_low_pages(PAGE_SIZE);
998 page = alloc_bootmem_low_pages(PAGE_SIZE);
999 else
1000 page = (void *) get_zeroed_page(GFP_ATOMIC);
1001
1002 if (!page) { 979 if (!page) {
1003 prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); 980 prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n");
1004 prom_halt(); 981 prom_halt();
@@ -1009,30 +986,27 @@ static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_
1009#endif 986#endif
1010} 987}
1011 988
1012/* Allocate and register the mondo and error queues for this cpu. */ 989/* Allocate mondo and error queues for all possible cpus. */
1013void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load) 990static void __init sun4v_init_mondo_queues(void)
1014{ 991{
1015 struct trap_per_cpu *tb = &trap_block[cpu]; 992 int cpu;
1016 993
1017 if (alloc) { 994 for_each_possible_cpu(cpu) {
1018 alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask, use_bootmem); 995 struct trap_per_cpu *tb = &trap_block[cpu];
1019 alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask, use_bootmem);
1020 alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask, use_bootmem);
1021 alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask, use_bootmem);
1022 alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask, use_bootmem);
1023 alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, tb->nonresum_qmask, use_bootmem);
1024 996
1025 init_cpu_send_mondo_info(tb, use_bootmem); 997 alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask);
1026 } 998 alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask);
999 alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask);
1000 alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask);
1001 alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask);
1002 alloc_one_kbuf(&tb->nonresum_kernel_buf_pa,
1003 tb->nonresum_qmask);
1027 1004
1028 if (load) { 1005 init_cpu_send_mondo_info(tb);
1029 if (cpu != hard_smp_processor_id()) {
1030 prom_printf("SUN4V: init mondo on cpu %d not %d\n",
1031 cpu, hard_smp_processor_id());
1032 prom_halt();
1033 }
1034 sun4v_register_mondo_queues(cpu);
1035 } 1006 }
1007
1008 /* Load up the boot cpu's entries. */
1009 sun4v_register_mondo_queues(hard_smp_processor_id());
1036} 1010}
1037 1011
1038static struct irqaction timer_irq_action = { 1012static struct irqaction timer_irq_action = {
@@ -1047,7 +1021,7 @@ void __init init_IRQ(void)
1047 memset(&ivector_table[0], 0, sizeof(ivector_table)); 1021 memset(&ivector_table[0], 0, sizeof(ivector_table));
1048 1022
1049 if (tlb_type == hypervisor) 1023 if (tlb_type == hypervisor)
1050 sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1); 1024 sun4v_init_mondo_queues();
1051 1025
1052 /* We need to clear any IRQ's pending in the soft interrupt 1026 /* We need to clear any IRQ's pending in the soft interrupt
1053 * registers, a spurious one could be left around from the 1027 * registers, a spurious one could be left around from the
diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c
index cce4d0ddf5d5..95059c2ec414 100644
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -568,20 +568,6 @@ static void __init report_platform_properties(void)
568 mdesc_release(hp); 568 mdesc_release(hp);
569} 569}
570 570
571static int inline find_in_proplist(const char *list, const char *match, int len)
572{
573 while (len > 0) {
574 int l;
575
576 if (!strcmp(list, match))
577 return 1;
578 l = strlen(list) + 1;
579 list += l;
580 len -= l;
581 }
582 return 0;
583}
584
585static void __devinit fill_in_one_cache(cpuinfo_sparc *c, 571static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
586 struct mdesc_handle *hp, 572 struct mdesc_handle *hp,
587 u64 mp) 573 u64 mp)
@@ -596,10 +582,10 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
596 582
597 switch (*level) { 583 switch (*level) {
598 case 1: 584 case 1:
599 if (find_in_proplist(type, "instn", type_len)) { 585 if (of_find_in_proplist(type, "instn", type_len)) {
600 c->icache_size = *size; 586 c->icache_size = *size;
601 c->icache_line_size = *line_size; 587 c->icache_line_size = *line_size;
602 } else if (find_in_proplist(type, "data", type_len)) { 588 } else if (of_find_in_proplist(type, "data", type_len)) {
603 c->dcache_size = *size; 589 c->dcache_size = *size;
604 c->dcache_line_size = *line_size; 590 c->dcache_line_size = *line_size;
605 } 591 }
@@ -677,7 +663,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp)
677 continue; 663 continue;
678 664
679 type = mdesc_get_property(hp, mp, "type", &len); 665 type = mdesc_get_property(hp, mp, "type", &len);
680 if (!find_in_proplist(type, "instn", len)) 666 if (!of_find_in_proplist(type, "instn", len))
681 continue; 667 continue;
682 668
683 mark_core_ids(hp, mp, idx); 669 mark_core_ids(hp, mp, idx);
@@ -718,8 +704,8 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp,
718 int len; 704 int len;
719 705
720 type = mdesc_get_property(hp, mp, "type", &len); 706 type = mdesc_get_property(hp, mp, "type", &len);
721 if (!find_in_proplist(type, "int", len) && 707 if (!of_find_in_proplist(type, "int", len) &&
722 !find_in_proplist(type, "integer", len)) 708 !of_find_in_proplist(type, "integer", len))
723 continue; 709 continue;
724 710
725 mark_proc_ids(hp, mp, idx); 711 mark_proc_ids(hp, mp, idx);
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index f4e0a9ad9be3..d1a78c976cef 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -107,6 +107,21 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len
107} 107}
108EXPORT_SYMBOL(of_set_property); 108EXPORT_SYMBOL(of_set_property);
109 109
110int of_find_in_proplist(const char *list, const char *match, int len)
111{
112 while (len > 0) {
113 int l;
114
115 if (!strcmp(list, match))
116 return 1;
117 l = strlen(list) + 1;
118 list += l;
119 len -= l;
120 }
121 return 0;
122}
123EXPORT_SYMBOL(of_find_in_proplist);
124
110static unsigned int prom_early_allocated; 125static unsigned int prom_early_allocated;
111 126
112static void * __init prom_early_alloc(unsigned long size) 127static void * __init prom_early_alloc(unsigned long size)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index b448d33321c6..b84c49e3697c 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -334,8 +334,6 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
334} 334}
335#endif 335#endif
336 336
337extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
338
339extern unsigned long sparc64_cpu_startup; 337extern unsigned long sparc64_cpu_startup;
340 338
341/* The OBP cpu startup callback truncates the 3rd arg cookie to 339/* The OBP cpu startup callback truncates the 3rd arg cookie to
@@ -359,9 +357,6 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
359 cpu_new_thread = task_thread_info(p); 357 cpu_new_thread = task_thread_info(p);
360 358
361 if (tlb_type == hypervisor) { 359 if (tlb_type == hypervisor) {
362 /* Alloc the mondo queues, cpu will load them. */
363 sun4v_init_mondo_queues(0, cpu, 1, 0);
364
365#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) 360#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
366 if (ldom_domaining_enabled) 361 if (ldom_domaining_enabled)
367 ldom_startcpu_cpuid(cpu, 362 ldom_startcpu_cpuid(cpu,
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index d270c2f0be0f..23fad7ebdd0d 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -168,6 +168,7 @@ EXPORT_SYMBOL(change_bit);
168EXPORT_SYMBOL(__flushw_user); 168EXPORT_SYMBOL(__flushw_user);
169 169
170EXPORT_SYMBOL(tlb_type); 170EXPORT_SYMBOL(tlb_type);
171EXPORT_SYMBOL(sun4v_chip_type);
171EXPORT_SYMBOL(get_fb_unmapped_area); 172EXPORT_SYMBOL(get_fb_unmapped_area);
172EXPORT_SYMBOL(flush_icache_range); 173EXPORT_SYMBOL(flush_icache_range);
173 174
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index a4dc01a3d238..9448595f9063 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -366,11 +366,8 @@ after_lock_tlb:
366 call hard_smp_processor_id 366 call hard_smp_processor_id
367 nop 367 nop
368 368
369 mov %o0, %o1 369 call sun4v_register_mondo_queues
370 mov 0, %o0 370 nop
371 mov 0, %o2
372 call sun4v_init_mondo_queues
373 mov 1, %o3
374 371
3751: call init_cur_cpu_trap 3721: call init_cur_cpu_trap
376 ldx [%l0], %o0 373 ldx [%l0], %o0
diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c
index 3685daf5157f..1550ac5673da 100644
--- a/arch/sparc64/kernel/vio.c
+++ b/arch/sparc64/kernel/vio.c
@@ -16,21 +16,6 @@
16#include <asm/mdesc.h> 16#include <asm/mdesc.h>
17#include <asm/vio.h> 17#include <asm/vio.h>
18 18
19static inline int find_in_proplist(const char *list, const char *match,
20 int len)
21{
22 while (len > 0) {
23 int l;
24
25 if (!strcmp(list, match))
26 return 1;
27 l = strlen(list) + 1;
28 list += l;
29 len -= l;
30 }
31 return 0;
32}
33
34static const struct vio_device_id *vio_match_device( 19static const struct vio_device_id *vio_match_device(
35 const struct vio_device_id *matches, 20 const struct vio_device_id *matches,
36 const struct vio_dev *dev) 21 const struct vio_dev *dev)
@@ -49,7 +34,7 @@ static const struct vio_device_id *vio_match_device(
49 34
50 if (matches->compat[0]) { 35 if (matches->compat[0]) {
51 match &= len && 36 match &= len &&
52 find_in_proplist(compat, matches->compat, len); 37 of_find_in_proplist(compat, matches->compat, len);
53 } 38 }
54 if (match) 39 if (match)
55 return matches; 40 return matches;
@@ -406,7 +391,7 @@ static int __init vio_init(void)
406 "property\n"); 391 "property\n");
407 goto out_release; 392 goto out_release;
408 } 393 }
409 if (!find_in_proplist(compat, channel_devices_compat, len)) { 394 if (!of_find_in_proplist(compat, channel_devices_compat, len)) {
410 printk(KERN_ERR "VIO: Channel devices node lacks (%s) " 395 printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
411 "compat entry.\n", channel_devices_compat); 396 "compat entry.\n", channel_devices_compat);
412 goto out_release; 397 goto out_release;
diff --git a/arch/sparc64/lib/GENbzero.S b/arch/sparc64/lib/GENbzero.S
new file mode 100644
index 000000000000..f9c71d64eba1
--- /dev/null
+++ b/arch/sparc64/lib/GENbzero.S
@@ -0,0 +1,160 @@
1/* GENbzero.S: Generic sparc64 memset/clear_user.
2 *
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */
5#include <asm/asi.h>
6
7#define EX_ST(x,y) \
898: x,y; \
9 .section .fixup; \
10 .align 4; \
1199: retl; \
12 mov %o1, %o0; \
13 .section __ex_table; \
14 .align 4; \
15 .word 98b, 99b; \
16 .text; \
17 .align 4;
18
19 .align 32
20 .text
21
22 .globl GENmemset
23 .type GENmemset, #function
24GENmemset: /* %o0=buf, %o1=pat, %o2=len */
25 and %o1, 0xff, %o3
26 mov %o2, %o1
27 sllx %o3, 8, %g1
28 or %g1, %o3, %o2
29 sllx %o2, 16, %g1
30 or %g1, %o2, %o2
31 sllx %o2, 32, %g1
32 ba,pt %xcc, 1f
33 or %g1, %o2, %o2
34
35 .globl GENbzero
36 .type GENbzero, #function
37GENbzero:
38 clr %o2
391: brz,pn %o1, GENbzero_return
40 mov %o0, %o3
41
42 /* %o5: saved %asi, restored at GENbzero_done
43 * %o4: store %asi to use
44 */
45 rd %asi, %o5
46 mov ASI_P, %o4
47 wr %o4, 0x0, %asi
48
49GENbzero_from_clear_user:
50 cmp %o1, 15
51 bl,pn %icc, GENbzero_tiny
52 andcc %o0, 0x7, %g1
53 be,pt %xcc, 2f
54 mov 8, %g2
55 sub %g2, %g1, %g1
56 sub %o1, %g1, %o1
571: EX_ST(stba %o2, [%o0 + 0x00] %asi)
58 subcc %g1, 1, %g1
59 bne,pt %xcc, 1b
60 add %o0, 1, %o0
612: cmp %o1, 128
62 bl,pn %icc, GENbzero_medium
63 andcc %o0, (64 - 1), %g1
64 be,pt %xcc, GENbzero_pre_loop
65 mov 64, %g2
66 sub %g2, %g1, %g1
67 sub %o1, %g1, %o1
681: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
69 subcc %g1, 8, %g1
70 bne,pt %xcc, 1b
71 add %o0, 8, %o0
72
73GENbzero_pre_loop:
74 andn %o1, (64 - 1), %g1
75 sub %o1, %g1, %o1
76GENbzero_loop:
77 EX_ST(stxa %o2, [%o0 + 0x00] %asi)
78 EX_ST(stxa %o2, [%o0 + 0x08] %asi)
79 EX_ST(stxa %o2, [%o0 + 0x10] %asi)
80 EX_ST(stxa %o2, [%o0 + 0x18] %asi)
81 EX_ST(stxa %o2, [%o0 + 0x20] %asi)
82 EX_ST(stxa %o2, [%o0 + 0x28] %asi)
83 EX_ST(stxa %o2, [%o0 + 0x30] %asi)
84 EX_ST(stxa %o2, [%o0 + 0x38] %asi)
85 subcc %g1, 64, %g1
86 bne,pt %xcc, GENbzero_loop
87 add %o0, 64, %o0
88
89 membar #Sync
90 wr %o4, 0x0, %asi
91 brz,pn %o1, GENbzero_done
92GENbzero_medium:
93 andncc %o1, 0x7, %g1
94 be,pn %xcc, 2f
95 sub %o1, %g1, %o1
961: EX_ST(stxa %o2, [%o0 + 0x00] %asi)
97 subcc %g1, 8, %g1
98 bne,pt %xcc, 1b
99 add %o0, 8, %o0
1002: brz,pt %o1, GENbzero_done
101 nop
102
103GENbzero_tiny:
1041: EX_ST(stba %o2, [%o0 + 0x00] %asi)
105 subcc %o1, 1, %o1
106 bne,pt %icc, 1b
107 add %o0, 1, %o0
108
109 /* fallthrough */
110
111GENbzero_done:
112 wr %o5, 0x0, %asi
113
114GENbzero_return:
115 retl
116 mov %o3, %o0
117 .size GENbzero, .-GENbzero
118 .size GENmemset, .-GENmemset
119
120 .globl GENclear_user
121 .type GENclear_user, #function
122GENclear_user: /* %o0=buf, %o1=len */
123 rd %asi, %o5
124 brz,pn %o1, GENbzero_done
125 clr %o3
126 cmp %o5, ASI_AIUS
127 bne,pn %icc, GENbzero
128 clr %o2
129 ba,pt %xcc, GENbzero_from_clear_user
130 mov ASI_AIUS, %o4
131 .size GENclear_user, .-GENclear_user
132
133#define BRANCH_ALWAYS 0x10680000
134#define NOP 0x01000000
135#define GEN_DO_PATCH(OLD, NEW) \
136 sethi %hi(NEW), %g1; \
137 or %g1, %lo(NEW), %g1; \
138 sethi %hi(OLD), %g2; \
139 or %g2, %lo(OLD), %g2; \
140 sub %g1, %g2, %g1; \
141 sethi %hi(BRANCH_ALWAYS), %g3; \
142 sll %g1, 11, %g1; \
143 srl %g1, 11 + 2, %g1; \
144 or %g3, %lo(BRANCH_ALWAYS), %g3; \
145 or %g3, %g1, %g3; \
146 stw %g3, [%g2]; \
147 sethi %hi(NOP), %g3; \
148 or %g3, %lo(NOP), %g3; \
149 stw %g3, [%g2 + 0x4]; \
150 flush %g2;
151
152 .globl generic_patch_bzero
153 .type generic_patch_bzero,#function
154generic_patch_bzero:
155 GEN_DO_PATCH(memset, GENmemset)
156 GEN_DO_PATCH(__bzero, GENbzero)
157 GEN_DO_PATCH(__clear_user, GENclear_user)
158 retl
159 nop
160 .size generic_patch_bzero,.-generic_patch_bzero
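GEN_DO_PATCH() redirects an existing routine by overwriting its first two instructions: the byte distance from OLD to NEW, reduced to a 19-bit word displacement by the sll 11 / srl 13 pair, is OR'd into 0x10680000 (ba,pt %xcc with an empty disp19 field), and the delay slot becomes a nop. A sketch of the same computation in C, names illustrative:

#define BRANCH_ALWAYS	0x10680000	/* ba,pt %xcc, disp19 */
#define NOP		0x01000000

static void patch_branch(u32 *oldp, u32 *newp)
{
	/* 19-bit signed word displacement from oldp to newp */
	u32 disp19 = ((u32)((long)newp - (long)oldp) << 11) >> 13;

	oldp[0] = BRANCH_ALWAYS | disp19;
	oldp[1] = NOP;
	/* the real macro follows up with a flush to sync the I-cache */
}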
diff --git a/arch/sparc64/lib/GENcopy_from_user.S b/arch/sparc64/lib/GENcopy_from_user.S
new file mode 100644
index 000000000000..2b9df99e87f9
--- /dev/null
+++ b/arch/sparc64/lib/GENcopy_from_user.S
@@ -0,0 +1,34 @@
1/* GENcopy_from_user.S: Generic sparc64 copy from userspace.
2 *
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */
5
6#define EX_LD(x) \
798: x; \
8 .section .fixup; \
9 .align 4; \
1099: retl; \
11 mov 1, %o0; \
12 .section __ex_table,"a";\
13 .align 4; \
14 .word 98b, 99b; \
15 .text; \
16 .align 4;
17
18#ifndef ASI_AIUS
19#define ASI_AIUS 0x11
20#endif
21
22#define FUNC_NAME GENcopy_from_user
23#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest
24#define EX_RETVAL(x) 0
25
26#ifdef __KERNEL__
27#define PREAMBLE \
28 rd %asi, %g1; \
29 cmp %g1, ASI_AIUS; \
30 bne,pn %icc, memcpy_user_stub; \
31 nop
32#endif
33
34#include "GENmemcpy.S"
diff --git a/arch/sparc64/lib/GENcopy_to_user.S b/arch/sparc64/lib/GENcopy_to_user.S
new file mode 100644
index 000000000000..bb3f7084daf9
--- /dev/null
+++ b/arch/sparc64/lib/GENcopy_to_user.S
@@ -0,0 +1,38 @@
1/* GENcopy_to_user.S: Generic sparc64 copy to userspace.
2 *
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */
5
6#define EX_ST(x) \
798: x; \
8 .section .fixup; \
9 .align 4; \
1099: retl; \
11 mov 1, %o0; \
12 .section __ex_table,"a";\
13 .align 4; \
14 .word 98b, 99b; \
15 .text; \
16 .align 4;
17
18#ifndef ASI_AIUS
19#define ASI_AIUS 0x11
20#endif
21
22#define FUNC_NAME GENcopy_to_user
23#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS
24#define EX_RETVAL(x) 0
25
26#ifdef __KERNEL__
27 /* Writing to %asi is _expensive_ so we hardcode it.
28 * Reading %asi to check for KERNEL_DS is comparatively
29 * cheap.
30 */
31#define PREAMBLE \
32 rd %asi, %g1; \
33 cmp %g1, ASI_AIUS; \
34 bne,pn %icc, memcpy_user_stub; \
35 nop
36#endif
37
38#include "GENmemcpy.S"
diff --git a/arch/sparc64/lib/GENmemcpy.S b/arch/sparc64/lib/GENmemcpy.S
new file mode 100644
index 000000000000..89358ee94851
--- /dev/null
+++ b/arch/sparc64/lib/GENmemcpy.S
@@ -0,0 +1,121 @@
1/* GENmemcpy.S: Generic sparc64 memcpy.
2 *
3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */
5
6#ifdef __KERNEL__
7#define GLOBAL_SPARE %g7
8#else
9#define GLOBAL_SPARE %g5
10#endif
11
12#ifndef EX_LD
13#define EX_LD(x) x
14#endif
15
16#ifndef EX_ST
17#define EX_ST(x) x
18#endif
19
20#ifndef EX_RETVAL
21#define EX_RETVAL(x) x
22#endif
23
24#ifndef LOAD
25#define LOAD(type,addr,dest) type [addr], dest
26#endif
27
28#ifndef STORE
29#define STORE(type,src,addr) type src, [addr]
30#endif
31
32#ifndef FUNC_NAME
33#define FUNC_NAME GENmemcpy
34#endif
35
36#ifndef PREAMBLE
37#define PREAMBLE
38#endif
39
40#ifndef XCC
41#define XCC xcc
42#endif
43
44 .register %g2,#scratch
45 .register %g3,#scratch
46
47 .text
48 .align 64
49
50 .globl FUNC_NAME
51 .type FUNC_NAME,#function
52FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
53 srlx %o2, 31, %g2
54 cmp %g2, 0
55 tne %XCC, 5
56 PREAMBLE
57 mov %o0, GLOBAL_SPARE
58
59 cmp %o2, 0
60 be,pn %XCC, 85f
61 or %o0, %o1, %o3
62 cmp %o2, 16
63 blu,a,pn %XCC, 80f
64 or %o3, %o2, %o3
65
66 xor %o0, %o1, %o4
67 andcc %o4, 0x7, %g0
68 bne,a,pn %XCC, 90f
69 sub %o0, %o1, %o3
70
71 and %o0, 0x7, %o4
72 sub %o4, 0x8, %o4
73 sub %g0, %o4, %o4
74 sub %o2, %o4, %o2
751: subcc %o4, 1, %o4
76 EX_LD(LOAD(ldub, %o1, %g1))
77 EX_ST(STORE(stb, %g1, %o0))
78 add %o1, 1, %o1
79 bne,pt %XCC, 1b
80 add %o0, 1, %o0
81
82 andn %o2, 0x7, %g1
83 sub %o2, %g1, %o2
841: subcc %g1, 0x8, %g1
85 EX_LD(LOAD(ldx, %o1, %g2))
86 EX_ST(STORE(stx, %g2, %o0))
87 add %o1, 0x8, %o1
88 bne,pt %XCC, 1b
89 add %o0, 0x8, %o0
90
91 brz,pt %o2, 85f
92 sub %o0, %o1, %o3
93 ba,a,pt %XCC, 90f
94
95 .align 64
9680: /* 0 < len <= 16 */
97 andcc %o3, 0x3, %g0
98 bne,pn %XCC, 90f
99 sub %o0, %o1, %o3
100
1011:
102 subcc %o2, 4, %o2
103 EX_LD(LOAD(lduw, %o1, %g1))
104 EX_ST(STORE(stw, %g1, %o1 + %o3))
105 bgu,pt %XCC, 1b
106 add %o1, 4, %o1
107
10885: retl
109 mov EX_RETVAL(GLOBAL_SPARE), %o0
110
111 .align 32
11290:
113 subcc %o2, 1, %o2
114 EX_LD(LOAD(ldub, %o1, %g1))
115 EX_ST(STORE(stb, %g1, %o1 + %o3))
116 bgu,pt %XCC, 90b
117 add %o1, 1, %o1
118 retl
119 mov EX_RETVAL(GLOBAL_SPARE), %o0
120
121 .size FUNC_NAME, .-FUNC_NAME
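All of the EX_LD()/EX_ST() wrappers work the same way: the guarded access gets a local 98: label, a fixup stub at 99: in .fixup returns a nonzero "bytes not copied" result, and a (98b, 99b) word pair lands in __ex_table. On a faulting user access the trap handler searches that table for the faulting PC and resumes at the stub instead of OOPSing. A sketch of the lookup side, with illustrative names (the real kernel goes through search_exception_tables()):

struct ex_entry {
	unsigned int insn;	/* PC of the guarded access (98b) */
	unsigned int fixup;	/* PC to resume at (99b) */
};

static unsigned long find_fixup(const struct ex_entry *tab, int n,
				unsigned long fault_pc)
{
	int i;

	for (i = 0; i < n; i++)
		if (tab[i].insn == fault_pc)
			return tab[i].fixup;
	return 0;	/* no entry: a genuine kernel fault */
}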
diff --git a/arch/sparc64/lib/GENpage.S b/arch/sparc64/lib/GENpage.S
new file mode 100644
index 000000000000..2ef9d05f21bc
--- /dev/null
+++ b/arch/sparc64/lib/GENpage.S
@@ -0,0 +1,77 @@
1/* GENpage.S: Generic clear and copy page.
2 *
 3 * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
4 */
5#include <asm/page.h>
6
7 .text
8 .align 32
9
10GENcopy_user_page:
11 set PAGE_SIZE, %g7
121: ldx [%o1 + 0x00], %o2
13 ldx [%o1 + 0x08], %o3
14 ldx [%o1 + 0x10], %o4
15 ldx [%o1 + 0x18], %o5
16 stx %o2, [%o0 + 0x00]
17 stx %o3, [%o0 + 0x08]
18 stx %o4, [%o0 + 0x10]
19 stx %o5, [%o0 + 0x18]
20 ldx [%o1 + 0x20], %o2
21 ldx [%o1 + 0x28], %o3
22 ldx [%o1 + 0x30], %o4
23 ldx [%o1 + 0x38], %o5
24 stx %o2, [%o0 + 0x20]
25 stx %o3, [%o0 + 0x28]
26 stx %o4, [%o0 + 0x30]
27 stx %o5, [%o0 + 0x38]
28 subcc %g7, 64, %g7
29 add %o1, 64, %o1
30 bne,pt %xcc, 1b
31 add %o0, 64, %o0
32 retl
33 nop
34
35GENclear_page:
36GENclear_user_page:
37 set PAGE_SIZE, %g7
381: stx %g0, [%o0 + 0x00]
39 stx %g0, [%o0 + 0x08]
40 stx %g0, [%o0 + 0x10]
41 stx %g0, [%o0 + 0x18]
42 stx %g0, [%o0 + 0x20]
43 stx %g0, [%o0 + 0x28]
44 stx %g0, [%o0 + 0x30]
45 stx %g0, [%o0 + 0x38]
46 subcc %g7, 64, %g7
47 bne,pt %xcc, 1b
48 add %o0, 64, %o0
49
50#define BRANCH_ALWAYS 0x10680000
51#define NOP 0x01000000
52#define GEN_DO_PATCH(OLD, NEW) \
53 sethi %hi(NEW), %g1; \
54 or %g1, %lo(NEW), %g1; \
55 sethi %hi(OLD), %g2; \
56 or %g2, %lo(OLD), %g2; \
57 sub %g1, %g2, %g1; \
58 sethi %hi(BRANCH_ALWAYS), %g3; \
59 sll %g1, 11, %g1; \
60 srl %g1, 11 + 2, %g1; \
61 or %g3, %lo(BRANCH_ALWAYS), %g3; \
62 or %g3, %g1, %g3; \
63 stw %g3, [%g2]; \
64 sethi %hi(NOP), %g3; \
65 or %g3, %lo(NOP), %g3; \
66 stw %g3, [%g2 + 0x4]; \
67 flush %g2;
68
69 .globl generic_patch_pageops
70 .type generic_patch_pageops,#function
71generic_patch_pageops:
72 GEN_DO_PATCH(copy_user_page, GENcopy_user_page)
73 GEN_DO_PATCH(_clear_page, GENclear_page)
74 GEN_DO_PATCH(clear_user_page, GENclear_user_page)
75 retl
76 nop
77 .size generic_patch_pageops,.-generic_patch_pageops
diff --git a/arch/sparc64/lib/GENpatch.S b/arch/sparc64/lib/GENpatch.S
new file mode 100644
index 000000000000..fab9e89f16bd
--- /dev/null
+++ b/arch/sparc64/lib/GENpatch.S
@@ -0,0 +1,33 @@
1/* GENpatch.S: Patch Ultra-I routines with generic variant.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#define BRANCH_ALWAYS 0x10680000
7#define NOP 0x01000000
8#define GEN_DO_PATCH(OLD, NEW) \
9 sethi %hi(NEW), %g1; \
10 or %g1, %lo(NEW), %g1; \
11 sethi %hi(OLD), %g2; \
12 or %g2, %lo(OLD), %g2; \
13 sub %g1, %g2, %g1; \
14 sethi %hi(BRANCH_ALWAYS), %g3; \
15 sll %g1, 11, %g1; \
16 srl %g1, 11 + 2, %g1; \
17 or %g3, %lo(BRANCH_ALWAYS), %g3; \
18 or %g3, %g1, %g3; \
19 stw %g3, [%g2]; \
20 sethi %hi(NOP), %g3; \
21 or %g3, %lo(NOP), %g3; \
22 stw %g3, [%g2 + 0x4]; \
23 flush %g2;
24
25 .globl generic_patch_copyops
26 .type generic_patch_copyops,#function
27generic_patch_copyops:
28 GEN_DO_PATCH(memcpy, GENmemcpy)
29 GEN_DO_PATCH(___copy_from_user, GENcopy_from_user)
30 GEN_DO_PATCH(___copy_to_user, GENcopy_to_user)
31 retl
32 nop
33 .size generic_patch_copyops,.-generic_patch_copyops
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index c4a6d6e7d03c..f95fbfa3eeb8 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -1,4 +1,4 @@
1# $Id: Makefile,v 1.25 2000/12/14 22:57:25 davem Exp $ 1#
2# Makefile for Sparc64 library files.. 2# Makefile for Sparc64 library files..
3# 3#
4 4
@@ -13,6 +13,8 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
13 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ 13 U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \
14 NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ 14 NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
15 NGpage.o NGbzero.o \ 15 NGpage.o NGbzero.o \
16 GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \
17 GENpage.o GENbzero.o \
16 copy_in_user.o user_fixup.o memmove.o \ 18 copy_in_user.o user_fixup.o memmove.o \
17 mcount.o ipcsum.o rwsem.o xor.o 19 mcount.o ipcsum.o rwsem.o xor.o
18 20
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S
index 1312bfaff306..9fd8030cc54f 100644
--- a/arch/x86_64/boot/compressed/head.S
+++ b/arch/x86_64/boot/compressed/head.S
@@ -195,6 +195,11 @@ ENTRY(startup_64)
195 movl %eax, %ds 195 movl %eax, %ds
196 movl %eax, %es 196 movl %eax, %es
197 movl %eax, %ss 197 movl %eax, %ss
198 movl %eax, %fs
199 movl %eax, %gs
200 lldt %ax
201 movl $0x20, %eax
202 ltr %ax
198 203
199 /* Compute the decompressed kernel start address. It is where 204 /* Compute the decompressed kernel start address. It is where
200 * we were loaded at aligned to a 2M boundary. %rbp contains the 205 * we were loaded at aligned to a 2M boundary. %rbp contains the
@@ -295,6 +300,8 @@ gdt:
295 .quad 0x0000000000000000 /* NULL descriptor */ 300 .quad 0x0000000000000000 /* NULL descriptor */
296 .quad 0x00af9a000000ffff /* __KERNEL_CS */ 301 .quad 0x00af9a000000ffff /* __KERNEL_CS */
297 .quad 0x00cf92000000ffff /* __KERNEL_DS */ 302 .quad 0x00cf92000000ffff /* __KERNEL_DS */
303 .quad 0x0080890000000000 /* TS descriptor */
304 .quad 0x0000000000000000 /* TS continued */
298gdt_end: 305gdt_end:
299 .bss 306 .bss
300/* Stack for uncompression */ 307/* Stack for uncompression */
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 900ff38d68de..925758dbca0c 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -791,10 +791,8 @@ static void setup_APIC_timer(unsigned int clocks)
791 791
792 /* wait for irq slice */ 792 /* wait for irq slice */
793 if (hpet_address && hpet_use_timer) { 793 if (hpet_address && hpet_use_timer) {
794 int trigger = hpet_readl(HPET_T0_CMP); 794 u32 trigger = hpet_readl(HPET_T0_CMP);
795 while (hpet_readl(HPET_COUNTER) >= trigger) 795 while (hpet_readl(HPET_T0_CMP) == trigger)
796 /* do nothing */ ;
797 while (hpet_readl(HPET_COUNTER) < trigger)
798 /* do nothing */ ; 796 /* do nothing */ ;
799 } else { 797 } else {
800 int c1, c2; 798 int c1, c2;
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index ba16c968ca3f..71da01e73f03 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -367,16 +367,15 @@ static inline struct iommu_table *find_iommu_table(struct device *dev)
367 367
368 pdev = to_pci_dev(dev); 368 pdev = to_pci_dev(dev);
369 369
370 /* is the device behind a bridge? */ 370 pbus = pdev->bus;
371 if (unlikely(pdev->bus->parent)) 371
372 pbus = pdev->bus->parent; 372 /* is the device behind a bridge? Look for the root bus */
373 else 373 while (pbus->parent)
374 pbus = pdev->bus; 374 pbus = pbus->parent;
375 375
376 tbl = pci_iommu(pbus); 376 tbl = pci_iommu(pbus);
377 377
378 BUG_ON(pdev->bus->parent && 378 BUG_ON(tbl && (tbl->it_busno != pbus->number));
379 (tbl->it_busno != pdev->bus->parent->number));
380 379
381 return tbl; 380 return tbl;
382} 381}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 7e161c698af4..10b9809ce821 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -75,7 +75,8 @@ static void flush_kernel_map(void *arg)
75 75
76 /* When clflush is available always use it because it is 76 /* When clflush is available always use it because it is
77 much cheaper than WBINVD. */ 77 much cheaper than WBINVD. */
78 if (!cpu_has_clflush) 78 /* clflush is still broken. Disable for now. */
79 if (1 || !cpu_has_clflush)
79 asm volatile("wbinvd" ::: "memory"); 80 asm volatile("wbinvd" ::: "memory");
80 else list_for_each_entry(pg, l, lru) { 81 else list_for_each_entry(pg, l, lru) {
81 void *adr = page_address(pg); 82 void *adr = page_address(pg);
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c
index 65d82736987e..4095e4d66a1d 100644
--- a/arch/x86_64/pci/mmconfig.c
+++ b/arch/x86_64/pci/mmconfig.c
@@ -66,13 +66,13 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus,
 
 	switch (len) {
 	case 1:
-		*value = readb(addr + reg);
+		*value = mmio_config_readb(addr + reg);
 		break;
 	case 2:
-		*value = readw(addr + reg);
+		*value = mmio_config_readw(addr + reg);
 		break;
 	case 4:
-		*value = readl(addr + reg);
+		*value = mmio_config_readl(addr + reg);
 		break;
 	}
 
@@ -94,13 +94,13 @@ static int pci_mmcfg_write(unsigned int seg, unsigned int bus,
 
 	switch (len) {
 	case 1:
-		writeb(value, addr + reg);
+		mmio_config_writeb(addr + reg, value);
 		break;
 	case 2:
-		writew(value, addr + reg);
+		mmio_config_writew(addr + reg, value);
 		break;
 	case 4:
-		writel(value, addr + reg);
+		mmio_config_writel(addr + reg, value);
 		break;
 	}
 
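Plain readb()/readw()/readl() leave the compiler free to choose instruction encodings, but mmconfig space must be touched with exactly one access of exactly the requested width, or the chipset may not decode it as a config transaction. The mmio_config_*() helpers (added to the i386 pci.h elsewhere in this series, per the diffstat) pin that down. A hedged sketch of the byte-read flavour, assuming the inline-asm form the kernel used at the time; treat the details as illustrative:

    /* Sketch, not authoritative: force a single 1-byte load instruction. */
    static inline unsigned char mmio_config_readb(void __iomem *pos)
    {
            u8 val;

            asm volatile("movb (%1),%%al" : "=a" (val) : "r" (pos));
            return val;
    }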
diff --git a/arch/x86_64/vdso/.gitignore b/arch/x86_64/vdso/.gitignore
new file mode 100644
index 000000000000..f8b69d84238e
--- /dev/null
+++ b/arch/x86_64/vdso/.gitignore
@@ -0,0 +1 @@
+vdso.lds
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 8c2caff87cc3..a15845c164f2 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -3047,6 +3047,10 @@ static inline void blk_partition_remap(struct bio *bio)
 
 		bio->bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
+
+		blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio,
+				    bdev->bd_dev, bio->bi_sector,
+				    bio->bi_sector - p->start_sect);
 	}
 }
 
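The trace call fires after the sector has already been rebased onto the whole disk, so the original partition-relative sector is recovered by subtracting the partition start again. Worked through with hypothetical numbers:

    #include <stdio.h>
    typedef unsigned long long sector_t;

    int main(void)
    {
            sector_t start_sect = 2048;   /* hypothetical partition offset */
            sector_t bi_sector  = 100;    /* sector relative to the partition */

            bi_sector += start_sect;      /* what blk_partition_remap() does */
            printf("device sector %llu, original sector %llu\n",
                   bi_sector, bi_sector - start_sect);  /* 2148, 100 */
            return 0;
    }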
diff --git a/drivers/acpi/resources/rsxface.c b/drivers/acpi/resources/rsxface.c
index f63813a358c5..4c3fd4cdaf73 100644
--- a/drivers/acpi/resources/rsxface.c
+++ b/drivers/acpi/resources/rsxface.c
@@ -474,8 +474,6 @@ acpi_rs_match_vendor_resource(struct acpi_resource *resource, void *context)
 	return (AE_CTRL_TERMINATE);
 }
 
-ACPI_EXPORT_SYMBOL(acpi_rs_match_vendor_resource)
-
 /*******************************************************************************
  *
  * FUNCTION:    acpi_walk_resources
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a11b2bd54bbe..084358a828e9 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1977,12 +1977,13 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
 {
 	ReadCapdata_struct *buf;
 	int return_code;
-	buf = kmalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
-	if (buf == NULL) {
+
+	buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
+	if (!buf) {
 		printk(KERN_WARNING "cciss: out of memory\n");
 		return;
 	}
-	memset(buf, 0, sizeof(ReadCapdata_struct));
+
 	if (withirq)
 		return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
 			ctlr, buf, sizeof(ReadCapdata_struct),
@@ -2003,7 +2004,6 @@ cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
 	printk(KERN_INFO "      blocks= %llu block_size= %d\n",
 	       (unsigned long long)*total_size+1, *block_size);
 	kfree(buf);
-	return;
 }
 
 static void
@@ -2011,12 +2011,13 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
 {
 	ReadCapdata_struct_16 *buf;
 	int return_code;
-	buf = kmalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
-	if (buf == NULL) {
+
+	buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
+	if (!buf) {
 		printk(KERN_WARNING "cciss: out of memory\n");
 		return;
 	}
-	memset(buf, 0, sizeof(ReadCapdata_struct_16));
+
 	if (withirq) {
 		return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
 			ctlr, buf, sizeof(ReadCapdata_struct_16),
@@ -2038,7 +2039,6 @@ cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,
 	printk(KERN_INFO "      blocks= %llu block_size= %d\n",
 	       (unsigned long long)*total_size+1, *block_size);
 	kfree(buf);
-	return;
 }
 
 static int cciss_revalidate(struct gendisk *disk)
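Both capacity readers get the same two cleanups: kzalloc() collapses the kmalloc()+memset() pair into one call that returns zeroed memory, and the pointless trailing return in a void function disappears. The shape of the conversion, reduced to a sketch with the driver's own type:

    /* Before: three steps, with an unzeroed window in between. */
    buf = kmalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
    if (buf == NULL)
            return;
    memset(buf, 0, sizeof(ReadCapdata_struct));

    /* After: one call; the buffer arrives zeroed, or buf is NULL. */
    buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
    if (!buf)
            return;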
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index be4e3477d83b..eb9799acf65b 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -420,18 +420,17 @@ static int __init cpqarray_register_ctlr( int i, struct pci_dev *pdev)
 		goto Enomem2;
 	}
 
-	hba[i]->cmd_pool = (cmdlist_t *)pci_alloc_consistent(
+	hba[i]->cmd_pool = pci_alloc_consistent(
 		hba[i]->pci_dev, NR_CMDS * sizeof(cmdlist_t),
 		&(hba[i]->cmd_pool_dhandle));
-	hba[i]->cmd_pool_bits = kmalloc(
-		((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long),
+	hba[i]->cmd_pool_bits = kcalloc(
+		(NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG, sizeof(unsigned long),
 		GFP_KERNEL);
 
 	if (!hba[i]->cmd_pool_bits || !hba[i]->cmd_pool)
 		goto Enomem1;
 
 	memset(hba[i]->cmd_pool, 0, NR_CMDS * sizeof(cmdlist_t));
-	memset(hba[i]->cmd_pool_bits, 0, ((NR_CMDS+BITS_PER_LONG-1)/BITS_PER_LONG)*sizeof(unsigned long));
 	printk(KERN_INFO "cpqarray: Finding drives on %s",
 		hba[i]->devname);
 
@@ -1660,45 +1659,30 @@ static void getgeometry(int ctlr)
 
 	info_p->log_drv_map = 0;
 
-	id_ldrive = kmalloc(sizeof(id_log_drv_t), GFP_KERNEL);
-	if(id_ldrive == NULL)
-	{
+	id_ldrive = kzalloc(sizeof(id_log_drv_t), GFP_KERNEL);
+	if (!id_ldrive) {
 		printk( KERN_ERR "cpqarray: out of memory.\n");
-		return;
+		goto err_0;
 	}
 
-	id_ctlr_buf = kmalloc(sizeof(id_ctlr_t), GFP_KERNEL);
-	if(id_ctlr_buf == NULL)
-	{
-		kfree(id_ldrive);
+	id_ctlr_buf = kzalloc(sizeof(id_ctlr_t), GFP_KERNEL);
+	if (!id_ctlr_buf) {
 		printk( KERN_ERR "cpqarray: out of memory.\n");
-		return;
+		goto err_1;
 	}
 
-	id_lstatus_buf = kmalloc(sizeof(sense_log_drv_stat_t), GFP_KERNEL);
-	if(id_lstatus_buf == NULL)
-	{
-		kfree(id_ctlr_buf);
-		kfree(id_ldrive);
+	id_lstatus_buf = kzalloc(sizeof(sense_log_drv_stat_t), GFP_KERNEL);
+	if (!id_lstatus_buf) {
 		printk( KERN_ERR "cpqarray: out of memory.\n");
-		return;
+		goto err_2;
 	}
 
-	sense_config_buf = kmalloc(sizeof(config_t), GFP_KERNEL);
-	if(sense_config_buf == NULL)
-	{
-		kfree(id_lstatus_buf);
-		kfree(id_ctlr_buf);
-		kfree(id_ldrive);
+	sense_config_buf = kzalloc(sizeof(config_t), GFP_KERNEL);
+	if (!sense_config_buf) {
 		printk( KERN_ERR "cpqarray: out of memory.\n");
-		return;
+		goto err_3;
 	}
 
-	memset(id_ldrive, 0, sizeof(id_log_drv_t));
-	memset(id_ctlr_buf, 0, sizeof(id_ctlr_t));
-	memset(id_lstatus_buf, 0, sizeof(sense_log_drv_stat_t));
-	memset(sense_config_buf, 0, sizeof(config_t));
-
 	info_p->phys_drives = 0;
 	info_p->log_drv_map = 0;
 	info_p->drv_assign_map = 0;
@@ -1712,13 +1696,8 @@ static void getgeometry(int ctlr)
 	 * so the idastubopen will fail on all logical drives
 	 * on the controller.
 	 */
-	/* Free all the buffers and return */
 	printk(KERN_ERR "cpqarray: error sending ID controller\n");
-	kfree(sense_config_buf);
-	kfree(id_lstatus_buf);
-	kfree(id_ctlr_buf);
-	kfree(id_ldrive);
-	return;
+	goto err_4;
 	}
 
 	info_p->log_drives = id_ctlr_buf->nr_drvs;
@@ -1764,12 +1743,7 @@ static void getgeometry(int ctlr)
 			       " failed to report status of logical drive %d\n"
 			       "Access to this controller has been disabled\n",
 			       ctlr, log_unit);
-			/* Free all the buffers and return */
-			kfree(sense_config_buf);
-			kfree(id_lstatus_buf);
-			kfree(id_ctlr_buf);
-			kfree(id_ldrive);
-			return;
+			goto err_4;
 		}
 		/*
 		   Make sure the logical drive is configured
@@ -1798,14 +1772,8 @@ static void getgeometry(int ctlr)
 				sizeof(config_t), 0, 0, log_unit);
 			if (ret_code == IO_ERROR) {
 				info_p->log_drv_map = 0;
-				/* Free all the buffers and return */
 				printk(KERN_ERR "cpqarray: error sending sense config\n");
-				kfree(sense_config_buf);
-				kfree(id_lstatus_buf);
-				kfree(id_ctlr_buf);
-				kfree(id_ldrive);
-				return;
-
+				goto err_4;
 			}
 
 			info_p->phys_drives =
@@ -1820,12 +1788,18 @@ static void getgeometry(int ctlr)
 			log_index = log_index + 1;
 		}	/* end of if logical drive configured */
 	}	/* end of for log_unit */
+
+	/* Free all the buffers and return */
+err_4:
 	kfree(sense_config_buf);
-	kfree(id_ldrive);
+err_3:
 	kfree(id_lstatus_buf);
+err_2:
 	kfree(id_ctlr_buf);
+err_1:
+	kfree(id_ldrive);
+err_0:
 	return;
-
 }
 
 static void __exit cpqarray_exit(void)
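getgeometry() trades four copies of the free-everything epilogue for a single fall-through label chain: each allocation that fails jumps to the label that frees exactly what was allocated before it, and the success path falls through the whole chain because the buffers are temporaries anyway. The idiom in miniature, with only two allocations (a hypothetical helper, not driver code):

    static void two_buffers_example(void)
    {
            void *a, *b;

            a = kzalloc(16, GFP_KERNEL);
            if (!a)
                    goto err_0;     /* nothing to free yet */

            b = kzalloc(16, GFP_KERNEL);
            if (!b)
                    goto err_1;     /* free a only */

            /* ... use a and b; later failures would use goto err_2 ... */

    err_2:
            kfree(b);
    err_1:
            kfree(a);
    err_0:
            return;
    }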
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 85916e2665d4..af3969a9c963 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -41,7 +41,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/completion.h>
 #include <linux/device.h>
-#include <linux/kernel.h>
 
 #include <asm/uaccess.h>
 #include <asm/vio.h>
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index cb27e8863d7c..3ede0b63da13 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -902,26 +902,17 @@ static int ace_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static int ace_ioctl(struct inode *inode, struct file *filp,
-		     unsigned int cmd, unsigned long arg)
+static int ace_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct ace_device *ace = inode->i_bdev->bd_disk->private_data;
-	struct hd_geometry __user *geo = (struct hd_geometry __user *)arg;
-	struct hd_geometry g;
-	dev_dbg(ace->dev, "ace_ioctl()\n");
-
-	switch (cmd) {
-	case HDIO_GETGEO:
-		g.heads = ace->cf_id.heads;
-		g.sectors = ace->cf_id.sectors;
-		g.cylinders = ace->cf_id.cyls;
-		g.start = 0;
-		return copy_to_user(geo, &g, sizeof(g)) ? -EFAULT : 0;
+	struct ace_device *ace = bdev->bd_disk->private_data;
 
-	default:
-		return -ENOTTY;
-	}
-	return -ENOTTY;
+	dev_dbg(ace->dev, "ace_getgeo()\n");
+
+	geo->heads = ace->cf_id.heads;
+	geo->sectors = ace->cf_id.sectors;
+	geo->cylinders = ace->cf_id.cyls;
+
+	return 0;
 }
 
 static struct block_device_operations ace_fops = {
@@ -930,7 +921,7 @@ static struct block_device_operations ace_fops = {
 	.release = ace_release,
 	.media_changed = ace_media_changed,
 	.revalidate_disk = ace_revalidate_disk,
-	.ioctl = ace_ioctl,
+	.getgeo = ace_getgeo,
 };
 
 /* --------------------------------------------------------------------
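Switching from a private HDIO_GETGEO ioctl to the block layer's getgeo method removes all user-space copying from the driver: the core allocates the hd_geometry, fills in the partition start, calls the method for the CHS values, and does the copy_to_user() itself (unknown ioctls get -ENOTTY for free). Roughly what the core's HDIO_GETGEO path does once .getgeo is provided; paraphrased from the block core of this era, so treat the details as assumptions:

    struct hd_geometry geo = { 0 };
    int ret;

    geo.start = get_start_sect(bdev);      /* the core, not the driver, fills this */
    ret = disk->fops->getgeo(bdev, &geo);  /* driver fills heads/sectors/cylinders */
    if (ret)
            return ret;
    return copy_to_user(argp, &geo, sizeof(geo)) ? -EFAULT : 0;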
diff --git a/drivers/char/hvc_lguest.c b/drivers/char/hvc_lguest.c
index feeccbaec438..3d6bd0baa56d 100644
--- a/drivers/char/hvc_lguest.c
+++ b/drivers/char/hvc_lguest.c
@@ -35,6 +35,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/lguest_bus.h>
+#include <asm/paravirt.h>
 #include "hvc_console.h"
 
 /*D:340 This is our single console input buffer, with associated "struct
diff --git a/drivers/char/pcmcia/cm4000_cs.c b/drivers/char/pcmcia/cm4000_cs.c
index fee58e03dbe2..4177f6db83e9 100644
--- a/drivers/char/pcmcia/cm4000_cs.c
+++ b/drivers/char/pcmcia/cm4000_cs.c
@@ -1629,7 +1629,7 @@ static int cmm_open(struct inode *inode, struct file *filp)
 {
 	struct cm4000_dev *dev;
 	struct pcmcia_device *link;
-	int rc, minor = iminor(inode);
+	int minor = iminor(inode);
 
 	if (minor >= CM4000_MAX_DEV)
 		return -ENODEV;
@@ -1668,7 +1668,6 @@ static int cmm_open(struct inode *inode, struct file *filp)
 	start_monitor(dev);
 
 	link->open = 1;		/* only one open per device */
-	rc = 0;
 
 	DEBUGP(2, dev, "<- cmm_open\n");
 	return nonseekable_open(inode, filp);
@@ -1824,7 +1823,7 @@ static int cm4000_resume(struct pcmcia_device *link)
 
 static void cm4000_release(struct pcmcia_device *link)
 {
-	cmm_cm4000_release(link->priv);	/* delay release until device closed */
+	cmm_cm4000_release(link);	/* delay release until device closed */
 	pcmcia_disable_device(link);
 }
 
diff --git a/drivers/char/pcmcia/cm4040_cs.c b/drivers/char/pcmcia/cm4040_cs.c
index af88181a17f4..b24a3e7bbb9f 100644
--- a/drivers/char/pcmcia/cm4040_cs.c
+++ b/drivers/char/pcmcia/cm4040_cs.c
@@ -599,7 +599,7 @@ cs_release:
 
 static void reader_release(struct pcmcia_device *link)
 {
-	cm4040_reader_release(link->priv);
+	cm4040_reader_release(link);
 	pcmcia_disable_device(link);
 }
 
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index de37ebc3a4cf..51ea93cab6c4 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -369,25 +369,54 @@ static void tty_buffer_free(struct tty_struct *tty, struct tty_buffer *b)
 }
 
 /**
- *	tty_buffer_flush		-	flush full tty buffers
+ *	__tty_buffer_flush		-	flush full tty buffers
  *	@tty: tty to flush
  *
- *	flush all the buffers containing receive data
+ *	flush all the buffers containing receive data. Caller must
+ *	hold the buffer lock and must have ensured no parallel flush to
+ *	ldisc is running.
 *
- *	Locking: none
+ *	Locking: Caller must hold tty->buf.lock
 */
 
-static void tty_buffer_flush(struct tty_struct *tty)
+static void __tty_buffer_flush(struct tty_struct *tty)
 {
 	struct tty_buffer *thead;
-	unsigned long flags;
 
-	spin_lock_irqsave(&tty->buf.lock, flags);
 	while((thead = tty->buf.head) != NULL) {
 		tty->buf.head = thead->next;
 		tty_buffer_free(tty, thead);
 	}
 	tty->buf.tail = NULL;
+}
+
+/**
+ *	tty_buffer_flush		-	flush full tty buffers
+ *	@tty: tty to flush
+ *
+ *	flush all the buffers containing receive data. If the buffer is
+ *	being processed by flush_to_ldisc then we defer the processing
+ *	to that function
+ *
+ *	Locking: none
+ */
+
+static void tty_buffer_flush(struct tty_struct *tty)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&tty->buf.lock, flags);
+
+	/* If the data is being pushed to the tty layer then we can't
+	   process it here. Instead set a flag and the flush_to_ldisc
+	   path will process the flush request before it exits */
+	if (test_bit(TTY_FLUSHING, &tty->flags)) {
+		set_bit(TTY_FLUSHPENDING, &tty->flags);
+		spin_unlock_irqrestore(&tty->buf.lock, flags);
+		wait_event(tty->read_wait,
+				test_bit(TTY_FLUSHPENDING, &tty->flags) == 0);
+		return;
+	} else
+		__tty_buffer_flush(tty);
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
 }
 
@@ -3594,6 +3623,7 @@ static void flush_to_ldisc(struct work_struct *work)
 		return;
 
 	spin_lock_irqsave(&tty->buf.lock, flags);
+	set_bit(TTY_FLUSHING, &tty->flags);	/* So we know a flush is running */
 	head = tty->buf.head;
 	if (head != NULL) {
 		tty->buf.head = NULL;
@@ -3607,6 +3637,11 @@ static void flush_to_ldisc(struct work_struct *work)
 				tty_buffer_free(tty, tbuf);
 				continue;
 			}
+			/* Ldisc or user is trying to flush the buffers
+			   we are feeding to the ldisc, stop feeding the
+			   line discipline as we want to empty the queue */
+			if (test_bit(TTY_FLUSHPENDING, &tty->flags))
+				break;
 			if (!tty->receive_room) {
 				schedule_delayed_work(&tty->buf.work, 1);
 				break;
@@ -3620,8 +3655,17 @@ static void flush_to_ldisc(struct work_struct *work)
 			disc->receive_buf(tty, char_buf, flag_buf, count);
 			spin_lock_irqsave(&tty->buf.lock, flags);
 		}
+		/* Restore the queue head */
 		tty->buf.head = head;
 	}
+	/* We may have a deferred request to flush the input buffer,
+	   if so pull the chain under the lock and empty the queue */
+	if (test_bit(TTY_FLUSHPENDING, &tty->flags)) {
+		__tty_buffer_flush(tty);
+		clear_bit(TTY_FLUSHPENDING, &tty->flags);
+		wake_up(&tty->read_wait);
+	}
+	clear_bit(TTY_FLUSHING, &tty->flags);
 	spin_unlock_irqrestore(&tty->buf.lock, flags);
 
 	tty_ldisc_deref(disc);
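Taken together, the four tty_io.c hunks build a small handshake so that a flush requested while flush_to_ldisc() is feeding the line discipline (necessarily outside buf.lock, since receive_buf can sleep) cannot free buffers under the worker's feet. One bit says a flush is running, the other that a flush is wanted; in outline:

    flusher (tty_buffer_flush)          worker (flush_to_ldisc)
    --------------------------          -----------------------
    take buf.lock                       take buf.lock, set TTY_FLUSHING
    TTY_FLUSHING set?                   feed the ldisc, dropping the lock
      yes: set TTY_FLUSHPENDING,          around receive_buf(); stop
           unlock, sleep on read_wait     early if TTY_FLUSHPENDING appears
      no:  __tty_buffer_flush()         if TTY_FLUSHPENDING: flush queue,
    unlock                                clear the bit, wake read_wait
                                        clear TTY_FLUSHING, unlock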
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 0a46e8837d9a..4a315f08a567 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -453,6 +453,11 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages)
 	 * lguest_pages". */
 	copy_in_guest_info(lg, pages);
 
+	/* Set the trap number to 256 (impossible value). If we fault while
+	 * switching to the Guest (bad segment registers or bug), this will
+	 * cause us to abort the Guest. */
+	lg->regs->trapnum = 256;
+
 	/* Now: we push the "eflags" register on the stack, then do an "lcall".
 	 * This is how we change from using the kernel code segment to using
 	 * the dedicated lguest code segment, as well as jumping into the
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index 49787e964a0d..49aa55577d0d 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -195,13 +195,16 @@ static int has_err(unsigned int trap)
 /* deliver_trap() returns true if it could deliver the trap. */
 int deliver_trap(struct lguest *lg, unsigned int num)
 {
-	u32 lo = lg->idt[num].a, hi = lg->idt[num].b;
+	/* Trap numbers are always 8 bit, but we set an impossible trap number
+	 * for traps inside the Switcher, so check that here. */
+	if (num >= ARRAY_SIZE(lg->idt))
+		return 0;
 
 	/* Early on the Guest hasn't set the IDT entries (or maybe it put a
 	 * bogus one in): if we fail here, the Guest will be killed. */
-	if (!idt_present(lo, hi))
+	if (!idt_present(lg->idt[num].a, lg->idt[num].b))
 		return 0;
-	set_guest_interrupt(lg, lo, hi, has_err(num));
+	set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num));
 	return 1;
 }
 
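With the Switcher now reporting its own faults as the impossible trap number 256 (set up in core.c above), deliver_trap() must bounds-check before indexing the 256-entry IDT copy; an out-of-range number simply fails delivery and the Guest dies with "unhandled trap 256". ARRAY_SIZE() is the stock kernel element-count macro; stripped of the kernel's extra type checking it is just:

    #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
    /* e.g. with struct desc_struct idt[256], ARRAY_SIZE(idt) == 256,
     * so num == 256 is rejected before lg->idt[num] is ever touched. */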
diff --git a/drivers/lguest/lguest.c b/drivers/lguest/lguest.c
index 1bc1546c7fd0..6e135ac0834f 100644
--- a/drivers/lguest/lguest.c
+++ b/drivers/lguest/lguest.c
@@ -323,9 +323,12 @@ static void lguest_write_gdt_entry(struct desc_struct *dt,
  * __thread variables). So we have a hypercall specifically for this case. */
 static void lguest_load_tls(struct thread_struct *t, unsigned int cpu)
 {
+	/* There's one problem which normal hardware doesn't have: the Host
+	 * can't handle us removing entries we're currently using. So we clear
+	 * the GS register here: if it's needed it'll be reloaded anyway. */
+	loadsegment(gs, 0);
 	lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0);
 }
-/*:*/
 
 /*G:038 That's enough excitement for now, back to ploughing through each of
  * the paravirt_ops (we're about 1/3 of the way through).
@@ -687,7 +690,8 @@ static struct clocksource lguest_clock = {
 	.rating		= 400,
 	.read		= lguest_clock_read,
 	.mask		= CLOCKSOURCE_MASK(64),
-	.mult		= 1,
+	.mult		= 1 << 22,
+	.shift		= 22,
 };
 
 /* The "scheduler clock" is just our real clock, adjusted to start at zero */
@@ -770,7 +774,6 @@ static void lguest_time_init(void)
 	 * way, the "rating" is initialized so high that it's always chosen
 	 * over any other clocksource. */
 	if (lguest_data.tsc_khz) {
-		lguest_clock.shift = 22;
 		lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz,
 							 lguest_clock.shift);
 		lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS;
@@ -933,23 +936,24 @@ static const struct lguest_insns
 /* Now our patch routine is fairly simple (based on the native one in
 * paravirt.c). If we have a replacement, we copy it in and return how much of
 * the available space we used. */
-static unsigned lguest_patch(u8 type, u16 clobber, void *insns, unsigned len)
+static unsigned lguest_patch(u8 type, u16 clobber, void *ibuf,
+			     unsigned long addr, unsigned len)
 {
 	unsigned int insn_len;
 
 	/* Don't do anything special if we don't have a replacement */
 	if (type >= ARRAY_SIZE(lguest_insns) || !lguest_insns[type].start)
-		return paravirt_patch_default(type, clobber, insns, len);
+		return paravirt_patch_default(type, clobber, ibuf, addr, len);
 
 	insn_len = lguest_insns[type].end - lguest_insns[type].start;
 
 	/* Similarly if we can't fit replacement (shouldn't happen, but let's
 	 * be thorough). */
 	if (len < insn_len)
-		return paravirt_patch_default(type, clobber, insns, len);
+		return paravirt_patch_default(type, clobber, ibuf, addr, len);
 
 	/* Copy in our instructions. */
-	memcpy(insns, lguest_insns[type].start, insn_len);
+	memcpy(ibuf, lguest_insns[type].start, insn_len);
 	return insn_len;
 }
 
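The clocksource change above is easiest to see through the conversion formula the timekeeping core uses, ns = (cycles * mult) >> shift. With shift fixed at 22 in the structure, the no-TSC fallback needs mult = 1 << 22 to make the conversion an identity, and the TSC path can then recompute mult alone from tsc_khz; the old code patched shift at runtime instead. A quick check of the identity case, in plain C with a hypothetical reading:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* clocksource conversion: ns = (cycles * mult) >> shift */
            uint64_t cycles = 1000000;      /* hypothetical counter delta */
            uint32_t shift  = 22;
            uint32_t mult   = 1u << 22;     /* identity when >> 22 follows */

            printf("%llu ns\n",
                   (unsigned long long)((cycles * mult) >> shift)); /* 1000000 */
            return 0;
    }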
diff --git a/drivers/lguest/lguest_bus.c b/drivers/lguest/lguest_bus.c
index 55a7940ca732..9e7752cc8002 100644
--- a/drivers/lguest/lguest_bus.c
+++ b/drivers/lguest/lguest_bus.c
@@ -5,6 +5,7 @@
 #include <linux/bootmem.h>
 #include <linux/lguest_bus.h>
 #include <asm/io.h>
+#include <asm/paravirt.h>
 
 static ssize_t type_show(struct device *_dev,
 			struct device_attribute *attr, char *buf)
diff --git a/drivers/lguest/segments.c b/drivers/lguest/segments.c
index f675a41a80da..9b81119f46e9 100644
--- a/drivers/lguest/segments.c
+++ b/drivers/lguest/segments.c
@@ -43,22 +43,6 @@
 * begin.
 */
 
-/* Is the descriptor the Guest wants us to put in OK?
- *
- * The flag which Intel says must be zero: must be zero. The descriptor must
- * be present, (this is actually checked earlier but is here for thorougness),
- * and the descriptor type must be 1 (a memory segment). */
-static int desc_ok(const struct desc_struct *gdt)
-{
-	return ((gdt->b & 0x00209000) == 0x00009000);
-}
-
-/* Is the segment present? (Otherwise it can't be used by the Guest). */
-static int segment_present(const struct desc_struct *gdt)
-{
-	return gdt->b & 0x8000;
-}
-
 /* There are several entries we don't let the Guest set. The TSS entry is the
 * "Task State Segment" which controls all kinds of delicate things. The
 * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the
@@ -71,37 +55,11 @@ static int ignored_gdt(unsigned int num)
 	       || num == GDT_ENTRY_DOUBLEFAULT_TSS);
 }
 
-/* If the Guest asks us to remove an entry from the GDT, we have to be careful.
- * If one of the segment registers is pointing at that entry the Switcher will
- * crash when it tries to reload the segment registers for the Guest.
- *
- * It doesn't make much sense for the Guest to try to remove its own code, data
- * or stack segments while they're in use: assume that's a Guest bug. If it's
- * one of the lesser segment registers using the removed entry, we simply set
- * that register to 0 (unusable). */
-static void check_segment_use(struct lguest *lg, unsigned int desc)
-{
-	/* GDT entries are 8 bytes long, so we divide to get the index and
-	 * ignore the bottom bits. */
-	if (lg->regs->gs / 8 == desc)
-		lg->regs->gs = 0;
-	if (lg->regs->fs / 8 == desc)
-		lg->regs->fs = 0;
-	if (lg->regs->es / 8 == desc)
-		lg->regs->es = 0;
-	if (lg->regs->ds / 8 == desc
-	    || lg->regs->cs / 8 == desc
-	    || lg->regs->ss / 8 == desc)
-		kill_guest(lg, "Removed live GDT entry %u", desc);
-}
-/*:*/
-/*M:009 We wouldn't need to check for removal of in-use segments if we handled
- * faults in the Switcher. However, it's probably not a worthwhile
- * optimization. :*/
-
-/*H:610 Once the GDT has been changed, we look through the changed entries and
- * see if they're OK. If not, we'll call kill_guest() and the Guest will never
- * get to use the invalid entries. */
+/*H:610 Once the GDT has been changed, we fix the new entries up a little. We
+ * don't care if they're invalid: the worst that can happen is a General
+ * Protection Fault in the Switcher when it restores a Guest segment register
+ * which tries to use that entry. Then we kill the Guest for causing such a
+ * mess: the message will be "unhandled trap 256". */
 static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
 {
 	unsigned int i;
@@ -112,16 +70,6 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end)
 		if (ignored_gdt(i))
 			continue;
 
-		/* We could fault in switch_to_guest if they are using
-		 * a removed segment. */
-		if (!segment_present(&lg->gdt[i])) {
-			check_segment_use(lg, i);
-			continue;
-		}
-
-		if (!desc_ok(&lg->gdt[i]))
-			kill_guest(lg, "Bad GDT descriptor %i", i);
-
 		/* Segment descriptors contain a privilege level: the Guest is
 		 * sometimes careless and leaves this as 0, even though it's
 		 * running at privilege level 1. If so, we fix it here. */
diff --git a/drivers/lguest/switcher.S b/drivers/lguest/switcher.S
index d418179ea6b5..7c9c230cc845 100644
--- a/drivers/lguest/switcher.S
+++ b/drivers/lguest/switcher.S
@@ -47,6 +47,7 @@
 // Down here in the depths of assembler code.
 #include <linux/linkage.h>
 #include <asm/asm-offsets.h>
+#include <asm/page.h>
 #include "lg.h"
 
 // We mark the start of the code to copy
@@ -182,13 +183,15 @@ ENTRY(switch_to_guest)
 	movl	$(LGUEST_DS), %eax;			\
 	movl	%eax, %ds;				\
 	/* So where are we?  Which CPU, which struct?	\
-	 * The stack is our clue: our TSS sets		\
-	 * It at the end of "struct lguest_pages"	\
-	 * And we then pushed and pushed and pushed Guest regs: \
-	 * Now stack points atop the "struct lguest_regs". \
-	 * Subtract that offset, and we find our struct. */ \
+	 * The stack is our clue: our TSS starts	\
+	 * It at the end of "struct lguest_pages".	\
+	 * Or we may have stumbled while restoring	\
+	 * Our Guest segment regs while in switch_to_guest, \
+	 * The fault pushed atop that part-unwound stack. \
+	 * If we round the stack down to the page start	\
+	 * We're at the start of "struct lguest_pages". */ \
 	movl	%esp, %eax;				\
-	subl	$LGUEST_PAGES_regs, %eax;		\
+	andl	$(~(1 << PAGE_SHIFT - 1)), %eax;	\
 	/* Save our trap number: the switch will obscure it \
 	 * (The Guest regs are not mapped here in the Host) \
 	 * %ebx holds it safe for deliver_to_host */	\
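Since "struct lguest_pages" is page-aligned, masking off the low PAGE_SHIFT bits of any stack pointer inside it lands exactly at its start, which covers both the normal pushed-all-regs case and the new half-unwound fault case (the old subl of a fixed offset only handled the former). The rounding, checked in plain C with a hypothetical pointer value:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    #define PAGE_SIZE  (1UL << PAGE_SHIFT)

    int main(void)
    {
            /* round a stack pointer down to the page that contains it,
             * as the switcher now does to find struct lguest_pages */
            uintptr_t esp = 0xc0105f3c;     /* hypothetical value */
            uintptr_t page_start = esp & ~(PAGE_SIZE - 1);

            printf("%#lx -> %#lx\n", (unsigned long)esp,
                   (unsigned long)page_start); /* 0xc0105f3c -> 0xc0105000 */
            return 0;
    }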
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 141ff9fa296e..2120155929a6 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -580,8 +580,8 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
 		/* the bio has been remapped so dispatch it */
 
 		blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone,
-				    tio->io->bio->bi_bdev->bd_dev, sector,
-				    clone->bi_sector);
+				    tio->io->bio->bi_bdev->bd_dev,
+				    clone->bi_sector, sector);
 
 		generic_make_request(clone);
 	} else if (r < 0 || r == DM_MAPIO_REQUEUE) {
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index c9a289c6c139..b0abc7d92805 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -117,7 +117,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 	struct mmc_host *host = card->host;
 	u64 limit = BLK_BOUNCE_HIGH;
 	int ret;
-	unsigned int bouncesz;
 
 	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
 		limit = *mmc_dev(host)->dma_mask;
@@ -134,6 +133,8 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 
 #ifdef CONFIG_MMC_BLOCK_BOUNCE
 	if (host->max_hw_segs == 1) {
+		unsigned int bouncesz;
+
 		bouncesz = MMC_QUEUE_BOUNCESZ;
 
 		if (bouncesz > host->max_req_size)
@@ -156,14 +157,14 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 				GFP_KERNEL);
 			if (!mq->sg) {
 				ret = -ENOMEM;
-				goto free_bounce_buf;
+				goto cleanup_queue;
 			}
 
 			mq->bounce_sg = kmalloc(sizeof(struct scatterlist) *
 				bouncesz / 512, GFP_KERNEL);
 			if (!mq->bounce_sg) {
 				ret = -ENOMEM;
-				goto free_sg;
+				goto cleanup_queue;
 			}
 		}
 	}
@@ -197,14 +198,13 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock
 	if (mq->bounce_sg)
 		kfree(mq->bounce_sg);
 	mq->bounce_sg = NULL;
- free_sg:
-	kfree(mq->sg);
+ cleanup_queue:
+	if (mq->sg)
+		kfree(mq->sg);
 	mq->sg = NULL;
- free_bounce_buf:
 	if (mq->bounce_buf)
 		kfree(mq->bounce_buf);
 	mq->bounce_buf = NULL;
- cleanup_queue:
 	blk_cleanup_queue(mq->queue);
 	return ret;
 }
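Because kfree(NULL) is a defined no-op, the three partial-failure labels can collapse into the single cleanup_queue label, which frees whatever happens to be non-NULL; the explicit `if (mq->sg)` test the patch adds is belt-and-braces rather than required. The resulting shape, as a sketch:

    cleanup_queue:
            kfree(mq->bounce_sg);   /* each of these may be NULL ...        */
            mq->bounce_sg = NULL;
            kfree(mq->sg);          /* ... and kfree(NULL) does nothing     */
            mq->sg = NULL;
            kfree(mq->bounce_buf);
            mq->bounce_buf = NULL;
            blk_cleanup_queue(mq->queue);
            return ret;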
diff --git a/drivers/mmc/host/at91_mci.c b/drivers/mmc/host/at91_mci.c
index 62564ccde03a..bfebd2fa7ada 100644
--- a/drivers/mmc/host/at91_mci.c
+++ b/drivers/mmc/host/at91_mci.c
@@ -83,7 +83,7 @@
 
 #define AT91_MCI_ERRORS	(AT91_MCI_RINDE | AT91_MCI_RDIRE | AT91_MCI_RCRCE \
 		| AT91_MCI_RENDE | AT91_MCI_RTOE | AT91_MCI_DCRCE \
-		| AT91_MCI_DTOE | AT91_MCI_OVRE | AT91_MCI_UNRE) 
+		| AT91_MCI_DTOE | AT91_MCI_OVRE | AT91_MCI_UNRE)
 
 #define at91_mci_read(host, reg)	__raw_readl((host)->baseaddr + (reg))
 #define at91_mci_write(host, reg, val)	__raw_writel((val), (host)->baseaddr + (reg))
@@ -676,15 +676,15 @@ static irqreturn_t at91_mci_irq(int irq, void *devid)
 
 	int_status = at91_mci_read(host, AT91_MCI_SR);
 	int_mask = at91_mci_read(host, AT91_MCI_IMR);
 
 	pr_debug("MCI irq: status = %08X, %08X, %08X\n", int_status, int_mask,
 		int_status & int_mask);
 
 	int_status = int_status & int_mask;
 
 	if (int_status & AT91_MCI_ERRORS) {
 		completed = 1;
 
 		if (int_status & AT91_MCI_UNRE)
 			pr_debug("MMC: Underrun error\n");
 		if (int_status & AT91_MCI_OVRE)
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index e0c9808fd424..9bf2a877113b 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1266,7 +1266,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev)
 	return 0;
 }
 
-static void __devexit wbsd_free_mmc(struct device *dev)
+static void wbsd_free_mmc(struct device *dev)
 {
 	struct mmc_host *mmc;
 	struct wbsd_host *host;
@@ -1358,7 +1358,7 @@ static int __devinit wbsd_request_region(struct wbsd_host *host, int base)
 	return 0;
 }
 
-static void __devexit wbsd_release_regions(struct wbsd_host *host)
+static void wbsd_release_regions(struct wbsd_host *host)
 {
 	if (host->base)
 		release_region(host->base, 8);
@@ -1434,7 +1434,7 @@ err:
 		"Falling back on FIFO.\n", dma);
 }
 
-static void __devexit wbsd_release_dma(struct wbsd_host *host)
+static void wbsd_release_dma(struct wbsd_host *host)
 {
 	if (host->dma_addr) {
 		dma_unmap_single(mmc_dev(host->mmc), host->dma_addr,
@@ -1484,7 +1484,7 @@ static int __devinit wbsd_request_irq(struct wbsd_host *host, int irq)
 	return 0;
 }
 
-static void __devexit wbsd_release_irq(struct wbsd_host *host)
+static void wbsd_release_irq(struct wbsd_host *host)
 {
 	if (!host->irq)
 		return;
@@ -1535,7 +1535,7 @@ static int __devinit wbsd_request_resources(struct wbsd_host *host,
 * Release all resources for the host.
 */
 
-static void __devexit wbsd_release_resources(struct wbsd_host *host)
+static void wbsd_release_resources(struct wbsd_host *host)
 {
 	wbsd_release_dma(host);
 	wbsd_release_irq(host);
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index 8c86b802f212..d091b2430b48 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -7,6 +7,7 @@
 
 #include <linux/device.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/drivers/net/atl1/atl1_main.c b/drivers/net/atl1/atl1_main.c
index 56f6389a300e..3c1984ecf36c 100644
--- a/drivers/net/atl1/atl1_main.c
+++ b/drivers/net/atl1/atl1_main.c
@@ -1704,10 +1704,8 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 		}
 	}
 
-	local_irq_save(flags);
-	if (!spin_trylock(&adapter->lock)) {
+	if (!spin_trylock_irqsave(&adapter->lock, flags)) {
 		/* Can't get lock - tell upper layer to requeue */
-		local_irq_restore(flags);
 		dev_printk(KERN_DEBUG, &adapter->pdev->dev, "tx locked\n");
 		return NETDEV_TX_LOCKED;
 	}
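spin_trylock_irqsave() packages the save-IRQs/trylock/restore-on-failure dance the driver used to open-code, so the failure path can no longer forget the restore. Its definition at the time was essentially the following, paraphrased from include/linux/spinlock.h of this era, so treat it as a sketch:

    #define spin_trylock_irqsave(lock, flags)               \
    ({                                                      \
            local_irq_save(flags);                          \
            spin_trylock(lock) ?                            \
            1 : ({ local_irq_restore(flags); 0; });         \
    })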
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index 8ee2c2c86b42..d67f97bfa3a4 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -39,7 +39,7 @@
 #include <asm/io.h>
 
 #define DRV_NAME	"ehea"
-#define DRV_VERSION	"EHEA_0072"
+#define DRV_VERSION	"EHEA_0073"
 
 /* eHEA capability flags */
 #define DLPAR_PORT_ADD_REM 1
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 58702f54c3fb..9756211e83ce 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1326,7 +1326,6 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 	u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
 	int skb_data_size = skb->len - skb->data_len;
 	int headersize;
-	u64 tmp_addr;
 
 	/* Packet is TCP with TSO enabled */
 	swqe->tx_control |= EHEA_SWQE_TSO;
@@ -1347,9 +1346,8 @@ static void write_swqe2_TSO(struct sk_buff *skb,
 			/* set sg1entry data */
 			sg1entry->l_key = lkey;
 			sg1entry->len = skb_data_size - headersize;
-
-			tmp_addr = (u64)(skb->data + headersize);
-			sg1entry->vaddr = ehea_map_vaddr(tmp_addr);
+			sg1entry->vaddr =
+				ehea_map_vaddr(skb->data + headersize);
 			swqe->descriptors++;
 		}
 	} else
@@ -1362,7 +1360,6 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 	int skb_data_size = skb->len - skb->data_len;
 	u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0];
 	struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry;
-	u64 tmp_addr;
 
 	/* Packet is any nonTSO type
 	 *
@@ -1379,8 +1376,8 @@ static void write_swqe2_nonTSO(struct sk_buff *skb,
 			/* copy sg1entry data */
 			sg1entry->l_key = lkey;
 			sg1entry->len = skb_data_size - SWQE2_MAX_IMM;
-			tmp_addr = (u64)(skb->data + SWQE2_MAX_IMM);
-			sg1entry->vaddr = ehea_map_vaddr(tmp_addr);
+			sg1entry->vaddr =
+				ehea_map_vaddr(skb->data + SWQE2_MAX_IMM);
 			swqe->descriptors++;
 		}
 	} else {
@@ -1395,7 +1392,6 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
 	struct ehea_vsgentry *sg_list, *sg1entry, *sgentry;
 	skb_frag_t *frag;
 	int nfrags, sg1entry_contains_frag_data, i;
-	u64 tmp_addr;
 
 	nfrags = skb_shinfo(skb)->nr_frags;
 	sg1entry = &swqe->u.immdata_desc.sg_entry;
@@ -1417,9 +1413,9 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
 			/* copy sg1entry data */
 			sg1entry->l_key = lkey;
 			sg1entry->len = frag->size;
-			tmp_addr = (u64)(page_address(frag->page)
-					 + frag->page_offset);
-			sg1entry->vaddr = ehea_map_vaddr(tmp_addr);
+			sg1entry->vaddr =
+				ehea_map_vaddr(page_address(frag->page)
+					       + frag->page_offset);
 			swqe->descriptors++;
 			sg1entry_contains_frag_data = 1;
 		}
@@ -1431,10 +1427,9 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev,
 
 			sgentry->l_key = lkey;
 			sgentry->len = frag->size;
-
-			tmp_addr = (u64)(page_address(frag->page)
-					 + frag->page_offset);
-			sgentry->vaddr = ehea_map_vaddr(tmp_addr);
+			sgentry->vaddr =
+				ehea_map_vaddr(page_address(frag->page)
+					       + frag->page_offset);
 			swqe->descriptors++;
 		}
 	}
@@ -2165,24 +2160,18 @@ static int ehea_clean_all_portres(struct ehea_port *port)
 	return ret;
 }
 
-static void ehea_remove_adapter_mr (struct ehea_adapter *adapter)
+static void ehea_remove_adapter_mr(struct ehea_adapter *adapter)
 {
-	int i;
-
-	for (i=0; i < EHEA_MAX_PORTS; i++)
-		if (adapter->port[i])
-			return;
+	if (adapter->active_ports)
+		return;
 
 	ehea_rem_mr(&adapter->mr);
 }
 
-static int ehea_add_adapter_mr (struct ehea_adapter *adapter)
+static int ehea_add_adapter_mr(struct ehea_adapter *adapter)
 {
-	int i;
-
-	for (i=0; i < EHEA_MAX_PORTS; i++)
-		if (adapter->port[i])
-			return 0;
+	if (adapter->active_ports)
+		return 0;
 
 	return ehea_reg_kernel_mr(adapter, &adapter->mr);
 }
@@ -3099,6 +3088,7 @@ out:
 
 static void __exit ehea_module_exit(void)
 {
+	destroy_workqueue(ehea_driver_wq);
 	driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities);
 	ibmebus_unregister_driver(&ehea_driver);
 	ehea_destroy_busmap();
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index d96eb7229548..acba90f1638e 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -963,7 +963,7 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
 {
 	int rc, i;
 	struct net_device *netdev;
-	struct ibmveth_adapter *adapter = NULL;
+	struct ibmveth_adapter *adapter;
 
 	unsigned char *mac_addr_p;
 	unsigned int *mcastFilterSize_p;
@@ -997,7 +997,6 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
 	SET_MODULE_OWNER(netdev);
 
 	adapter = netdev->priv;
-	memset(adapter, 0, sizeof(adapter));
 	dev->dev.driver_data = netdev;
 
 	adapter->vdev = dev;
@@ -1280,24 +1279,28 @@ const char * buf, size_t count)
 			int i;
 			/* Make sure there is a buffer pool with buffers that
 			   can hold a packet of the size of the MTU */
-			for(i = 0; i<IbmVethNumBufferPools; i++) {
+			for (i = 0; i < IbmVethNumBufferPools; i++) {
 				if (pool == &adapter->rx_buff_pool[i])
 					continue;
 				if (!adapter->rx_buff_pool[i].active)
 					continue;
-				if (mtu < adapter->rx_buff_pool[i].buff_size) {
-					pool->active = 0;
-					h_free_logical_lan_buffer(adapter->
-								  vdev->
-								  unit_address,
-								  pool->
-								  buff_size);
-				}
+				if (mtu <= adapter->rx_buff_pool[i].buff_size)
+					break;
 			}
-			if (pool->active) {
+
+			if (i == IbmVethNumBufferPools) {
 				ibmveth_error_printk("no active pool >= MTU\n");
 				return -EPERM;
 			}
+
+			pool->active = 0;
+			if (netif_running(netdev)) {
+				adapter->pool_config = 1;
+				ibmveth_close(netdev);
+				adapter->pool_config = 0;
+				if ((rc = ibmveth_open(netdev)))
+					return rc;
+			}
 		}
 	} else if (attr == &veth_num_attr) {
 		if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT)
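Three fixes ride together in the last hunk: the size test becomes inclusive (a pool whose buffers exactly fit the MTU is acceptable), "no suitable pool" is detected by the loop index running off the end rather than by a flag the old code set on the wrong pool, and deactivating a pool on a running interface now bounces it through close/open so the buffers are torn down and rebuilt, which is what makes the removed h_free_logical_lan_buffer() call unnecessary. The search-then-test-the-index idiom in isolation, as a self-contained sketch:

    #include <stddef.h>

    /* Generic form of the idiom the fix switches to; sizes hypothetical. */
    static int find_first_fit(const unsigned int *sizes, size_t n,
                              unsigned int need)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (need <= sizes[i])
                            break;          /* found one big enough */
            return i == n ? -1 : (int)i;    /* -1: nothing fits */
    }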
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index bb69ccae8ace..72cc15a6cab7 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -73,9 +73,6 @@ static inline long h_send_logical_lan(unsigned long unit_address,
 #define h_change_logical_lan_mac(ua, mac) \
 	plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac)
 
-#define h_free_logical_lan_buffer(ua, bufsize) \
-	plpar_hcall_norets(H_FREE_LOGICAL_LAN_BUFFER, ua, bufsize)
-
 #define IbmVethNumBufferPools 5
 #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */
 #define IBMVETH_MAX_MTU 68
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index f71dab347667..e323efd4ed18 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -261,7 +261,7 @@ void phy_sanitize_settings(struct phy_device *phydev)
 
 	/* Sanitize settings based on PHY capabilities */
 	if ((features & SUPPORTED_Autoneg) == 0)
-		phydev->autoneg = 0;
+		phydev->autoneg = AUTONEG_DISABLE;
 
 	idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex),
 			features);
@@ -374,7 +374,7 @@ int phy_mii_ioctl(struct phy_device *phydev,
 		if (mii_data->phy_id == phydev->addr) {
 			switch(mii_data->reg_num) {
 			case MII_BMCR:
-				if (val & (BMCR_RESET|BMCR_ANENABLE))
+				if ((val & (BMCR_RESET|BMCR_ANENABLE)) == 0)
 					phydev->autoneg = AUTONEG_DISABLE;
 				else
 					phydev->autoneg = AUTONEG_ENABLE;
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index c9333b9dd51a..b85ab4a8f2a3 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -725,6 +725,12 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 
 	auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 
+	if (tp->mac_version == RTL_GIGA_MAC_VER_12) {
+		/* Vendor specific (0x1f) and reserved (0x0e) MII registers. */
+		mdio_write(ioaddr, 0x1f, 0x0000);
+		mdio_write(ioaddr, 0x0e, 0x0000);
+	}
+
 	tp->phy_auto_nego_reg = auto_nego;
 	tp->phy_1000_ctrl_reg = giga_ctrl;
 
@@ -2760,14 +2766,16 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 			rtl8169_check_link_status(dev, tp, ioaddr);
 
 #ifdef CONFIG_R8169_NAPI
-		RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
-		tp->intr_mask = ~tp->napi_event;
-
-		if (likely(netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
-		else if (netif_msg_intr(tp)) {
-			printk(KERN_INFO "%s: interrupt %04x taken in poll\n",
-			       dev->name, status);
+		if (status & tp->napi_event) {
+			RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
+			tp->intr_mask = ~tp->napi_event;
+
+			if (likely(netif_rx_schedule_prep(dev)))
+				__netif_rx_schedule(dev);
+			else if (netif_msg_intr(tp)) {
+				printk(KERN_INFO "%s: interrupt %04x in poll\n",
+				       dev->name, status);
+			}
 		}
 		break;
 #else
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index ec2ad9f0efa2..d470b19c0810 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -1593,6 +1593,9 @@ static int __devinit sis190_get_mac_addr_from_apc(struct pci_dev *pdev,
 		  pci_name(pdev));
 
 	isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0965, NULL);
+	if (!isa_bridge)
+		isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0966, NULL);
+
 	if (!isa_bridge) {
 		net_probe(tp, KERN_INFO "%s: Can not find ISA bridge.\n",
 			  pci_name(pdev));
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index f8429449dc1e..6ff3a1627af8 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -299,7 +299,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 
 #define SMC_CAN_USE_8BIT	1
 #define SMC_CAN_USE_16BIT	1
-#define SMC_CAN_USE_32BIT	1
+#define SMC_CAN_USE_32BIT	0
 
 #define SMC_inb(a, r)		inb((a) + (r))
 #define SMC_inw(a, r)		inw((a) + (r))
@@ -310,8 +310,6 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 
 #endif  /* BOARDS */
 
-#define set_irq_type(irq, type) do {} while (0)
-
 #elif defined(CONFIG_M32R)
 
 #define SMC_CAN_USE_8BIT	0
diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c
index a8994c7b8583..64bef7c12365 100644
--- a/drivers/net/ucc_geth_ethtool.c
+++ b/drivers/net/ucc_geth_ethtool.c
@@ -379,7 +379,6 @@ static const struct ethtool_ops uec_ethtool_ops = {
 	.get_stats_count        = uec_get_stats_count,
 	.get_strings            = uec_get_strings,
 	.get_ethtool_stats      = uec_get_ethtool_stats,
-	.get_perm_addr          = ethtool_op_get_perm_addr,
 };
 
 void uec_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c
index 5f8c2d30a328..6c257b88ce51 100644
--- a/drivers/net/ucc_geth_mii.c
+++ b/drivers/net/ucc_geth_mii.c
@@ -272,7 +272,8 @@ int __init uec_mdio_init(void)
 	return of_register_platform_driver(&uec_mdio_driver);
 }
 
-void __exit uec_mdio_exit(void)
+/* called from __init ucc_geth_init, therefore can not be __exit */
+void uec_mdio_exit(void)
 {
 	of_unregister_platform_driver(&uec_mdio_driver);
 }
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_phy.c b/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
index d779199c30d0..b37f1e348700 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
@@ -1638,7 +1638,7 @@ void bcm43xx_phy_set_baseband_attenuation(struct bcm43xx_private *bcm,
 		return;
 	}
 
-	if (phy->analog == 1) {
+	if (phy->analog > 1) {
 		value = bcm43xx_phy_read(bcm, 0x0060) & ~0x003C;
 		value |= (baseband_attenuation << 2) & 0x003C;
 	} else {
diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c
index cea85894b7f2..e61c6d5ba1a9 100644
--- a/drivers/net/wireless/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl8187_dev.c
@@ -466,7 +466,7 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev,
 		return -EOPNOTSUPP;
 	}
 
-	priv->hwaddr = conf->mac_addr;
+	priv->hwaddr = conf->mac_addr ? conf->mac_addr : dev->wiphy->perm_addr;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index f6c487aa8246..26869d107e52 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -822,7 +822,7 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
 		cs->control |= ZD_CS_MULTICAST;
 
 	/* PS-POLL */
-	if (stype == IEEE80211_STYPE_PSPOLL)
+	if (ftype == IEEE80211_FTYPE_CTL && stype == IEEE80211_STYPE_PSPOLL)
 		cs->control |= ZD_CS_PS_POLL_FRAME;
 
 	/* Unicast data frames over the threshold should have RTS */
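The added ftype test matters because 802.11 subtype codes are only unique within a frame type: 0x00A0 is PS-Poll for control frames but Disassociation for management frames, so matching the subtype alone would also tag disassoc frames. A self-contained illustration (constant values mirror the kernel's ieee80211 definitions):

#include <stdio.h>

#define FCTL_FTYPE	0x000c
#define FCTL_STYPE	0x00f0
#define FTYPE_MGMT	0x0000
#define FTYPE_CTL	0x0004
#define STYPE_DISASSOC	0x00a0	/* management frame subtype */
#define STYPE_PSPOLL	0x00a0	/* control frame subtype: same bits! */

static int is_pspoll(unsigned int fc)
{
	return (fc & FCTL_FTYPE) == FTYPE_CTL &&
	       (fc & FCTL_STYPE) == STYPE_PSPOLL;
}

int main(void)
{
	printf("disassoc: %d, ps-poll: %d\n",
	       is_pspoll(FTYPE_MGMT | STYPE_DISASSOC),	/* prints 0 */
	       is_pspoll(FTYPE_CTL | STYPE_PSPOLL));	/* prints 1 */
	return 0;
}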
diff --git a/drivers/spi/spi_mpc83xx.c b/drivers/spi/spi_mpc83xx.c
index 0c16a2b39b41..2adf856e44c2 100644
--- a/drivers/spi/spi_mpc83xx.c
+++ b/drivers/spi/spi_mpc83xx.c
@@ -86,7 +86,7 @@ struct mpc83xx_spi {
 
 	unsigned nsecs;		/* (clock cycle time)/2 */
 
-	u32 sysclk;
+	u32 spibrg;		/* SPIBRG input clock */
 	u32 rx_shift;		/* RX data reg shift when in qe mode */
 	u32 tx_shift;		/* TX data reg shift when in qe mode */
 
@@ -148,6 +148,8 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value)
 	if (value == BITBANG_CS_ACTIVE) {
 		u32 regval = mpc83xx_spi_read_reg(&mpc83xx_spi->base->mode);
 		u32 len = spi->bits_per_word;
+		u8 pm;
+
 		if (len == 32)
 			len = 0;
 		else
@@ -169,17 +171,20 @@ static void mpc83xx_spi_chipselect(struct spi_device *spi, int value)
 
 		regval |= SPMODE_LEN(len);
 
-		if ((mpc83xx_spi->sysclk / spi->max_speed_hz) >= 64) {
-			u8 pm = mpc83xx_spi->sysclk / (spi->max_speed_hz * 64);
+		if ((mpc83xx_spi->spibrg / spi->max_speed_hz) >= 64) {
+			pm = mpc83xx_spi->spibrg / (spi->max_speed_hz * 64) - 1;
 			if (pm > 0x0f) {
-				printk(KERN_WARNING "MPC83xx SPI: SPICLK can't be less then a SYSCLK/1024!\n"
-						"Requested SPICLK is %d Hz. Will use %d Hz instead.\n",
-						spi->max_speed_hz, mpc83xx_spi->sysclk / 1024);
+				dev_err(&spi->dev, "Requested speed is too "
+					"low: %d Hz. Will use %d Hz instead.\n",
+					spi->max_speed_hz,
+					mpc83xx_spi->spibrg / 1024);
 				pm = 0x0f;
 			}
 			regval |= SPMODE_PM(pm) | SPMODE_DIV16;
 		} else {
-			u8 pm = mpc83xx_spi->sysclk / (spi->max_speed_hz * 4);
+			pm = mpc83xx_spi->spibrg / (spi->max_speed_hz * 4);
+			if (pm)
+				pm--;
 			regval |= SPMODE_PM(pm);
 		}
 
@@ -429,13 +434,17 @@ static int __init mpc83xx_spi_probe(struct platform_device *dev)
 	mpc83xx_spi->bitbang.chipselect = mpc83xx_spi_chipselect;
 	mpc83xx_spi->bitbang.setup_transfer = mpc83xx_spi_setup_transfer;
 	mpc83xx_spi->bitbang.txrx_bufs = mpc83xx_spi_bufs;
-	mpc83xx_spi->sysclk = pdata->sysclk;
 	mpc83xx_spi->activate_cs = pdata->activate_cs;
 	mpc83xx_spi->deactivate_cs = pdata->deactivate_cs;
 	mpc83xx_spi->qe_mode = pdata->qe_mode;
 	mpc83xx_spi->get_rx = mpc83xx_spi_rx_buf_u8;
 	mpc83xx_spi->get_tx = mpc83xx_spi_tx_buf_u8;
 
+	if (mpc83xx_spi->qe_mode)
+		mpc83xx_spi->spibrg = pdata->sysclk / 2;
+	else
+		mpc83xx_spi->spibrg = pdata->sysclk;
+
 	mpc83xx_spi->rx_shift = 0;
 	mpc83xx_spi->tx_shift = 0;
 	if (mpc83xx_spi->qe_mode) {
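The new prescale-modulus math can be checked by hand. Assuming the SPMODE divider is (PM + 1) * 4, multiplied by a further 16 when DIV16 is set — which is what the added "- 1" adjustments imply — a standalone sketch of the computation:

#include <stdio.h>
#include <stdint.h>

/* Returns the effective SPI clock for a requested ceiling. */
static unsigned int spi_clk(uint32_t spibrg, uint32_t speed_hz)
{
	unsigned int pm, div16 = 0;

	if (spibrg / speed_hz >= 64) {
		div16 = 1;
		pm = spibrg / (speed_hz * 64) - 1;
		if (pm > 0x0f)
			pm = 0x0f;	/* clamp at the slowest reachable clock */
	} else {
		pm = spibrg / (speed_hz * 4);
		if (pm)
			pm--;
	}
	return spibrg / ((pm + 1) * 4 * (div16 ? 16 : 1));
}

int main(void)
{
	/* 128 MHz SPIBRG, 1 MHz ceiling: pm = 1 with DIV16 set, so
	 * 128 MHz / (2 * 4 * 16) = exactly 1 MHz.  The old formula
	 * (without the -1) picked pm = 2, an unnecessarily slow bus. */
	printf("%u Hz\n", spi_clk(128000000, 1000000));
	return 0;
}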
diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c
index 630f781aeb19..c55459c592b8 100644
--- a/drivers/spi/spidev.c
+++ b/drivers/spi/spidev.c
@@ -183,7 +183,9 @@ static int spidev_message(struct spidev_data *spidev,
 
 		if (u_tmp->rx_buf) {
 			k_tmp->rx_buf = buf;
-			if (!access_ok(VERIFY_WRITE, u_tmp->rx_buf, u_tmp->len))
+			if (!access_ok(VERIFY_WRITE, (u8 __user *)
+						(ptrdiff_t) u_tmp->rx_buf,
+					u_tmp->len))
 				goto done;
 		}
 		if (u_tmp->tx_buf) {
diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index decfdc8eb9cc..e58c87b3e3a0 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -127,8 +127,20 @@ static int last_fb_vc = MAX_NR_CONSOLES - 1;
 static int fbcon_is_default = 1; 
 static int fbcon_has_exited;
 static int primary_device = -1;
+
+#ifdef CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY
 static int map_override;
 
+static inline void fbcon_map_override(void)
+{
+	map_override = 1;
+}
+#else
+static inline void fbcon_map_override(void)
+{
+}
+#endif /* CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY */
+
 /* font data */
 static char fontname[40];
 
@@ -506,7 +518,7 @@ static int __init fb_console_setup(char *this_opt)
 					(options[j++]-'0') % FB_MAX;
 			}
 
-			map_override = 1;
+			fbcon_map_override();
 		}
 
 	return 1;
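The shape of this fix is a common kernel idiom: keep a single call site and let the config option select between a real helper and an empty inline stub, so the caller needs no #ifdef of its own. A generic sketch (CONFIG_FOO and foo_enable() are placeholder names, not kernel symbols):

#ifdef CONFIG_FOO
static int foo_enabled;

static inline void foo_enable(void)
{
	foo_enabled = 1;
}
#else
static inline void foo_enable(void)
{
	/* compiled out: call sites stay clean */
}
#endif /* CONFIG_FOO */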
diff --git a/drivers/video/matrox/g450_pll.c b/drivers/video/matrox/g450_pll.c
index 7c76e079ca7d..d42346e7fdda 100644
--- a/drivers/video/matrox/g450_pll.c
+++ b/drivers/video/matrox/g450_pll.c
@@ -331,16 +331,19 @@ static int __g450_setclk(WPMINFO unsigned int fout, unsigned int pll,
 			tmp |= M1064_XPIXCLKCTRL_PLL_UP;
 		}
 		matroxfb_DAC_out(PMINFO M1064_XPIXCLKCTRL, tmp);
-#ifdef __powerpc__
-		/* This is necessary to avoid jitter on PowerPC
-		 * (OpenFirmware) systems, but apparently
-		 * introduces jitter, at least on a x86-64
-		 * using DVI.
-		 * A simple workaround is disable for non-PPC.
-		 */
-		matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, 0);
-#endif /* __powerpc__ */
-		matroxfb_DAC_out(PMINFO M1064_XPWRCTRL, xpwrctrl);
+		/* DVI PLL preferred for frequencies up to
+		   panel link max, standard PLL otherwise */
+		if (fout >= MINFO->max_pixel_clock_panellink)
+			tmp = 0;
+		else tmp =
+			M1064_XDVICLKCTRL_DVIDATAPATHSEL |
+			M1064_XDVICLKCTRL_C1DVICLKSEL |
+			M1064_XDVICLKCTRL_C1DVICLKEN |
+			M1064_XDVICLKCTRL_DVILOOPCTL |
+			M1064_XDVICLKCTRL_P1LOOPBWDTCTL;
+		matroxfb_DAC_out(PMINFO M1064_XDVICLKCTRL, tmp);
+		matroxfb_DAC_out(PMINFO M1064_XPWRCTRL,
+				 xpwrctrl);
 
 		matroxfb_DAC_unlock_irqrestore(flags);
 	}
diff --git a/drivers/video/matrox/matroxfb_DAC1064.h b/drivers/video/matrox/matroxfb_DAC1064.h
index df39c3193735..7a98ce8043d7 100644
--- a/drivers/video/matrox/matroxfb_DAC1064.h
+++ b/drivers/video/matrox/matroxfb_DAC1064.h
@@ -33,6 +33,21 @@ void DAC1064_global_restore(WPMINFO2);
 #define M1064_XCURCTRL_3COLOR	 0x01	/* transparent, 0, 1, 2 */
 #define M1064_XCURCTRL_XGA	 0x02	/* 0, 1, transparent, complement */
 #define M1064_XCURCTRL_XWIN	 0x03	/* transparent, transparent, 0, 1 */
+	/* drive DVI by standard(0)/DVI(1) PLL */
+	/* if set(1), C?DVICLKEN and C?DVICLKSEL must be set(1) */
+#define M1064_XDVICLKCTRL_DVIDATAPATHSEL	0x01
+	/* drive CRTC1 by standard(0)/DVI(1) PLL */
+#define M1064_XDVICLKCTRL_C1DVICLKSEL		0x02
+	/* drive CRTC2 by standard(0)/DVI(1) PLL */
+#define M1064_XDVICLKCTRL_C2DVICLKSEL		0x04
+	/* pixel clock allowed to(0)/blocked from(1) driving CRTC1 */
+#define M1064_XDVICLKCTRL_C1DVICLKEN		0x08
+	/* DVI PLL loop filter bandwidth selection bits */
+#define M1064_XDVICLKCTRL_DVILOOPCTL		0x30
+	/* CRTC2 pixel clock allowed to(0)/blocked from(1) driving CRTC2 */
+#define M1064_XDVICLKCTRL_C2DVICLKEN		0x40
+	/* P1PLL loop filter bandwidth selection */
+#define M1064_XDVICLKCTRL_P1LOOPBWDTCTL		0x80
 #define M1064_XCURCOL0RED	0x08
 #define M1064_XCURCOL0GREEN	0x09
 #define M1064_XCURCOL0BLUE	0x0A
diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h
index d59577c8de86..f3107ad7e545 100644
--- a/drivers/video/matrox/matroxfb_base.h
+++ b/drivers/video/matrox/matroxfb_base.h
@@ -424,6 +424,7 @@ struct matrox_fb_info {
 	} mmio;
 
 	unsigned int max_pixel_clock;
+	unsigned int max_pixel_clock_panellink;
 
 	struct matrox_switch* hw_switch;
 
diff --git a/drivers/video/matrox/matroxfb_misc.c b/drivers/video/matrox/matroxfb_misc.c
index 5948e54b9ef9..ab7fb50bc1de 100644
--- a/drivers/video/matrox/matroxfb_misc.c
+++ b/drivers/video/matrox/matroxfb_misc.c
@@ -658,6 +658,7 @@ static int parse_pins5(WPMINFO const struct matrox_bios* bd) {
 		MINFO->values.reg.mctlwtst_core = (MINFO->values.reg.mctlwtst & ~7) |
 						  wtst_xlat[MINFO->values.reg.mctlwtst & 7];
 	}
+	MINFO->max_pixel_clock_panellink = bd->pins[47] * 4000;
 	return 0;
 }
 
diff --git a/drivers/video/pvr2fb.c b/drivers/video/pvr2fb.c
index f9300266044d..7d6c29800d14 100644
--- a/drivers/video/pvr2fb.c
+++ b/drivers/video/pvr2fb.c
@@ -94,6 +94,7 @@
 #define DISP_DIWCONF (DISP_BASE + 0xe8)
 #define DISP_DIWHSTRT (DISP_BASE + 0xec)
 #define DISP_DIWVSTRT (DISP_BASE + 0xf0)
+#define DISP_PIXDEPTH (DISP_BASE + 0x108)
 
 /* Pixel clocks, one for TV output, doubled for VGA output */
 #define TV_CLK 74239
@@ -143,6 +144,7 @@ static struct pvr2fb_par {
 	unsigned char is_lowres;	/* Is horizontal pixel-doubling enabled? */
 
 	unsigned long mmio_base;	/* MMIO base */
+	u32 palette[16];
 } *currentpar;
 
 static struct fb_info *fb_info;
@@ -599,6 +601,7 @@ static void pvr2_init_display(struct fb_info *info)
 
 	/* bits per pixel */
 	fb_writel(fb_readl(DISP_DIWMODE) | (--bytesperpixel << 2), DISP_DIWMODE);
+	fb_writel(bytesperpixel << 2, DISP_PIXDEPTH);
 
 	/* video enable, color sync, interlace,
 	 * hsync and vsync polarity (currently unused) */
@@ -790,7 +793,7 @@ static int __devinit pvr2fb_common_init(void)
 	fb_info->fbops		= &pvr2fb_ops;
 	fb_info->fix		= pvr2_fix;
 	fb_info->par		= currentpar;
-	fb_info->pseudo_palette	= (void *)(fb_info->par + 1);
+	fb_info->pseudo_palette	= currentpar->palette;
 	fb_info->flags		= FBINFO_DEFAULT | FBINFO_HWACCEL_YPAN;
 
 	if (video_output == VO_VGA)
@@ -807,6 +810,8 @@ static int __devinit pvr2fb_common_init(void)
 
 	if (register_framebuffer(fb_info) < 0)
 		goto out_err;
+	/* Must write PIXDEPTH to register before anything is displayed - so force init */
+	pvr2_init_display(fb_info);
 
 	modememused = get_line_length(fb_info->var.xres_virtual,
 				      fb_info->var.bits_per_pixel);
@@ -1082,14 +1087,15 @@ static int __init pvr2fb_init(void)
 #endif
 	size = sizeof(struct fb_info) + sizeof(struct pvr2fb_par) + 16 * sizeof(u32);
 
-	fb_info = kzalloc(size, GFP_KERNEL);
+	fb_info = framebuffer_alloc(sizeof(struct pvr2fb_par), NULL);
+
 	if (!fb_info) {
 		printk(KERN_ERR "Failed to allocate memory for fb_info\n");
 		return -ENOMEM;
 	}
 
 
-	currentpar = (struct pvr2fb_par *)(fb_info + 1);
+	currentpar = fb_info->par;
 
 	for (i = 0; i < ARRAY_SIZE(board_driver); i++) {
 		struct pvr2_board *pvr_board = board_driver + i;
@@ -1102,7 +1108,7 @@ static int __init pvr2fb_init(void)
 		if (ret != 0) {
 			printk(KERN_ERR "pvr2fb: Failed init of %s device\n",
 			       pvr_board->name);
-			kfree(fb_info);
+			framebuffer_release(fb_info);
 			break;
 		}
 	}
@@ -1126,7 +1132,7 @@ static void __exit pvr2fb_exit(void)
 #endif
 
 	unregister_framebuffer(fb_info);
-	kfree(fb_info);
+	framebuffer_release(fb_info);
 }
 
 module_init(pvr2fb_init);
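The pvr2fb hunks above are all one conversion: let the framebuffer core allocate fb_info together with a driver-private area, instead of hand-computing a combined size and doing pointer arithmetic past the end of the structure. A minimal sketch of the pairing (the "demo" names are ours):

#include <linux/fb.h>

struct demo_par {
	unsigned long mmio_base;
	u32 palette[16];
};

static struct fb_info *demo_alloc(struct device *dev)
{
	struct fb_info *info = framebuffer_alloc(sizeof(struct demo_par), dev);
	struct demo_par *par;

	if (!info)
		return NULL;
	par = info->par;			/* private area, no casts */
	info->pseudo_palette = par->palette;	/* lives inside par */
	return info;
}

static void demo_free(struct fb_info *info)
{
	framebuffer_release(info);	/* never a bare kfree(info) */
}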
diff --git a/drivers/video/stifb.c b/drivers/video/stifb.c
index c97709ecbad0..e7c8db2eb49b 100644
--- a/drivers/video/stifb.c
+++ b/drivers/video/stifb.c
@@ -1100,13 +1100,18 @@ stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 	/* only supported cards are allowed */
 	switch (fb->id) {
 	case CRT_ID_VISUALIZE_EG:
-		/* look for a double buffering device like e.g. the
-		   "INTERNAL_EG_DX1024" in the RDI precisionbook laptop
-		   which won't work. The same device in non-double
-		   buffering mode returns "INTERNAL_EG_X1024". */
-		if (strstr(sti->outptr.dev_name, "EG_DX")) {
-			printk(KERN_WARNING
-				"stifb: ignoring '%s'. Disable double buffering in IPL menu.\n",
+		/* Visualize cards can run either in "double buffer" or
+		   "standard" mode. Depending on the mode, the card reports
+		   a different device name, e.g. "INTERNAL_EG_DX1024" in double
+		   buffer mode and "INTERNAL_EG_X1024" in standard mode.
+		   Since this driver only supports standard mode, we check
+		   if the device name contains the string "DX" and tell the
+		   user how to reconfigure the card. */
+		if (strstr(sti->outptr.dev_name, "DX")) {
+			printk(KERN_WARNING "WARNING: stifb framebuffer driver does not "
+				"support '%s' in double-buffer mode.\n"
+				KERN_WARNING "WARNING: Please disable the double-buffer mode "
+				"in IPL menu (the PARISC-BIOS).\n",
 				sti->outptr.dev_name);
 			goto out_err0;
 		}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 2bc1428d621c..a6c9078af124 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -3161,12 +3161,9 @@ COMPATIBLE_IOCTL(SIOCSIWSENS)
 COMPATIBLE_IOCTL(SIOCGIWSENS)
 COMPATIBLE_IOCTL(SIOCSIWRANGE)
 COMPATIBLE_IOCTL(SIOCSIWPRIV)
-COMPATIBLE_IOCTL(SIOCGIWPRIV)
 COMPATIBLE_IOCTL(SIOCSIWSTATS)
-COMPATIBLE_IOCTL(SIOCGIWSTATS)
 COMPATIBLE_IOCTL(SIOCSIWAP)
 COMPATIBLE_IOCTL(SIOCGIWAP)
-COMPATIBLE_IOCTL(SIOCSIWSCAN)
 COMPATIBLE_IOCTL(SIOCSIWRATE)
 COMPATIBLE_IOCTL(SIOCGIWRATE)
 COMPATIBLE_IOCTL(SIOCSIWRTS)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 52bb2638f7ab..6874785bb65a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -974,6 +974,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->get_block = get_block;
 	dio->end_io = end_io;
 	dio->map_bh.b_private = NULL;
+	dio->map_bh.b_state = 0;
 	dio->final_block_in_bio = -1;
 	dio->next_block_for_io = -1;
 
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 0a50942b4378..131954b3fb98 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -353,6 +353,10 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry,
 		ecryptfs_printk(KERN_DEBUG, "Is a symlink; returning\n");
 		goto out;
 	}
+	if (special_file(lower_inode->i_mode)) {
+		ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n");
+		goto out;
+	}
 	if (!nd) {
 		ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave"
 				"as we *think* we are about to unlink\n");
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e557a6766927..a98497264fe8 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -813,6 +813,15 @@ out:
 	return rc;
 }
 
+static void do_sysfs_unregistration(void)
+{
+	sysfs_remove_file(&ecryptfs_subsys.kobj,
+			  &sysfs_attr_version.attr);
+	sysfs_remove_file(&ecryptfs_subsys.kobj,
+			  &sysfs_attr_version_str.attr);
+	subsystem_unregister(&ecryptfs_subsys);
+}
+
 static int __init ecryptfs_init(void)
 {
 	int rc;
@@ -851,6 +860,9 @@ static int __init ecryptfs_init(void)
 	if (rc) {
 		ecryptfs_printk(KERN_ERR, "Failure occured while attempting to "
 				"initialize the eCryptfs netlink socket\n");
+		do_sysfs_unregistration();
+		unregister_filesystem(&ecryptfs_fs_type);
+		ecryptfs_free_kmem_caches();
 	}
 out:
 	return rc;
@@ -858,11 +870,7 @@ out:
 
 static void __exit ecryptfs_exit(void)
 {
-	sysfs_remove_file(&ecryptfs_subsys.kobj,
-			  &sysfs_attr_version.attr);
-	sysfs_remove_file(&ecryptfs_subsys.kobj,
-			  &sysfs_attr_version_str.attr);
-	subsystem_unregister(&ecryptfs_subsys);
+	do_sysfs_unregistration();
 	ecryptfs_release_messaging(ecryptfs_transport);
 	unregister_filesystem(&ecryptfs_fs_type);
 	ecryptfs_free_kmem_caches();
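The bug being fixed is the classic partially-initialized-module leak: the init routine registered sysfs entries and the filesystem, then returned an error without undoing either when a later step failed. This patch unwinds inline; the same idea is more often written as a goto ladder — a generic sketch with placeholder step names, not eCryptfs functions:

static int reg_a(void) { return 0; }
static int reg_b(void) { return 0; }
static int reg_c(void) { return -1; }	/* pretend this step fails */
static void unreg_a(void) { }
static void unreg_b(void) { }

static int demo_init(void)
{
	int rc;

	rc = reg_a();
	if (rc)
		goto out;
	rc = reg_b();
	if (rc)
		goto out_a;
	rc = reg_c();
	if (rc)
		goto out_b;
	return 0;
out_b:
	unreg_b();	/* undo in reverse order of setup */
out_a:
	unreg_a();
out:
	return rc;
}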
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 20ac403469a0..c55a761c22bb 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -20,10 +20,8 @@
 #include "delegation.h"
 #include "internal.h"
 
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+static void nfs_do_free_delegation(struct nfs_delegation *delegation)
 {
-	if (delegation->cred)
-		put_rpccred(delegation->cred);
 	kfree(delegation);
 }
 
@@ -31,7 +29,18 @@ static void nfs_free_delegation_callback(struct rcu_head *head)
 {
 	struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
 
-	nfs_free_delegation(delegation);
+	nfs_do_free_delegation(delegation);
+}
+
+static void nfs_free_delegation(struct nfs_delegation *delegation)
+{
+	struct rpc_cred *cred;
+
+	cred = rcu_dereference(delegation->cred);
+	rcu_assign_pointer(delegation->cred, NULL);
+	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+	if (cred)
+		put_rpccred(cred);
 }
 
 static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
@@ -166,7 +175,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 	int res = 0;
 
 	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
-	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+	nfs_free_delegation(delegation);
 	return res;
 }
 
@@ -448,7 +457,7 @@ restart:
 		spin_unlock(&clp->cl_lock);
 		rcu_read_unlock();
 		if (delegation != NULL)
-			call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+			nfs_free_delegation(delegation);
 		goto restart;
 	}
 	rcu_read_unlock();
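The refactor concentrates the RCU teardown rules in one helper: clear the shared pointer before scheduling the deferred kfree(), and drop the credential reference immediately because readers never take one of their own. A reduced sketch of the same ordering (struct cred_like and put_cred_like() stand in for rpc_cred/put_rpccred; the caller must hold the last reference):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct cred_like;
extern void put_cred_like(struct cred_like *cred);

struct obj {
	struct cred_like *cred;
	struct rcu_head rcu;
};

static void obj_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct obj, rcu));
}

static void obj_release(struct obj *o)
{
	struct cred_like *cred = rcu_dereference(o->cred);

	rcu_assign_pointer(o->cred, NULL);
	call_rcu(&o->rcu, obj_free_rcu);  /* freed after a grace period */
	if (cred)
		put_cred_like(cred);
}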
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bca6cdcb9f0d..71a49c3acabd 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -468,7 +468,7 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 		ctx->lockowner = current->files;
 		ctx->error = 0;
 		ctx->dir_cookie = 0;
-		kref_init(&ctx->kref);
+		atomic_set(&ctx->count, 1);
 	}
 	return ctx;
 }
@@ -476,21 +476,18 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
 	if (ctx != NULL)
-		kref_get(&ctx->kref);
+		atomic_inc(&ctx->count);
 	return ctx;
 }
 
-static void nfs_free_open_context(struct kref *kref)
+void put_nfs_open_context(struct nfs_open_context *ctx)
 {
-	struct nfs_open_context *ctx = container_of(kref,
-			struct nfs_open_context, kref);
+	struct inode *inode = ctx->path.dentry->d_inode;
 
-	if (!list_empty(&ctx->list)) {
-		struct inode *inode = ctx->path.dentry->d_inode;
-		spin_lock(&inode->i_lock);
-		list_del(&ctx->list);
-		spin_unlock(&inode->i_lock);
-	}
+	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
+		return;
+	list_del(&ctx->list);
+	spin_unlock(&inode->i_lock);
 	if (ctx->state != NULL)
 		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
 	if (ctx->cred != NULL)
@@ -500,11 +497,6 @@ static void nfs_free_open_context(struct kref *kref)
 	kfree(ctx);
 }
 
-void put_nfs_open_context(struct nfs_open_context *ctx)
-{
-	kref_put(&ctx->kref, nfs_free_open_context);
-}
-
 /*
  * Ensure that mmap has a recent RPC credential for use when writing out
  * shared pages
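Replacing the kref with a bare atomic_t works here because of the atomic_dec_and_lock() idiom: the count can only reach zero with the lock held, so unlinking and freeing cannot race with a lookup made under that same lock. A minimal sketch of the pattern:

#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/slab.h>

struct ctx {
	atomic_t count;
	struct list_head list;
};

static void ctx_put(struct ctx *c, spinlock_t *lock)
{
	if (!atomic_dec_and_lock(&c->count, lock))
		return;			/* other references remain */
	list_del(&c->list);		/* unlink while lock is held */
	spin_unlock(lock);
	kfree(c);
}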
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 7f86e65182e4..aea76d0e5fbd 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -175,10 +175,8 @@ static void nfs_expire_automounts(struct work_struct *work)
 
 void nfs_release_automount_timer(void)
 {
-	if (list_empty(&nfs_automount_list)) {
-		cancel_delayed_work(&nfs_automount_task);
-		flush_scheduled_work();
-	}
+	if (list_empty(&nfs_automount_list))
+		cancel_delayed_work_sync(&nfs_automount_task);
 }
 
 /*
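The old pairing — cancel_delayed_work() followed by flush_scheduled_work() — drains the entire shared keventd queue just to make sure one item finished, which both over-waits and risks deadlock if the caller holds a lock some other queued work wants. cancel_delayed_work_sync() waits for that one item only. The two idioms side by side, as a sketch:

#include <linux/workqueue.h>

static void old_shutdown(struct delayed_work *w)
{
	cancel_delayed_work(w);		/* may already be running...   */
	flush_scheduled_work();		/* ...so drain the whole queue */
}

static void new_shutdown(struct delayed_work *w)
{
	cancel_delayed_work_sync(w);	/* cancel and wait for w alone */
}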
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6ca2795ccd9c..62b3ae280310 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -332,11 +332,9 @@ static int can_open_cached(struct nfs4_state *state, int mode)
 	switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
 		case FMODE_READ:
 			ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
-			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
 			break;
 		case FMODE_WRITE:
 			ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
-			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
 			break;
 		case FMODE_READ|FMODE_WRITE:
 			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
@@ -1260,7 +1258,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
-			nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
+			nfs_set_open_stateid(state, &calldata->res.stateid, 0);
 			renew_lease(server, calldata->timestamp);
 			break;
 		case -NFS4ERR_STALE_STATEID:
@@ -1286,23 +1284,19 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		.rpc_cred = state->owner->so_cred,
 	};
 	int clear_rd, clear_wr, clear_rdwr;
-	int mode;
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
 
-	mode = FMODE_READ|FMODE_WRITE;
 	clear_rd = clear_wr = clear_rdwr = 0;
 	spin_lock(&state->owner->so_lock);
 	/* Calculate the change in open mode */
 	if (state->n_rdwr == 0) {
 		if (state->n_rdonly == 0) {
-			mode &= ~FMODE_READ;
 			clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
 			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
 		}
 		if (state->n_wronly == 0) {
-			mode &= ~FMODE_WRITE;
 			clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
 			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
 		}
@@ -1314,9 +1308,13 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		return;
 	}
 	nfs_fattr_init(calldata->res.fattr);
-	if (mode != 0)
+	if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
-	calldata->arg.open_flags = mode;
+		calldata->arg.open_flags = FMODE_READ;
+	} else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
+		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+		calldata->arg.open_flags = FMODE_WRITE;
+	}
 	calldata->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
 }
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 0505ca124034..3ea352d82eba 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -127,16 +127,15 @@ nfs4_schedule_state_renewal(struct nfs_client *clp)
 void
 nfs4_renewd_prepare_shutdown(struct nfs_server *server)
 {
-	flush_scheduled_work();
+	cancel_delayed_work(&server->nfs_client->cl_renewd);
 }
 
 void
 nfs4_kill_renewd(struct nfs_client *clp)
 {
 	down_read(&clp->cl_sem);
-	cancel_delayed_work(&clp->cl_renewd);
+	cancel_delayed_work_sync(&clp->cl_renewd);
 	up_read(&clp->cl_sem);
-	flush_scheduled_work();
 }
 
 /*
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e9662ba81d86..3e4adf8c8312 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -341,8 +341,6 @@ nfs4_state_set_mode_locked(struct nfs4_state *state, mode_t mode)
 	else
 		list_move_tail(&state->open_states, &state->owner->so_states);
 	}
-	if (mode == 0)
-		list_del_init(&state->inode_states);
 	state->state = mode;
 }
 
@@ -415,8 +413,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
 	if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
 		return;
 	spin_lock(&inode->i_lock);
-	if (!list_empty(&state->inode_states))
-		list_del(&state->inode_states);
+	list_del(&state->inode_states);
 	list_del(&state->open_states);
 	spin_unlock(&inode->i_lock);
 	spin_unlock(&owner->so_lock);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index f5e11f4fa952..4f517665c9a0 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3731,7 +3731,6 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 {
 	int status;
 	struct buffer_head *last_eb_bh = NULL;
-	struct buffer_head *bh = NULL;
 	struct ocfs2_insert_type insert = {0, };
 	struct ocfs2_extent_rec rec;
 
@@ -3783,9 +3782,6 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
 	ocfs2_extent_map_insert_rec(inode, &rec);
 
 bail:
-	if (bh)
-		brelse(bh);
-
 	if (last_eb_bh)
 		brelse(last_eb_bh);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index f0bdfd944c44..685c18065c82 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -854,17 +854,25 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
 	struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num);
 	ssize_t ret;
 
-
-	mutex_lock(&sc->sc_send_lock);
-	ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
-					 virt_to_page(kmalloced_virt),
-					 (long)kmalloced_virt & ~PAGE_MASK,
-					 size, MSG_DONTWAIT);
-	mutex_unlock(&sc->sc_send_lock);
-	if (ret != size) {
+	while (1) {
+		mutex_lock(&sc->sc_send_lock);
+		ret = sc->sc_sock->ops->sendpage(sc->sc_sock,
+						 virt_to_page(kmalloced_virt),
+						 (long)kmalloced_virt & ~PAGE_MASK,
+						 size, MSG_DONTWAIT);
+		mutex_unlock(&sc->sc_send_lock);
+		if (ret == size)
+			break;
+		if (ret == (ssize_t)-EAGAIN) {
+			mlog(0, "sendpage of size %zu to " SC_NODEF_FMT
+			     " returned EAGAIN\n", size, SC_NODEF_ARGS(sc));
+			cond_resched();
+			continue;
+		}
 		mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
 		     " failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
 		o2net_ensure_shutdown(nn, sc, 0);
+		break;
 	}
 }
 
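The generic shape of the retry loop, assuming only that the transmit primitive can fail transiently with -EAGAIN under memory pressure (the xmit callback is a placeholder, not an o2net interface):

#include <linux/sched.h>
#include <linux/errno.h>

static int send_all(ssize_t (*xmit)(void *buf, size_t len),
		    void *buf, size_t len)
{
	while (1) {
		ssize_t ret = xmit(buf, len);

		if (ret == (ssize_t)len)
			return 0;		/* everything went out */
		if (ret == -EAGAIN) {
			cond_resched();		/* let the stack drain */
			continue;
		}
		return ret < 0 ? ret : -EIO;	/* hard error: give up */
	}
}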
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c4034f693e7b..4ffa715be09c 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -187,6 +187,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
 	int ret;
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	handle_t *handle;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *) bh->b_data;
 
 	mlog_entry_void();
 
@@ -197,11 +198,27 @@ int ocfs2_update_inode_atime(struct inode *inode,
 		goto out;
 	}
 
+	ret = ocfs2_journal_access(handle, inode, bh,
+				   OCFS2_JOURNAL_ACCESS_WRITE);
+	if (ret) {
+		mlog_errno(ret);
+		goto out_commit;
+	}
+
+	/*
+	 * Don't use ocfs2_mark_inode_dirty() here as we don't always
+	 * have i_mutex to guard against concurrent changes to other
+	 * inode fields.
+	 */
 	inode->i_atime = CURRENT_TIME;
-	ret = ocfs2_mark_inode_dirty(handle, inode, bh);
+	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
+	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
+
+	ret = ocfs2_journal_dirty(handle, bh);
 	if (ret < 0)
 		mlog_errno(ret);
 
+out_commit:
 	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out:
 	mlog_exit(ret);
@@ -1011,6 +1028,11 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	}
 
 	if (size_change && attr->ia_size != i_size_read(inode)) {
+		if (attr->ia_size > sb->s_maxbytes) {
+			status = -EFBIG;
+			goto bail_unlock;
+		}
+
 		if (i_size_read(inode) > attr->ia_size)
 			status = ocfs2_truncate_file(inode, bh, attr->ia_size);
 		else
@@ -1516,7 +1538,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
 	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 	struct buffer_head *di_bh = NULL;
 	handle_t *handle;
-	unsigned long long max_off = ocfs2_max_file_offset(inode->i_sb->s_blocksize_bits);
+	unsigned long long max_off = inode->i_sb->s_maxbytes;
 
 	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
 		return -EROFS;
@@ -1942,7 +1964,7 @@ static ssize_t ocfs2_file_buffered_write(struct file *file, loff_t *ppos,
 	}
 
 	dst = kmap_atomic(page, KM_USER0);
-	memcpy(dst + (pos & (PAGE_CACHE_SIZE - 1)), buf, bytes);
+	memcpy(dst + (pos & (loff_t)(PAGE_CACHE_SIZE - 1)), buf, bytes);
 	kunmap_atomic(dst, KM_USER0);
 	flush_dcache_page(page);
 	ocfs2_put_write_source(user_page);
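Both ocfs2 changes that drop ocfs2_mark_inode_dirty() follow the same journalling skeleton, taken straight from the hunks above: declare write intent on the buffer, update the in-memory inode and its on-disk (little-endian) mirror, then mark the buffer dirty inside the handle:

static int demo_update_atime(handle_t *handle, struct inode *inode,
			     struct buffer_head *bh)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
	int ret;

	ret = ocfs2_journal_access(handle, inode, bh,
				   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret)
		return ret;

	inode->i_atime = CURRENT_TIME;
	di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
	di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);

	return ocfs2_journal_dirty(handle, bh);
}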
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d430fdab16e9..701e6d04ed5d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -1080,6 +1080,7 @@ static int ocfs2_rename(struct inode *old_dir,
 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
 						    // this is the 1st dirent bh
 	nlink_t old_dir_nlink = old_dir->i_nlink;
+	struct ocfs2_dinode *old_di;
 
 	/* At some point it might be nice to break this function up a
 	 * bit. */
@@ -1354,7 +1355,20 @@ static int ocfs2_rename(struct inode *old_dir,
 
 	old_inode->i_ctime = CURRENT_TIME;
 	mark_inode_dirty(old_inode);
-	ocfs2_mark_inode_dirty(handle, old_inode, old_inode_bh);
+
+	status = ocfs2_journal_access(handle, old_inode, old_inode_bh,
+				      OCFS2_JOURNAL_ACCESS_WRITE);
+	if (status >= 0) {
+		old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
+
+		old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
+		old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
+
+		status = ocfs2_journal_dirty(handle, old_inode_bh);
+		if (status < 0)
+			mlog_errno(status);
+	} else
+		mlog_errno(status);
 
 	/* now that the name has been added to new_dir, remove the old name */
 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 5cc90a40b3c5..58307853fb4a 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -494,16 +494,16 @@ static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
 /*
  * Find the 1st page index which covers the given clusters.
  */
-static inline unsigned long ocfs2_align_clusters_to_page_index(struct super_block *sb,
+static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb,
 							u32 clusters)
 {
 	unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
-	unsigned long index = clusters;
+	pgoff_t index = clusters;
 
 	if (PAGE_CACHE_SHIFT > cbits) {
-		index = clusters >> (PAGE_CACHE_SHIFT - cbits);
+		index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
 	} else if (PAGE_CACHE_SHIFT < cbits) {
-		index = clusters << (cbits - PAGE_CACHE_SHIFT);
+		index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
 	}
 
 	return index;
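The casts are not cosmetic: a u32 shifted left is computed in 32-bit arithmetic, so the high bits are already gone before the (64-bit) pgoff_t assignment happens. A runnable demonstration with 4 KB pages and 1 MB clusters:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t clusters = 0x01000000;	/* 16M one-MB clusters = 16 TB */
	int cbits = 20, page_shift = 12;

	uint64_t bad  = clusters << (cbits - page_shift);
	uint64_t good = (uint64_t)clusters << (cbits - page_shift);

	/* prints: bad=0 good=0x100000000 */
	printf("bad=%#llx good=%#llx\n",
	       (unsigned long long)bad, (unsigned long long)good);
	return 0;
}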
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 200c7d4790dc..f2fc9a795deb 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -316,39 +316,51 @@ static void ocfs2_destroy_inode(struct inode *inode)
 	kmem_cache_free(ocfs2_inode_cachep, OCFS2_I(inode));
 }
 
-/* From xfs_super.c:xfs_max_file_offset
- * Copyright (c) 2000-2004 Silicon Graphics, Inc.
- */
-unsigned long long ocfs2_max_file_offset(unsigned int blockshift)
+static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
+						unsigned int cbits)
 {
-	unsigned int pagefactor = 1;
-	unsigned int bitshift = BITS_PER_LONG - 1;
-
-	/* Figure out maximum filesize, on Linux this can depend on
-	 * the filesystem blocksize (on 32 bit platforms).
-	 * __block_prepare_write does this in an [unsigned] long...
-	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
-	 * So, for page sized blocks (4K on 32 bit platforms),
-	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
-	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-	 * but for smaller blocksizes it is less (bbits = log2 bsize).
-	 * Note1: get_block_t takes a long (implicit cast from above)
-	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
-	 *        can optionally convert the [unsigned] long from above into
-	 *        an [unsigned] long long.
+	unsigned int bytes = 1 << cbits;
+	unsigned int trim = bytes;
+	unsigned int bitshift = 32;
+
+	/*
+	 * i_size and all block offsets in ocfs2 are always 64 bits
+	 * wide. i_clusters is 32 bits, in cluster-sized units. So on
+	 * 64 bit platforms, cluster size will be the limiting factor.
 	 */
 
 #if BITS_PER_LONG == 32
 # if defined(CONFIG_LBD)
 	BUILD_BUG_ON(sizeof(sector_t) != 8);
-	pagefactor = PAGE_CACHE_SIZE;
-	bitshift = BITS_PER_LONG;
+	/*
+	 * We might be limited by page cache size.
+	 */
+	if (bytes > PAGE_CACHE_SIZE) {
+		bytes = PAGE_CACHE_SIZE;
+		trim = 1;
+		/*
+		 * Shift by 31 here so that we don't get larger than
+		 * MAX_LFS_FILESIZE
+		 */
+		bitshift = 31;
+	}
 # else
-	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+	/*
+	 * We are limited by the size of sector_t. Use block size, as
+	 * that's what we expose to the VFS.
+	 */
+	bytes = 1 << bbits;
+	trim = 1;
+	bitshift = 31;
 # endif
 #endif
 
-	return (((unsigned long long)pagefactor) << bitshift) - 1;
+	/*
+	 * Trim by a whole cluster when we can actually approach the
+	 * on-disk limits. Otherwise we can overflow i_clusters when
+	 * an extent start is at the max offset.
+	 */
+	return (((unsigned long long)bytes) << bitshift) - trim;
 }
 
 static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
@@ -1259,8 +1271,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
 			       int sector_size)
 {
 	int status = 0;
-	int i;
-	struct ocfs2_dinode *di = NULL;
+	int i, cbits, bbits;
+	struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
 	struct inode *inode = NULL;
 	struct buffer_head *bitmap_bh = NULL;
 	struct ocfs2_journal *journal;
@@ -1279,9 +1291,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
 	sb->s_fs_info = osb;
 	sb->s_op = &ocfs2_sops;
 	sb->s_export_op = &ocfs2_export_ops;
+	sb->s_time_gran = 1;
 	sb->s_flags |= MS_NOATIME;
 	/* this is needed to support O_LARGEFILE */
-	sb->s_maxbytes = ocfs2_max_file_offset(sb->s_blocksize_bits);
+	cbits = le32_to_cpu(di->id2.i_super.s_clustersize_bits);
+	bbits = le32_to_cpu(di->id2.i_super.s_blocksize_bits);
+	sb->s_maxbytes = ocfs2_max_file_offset(bbits, cbits);
 
 	osb->sb = sb;
 	/* Save off for ocfs2_rw_direct */
@@ -1341,8 +1356,6 @@ static int ocfs2_initialize_super(struct super_block *sb,
 		goto bail;
 	}
 
-	di = (struct ocfs2_dinode *)bh->b_data;
-
 	osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots);
 	if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) {
 		mlog(ML_ERROR, "Invalid number of node slots (%u)\n",
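Plugging numbers into the new helper: on a 64-bit kernel with 1 MB clusters, bytes = 2^20 and bitshift stays 32, so s_maxbytes = (2^20 << 32) - 2^20 = 2^52 - 2^20 — about 4 PB, trimmed by one whole cluster so that an extent starting at the maximum offset cannot overflow the 32-bit i_clusters. On 32-bit without CONFIG_LBD the block size governs instead: with 4 KB blocks, (2^12 << 31) - 1 = 2^43 - 1, the familiar 8 TB sector_t boundary.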
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 3b9cb3d0b008..783f5270f2a1 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,6 +45,4 @@ void __ocfs2_abort(struct super_block *sb,
 
 #define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
 
-unsigned long long ocfs2_max_file_offset(unsigned int blockshift);
-
 #endif /* OCFS2_SUPER_H */
diff --git a/include/asm-alpha/fcntl.h b/include/asm-alpha/fcntl.h
index 87f2cf459e26..25da0017ec87 100644
--- a/include/asm-alpha/fcntl.h
+++ b/include/asm-alpha/fcntl.h
@@ -16,6 +16,7 @@
 #define O_LARGEFILE	0400000 /* will be set by the kernel on every open */
 #define O_DIRECT	02000000 /* direct disk access - should check with OSF/1 */
 #define O_NOATIME	04000000
+#define O_CLOEXEC	010000000 /* set close_on_exec */
 
 #define F_GETLK		7
 #define F_SETLK		8
diff --git a/include/asm-frv/unistd.h b/include/asm-frv/unistd.h
index 7306c71a8926..cd84f1771e34 100644
--- a/include/asm-frv/unistd.h
+++ b/include/asm-frv/unistd.h
@@ -330,10 +330,11 @@
 #define __NR_signalfd		321
 #define __NR_timerfd		322
 #define __NR_eventfd		323
+#define __NR_fallocate		324
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 324
+#define NR_syscalls 325
 
 #define __ARCH_WANT_IPC_PARSE_VERSION
 /* #define __ARCH_WANT_OLD_READDIR */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index f605e8d0eed3..5f0d797d33fd 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -2,6 +2,7 @@
 #define _ASM_GENERIC_PGTABLE_H
 
 #ifndef __ASSEMBLY__
+#ifdef CONFIG_MMU
 
 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
@@ -133,41 +134,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
 #endif
 
 /*
- * A facility to provide lazy MMU batching.  This allows PTE updates and
- * page invalidations to be delayed until a call to leave lazy MMU mode
- * is issued.  Some architectures may benefit from doing this, and it is
- * beneficial for both shadow and direct mode hypervisors, which may batch
- * the PTE updates which happen during this window.  Note that using this
- * interface requires that read hazards be removed from the code.  A read
- * hazard could result in the direct mode hypervisor case, since the actual
- * write to the page tables may not yet have taken place, so reads though
- * a raw PTE pointer after it has been modified are not guaranteed to be
- * up to date.  This mode can only be entered and left under the protection of
- * the page table locks for all page tables which may be modified.  In the UP
- * case, this is required so that preemption is disabled, and in the SMP case,
- * it must synchronize the delayed page table writes properly on other CPUs.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
-#define arch_enter_lazy_mmu_mode()	do {} while (0)
-#define arch_leave_lazy_mmu_mode()	do {} while (0)
-#define arch_flush_lazy_mmu_mode()	do {} while (0)
-#endif
-
-/*
- * A facility to provide batching of the reload of page tables with the
- * actual context switch code for paravirtualized guests.  By convention,
- * only one of the lazy modes (CPU, MMU) should be active at any given
- * time, entry should never be nested, and entry and exits should always
- * be paired.  This is for sanity of maintaining and reasoning about the
- * kernel code.
- */
-#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
-#define arch_enter_lazy_cpu_mode()	do {} while (0)
-#define arch_leave_lazy_cpu_mode()	do {} while (0)
-#define arch_flush_lazy_cpu_mode()	do {} while (0)
-#endif
-
-/*
  * When walking page tables, get the address of the next boundary,
  * or the end address of the range if that comes earlier.  Although no
  * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
@@ -233,6 +199,43 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
 	}
 	return 0;
 }
+#endif /* CONFIG_MMU */
+
+/*
+ * A facility to provide lazy MMU batching.  This allows PTE updates and
+ * page invalidations to be delayed until a call to leave lazy MMU mode
+ * is issued.  Some architectures may benefit from doing this, and it is
+ * beneficial for both shadow and direct mode hypervisors, which may batch
+ * the PTE updates which happen during this window.  Note that using this
+ * interface requires that read hazards be removed from the code.  A read
+ * hazard could result in the direct mode hypervisor case, since the actual
+ * write to the page tables may not yet have taken place, so reads though
+ * a raw PTE pointer after it has been modified are not guaranteed to be
+ * up to date.  This mode can only be entered and left under the protection of
+ * the page table locks for all page tables which may be modified.  In the UP
+ * case, this is required so that preemption is disabled, and in the SMP case,
+ * it must synchronize the delayed page table writes properly on other CPUs.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+#define arch_enter_lazy_mmu_mode()	do {} while (0)
+#define arch_leave_lazy_mmu_mode()	do {} while (0)
+#define arch_flush_lazy_mmu_mode()	do {} while (0)
+#endif
+
+/*
+ * A facility to provide batching of the reload of page tables with the
+ * actual context switch code for paravirtualized guests.  By convention,
+ * only one of the lazy modes (CPU, MMU) should be active at any given
+ * time, entry should never be nested, and entry and exits should always
+ * be paired.  This is for sanity of maintaining and reasoning about the
+ * kernel code.
+ */
+#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
+#define arch_enter_lazy_cpu_mode()	do {} while (0)
+#define arch_leave_lazy_cpu_mode()	do {} while (0)
+#define arch_flush_lazy_cpu_mode()	do {} while (0)
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_GENERIC_PGTABLE_H */
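The moved blocks also show this header's default-stub convention: an architecture that really implements lazy-mode batching defines the guard macro and the three hooks in its own asm/pgtable.h, and everyone else silently compiles the no-ops above. The arch side looks like this sketch (the my_arch_* helpers are hypothetical):

#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	my_arch_lazy_enter()
#define arch_leave_lazy_mmu_mode()	my_arch_lazy_leave()
#define arch_flush_lazy_mmu_mode()	my_arch_lazy_flush()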
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h
index 1e8f6f252dd3..4091b33dcb10 100644
--- a/include/asm-i386/apic.h
+++ b/include/asm-i386/apic.h
@@ -116,6 +116,8 @@ extern void enable_NMI_through_LVT0 (void * dummy);
 extern int timer_over_8254;
 extern int local_apic_timer_c2_ok;
 
+extern int local_apic_timer_disabled;
+
 #else /* !CONFIG_X86_LOCAL_APIC */
 static inline void lapic_shutdown(void) { }
 
diff --git a/include/asm-i386/cpufeature.h b/include/asm-i386/cpufeature.h
index c961c03cf1e2..7b3aa28ebc6e 100644
--- a/include/asm-i386/cpufeature.h
+++ b/include/asm-i386/cpufeature.h
@@ -79,7 +79,7 @@
 #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS	(3*32+12)  /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS		(3*32+13)  /* Branch Trace Store */
-#define X86_FEATURE_LAPIC_TIMER_BROKEN (3*32+ 14) /* lapic timer broken in C1 */
+/* 14 free */
 #define X86_FEATURE_SYNC_RDTSC	(3*32+15)  /* RDTSC synchronizes the CPU */
 #define X86_FEATURE_REP_GOOD	(3*32+16)  /* rep microcode works well on this CPU */
 
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h
index 7df88be2dd9e..9fa3fa9e62d1 100644
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -47,7 +47,8 @@ struct paravirt_ops
 	 * The patch function should return the number of bytes of code
 	 * generated, as we nop pad the rest in generic code.
 	 */
-	unsigned (*patch)(u8 type, u16 clobber, void *firstinsn, unsigned len);
+	unsigned (*patch)(u8 type, u16 clobber, void *insnbuf,
+			  unsigned long addr, unsigned len);
 
 	/* Basic arch-specific setup */
 	void (*arch_setup)(void);
@@ -253,13 +254,16 @@ extern struct paravirt_ops paravirt_ops;
 
 unsigned paravirt_patch_nop(void);
 unsigned paravirt_patch_ignore(unsigned len);
-unsigned paravirt_patch_call(void *target, u16 tgt_clobbers,
-			     void *site, u16 site_clobbers,
+unsigned paravirt_patch_call(void *insnbuf,
+			     const void *target, u16 tgt_clobbers,
+			     unsigned long addr, u16 site_clobbers,
 			     unsigned len);
-unsigned paravirt_patch_jmp(void *target, void *site, unsigned len);
-unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len);
+unsigned paravirt_patch_jmp(const void *target, void *insnbuf,
+			    unsigned long addr, unsigned len);
+unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
+				unsigned long addr, unsigned len);
 
-unsigned paravirt_patch_insns(void *site, unsigned len,
+unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
 			      const char *start, const char *end);
 
 int paravirt_disable_iospace(void);
diff --git a/include/asm-i386/pci.h b/include/asm-i386/pci.h
index d790343e9982..4fcacc711385 100644
--- a/include/asm-i386/pci.h
+++ b/include/asm-i386/pci.h
@@ -8,6 +8,9 @@ struct pci_sysdata {
8 int node; /* NUMA node */ 8 int node; /* NUMA node */
9}; 9};
10 10
11/* scan a bus after allocating a pci_sysdata for it */
12extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
13
11#include <linux/mm.h> /* for struct page */ 14#include <linux/mm.h> /* for struct page */
12 15
13/* Can be used to override the logic in pci_scan_bus for skipping 16/* Can be used to override the logic in pci_scan_bus for skipping
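pci_scan_bus_with_sysdata() centralizes allocating the pci_sysdata that i386 bus scans now require. A plausible sketch of the helper, assuming kzalloc and the arch's pci_root_ops; only the prototype above is from this patch:

	#include <linux/pci.h>
	#include <linux/slab.h>

	struct pci_bus *pci_scan_bus_with_sysdata(int busno)
	{
		struct pci_sysdata *sd;

		/* Each root bus carries its own sysdata; NUMA node unknown. */
		sd = kzalloc(sizeof(*sd), GFP_KERNEL);
		if (!sd)
			return NULL;
		sd->node = -1;
		return pci_scan_bus(busno, &pci_root_ops, sd);
	}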
diff --git a/include/asm-sparc/prom.h b/include/asm-sparc/prom.h
index 350676c589f9..71f2a1998324 100644
--- a/include/asm-sparc/prom.h
+++ b/include/asm-sparc/prom.h
@@ -67,6 +67,7 @@ extern int of_set_property(struct device_node *node, const char *name, void *val
67extern int of_getintprop_default(struct device_node *np, 67extern int of_getintprop_default(struct device_node *np,
68 const char *name, 68 const char *name,
69 int def); 69 int def);
70extern int of_find_in_proplist(const char *list, const char *match, int len);
70 71
71extern void prom_build_devicetree(void); 72extern void prom_build_devicetree(void);
72 73
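of_find_in_proplist() is meant for properties that hold a sequence of NUL-terminated strings, as "compatible" does. A sketch of a matching loop consistent with that interface; the body below is an assumption, only the prototype is from this patch:

	#include <linux/string.h>

	int of_find_in_proplist(const char *list, const char *match, int len)
	{
		while (len > 0) {
			int l;

			/* Each entry is one NUL-terminated string. */
			if (!strcmp(list, match))
				return 1;
			l = strlen(list) + 1;
			list += l;
			len -= l;
		}
		return 0;
	}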
diff --git a/include/asm-sparc64/oplib.h b/include/asm-sparc64/oplib.h
index 3f23c5dc5f21..86dc5c018a19 100644
--- a/include/asm-sparc64/oplib.h
+++ b/include/asm-sparc64/oplib.h
@@ -1,8 +1,7 @@
1/* $Id: oplib.h,v 1.14 2001/12/19 00:29:51 davem Exp $ 1/* oplib.h: Describes the interface and available routines in the
2 * oplib.h: Describes the interface and available routines in the
3 * Linux Prom library. 2 * Linux Prom library.
4 * 3 *
5 * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu) 4 * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
6 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) 5 * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
7 */ 6 */
8 7
@@ -31,8 +30,10 @@ extern int prom_chosen_node;
31extern const char prom_peer_name[]; 30extern const char prom_peer_name[];
32extern const char prom_compatible_name[]; 31extern const char prom_compatible_name[];
33extern const char prom_root_compatible[]; 32extern const char prom_root_compatible[];
33extern const char prom_cpu_compatible[];
34extern const char prom_finddev_name[]; 34extern const char prom_finddev_name[];
35extern const char prom_chosen_path[]; 35extern const char prom_chosen_path[];
36extern const char prom_cpu_path[];
36extern const char prom_getprop_name[]; 37extern const char prom_getprop_name[];
37extern const char prom_mmu_name[]; 38extern const char prom_mmu_name[];
38extern const char prom_callmethod_name[]; 39extern const char prom_callmethod_name[];
diff --git a/include/asm-sparc64/prom.h b/include/asm-sparc64/prom.h
index 31dcb92fbae0..07843f9f05df 100644
--- a/include/asm-sparc64/prom.h
+++ b/include/asm-sparc64/prom.h
@@ -76,6 +76,7 @@ extern int of_set_property(struct device_node *node, const char *name, void *val
76extern int of_getintprop_default(struct device_node *np, 76extern int of_getintprop_default(struct device_node *np,
77 const char *name, 77 const char *name,
78 int def); 78 int def);
79extern int of_find_in_proplist(const char *list, const char *match, int len);
79 80
80extern void prom_build_devicetree(void); 81extern void prom_build_devicetree(void);
81 82
diff --git a/include/asm-sparc64/spitfire.h b/include/asm-sparc64/spitfire.h
index 23ad8a7987ad..cf7807813e85 100644
--- a/include/asm-sparc64/spitfire.h
+++ b/include/asm-sparc64/spitfire.h
@@ -38,6 +38,11 @@
38 38
39#define L1DCACHE_SIZE 0x4000 39#define L1DCACHE_SIZE 0x4000
40 40
41#define SUN4V_CHIP_INVALID 0x00
42#define SUN4V_CHIP_NIAGARA1 0x01
43#define SUN4V_CHIP_NIAGARA2 0x02
44#define SUN4V_CHIP_UNKNOWN 0xff
45
41#ifndef __ASSEMBLY__ 46#ifndef __ASSEMBLY__
42 47
43enum ultra_tlb_layout { 48enum ultra_tlb_layout {
@@ -49,6 +54,8 @@ enum ultra_tlb_layout {
49 54
50extern enum ultra_tlb_layout tlb_type; 55extern enum ultra_tlb_layout tlb_type;
51 56
57extern int sun4v_chip_type;
58
52extern int cheetah_pcache_forced_on; 59extern int cheetah_pcache_forced_on;
53extern void cheetah_enable_pcache(void); 60extern void cheetah_enable_pcache(void);
54 61
diff --git a/include/asm-sparc64/xor.h b/include/asm-sparc64/xor.h
index 8ce3f1813e28..a0233884fc94 100644
--- a/include/asm-sparc64/xor.h
+++ b/include/asm-sparc64/xor.h
@@ -63,4 +63,8 @@ static struct xor_block_template xor_block_niagara = {
63 63
64/* For VIS for everything except Niagara. */ 64/* For VIS for everything except Niagara. */
65#define XOR_SELECT_TEMPLATE(FASTEST) \ 65#define XOR_SELECT_TEMPLATE(FASTEST) \
66 (tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS) 66 ((tlb_type == hypervisor && \
67 (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
68 sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \
69 &xor_block_niagara : \
70 &xor_block_VIS)
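With the new SUN4V_CHIP_* constants the XOR template is chosen by concrete chip type rather than merely by running under the hypervisor. Written out as a function, the macro above reads:

	static struct xor_block_template *select_xor_template(void)
	{
		/* Only Niagara-family sun4v chips take the Niagara routines;
		 * other hypervisor guests fall back to the VIS versions. */
		if (tlb_type == hypervisor &&
		    (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 ||
		     sun4v_chip_type == SUN4V_CHIP_NIAGARA2))
			return &xor_block_niagara;
		return &xor_block_VIS;
	}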
diff --git a/include/asm-x86_64/pci.h b/include/asm-x86_64/pci.h
index 88926eb44f5c..5da8cb0c0599 100644
--- a/include/asm-x86_64/pci.h
+++ b/include/asm-x86_64/pci.h
@@ -10,6 +10,8 @@ struct pci_sysdata {
10 void* iommu; /* IOMMU private data */ 10 void* iommu; /* IOMMU private data */
11}; 11};
12 12
13extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
14
13#ifdef CONFIG_CALGARY_IOMMU 15#ifdef CONFIG_CALGARY_IOMMU
14static inline void* pci_iommu(struct pci_bus *bus) 16static inline void* pci_iommu(struct pci_bus *bus)
15{ 17{
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 4d85262b4fa4..1ddef34f43c3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -24,6 +24,8 @@
24#include <linux/mempool.h> 24#include <linux/mempool.h>
25#include <linux/ioprio.h> 25#include <linux/ioprio.h>
26 26
27#ifdef CONFIG_BLOCK
28
27/* Platforms may set this to teach the BIO layer about IOMMU hardware. */ 29/* Platforms may set this to teach the BIO layer about IOMMU hardware. */
28#include <asm/io.h> 30#include <asm/io.h>
29 31
@@ -361,4 +363,5 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
361 __bio_kmap_irq((bio), (bio)->bi_idx, (flags)) 363 __bio_kmap_irq((bio), (bio)->bi_idx, (flags))
362#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) 364#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags)
363 365
366#endif /* CONFIG_BLOCK */
364#endif /* __LINUX_BIO_H */ 367#endif /* __LINUX_BIO_H */
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 90874a5d7d78..7b5d56b82b59 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -105,7 +105,7 @@ struct blk_io_trace {
105 */ 105 */
106struct blk_io_trace_remap { 106struct blk_io_trace_remap {
107 __be32 device; 107 __be32 device;
108 u32 __pad; 108 __be32 device_from;
109 __be64 sector; 109 __be64 sector;
110}; 110};
111 111
@@ -272,6 +272,7 @@ static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio,
272 return; 272 return;
273 273
274 r.device = cpu_to_be32(dev); 274 r.device = cpu_to_be32(dev);
275 r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev);
275 r.sector = cpu_to_be64(to); 276 r.sector = cpu_to_be64(to);
276 277
277 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); 278 __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r);
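The former padding word now carries the originating device, so a remap event records both endpoints of the mapping, big-endian on the wire. A hedged sketch of decoding the payload on the consuming side (decode_remap is hypothetical):

	#include <linux/blktrace_api.h>

	static void decode_remap(const struct blk_io_trace_remap *r)
	{
		u32 dev_to   = be32_to_cpu(r->device);      /* remap target */
		u32 dev_from = be32_to_cpu(r->device_from); /* original device */
		u64 sector   = be64_to_cpu(r->sector);

		pr_debug("remap %u -> %u @ %llu\n",
			 dev_from, dev_to, (unsigned long long)sector);
	}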
diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index e0bd46eb2414..def5a659b8a5 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -123,7 +123,6 @@ extern void clockevents_exchange_device(struct clock_event_device *old,
123extern void clockevents_set_mode(struct clock_event_device *dev, 123extern void clockevents_set_mode(struct clock_event_device *dev,
124 enum clock_event_mode mode); 124 enum clock_event_mode mode);
125extern int clockevents_register_notifier(struct notifier_block *nb); 125extern int clockevents_register_notifier(struct notifier_block *nb);
126extern void clockevents_unregister_notifier(struct notifier_block *nb);
127extern int clockevents_program_event(struct clock_event_device *dev, 126extern int clockevents_program_event(struct clock_event_device *dev,
128 ktime_t expires, ktime_t now); 127 ktime_t expires, ktime_t now);
129 128
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6bf139562947..16421f662a7a 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1659,7 +1659,6 @@ extern int sb_min_blocksize(struct super_block *, int);
1659extern int generic_file_mmap(struct file *, struct vm_area_struct *); 1659extern int generic_file_mmap(struct file *, struct vm_area_struct *);
1660extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); 1660extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
1661extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); 1661extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
1662extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
1663int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); 1662int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
1664extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); 1663extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
1665extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); 1664extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
diff --git a/include/linux/init.h b/include/linux/init.h
index 1a4a283d19a9..74b1f43bf982 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -43,7 +43,7 @@
43#define __init __attribute__ ((__section__ (".init.text"))) __cold 43#define __init __attribute__ ((__section__ (".init.text"))) __cold
44#define __initdata __attribute__ ((__section__ (".init.data"))) 44#define __initdata __attribute__ ((__section__ (".init.data")))
45#define __exitdata __attribute__ ((__section__(".exit.data"))) 45#define __exitdata __attribute__ ((__section__(".exit.data")))
46#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit"))) __cold 46#define __exit_call __attribute_used__ __attribute__ ((__section__ (".exitcall.exit")))
47 47
48/* modpost check for section mismatches during the kernel build. 48/* modpost check for section mismatches during the kernel build.
49 * A section mismatch happens when there are references from a 49 * A section mismatch happens when there are references from a
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 4300bb462d29..f592df74b3cf 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -224,9 +224,9 @@ extern void hex_dump_to_buffer(const void *buf, size_t len,
224 char *linebuf, size_t linebuflen, bool ascii); 224 char *linebuf, size_t linebuflen, bool ascii);
225extern void print_hex_dump(const char *level, const char *prefix_str, 225extern void print_hex_dump(const char *level, const char *prefix_str,
226 int prefix_type, int rowsize, int groupsize, 226 int prefix_type, int rowsize, int groupsize,
227 void *buf, size_t len, bool ascii); 227 const void *buf, size_t len, bool ascii);
228extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type, 228extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
229 void *buf, size_t len); 229 const void *buf, size_t len);
230#define hex_asc(x) "0123456789abcdef"[x] 230#define hex_asc(x) "0123456789abcdef"[x]
231 231
232#ifdef DEBUG 232#ifdef DEBUG
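With the buffer parameter const-qualified, read-only data can be dumped without a cast. Minimal usage sketch:

	#include <linux/kernel.h>

	static const u8 blob[] = { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x42 };

	static void dump_blob(void)
	{
		/* 16 bytes per row, 1-byte groups, with an ASCII column. */
		print_hex_dump(KERN_DEBUG, "blob: ", DUMP_PREFIX_OFFSET,
			       16, 1, blob, sizeof(blob), true);
	}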
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9ba4aec37c50..157dcb055b5c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -71,7 +71,7 @@ struct nfs_access_entry {
71 71
72struct nfs4_state; 72struct nfs4_state;
73struct nfs_open_context { 73struct nfs_open_context {
74 struct kref kref; 74 atomic_t count;
75 struct path path; 75 struct path path;
76 struct rpc_cred *cred; 76 struct rpc_cred *cred;
77 struct nfs4_state *state; 77 struct nfs4_state *state;
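Replacing the kref with a bare atomic_t implies open-coded get/put helpers around the new count field. A simplified sketch of what they plausibly look like; the real teardown releases cred, path and NFSv4 state before freeing:

	#include <linux/nfs_fs.h>
	#include <linux/slab.h>

	static struct nfs_open_context *
	get_nfs_open_context(struct nfs_open_context *ctx)
	{
		atomic_inc(&ctx->count);
		return ctx;
	}

	static void put_nfs_open_context(struct nfs_open_context *ctx)
	{
		if (atomic_dec_and_test(&ctx->count)) {
			/* last reference gone: full teardown, then free */
			kfree(ctx);
		}
	}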
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 28e3664fdf1b..cd13a78c5db8 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -75,7 +75,6 @@ struct proc_dir_entry {
75 write_proc_t *write_proc; 75 write_proc_t *write_proc;
76 atomic_t count; /* use count */ 76 atomic_t count; /* use count */
77 int deleted; /* delete flag */ 77 int deleted; /* delete flag */
78 void *set;
79 int pde_users; /* number of callers into module in progress */ 78 int pde_users; /* number of callers into module in progress */
80 spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ 79 spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
81 struct completion *pde_unload_completion; 80 struct completion *pde_unload_completion;
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index c6b7485eac7c..fe17d7d750c2 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -281,7 +281,6 @@ extern void FASTCALL(call_rcu(struct rcu_head *head,
281extern void FASTCALL(call_rcu_bh(struct rcu_head *head, 281extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
282 void (*func)(struct rcu_head *head))); 282 void (*func)(struct rcu_head *head)));
283extern void synchronize_rcu(void); 283extern void synchronize_rcu(void);
284void synchronize_idle(void);
285extern void rcu_barrier(void); 284extern void rcu_barrier(void);
286 285
287#endif /* __KERNEL__ */ 286#endif /* __KERNEL__ */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17249fae5014..682ef87da6eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -139,7 +139,7 @@ struct cfs_rq;
139extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m); 139extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
140extern void proc_sched_set_task(struct task_struct *p); 140extern void proc_sched_set_task(struct task_struct *p);
141extern void 141extern void
142print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now); 142print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
143#else 143#else
144static inline void 144static inline void
145proc_sched_show_task(struct task_struct *p, struct seq_file *m) 145proc_sched_show_task(struct task_struct *p, struct seq_file *m)
@@ -149,7 +149,7 @@ static inline void proc_sched_set_task(struct task_struct *p)
149{ 149{
150} 150}
151static inline void 151static inline void
152print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) 152print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
153{ 153{
154} 154}
155#endif 155#endif
@@ -855,26 +855,24 @@ struct sched_domain;
855struct sched_class { 855struct sched_class {
856 struct sched_class *next; 856 struct sched_class *next;
857 857
858 void (*enqueue_task) (struct rq *rq, struct task_struct *p, 858 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
859 int wakeup, u64 now); 859 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
860 void (*dequeue_task) (struct rq *rq, struct task_struct *p,
861 int sleep, u64 now);
862 void (*yield_task) (struct rq *rq, struct task_struct *p); 860 void (*yield_task) (struct rq *rq, struct task_struct *p);
863 861
864 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p); 862 void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
865 863
866 struct task_struct * (*pick_next_task) (struct rq *rq, u64 now); 864 struct task_struct * (*pick_next_task) (struct rq *rq);
867 void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now); 865 void (*put_prev_task) (struct rq *rq, struct task_struct *p);
868 866
869 int (*load_balance) (struct rq *this_rq, int this_cpu, 867 unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
870 struct rq *busiest, 868 struct rq *busiest,
871 unsigned long max_nr_move, unsigned long max_load_move, 869 unsigned long max_nr_move, unsigned long max_load_move,
872 struct sched_domain *sd, enum cpu_idle_type idle, 870 struct sched_domain *sd, enum cpu_idle_type idle,
873 int *all_pinned, unsigned long *total_load_moved); 871 int *all_pinned, int *this_best_prio);
874 872
875 void (*set_curr_task) (struct rq *rq); 873 void (*set_curr_task) (struct rq *rq);
876 void (*task_tick) (struct rq *rq, struct task_struct *p); 874 void (*task_tick) (struct rq *rq, struct task_struct *p);
877 void (*task_new) (struct rq *rq, struct task_struct *p, u64 now); 875 void (*task_new) (struct rq *rq, struct task_struct *p);
878}; 876};
879 877
880struct load_weight { 878struct load_weight {
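The dropped u64 arguments reflect a new calling convention: instead of threading "now" through every sched_class hook, callers refresh the per-runqueue clock first and the hooks read rq->clock themselves. A sketch of the pattern as seen from sched.c (example_enqueue is hypothetical):

	static void example_enqueue(struct rq *rq, struct task_struct *p)
	{
		update_rq_clock(rq);                     /* was: now = rq_clock(rq) */
		p->sched_class->enqueue_task(rq, p, 1);  /* no u64 now argument */
	}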
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 691a1748d9d2..6570719eafdf 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -274,6 +274,8 @@ struct tty_struct {
274#define TTY_PTY_LOCK 16 /* pty private */ 274#define TTY_PTY_LOCK 16 /* pty private */
275#define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */ 275#define TTY_NO_WRITE_SPLIT 17 /* Preserve write boundaries to driver */
276#define TTY_HUPPED 18 /* Post driver->hangup() */ 276#define TTY_HUPPED 18 /* Post driver->hangup() */
277#define TTY_FLUSHING 19 /* Flushing to ldisc in progress */
278#define TTY_FLUSHPENDING 20 /* Queued buffer flush pending */
277 279
278#define TTY_WRITE_FLUSH(tty) tty_write_flush((tty)) 280#define TTY_WRITE_FLUSH(tty) tty_write_flush((tty))
279 281
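TTY_FLUSHING marks a push of buffered characters to the line discipline in progress, and TTY_FLUSHPENDING lets a flush request that arrives meanwhile be deferred until the push finishes. A hedged sketch of the coordination, simplified from what the tty buffer code plausibly does:

	#include <linux/tty.h>

	/* Hypothetical feeder, showing how the two bits cooperate. */
	static void example_feed_ldisc(struct tty_struct *tty)
	{
		set_bit(TTY_FLUSHING, &tty->flags);
		/* ... copy buffered characters to the line discipline ... */
		clear_bit(TTY_FLUSHING, &tty->flags);

		if (test_and_clear_bit(TTY_FLUSHPENDING, &tty->flags)) {
			/* a flush was parked during the copy;
			 * complete it now and wake the waiter */
		}
	}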
diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 7a671603fca6..9bf059817aec 100644
--- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -21,4 +21,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
21extern int nf_conntrack_ipv4_compat_init(void); 21extern int nf_conntrack_ipv4_compat_init(void);
22extern void nf_conntrack_ipv4_compat_fini(void); 22extern void nf_conntrack_ipv4_compat_fini(void);
23 23
24extern void need_ipv4_conntrack(void);
25
24#endif /*_NF_CONNTRACK_IPV4_H*/ 26#endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a777d3761416..3401293359e8 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1992,19 +1992,19 @@ int __audit_signal_info(int sig, struct task_struct *t)
1992 extern uid_t audit_sig_uid; 1992 extern uid_t audit_sig_uid;
1993 extern u32 audit_sig_sid; 1993 extern u32 audit_sig_sid;
1994 1994
1995 if (audit_pid && t->tgid == audit_pid && 1995 if (audit_pid && t->tgid == audit_pid) {
1996 (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) { 1996 if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) {
1997 audit_sig_pid = tsk->pid; 1997 audit_sig_pid = tsk->pid;
1998 if (ctx) 1998 if (ctx)
1999 audit_sig_uid = ctx->loginuid; 1999 audit_sig_uid = ctx->loginuid;
2000 else 2000 else
2001 audit_sig_uid = tsk->uid; 2001 audit_sig_uid = tsk->uid;
2002 selinux_get_task_sid(tsk, &audit_sig_sid); 2002 selinux_get_task_sid(tsk, &audit_sig_sid);
2003 }
2004 if (!audit_signals || audit_dummy_context())
2005 return 0;
2003 } 2006 }
2004 2007
2005 if (!audit_signals) /* audit_context checked in wrapper */
2006 return 0;
2007
2008 /* optimize the common case by putting first signal recipient directly 2008 /* optimize the common case by putting first signal recipient directly
2009 * in audit_context */ 2009 * in audit_context */
2010 if (!ctx->target_pid) { 2010 if (!ctx->target_pid) {
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index c38272746887..5bfeaed7e487 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -62,15 +62,6 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
62 */ 62 */
63 desc->chip->enable(irq); 63 desc->chip->enable(irq);
64 64
65 /*
66 * Temporary hack to figure out more about the problem, which
67 * is causing the ancient network cards to die.
68 */
69 if (desc->handle_irq != handle_edge_irq) {
70 WARN_ON_ONCE(1);
71 return;
72 }
73
74 if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { 65 if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
75 desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; 66 desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY;
76 67
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3e9f513a728d..4b8a4493c541 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1063,6 +1063,11 @@ EXPORT_SYMBOL_GPL(register_kprobe);
1063EXPORT_SYMBOL_GPL(unregister_kprobe); 1063EXPORT_SYMBOL_GPL(unregister_kprobe);
1064EXPORT_SYMBOL_GPL(register_jprobe); 1064EXPORT_SYMBOL_GPL(register_jprobe);
1065EXPORT_SYMBOL_GPL(unregister_jprobe); 1065EXPORT_SYMBOL_GPL(unregister_jprobe);
1066#ifdef CONFIG_KPROBES
1066EXPORT_SYMBOL_GPL(jprobe_return); 1067EXPORT_SYMBOL_GPL(jprobe_return);
1068#endif
1069
1070#ifdef CONFIG_KPROBES
1067EXPORT_SYMBOL_GPL(register_kretprobe); 1071EXPORT_SYMBOL_GPL(register_kretprobe);
1068EXPORT_SYMBOL_GPL(unregister_kretprobe); 1072EXPORT_SYMBOL_GPL(unregister_kretprobe);
1073#endif
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index a3b7854b8f7c..a686590d88c1 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -709,7 +709,8 @@ static void mark_nosave_pages(struct memory_bitmap *bm)
709 region->end_pfn << PAGE_SHIFT); 709 region->end_pfn << PAGE_SHIFT);
710 710
711 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++) 711 for (pfn = region->start_pfn; pfn < region->end_pfn; pfn++)
712 memory_bm_set_bit(bm, pfn); 712 if (pfn_valid(pfn))
713 memory_bm_set_bit(bm, pfn);
713 } 714 }
714} 715}
715 716
diff --git a/kernel/profile.c b/kernel/profile.c
index 5b20fe977bed..cb1e37d2dac3 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -199,11 +199,11 @@ EXPORT_SYMBOL_GPL(register_timer_hook);
199EXPORT_SYMBOL_GPL(unregister_timer_hook); 199EXPORT_SYMBOL_GPL(unregister_timer_hook);
200EXPORT_SYMBOL_GPL(task_handoff_register); 200EXPORT_SYMBOL_GPL(task_handoff_register);
201EXPORT_SYMBOL_GPL(task_handoff_unregister); 201EXPORT_SYMBOL_GPL(task_handoff_unregister);
202EXPORT_SYMBOL_GPL(profile_event_register);
203EXPORT_SYMBOL_GPL(profile_event_unregister);
202 204
203#endif /* CONFIG_PROFILING */ 205#endif /* CONFIG_PROFILING */
204 206
205EXPORT_SYMBOL_GPL(profile_event_register);
206EXPORT_SYMBOL_GPL(profile_event_unregister);
207 207
208#ifdef CONFIG_SMP 208#ifdef CONFIG_SMP
209/* 209/*
diff --git a/kernel/sched.c b/kernel/sched.c
index 72bb9483d949..6247e4a8350f 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -263,6 +263,7 @@ struct rq {
263 263
264 unsigned int clock_warps, clock_overflows; 264 unsigned int clock_warps, clock_overflows;
265 unsigned int clock_unstable_events; 265 unsigned int clock_unstable_events;
266 u64 tick_timestamp;
266 267
267 atomic_t nr_iowait; 268 atomic_t nr_iowait;
268 269
@@ -318,15 +319,19 @@ static inline int cpu_of(struct rq *rq)
318} 319}
319 320
320/* 321/*
321 * Per-runqueue clock, as finegrained as the platform can give us: 322 * Update the per-runqueue clock, as finegrained as the platform can give
323 * us, but without assuming monotonicity, etc.:
322 */ 324 */
323static unsigned long long __rq_clock(struct rq *rq) 325static void __update_rq_clock(struct rq *rq)
324{ 326{
325 u64 prev_raw = rq->prev_clock_raw; 327 u64 prev_raw = rq->prev_clock_raw;
326 u64 now = sched_clock(); 328 u64 now = sched_clock();
327 s64 delta = now - prev_raw; 329 s64 delta = now - prev_raw;
328 u64 clock = rq->clock; 330 u64 clock = rq->clock;
329 331
332#ifdef CONFIG_SCHED_DEBUG
333 WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
334#endif
330 /* 335 /*
331 * Protect against sched_clock() occasionally going backwards: 336 * Protect against sched_clock() occasionally going backwards:
332 */ 337 */
@@ -337,8 +342,11 @@ static unsigned long long __rq_clock(struct rq *rq)
337 /* 342 /*
338 * Catch too large forward jumps too: 343 * Catch too large forward jumps too:
339 */ 344 */
340 if (unlikely(delta > 2*TICK_NSEC)) { 345 if (unlikely(clock + delta > rq->tick_timestamp + TICK_NSEC)) {
341 clock++; 346 if (clock < rq->tick_timestamp + TICK_NSEC)
347 clock = rq->tick_timestamp + TICK_NSEC;
348 else
349 clock++;
342 rq->clock_overflows++; 350 rq->clock_overflows++;
343 } else { 351 } else {
344 if (unlikely(delta > rq->clock_max_delta)) 352 if (unlikely(delta > rq->clock_max_delta))
@@ -349,18 +357,12 @@ static unsigned long long __rq_clock(struct rq *rq)
349 357
350 rq->prev_clock_raw = now; 358 rq->prev_clock_raw = now;
351 rq->clock = clock; 359 rq->clock = clock;
352
353 return clock;
354} 360}
355 361
356static inline unsigned long long rq_clock(struct rq *rq) 362static void update_rq_clock(struct rq *rq)
357{ 363{
358 int this_cpu = smp_processor_id(); 364 if (likely(smp_processor_id() == cpu_of(rq)))
359 365 __update_rq_clock(rq);
360 if (this_cpu == cpu_of(rq))
361 return __rq_clock(rq);
362
363 return rq->clock;
364} 366}
365 367
366/* 368/*
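__update_rq_clock() clamps sched_clock() in both directions: backward steps only nudge the clock forward by one, and forward jumps are capped so the clock never runs more than a tick past the last tick's timestamp. A userspace toy model of the same clamping logic (values illustrative):

	#include <stdio.h>
	#include <stdint.h>

	#define TICK_NSEC 1000000ULL	/* toy 1 ms tick */

	static uint64_t clk, prev_raw, tick_timestamp;

	static void toy_update_clock(uint64_t now)	/* stand-in for sched_clock() */
	{
		int64_t delta = (int64_t)(now - prev_raw);

		if (delta < 0) {
			clk++;				/* clock went backwards */
		} else if (clk + delta > tick_timestamp + TICK_NSEC) {
			/* cap too-large forward jumps at one tick */
			if (clk < tick_timestamp + TICK_NSEC)
				clk = tick_timestamp + TICK_NSEC;
			else
				clk++;
		} else {
			clk += delta;
		}
		prev_raw = now;
	}

	int main(void)
	{
		uint64_t samples[] = { 300000, 100000, 9000000 };
		int i;

		for (i = 0; i < 3; i++) {
			toy_update_clock(samples[i]);
			printf("clock=%llu\n", (unsigned long long)clk);
		}
		return 0;	/* prints 300000, 300001, 1000000 */
	}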
@@ -386,9 +388,12 @@ unsigned long long cpu_clock(int cpu)
386{ 388{
387 unsigned long long now; 389 unsigned long long now;
388 unsigned long flags; 390 unsigned long flags;
391 struct rq *rq;
389 392
390 local_irq_save(flags); 393 local_irq_save(flags);
391 now = rq_clock(cpu_rq(cpu)); 394 rq = cpu_rq(cpu);
395 update_rq_clock(rq);
396 now = rq->clock;
392 local_irq_restore(flags); 397 local_irq_restore(flags);
393 398
394 return now; 399 return now;
@@ -637,6 +642,11 @@ static u64 div64_likely32(u64 divident, unsigned long divisor)
637 642
638#define WMULT_SHIFT 32 643#define WMULT_SHIFT 32
639 644
645/*
646 * Shift right and round:
647 */
648#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))
649
640static unsigned long 650static unsigned long
641calc_delta_mine(unsigned long delta_exec, unsigned long weight, 651calc_delta_mine(unsigned long delta_exec, unsigned long weight,
642 struct load_weight *lw) 652 struct load_weight *lw)
@@ -644,18 +654,17 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
644 u64 tmp; 654 u64 tmp;
645 655
646 if (unlikely(!lw->inv_weight)) 656 if (unlikely(!lw->inv_weight))
647 lw->inv_weight = WMULT_CONST / lw->weight; 657 lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1;
648 658
649 tmp = (u64)delta_exec * weight; 659 tmp = (u64)delta_exec * weight;
650 /* 660 /*
651 * Check whether we'd overflow the 64-bit multiplication: 661 * Check whether we'd overflow the 64-bit multiplication:
652 */ 662 */
653 if (unlikely(tmp > WMULT_CONST)) { 663 if (unlikely(tmp > WMULT_CONST))
654 tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight) 664 tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight,
655 >> (WMULT_SHIFT/2); 665 WMULT_SHIFT/2);
656 } else { 666 else
657 tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; 667 tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT);
658 }
659 668
660 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); 669 return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX);
661} 670}
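RSR() adds half of the final ULP before shifting, i.e. it rounds the fixed-point product to nearest instead of truncating, and the new inv_weight is likewise a rounded reciprocal. A tiny userspace check of the rounding step:

	#include <stdio.h>
	#include <stdint.h>

	#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y))

	int main(void)
	{
		uint64_t x = 10;

		/* a plain shift truncates 10/4 = 2.5 down to 2 ... */
		printf("%llu\n", (unsigned long long)(x >> 2));
		/* ... RSR rounds it to nearest: (10 + 2) >> 2 = 3 */
		printf("%llu\n", (unsigned long long)RSR(x, 2));
		return 0;
	}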
@@ -703,11 +712,14 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec)
703 * the relative distance between them is ~25%.) 712 * the relative distance between them is ~25%.)
704 */ 713 */
705static const int prio_to_weight[40] = { 714static const int prio_to_weight[40] = {
706/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, 715 /* -20 */ 88761, 71755, 56483, 46273, 36291,
707/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280, 716 /* -15 */ 29154, 23254, 18705, 14949, 11916,
708/* 0 */ NICE_0_LOAD /* 1024 */, 717 /* -10 */ 9548, 7620, 6100, 4904, 3906,
709/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137, 718 /* -5 */ 3121, 2501, 1991, 1586, 1277,
710/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, 719 /* 0 */ 1024, 820, 655, 526, 423,
720 /* 5 */ 335, 272, 215, 172, 137,
721 /* 10 */ 110, 87, 70, 56, 45,
722 /* 15 */ 36, 29, 23, 18, 15,
711}; 723};
712 724
713/* 725/*
@@ -718,14 +730,14 @@ static const int prio_to_weight[40] = {
718 * into multiplications: 730 * into multiplications:
719 */ 731 */
720static const u32 prio_to_wmult[40] = { 732static const u32 prio_to_wmult[40] = {
721/* -20 */ 48356, 60446, 75558, 94446, 118058, 733 /* -20 */ 48388, 59856, 76040, 92818, 118348,
722/* -15 */ 147573, 184467, 230589, 288233, 360285, 734 /* -15 */ 147320, 184698, 229616, 287308, 360437,
723/* -10 */ 450347, 562979, 703746, 879575, 1099582, 735 /* -10 */ 449829, 563644, 704093, 875809, 1099582,
724/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, 736 /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326,
725/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, 737 /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587,
726/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, 738 /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126,
727/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, 739 /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717,
728/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, 740 /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
729}; 741};
730 742
731static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); 743static void activate_task(struct rq *rq, struct task_struct *p, int wakeup);
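The rebuilt tables keep the roughly 1.25x-per-nice-level progression and, spot-checked below, track prio_to_wmult[i] = 2^32 / prio_to_weight[i] rounded to nearest (the relation is inferred from the values, not stated by the patch):

	#include <stdio.h>

	/* wmult(w) = 2^32 / w, rounded to nearest */
	static unsigned long long wmult(unsigned long long w)
	{
		return ((1ULL << 32) + w / 2) / w;
	}

	int main(void)
	{
		printf("%llu\n", wmult(1024));	/* 4194304  (nice  0) */
		printf("%llu\n", wmult(3121));	/* 1376151  (nice -5) */
		printf("%llu\n", wmult(335));	/* 12820798 (nice  5) */
		return 0;
	}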
@@ -745,8 +757,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
745 unsigned long max_nr_move, unsigned long max_load_move, 757 unsigned long max_nr_move, unsigned long max_load_move,
746 struct sched_domain *sd, enum cpu_idle_type idle, 758 struct sched_domain *sd, enum cpu_idle_type idle,
747 int *all_pinned, unsigned long *load_moved, 759 int *all_pinned, unsigned long *load_moved,
748 int this_best_prio, int best_prio, int best_prio_seen, 760 int *this_best_prio, struct rq_iterator *iterator);
749 struct rq_iterator *iterator);
750 761
751#include "sched_stats.h" 762#include "sched_stats.h"
752#include "sched_rt.c" 763#include "sched_rt.c"
@@ -782,14 +793,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls)
782 * This function is called /before/ updating rq->ls.load 793 * This function is called /before/ updating rq->ls.load
783 * and when switching tasks. 794 * and when switching tasks.
784 */ 795 */
785static void update_curr_load(struct rq *rq, u64 now) 796static void update_curr_load(struct rq *rq)
786{ 797{
787 struct load_stat *ls = &rq->ls; 798 struct load_stat *ls = &rq->ls;
788 u64 start; 799 u64 start;
789 800
790 start = ls->load_update_start; 801 start = ls->load_update_start;
791 ls->load_update_start = now; 802 ls->load_update_start = rq->clock;
792 ls->delta_stat += now - start; 803 ls->delta_stat += rq->clock - start;
793 /* 804 /*
794 * Stagger updates to ls->delta_fair. Very frequent updates 805 * Stagger updates to ls->delta_fair. Very frequent updates
795 * can be expensive. 806 * can be expensive.
@@ -798,30 +809,28 @@ static void update_curr_load(struct rq *rq, u64 now)
798 __update_curr_load(rq, ls); 809 __update_curr_load(rq, ls);
799} 810}
800 811
801static inline void 812static inline void inc_load(struct rq *rq, const struct task_struct *p)
802inc_load(struct rq *rq, const struct task_struct *p, u64 now)
803{ 813{
804 update_curr_load(rq, now); 814 update_curr_load(rq);
805 update_load_add(&rq->ls.load, p->se.load.weight); 815 update_load_add(&rq->ls.load, p->se.load.weight);
806} 816}
807 817
808static inline void 818static inline void dec_load(struct rq *rq, const struct task_struct *p)
809dec_load(struct rq *rq, const struct task_struct *p, u64 now)
810{ 819{
811 update_curr_load(rq, now); 820 update_curr_load(rq);
812 update_load_sub(&rq->ls.load, p->se.load.weight); 821 update_load_sub(&rq->ls.load, p->se.load.weight);
813} 822}
814 823
815static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) 824static void inc_nr_running(struct task_struct *p, struct rq *rq)
816{ 825{
817 rq->nr_running++; 826 rq->nr_running++;
818 inc_load(rq, p, now); 827 inc_load(rq, p);
819} 828}
820 829
821static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) 830static void dec_nr_running(struct task_struct *p, struct rq *rq)
822{ 831{
823 rq->nr_running--; 832 rq->nr_running--;
824 dec_load(rq, p, now); 833 dec_load(rq, p);
825} 834}
826 835
827static void set_load_weight(struct task_struct *p) 836static void set_load_weight(struct task_struct *p)
@@ -848,18 +857,16 @@ static void set_load_weight(struct task_struct *p)
848 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; 857 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
849} 858}
850 859
851static void 860static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
852enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
853{ 861{
854 sched_info_queued(p); 862 sched_info_queued(p);
855 p->sched_class->enqueue_task(rq, p, wakeup, now); 863 p->sched_class->enqueue_task(rq, p, wakeup);
856 p->se.on_rq = 1; 864 p->se.on_rq = 1;
857} 865}
858 866
859static void 867static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
860dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now)
861{ 868{
862 p->sched_class->dequeue_task(rq, p, sleep, now); 869 p->sched_class->dequeue_task(rq, p, sleep);
863 p->se.on_rq = 0; 870 p->se.on_rq = 0;
864} 871}
865 872
@@ -914,13 +921,11 @@ static int effective_prio(struct task_struct *p)
914 */ 921 */
915static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) 922static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
916{ 923{
917 u64 now = rq_clock(rq);
918
919 if (p->state == TASK_UNINTERRUPTIBLE) 924 if (p->state == TASK_UNINTERRUPTIBLE)
920 rq->nr_uninterruptible--; 925 rq->nr_uninterruptible--;
921 926
922 enqueue_task(rq, p, wakeup, now); 927 enqueue_task(rq, p, wakeup);
923 inc_nr_running(p, rq, now); 928 inc_nr_running(p, rq);
924} 929}
925 930
926/* 931/*
@@ -928,13 +933,13 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
928 */ 933 */
929static inline void activate_idle_task(struct task_struct *p, struct rq *rq) 934static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
930{ 935{
931 u64 now = rq_clock(rq); 936 update_rq_clock(rq);
932 937
933 if (p->state == TASK_UNINTERRUPTIBLE) 938 if (p->state == TASK_UNINTERRUPTIBLE)
934 rq->nr_uninterruptible--; 939 rq->nr_uninterruptible--;
935 940
936 enqueue_task(rq, p, 0, now); 941 enqueue_task(rq, p, 0);
937 inc_nr_running(p, rq, now); 942 inc_nr_running(p, rq);
938} 943}
939 944
940/* 945/*
@@ -942,13 +947,11 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq)
942 */ 947 */
943static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) 948static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
944{ 949{
945 u64 now = rq_clock(rq);
946
947 if (p->state == TASK_UNINTERRUPTIBLE) 950 if (p->state == TASK_UNINTERRUPTIBLE)
948 rq->nr_uninterruptible++; 951 rq->nr_uninterruptible++;
949 952
950 dequeue_task(rq, p, sleep, now); 953 dequeue_task(rq, p, sleep);
951 dec_nr_running(p, rq, now); 954 dec_nr_running(p, rq);
952} 955}
953 956
954/** 957/**
@@ -1516,6 +1519,7 @@ out_set_cpu:
1516 1519
1517out_activate: 1520out_activate:
1518#endif /* CONFIG_SMP */ 1521#endif /* CONFIG_SMP */
1522 update_rq_clock(rq);
1519 activate_task(rq, p, 1); 1523 activate_task(rq, p, 1);
1520 /* 1524 /*
1521 * Sync wakeups (i.e. those types of wakeups where the waker 1525 * Sync wakeups (i.e. those types of wakeups where the waker
@@ -1647,12 +1651,11 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1647 unsigned long flags; 1651 unsigned long flags;
1648 struct rq *rq; 1652 struct rq *rq;
1649 int this_cpu; 1653 int this_cpu;
1650 u64 now;
1651 1654
1652 rq = task_rq_lock(p, &flags); 1655 rq = task_rq_lock(p, &flags);
1653 BUG_ON(p->state != TASK_RUNNING); 1656 BUG_ON(p->state != TASK_RUNNING);
1654 this_cpu = smp_processor_id(); /* parent's CPU */ 1657 this_cpu = smp_processor_id(); /* parent's CPU */
1655 now = rq_clock(rq); 1658 update_rq_clock(rq);
1656 1659
1657 p->prio = effective_prio(p); 1660 p->prio = effective_prio(p);
1658 1661
@@ -1666,8 +1669,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
1666 * Let the scheduling class do new task startup 1669 * Let the scheduling class do new task startup
1667 * management (if any): 1670 * management (if any):
1668 */ 1671 */
1669 p->sched_class->task_new(rq, p, now); 1672 p->sched_class->task_new(rq, p);
1670 inc_nr_running(p, rq, now); 1673 inc_nr_running(p, rq);
1671 } 1674 }
1672 check_preempt_curr(rq, p); 1675 check_preempt_curr(rq, p);
1673 task_rq_unlock(rq, &flags); 1676 task_rq_unlock(rq, &flags);
@@ -1954,7 +1957,6 @@ static void update_cpu_load(struct rq *this_rq)
1954 unsigned long total_load = this_rq->ls.load.weight; 1957 unsigned long total_load = this_rq->ls.load.weight;
1955 unsigned long this_load = total_load; 1958 unsigned long this_load = total_load;
1956 struct load_stat *ls = &this_rq->ls; 1959 struct load_stat *ls = &this_rq->ls;
1957 u64 now = __rq_clock(this_rq);
1958 int i, scale; 1960 int i, scale;
1959 1961
1960 this_rq->nr_load_updates++; 1962 this_rq->nr_load_updates++;
@@ -1962,7 +1964,7 @@ static void update_cpu_load(struct rq *this_rq)
1962 goto do_avg; 1964 goto do_avg;
1963 1965
1964 /* Update delta_fair/delta_exec fields first */ 1966 /* Update delta_fair/delta_exec fields first */
1965 update_curr_load(this_rq, now); 1967 update_curr_load(this_rq);
1966 1968
1967 fair_delta64 = ls->delta_fair + 1; 1969 fair_delta64 = ls->delta_fair + 1;
1968 ls->delta_fair = 0; 1970 ls->delta_fair = 0;
@@ -1970,8 +1972,8 @@ static void update_cpu_load(struct rq *this_rq)
1970 exec_delta64 = ls->delta_exec + 1; 1972 exec_delta64 = ls->delta_exec + 1;
1971 ls->delta_exec = 0; 1973 ls->delta_exec = 0;
1972 1974
1973 sample_interval64 = now - ls->load_update_last; 1975 sample_interval64 = this_rq->clock - ls->load_update_last;
1974 ls->load_update_last = now; 1976 ls->load_update_last = this_rq->clock;
1975 1977
1976 if ((s64)sample_interval64 < (s64)TICK_NSEC) 1978 if ((s64)sample_interval64 < (s64)TICK_NSEC)
1977 sample_interval64 = TICK_NSEC; 1979 sample_interval64 = TICK_NSEC;
@@ -2026,6 +2028,8 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2)
2026 spin_lock(&rq1->lock); 2028 spin_lock(&rq1->lock);
2027 } 2029 }
2028 } 2030 }
2031 update_rq_clock(rq1);
2032 update_rq_clock(rq2);
2029} 2033}
2030 2034
2031/* 2035/*
@@ -2166,8 +2170,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2166 unsigned long max_nr_move, unsigned long max_load_move, 2170 unsigned long max_nr_move, unsigned long max_load_move,
2167 struct sched_domain *sd, enum cpu_idle_type idle, 2171 struct sched_domain *sd, enum cpu_idle_type idle,
2168 int *all_pinned, unsigned long *load_moved, 2172 int *all_pinned, unsigned long *load_moved,
2169 int this_best_prio, int best_prio, int best_prio_seen, 2173 int *this_best_prio, struct rq_iterator *iterator)
2170 struct rq_iterator *iterator)
2171{ 2174{
2172 int pulled = 0, pinned = 0, skip_for_load; 2175 int pulled = 0, pinned = 0, skip_for_load;
2173 struct task_struct *p; 2176 struct task_struct *p;
@@ -2192,12 +2195,8 @@ next:
2192 */ 2195 */
2193 skip_for_load = (p->se.load.weight >> 1) > rem_load_move + 2196 skip_for_load = (p->se.load.weight >> 1) > rem_load_move +
2194 SCHED_LOAD_SCALE_FUZZ; 2197 SCHED_LOAD_SCALE_FUZZ;
2195 if (skip_for_load && p->prio < this_best_prio) 2198 if ((skip_for_load && p->prio >= *this_best_prio) ||
2196 skip_for_load = !best_prio_seen && p->prio == best_prio;
2197 if (skip_for_load ||
2198 !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { 2199 !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
2199
2200 best_prio_seen |= p->prio == best_prio;
2201 p = iterator->next(iterator->arg); 2200 p = iterator->next(iterator->arg);
2202 goto next; 2201 goto next;
2203 } 2202 }
@@ -2211,8 +2210,8 @@ next:
2211 * and the prescribed amount of weighted load. 2210 * and the prescribed amount of weighted load.
2212 */ 2211 */
2213 if (pulled < max_nr_move && rem_load_move > 0) { 2212 if (pulled < max_nr_move && rem_load_move > 0) {
2214 if (p->prio < this_best_prio) 2213 if (p->prio < *this_best_prio)
2215 this_best_prio = p->prio; 2214 *this_best_prio = p->prio;
2216 p = iterator->next(iterator->arg); 2215 p = iterator->next(iterator->arg);
2217 goto next; 2216 goto next;
2218 } 2217 }
@@ -2231,32 +2230,52 @@ out:
2231} 2230}
2232 2231
2233/* 2232/*
2234 * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted 2233 * move_tasks tries to move up to max_load_move weighted load from busiest to
2235 * load from busiest to this_rq, as part of a balancing operation within 2234 * this_rq, as part of a balancing operation within domain "sd".
2236 * "domain". Returns the number of tasks moved. 2235 * Returns 1 if successful and 0 otherwise.
2237 * 2236 *
2238 * Called with both runqueues locked. 2237 * Called with both runqueues locked.
2239 */ 2238 */
2240static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, 2239static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
2241 unsigned long max_nr_move, unsigned long max_load_move, 2240 unsigned long max_load_move,
2242 struct sched_domain *sd, enum cpu_idle_type idle, 2241 struct sched_domain *sd, enum cpu_idle_type idle,
2243 int *all_pinned) 2242 int *all_pinned)
2244{ 2243{
2245 struct sched_class *class = sched_class_highest; 2244 struct sched_class *class = sched_class_highest;
2246 unsigned long load_moved, total_nr_moved = 0, nr_moved; 2245 unsigned long total_load_moved = 0;
2247 long rem_load_move = max_load_move; 2246 int this_best_prio = this_rq->curr->prio;
2248 2247
2249 do { 2248 do {
2250 nr_moved = class->load_balance(this_rq, this_cpu, busiest, 2249 total_load_moved +=
2251 max_nr_move, (unsigned long)rem_load_move, 2250 class->load_balance(this_rq, this_cpu, busiest,
2252 sd, idle, all_pinned, &load_moved); 2251 ULONG_MAX, max_load_move - total_load_moved,
2253 total_nr_moved += nr_moved; 2252 sd, idle, all_pinned, &this_best_prio);
2254 max_nr_move -= nr_moved;
2255 rem_load_move -= load_moved;
2256 class = class->next; 2253 class = class->next;
2257 } while (class && max_nr_move && rem_load_move > 0); 2254 } while (class && max_load_move > total_load_moved);
2255
2256 return total_load_moved > 0;
2257}
2258 2258
2259 return total_nr_moved; 2259/*
2260 * move_one_task tries to move exactly one task from busiest to this_rq, as
2261 * part of active balancing operations within "domain".
2262 * Returns 1 if successful and 0 otherwise.
2263 *
2264 * Called with both runqueues locked.
2265 */
2266static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
2267 struct sched_domain *sd, enum cpu_idle_type idle)
2268{
2269 struct sched_class *class;
2270 int this_best_prio = MAX_PRIO;
2271
2272 for (class = sched_class_highest; class; class = class->next)
2273 if (class->load_balance(this_rq, this_cpu, busiest,
2274 1, ULONG_MAX, sd, idle, NULL,
2275 &this_best_prio))
2276 return 1;
2277
2278 return 0;
2260} 2279}
2261 2280
2262/* 2281/*
@@ -2588,11 +2607,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
2588 */ 2607 */
2589#define MAX_PINNED_INTERVAL 512 2608#define MAX_PINNED_INTERVAL 512
2590 2609
2591static inline unsigned long minus_1_or_zero(unsigned long n)
2592{
2593 return n > 0 ? n - 1 : 0;
2594}
2595
2596/* 2610/*
2597 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2611 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2598 * tasks if there is an imbalance. 2612 * tasks if there is an imbalance.
@@ -2601,7 +2615,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
2601 struct sched_domain *sd, enum cpu_idle_type idle, 2615 struct sched_domain *sd, enum cpu_idle_type idle,
2602 int *balance) 2616 int *balance)
2603{ 2617{
2604 int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; 2618 int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
2605 struct sched_group *group; 2619 struct sched_group *group;
2606 unsigned long imbalance; 2620 unsigned long imbalance;
2607 struct rq *busiest; 2621 struct rq *busiest;
@@ -2642,18 +2656,17 @@ redo:
2642 2656
2643 schedstat_add(sd, lb_imbalance[idle], imbalance); 2657 schedstat_add(sd, lb_imbalance[idle], imbalance);
2644 2658
2645 nr_moved = 0; 2659 ld_moved = 0;
2646 if (busiest->nr_running > 1) { 2660 if (busiest->nr_running > 1) {
2647 /* 2661 /*
2648 * Attempt to move tasks. If find_busiest_group has found 2662 * Attempt to move tasks. If find_busiest_group has found
2649 * an imbalance but busiest->nr_running <= 1, the group is 2663 * an imbalance but busiest->nr_running <= 1, the group is
2650 * still unbalanced. nr_moved simply stays zero, so it is 2664 * still unbalanced. ld_moved simply stays zero, so it is
2651 * correctly treated as an imbalance. 2665 * correctly treated as an imbalance.
2652 */ 2666 */
2653 local_irq_save(flags); 2667 local_irq_save(flags);
2654 double_rq_lock(this_rq, busiest); 2668 double_rq_lock(this_rq, busiest);
2655 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2669 ld_moved = move_tasks(this_rq, this_cpu, busiest,
2656 minus_1_or_zero(busiest->nr_running),
2657 imbalance, sd, idle, &all_pinned); 2670 imbalance, sd, idle, &all_pinned);
2658 double_rq_unlock(this_rq, busiest); 2671 double_rq_unlock(this_rq, busiest);
2659 local_irq_restore(flags); 2672 local_irq_restore(flags);
@@ -2661,7 +2674,7 @@ redo:
2661 /* 2674 /*
2662 * some other cpu did the load balance for us. 2675 * some other cpu did the load balance for us.
2663 */ 2676 */
2664 if (nr_moved && this_cpu != smp_processor_id()) 2677 if (ld_moved && this_cpu != smp_processor_id())
2665 resched_cpu(this_cpu); 2678 resched_cpu(this_cpu);
2666 2679
2667 /* All tasks on this runqueue were pinned by CPU affinity */ 2680 /* All tasks on this runqueue were pinned by CPU affinity */
@@ -2673,7 +2686,7 @@ redo:
2673 } 2686 }
2674 } 2687 }
2675 2688
2676 if (!nr_moved) { 2689 if (!ld_moved) {
2677 schedstat_inc(sd, lb_failed[idle]); 2690 schedstat_inc(sd, lb_failed[idle]);
2678 sd->nr_balance_failed++; 2691 sd->nr_balance_failed++;
2679 2692
@@ -2722,10 +2735,10 @@ redo:
2722 sd->balance_interval *= 2; 2735 sd->balance_interval *= 2;
2723 } 2736 }
2724 2737
2725 if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2738 if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2726 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) 2739 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
2727 return -1; 2740 return -1;
2728 return nr_moved; 2741 return ld_moved;
2729 2742
2730out_balanced: 2743out_balanced:
2731 schedstat_inc(sd, lb_balanced[idle]); 2744 schedstat_inc(sd, lb_balanced[idle]);
@@ -2757,7 +2770,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
2757 struct sched_group *group; 2770 struct sched_group *group;
2758 struct rq *busiest = NULL; 2771 struct rq *busiest = NULL;
2759 unsigned long imbalance; 2772 unsigned long imbalance;
2760 int nr_moved = 0; 2773 int ld_moved = 0;
2761 int sd_idle = 0; 2774 int sd_idle = 0;
2762 int all_pinned = 0; 2775 int all_pinned = 0;
2763 cpumask_t cpus = CPU_MASK_ALL; 2776 cpumask_t cpus = CPU_MASK_ALL;
@@ -2792,12 +2805,13 @@ redo:
2792 2805
2793 schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); 2806 schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
2794 2807
2795 nr_moved = 0; 2808 ld_moved = 0;
2796 if (busiest->nr_running > 1) { 2809 if (busiest->nr_running > 1) {
2797 /* Attempt to move tasks */ 2810 /* Attempt to move tasks */
2798 double_lock_balance(this_rq, busiest); 2811 double_lock_balance(this_rq, busiest);
2799 nr_moved = move_tasks(this_rq, this_cpu, busiest, 2812 /* this_rq->clock is already updated */
2800 minus_1_or_zero(busiest->nr_running), 2813 update_rq_clock(busiest);
2814 ld_moved = move_tasks(this_rq, this_cpu, busiest,
2801 imbalance, sd, CPU_NEWLY_IDLE, 2815 imbalance, sd, CPU_NEWLY_IDLE,
2802 &all_pinned); 2816 &all_pinned);
2803 spin_unlock(&busiest->lock); 2817 spin_unlock(&busiest->lock);
@@ -2809,7 +2823,7 @@ redo:
2809 } 2823 }
2810 } 2824 }
2811 2825
2812 if (!nr_moved) { 2826 if (!ld_moved) {
2813 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); 2827 schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
2814 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && 2828 if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
2815 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) 2829 !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
@@ -2817,7 +2831,7 @@ redo:
2817 } else 2831 } else
2818 sd->nr_balance_failed = 0; 2832 sd->nr_balance_failed = 0;
2819 2833
2820 return nr_moved; 2834 return ld_moved;
2821 2835
2822out_balanced: 2836out_balanced:
2823 schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); 2837 schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
@@ -2894,6 +2908,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2894 2908
2895 /* move a task from busiest_rq to target_rq */ 2909 /* move a task from busiest_rq to target_rq */
2896 double_lock_balance(busiest_rq, target_rq); 2910 double_lock_balance(busiest_rq, target_rq);
2911 update_rq_clock(busiest_rq);
2912 update_rq_clock(target_rq);
2897 2913
2898 /* Search for an sd spanning us and the target CPU. */ 2914 /* Search for an sd spanning us and the target CPU. */
2899 for_each_domain(target_cpu, sd) { 2915 for_each_domain(target_cpu, sd) {
@@ -2905,8 +2921,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
2905 if (likely(sd)) { 2921 if (likely(sd)) {
2906 schedstat_inc(sd, alb_cnt); 2922 schedstat_inc(sd, alb_cnt);
2907 2923
2908 if (move_tasks(target_rq, target_cpu, busiest_rq, 1, 2924 if (move_one_task(target_rq, target_cpu, busiest_rq,
2909 ULONG_MAX, sd, CPU_IDLE, NULL)) 2925 sd, CPU_IDLE))
2910 schedstat_inc(sd, alb_pushed); 2926 schedstat_inc(sd, alb_pushed);
2911 else 2927 else
2912 schedstat_inc(sd, alb_failed); 2928 schedstat_inc(sd, alb_failed);
@@ -3175,8 +3191,7 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
3175 unsigned long max_nr_move, unsigned long max_load_move, 3191 unsigned long max_nr_move, unsigned long max_load_move,
3176 struct sched_domain *sd, enum cpu_idle_type idle, 3192 struct sched_domain *sd, enum cpu_idle_type idle,
3177 int *all_pinned, unsigned long *load_moved, 3193 int *all_pinned, unsigned long *load_moved,
3178 int this_best_prio, int best_prio, int best_prio_seen, 3194 int *this_best_prio, struct rq_iterator *iterator)
3179 struct rq_iterator *iterator)
3180{ 3195{
3181 *load_moved = 0; 3196 *load_moved = 0;
3182 3197
@@ -3202,7 +3217,8 @@ unsigned long long task_sched_runtime(struct task_struct *p)
3202 rq = task_rq_lock(p, &flags); 3217 rq = task_rq_lock(p, &flags);
3203 ns = p->se.sum_exec_runtime; 3218 ns = p->se.sum_exec_runtime;
3204 if (rq->curr == p) { 3219 if (rq->curr == p) {
3205 delta_exec = rq_clock(rq) - p->se.exec_start; 3220 update_rq_clock(rq);
3221 delta_exec = rq->clock - p->se.exec_start;
3206 if ((s64)delta_exec > 0) 3222 if ((s64)delta_exec > 0)
3207 ns += delta_exec; 3223 ns += delta_exec;
3208 } 3224 }
@@ -3296,11 +3312,19 @@ void scheduler_tick(void)
3296 int cpu = smp_processor_id(); 3312 int cpu = smp_processor_id();
3297 struct rq *rq = cpu_rq(cpu); 3313 struct rq *rq = cpu_rq(cpu);
3298 struct task_struct *curr = rq->curr; 3314 struct task_struct *curr = rq->curr;
3315 u64 next_tick = rq->tick_timestamp + TICK_NSEC;
3299 3316
3300 spin_lock(&rq->lock); 3317 spin_lock(&rq->lock);
3318 __update_rq_clock(rq);
3319 /*
3320 * Let rq->clock advance by at least TICK_NSEC:
3321 */
3322 if (unlikely(rq->clock < next_tick))
3323 rq->clock = next_tick;
3324 rq->tick_timestamp = rq->clock;
3325 update_cpu_load(rq);
3301 if (curr != rq->idle) /* FIXME: needed? */ 3326 if (curr != rq->idle) /* FIXME: needed? */
3302 curr->sched_class->task_tick(rq, curr); 3327 curr->sched_class->task_tick(rq, curr);
3303 update_cpu_load(rq);
3304 spin_unlock(&rq->lock); 3328 spin_unlock(&rq->lock);
3305 3329
3306#ifdef CONFIG_SMP 3330#ifdef CONFIG_SMP
@@ -3382,7 +3406,7 @@ static inline void schedule_debug(struct task_struct *prev)
3382 * Pick up the highest-prio task: 3406 * Pick up the highest-prio task:
3383 */ 3407 */
3384static inline struct task_struct * 3408static inline struct task_struct *
3385pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) 3409pick_next_task(struct rq *rq, struct task_struct *prev)
3386{ 3410{
3387 struct sched_class *class; 3411 struct sched_class *class;
3388 struct task_struct *p; 3412 struct task_struct *p;
@@ -3392,14 +3416,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev, u64 now)
3392 * the fair class we can call that function directly: 3416 * the fair class we can call that function directly:
3393 */ 3417 */
3394 if (likely(rq->nr_running == rq->cfs.nr_running)) { 3418 if (likely(rq->nr_running == rq->cfs.nr_running)) {
3395 p = fair_sched_class.pick_next_task(rq, now); 3419 p = fair_sched_class.pick_next_task(rq);
3396 if (likely(p)) 3420 if (likely(p))
3397 return p; 3421 return p;
3398 } 3422 }
3399 3423
3400 class = sched_class_highest; 3424 class = sched_class_highest;
3401 for ( ; ; ) { 3425 for ( ; ; ) {
3402 p = class->pick_next_task(rq, now); 3426 p = class->pick_next_task(rq);
3403 if (p) 3427 if (p)
3404 return p; 3428 return p;
3405 /* 3429 /*
@@ -3418,7 +3442,6 @@ asmlinkage void __sched schedule(void)
3418 struct task_struct *prev, *next; 3442 struct task_struct *prev, *next;
3419 long *switch_count; 3443 long *switch_count;
3420 struct rq *rq; 3444 struct rq *rq;
3421 u64 now;
3422 int cpu; 3445 int cpu;
3423 3446
3424need_resched: 3447need_resched:
@@ -3436,6 +3459,7 @@ need_resched_nonpreemptible:
3436 3459
3437 spin_lock_irq(&rq->lock); 3460 spin_lock_irq(&rq->lock);
3438 clear_tsk_need_resched(prev); 3461 clear_tsk_need_resched(prev);
3462 __update_rq_clock(rq);
3439 3463
3440 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 3464 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
3441 if (unlikely((prev->state & TASK_INTERRUPTIBLE) && 3465 if (unlikely((prev->state & TASK_INTERRUPTIBLE) &&
@@ -3450,9 +3474,8 @@ need_resched_nonpreemptible:
3450 if (unlikely(!rq->nr_running)) 3474 if (unlikely(!rq->nr_running))
3451 idle_balance(cpu, rq); 3475 idle_balance(cpu, rq);
3452 3476
3453 now = __rq_clock(rq); 3477 prev->sched_class->put_prev_task(rq, prev);
3454 prev->sched_class->put_prev_task(rq, prev, now); 3478 next = pick_next_task(rq, prev);
3455 next = pick_next_task(rq, prev, now);
3456 3479
3457 sched_info_switch(prev, next); 3480 sched_info_switch(prev, next);
3458 3481
@@ -3895,17 +3918,16 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
3895 unsigned long flags; 3918 unsigned long flags;
3896 int oldprio, on_rq; 3919 int oldprio, on_rq;
3897 struct rq *rq; 3920 struct rq *rq;
3898 u64 now;
3899 3921
3900 BUG_ON(prio < 0 || prio > MAX_PRIO); 3922 BUG_ON(prio < 0 || prio > MAX_PRIO);
3901 3923
3902 rq = task_rq_lock(p, &flags); 3924 rq = task_rq_lock(p, &flags);
3903 now = rq_clock(rq); 3925 update_rq_clock(rq);
3904 3926
3905 oldprio = p->prio; 3927 oldprio = p->prio;
3906 on_rq = p->se.on_rq; 3928 on_rq = p->se.on_rq;
3907 if (on_rq) 3929 if (on_rq)
3908 dequeue_task(rq, p, 0, now); 3930 dequeue_task(rq, p, 0);
3909 3931
3910 if (rt_prio(prio)) 3932 if (rt_prio(prio))
3911 p->sched_class = &rt_sched_class; 3933 p->sched_class = &rt_sched_class;
@@ -3915,7 +3937,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
3915 p->prio = prio; 3937 p->prio = prio;
3916 3938
3917 if (on_rq) { 3939 if (on_rq) {
3918 enqueue_task(rq, p, 0, now); 3940 enqueue_task(rq, p, 0);
3919 /* 3941 /*
3920 * Reschedule if we are currently running on this runqueue and 3942 * Reschedule if we are currently running on this runqueue and
3921 * our priority decreased, or if we are not currently running on 3943 * our priority decreased, or if we are not currently running on
@@ -3938,7 +3960,6 @@ void set_user_nice(struct task_struct *p, long nice)
3938 int old_prio, delta, on_rq; 3960 int old_prio, delta, on_rq;
3939 unsigned long flags; 3961 unsigned long flags;
3940 struct rq *rq; 3962 struct rq *rq;
3941 u64 now;
3942 3963
3943 if (TASK_NICE(p) == nice || nice < -20 || nice > 19) 3964 if (TASK_NICE(p) == nice || nice < -20 || nice > 19)
3944 return; 3965 return;
@@ -3947,7 +3968,7 @@ void set_user_nice(struct task_struct *p, long nice)
3947 * the task might be in the middle of scheduling on another CPU. 3968 * the task might be in the middle of scheduling on another CPU.
3948 */ 3969 */
3949 rq = task_rq_lock(p, &flags); 3970 rq = task_rq_lock(p, &flags);
3950 now = rq_clock(rq); 3971 update_rq_clock(rq);
3951 /* 3972 /*
3952 * The RT priorities are set via sched_setscheduler(), but we still 3973 * The RT priorities are set via sched_setscheduler(), but we still
3953 * allow the 'normal' nice value to be set - but as expected 3974 * allow the 'normal' nice value to be set - but as expected
@@ -3960,8 +3981,8 @@ void set_user_nice(struct task_struct *p, long nice)
3960 } 3981 }
3961 on_rq = p->se.on_rq; 3982 on_rq = p->se.on_rq;
3962 if (on_rq) { 3983 if (on_rq) {
3963 dequeue_task(rq, p, 0, now); 3984 dequeue_task(rq, p, 0);
3964 dec_load(rq, p, now); 3985 dec_load(rq, p);
3965 } 3986 }
3966 3987
3967 p->static_prio = NICE_TO_PRIO(nice); 3988 p->static_prio = NICE_TO_PRIO(nice);
@@ -3971,8 +3992,8 @@ void set_user_nice(struct task_struct *p, long nice)
3971 delta = p->prio - old_prio; 3992 delta = p->prio - old_prio;
3972 3993
3973 if (on_rq) { 3994 if (on_rq) {
3974 enqueue_task(rq, p, 0, now); 3995 enqueue_task(rq, p, 0);
3975 inc_load(rq, p, now); 3996 inc_load(rq, p);
3976 /* 3997 /*
3977 * If the task increased its priority or is running and 3998 * If the task increased its priority or is running and
3978 * lowered its priority, then reschedule its CPU: 3999 * lowered its priority, then reschedule its CPU:
@@ -4208,6 +4229,7 @@ recheck:
4208 spin_unlock_irqrestore(&p->pi_lock, flags); 4229 spin_unlock_irqrestore(&p->pi_lock, flags);
4209 goto recheck; 4230 goto recheck;
4210 } 4231 }
4232 update_rq_clock(rq);
4211 on_rq = p->se.on_rq; 4233 on_rq = p->se.on_rq;
4212 if (on_rq) 4234 if (on_rq)
4213 deactivate_task(rq, p, 0); 4235 deactivate_task(rq, p, 0);
@@ -4463,10 +4485,8 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask)
4463out_unlock: 4485out_unlock:
4464 read_unlock(&tasklist_lock); 4486 read_unlock(&tasklist_lock);
4465 mutex_unlock(&sched_hotcpu_mutex); 4487 mutex_unlock(&sched_hotcpu_mutex);
4466 if (retval)
4467 return retval;
4468 4488
4469 return 0; 4489 return retval;
4470} 4490}
4471 4491
4472/** 4492/**
@@ -4966,6 +4986,7 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
4966 on_rq = p->se.on_rq; 4986 on_rq = p->se.on_rq;
4967 if (on_rq) 4987 if (on_rq)
4968 deactivate_task(rq_src, p, 0); 4988 deactivate_task(rq_src, p, 0);
4989
4969 set_task_cpu(p, dest_cpu); 4990 set_task_cpu(p, dest_cpu);
4970 if (on_rq) { 4991 if (on_rq) {
4971 activate_task(rq_dest, p, 0); 4992 activate_task(rq_dest, p, 0);
@@ -5198,7 +5219,8 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5198 for ( ; ; ) { 5219 for ( ; ; ) {
5199 if (!rq->nr_running) 5220 if (!rq->nr_running)
5200 break; 5221 break;
5201 next = pick_next_task(rq, rq->curr, rq_clock(rq)); 5222 update_rq_clock(rq);
5223 next = pick_next_task(rq, rq->curr);
5202 if (!next) 5224 if (!next)
5203 break; 5225 break;
5204 migrate_dead(dead_cpu, next); 5226 migrate_dead(dead_cpu, next);
@@ -5210,12 +5232,19 @@ static void migrate_dead_tasks(unsigned int dead_cpu)
5210#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) 5232#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
5211 5233
5212static struct ctl_table sd_ctl_dir[] = { 5234static struct ctl_table sd_ctl_dir[] = {
5213 {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, }, 5235 {
5236 .procname = "sched_domain",
5237 .mode = 0755,
5238 },
5214 {0,}, 5239 {0,},
5215}; 5240};
5216 5241
5217static struct ctl_table sd_ctl_root[] = { 5242static struct ctl_table sd_ctl_root[] = {
5218 {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, }, 5243 {
5244 .procname = "kernel",
5245 .mode = 0755,
5246 .child = sd_ctl_dir,
5247 },
5219 {0,}, 5248 {0,},
5220}; 5249};
5221 5250
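
The sd_ctl_dir/sd_ctl_root conversion above swaps positional ctl_table initializers for C99 designated initializers, dropping the deprecated CTL_UNNUMBERED binary numbers; unnamed members simply default to zero. A minimal sketch of the two styles, using a cut-down stand-in struct rather than the real ctl_table:

#include <stdio.h>
#include <stddef.h>

struct ctl_demo {
        int ctl_name;                   /* legacy binary sysctl number */
        const char *procname;
        void *child;
        int mode;
};

/* Positional form: every member up to .mode has to be spelled out. */
static struct ctl_demo old_style[] = {
        { 0, "sched_domain", NULL, 0755 },
        { 0, },
};

/* Designated form: order is free, omitted members are zeroed. */
static struct ctl_demo new_style[] = {
        {
                .procname = "sched_domain",
                .mode     = 0755,
        },
        { 0, },
};

int main(void)
{
        printf("%s / %s\n", old_style[0].procname, new_style[0].procname);
        return 0;
}
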
@@ -5231,11 +5260,10 @@ static struct ctl_table *sd_alloc_ctl_entry(int n)
5231} 5260}
5232 5261
5233static void 5262static void
5234set_table_entry(struct ctl_table *entry, int ctl_name, 5263set_table_entry(struct ctl_table *entry,
5235 const char *procname, void *data, int maxlen, 5264 const char *procname, void *data, int maxlen,
5236 mode_t mode, proc_handler *proc_handler) 5265 mode_t mode, proc_handler *proc_handler)
5237{ 5266{
5238 entry->ctl_name = ctl_name;
5239 entry->procname = procname; 5267 entry->procname = procname;
5240 entry->data = data; 5268 entry->data = data;
5241 entry->maxlen = maxlen; 5269 entry->maxlen = maxlen;
@@ -5248,28 +5276,28 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd)
5248{ 5276{
5249 struct ctl_table *table = sd_alloc_ctl_entry(14); 5277 struct ctl_table *table = sd_alloc_ctl_entry(14);
5250 5278
5251 set_table_entry(&table[0], 1, "min_interval", &sd->min_interval, 5279 set_table_entry(&table[0], "min_interval", &sd->min_interval,
5252 sizeof(long), 0644, proc_doulongvec_minmax); 5280 sizeof(long), 0644, proc_doulongvec_minmax);
5253 set_table_entry(&table[1], 2, "max_interval", &sd->max_interval, 5281 set_table_entry(&table[1], "max_interval", &sd->max_interval,
5254 sizeof(long), 0644, proc_doulongvec_minmax); 5282 sizeof(long), 0644, proc_doulongvec_minmax);
5255 set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx, 5283 set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
5256 sizeof(int), 0644, proc_dointvec_minmax); 5284 sizeof(int), 0644, proc_dointvec_minmax);
5257 set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx, 5285 set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
5258 sizeof(int), 0644, proc_dointvec_minmax); 5286 sizeof(int), 0644, proc_dointvec_minmax);
5259 set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx, 5287 set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
5260 sizeof(int), 0644, proc_dointvec_minmax); 5288 sizeof(int), 0644, proc_dointvec_minmax);
5261 set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx, 5289 set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
5262 sizeof(int), 0644, proc_dointvec_minmax); 5290 sizeof(int), 0644, proc_dointvec_minmax);
5263 set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx, 5291 set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
5264 sizeof(int), 0644, proc_dointvec_minmax); 5292 sizeof(int), 0644, proc_dointvec_minmax);
5265 set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor, 5293 set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
5266 sizeof(int), 0644, proc_dointvec_minmax); 5294 sizeof(int), 0644, proc_dointvec_minmax);
5267 set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, 5295 set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
5268 sizeof(int), 0644, proc_dointvec_minmax); 5296 sizeof(int), 0644, proc_dointvec_minmax);
5269 set_table_entry(&table[10], 11, "cache_nice_tries", 5297 set_table_entry(&table[10], "cache_nice_tries",
5270 &sd->cache_nice_tries, 5298 &sd->cache_nice_tries,
5271 sizeof(int), 0644, proc_dointvec_minmax); 5299 sizeof(int), 0644, proc_dointvec_minmax);
5272 set_table_entry(&table[12], 13, "flags", &sd->flags, 5300 set_table_entry(&table[12], "flags", &sd->flags,
5273 sizeof(int), 0644, proc_dointvec_minmax); 5301 sizeof(int), 0644, proc_dointvec_minmax);
5274 5302
5275 return table; 5303 return table;
@@ -5289,7 +5317,6 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu)
5289 i = 0; 5317 i = 0;
5290 for_each_domain(cpu, sd) { 5318 for_each_domain(cpu, sd) {
5291 snprintf(buf, 32, "domain%d", i); 5319 snprintf(buf, 32, "domain%d", i);
5292 entry->ctl_name = i + 1;
5293 entry->procname = kstrdup(buf, GFP_KERNEL); 5320 entry->procname = kstrdup(buf, GFP_KERNEL);
5294 entry->mode = 0755; 5321 entry->mode = 0755;
5295 entry->child = sd_alloc_ctl_domain_table(sd); 5322 entry->child = sd_alloc_ctl_domain_table(sd);
@@ -5310,7 +5337,6 @@ static void init_sched_domain_sysctl(void)
5310 5337
5311 for (i = 0; i < cpu_num; i++, entry++) { 5338 for (i = 0; i < cpu_num; i++, entry++) {
5312 snprintf(buf, 32, "cpu%d", i); 5339 snprintf(buf, 32, "cpu%d", i);
5313 entry->ctl_name = i + 1;
5314 entry->procname = kstrdup(buf, GFP_KERNEL); 5340 entry->procname = kstrdup(buf, GFP_KERNEL);
5315 entry->mode = 0755; 5341 entry->mode = 0755;
5316 entry->child = sd_alloc_ctl_cpu_table(i); 5342 entry->child = sd_alloc_ctl_cpu_table(i);
@@ -5379,6 +5405,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
5379 rq->migration_thread = NULL; 5405 rq->migration_thread = NULL;
5380 /* Idle task back to normal (off runqueue, low prio) */ 5406 /* Idle task back to normal (off runqueue, low prio) */
5381 rq = task_rq_lock(rq->idle, &flags); 5407 rq = task_rq_lock(rq->idle, &flags);
5408 update_rq_clock(rq);
5382 deactivate_task(rq, rq->idle, 0); 5409 deactivate_task(rq, rq->idle, 0);
5383 rq->idle->static_prio = MAX_PRIO; 5410 rq->idle->static_prio = MAX_PRIO;
5384 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); 5411 __setscheduler(rq, rq->idle, SCHED_NORMAL, 0);
@@ -6616,12 +6643,13 @@ void normalize_rt_tasks(void)
6616 goto out_unlock; 6643 goto out_unlock;
6617#endif 6644#endif
6618 6645
6646 update_rq_clock(rq);
6619 on_rq = p->se.on_rq; 6647 on_rq = p->se.on_rq;
6620 if (on_rq) 6648 if (on_rq)
6621 deactivate_task(task_rq(p), p, 0); 6649 deactivate_task(rq, p, 0);
6622 __setscheduler(rq, p, SCHED_NORMAL, 0); 6650 __setscheduler(rq, p, SCHED_NORMAL, 0);
6623 if (on_rq) { 6651 if (on_rq) {
6624 activate_task(task_rq(p), p, 0); 6652 activate_task(rq, p, 0);
6625 resched_task(rq->curr); 6653 resched_task(rq->curr);
6626 } 6654 }
6627#ifdef CONFIG_SMP 6655#ifdef CONFIG_SMP
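
Taken together, the sched.c hunks above all implement one pattern: the explicit "u64 now" threaded through the scheduler is replaced by a per-runqueue timestamp, rq->clock, refreshed once per locked section via update_rq_clock()/__update_rq_clock() and then read by every callee. A compilable sketch of the shape, with a counter standing in for sched_clock() and stub types that are not the kernel's:

#include <stdint.h>
#include <stdio.h>

struct rq {
        uint64_t clock;                 /* valid while rq->lock is held */
};

static uint64_t fake_ns;
static uint64_t sched_clock_stub(void) /* stand-in for sched_clock() */
{
        return fake_ns += 1000;
}

/* One clock read per critical section instead of one per callee. */
static void update_rq_clock(struct rq *rq)
{
        rq->clock = sched_clock_stub();
}

static void put_prev_task(struct rq *rq)       /* was: (rq, prev, now) */
{
        printf("accounting at rq->clock = %llu\n",
               (unsigned long long)rq->clock);
}

int main(void)
{
        struct rq rq = { 0 };

        /* spin_lock_irq(&rq->lock) would be taken here */
        update_rq_clock(&rq);
        put_prev_task(&rq);             /* reads the shared timestamp */
        return 0;
}
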
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 8421b9399e10..87e524762b85 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -29,7 +29,7 @@
29 } while (0) 29 } while (0)
30 30
31static void 31static void
32print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now) 32print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
33{ 33{
34 if (rq->curr == p) 34 if (rq->curr == p)
35 SEQ_printf(m, "R"); 35 SEQ_printf(m, "R");
@@ -56,7 +56,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now)
56#endif 56#endif
57} 57}
58 58
59static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) 59static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
60{ 60{
61 struct task_struct *g, *p; 61 struct task_struct *g, *p;
62 62
@@ -77,7 +77,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now)
77 if (!p->se.on_rq || task_cpu(p) != rq_cpu) 77 if (!p->se.on_rq || task_cpu(p) != rq_cpu)
78 continue; 78 continue;
79 79
80 print_task(m, rq, p, now); 80 print_task(m, rq, p);
81 } while_each_thread(g, p); 81 } while_each_thread(g, p);
82 82
83 read_unlock_irq(&tasklist_lock); 83 read_unlock_irq(&tasklist_lock);
@@ -106,9 +106,9 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
106 (long long)wait_runtime_rq_sum); 106 (long long)wait_runtime_rq_sum);
107} 107}
108 108
109void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) 109void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
110{ 110{
111 SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); 111 SEQ_printf(m, "\ncfs_rq\n");
112 112
113#define P(x) \ 113#define P(x) \
114 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x)) 114 SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(cfs_rq->x))
@@ -124,7 +124,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
124 print_cfs_rq_runtime_sum(m, cpu, cfs_rq); 124 print_cfs_rq_runtime_sum(m, cpu, cfs_rq);
125} 125}
126 126
127static void print_cpu(struct seq_file *m, int cpu, u64 now) 127static void print_cpu(struct seq_file *m, int cpu)
128{ 128{
129 struct rq *rq = &per_cpu(runqueues, cpu); 129 struct rq *rq = &per_cpu(runqueues, cpu);
130 130
@@ -166,9 +166,9 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
166 P(cpu_load[4]); 166 P(cpu_load[4]);
167#undef P 167#undef P
168 168
169 print_cfs_stats(m, cpu, now); 169 print_cfs_stats(m, cpu);
170 170
171 print_rq(m, rq, cpu, now); 171 print_rq(m, rq, cpu);
172} 172}
173 173
174static int sched_debug_show(struct seq_file *m, void *v) 174static int sched_debug_show(struct seq_file *m, void *v)
@@ -184,7 +184,7 @@ static int sched_debug_show(struct seq_file *m, void *v)
184 SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); 184 SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now);
185 185
186 for_each_online_cpu(cpu) 186 for_each_online_cpu(cpu)
187 print_cpu(m, cpu, now); 187 print_cpu(m, cpu);
188 188
189 SEQ_printf(m, "\n"); 189 SEQ_printf(m, "\n");
190 190
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6f579ff5a9bc..c5af38948a1e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -222,21 +222,25 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity)
222{ 222{
223 u64 tmp; 223 u64 tmp;
224 224
225 if (likely(curr->load.weight == NICE_0_LOAD))
226 return granularity;
225 /* 227 /*
226 * Negative nice levels get the same granularity as nice-0: 228 * Positive nice levels get the same granularity as nice-0:
227 */ 229 */
228 if (likely(curr->load.weight >= NICE_0_LOAD)) 230 if (likely(curr->load.weight < NICE_0_LOAD)) {
229 return granularity; 231 tmp = curr->load.weight * (u64)granularity;
232 return (long) (tmp >> NICE_0_SHIFT);
233 }
230 /* 234 /*
231 * Positive nice level tasks get linearly finer 235 * Negative nice level tasks get linearly finer
232 * granularity: 236 * granularity:
233 */ 237 */
234 tmp = curr->load.weight * (u64)granularity; 238 tmp = curr->load.inv_weight * (u64)granularity;
235 239
236 /* 240 /*
237 * It will always fit into 'long': 241 * It will always fit into 'long':
238 */ 242 */
239 return (long) (tmp >> NICE_0_SHIFT); 243 return (long) (tmp >> WMULT_SHIFT);
240} 244}
241 245
242static inline void 246static inline void
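
The niced_granularity() rewrite above makes the nice-0 case an explicit fast path and, for negative nice levels, scales by the precomputed inverse weight so heavier tasks get linearly finer granularity. A standalone sketch of the arithmetic; the constants (NICE_0_LOAD = 1024, NICE_0_SHIFT = 10, WMULT_SHIFT = 32), the era's weight-table values for nice +/-5, and the inv_weight approximation as 2^32/weight are all assumptions, and the results are fair-clock deltas rather than wall-clock times:

#include <stdint.h>
#include <stdio.h>

#define NICE_0_SHIFT 10
#define NICE_0_LOAD  (1UL << NICE_0_SHIFT)
#define WMULT_SHIFT  32

static long niced_granularity(unsigned long weight, unsigned long gran)
{
        uint64_t inv_weight = ((uint64_t)1 << WMULT_SHIFT) / weight;
        uint64_t tmp;

        if (weight == NICE_0_LOAD)      /* fast path: nice 0 */
                return gran;
        if (weight < NICE_0_LOAD) {     /* positive nice: scale by weight */
                tmp = weight * (uint64_t)gran;
                return (long)(tmp >> NICE_0_SHIFT);
        }
        /* negative nice: scale by inverse weight, i.e. roughly gran/weight */
        tmp = inv_weight * (uint64_t)gran;
        return (long)(tmp >> WMULT_SHIFT);
}

int main(void)
{
        unsigned long gran = 2000000;   /* an arbitrary granularity value */

        printf("nice  0 (w=1024): %ld\n", niced_granularity(1024, gran));
        printf("nice +5 (w= 335): %ld\n", niced_granularity(335, gran));
        printf("nice -5 (w=3121): %ld\n", niced_granularity(3121, gran));
        return 0;
}
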
@@ -281,26 +285,25 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta)
281 * are not in our scheduling class. 285 * are not in our scheduling class.
282 */ 286 */
283static inline void 287static inline void
284__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now) 288__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr)
285{ 289{
286 unsigned long delta, delta_exec, delta_fair; 290 unsigned long delta, delta_exec, delta_fair, delta_mine;
287 long delta_mine;
288 struct load_weight *lw = &cfs_rq->load; 291 struct load_weight *lw = &cfs_rq->load;
289 unsigned long load = lw->weight; 292 unsigned long load = lw->weight;
290 293
291 if (unlikely(!load))
292 return;
293
294 delta_exec = curr->delta_exec; 294 delta_exec = curr->delta_exec;
295 schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max)); 295 schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
296 296
297 curr->sum_exec_runtime += delta_exec; 297 curr->sum_exec_runtime += delta_exec;
298 cfs_rq->exec_clock += delta_exec; 298 cfs_rq->exec_clock += delta_exec;
299 299
300 if (unlikely(!load))
301 return;
302
300 delta_fair = calc_delta_fair(delta_exec, lw); 303 delta_fair = calc_delta_fair(delta_exec, lw);
301 delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); 304 delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw);
302 305
303 if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) { 306 if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) {
304 delta = calc_delta_mine(cfs_rq->sleeper_bonus, 307 delta = calc_delta_mine(cfs_rq->sleeper_bonus,
305 curr->load.weight, lw); 308 curr->load.weight, lw);
306 if (unlikely(delta > cfs_rq->sleeper_bonus)) 309 if (unlikely(delta > cfs_rq->sleeper_bonus))
@@ -321,7 +324,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now)
321 add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec); 324 add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec);
322} 325}
323 326
324static void update_curr(struct cfs_rq *cfs_rq, u64 now) 327static void update_curr(struct cfs_rq *cfs_rq)
325{ 328{
326 struct sched_entity *curr = cfs_rq_curr(cfs_rq); 329 struct sched_entity *curr = cfs_rq_curr(cfs_rq);
327 unsigned long delta_exec; 330 unsigned long delta_exec;
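
A detail worth noting in the __update_curr() hunk: the unlikely(!load) early return moves below the sum_exec_runtime and exec_clock updates, so raw CPU-time accounting keeps ticking even when the queue briefly carries no load, and only the weighted fair-clock math is skipped. A toy sketch of that ordering (stub types, not the kernel's):

#include <stdint.h>
#include <stdio.h>

struct entity_demo { uint64_t sum_exec_runtime; };

static void update_demo(struct entity_demo *e, unsigned long load,
                        unsigned long delta_exec)
{
        e->sum_exec_runtime += delta_exec;  /* always accounted now */
        if (!load)
                return;                     /* only the fair math bails */
        /* calc_delta_fair()/calc_delta_mine() equivalents would run here */
}

int main(void)
{
        struct entity_demo e = { 0 };

        update_demo(&e, 0, 1000);           /* zero load: time still counted */
        printf("%llu\n", (unsigned long long)e.sum_exec_runtime);
        return 0;
}
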
@@ -334,22 +337,22 @@ static void update_curr(struct cfs_rq *cfs_rq, u64 now)
334 * since the last time we changed load (this cannot 337 * since the last time we changed load (this cannot
335 * overflow on 32 bits): 338 * overflow on 32 bits):
336 */ 339 */
337 delta_exec = (unsigned long)(now - curr->exec_start); 340 delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start);
338 341
339 curr->delta_exec += delta_exec; 342 curr->delta_exec += delta_exec;
340 343
341 if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) { 344 if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) {
342 __update_curr(cfs_rq, curr, now); 345 __update_curr(cfs_rq, curr);
343 curr->delta_exec = 0; 346 curr->delta_exec = 0;
344 } 347 }
345 curr->exec_start = now; 348 curr->exec_start = rq_of(cfs_rq)->clock;
346} 349}
347 350
348static inline void 351static inline void
349update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 352update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
350{ 353{
351 se->wait_start_fair = cfs_rq->fair_clock; 354 se->wait_start_fair = cfs_rq->fair_clock;
352 schedstat_set(se->wait_start, now); 355 schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
353} 356}
354 357
355/* 358/*
@@ -377,8 +380,7 @@ calc_weighted(unsigned long delta, unsigned long weight, int shift)
377/* 380/*
378 * Task is being enqueued - update stats: 381 * Task is being enqueued - update stats:
379 */ 382 */
380static void 383static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
381update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
382{ 384{
383 s64 key; 385 s64 key;
384 386
@@ -387,7 +389,7 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
387 * a dequeue/enqueue event is a NOP) 389 * a dequeue/enqueue event is a NOP)
388 */ 390 */
389 if (se != cfs_rq_curr(cfs_rq)) 391 if (se != cfs_rq_curr(cfs_rq))
390 update_stats_wait_start(cfs_rq, se, now); 392 update_stats_wait_start(cfs_rq, se);
391 /* 393 /*
392 * Update the key: 394 * Update the key:
393 */ 395 */
@@ -407,7 +409,8 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
407 (WMULT_SHIFT - NICE_0_SHIFT); 409 (WMULT_SHIFT - NICE_0_SHIFT);
408 } else { 410 } else {
409 tmp = se->wait_runtime; 411 tmp = se->wait_runtime;
410 key -= (tmp * se->load.weight) >> NICE_0_SHIFT; 412 key -= (tmp * se->load.inv_weight) >>
413 (WMULT_SHIFT - NICE_0_SHIFT);
411 } 414 }
412 } 415 }
413 416
@@ -418,11 +421,12 @@ update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
418 * Note: must be called with a freshly updated rq->fair_clock. 421 * Note: must be called with a freshly updated rq->fair_clock.
419 */ 422 */
420static inline void 423static inline void
421__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 424__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
422{ 425{
423 unsigned long delta_fair = se->delta_fair_run; 426 unsigned long delta_fair = se->delta_fair_run;
424 427
425 schedstat_set(se->wait_max, max(se->wait_max, now - se->wait_start)); 428 schedstat_set(se->wait_max, max(se->wait_max,
429 rq_of(cfs_rq)->clock - se->wait_start));
426 430
427 if (unlikely(se->load.weight != NICE_0_LOAD)) 431 if (unlikely(se->load.weight != NICE_0_LOAD))
428 delta_fair = calc_weighted(delta_fair, se->load.weight, 432 delta_fair = calc_weighted(delta_fair, se->load.weight,
@@ -432,7 +436,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
432} 436}
433 437
434static void 438static void
435update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 439update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
436{ 440{
437 unsigned long delta_fair; 441 unsigned long delta_fair;
438 442
@@ -442,7 +446,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
442 se->delta_fair_run += delta_fair; 446 se->delta_fair_run += delta_fair;
443 if (unlikely(abs(se->delta_fair_run) >= 447 if (unlikely(abs(se->delta_fair_run) >=
444 sysctl_sched_stat_granularity)) { 448 sysctl_sched_stat_granularity)) {
445 __update_stats_wait_end(cfs_rq, se, now); 449 __update_stats_wait_end(cfs_rq, se);
446 se->delta_fair_run = 0; 450 se->delta_fair_run = 0;
447 } 451 }
448 452
@@ -451,34 +455,34 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
451} 455}
452 456
453static inline void 457static inline void
454update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 458update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
455{ 459{
456 update_curr(cfs_rq, now); 460 update_curr(cfs_rq);
457 /* 461 /*
458 * Mark the end of the wait period if dequeueing a 462 * Mark the end of the wait period if dequeueing a
459 * waiting task: 463 * waiting task:
460 */ 464 */
461 if (se != cfs_rq_curr(cfs_rq)) 465 if (se != cfs_rq_curr(cfs_rq))
462 update_stats_wait_end(cfs_rq, se, now); 466 update_stats_wait_end(cfs_rq, se);
463} 467}
464 468
465/* 469/*
466 * We are picking a new current task - update its stats: 470 * We are picking a new current task - update its stats:
467 */ 471 */
468static inline void 472static inline void
469update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 473update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
470{ 474{
471 /* 475 /*
472 * We are starting a new run period: 476 * We are starting a new run period:
473 */ 477 */
474 se->exec_start = now; 478 se->exec_start = rq_of(cfs_rq)->clock;
475} 479}
476 480
477/* 481/*
478 * We are descheduling a task - update its stats: 482 * We are descheduling a task - update its stats:
479 */ 483 */
480static inline void 484static inline void
481update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 485update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
482{ 486{
483 se->exec_start = 0; 487 se->exec_start = 0;
484} 488}
@@ -487,8 +491,7 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
487 * Scheduling class queueing methods: 491 * Scheduling class queueing methods:
488 */ 492 */
489 493
490static void 494static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
491__enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
492{ 495{
493 unsigned long load = cfs_rq->load.weight, delta_fair; 496 unsigned long load = cfs_rq->load.weight, delta_fair;
494 long prev_runtime; 497 long prev_runtime;
@@ -522,8 +525,7 @@ __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
522 schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); 525 schedstat_add(cfs_rq, wait_runtime, se->wait_runtime);
523} 526}
524 527
525static void 528static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
526enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
527{ 529{
528 struct task_struct *tsk = task_of(se); 530 struct task_struct *tsk = task_of(se);
529 unsigned long delta_fair; 531 unsigned long delta_fair;
@@ -538,7 +540,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
538 se->delta_fair_sleep += delta_fair; 540 se->delta_fair_sleep += delta_fair;
539 if (unlikely(abs(se->delta_fair_sleep) >= 541 if (unlikely(abs(se->delta_fair_sleep) >=
540 sysctl_sched_stat_granularity)) { 542 sysctl_sched_stat_granularity)) {
541 __enqueue_sleeper(cfs_rq, se, now); 543 __enqueue_sleeper(cfs_rq, se);
542 se->delta_fair_sleep = 0; 544 se->delta_fair_sleep = 0;
543 } 545 }
544 546
@@ -546,7 +548,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
546 548
547#ifdef CONFIG_SCHEDSTATS 549#ifdef CONFIG_SCHEDSTATS
548 if (se->sleep_start) { 550 if (se->sleep_start) {
549 u64 delta = now - se->sleep_start; 551 u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
550 552
551 if ((s64)delta < 0) 553 if ((s64)delta < 0)
552 delta = 0; 554 delta = 0;
@@ -558,7 +560,7 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
558 se->sum_sleep_runtime += delta; 560 se->sum_sleep_runtime += delta;
559 } 561 }
560 if (se->block_start) { 562 if (se->block_start) {
561 u64 delta = now - se->block_start; 563 u64 delta = rq_of(cfs_rq)->clock - se->block_start;
562 564
563 if ((s64)delta < 0) 565 if ((s64)delta < 0)
564 delta = 0; 566 delta = 0;
@@ -573,26 +575,24 @@ enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
573} 575}
574 576
575static void 577static void
576enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, 578enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
577 int wakeup, u64 now)
578{ 579{
579 /* 580 /*
580 * Update the fair clock. 581 * Update the fair clock.
581 */ 582 */
582 update_curr(cfs_rq, now); 583 update_curr(cfs_rq);
583 584
584 if (wakeup) 585 if (wakeup)
585 enqueue_sleeper(cfs_rq, se, now); 586 enqueue_sleeper(cfs_rq, se);
586 587
587 update_stats_enqueue(cfs_rq, se, now); 588 update_stats_enqueue(cfs_rq, se);
588 __enqueue_entity(cfs_rq, se); 589 __enqueue_entity(cfs_rq, se);
589} 590}
590 591
591static void 592static void
592dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, 593dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
593 int sleep, u64 now)
594{ 594{
595 update_stats_dequeue(cfs_rq, se, now); 595 update_stats_dequeue(cfs_rq, se);
596 if (sleep) { 596 if (sleep) {
597 se->sleep_start_fair = cfs_rq->fair_clock; 597 se->sleep_start_fair = cfs_rq->fair_clock;
598#ifdef CONFIG_SCHEDSTATS 598#ifdef CONFIG_SCHEDSTATS
@@ -600,9 +600,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
600 struct task_struct *tsk = task_of(se); 600 struct task_struct *tsk = task_of(se);
601 601
602 if (tsk->state & TASK_INTERRUPTIBLE) 602 if (tsk->state & TASK_INTERRUPTIBLE)
603 se->sleep_start = now; 603 se->sleep_start = rq_of(cfs_rq)->clock;
604 if (tsk->state & TASK_UNINTERRUPTIBLE) 604 if (tsk->state & TASK_UNINTERRUPTIBLE)
605 se->block_start = now; 605 se->block_start = rq_of(cfs_rq)->clock;
606 } 606 }
607 cfs_rq->wait_runtime -= se->wait_runtime; 607 cfs_rq->wait_runtime -= se->wait_runtime;
608#endif 608#endif
@@ -629,7 +629,7 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se,
629} 629}
630 630
631static inline void 631static inline void
632set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) 632set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
633{ 633{
634 /* 634 /*
635 * Any task has to be enqueued before it get to execute on 635 * Any task has to be enqueued before it get to execute on
@@ -638,49 +638,46 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now)
638 * done a put_prev_task_fair() shortly before this, which 638 * done a put_prev_task_fair() shortly before this, which
639 * updated rq->fair_clock - used by update_stats_wait_end()) 639 * updated rq->fair_clock - used by update_stats_wait_end())
640 */ 640 */
641 update_stats_wait_end(cfs_rq, se, now); 641 update_stats_wait_end(cfs_rq, se);
642 update_stats_curr_start(cfs_rq, se, now); 642 update_stats_curr_start(cfs_rq, se);
643 set_cfs_rq_curr(cfs_rq, se); 643 set_cfs_rq_curr(cfs_rq, se);
644} 644}
645 645
646static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now) 646static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
647{ 647{
648 struct sched_entity *se = __pick_next_entity(cfs_rq); 648 struct sched_entity *se = __pick_next_entity(cfs_rq);
649 649
650 set_next_entity(cfs_rq, se, now); 650 set_next_entity(cfs_rq, se);
651 651
652 return se; 652 return se;
653} 653}
654 654
655static void 655static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
656put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now)
657{ 656{
658 /* 657 /*
659 * If still on the runqueue then deactivate_task() 658 * If still on the runqueue then deactivate_task()
660 * was not called and update_curr() has to be done: 659 * was not called and update_curr() has to be done:
661 */ 660 */
662 if (prev->on_rq) 661 if (prev->on_rq)
663 update_curr(cfs_rq, now); 662 update_curr(cfs_rq);
664 663
665 update_stats_curr_end(cfs_rq, prev, now); 664 update_stats_curr_end(cfs_rq, prev);
666 665
667 if (prev->on_rq) 666 if (prev->on_rq)
668 update_stats_wait_start(cfs_rq, prev, now); 667 update_stats_wait_start(cfs_rq, prev);
669 set_cfs_rq_curr(cfs_rq, NULL); 668 set_cfs_rq_curr(cfs_rq, NULL);
670} 669}
671 670
672static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) 671static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
673{ 672{
674 struct rq *rq = rq_of(cfs_rq);
675 struct sched_entity *next; 673 struct sched_entity *next;
676 u64 now = __rq_clock(rq);
677 674
678 /* 675 /*
679 * Dequeue and enqueue the task to update its 676 * Dequeue and enqueue the task to update its
680 * position within the tree: 677 * position within the tree:
681 */ 678 */
682 dequeue_entity(cfs_rq, curr, 0, now); 679 dequeue_entity(cfs_rq, curr, 0);
683 enqueue_entity(cfs_rq, curr, 0, now); 680 enqueue_entity(cfs_rq, curr, 0);
684 681
685 /* 682 /*
686 * Reschedule if another task tops the current one. 683 * Reschedule if another task tops the current one.
@@ -785,8 +782,7 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p)
785 * increased. Here we update the fair scheduling stats and 782 * increased. Here we update the fair scheduling stats and
786 * then put the task into the rbtree: 783 * then put the task into the rbtree:
787 */ 784 */
788static void 785static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
789enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
790{ 786{
791 struct cfs_rq *cfs_rq; 787 struct cfs_rq *cfs_rq;
792 struct sched_entity *se = &p->se; 788 struct sched_entity *se = &p->se;
@@ -795,7 +791,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
795 if (se->on_rq) 791 if (se->on_rq)
796 break; 792 break;
797 cfs_rq = cfs_rq_of(se); 793 cfs_rq = cfs_rq_of(se);
798 enqueue_entity(cfs_rq, se, wakeup, now); 794 enqueue_entity(cfs_rq, se, wakeup);
799 } 795 }
800} 796}
801 797
@@ -804,15 +800,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
804 * decreased. We remove the task from the rbtree and 800 * decreased. We remove the task from the rbtree and
805 * update the fair scheduling stats: 801 * update the fair scheduling stats:
806 */ 802 */
807static void 803static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
808dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
809{ 804{
810 struct cfs_rq *cfs_rq; 805 struct cfs_rq *cfs_rq;
811 struct sched_entity *se = &p->se; 806 struct sched_entity *se = &p->se;
812 807
813 for_each_sched_entity(se) { 808 for_each_sched_entity(se) {
814 cfs_rq = cfs_rq_of(se); 809 cfs_rq = cfs_rq_of(se);
815 dequeue_entity(cfs_rq, se, sleep, now); 810 dequeue_entity(cfs_rq, se, sleep);
816 /* Don't dequeue parent if it has other entities besides us */ 811 /* Don't dequeue parent if it has other entities besides us */
817 if (cfs_rq->load.weight) 812 if (cfs_rq->load.weight)
818 break; 813 break;
@@ -825,14 +820,14 @@ dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now)
825static void yield_task_fair(struct rq *rq, struct task_struct *p) 820static void yield_task_fair(struct rq *rq, struct task_struct *p)
826{ 821{
827 struct cfs_rq *cfs_rq = task_cfs_rq(p); 822 struct cfs_rq *cfs_rq = task_cfs_rq(p);
828 u64 now = __rq_clock(rq);
829 823
824 __update_rq_clock(rq);
830 /* 825 /*
831 * Dequeue and enqueue the task to update its 826 * Dequeue and enqueue the task to update its
832 * position within the tree: 827 * position within the tree:
833 */ 828 */
834 dequeue_entity(cfs_rq, &p->se, 0, now); 829 dequeue_entity(cfs_rq, &p->se, 0);
835 enqueue_entity(cfs_rq, &p->se, 0, now); 830 enqueue_entity(cfs_rq, &p->se, 0);
836} 831}
837 832
838/* 833/*
@@ -845,7 +840,8 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
845 unsigned long gran; 840 unsigned long gran;
846 841
847 if (unlikely(rt_prio(p->prio))) { 842 if (unlikely(rt_prio(p->prio))) {
848 update_curr(cfs_rq, rq_clock(rq)); 843 update_rq_clock(rq);
844 update_curr(cfs_rq);
849 resched_task(curr); 845 resched_task(curr);
850 return; 846 return;
851 } 847 }
@@ -861,7 +857,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p)
861 __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran); 857 __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran);
862} 858}
863 859
864static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) 860static struct task_struct *pick_next_task_fair(struct rq *rq)
865{ 861{
866 struct cfs_rq *cfs_rq = &rq->cfs; 862 struct cfs_rq *cfs_rq = &rq->cfs;
867 struct sched_entity *se; 863 struct sched_entity *se;
@@ -870,7 +866,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
870 return NULL; 866 return NULL;
871 867
872 do { 868 do {
873 se = pick_next_entity(cfs_rq, now); 869 se = pick_next_entity(cfs_rq);
874 cfs_rq = group_cfs_rq(se); 870 cfs_rq = group_cfs_rq(se);
875 } while (cfs_rq); 871 } while (cfs_rq);
876 872
@@ -880,14 +876,14 @@ static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now)
880/* 876/*
881 * Account for a descheduled task: 877 * Account for a descheduled task:
882 */ 878 */
883static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now) 879static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
884{ 880{
885 struct sched_entity *se = &prev->se; 881 struct sched_entity *se = &prev->se;
886 struct cfs_rq *cfs_rq; 882 struct cfs_rq *cfs_rq;
887 883
888 for_each_sched_entity(se) { 884 for_each_sched_entity(se) {
889 cfs_rq = cfs_rq_of(se); 885 cfs_rq = cfs_rq_of(se);
890 put_prev_entity(cfs_rq, se, now); 886 put_prev_entity(cfs_rq, se);
891 } 887 }
892} 888}
893 889
@@ -930,6 +926,7 @@ static struct task_struct *load_balance_next_fair(void *arg)
930 return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); 926 return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
931} 927}
932 928
929#ifdef CONFIG_FAIR_GROUP_SCHED
933static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) 930static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
934{ 931{
935 struct sched_entity *curr; 932 struct sched_entity *curr;
@@ -943,12 +940,13 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
943 940
944 return p->prio; 941 return p->prio;
945} 942}
943#endif
946 944
947static int 945static unsigned long
948load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, 946load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
949 unsigned long max_nr_move, unsigned long max_load_move, 947 unsigned long max_nr_move, unsigned long max_load_move,
950 struct sched_domain *sd, enum cpu_idle_type idle, 948 struct sched_domain *sd, enum cpu_idle_type idle,
951 int *all_pinned, unsigned long *total_load_moved) 949 int *all_pinned, int *this_best_prio)
952{ 950{
953 struct cfs_rq *busy_cfs_rq; 951 struct cfs_rq *busy_cfs_rq;
954 unsigned long load_moved, total_nr_moved = 0, nr_moved; 952 unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -959,15 +957,14 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
959 cfs_rq_iterator.next = load_balance_next_fair; 957 cfs_rq_iterator.next = load_balance_next_fair;
960 958
961 for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { 959 for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
960#ifdef CONFIG_FAIR_GROUP_SCHED
962 struct cfs_rq *this_cfs_rq; 961 struct cfs_rq *this_cfs_rq;
963 long imbalance; 962 long imbalance;
964 unsigned long maxload; 963 unsigned long maxload;
965 int this_best_prio, best_prio, best_prio_seen = 0;
966 964
967 this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); 965 this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
968 966
969 imbalance = busy_cfs_rq->load.weight - 967 imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
970 this_cfs_rq->load.weight;
971 /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */ 968 /* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
972 if (imbalance <= 0) 969 if (imbalance <= 0)
973 continue; 970 continue;
@@ -976,27 +973,17 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
976 imbalance /= 2; 973 imbalance /= 2;
977 maxload = min(rem_load_move, imbalance); 974 maxload = min(rem_load_move, imbalance);
978 975
979 this_best_prio = cfs_rq_best_prio(this_cfs_rq); 976 *this_best_prio = cfs_rq_best_prio(this_cfs_rq);
980 best_prio = cfs_rq_best_prio(busy_cfs_rq); 977#else
981 978# define maxload rem_load_move
982 /* 979#endif
983 * Enable handling of the case where there is more than one task
984 * with the best priority. If the current running task is one
985 * of those with prio==best_prio we know it won't be moved
986 * and therefore it's safe to override the skip (based on load)
987 * of any task we find with that prio.
988 */
989 if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se)
990 best_prio_seen = 1;
991
992 /* pass busy_cfs_rq argument into 980 /* pass busy_cfs_rq argument into
993 * load_balance_[start|next]_fair iterators 981 * load_balance_[start|next]_fair iterators
994 */ 982 */
995 cfs_rq_iterator.arg = busy_cfs_rq; 983 cfs_rq_iterator.arg = busy_cfs_rq;
996 nr_moved = balance_tasks(this_rq, this_cpu, busiest, 984 nr_moved = balance_tasks(this_rq, this_cpu, busiest,
997 max_nr_move, maxload, sd, idle, all_pinned, 985 max_nr_move, maxload, sd, idle, all_pinned,
998 &load_moved, this_best_prio, best_prio, 986 &load_moved, this_best_prio, &cfs_rq_iterator);
999 best_prio_seen, &cfs_rq_iterator);
1000 987
1001 total_nr_moved += nr_moved; 988 total_nr_moved += nr_moved;
1002 max_nr_move -= nr_moved; 989 max_nr_move -= nr_moved;
@@ -1006,9 +993,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
1006 break; 993 break;
1007 } 994 }
1008 995
1009 *total_load_moved = max_load_move - rem_load_move; 996 return max_load_move - rem_load_move;
1010
1011 return total_nr_moved;
1012} 997}
1013 998
1014/* 999/*
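
The load_balance_fair() change here, and its twins in sched_rt.c and sched_idletask.c below, revises the per-class contract: the hook now returns the weighted load it moved as an unsigned long and takes an int *this_best_prio that the generic balancer owns across classes, rather than returning a task count and reporting load through an out parameter. A rough sketch of the revised contract, with invented stub logic and an assumed priority scale:

#include <stdio.h>

struct rq_demo { unsigned long load; };

/* Returns load actually moved; may refine *this_best_prio for the caller. */
static unsigned long
load_balance_demo(struct rq_demo *busiest, unsigned long max_load_move,
                  int *this_best_prio)
{
        unsigned long moved =
                busiest->load < max_load_move ? busiest->load : max_load_move;

        busiest->load -= moved;
        if (moved && *this_best_prio > 100)     /* assumed prio scale */
                *this_best_prio = 100;
        return moved;
}

int main(void)
{
        struct rq_demo busiest = { .load = 3000 };
        int best_prio = 120;
        unsigned long moved = load_balance_demo(&busiest, 2048, &best_prio);

        printf("moved=%lu best_prio=%d\n", moved, best_prio);
        return 0;
}
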
@@ -1032,14 +1017,14 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr)
1032 * monopolize the CPU. Note: the parent runqueue is locked, 1017 * monopolize the CPU. Note: the parent runqueue is locked,
1033 * the child is not running yet. 1018 * the child is not running yet.
1034 */ 1019 */
1035static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now) 1020static void task_new_fair(struct rq *rq, struct task_struct *p)
1036{ 1021{
1037 struct cfs_rq *cfs_rq = task_cfs_rq(p); 1022 struct cfs_rq *cfs_rq = task_cfs_rq(p);
1038 struct sched_entity *se = &p->se; 1023 struct sched_entity *se = &p->se;
1039 1024
1040 sched_info_queued(p); 1025 sched_info_queued(p);
1041 1026
1042 update_stats_enqueue(cfs_rq, se, now); 1027 update_stats_enqueue(cfs_rq, se);
1043 /* 1028 /*
1044 * Child runs first: we let it run before the parent 1029 * Child runs first: we let it run before the parent
1045 * until it reschedules once. We set up the key so that 1030 * until it reschedules once. We set up the key so that
@@ -1072,15 +1057,10 @@ static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now)
1072 */ 1057 */
1073static void set_curr_task_fair(struct rq *rq) 1058static void set_curr_task_fair(struct rq *rq)
1074{ 1059{
1075 struct task_struct *curr = rq->curr; 1060 struct sched_entity *se = &rq->curr.se;
1076 struct sched_entity *se = &curr->se;
1077 u64 now = rq_clock(rq);
1078 struct cfs_rq *cfs_rq;
1079 1061
1080 for_each_sched_entity(se) { 1062 for_each_sched_entity(se)
1081 cfs_rq = cfs_rq_of(se); 1063 set_next_entity(cfs_rq_of(se), se);
1082 set_next_entity(cfs_rq, se, now);
1083 }
1084} 1064}
1085#else 1065#else
1086static void set_curr_task_fair(struct rq *rq) 1066static void set_curr_task_fair(struct rq *rq)
@@ -1109,12 +1089,11 @@ struct sched_class fair_sched_class __read_mostly = {
1109}; 1089};
1110 1090
1111#ifdef CONFIG_SCHED_DEBUG 1091#ifdef CONFIG_SCHED_DEBUG
1112void print_cfs_stats(struct seq_file *m, int cpu, u64 now) 1092static void print_cfs_stats(struct seq_file *m, int cpu)
1113{ 1093{
1114 struct rq *rq = cpu_rq(cpu);
1115 struct cfs_rq *cfs_rq; 1094 struct cfs_rq *cfs_rq;
1116 1095
1117 for_each_leaf_cfs_rq(rq, cfs_rq) 1096 for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
1118 print_cfs_rq(m, cpu, cfs_rq, now); 1097 print_cfs_rq(m, cpu, cfs_rq);
1119} 1098}
1120#endif 1099#endif
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 41841e741c4a..3503fb2d9f96 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
13 resched_task(rq->idle); 13 resched_task(rq->idle);
14} 14}
15 15
16static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now) 16static struct task_struct *pick_next_task_idle(struct rq *rq)
17{ 17{
18 schedstat_inc(rq, sched_goidle); 18 schedstat_inc(rq, sched_goidle);
19 19
@@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
25 * message if some code attempts to do it: 25 * message if some code attempts to do it:
26 */ 26 */
27static void 27static void
28dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now) 28dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
29{ 29{
30 spin_unlock_irq(&rq->lock); 30 spin_unlock_irq(&rq->lock);
31 printk(KERN_ERR "bad: scheduling from the idle thread!\n"); 31 printk(KERN_ERR "bad: scheduling from the idle thread!\n");
@@ -33,15 +33,15 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
33 spin_lock_irq(&rq->lock); 33 spin_lock_irq(&rq->lock);
34} 34}
35 35
36static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now) 36static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
37{ 37{
38} 38}
39 39
40static int 40static unsigned long
41load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, 41load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
42 unsigned long max_nr_move, unsigned long max_load_move, 42 unsigned long max_nr_move, unsigned long max_load_move,
43 struct sched_domain *sd, enum cpu_idle_type idle, 43 struct sched_domain *sd, enum cpu_idle_type idle,
44 int *all_pinned, unsigned long *total_load_moved) 44 int *all_pinned, int *this_best_prio)
45{ 45{
46 return 0; 46 return 0;
47} 47}
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 002fcf8d3f64..dcdcad632fd9 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -7,7 +7,7 @@
7 * Update the current task's runtime statistics. Skip current tasks that 7 * Update the current task's runtime statistics. Skip current tasks that
8 * are not in our scheduling class. 8 * are not in our scheduling class.
9 */ 9 */
10static inline void update_curr_rt(struct rq *rq, u64 now) 10static inline void update_curr_rt(struct rq *rq)
11{ 11{
12 struct task_struct *curr = rq->curr; 12 struct task_struct *curr = rq->curr;
13 u64 delta_exec; 13 u64 delta_exec;
@@ -15,18 +15,17 @@ static inline void update_curr_rt(struct rq *rq, u64 now)
15 if (!task_has_rt_policy(curr)) 15 if (!task_has_rt_policy(curr))
16 return; 16 return;
17 17
18 delta_exec = now - curr->se.exec_start; 18 delta_exec = rq->clock - curr->se.exec_start;
19 if (unlikely((s64)delta_exec < 0)) 19 if (unlikely((s64)delta_exec < 0))
20 delta_exec = 0; 20 delta_exec = 0;
21 21
22 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec)); 22 schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
23 23
24 curr->se.sum_exec_runtime += delta_exec; 24 curr->se.sum_exec_runtime += delta_exec;
25 curr->se.exec_start = now; 25 curr->se.exec_start = rq->clock;
26} 26}
27 27
28static void 28static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
29enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
30{ 29{
31 struct rt_prio_array *array = &rq->rt.active; 30 struct rt_prio_array *array = &rq->rt.active;
32 31
@@ -37,12 +36,11 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
37/* 36/*
38 * Adding/removing a task to/from a priority array: 37 * Adding/removing a task to/from a priority array:
39 */ 38 */
40static void 39static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
41dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
42{ 40{
43 struct rt_prio_array *array = &rq->rt.active; 41 struct rt_prio_array *array = &rq->rt.active;
44 42
45 update_curr_rt(rq, now); 43 update_curr_rt(rq);
46 44
47 list_del(&p->run_list); 45 list_del(&p->run_list);
48 if (list_empty(array->queue + p->prio)) 46 if (list_empty(array->queue + p->prio))
@@ -75,7 +73,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
75 resched_task(rq->curr); 73 resched_task(rq->curr);
76} 74}
77 75
78static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now) 76static struct task_struct *pick_next_task_rt(struct rq *rq)
79{ 77{
80 struct rt_prio_array *array = &rq->rt.active; 78 struct rt_prio_array *array = &rq->rt.active;
81 struct task_struct *next; 79 struct task_struct *next;
@@ -89,14 +87,14 @@ static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
89 queue = array->queue + idx; 87 queue = array->queue + idx;
90 next = list_entry(queue->next, struct task_struct, run_list); 88 next = list_entry(queue->next, struct task_struct, run_list);
91 89
92 next->se.exec_start = now; 90 next->se.exec_start = rq->clock;
93 91
94 return next; 92 return next;
95} 93}
96 94
97static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now) 95static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
98{ 96{
99 update_curr_rt(rq, now); 97 update_curr_rt(rq);
100 p->se.exec_start = 0; 98 p->se.exec_start = 0;
101} 99}
102 100
@@ -172,28 +170,15 @@ static struct task_struct *load_balance_next_rt(void *arg)
172 return p; 170 return p;
173} 171}
174 172
175static int 173static unsigned long
176load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, 174load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
177 unsigned long max_nr_move, unsigned long max_load_move, 175 unsigned long max_nr_move, unsigned long max_load_move,
178 struct sched_domain *sd, enum cpu_idle_type idle, 176 struct sched_domain *sd, enum cpu_idle_type idle,
179 int *all_pinned, unsigned long *load_moved) 177 int *all_pinned, int *this_best_prio)
180{ 178{
181 int this_best_prio, best_prio, best_prio_seen = 0;
182 int nr_moved; 179 int nr_moved;
183 struct rq_iterator rt_rq_iterator; 180 struct rq_iterator rt_rq_iterator;
184 181 unsigned long load_moved;
185 best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
186 this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
187
188 /*
189 * Enable handling of the case where there is more than one task
190 * with the best priority. If the current running task is one
191 * of those with prio==best_prio we know it won't be moved
192 * and therefore it's safe to override the skip (based on load)
193 * of any task we find with that prio.
194 */
195 if (busiest->curr->prio == best_prio)
196 best_prio_seen = 1;
197 182
198 rt_rq_iterator.start = load_balance_start_rt; 183 rt_rq_iterator.start = load_balance_start_rt;
199 rt_rq_iterator.next = load_balance_next_rt; 184 rt_rq_iterator.next = load_balance_next_rt;
@@ -203,11 +188,10 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
203 rt_rq_iterator.arg = busiest; 188 rt_rq_iterator.arg = busiest;
204 189
205 nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, 190 nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
206 max_load_move, sd, idle, all_pinned, load_moved, 191 max_load_move, sd, idle, all_pinned, &load_moved,
207 this_best_prio, best_prio, best_prio_seen, 192 this_best_prio, &rt_rq_iterator);
208 &rt_rq_iterator);
209 193
210 return nr_moved; 194 return load_moved;
211} 195}
212 196
213static void task_tick_rt(struct rq *rq, struct task_struct *p) 197static void task_tick_rt(struct rq *rq, struct task_struct *p)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79c891e6266c..8bdb8c07e04f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1023,6 +1023,7 @@ static ctl_table vm_table[] = {
1023 .mode = 0644, 1023 .mode = 0644,
1024 .proc_handler = &proc_doulongvec_minmax, 1024 .proc_handler = &proc_doulongvec_minmax,
1025 }, 1025 },
1026#endif
1026#ifdef CONFIG_NUMA 1027#ifdef CONFIG_NUMA
1027 { 1028 {
1028 .ctl_name = CTL_UNNUMBERED, 1029 .ctl_name = CTL_UNNUMBERED,
@@ -1034,7 +1035,6 @@ static ctl_table vm_table[] = {
1034 .strategy = &sysctl_string, 1035 .strategy = &sysctl_string,
1035 }, 1036 },
1036#endif 1037#endif
1037#endif
1038#if defined(CONFIG_X86_32) || \ 1038#if defined(CONFIG_X86_32) || \
1039 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL)) 1039 (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
1040 { 1040 {
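
The sysctl.c hunk is purely a preprocessor-nesting fix: the closing #endif moves above the CONFIG_NUMA block, so the NUMA entry no longer vanishes whenever the enclosing config option is disabled. The shape of the bug, with placeholder FOO/BAR options:

/* Before: bar_entry exists only when both FOO and BAR are set. */
#ifdef FOO
int foo_entry;
#ifdef BAR
int bar_entry;
#endif
#endif

/* After: bar_entry2 depends on BAR alone, as intended. */
#ifdef FOO
int foo_entry2;
#endif
#ifdef BAR
int bar_entry2;
#endif

int main(void) { return 0; }
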
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 2ad1c37b8dfe..41dd3105ce7f 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -113,16 +113,6 @@ int clockevents_register_notifier(struct notifier_block *nb)
113 return ret; 113 return ret;
114} 114}
115 115
116/**
117 * clockevents_unregister_notifier - unregister a clock events change listener
118 */
119void clockevents_unregister_notifier(struct notifier_block *nb)
120{
121 spin_lock(&clockevents_lock);
122 raw_notifier_chain_unregister(&clockevents_chain, nb);
123 spin_unlock(&clockevents_lock);
124}
125
126/* 116/*
127 * Notify about a clock event change. Called with clockevents_lock 117 * Notify about a clock event change. Called with clockevents_lock
128 * held. 118 * held.
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 473f5aed6cae..bd5edaeaa80b 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -145,9 +145,9 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
145 */ 145 */
146void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, 146void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
147 int rowsize, int groupsize, 147 int rowsize, int groupsize,
148 void *buf, size_t len, bool ascii) 148 const void *buf, size_t len, bool ascii)
149{ 149{
150 u8 *ptr = buf; 150 const u8 *ptr = buf;
151 int i, linelen, remaining = len; 151 int i, linelen, remaining = len;
152 unsigned char linebuf[200]; 152 unsigned char linebuf[200];
153 153
@@ -189,7 +189,7 @@ EXPORT_SYMBOL(print_hex_dump);
189 * rowsize of 16, groupsize of 1, and ASCII output included. 189 * rowsize of 16, groupsize of 1, and ASCII output included.
190 */ 190 */
191void print_hex_dump_bytes(const char *prefix_str, int prefix_type, 191void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
192 void *buf, size_t len) 192 const void *buf, size_t len)
193{ 193{
194 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1, 194 print_hex_dump(KERN_DEBUG, prefix_str, prefix_type, 16, 1,
195 buf, len, 1); 195 buf, len, 1);
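
The hexdump change is a const-correctness fix: print_hex_dump() and print_hex_dump_bytes() only read the buffer, so taking const void * lets callers hand over pointers to const data without casts or qualifier warnings. A minimal illustration of the same idea:

#include <stddef.h>
#include <stdio.h>

/* A read-only consumer: const void * accepts const and non-const data. */
static void dump_first_byte(const void *buf, size_t len)
{
        const unsigned char *p = buf;

        if (len)
                printf("%02x\n", p[0]);
}

static const unsigned char header[4] = { 0xde, 0xad, 0xbe, 0xef };

int main(void)
{
        dump_first_byte(header, sizeof(header));    /* no cast needed */
        return 0;
}
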
diff --git a/mm/filemap.c b/mm/filemap.c
index 6cf700d41844..90b657b50f81 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -843,7 +843,7 @@ static void shrink_readahead_size_eio(struct file *filp,
843/** 843/**
844 * do_generic_mapping_read - generic file read routine 844 * do_generic_mapping_read - generic file read routine
845 * @mapping: address_space to be read 845 * @mapping: address_space to be read
846 * @ra: file's readahead state 846 * @_ra: file's readahead state
847 * @filp: the file to read 847 * @filp: the file to read
848 * @ppos: current file position 848 * @ppos: current file position
849 * @desc: read_descriptor 849 * @desc: read_descriptor
@@ -1218,26 +1218,6 @@ out:
1218} 1218}
1219EXPORT_SYMBOL(generic_file_aio_read); 1219EXPORT_SYMBOL(generic_file_aio_read);
1220 1220
1221int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
1222{
1223 ssize_t written;
1224 unsigned long count = desc->count;
1225 struct file *file = desc->arg.data;
1226
1227 if (size > count)
1228 size = count;
1229
1230 written = file->f_op->sendpage(file, page, offset,
1231 size, &file->f_pos, size<count);
1232 if (written < 0) {
1233 desc->error = written;
1234 written = 0;
1235 }
1236 desc->count = count - written;
1237 desc->written += written;
1238 return written;
1239}
1240
1241static ssize_t 1221static ssize_t
1242do_readahead(struct address_space *mapping, struct file *filp, 1222do_readahead(struct address_space *mapping, struct file *filp,
1243 unsigned long index, unsigned long nr) 1223 unsigned long index, unsigned long nr)
diff --git a/mm/slub.c b/mm/slub.c
index 6c6d74ff0694..69d02e3e439e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -211,7 +211,8 @@ static inline void ClearSlabDebug(struct page *page)
211#define MAX_OBJECTS_PER_SLAB 65535 211#define MAX_OBJECTS_PER_SLAB 65535
212 212
213/* Internal SLUB flags */ 213/* Internal SLUB flags */
214#define __OBJECT_POISON 0x80000000 /* Poison object */ 214#define __OBJECT_POISON 0x80000000 /* Poison object */
215#define __SYSFS_ADD_DEFERRED 0x40000000 /* Not yet visible via sysfs */
215 216
 /* Not all arches define cache_line_size */
 #ifndef cache_line_size
@@ -2277,10 +2278,26 @@ panic:
 }
 
 #ifdef CONFIG_ZONE_DMA
+
+static void sysfs_add_func(struct work_struct *w)
+{
+	struct kmem_cache *s;
+
+	down_write(&slub_lock);
+	list_for_each_entry(s, &slab_caches, list) {
+		if (s->flags & __SYSFS_ADD_DEFERRED) {
+			s->flags &= ~__SYSFS_ADD_DEFERRED;
+			sysfs_slab_add(s);
+		}
+	}
+	up_write(&slub_lock);
+}
+
+static DECLARE_WORK(sysfs_add_work, sysfs_add_func);
+
 static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 {
 	struct kmem_cache *s;
-	struct kmem_cache *x;
 	char *text;
 	size_t realsize;
 
@@ -2289,22 +2306,36 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 		return s;
 
 	/* Dynamically create dma cache */
-	x = kmalloc(kmem_size, flags & ~SLUB_DMA);
-	if (!x)
-		panic("Unable to allocate memory for dma cache\n");
+	if (flags & __GFP_WAIT)
+		down_write(&slub_lock);
+	else {
+		if (!down_write_trylock(&slub_lock))
+			goto out;
+	}
+
+	if (kmalloc_caches_dma[index])
+		goto unlock_out;
 
 	realsize = kmalloc_caches[index].objsize;
-	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
-		(unsigned int)realsize);
-	s = create_kmalloc_cache(x, text, realsize, flags);
-	down_write(&slub_lock);
-	if (!kmalloc_caches_dma[index]) {
-		kmalloc_caches_dma[index] = s;
-		up_write(&slub_lock);
-		return s;
+	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize),
+	s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+
+	if (!s || !text || !kmem_cache_open(s, flags, text,
+			realsize, ARCH_KMALLOC_MINALIGN,
+			SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
+		kfree(s);
+		kfree(text);
+		goto unlock_out;
 	}
+
+	list_add(&s->list, &slab_caches);
+	kmalloc_caches_dma[index] = s;
+
+	schedule_work(&sysfs_add_work);
+
+unlock_out:
 	up_write(&slub_lock);
-	kmem_cache_destroy(s);
+out:
 	return kmalloc_caches_dma[index];
 }
 #endif
@@ -2500,15 +2531,11 @@ int kmem_cache_shrink(struct kmem_cache *s)
 				slab_unlock(page);
 				discard_slab(s, page);
 			} else {
-				if (n->nr_partial > MAX_PARTIAL)
-					list_move(&page->lru,
-						slabs_by_inuse + page->inuse);
+				list_move(&page->lru,
+				slabs_by_inuse + page->inuse);
 			}
 		}
 
-		if (n->nr_partial <= MAX_PARTIAL)
-			goto out;
-
 		/*
 		 * Rebuild the partial list with the slabs filled up most
 		 * first and the least used slabs at the end.
@@ -2516,7 +2543,6 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		for (i = s->objects - 1; i >= 0; i--)
 			list_splice(slabs_by_inuse + i, n->partial.prev);
 
-out:
 		spin_unlock_irqrestore(&n->list_lock, flags);
 	}
 
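
The slub.c hunk above is a compact example of deferring work out of an awkward context: sysfs_slab_add() may sleep and take other locks, so the new DMA cache is flagged __SYSFS_ADD_DEFERRED and a work item performs the registration later from process context. A minimal sketch of the same shape, with illustrative names (my_obj, MY_OBJ_DEFERRED, my_obj_register) rather than the slub internals:

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/workqueue.h>

struct my_obj {
	struct list_head list;
	unsigned long flags;
};
#define MY_OBJ_DEFERRED 0x1

static LIST_HEAD(pending_list);
static DEFINE_MUTEX(pending_lock);

static void my_obj_register(struct my_obj *obj);	/* may sleep */

static void deferred_register_func(struct work_struct *w)
{
	struct my_obj *obj;

	mutex_lock(&pending_lock);
	list_for_each_entry(obj, &pending_list, list) {
		if (obj->flags & MY_OBJ_DEFERRED) {
			obj->flags &= ~MY_OBJ_DEFERRED;
			my_obj_register(obj);	/* safe to sleep here */
		}
	}
	mutex_unlock(&pending_lock);
}
static DECLARE_WORK(deferred_register_work, deferred_register_func);

/* From a context that must not sleep: flag the object, kick the worker. */
static void my_obj_add_deferred(struct my_obj *obj)
{
	obj->flags |= MY_OBJ_DEFERRED;
	schedule_work(&deferred_register_work);
}

Because the worker re-scans the whole list under the lock, a redundant schedule_work() is harmless and a missed one only delays registration rather than losing it.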
diff --git a/net/core/utils.c b/net/core/utils.c
index 2030bb8c2d30..0bf17da40d52 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -25,6 +25,7 @@
 #include <linux/random.h>
 #include <linux/percpu.h>
 #include <linux/init.h>
+#include <net/sock.h>
 
 #include <asm/byteorder.h>
 #include <asm/system.h>
diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c
index f13937bf9e8c..d054e9224b3e 100644
--- a/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -74,8 +74,8 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
 	struct ieee80211softmac_auth_queue_item *authptr;
 	int length = 0;
 
+check_assoc_again:
 	mutex_lock(&sm->associnfo.mutex);
-
 	/* Check if we're already associating to this or another network
 	 * If it's another network, cancel and start over with our new network
 	 * If it's our network, ignore the change, we're already doing it!
@@ -98,13 +98,18 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
 				cancel_delayed_work(&authptr->work);
 			sm->associnfo.bssvalid = 0;
 			sm->associnfo.bssfixed = 0;
-			flush_scheduled_work();
 			sm->associnfo.associating = 0;
 			sm->associnfo.associated = 0;
+			/* We must unlock to avoid deadlocks with the assoc workqueue
+			 * on the associnfo.mutex */
+			mutex_unlock(&sm->associnfo.mutex);
+			flush_scheduled_work();
+			/* Avoid race! Check assoc status again. Maybe someone started an
+			 * association while we flushed. */
+			goto check_assoc_again;
 		}
 	}
 
-
 	sm->associnfo.static_essid = 0;
 	sm->associnfo.assoc_wait = 0;
 
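
The softmac fix above is the standard cure for a flush-vs-lock deadlock: the work items scheduled by this code take associnfo.mutex themselves, so flush_scheduled_work() must never run with that mutex held. Dropping the lock opens a race, which the goto closes by re-checking state from the top. A minimal sketch of the pattern, assuming illustrative helpers (state_mutex, teardown_needed(), cancel_my_work()):

static DEFINE_MUTEX(state_mutex);

static void reconfigure(void)
{
check_again:
	mutex_lock(&state_mutex);
	if (teardown_needed()) {
		cancel_my_work();
		/* The pending work also takes state_mutex; flushing while
		 * holding it would deadlock, so drop the lock first. */
		mutex_unlock(&state_mutex);
		flush_scheduled_work();
		/* We slept: state may have changed, re-check from the top. */
		goto check_again;
	}
	/* ... proceed with reconfiguration under state_mutex ... */
	mutex_unlock(&state_mutex);
}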
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
index 321804315659..6d0c0f7364ad 100644
--- a/net/ipv4/netfilter/ipt_recent.c
+++ b/net/ipv4/netfilter/ipt_recent.c
@@ -387,12 +387,17 @@ static int recent_seq_open(struct inode *inode, struct file *file)
 	st = kzalloc(sizeof(*st), GFP_KERNEL);
 	if (st == NULL)
 		return -ENOMEM;
+
 	ret = seq_open(file, &recent_seq_ops);
-	if (ret)
+	if (ret) {
 		kfree(st);
+		goto out;
+	}
+
 	st->table = pde->data;
 	seq = file->private_data;
 	seq->private = st;
+out:
 	return ret;
 }
 
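
The ipt_recent change shows the usual shape for seq_file open methods that carry private state: the state is freed only on the path where seq_open() failed, and success falls through to attach it to the seq_file. A sketch of that shape with illustrative names (my_state, my_seq_ops):

static int my_seq_open(struct inode *inode, struct file *file)
{
	struct my_state *st;
	struct seq_file *seq;
	int ret;

	st = kzalloc(sizeof(*st), GFP_KERNEL);
	if (st == NULL)
		return -ENOMEM;

	ret = seq_open(file, &my_seq_ops);
	if (ret) {
		kfree(st);	/* seq_open failed: nothing owns st yet */
		goto out;
	}

	seq = file->private_data;
	seq->private = st;	/* from here on, release() frees st */
out:
	return ret;
}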
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 64552afd01cb..d9b5177989c6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -509,3 +509,9 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 
 module_init(nf_conntrack_l3proto_ipv4_init);
 module_exit(nf_conntrack_l3proto_ipv4_fini);
+
+void need_ipv4_conntrack(void)
+{
+	return;
+}
+EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
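
need_ipv4_conntrack() is deliberately empty: its only job is to give other modules a symbol to link against, so that loading the NAT module (see the next file) pulls in and pins the IPv4 conntrack module. The same idiom in generic form, with illustrative names:

/* Provider module: export a no-op symbol. */
void need_my_subsystem(void)
{
}
EXPORT_SYMBOL_GPL(need_my_subsystem);

/* Consumer module: calling it creates a symbol dependency, so the
 * module loader brings in the provider and holds a reference to it. */
static int __init consumer_init(void)
{
	need_my_subsystem();
	return 0;
}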
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 332814dac503..46cc99def165 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -328,7 +328,7 @@ static int __init nf_nat_standalone_init(void)
 {
 	int ret = 0;
 
-	need_conntrack();
+	need_ipv4_conntrack();
 
 #ifdef CONFIG_XFRM
 	BUG_ON(ip_nat_decode_session != NULL);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index b66556c0a5bd..5215691f2760 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -79,7 +79,6 @@ static u32 htcp_cwnd_undo(struct sock *sk)
 static inline void measure_rtt(struct sock *sk, u32 srtt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
@@ -87,8 +86,7 @@ static inline void measure_rtt(struct sock *sk, u32 srtt)
 		ca->minRTT = srtt;
 
 	/* max RTT */
-	if (icsk->icsk_ca_state == TCP_CA_Open
-	    && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
+	if (icsk->icsk_ca_state == TCP_CA_Open) {
 		if (ca->maxRTT < ca->minRTT)
 			ca->maxRTT = ca->minRTT;
 		if (ca->maxRTT < srtt
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 799a9208c4b4..095be91829ca 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -271,9 +271,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 	}
 }
 
-#define DEBUGFS_DEL(name, type)\
-	debugfs_remove(sdata->debugfs.type.name);\
-	sdata->debugfs.type.name = NULL;
+#define DEBUGFS_DEL(name, type)					\
+	do {							\
+		debugfs_remove(sdata->debugfs.type.name);	\
+		sdata->debugfs.type.name = NULL;		\
+	} while (0)
 
 static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 {
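
Wrapping DEBUGFS_DEL in do { ... } while (0) is the canonical way to make a multi-statement macro behave as a single statement, so it expands safely inside an unbraced if/else. A small illustrative example (CLEAR_ENTRY and keep_entry are made-up names):

#define CLEAR_ENTRY(e)				\
	do {					\
		debugfs_remove((e)->dentry);	\
		(e)->dentry = NULL;		\
	} while (0)

static void maybe_clear(struct my_entry *entry, bool teardown)
{
	if (teardown)
		CLEAR_ENTRY(entry);	/* expands to exactly one statement */
	else
		keep_entry(entry);	/* with the old two-statement macro,
					 * this 'else' would not even compile */
}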
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d41e696f3980..da34ea70276f 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -157,7 +157,7 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
 	struct sta_info *sta = file->private_data;
 	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
 		p += scnprintf(p, sizeof(buf)+buf-p, "%x ",
-			       sta->last_seq_ctrl[i]);
+			       le16_to_cpu(sta->last_seq_ctrl[i]));
 	p += scnprintf(p, sizeof(buf)+buf-p, "\n");
 	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 }
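
This fix only shows up on big-endian machines: last_seq_ctrl stores the field as it appears on the wire (little-endian), so it must pass through le16_to_cpu() before being printed or compared. A minimal sketch, where struct my_frame and its seq_ctrl field are illustrative:

static void dump_seq_ctrl(const struct my_frame *frame)
{
	__le16 wire = frame->seq_ctrl;	/* little-endian, as received   */
	u16 host = le16_to_cpu(wire);	/* no-op on LE, byte-swap on BE */

	printk(KERN_DEBUG "seq_ctrl: %x\n", host);
}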
diff --git a/net/mac80211/ieee80211.c b/net/mac80211/ieee80211.c
index c944b17d0fc0..8ec5ed192b5d 100644
--- a/net/mac80211/ieee80211.c
+++ b/net/mac80211/ieee80211.c
@@ -1650,6 +1650,7 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb,
 	if (skb_headroom(skb) < headroom) {
 		if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
 			dev_kfree_skb(skb);
+			dev_put(odev);
 			return 0;
 		}
 	}
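
The one-line mac80211 fix restores refcount balance: the function holds a reference on odev, and the early return on pskb_expand_head() failure skipped the matching dev_put(), leaking a device reference. The general rule is that every acquire must be released on every exit path; an illustrative sketch (not the mac80211 code itself):

static int xmit_one(struct sk_buff *skb, struct net_device *dev, int headroom)
{
	dev_hold(dev);

	if (skb_headroom(skb) < headroom &&
	    pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
		dev_kfree_skb(skb);
		dev_put(dev);	/* drop the reference on the error path too */
		return 0;
	}

	/* ... queue skb for transmission ... */
	dev_put(dev);
	return 0;
}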
diff --git a/net/mac80211/ieee80211_ioctl.c b/net/mac80211/ieee80211_ioctl.c
index d0e1ab5589db..e7904db55325 100644
--- a/net/mac80211/ieee80211_ioctl.c
+++ b/net/mac80211/ieee80211_ioctl.c
@@ -697,17 +697,24 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
-	if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
-		if (sdata->type == IEEE80211_IF_TYPE_STA ||
-		    sdata->type == IEEE80211_IF_TYPE_IBSS) {
+	switch (sdata->type) {
+	case IEEE80211_IF_TYPE_STA:
+	case IEEE80211_IF_TYPE_IBSS:
+		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
 			ssid = sdata->u.sta.ssid;
 			ssid_len = sdata->u.sta.ssid_len;
-		} else if (sdata->type == IEEE80211_IF_TYPE_AP) {
+		}
+		break;
+	case IEEE80211_IF_TYPE_AP:
+		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
 			ssid = sdata->u.ap.ssid;
 			ssid_len = sdata->u.ap.ssid_len;
-		} else
-			return -EINVAL;
+		}
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
+
 	return ieee80211_sta_req_scan(dev, ssid, ssid_len);
 }
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6f89b105a205..2863e72b4091 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1052,17 +1052,18 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	}
 	/* implicit 'else' */
 
-	/* we only allow nat config for new conntracks */
-	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
 	/* We manipulate the conntrack inside the global conntrack table lock,
 	 * so there's no need to increase the refcount */
 	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
-		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda);
+	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
+		/* we only allow nat config for new conntracks */
+		if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
+		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
+						 cda);
+	}
 
 out_unlock:
 	write_unlock_bh(&nf_conntrack_lock);
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index f46a0aeec44f..b6c844b7e1c1 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -126,7 +126,9 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
 
 	if (domain != NULL) {
 		bkt = netlbl_domhsh_hash(domain);
-		list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
+		list_for_each_entry_rcu(iter,
+				&rcu_dereference(netlbl_domhsh)->tbl[bkt],
+				list)
 			if (iter->valid && strcmp(iter->domain, domain) == 0)
 				return iter;
 	}
@@ -227,7 +229,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 		spin_lock(&netlbl_domhsh_lock);
 		if (netlbl_domhsh_search(entry->domain, 0) == NULL)
 			list_add_tail_rcu(&entry->list,
-					  &netlbl_domhsh->tbl[bkt]);
+				&rcu_dereference(netlbl_domhsh)->tbl[bkt]);
 		else
 			ret_val = -EEXIST;
 		spin_unlock(&netlbl_domhsh_lock);
@@ -423,8 +425,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 	     iter_bkt < rcu_dereference(netlbl_domhsh)->size;
 	     iter_bkt++, chain_cnt = 0) {
 		list_for_each_entry_rcu(iter_entry,
-					&netlbl_domhsh->tbl[iter_bkt],
-					list)
+			&rcu_dereference(netlbl_domhsh)->tbl[iter_bkt],
+			list)
 			if (iter_entry->valid) {
 				if (chain_cnt++ < *skip_chain)
 					continue;
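
All three netlabel hunks apply the same rule: a pointer published with rcu_assign_pointer() must be loaded through rcu_dereference() on the read side, even when it only feeds a list-iteration macro. A generic sketch of a compliant reader, with illustrative types and names:

struct my_entry {
	struct list_head list;
	int valid;
};

struct my_table {
	u32 size;
	struct list_head *tbl;
};

static struct my_table *my_table_ptr;	/* written via rcu_assign_pointer() */

static bool bucket_has_valid_entry(u32 bkt)
{
	struct my_table *tbl;
	struct my_entry *iter;
	bool found = false;

	rcu_read_lock();
	tbl = rcu_dereference(my_table_ptr);	/* pairs with the publisher */
	list_for_each_entry_rcu(iter, &tbl->tbl[bkt], list)
		if (iter->valid) {
			found = true;
			break;
		}
	rcu_read_unlock();
	return found;
}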
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4bbc59cc237c..53995af9ca4b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -736,9 +736,6 @@ gss_do_free_ctx(struct gss_cl_ctx *ctx)
 {
 	dprintk("RPC: gss_free_ctx\n");
 
-	if (ctx->gc_gss_ctx)
-		gss_delete_sec_context(&ctx->gc_gss_ctx);
-
 	kfree(ctx->gc_wire_ctx.data);
 	kfree(ctx);
 }
@@ -753,7 +750,13 @@ gss_free_ctx_callback(struct rcu_head *head)
 static void
 gss_free_ctx(struct gss_cl_ctx *ctx)
 {
+	struct gss_ctx *gc_gss_ctx;
+
+	gc_gss_ctx = rcu_dereference(ctx->gc_gss_ctx);
+	rcu_assign_pointer(ctx->gc_gss_ctx, NULL);
 	call_rcu(&ctx->gc_rcu, gss_free_ctx_callback);
+	if (gc_gss_ctx)
+		gss_delete_sec_context(&gc_gss_ctx);
 }
 
 static void
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 01c3c4105204..ebe344f34d1a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -371,8 +371,7 @@ int cache_unregister(struct cache_detail *cd)
 	}
 	if (list_empty(&cache_list)) {
 		/* module must be being unloaded so its safe to kill the worker */
-		cancel_delayed_work(&cache_cleaner);
-		flush_scheduled_work();
+		cancel_delayed_work_sync(&cache_cleaner);
 	}
 	return 0;
 }
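
This hunk and the rpc_pipe.c one below are the same conversion: cancel_delayed_work() only removes an item whose timer has not yet fired, so if the handler is already running the old code needed a full flush to wait for it, stalling on every other queued item as well. cancel_delayed_work_sync() cancels and waits for just the one item. Schematically, with an illustrative work item:

static void my_cleaner_fn(struct work_struct *w);
static DECLARE_DELAYED_WORK(my_cleaner, my_cleaner_fn);

static void shutdown_cleaner_old(void)
{
	cancel_delayed_work(&my_cleaner);	/* stops the timer only    */
	flush_scheduled_work();			/* waits on *all* work     */
}

static void shutdown_cleaner_new(void)
{
	cancel_delayed_work_sync(&my_cleaner);	/* cancel + wait, this one */
}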
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 650af064ff8d..669e12a4ed18 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -132,8 +132,7 @@ rpc_close_pipes(struct inode *inode)
 		rpci->nwriters = 0;
 		if (ops->release_pipe)
 			ops->release_pipe(inode);
-		cancel_delayed_work(&rpci->queue_timeout);
-		flush_workqueue(rpciod_workqueue);
+		cancel_delayed_work_sync(&rpci->queue_timeout);
 	}
 	rpc_inode_setowner(inode, NULL);
 	mutex_unlock(&inode->i_mutex);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b5723c262a3e..954d7ec86c7e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -50,8 +50,6 @@ static RPC_WAITQ(delay_queue, "delayq");
 /*
  * rpciod-related stuff
  */
-static DEFINE_MUTEX(rpciod_mutex);
-static atomic_t rpciod_users = ATOMIC_INIT(0);
 struct workqueue_struct *rpciod_workqueue;
 
 /*
@@ -961,60 +959,49 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
 	spin_unlock(&clnt->cl_lock);
 }
 
+int rpciod_up(void)
+{
+	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
+}
+
+void rpciod_down(void)
+{
+	module_put(THIS_MODULE);
+}
+
 /*
- * Start up the rpciod process if it's not already running.
+ * Start up the rpciod workqueue.
  */
-int
-rpciod_up(void)
+static int rpciod_start(void)
 {
 	struct workqueue_struct *wq;
-	int error = 0;
-
-	if (atomic_inc_not_zero(&rpciod_users))
-		return 0;
-
-	mutex_lock(&rpciod_mutex);
 
-	/* Guard against races with rpciod_down() */
-	if (rpciod_workqueue != NULL)
-		goto out_ok;
 	/*
 	 * Create the rpciod thread and wait for it to start.
 	 */
 	dprintk("RPC: creating workqueue rpciod\n");
-	error = -ENOMEM;
 	wq = create_workqueue("rpciod");
-	if (wq == NULL)
-		goto out;
-
 	rpciod_workqueue = wq;
-	error = 0;
-out_ok:
-	atomic_inc(&rpciod_users);
-out:
-	mutex_unlock(&rpciod_mutex);
-	return error;
+	return rpciod_workqueue != NULL;
 }
 
-void
-rpciod_down(void)
+static void rpciod_stop(void)
 {
-	if (!atomic_dec_and_test(&rpciod_users))
-		return;
+	struct workqueue_struct *wq = NULL;
 
-	mutex_lock(&rpciod_mutex);
+	if (rpciod_workqueue == NULL)
+		return;
 	dprintk("RPC: destroying workqueue rpciod\n");
 
-	if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
-		destroy_workqueue(rpciod_workqueue);
-		rpciod_workqueue = NULL;
-	}
-	mutex_unlock(&rpciod_mutex);
+	wq = rpciod_workqueue;
+	rpciod_workqueue = NULL;
+	destroy_workqueue(wq);
 }
 
 void
 rpc_destroy_mempool(void)
 {
+	rpciod_stop();
 	if (rpc_buffer_mempool)
 		mempool_destroy(rpc_buffer_mempool);
 	if (rpc_task_mempool)
@@ -1048,6 +1035,8 @@ rpc_init_mempool(void)
 						  rpc_buffer_slabp);
 	if (!rpc_buffer_mempool)
 		goto err_nomem;
+	if (!rpciod_start())
+		goto err_nomem;
 	return 0;
 err_nomem:
 	rpc_destroy_mempool();
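
The sched.c rework replaces hand-rolled user counting (rpciod_users plus rpciod_mutex) with the module reference count: the workqueue now lives exactly as long as the module, created in rpc_init_mempool() and torn down in rpc_destroy_mempool(), while rpciod_up()/rpciod_down() merely pin the module. The shape of that lifetime model, with illustrative names:

static struct workqueue_struct *my_wq;

int my_subsys_up(void)
{
	/* each user pins the module, which keeps my_wq alive */
	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
}

void my_subsys_down(void)
{
	module_put(THIS_MODULE);
}

static int __init my_init(void)
{
	my_wq = create_workqueue("my_wq");
	return my_wq ? 0 : -ENOMEM;
}

static void __exit my_exit(void)
{
	destroy_workqueue(my_wq);	/* unload implies no users remain */
}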
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 73751ab6ec0c..dae7d30dca0f 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -9,7 +9,7 @@ use strict;
 my $P = $0;
 $P =~ s@.*/@@g;
 
-my $V = '0.08';
+my $V = '0.09';
 
 use Getopt::Long qw(:config no_auto_abbrev);
 
@@ -311,7 +311,7 @@ sub process {
 
 	my $Ident = qr{[A-Za-z\d_]+};
 	my $Storage = qr{extern|static};
-	my $Sparse = qr{__user|__kernel|__force|__iomem};
+	my $Sparse = qr{__user|__kernel|__force|__iomem|__must_check|__init_refok};
 	my $NonptrType = qr{
 			\b
 			(?:const\s+)?
@@ -325,6 +325,7 @@ sub process {
 			unsigned|
 			float|
 			double|
+			bool|
 			long\s+int|
 			long\s+long|
 			long\s+long\s+int|
@@ -340,7 +341,8 @@ sub process {
 	}x;
 	my $Type = qr{
 		\b$NonptrType\b
-		(?:\s*\*+\s*const|\s*\*+)?
+		(?:\s*\*+\s*const|\s*\*+|(?:\s*\[\s*\])+)?
+		(?:\s+$Sparse)*
 	}x;
 	my $Declare = qr{(?:$Storage\s+)?$Type};
 	my $Attribute = qr{const|__read_mostly|__init|__initdata|__meminit};
@@ -494,16 +496,15 @@ sub process {
 			ERROR("use tabs not spaces\n" . $herevet);
 		}
 
-#
-# The rest of our checks refer specifically to C style
-# only apply those _outside_ comments.
-#
-		next if ($in_comment);
-
 # Remove comments from the line before processing.
-		$line =~ s@/\*.*\*/@@g;
-		$line =~ s@/\*.*@@;
-		$line =~ s@.*\*/@@;
+		my $comment_edge = ($line =~ s@/\*.*\*/@@g) +
+				   ($line =~ s@/\*.*@@) +
+				   ($line =~ s@^(.).*\*/@$1@);
+
+# The rest of our checks refer specifically to C style
+# only apply those _outside_ comments.  Only skip
+# lines in the middle of comments.
+		next if (!$comment_edge && $in_comment);
 
 # Standardise the strings and chars within the input to simplify matching.
 		$line = sanitise_line($line);
@@ -599,7 +600,7 @@ sub process {
 			if (($prevline !~ /^}/) &&
 			    ($prevline !~ /^\+}/) &&
 			    ($prevline !~ /^ }/) &&
-			    ($prevline !~ /\s$name(?:\s+$Attribute)?\s*(?:;|=)/)) {
+			    ($prevline !~ /\b\Q$name\E(?:\s+$Attribute)?\s*(?:;|=)/)) {
 				WARN("EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr);
 			}
 		}
@@ -680,9 +681,9 @@ sub process {
 
 # check for spaces between functions and their parentheses.
 		if ($line =~ /($Ident)\s+\(/ &&
-		    $1 !~ /^(?:if|for|while|switch|return|volatile)$/ &&
+		    $1 !~ /^(?:if|for|while|switch|return|volatile|__volatile__|__attribute__|format|__extension__|Copyright)$/ &&
 		    $line !~ /$Type\s+\(/ && $line !~ /^.\#\s*define\b/) {
-			ERROR("no space between function name and open parenthesis '('\n" . $herecurr);
+			WARN("no space between function name and open parenthesis '('\n" . $herecurr);
 		}
 # Check operator spacing.
 		# Note we expand the line with the leading + as the real
@@ -712,6 +713,7 @@ sub process {
 				$c = 'W' if ($elements[$n + 2] =~ /^\s/);
 				$c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
 				$c = 'O' if ($elements[$n + 2] eq '');
+				$c = 'E' if ($elements[$n + 2] =~ /\s*\\$/);
 			} else {
 				$c = 'E';
 			}
@@ -812,7 +814,11 @@ sub process {
 
 			# All the others need spaces both sides.
 			} elsif ($ctx !~ /[EW]x[WE]/) {
-				ERROR("need spaces around that '$op' $at\n" . $hereptr);
+				# Ignore email addresses <foo@bar>
+				if (!($op eq '<' && $cb =~ /$;\S+\@\S+>/) &&
+				    !($op eq '>' && $cb =~ /<\S+\@\S+$;/)) {
+					ERROR("need spaces around that '$op' $at\n" . $hereptr);
+				}
 			}
 			$off += length($elements[$n + 1]);
 		}
@@ -823,15 +829,24 @@ sub process {
 			WARN("multiple assignments should be avoided\n" . $herecurr);
 		}
 
-# check for multiple declarations, allowing for a function declaration
-# continuation.
-		if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
-		    $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
-			WARN("declaring multiple variables together should be avoided\n" . $herecurr);
-		}
+## # check for multiple declarations, allowing for a function declaration
+## # continuation.
+##		if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
+##		    $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
+##
+##		# Remove any bracketed sections to ensure we do not
+##		# falsly report the parameters of functions.
+##		my $ln = $line;
+##		while ($ln =~ s/\([^\(\)]*\)//g) {
+##		}
+##		if ($ln =~ /,/) {
+##			WARN("declaring multiple variables together should be avoided\n" . $herecurr);
+##		}
+##	}
 
 #need space before brace following if, while, etc
-		if ($line =~ /\(.*\){/ || $line =~ /do{/) {
+		if (($line =~ /\(.*\){/ && $line !~ /\($Type\){/) ||
+		    $line =~ /do{/) {
 			ERROR("need a space before the open brace '{'\n" . $herecurr);
 		}
 
@@ -841,6 +856,22 @@ sub process {
 			ERROR("need a space after that close brace '}'\n" . $herecurr);
 		}
 
+# check spacing on square brackets
+		if ($line =~ /\[\s/ && $line !~ /\[\s*$/) {
+			ERROR("no space after that open square bracket '['\n" . $herecurr);
+		}
+		if ($line =~ /\s\]/) {
+			ERROR("no space before that close square bracket ']'\n" . $herecurr);
+		}
+
+# check spacing on paretheses
+		if ($line =~ /\(\s/ && $line !~ /\(\s*$/) {
+			ERROR("no space after that open parenthesis '('\n" . $herecurr);
+		}
+		if ($line =~ /\s\)/) {
+			ERROR("no space before that close parenthesis ')'\n" . $herecurr);
+		}
+
 #goto labels aren't indented, allow a single space however
 		if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
 		   !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
@@ -910,7 +941,7 @@ sub process {
 			# grabbing the statement after the identifier
 			$prevline =~ m{^(.#\s*define\s*$Ident(?:\([^\)]*\))?\s*)(.*)\\\s*$};
 			##print "1<$1> 2<$2>\n";
-			if ($2 ne '') {
+			if (defined $2 && $2 ne '') {
 				$off = length($1);
 				$ln--;
 				$cnt++;
@@ -950,8 +981,10 @@ sub process {
 			my ($lvl, @block) = ctx_block_level($nr, $cnt);
 
 			my $stmt = join(' ', @block);
-			$stmt =~ s/^[^{]*{//;
-			$stmt =~ s/}[^}]*$//;
+			$stmt =~ s/(^[^{]*){//;
+			my $before = $1;
+			$stmt =~ s/}([^}]*$)//;
+			my $after = $1;
 
 			#print "block<" . join(' ', @block) . "><" . scalar(@block) . ">\n";
 			#print "stmt<$stmt>\n\n";
@@ -963,12 +996,14 @@ sub process {
 			# Also nested if's often require braces to
 			# disambiguate the else binding so shhh there.
 			my @semi = ($stmt =~ /;/g);
+			push(@semi, "/**/") if ($stmt =~ m@/\*@);
 			##print "semi<" . scalar(@semi) . ">\n";
 			if ($lvl == 0 && scalar(@semi) < 2 &&
-			    $stmt !~ /{/ && $stmt !~ /\bif\b/) {
+			    $stmt !~ /{/ && $stmt !~ /\bif\b/ &&
+			    $before !~ /}/ && $after !~ /{/) {
 				my $herectx = "$here\n" . join("\n", @control, @block[1 .. $#block]) . "\n";
 				shift(@block);
-				ERROR("braces {} are not necessary for single statement blocks\n" . $herectx);
+				WARN("braces {} are not necessary for single statement blocks\n" . $herectx);
 			}
 		}
 	}
@@ -1013,6 +1048,11 @@ sub process {
 #			$clean = 0;
 #		}
 
+# warn about spacing in #ifdefs
+		if ($line =~ /^.#\s*(ifdef|ifndef|elif)\s\s+/) {
+			ERROR("exactly one space required after that #$1\n" . $herecurr);
+		}
+
 # check for spinlock_t definitions without a comment.
 		if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/) {
 			my $which = $1;
@@ -1027,14 +1067,14 @@ sub process {
 			}
 		}
 # check of hardware specific defines
-		if ($line =~ m@^.#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@) {
+		if ($line =~ m@^.#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
 			CHK("architecture specific defines should be avoided\n" . $herecurr);
 		}
 
 # check the location of the inline attribute, that it is between
 # storage class and type.
-		if ($line =~ /$Type\s+(?:inline|__always_inline)\b/ ||
-		    $line =~ /\b(?:inline|always_inline)\s+$Storage/) {
+		if ($line =~ /$Type\s+(?:inline|__always_inline|noinline)\b/ ||
+		    $line =~ /\b(?:inline|__always_inline|noinline)\s+$Storage/) {
 			ERROR("inline keyword should sit between storage class and type\n" . $herecurr);
 		}
 
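
For reference, here is a short C fragment that trips several of the checks added or tightened in checkpatch 0.09 above; the classification in the comments reflects my reading of the new rules, not checkpatch's actual output:

#ifdef  CONFIG_EXAMPLE		/* two spaces after #ifdef: new ERROR   */
static int values[ 4 ];		/* space inside '[ ]': new ERROR        */
static int f (void);		/* space before '(': now only a WARN    */

static int f(void)
{
	return ( values[0] );	/* spaces inside '( )': new ERROR       */
}
#endif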