aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRussell King <rmk@dyn-67.arm.linux.org.uk>2008-07-07 17:21:34 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2008-07-07 17:21:34 -0400
commitfa6868508a0102b150ca03d976e3a1c234e4e387 (patch)
tree2b252f262695544e5caeb2bd983f8cac5158a8f6
parent5b2353859f87748f0707cec57c3953022a3321f3 (diff)
parent4ed47896935573c8423d05bddda3f269d6e6c613 (diff)
Merge branch 'machtypes' into orion
-rw-r--r--Documentation/DocBook/kgdb.tmpl20
-rw-r--r--Documentation/hwmon/sysfs-interface33
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile2
-rw-r--r--arch/alpha/Makefile1
-rw-r--r--arch/alpha/kernel/core_t2.c2
-rw-r--r--arch/alpha/kernel/pci.c17
-rw-r--r--arch/alpha/kernel/traps.c3
-rw-r--r--arch/arm/tools/mach-types126
-rw-r--r--arch/ia64/kernel/iosapic.c2
-rw-r--r--arch/ia64/kernel/setup.c3
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c2
-rw-r--r--arch/x86/Kconfig5
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/kvmclock.c89
-rw-r--r--arch/x86/kernel/pvclock.c141
-rw-r--r--arch/x86/kvm/i8254.c9
-rw-r--r--arch/x86/kvm/lapic.c1
-rw-r--r--arch/x86/kvm/mmu.c19
-rw-r--r--arch/x86/kvm/vmx.c19
-rw-r--r--arch/x86/kvm/x86.c91
-rw-r--r--arch/x86/xen/Kconfig3
-rw-r--r--arch/x86/xen/enlighten.c56
-rw-r--r--arch/x86/xen/mmu.c75
-rw-r--r--arch/x86/xen/mmu.h24
-rw-r--r--arch/x86/xen/time.c132
-rw-r--r--arch/x86/xen/xen-head.S6
-rw-r--r--drivers/char/drm/i915_drv.c1
-rw-r--r--drivers/char/tty_ioctl.c7
-rw-r--r--drivers/hwmon/abituguru3.c18
-rw-r--r--drivers/hwmon/adt7473.c3
-rw-r--r--drivers/hwmon/lm75.c20
-rw-r--r--drivers/hwmon/lm85.c25
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c6
-rw-r--r--drivers/lguest/x86/core.c15
-rw-r--r--drivers/net/pppoe.c2
-rw-r--r--drivers/watchdog/Makefile1
-rw-r--r--drivers/xen/events.c2
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/gfs2/bmap.c23
-rw-r--r--fs/gfs2/rgrp.c2
-rw-r--r--fs/nfs/mount_clnt.c5
-rw-r--r--fs/nfs/super.c76
-rw-r--r--fs/nfs/write.c7
-rw-r--r--fs/select.c2
-rw-r--r--include/asm-alpha/core_mcpcia.h2
-rw-r--r--include/asm-alpha/core_t2.h14
-rw-r--r--include/asm-alpha/io.h6
-rw-r--r--include/asm-alpha/mmu_context.h6
-rw-r--r--include/asm-alpha/percpu.h74
-rw-r--r--include/asm-alpha/system.h10
-rw-r--r--include/asm-alpha/vga.h6
-rw-r--r--include/asm-x86/kvm_host.h4
-rw-r--r--include/asm-x86/kvm_para.h18
-rw-r--r--include/asm-x86/pvclock-abi.h42
-rw-r--r--include/asm-x86/pvclock.h13
-rw-r--r--include/asm-x86/xen/page.h4
-rw-r--r--include/linux/bootmem.h2
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/tty_driver.h5
-rw-r--r--include/net/ipv6.h6
-rw-r--r--include/net/net_namespace.h11
-rw-r--r--include/xen/interface/xen.h7
-rw-r--r--kernel/futex.c93
-rw-r--r--kernel/kgdb.c3
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sched_rt.c3
-rw-r--r--mm/bootmem.c6
-rw-r--r--mm/memory.c50
-rw-r--r--mm/slab.c5
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/net_namespace.c3
-rw-r--r--net/ipv6/ip6_input.c9
-rw-r--r--net/ipv6/ipv6_sockglue.c11
-rw-r--r--net/mac80211/tx.c9
-rw-r--r--net/sctp/socket.c4
-rw-r--r--sound/isa/sb/sb_mixer.c4
-rw-r--r--sound/pci/aw2/aw2-alsa.c4
-rw-r--r--virt/kvm/ioapic.c31
79 files changed, 994 insertions, 596 deletions
diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 028a8444d95e..e8acd1f03456 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -84,10 +84,9 @@
84 runs an instance of gdb against the vmlinux file which contains 84 runs an instance of gdb against the vmlinux file which contains
85 the symbols (not boot image such as bzImage, zImage, uImage...). 85 the symbols (not boot image such as bzImage, zImage, uImage...).
86 In gdb the developer specifies the connection parameters and 86 In gdb the developer specifies the connection parameters and
87 connects to kgdb. Depending on which kgdb I/O modules exist in 87 connects to kgdb. The type of connection a developer makes with
88 the kernel for a given architecture, it may be possible to debug 88 gdb depends on the availability of kgdb I/O modules compiled as
89 the test machine's kernel with the development machine using a 89 builtin's or kernel modules in the test machine's kernel.
90 rs232 or ethernet connection.
91 </para> 90 </para>
92 </chapter> 91 </chapter>
93 <chapter id="CompilingAKernel"> 92 <chapter id="CompilingAKernel">
@@ -223,7 +222,7 @@
223 </para> 222 </para>
224 <para> 223 <para>
225 IMPORTANT NOTE: Using this option with kgdb over the console 224 IMPORTANT NOTE: Using this option with kgdb over the console
226 (kgdboc) or kgdb over ethernet (kgdboe) is not supported. 225 (kgdboc) is not supported.
227 </para> 226 </para>
228 </sect1> 227 </sect1>
229 </chapter> 228 </chapter>
@@ -249,18 +248,11 @@
249 (gdb) target remote /dev/ttyS0 248 (gdb) target remote /dev/ttyS0
250 </programlisting> 249 </programlisting>
251 <para> 250 <para>
252 Example (kgdb to a terminal server): 251 Example (kgdb to a terminal server on tcp port 2012):
253 </para> 252 </para>
254 <programlisting> 253 <programlisting>
255 % gdb ./vmlinux 254 % gdb ./vmlinux
256 (gdb) target remote udp:192.168.2.2:6443 255 (gdb) target remote 192.168.2.2:2012
257 </programlisting>
258 <para>
259 Example (kgdb over ethernet):
260 </para>
261 <programlisting>
262 % gdb ./vmlinux
263 (gdb) target remote udp:192.168.2.2:6443
264 </programlisting> 256 </programlisting>
265 <para> 257 <para>
266 Once connected, you can debug a kernel the way you would debug an 258 Once connected, you can debug a kernel the way you would debug an
diff --git a/Documentation/hwmon/sysfs-interface b/Documentation/hwmon/sysfs-interface
index f4a8ebc1ef1a..2d845730d4e0 100644
--- a/Documentation/hwmon/sysfs-interface
+++ b/Documentation/hwmon/sysfs-interface
@@ -2,17 +2,12 @@ Naming and data format standards for sysfs files
2------------------------------------------------ 2------------------------------------------------
3 3
4The libsensors library offers an interface to the raw sensors data 4The libsensors library offers an interface to the raw sensors data
5through the sysfs interface. See libsensors documentation and source for 5through the sysfs interface. Since lm-sensors 3.0.0, libsensors is
6further information. As of writing this document, libsensors 6completely chip-independent. It assumes that all the kernel drivers
7(from lm_sensors 2.8.3) is heavily chip-dependent. Adding or updating 7implement the standard sysfs interface described in this document.
8support for any given chip requires modifying the library's code. 8This makes adding or updating support for any given chip very easy, as
9This is because libsensors was written for the procfs interface 9libsensors, and applications using it, do not need to be modified.
10older kernel modules were using, which wasn't standardized enough. 10This is a major improvement compared to lm-sensors 2.
11Recent versions of libsensors (from lm_sensors 2.8.2 and later) have
12support for the sysfs interface, though.
13
14The new sysfs interface was designed to be as chip-independent as
15possible.
16 11
17Note that motherboards vary widely in the connections to sensor chips. 12Note that motherboards vary widely in the connections to sensor chips.
18There is no standard that ensures, for example, that the second 13There is no standard that ensures, for example, that the second
@@ -35,19 +30,17 @@ access this data in a simple and consistent way. That said, such programs
35will have to implement conversion, labeling and hiding of inputs. For 30will have to implement conversion, labeling and hiding of inputs. For
36this reason, it is still not recommended to bypass the library. 31this reason, it is still not recommended to bypass the library.
37 32
38If you are developing a userspace application please send us feedback on
39this standard.
40
41Note that this standard isn't completely established yet, so it is subject
42to changes. If you are writing a new hardware monitoring driver those
43features can't seem to fit in this interface, please contact us with your
44extension proposal. Keep in mind that backward compatibility must be
45preserved.
46
47Each chip gets its own directory in the sysfs /sys/devices tree. To 33Each chip gets its own directory in the sysfs /sys/devices tree. To
48find all sensor chips, it is easier to follow the device symlinks from 34find all sensor chips, it is easier to follow the device symlinks from
49/sys/class/hwmon/hwmon*. 35/sys/class/hwmon/hwmon*.
50 36
37Up to lm-sensors 3.0.0, libsensors looks for hardware monitoring attributes
38in the "physical" device directory. Since lm-sensors 3.0.1, attributes found
39in the hwmon "class" device directory are also supported. Complex drivers
40(e.g. drivers for multifunction chips) may want to use this possibility to
41avoid namespace pollution. The only drawback will be that older versions of
42libsensors won't support the driver in question.
43
51All sysfs values are fixed point numbers. 44All sysfs values are fixed point numbers.
52 45
53There is only one value per file, unlike the older /proc specification. 46There is only one value per file, unlike the older /proc specification.
diff --git a/MAINTAINERS b/MAINTAINERS
index cd587eec9fa7..8f0ec46a7096 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4431,10 +4431,10 @@ M: johnpol@2ka.mipt.ru
4431S: Maintained 4431S: Maintained
4432 4432
4433W83791D HARDWARE MONITORING DRIVER 4433W83791D HARDWARE MONITORING DRIVER
4434P: Charles Spirakis 4434P: Marc Hulsman
4435M: bezaur@gmail.com 4435M: m.hulsman@tudelft.nl
4436L: lm-sensors@lm-sensors.org 4436L: lm-sensors@lm-sensors.org
4437S: Odd Fixes 4437S: Maintained
4438 4438
4439W83793 HARDWARE MONITORING DRIVER 4439W83793 HARDWARE MONITORING DRIVER
4440P: Rudolf Marek 4440P: Rudolf Marek
diff --git a/Makefile b/Makefile
index 2b4977c9844e..6aff5f47c21d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 26 3SUBLEVEL = 26
4EXTRAVERSION = -rc7 4EXTRAVERSION = -rc8
5NAME = Rotary Wombat 5NAME = Rotary Wombat
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/alpha/Makefile b/arch/alpha/Makefile
index 4e1a8e2c4541..4759fe751aa1 100644
--- a/arch/alpha/Makefile
+++ b/arch/alpha/Makefile
@@ -13,6 +13,7 @@ NM := $(NM) -B
13LDFLAGS_vmlinux := -static -N #-relax 13LDFLAGS_vmlinux := -static -N #-relax
14CHECKFLAGS += -D__alpha__ -m64 14CHECKFLAGS += -D__alpha__ -m64
15cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data 15cflags-y := -pipe -mno-fp-regs -ffixed-8 -msmall-data
16cflags-y += $(call cc-option, -fno-jump-tables)
16 17
17cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4 18cpuflags-$(CONFIG_ALPHA_EV4) := -mcpu=ev4
18cpuflags-$(CONFIG_ALPHA_EV5) := -mcpu=ev5 19cpuflags-$(CONFIG_ALPHA_EV5) := -mcpu=ev5
diff --git a/arch/alpha/kernel/core_t2.c b/arch/alpha/kernel/core_t2.c
index c0750291b44a..d9980d47ab81 100644
--- a/arch/alpha/kernel/core_t2.c
+++ b/arch/alpha/kernel/core_t2.c
@@ -74,6 +74,8 @@
74# define DBG(args) 74# define DBG(args)
75#endif 75#endif
76 76
77DEFINE_SPINLOCK(t2_hae_lock);
78
77static volatile unsigned int t2_mcheck_any_expected; 79static volatile unsigned int t2_mcheck_any_expected;
78static volatile unsigned int t2_mcheck_last_taken; 80static volatile unsigned int t2_mcheck_last_taken;
79 81
diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 36ab22a7ea12..5cf45fc51343 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -71,6 +71,23 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82378, quirk_i
71static void __init 71static void __init
72quirk_cypress(struct pci_dev *dev) 72quirk_cypress(struct pci_dev *dev)
73{ 73{
74 /* The Notorious Cy82C693 chip. */
75
76 /* The generic legacy mode IDE fixup in drivers/pci/probe.c
77 doesn't work correctly with the Cypress IDE controller as
78 it has non-standard register layout. Fix that. */
79 if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE) {
80 dev->resource[2].start = dev->resource[3].start = 0;
81 dev->resource[2].end = dev->resource[3].end = 0;
82 dev->resource[2].flags = dev->resource[3].flags = 0;
83 if (PCI_FUNC(dev->devfn) == 2) {
84 dev->resource[0].start = 0x170;
85 dev->resource[0].end = 0x177;
86 dev->resource[1].start = 0x376;
87 dev->resource[1].end = 0x376;
88 }
89 }
90
74 /* The Cypress bridge responds on the PCI bus in the address range 91 /* The Cypress bridge responds on the PCI bus in the address range
75 0xffff0000-0xffffffff (conventional x86 BIOS ROM). There is no 92 0xffff0000-0xffffffff (conventional x86 BIOS ROM). There is no
76 way to turn this off. The bridge also supports several extended 93 way to turn this off. The bridge also supports several extended
diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c
index dc57790250d2..c778779007fc 100644
--- a/arch/alpha/kernel/traps.c
+++ b/arch/alpha/kernel/traps.c
@@ -447,7 +447,7 @@ struct unaligned_stat {
447 447
448 448
449/* Macro for exception fixup code to access integer registers. */ 449/* Macro for exception fixup code to access integer registers. */
450#define una_reg(r) (regs->regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)]) 450#define una_reg(r) (_regs[(r) >= 16 && (r) <= 18 ? (r)+19 : (r)])
451 451
452 452
453asmlinkage void 453asmlinkage void
@@ -456,6 +456,7 @@ do_entUna(void * va, unsigned long opcode, unsigned long reg,
456{ 456{
457 long error, tmp1, tmp2, tmp3, tmp4; 457 long error, tmp1, tmp2, tmp3, tmp4;
458 unsigned long pc = regs->pc - 4; 458 unsigned long pc = regs->pc - 4;
459 unsigned long *_regs = regs->regs;
459 const struct exception_table_entry *fixup; 460 const struct exception_table_entry *fixup;
460 461
461 unaligned[0].count++; 462 unaligned[0].count++;
diff --git a/arch/arm/tools/mach-types b/arch/arm/tools/mach-types
index 207a8b5a0c4a..0be5630ff568 100644
--- a/arch/arm/tools/mach-types
+++ b/arch/arm/tools/mach-types
@@ -12,7 +12,7 @@
12# 12#
13# http://www.arm.linux.org.uk/developer/machines/?action=new 13# http://www.arm.linux.org.uk/developer/machines/?action=new
14# 14#
15# Last update: Sat Apr 19 11:23:38 2008 15# Last update: Mon Jul 7 16:25:39 2008
16# 16#
17# machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number 17# machine_is_xxx CONFIG_xxxx MACH_TYPE_xxx number
18# 18#
@@ -560,7 +560,6 @@ husky MACH_HUSKY HUSKY 543
560boxer MACH_BOXER BOXER 544 560boxer MACH_BOXER BOXER 544
561shepherd MACH_SHEPHERD SHEPHERD 545 561shepherd MACH_SHEPHERD SHEPHERD 545
562aml42800aa MACH_AML42800AA AML42800AA 546 562aml42800aa MACH_AML42800AA AML42800AA 546
563ml674001 MACH_MACH_TYPE_ML674001 MACH_TYPE_ML674001 547
564lpc2294 MACH_LPC2294 LPC2294 548 563lpc2294 MACH_LPC2294 LPC2294 548
565switchgrass MACH_SWITCHGRASS SWITCHGRASS 549 564switchgrass MACH_SWITCHGRASS SWITCHGRASS 549
566ens_cmu MACH_ENS_CMU ENS_CMU 550 565ens_cmu MACH_ENS_CMU ENS_CMU 550
@@ -748,7 +747,6 @@ anubis MACH_ANUBIS ANUBIS 734
748ite8152 MACH_ITE8152 ITE8152 735 747ite8152 MACH_ITE8152 ITE8152 735
749lpc3xxx MACH_LPC3XXX LPC3XXX 736 748lpc3xxx MACH_LPC3XXX LPC3XXX 736
750puppeteer MACH_PUPPETEER PUPPETEER 737 749puppeteer MACH_PUPPETEER PUPPETEER 737
751vt001 MACH_MACH_VADATECH MACH_VADATECH 738
752e570 MACH_E570 E570 739 750e570 MACH_E570 E570 739
753x50 MACH_X50 X50 740 751x50 MACH_X50 X50 740
754recon MACH_RECON RECON 741 752recon MACH_RECON RECON 741
@@ -839,7 +837,7 @@ ccxp270 MACH_CCXP CCXP 825
839omap_gsample MACH_OMAP_GSAMPLE OMAP_GSAMPLE 826 837omap_gsample MACH_OMAP_GSAMPLE OMAP_GSAMPLE 826
840realview_eb MACH_REALVIEW_EB REALVIEW_EB 827 838realview_eb MACH_REALVIEW_EB REALVIEW_EB 827
841samoa MACH_SAMOA SAMOA 828 839samoa MACH_SAMOA SAMOA 828
842t3xscale MACH_T3XSCALE T3XSCALE 829 840palmt3 MACH_PALMT3 PALMT3 829
843i878 MACH_I878 I878 830 841i878 MACH_I878 I878 830
844borzoi MACH_BORZOI BORZOI 831 842borzoi MACH_BORZOI BORZOI 831
845gecko MACH_GECKO GECKO 832 843gecko MACH_GECKO GECKO 832
@@ -895,7 +893,7 @@ mio8390 MACH_MIO8390 MIO8390 881
895omi_board MACH_OMI_BOARD OMI_BOARD 882 893omi_board MACH_OMI_BOARD OMI_BOARD 882
896mx21civ MACH_MX21CIV MX21CIV 883 894mx21civ MACH_MX21CIV MX21CIV 883
897mahi_cdac MACH_MAHI_CDAC MAHI_CDAC 884 895mahi_cdac MACH_MAHI_CDAC MAHI_CDAC 884
898xscale_palmtx MACH_XSCALE_PALMTX XSCALE_PALMTX 885 896palmtx MACH_PALMTX PALMTX 885
899s3c2413 MACH_S3C2413 S3C2413 887 897s3c2413 MACH_S3C2413 S3C2413 887
900samsys_ep0 MACH_SAMSYS_EP0 SAMSYS_EP0 888 898samsys_ep0 MACH_SAMSYS_EP0 SAMSYS_EP0 888
901wg302v1 MACH_WG302V1 WG302V1 889 899wg302v1 MACH_WG302V1 WG302V1 889
@@ -918,7 +916,7 @@ nxdb500 MACH_NXDB500 NXDB500 905
918apf9328 MACH_APF9328 APF9328 906 916apf9328 MACH_APF9328 APF9328 906
919omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907 917omap_wipoq MACH_OMAP_WIPOQ OMAP_WIPOQ 907
920omap_twip MACH_OMAP_TWIP OMAP_TWIP 908 918omap_twip MACH_OMAP_TWIP OMAP_TWIP 908
921xscale_treo650 MACH_XSCALE_PALMTREO650 XSCALE_PALMTREO650 909 919palmtreo650 MACH_PALMTREO650 PALMTREO650 909
922acumen MACH_ACUMEN ACUMEN 910 920acumen MACH_ACUMEN ACUMEN 910
923xp100 MACH_XP100 XP100 911 921xp100 MACH_XP100 XP100 911
924fs2410 MACH_FS2410 FS2410 912 922fs2410 MACH_FS2410 FS2410 912
@@ -926,8 +924,8 @@ pxa270_cerf MACH_PXA270_CERF PXA270_CERF 913
926sq2ftlpalm MACH_SQ2FTLPALM SQ2FTLPALM 914 924sq2ftlpalm MACH_SQ2FTLPALM SQ2FTLPALM 914
927bsemserver MACH_BSEMSERVER BSEMSERVER 915 925bsemserver MACH_BSEMSERVER BSEMSERVER 915
928netclient MACH_NETCLIENT NETCLIENT 916 926netclient MACH_NETCLIENT NETCLIENT 916
929xscale_palmtt5 MACH_XSCALE_PALMTT5 XSCALE_PALMTT5 917 927palmt5 MACH_PALMT5 PALMT5 917
930xscale_palmtc MACH_OMAP_PALMTC OMAP_PALMTC 918 928palmtc MACH_PALMTC PALMTC 918
931omap_apollon MACH_OMAP_APOLLON OMAP_APOLLON 919 929omap_apollon MACH_OMAP_APOLLON OMAP_APOLLON 919
932mxc30030evb MACH_MXC30030EVB MXC30030EVB 920 930mxc30030evb MACH_MXC30030EVB MXC30030EVB 920
933rea_2d MACH_REA_2D REA_2D 921 931rea_2d MACH_REA_2D REA_2D 921
@@ -1220,7 +1218,6 @@ empca400 MACH_EMPCA400 EMPCA400 1211
1220em7210 MACH_EM7210 EM7210 1212 1218em7210 MACH_EM7210 EM7210 1212
1221htchermes MACH_HTCHERMES HTCHERMES 1213 1219htchermes MACH_HTCHERMES HTCHERMES 1213
1222eti_c1 MACH_ETI_C1 ETI_C1 1214 1220eti_c1 MACH_ETI_C1 ETI_C1 1214
1223mach_dep2410 MACH_MACH_DEP2410 MACH_DEP2410 1215
1224ac100 MACH_AC100 AC100 1216 1221ac100 MACH_AC100 AC100 1216
1225sneetch MACH_SNEETCH SNEETCH 1217 1222sneetch MACH_SNEETCH SNEETCH 1217
1226studentmate MACH_STUDENTMATE STUDENTMATE 1218 1223studentmate MACH_STUDENTMATE STUDENTMATE 1218
@@ -1421,10 +1418,10 @@ looxc550 MACH_LOOXC550 LOOXC550 1417
1421cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418 1418cnty_titan MACH_CNTY_TITAN CNTY_TITAN 1418
1422app3xx MACH_APP3XX APP3XX 1419 1419app3xx MACH_APP3XX APP3XX 1419
1423sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420 1420sideoatsgrama MACH_SIDEOATSGRAMA SIDEOATSGRAMA 1420
1424xscale_palmt700p MACH_XSCALE_PALMT700P XSCALE_PALMT700P 1421 1421palmtreo700p MACH_PALMTREO700P PALMTREO700P 1421
1425xscale_palmt700w MACH_XSCALE_PALMT700W XSCALE_PALMT700W 1422 1422palmtreo700w MACH_PALMTREO700W PALMTREO700W 1422
1426xscale_palmt750 MACH_XSCALE_PALMT750 XSCALE_PALMT750 1423 1423palmtreo750 MACH_PALMTREO750 PALMTREO750 1423
1427xscale_palmt755p MACH_XSCALE_PALMT755P XSCALE_PALMT755P 1424 1424palmtreo755p MACH_PALMTREO755P PALMTREO755P 1424
1428ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425 1425ezreganut9200 MACH_EZREGANUT9200 EZREGANUT9200 1425
1429sarge MACH_SARGE SARGE 1426 1426sarge MACH_SARGE SARGE 1426
1430a696 MACH_A696 A696 1427 1427a696 MACH_A696 A696 1427
@@ -1463,7 +1460,7 @@ artemis MACH_ARTEMIS ARTEMIS 1462
1463htctitan MACH_HTCTITAN HTCTITAN 1463 1460htctitan MACH_HTCTITAN HTCTITAN 1463
1464qranium MACH_QRANIUM QRANIUM 1464 1461qranium MACH_QRANIUM QRANIUM 1464
1465adx_wsc2 MACH_ADX_WSC2 ADX_WSC2 1465 1462adx_wsc2 MACH_ADX_WSC2 ADX_WSC2 1465
1466adx_medcom MACH_ADX_MEDINET ADX_MEDINET 1466 1463adx_medcom MACH_ADX_MEDCOM ADX_MEDCOM 1466
1467bboard MACH_BBOARD BBOARD 1467 1464bboard MACH_BBOARD BBOARD 1467
1468cambria MACH_CAMBRIA CAMBRIA 1468 1465cambria MACH_CAMBRIA CAMBRIA 1468
1469mt7xxx MACH_MT7XXX MT7XXX 1469 1466mt7xxx MACH_MT7XXX MT7XXX 1469
@@ -1519,7 +1516,7 @@ wp188 MACH_WP188 WP188 1518
1519corsica MACH_CORSICA CORSICA 1519 1516corsica MACH_CORSICA CORSICA 1519
1520bigeye MACH_BIGEYE BIGEYE 1520 1517bigeye MACH_BIGEYE BIGEYE 1520
1521tll5000 MACH_TLL5000 TLL5000 1522 1518tll5000 MACH_TLL5000 TLL5000 1522
1522hni270 MACH_HNI_X270 HNI_X270 1523 1519bebot MACH_BEBOT BEBOT 1523
1523qong MACH_QONG QONG 1524 1520qong MACH_QONG QONG 1524
1524tcompact MACH_TCOMPACT TCOMPACT 1525 1521tcompact MACH_TCOMPACT TCOMPACT 1525
1525puma5 MACH_PUMA5 PUMA5 1526 1522puma5 MACH_PUMA5 PUMA5 1526
@@ -1636,7 +1633,6 @@ awlug4lcu MACH_AWLUG4LCU AWLUG4LCU 1637
1636palermoc MACH_PALERMOC PALERMOC 1638 1633palermoc MACH_PALERMOC PALERMOC 1638
1637omap_ldp MACH_OMAP_LDP OMAP_LDP 1639 1634omap_ldp MACH_OMAP_LDP OMAP_LDP 1639
1638ip500 MACH_IP500 IP500 1640 1635ip500 MACH_IP500 IP500 1640
1639mx35ads MACH_MACH_MX35ADS MACH_MX35ADS 1641
1640ase2 MACH_ASE2 ASE2 1642 1636ase2 MACH_ASE2 ASE2 1642
1641mx35evb MACH_MX35EVB MX35EVB 1643 1637mx35evb MACH_MX35EVB MX35EVB 1643
1642aml_m8050 MACH_AML_M8050 AML_M8050 1644 1638aml_m8050 MACH_AML_M8050 AML_M8050 1644
@@ -1647,7 +1643,7 @@ badger MACH_BADGER BADGER 1648
1647trizeps4wl MACH_TRIZEPS4WL TRIZEPS4WL 1649 1643trizeps4wl MACH_TRIZEPS4WL TRIZEPS4WL 1649
1648trizeps5 MACH_TRIZEPS5 TRIZEPS5 1650 1644trizeps5 MACH_TRIZEPS5 TRIZEPS5 1650
1649marlin MACH_MARLIN MARLIN 1651 1645marlin MACH_MARLIN MARLIN 1651
1650ts7800 MACH_TS7800 TS7800 1652 1646ts78xx MACH_TS78XX TS78XX 1652
1651hpipaq214 MACH_HPIPAQ214 HPIPAQ214 1653 1647hpipaq214 MACH_HPIPAQ214 HPIPAQ214 1653
1652at572d940dcm MACH_AT572D940DCM AT572D940DCM 1654 1648at572d940dcm MACH_AT572D940DCM AT572D940DCM 1654
1653ne1board MACH_NE1BOARD NE1BOARD 1655 1649ne1board MACH_NE1BOARD NE1BOARD 1655
@@ -1720,3 +1716,99 @@ htc_kaiser MACH_HTC_KAISER HTC_KAISER 1724
1720lg_ks20 MACH_LG_KS20 LG_KS20 1725 1716lg_ks20 MACH_LG_KS20 LG_KS20 1725
1721hhgps MACH_HHGPS HHGPS 1726 1717hhgps MACH_HHGPS HHGPS 1726
1722nokia_n810_wimax MACH_NOKIA_N810_WIMAX NOKIA_N810_WIMAX 1727 1718nokia_n810_wimax MACH_NOKIA_N810_WIMAX NOKIA_N810_WIMAX 1727
1719insight MACH_INSIGHT INSIGHT 1728
1720sapphire MACH_SAPPHIRE SAPPHIRE 1729
1721csb637xo MACH_CSB637XO CSB637XO 1730
1722evisiong MACH_EVISIONG EVISIONG 1731
1723stmp37xx MACH_STMP37XX STMP37XX 1732
1724stmp378x MACH_STMP38XX STMP38XX 1733
1725tnt MACH_TNT TNT 1734
1726tbxt MACH_TBXT TBXT 1735
1727playmate MACH_PLAYMATE PLAYMATE 1736
1728pns10 MACH_PNS10 PNS10 1737
1729eznavi MACH_EZNAVI EZNAVI 1738
1730ps4000 MACH_PS4000 PS4000 1739
1731ezx_a780 MACH_EZX_A780 EZX_A780 1740
1732ezx_e680 MACH_EZX_E680 EZX_E680 1741
1733ezx_a1200 MACH_EZX_A1200 EZX_A1200 1742
1734ezx_e6 MACH_EZX_E6 EZX_E6 1743
1735ezx_e2 MACH_EZX_E2 EZX_E2 1744
1736ezx_a910 MACH_EZX_A910 EZX_A910 1745
1737cwmx31 MACH_CWMX31 CWMX31 1746
1738sl2312 MACH_SL2312 SL2312 1747
1739blenny MACH_BLENNY BLENNY 1748
1740ds107 MACH_DS107 DS107 1749
1741dsx07 MACH_DSX07 DSX07 1750
1742picocom1 MACH_PICOCOM1 PICOCOM1 1751
1743lynx_wolverine MACH_LYNX_WOLVERINE LYNX_WOLVERINE 1752
1744ubisys_p9_sc19 MACH_UBISYS_P9_SC19 UBISYS_P9_SC19 1753
1745kratos_low MACH_KRATOS_LOW KRATOS_LOW 1754
1746m700 MACH_M700 M700 1755
1747edmini_v2 MACH_EDMINI_V2 EDMINI_V2 1756
1748zipit2 MACH_ZIPIT2 ZIPIT2 1757
1749hslfemtocell MACH_HSLFEMTOCELL HSLFEMTOCELL 1758
1750daintree_at91 MACH_DAINTREE_AT91 DAINTREE_AT91 1759
1751sg560usb MACH_SG560USB SG560USB 1760
1752omap3_pandora MACH_OMAP3_PANDORA OMAP3_PANDORA 1761
1753usr8200 MACH_USR8200 USR8200 1762
1754s1s65k MACH_S1S65K S1S65K 1763
1755s2s65a MACH_S2S65A S2S65A 1764
1756icore MACH_ICORE ICORE 1765
1757mss2 MACH_MSS2 MSS2 1766
1758belmont MACH_BELMONT BELMONT 1767
1759asusp525 MACH_ASUSP525 ASUSP525 1768
1760lb88rc8480 MACH_LB88RC8480 LB88RC8480 1769
1761hipxa MACH_HIPXA HIPXA 1770
1762mx25_3ds MACH_MX25_3DS MX25_3DS 1771
1763m800 MACH_M800 M800 1772
1764omap3530_lv_som MACH_OMAP3530_LV_SOM OMAP3530_LV_SOM 1773
1765prima_evb MACH_PRIMA_EVB PRIMA_EVB 1774
1766mx31bt1 MACH_MX31BT1 MX31BT1 1775
1767atlas4_evb MACH_ATLAS4_EVB ATLAS4_EVB 1776
1768mx31cicada MACH_MX31CICADA MX31CICADA 1777
1769mi424wr MACH_MI424WR MI424WR 1778
1770axs_ultrax MACH_AXS_ULTRAX AXS_ULTRAX 1779
1771at572d940deb MACH_AT572D940DEB AT572D940DEB 1780
1772davinci_da8xx_evm MACH_DAVINCI_DA8XX_EVM DAVINCI_DA8XX_EVM 1781
1773ep9302 MACH_EP9302 EP9302 1782
1774at572d940hfeb MACH_AT572D940HFEB AT572D940HFEB 1783
1775cybook3 MACH_CYBOOK3 CYBOOK3 1784
1776wdg002 MACH_WDG002 WDG002 1785
1777sg560adsl MACH_SG560ADSL SG560ADSL 1786
1778nextio_n2800_ica MACH_NEXTIO_N2800_ICA NEXTIO_N2800_ICA 1787
1779marvell_newdb MACH_MARVELL_NEWDB MARVELL_NEWDB 1789
1780vandihud MACH_VANDIHUD VANDIHUD 1790
1781magx_e8 MACH_MAGX_E8 MAGX_E8 1791
1782magx_z6 MACH_MAGX_Z6 MAGX_Z6 1792
1783magx_v8 MACH_MAGX_V8 MAGX_V8 1793
1784magx_u9 MACH_MAGX_U9 MAGX_U9 1794
1785toughcf08 MACH_TOUGHCF08 TOUGHCF08 1795
1786zw4400 MACH_ZW4400 ZW4400 1796
1787marat91 MACH_MARAT91 MARAT91 1797
1788overo MACH_OVERO OVERO 1798
1789at2440evb MACH_AT2440EVB AT2440EVB 1799
1790neocore926 MACH_NEOCORE926 NEOCORE926 1800
1791wnr854t MACH_WNR854T WNR854T 1801
1792imx27 MACH_IMX27 IMX27 1802
1793moose_db MACH_MOOSE_DB MOOSE_DB 1803
1794fab4 MACH_FAB4 FAB4 1804
1795htcdiamond MACH_HTCDIAMOND HTCDIAMOND 1805
1796fiona MACH_FIONA FIONA 1806
1797mxc30030_x MACH_MXC30030_X MXC30030_X 1807
1798bmp1000 MACH_BMP1000 BMP1000 1808
1799logi9200 MACH_LOGI9200 LOGI9200 1809
1800tqma31 MACH_TQMA31 TQMA31 1810
1801ccw9p9215js MACH_CCW9P9215JS CCW9P9215JS 1811
1802rd88f5181l_ge MACH_RD88F5181L_GE RD88F5181L_GE 1812
1803sifmain MACH_SIFMAIN SIFMAIN 1813
1804sam9_l9261 MACH_SAM9_L9261 SAM9_L9261 1814
1805cc9m2443js MACH_CC9M2443JS CC9M2443JS 1815
1806xaria300 MACH_XARIA300 XARIA300 1816
1807it9200 MACH_IT9200 IT9200 1817
1808rd88f5181l_fxo MACH_RD88F5181L_FXO RD88F5181L_FXO 1818
1809kriss_sensor MACH_KRISS_SENSOR KRISS_SENSOR 1819
1810pilz_pmi5 MACH_PILZ_PMI5 PILZ_PMI5 1820
1811jade MACH_JADE JADE 1821
1812ks8695_softplc MACH_KS8695_SOFTPLC KS8695_SOFTPLC 1822
1813gprisc4 MACH_GPRISC4 GPRISC4 1823
1814stamp9260 MACH_STAMP9260 STAMP9260 1824
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 082c31dcfd99..39752cdef6ff 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -558,8 +558,6 @@ static struct iosapic_rte_info * __init_refok iosapic_alloc_rte (void)
558 if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) { 558 if (!iosapic_kmalloc_ok && list_empty(&free_rte_list)) {
559 rte = alloc_bootmem(sizeof(struct iosapic_rte_info) * 559 rte = alloc_bootmem(sizeof(struct iosapic_rte_info) *
560 NR_PREALLOCATE_RTE_ENTRIES); 560 NR_PREALLOCATE_RTE_ENTRIES);
561 if (!rte)
562 return NULL;
563 for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++) 561 for (i = 0; i < NR_PREALLOCATE_RTE_ENTRIES; i++, rte++)
564 list_add(&rte->rte_list, &free_rte_list); 562 list_add(&rte->rte_list, &free_rte_list);
565 } 563 }
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index f48a809c686d..4ae15c8c2488 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -578,8 +578,6 @@ setup_arch (char **cmdline_p)
578 cpu_init(); /* initialize the bootstrap CPU */ 578 cpu_init(); /* initialize the bootstrap CPU */
579 mmu_context_init(); /* initialize context_id bitmap */ 579 mmu_context_init(); /* initialize context_id bitmap */
580 580
581 check_sal_cache_flush();
582
583#ifdef CONFIG_ACPI 581#ifdef CONFIG_ACPI
584 acpi_boot_init(); 582 acpi_boot_init();
585#endif 583#endif
@@ -607,6 +605,7 @@ setup_arch (char **cmdline_p)
607 ia64_mca_init(); 605 ia64_mca_init();
608 606
609 platform_setup(cmdline_p); 607 platform_setup(cmdline_p);
608 check_sal_cache_flush();
610 paging_init(); 609 paging_init();
611} 610}
612 611
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index 49d3120415eb..e585f9a2afb9 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -512,6 +512,8 @@ static ssize_t sn2_ptc_proc_write(struct file *file, const char __user *user, si
512 int cpu; 512 int cpu;
513 char optstr[64]; 513 char optstr[64];
514 514
515 if (count == 0 || count > sizeof(optstr))
516 return -EINVAL;
515 if (copy_from_user(optstr, user, count)) 517 if (copy_from_user(optstr, user, count))
516 return -EFAULT; 518 return -EFAULT;
517 optstr[count - 1] = '\0'; 519 optstr[count - 1] = '\0';
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 52e18e6d2ba0..e0edaaa6920a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -383,6 +383,7 @@ config VMI
383config KVM_CLOCK 383config KVM_CLOCK
384 bool "KVM paravirtualized clock" 384 bool "KVM paravirtualized clock"
385 select PARAVIRT 385 select PARAVIRT
386 select PARAVIRT_CLOCK
386 depends on !(X86_VISWS || X86_VOYAGER) 387 depends on !(X86_VISWS || X86_VOYAGER)
387 help 388 help
388 Turning on this option will allow you to run a paravirtualized clock 389 Turning on this option will allow you to run a paravirtualized clock
@@ -410,6 +411,10 @@ config PARAVIRT
410 over full virtualization. However, when run without a hypervisor 411 over full virtualization. However, when run without a hypervisor
411 the kernel is theoretically slower and slightly larger. 412 the kernel is theoretically slower and slightly larger.
412 413
414config PARAVIRT_CLOCK
415 bool
416 default n
417
413endif 418endif
414 419
415config MEMTEST_BOOTPARAM 420config MEMTEST_BOOTPARAM
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5e618c3b4720..77807d4769c9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o
82obj-$(CONFIG_KVM_GUEST) += kvm.o 82obj-$(CONFIG_KVM_GUEST) += kvm.o
83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o 83obj-$(CONFIG_KVM_CLOCK) += kvmclock.o
84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o 84obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o
85obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o
85 86
86obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o 87obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o
87 88
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 08a30986d472..87edf1ceb1df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h>
21#include <asm/arch_hooks.h> 22#include <asm/arch_hooks.h>
22#include <asm/msr.h> 23#include <asm/msr.h>
23#include <asm/apic.h> 24#include <asm/apic.h>
@@ -36,18 +37,9 @@ static int parse_no_kvmclock(char *arg)
36early_param("no-kvmclock", parse_no_kvmclock); 37early_param("no-kvmclock", parse_no_kvmclock);
37 38
38/* The hypervisor will put information about time periodically here */ 39/* The hypervisor will put information about time periodically here */
39static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); 40static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
40#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field 41static struct pvclock_wall_clock wall_clock;
41 42
42static inline u64 kvm_get_delta(u64 last_tsc)
43{
44 int cpu = smp_processor_id();
45 u64 delta = native_read_tsc() - last_tsc;
46 return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE;
47}
48
49static struct kvm_wall_clock wall_clock;
50static cycle_t kvm_clock_read(void);
51/* 43/*
52 * The wallclock is the time of day when we booted. Since then, some time may 44 * The wallclock is the time of day when we booted. Since then, some time may
53 * have elapsed since the hypervisor wrote the data. So we try to account for 45 * have elapsed since the hypervisor wrote the data. So we try to account for
@@ -55,64 +47,37 @@ static cycle_t kvm_clock_read(void);
55 */ 47 */
56static unsigned long kvm_get_wallclock(void) 48static unsigned long kvm_get_wallclock(void)
57{ 49{
58 u32 wc_sec, wc_nsec; 50 struct pvclock_vcpu_time_info *vcpu_time;
59 u64 delta;
60 struct timespec ts; 51 struct timespec ts;
61 int version, nsec;
62 int low, high; 52 int low, high;
63 53
64 low = (int)__pa(&wall_clock); 54 low = (int)__pa(&wall_clock);
65 high = ((u64)__pa(&wall_clock) >> 32); 55 high = ((u64)__pa(&wall_clock) >> 32);
56 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
66 57
67 delta = kvm_clock_read(); 58 vcpu_time = &get_cpu_var(hv_clock);
59 pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
60 put_cpu_var(hv_clock);
68 61
69 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 62 return ts.tv_sec;
70 do {
71 version = wall_clock.wc_version;
72 rmb();
73 wc_sec = wall_clock.wc_sec;
74 wc_nsec = wall_clock.wc_nsec;
75 rmb();
76 } while ((wall_clock.wc_version != version) || (version & 1));
77
78 delta = kvm_clock_read() - delta;
79 delta += wc_nsec;
80 nsec = do_div(delta, NSEC_PER_SEC);
81 set_normalized_timespec(&ts, wc_sec + delta, nsec);
82 /*
83 * Of all mechanisms of time adjustment I've tested, this one
84 * was the champion!
85 */
86 return ts.tv_sec + 1;
87} 63}
88 64
89static int kvm_set_wallclock(unsigned long now) 65static int kvm_set_wallclock(unsigned long now)
90{ 66{
91 return 0; 67 return -1;
92} 68}
93 69
94/*
95 * This is our read_clock function. The host puts an tsc timestamp each time
96 * it updates a new time. Without the tsc adjustment, we can have a situation
97 * in which a vcpu starts to run earlier (smaller system_time), but probes
98 * time later (compared to another vcpu), leading to backwards time
99 */
100static cycle_t kvm_clock_read(void) 70static cycle_t kvm_clock_read(void)
101{ 71{
102 u64 last_tsc, now; 72 struct pvclock_vcpu_time_info *src;
103 int cpu; 73 cycle_t ret;
104 74
105 preempt_disable(); 75 src = &get_cpu_var(hv_clock);
106 cpu = smp_processor_id(); 76 ret = pvclock_clocksource_read(src);
107 77 put_cpu_var(hv_clock);
108 last_tsc = get_clock(cpu, tsc_timestamp); 78 return ret;
109 now = get_clock(cpu, system_time);
110
111 now += kvm_get_delta(last_tsc);
112 preempt_enable();
113
114 return now;
115} 79}
80
116static struct clocksource kvm_clock = { 81static struct clocksource kvm_clock = {
117 .name = "kvm-clock", 82 .name = "kvm-clock",
118 .read = kvm_clock_read, 83 .read = kvm_clock_read,
@@ -123,13 +88,14 @@ static struct clocksource kvm_clock = {
123 .flags = CLOCK_SOURCE_IS_CONTINUOUS, 88 .flags = CLOCK_SOURCE_IS_CONTINUOUS,
124}; 89};
125 90
126static int kvm_register_clock(void) 91static int kvm_register_clock(char *txt)
127{ 92{
128 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
129 int low, high; 94 int low, high;
130 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; 95 low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
131 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); 96 high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
132 97 printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
98 cpu, high, low, txt);
133 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); 99 return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
134} 100}
135 101
@@ -140,12 +106,20 @@ static void kvm_setup_secondary_clock(void)
140 * Now that the first cpu already had this clocksource initialized, 106 * Now that the first cpu already had this clocksource initialized,
141 * we shouldn't fail. 107 * we shouldn't fail.
142 */ 108 */
143 WARN_ON(kvm_register_clock()); 109 WARN_ON(kvm_register_clock("secondary cpu clock"));
144 /* ok, done with our trickery, call native */ 110 /* ok, done with our trickery, call native */
145 setup_secondary_APIC_clock(); 111 setup_secondary_APIC_clock();
146} 112}
147#endif 113#endif
148 114
115#ifdef CONFIG_SMP
116void __init kvm_smp_prepare_boot_cpu(void)
117{
118 WARN_ON(kvm_register_clock("primary cpu clock"));
119 native_smp_prepare_boot_cpu();
120}
121#endif
122
149/* 123/*
150 * After the clock is registered, the host will keep writing to the 124 * After the clock is registered, the host will keep writing to the
151 * registered memory location. If the guest happens to shutdown, this memory 125 * registered memory location. If the guest happens to shutdown, this memory
@@ -174,7 +148,7 @@ void __init kvmclock_init(void)
174 return; 148 return;
175 149
176 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { 150 if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
177 if (kvm_register_clock()) 151 if (kvm_register_clock("boot clock"))
178 return; 152 return;
179 pv_time_ops.get_wallclock = kvm_get_wallclock; 153 pv_time_ops.get_wallclock = kvm_get_wallclock;
180 pv_time_ops.set_wallclock = kvm_set_wallclock; 154 pv_time_ops.set_wallclock = kvm_set_wallclock;
@@ -182,6 +156,9 @@ void __init kvmclock_init(void)
182#ifdef CONFIG_X86_LOCAL_APIC 156#ifdef CONFIG_X86_LOCAL_APIC
183 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; 157 pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
184#endif 158#endif
159#ifdef CONFIG_SMP
160 smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
161#endif
185 machine_ops.shutdown = kvm_shutdown; 162 machine_ops.shutdown = kvm_shutdown;
186#ifdef CONFIG_KEXEC 163#ifdef CONFIG_KEXEC
187 machine_ops.crash_shutdown = kvm_crash_shutdown; 164 machine_ops.crash_shutdown = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
new file mode 100644
index 000000000000..05fbe9a0325a
--- /dev/null
+++ b/arch/x86/kernel/pvclock.c
@@ -0,0 +1,141 @@
1/* paravirtual clock -- common code used by kvm/xen
2
3 This program is free software; you can redistribute it and/or modify
4 it under the terms of the GNU General Public License as published by
5 the Free Software Foundation; either version 2 of the License, or
6 (at your option) any later version.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16*/
17
18#include <linux/kernel.h>
19#include <linux/percpu.h>
20#include <asm/pvclock.h>
21
22/*
23 * These are perodically updated
24 * xen: magic shared_info page
25 * kvm: gpa registered via msr
26 * and then copied here.
27 */
28struct pvclock_shadow_time {
29 u64 tsc_timestamp; /* TSC at last update of time vals. */
30 u64 system_timestamp; /* Time, in nanosecs, since boot. */
31 u32 tsc_to_nsec_mul;
32 int tsc_shift;
33 u32 version;
34};
35
36/*
37 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
38 * yielding a 64-bit result.
39 */
40static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
41{
42 u64 product;
43#ifdef __i386__
44 u32 tmp1, tmp2;
45#endif
46
47 if (shift < 0)
48 delta >>= -shift;
49 else
50 delta <<= shift;
51
52#ifdef __i386__
53 __asm__ (
54 "mul %5 ; "
55 "mov %4,%%eax ; "
56 "mov %%edx,%4 ; "
57 "mul %5 ; "
58 "xor %5,%5 ; "
59 "add %4,%%eax ; "
60 "adc %5,%%edx ; "
61 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
62 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
63#elif __x86_64__
64 __asm__ (
65 "mul %%rdx ; shrd $32,%%rdx,%%rax"
66 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
67#else
68#error implement me!
69#endif
70
71 return product;
72}
73
74static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
75{
76 u64 delta = native_read_tsc() - shadow->tsc_timestamp;
77 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
78}
79
80/*
81 * Reads a consistent set of time-base values from hypervisor,
82 * into a shadow data area.
83 */
84static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
85 struct pvclock_vcpu_time_info *src)
86{
87 do {
88 dst->version = src->version;
89 rmb(); /* fetch version before data */
90 dst->tsc_timestamp = src->tsc_timestamp;
91 dst->system_timestamp = src->system_time;
92 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
93 dst->tsc_shift = src->tsc_shift;
94 rmb(); /* test version after fetching data */
95 } while ((src->version & 1) || (dst->version != src->version));
96
97 return dst->version;
98}
99
100cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
101{
102 struct pvclock_shadow_time shadow;
103 unsigned version;
104 cycle_t ret, offset;
105
106 do {
107 version = pvclock_get_time_values(&shadow, src);
108 barrier();
109 offset = pvclock_get_nsec_offset(&shadow);
110 ret = shadow.system_timestamp + offset;
111 barrier();
112 } while (version != src->version);
113
114 return ret;
115}
116
117void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
118 struct pvclock_vcpu_time_info *vcpu_time,
119 struct timespec *ts)
120{
121 u32 version;
122 u64 delta;
123 struct timespec now;
124
125 /* get wallclock at system boot */
126 do {
127 version = wall_clock->version;
128 rmb(); /* fetch version before time */
129 now.tv_sec = wall_clock->sec;
130 now.tv_nsec = wall_clock->nsec;
131 rmb(); /* fetch time before checking version */
132 } while ((wall_clock->version & 1) || (version != wall_clock->version));
133
134 delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */
135 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;
136
137 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
138 now.tv_sec = delta;
139
140 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
141}
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index f2f5d260874e..3829aa7b663f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,9 +200,12 @@ int __pit_timer_fn(struct kvm_kpit_state *ps)
200 200
201 atomic_inc(&pt->pending); 201 atomic_inc(&pt->pending);
202 smp_mb__after_atomic_inc(); 202 smp_mb__after_atomic_inc();
203 if (vcpu0 && waitqueue_active(&vcpu0->wq)) { 203 if (vcpu0) {
204 vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; 204 set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
205 wake_up_interruptible(&vcpu0->wq); 205 if (waitqueue_active(&vcpu0->wq)) {
206 vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
207 wake_up_interruptible(&vcpu0->wq);
208 }
206 } 209 }
207 210
208 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); 211 pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c297c50eba63..ebc03f5ae162 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -940,6 +940,7 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
940 wait_queue_head_t *q = &apic->vcpu->wq; 940 wait_queue_head_t *q = &apic->vcpu->wq;
941 941
942 atomic_inc(&apic->timer.pending); 942 atomic_inc(&apic->timer.pending);
943 set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
943 if (waitqueue_active(q)) { 944 if (waitqueue_active(q)) {
944 apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; 945 apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
945 wake_up_interruptible(q); 946 wake_up_interruptible(q);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee3f53098f0c..7e7c3969f7a2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -640,6 +640,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
640 rmap_remove(kvm, spte); 640 rmap_remove(kvm, spte);
641 --kvm->stat.lpages; 641 --kvm->stat.lpages;
642 set_shadow_pte(spte, shadow_trap_nonpresent_pte); 642 set_shadow_pte(spte, shadow_trap_nonpresent_pte);
643 spte = NULL;
643 write_protected = 1; 644 write_protected = 1;
644 } 645 }
645 spte = rmap_next(kvm, rmapp, spte); 646 spte = rmap_next(kvm, rmapp, spte);
@@ -1082,10 +1083,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1082 struct kvm_mmu_page *shadow; 1083 struct kvm_mmu_page *shadow;
1083 1084
1084 spte |= PT_WRITABLE_MASK; 1085 spte |= PT_WRITABLE_MASK;
1085 if (user_fault) {
1086 mmu_unshadow(vcpu->kvm, gfn);
1087 goto unshadowed;
1088 }
1089 1086
1090 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn); 1087 shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
1091 if (shadow || 1088 if (shadow ||
@@ -1102,8 +1099,6 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1102 } 1099 }
1103 } 1100 }
1104 1101
1105unshadowed:
1106
1107 if (pte_access & ACC_WRITE_MASK) 1102 if (pte_access & ACC_WRITE_MASK)
1108 mark_page_dirty(vcpu->kvm, gfn); 1103 mark_page_dirty(vcpu->kvm, gfn);
1109 1104
@@ -1580,11 +1575,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
1580 u64 *spte, 1575 u64 *spte,
1581 const void *new) 1576 const void *new)
1582{ 1577{
1583 if ((sp->role.level != PT_PAGE_TABLE_LEVEL) 1578 if (sp->role.level != PT_PAGE_TABLE_LEVEL) {
1584 && !vcpu->arch.update_pte.largepage) { 1579 if (!vcpu->arch.update_pte.largepage ||
1585 ++vcpu->kvm->stat.mmu_pde_zapped; 1580 sp->role.glevels == PT32_ROOT_LEVEL) {
1586 return; 1581 ++vcpu->kvm->stat.mmu_pde_zapped;
1587 } 1582 return;
1583 }
1584 }
1588 1585
1589 ++vcpu->kvm->stat.mmu_pte_updated; 1586 ++vcpu->kvm->stat.mmu_pte_updated;
1590 if (sp->role.glevels == PT32_ROOT_LEVEL) 1587 if (sp->role.glevels == PT32_ROOT_LEVEL)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 02efbe75f317..540e95179074 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -566,7 +566,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
566 load_transition_efer(vmx); 566 load_transition_efer(vmx);
567} 567}
568 568
569static void vmx_load_host_state(struct vcpu_vmx *vmx) 569static void __vmx_load_host_state(struct vcpu_vmx *vmx)
570{ 570{
571 unsigned long flags; 571 unsigned long flags;
572 572
@@ -596,6 +596,13 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
596 reload_host_efer(vmx); 596 reload_host_efer(vmx);
597} 597}
598 598
599static void vmx_load_host_state(struct vcpu_vmx *vmx)
600{
601 preempt_disable();
602 __vmx_load_host_state(vmx);
603 preempt_enable();
604}
605
599/* 606/*
600 * Switches to specified vcpu, until a matching vcpu_put(), but assumes 607 * Switches to specified vcpu, until a matching vcpu_put(), but assumes
601 * vcpu mutex is already taken. 608 * vcpu mutex is already taken.
@@ -654,7 +661,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
654 661
655static void vmx_vcpu_put(struct kvm_vcpu *vcpu) 662static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
656{ 663{
657 vmx_load_host_state(to_vmx(vcpu)); 664 __vmx_load_host_state(to_vmx(vcpu));
658} 665}
659 666
660static void vmx_fpu_activate(struct kvm_vcpu *vcpu) 667static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -884,11 +891,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
884 switch (msr_index) { 891 switch (msr_index) {
885#ifdef CONFIG_X86_64 892#ifdef CONFIG_X86_64
886 case MSR_EFER: 893 case MSR_EFER:
894 vmx_load_host_state(vmx);
887 ret = kvm_set_msr_common(vcpu, msr_index, data); 895 ret = kvm_set_msr_common(vcpu, msr_index, data);
888 if (vmx->host_state.loaded) {
889 reload_host_efer(vmx);
890 load_transition_efer(vmx);
891 }
892 break; 896 break;
893 case MSR_FS_BASE: 897 case MSR_FS_BASE:
894 vmcs_writel(GUEST_FS_BASE, data); 898 vmcs_writel(GUEST_FS_BASE, data);
@@ -910,11 +914,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
910 guest_write_tsc(data); 914 guest_write_tsc(data);
911 break; 915 break;
912 default: 916 default:
917 vmx_load_host_state(vmx);
913 msr = find_msr_entry(vmx, msr_index); 918 msr = find_msr_entry(vmx, msr_index);
914 if (msr) { 919 if (msr) {
915 msr->data = data; 920 msr->data = data;
916 if (vmx->host_state.loaded)
917 load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
918 break; 921 break;
919 } 922 }
920 ret = kvm_set_msr_common(vcpu, msr_index, data); 923 ret = kvm_set_msr_common(vcpu, msr_index, data);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 00acf1301a15..63a77caa59f1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -492,8 +492,8 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) 492static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
493{ 493{
494 static int version; 494 static int version;
495 struct kvm_wall_clock wc; 495 struct pvclock_wall_clock wc;
496 struct timespec wc_ts; 496 struct timespec now, sys, boot;
497 497
498 if (!wall_clock) 498 if (!wall_clock)
499 return; 499 return;
@@ -502,10 +502,19 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
502 502
503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 503 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
504 504
505 wc_ts = current_kernel_time(); 505 /*
506 wc.wc_sec = wc_ts.tv_sec; 506 * The guest calculates current wall clock time by adding
507 wc.wc_nsec = wc_ts.tv_nsec; 507 * system time (updated by kvm_write_guest_time below) to the
508 wc.wc_version = version; 508 * wall clock specified here. guest system time equals host
509 * system time for us, thus we must fill in host boot time here.
510 */
511 now = current_kernel_time();
512 ktime_get_ts(&sys);
513 boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys));
514
515 wc.sec = boot.tv_sec;
516 wc.nsec = boot.tv_nsec;
517 wc.version = version;
509 518
510 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc)); 519 kvm_write_guest(kvm, wall_clock, &wc, sizeof(wc));
511 520
@@ -513,6 +522,45 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
513 kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); 522 kvm_write_guest(kvm, wall_clock, &version, sizeof(version));
514} 523}
515 524
525static uint32_t div_frac(uint32_t dividend, uint32_t divisor)
526{
527 uint32_t quotient, remainder;
528
529 /* Don't try to replace with do_div(), this one calculates
530 * "(dividend << 32) / divisor" */
531 __asm__ ( "divl %4"
532 : "=a" (quotient), "=d" (remainder)
533 : "0" (0), "1" (dividend), "r" (divisor) );
534 return quotient;
535}
536
537static void kvm_set_time_scale(uint32_t tsc_khz, struct pvclock_vcpu_time_info *hv_clock)
538{
539 uint64_t nsecs = 1000000000LL;
540 int32_t shift = 0;
541 uint64_t tps64;
542 uint32_t tps32;
543
544 tps64 = tsc_khz * 1000LL;
545 while (tps64 > nsecs*2) {
546 tps64 >>= 1;
547 shift--;
548 }
549
550 tps32 = (uint32_t)tps64;
551 while (tps32 <= (uint32_t)nsecs) {
552 tps32 <<= 1;
553 shift++;
554 }
555
556 hv_clock->tsc_shift = shift;
557 hv_clock->tsc_to_system_mul = div_frac(nsecs, tps32);
558
559 pr_debug("%s: tsc_khz %u, tsc_shift %d, tsc_mul %u\n",
560 __FUNCTION__, tsc_khz, hv_clock->tsc_shift,
561 hv_clock->tsc_to_system_mul);
562}
563
516static void kvm_write_guest_time(struct kvm_vcpu *v) 564static void kvm_write_guest_time(struct kvm_vcpu *v)
517{ 565{
518 struct timespec ts; 566 struct timespec ts;
@@ -523,6 +571,11 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
523 if ((!vcpu->time_page)) 571 if ((!vcpu->time_page))
524 return; 572 return;
525 573
574 if (unlikely(vcpu->hv_clock_tsc_khz != tsc_khz)) {
575 kvm_set_time_scale(tsc_khz, &vcpu->hv_clock);
576 vcpu->hv_clock_tsc_khz = tsc_khz;
577 }
578
526 /* Keep irq disabled to prevent changes to the clock */ 579 /* Keep irq disabled to prevent changes to the clock */
527 local_irq_save(flags); 580 local_irq_save(flags);
528 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER, 581 kvm_get_msr(v, MSR_IA32_TIME_STAMP_COUNTER,
@@ -537,14 +590,14 @@ static void kvm_write_guest_time(struct kvm_vcpu *v)
537 /* 590 /*
538 * The interface expects us to write an even number signaling that the 591 * The interface expects us to write an even number signaling that the
539 * update is finished. Since the guest won't see the intermediate 592 * update is finished. Since the guest won't see the intermediate
540 * state, we just write "2" at the end 593 * state, we just increase by 2 at the end.
541 */ 594 */
542 vcpu->hv_clock.version = 2; 595 vcpu->hv_clock.version += 2;
543 596
544 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0); 597 shared_kaddr = kmap_atomic(vcpu->time_page, KM_USER0);
545 598
546 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock, 599 memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
547 sizeof(vcpu->hv_clock)); 600 sizeof(vcpu->hv_clock));
548 601
549 kunmap_atomic(shared_kaddr, KM_USER0); 602 kunmap_atomic(shared_kaddr, KM_USER0);
550 603
@@ -599,10 +652,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
599 /* ...but clean it before doing the actual write */ 652 /* ...but clean it before doing the actual write */
600 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1); 653 vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
601 654
602 vcpu->arch.hv_clock.tsc_to_system_mul =
603 clocksource_khz2mult(tsc_khz, 22);
604 vcpu->arch.hv_clock.tsc_shift = 22;
605
606 down_read(&current->mm->mmap_sem); 655 down_read(&current->mm->mmap_sem);
607 vcpu->arch.time_page = 656 vcpu->arch.time_page =
608 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT); 657 gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
@@ -2759,6 +2808,8 @@ again:
2759 if (vcpu->requests) { 2808 if (vcpu->requests) {
2760 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests)) 2809 if (test_and_clear_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests))
2761 __kvm_migrate_timers(vcpu); 2810 __kvm_migrate_timers(vcpu);
2811 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2812 kvm_x86_ops->tlb_flush(vcpu);
2762 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS, 2813 if (test_and_clear_bit(KVM_REQ_REPORT_TPR_ACCESS,
2763 &vcpu->requests)) { 2814 &vcpu->requests)) {
2764 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS; 2815 kvm_run->exit_reason = KVM_EXIT_TPR_ACCESS;
@@ -2772,6 +2823,7 @@ again:
2772 } 2823 }
2773 } 2824 }
2774 2825
2826 clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
2775 kvm_inject_pending_timer_irqs(vcpu); 2827 kvm_inject_pending_timer_irqs(vcpu);
2776 2828
2777 preempt_disable(); 2829 preempt_disable();
@@ -2781,21 +2833,13 @@ again:
2781 2833
2782 local_irq_disable(); 2834 local_irq_disable();
2783 2835
2784 if (need_resched()) { 2836 if (vcpu->requests || need_resched()) {
2785 local_irq_enable(); 2837 local_irq_enable();
2786 preempt_enable(); 2838 preempt_enable();
2787 r = 1; 2839 r = 1;
2788 goto out; 2840 goto out;
2789 } 2841 }
2790 2842
2791 if (vcpu->requests)
2792 if (test_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) {
2793 local_irq_enable();
2794 preempt_enable();
2795 r = 1;
2796 goto out;
2797 }
2798
2799 if (signal_pending(current)) { 2843 if (signal_pending(current)) {
2800 local_irq_enable(); 2844 local_irq_enable();
2801 preempt_enable(); 2845 preempt_enable();
@@ -2825,9 +2869,6 @@ again:
2825 2869
2826 kvm_guest_enter(); 2870 kvm_guest_enter();
2827 2871
2828 if (vcpu->requests)
2829 if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
2830 kvm_x86_ops->tlb_flush(vcpu);
2831 2872
2832 KVMTRACE_0D(VMENTRY, vcpu, entryexit); 2873 KVMTRACE_0D(VMENTRY, vcpu, entryexit);
2833 kvm_x86_ops->run(vcpu, kvm_run); 2874 kvm_x86_ops->run(vcpu, kvm_run);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 2e641be2737e..6c388e593bc8 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -5,8 +5,9 @@
5config XEN 5config XEN
6 bool "Xen guest support" 6 bool "Xen guest support"
7 select PARAVIRT 7 select PARAVIRT
8 select PARAVIRT_CLOCK
8 depends on X86_32 9 depends on X86_32
9 depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER) 10 depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER)
10 help 11 help
11 This is the Linux Xen port. Enabling this will allow the 12 This is the Linux Xen port. Enabling this will allow the
12 kernel to boot in a paravirtualized environment under the 13 kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d61..f09c1c69c37a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -785,38 +785,35 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte)
785static __init void xen_pagetable_setup_start(pgd_t *base) 785static __init void xen_pagetable_setup_start(pgd_t *base)
786{ 786{
787 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; 787 pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base;
788 int i;
788 789
789 /* special set_pte for pagetable initialization */ 790 /* special set_pte for pagetable initialization */
790 pv_mmu_ops.set_pte = xen_set_pte_init; 791 pv_mmu_ops.set_pte = xen_set_pte_init;
791 792
792 init_mm.pgd = base; 793 init_mm.pgd = base;
793 /* 794 /*
794 * copy top-level of Xen-supplied pagetable into place. For 795 * copy top-level of Xen-supplied pagetable into place. This
795 * !PAE we can use this as-is, but for PAE it is a stand-in 796 * is a stand-in while we copy the pmd pages.
796 * while we copy the pmd pages.
797 */ 797 */
798 memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); 798 memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t));
799 799
800 if (PTRS_PER_PMD > 1) { 800 /*
801 int i; 801 * For PAE, need to allocate new pmds, rather than
802 /* 802 * share Xen's, since Xen doesn't like pmd's being
803 * For PAE, need to allocate new pmds, rather than 803 * shared between address spaces.
804 * share Xen's, since Xen doesn't like pmd's being 804 */
805 * shared between address spaces. 805 for (i = 0; i < PTRS_PER_PGD; i++) {
806 */ 806 if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
807 for (i = 0; i < PTRS_PER_PGD; i++) { 807 pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
808 if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) {
809 pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
810 808
811 memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), 809 memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]),
812 PAGE_SIZE); 810 PAGE_SIZE);
813 811
814 make_lowmem_page_readonly(pmd); 812 make_lowmem_page_readonly(pmd);
815 813
816 set_pgd(&base[i], __pgd(1 + __pa(pmd))); 814 set_pgd(&base[i], __pgd(1 + __pa(pmd)));
817 } else 815 } else
818 pgd_clear(&base[i]); 816 pgd_clear(&base[i]);
819 }
820 } 817 }
821 818
822 /* make sure zero_page is mapped RO so we can use it in pagetables */ 819 /* make sure zero_page is mapped RO so we can use it in pagetables */
@@ -873,17 +870,7 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
873 870
874 /* Actually pin the pagetable down, but we can't set PG_pinned 871 /* Actually pin the pagetable down, but we can't set PG_pinned
875 yet because the page structures don't exist yet. */ 872 yet because the page structures don't exist yet. */
876 { 873 pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
877 unsigned level;
878
879#ifdef CONFIG_X86_PAE
880 level = MMUEXT_PIN_L3_TABLE;
881#else
882 level = MMUEXT_PIN_L2_TABLE;
883#endif
884
885 pin_pagetable_pfn(level, PFN_DOWN(__pa(base)));
886 }
887} 874}
888 875
889/* This is called once we have the cpu_possible_map */ 876/* This is called once we have the cpu_possible_map */
@@ -1093,7 +1080,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1093 .make_pte = xen_make_pte, 1080 .make_pte = xen_make_pte,
1094 .make_pgd = xen_make_pgd, 1081 .make_pgd = xen_make_pgd,
1095 1082
1096#ifdef CONFIG_X86_PAE
1097 .set_pte_atomic = xen_set_pte_atomic, 1083 .set_pte_atomic = xen_set_pte_atomic,
1098 .set_pte_present = xen_set_pte_at, 1084 .set_pte_present = xen_set_pte_at,
1099 .set_pud = xen_set_pud, 1085 .set_pud = xen_set_pud,
@@ -1102,7 +1088,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1102 1088
1103 .make_pmd = xen_make_pmd, 1089 .make_pmd = xen_make_pmd,
1104 .pmd_val = xen_pmd_val, 1090 .pmd_val = xen_pmd_val,
1105#endif /* PAE */
1106 1091
1107 .activate_mm = xen_activate_mm, 1092 .activate_mm = xen_activate_mm,
1108 .dup_mmap = xen_dup_mmap, 1093 .dup_mmap = xen_dup_mmap,
@@ -1228,6 +1213,11 @@ asmlinkage void __init xen_start_kernel(void)
1228 if (xen_feature(XENFEAT_supervisor_mode_kernel)) 1213 if (xen_feature(XENFEAT_supervisor_mode_kernel))
1229 pv_info.kernel_rpl = 0; 1214 pv_info.kernel_rpl = 0;
1230 1215
1216 /* Prevent unwanted bits from being set in PTEs. */
1217 __supported_pte_mask &= ~_PAGE_GLOBAL;
1218 if (!is_initial_xendomain())
1219 __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
1220
1231 /* set the limit of our address space */ 1221 /* set the limit of our address space */
1232 xen_reserve_top(); 1222 xen_reserve_top();
1233 1223
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3525ef523a74..df40bf74ea75 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -179,50 +179,56 @@ out:
179 preempt_enable(); 179 preempt_enable();
180} 180}
181 181
182pteval_t xen_pte_val(pte_t pte) 182/* Assume pteval_t is equivalent to all the other *val_t types. */
183static pteval_t pte_mfn_to_pfn(pteval_t val)
184{
185 if (val & _PAGE_PRESENT) {
186 unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
187 pteval_t flags = val & ~PTE_MASK;
188 val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
189 }
190
191 return val;
192}
193
194static pteval_t pte_pfn_to_mfn(pteval_t val)
183{ 195{
184 pteval_t ret = pte.pte; 196 if (val & _PAGE_PRESENT) {
197 unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
198 pteval_t flags = val & ~PTE_MASK;
199 val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
200 }
185 201
186 if (ret & _PAGE_PRESENT) 202 return val;
187 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT; 203}
188 204
189 return ret; 205pteval_t xen_pte_val(pte_t pte)
206{
207 return pte_mfn_to_pfn(pte.pte);
190} 208}
191 209
192pgdval_t xen_pgd_val(pgd_t pgd) 210pgdval_t xen_pgd_val(pgd_t pgd)
193{ 211{
194 pgdval_t ret = pgd.pgd; 212 return pte_mfn_to_pfn(pgd.pgd);
195 if (ret & _PAGE_PRESENT)
196 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
197 return ret;
198} 213}
199 214
200pte_t xen_make_pte(pteval_t pte) 215pte_t xen_make_pte(pteval_t pte)
201{ 216{
202 if (pte & _PAGE_PRESENT) { 217 pte = pte_pfn_to_mfn(pte);
203 pte = phys_to_machine(XPADDR(pte)).maddr; 218 return native_make_pte(pte);
204 pte &= ~(_PAGE_PCD | _PAGE_PWT);
205 }
206
207 return (pte_t){ .pte = pte };
208} 219}
209 220
210pgd_t xen_make_pgd(pgdval_t pgd) 221pgd_t xen_make_pgd(pgdval_t pgd)
211{ 222{
212 if (pgd & _PAGE_PRESENT) 223 pgd = pte_pfn_to_mfn(pgd);
213 pgd = phys_to_machine(XPADDR(pgd)).maddr; 224 return native_make_pgd(pgd);
214
215 return (pgd_t){ pgd };
216} 225}
217 226
218pmdval_t xen_pmd_val(pmd_t pmd) 227pmdval_t xen_pmd_val(pmd_t pmd)
219{ 228{
220 pmdval_t ret = native_pmd_val(pmd); 229 return pte_mfn_to_pfn(pmd.pmd);
221 if (ret & _PAGE_PRESENT)
222 ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
223 return ret;
224} 230}
225#ifdef CONFIG_X86_PAE 231
226void xen_set_pud(pud_t *ptr, pud_t val) 232void xen_set_pud(pud_t *ptr, pud_t val)
227{ 233{
228 struct multicall_space mcs; 234 struct multicall_space mcs;
@@ -267,17 +273,9 @@ void xen_pmd_clear(pmd_t *pmdp)
267 273
268pmd_t xen_make_pmd(pmdval_t pmd) 274pmd_t xen_make_pmd(pmdval_t pmd)
269{ 275{
270 if (pmd & _PAGE_PRESENT) 276 pmd = pte_pfn_to_mfn(pmd);
271 pmd = phys_to_machine(XPADDR(pmd)).maddr;
272
273 return native_make_pmd(pmd); 277 return native_make_pmd(pmd);
274} 278}
275#else /* !PAE */
276void xen_set_pte(pte_t *ptep, pte_t pte)
277{
278 *ptep = pte;
279}
280#endif /* CONFIG_X86_PAE */
281 279
282/* 280/*
283 (Yet another) pagetable walker. This one is intended for pinning a 281 (Yet another) pagetable walker. This one is intended for pinning a
@@ -430,8 +428,6 @@ static int pin_page(struct page *page, enum pt_level level)
430 read-only, and can be pinned. */ 428 read-only, and can be pinned. */
431void xen_pgd_pin(pgd_t *pgd) 429void xen_pgd_pin(pgd_t *pgd)
432{ 430{
433 unsigned level;
434
435 xen_mc_batch(); 431 xen_mc_batch();
436 432
437 if (pgd_walk(pgd, pin_page, TASK_SIZE)) { 433 if (pgd_walk(pgd, pin_page, TASK_SIZE)) {
@@ -441,14 +437,7 @@ void xen_pgd_pin(pgd_t *pgd)
441 xen_mc_batch(); 437 xen_mc_batch();
442 } 438 }
443 439
444#ifdef CONFIG_X86_PAE 440 xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd)));
445 level = MMUEXT_PIN_L3_TABLE;
446#else
447 level = MMUEXT_PIN_L2_TABLE;
448#endif
449
450 xen_do_pin(level, PFN_DOWN(__pa(pgd)));
451
452 xen_mc_issue(0); 441 xen_mc_issue(0);
453} 442}
454 443
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h
index b5e189b1519d..5fe961caffd4 100644
--- a/arch/x86/xen/mmu.h
+++ b/arch/x86/xen/mmu.h
@@ -37,14 +37,13 @@ void xen_exit_mmap(struct mm_struct *mm);
37void xen_pgd_pin(pgd_t *pgd); 37void xen_pgd_pin(pgd_t *pgd);
38//void xen_pgd_unpin(pgd_t *pgd); 38//void xen_pgd_unpin(pgd_t *pgd);
39 39
40#ifdef CONFIG_X86_PAE 40pteval_t xen_pte_val(pte_t);
41unsigned long long xen_pte_val(pte_t); 41pmdval_t xen_pmd_val(pmd_t);
42unsigned long long xen_pmd_val(pmd_t); 42pgdval_t xen_pgd_val(pgd_t);
43unsigned long long xen_pgd_val(pgd_t);
44 43
45pte_t xen_make_pte(unsigned long long); 44pte_t xen_make_pte(pteval_t);
46pmd_t xen_make_pmd(unsigned long long); 45pmd_t xen_make_pmd(pmdval_t);
47pgd_t xen_make_pgd(unsigned long long); 46pgd_t xen_make_pgd(pgdval_t);
48 47
49void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, 48void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
50 pte_t *ptep, pte_t pteval); 49 pte_t *ptep, pte_t pteval);
@@ -53,15 +52,4 @@ void xen_set_pud(pud_t *ptr, pud_t val);
53void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); 52void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
54void xen_pmd_clear(pmd_t *pmdp); 53void xen_pmd_clear(pmd_t *pmdp);
55 54
56
57#else
58unsigned long xen_pte_val(pte_t);
59unsigned long xen_pmd_val(pmd_t);
60unsigned long xen_pgd_val(pgd_t);
61
62pte_t xen_make_pte(unsigned long);
63pmd_t xen_make_pmd(unsigned long);
64pgd_t xen_make_pgd(unsigned long);
65#endif
66
67#endif /* _XEN_MMU_H */ 55#endif /* _XEN_MMU_H */
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 52b2e3856980..41e217503c96 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -14,6 +14,7 @@
14#include <linux/kernel_stat.h> 14#include <linux/kernel_stat.h>
15#include <linux/math64.h> 15#include <linux/math64.h>
16 16
17#include <asm/pvclock.h>
17#include <asm/xen/hypervisor.h> 18#include <asm/xen/hypervisor.h>
18#include <asm/xen/hypercall.h> 19#include <asm/xen/hypercall.h>
19 20
@@ -31,17 +32,6 @@
31 32
32static cycle_t xen_clocksource_read(void); 33static cycle_t xen_clocksource_read(void);
33 34
34/* These are perodically updated in shared_info, and then copied here. */
35struct shadow_time_info {
36 u64 tsc_timestamp; /* TSC at last update of time vals. */
37 u64 system_timestamp; /* Time, in nanosecs, since boot. */
38 u32 tsc_to_nsec_mul;
39 int tsc_shift;
40 u32 version;
41};
42
43static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);
44
45/* runstate info updated by Xen */ 35/* runstate info updated by Xen */
46static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); 36static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);
47 37
@@ -211,7 +201,7 @@ unsigned long long xen_sched_clock(void)
211unsigned long xen_cpu_khz(void) 201unsigned long xen_cpu_khz(void)
212{ 202{
213 u64 xen_khz = 1000000ULL << 32; 203 u64 xen_khz = 1000000ULL << 32;
214 const struct vcpu_time_info *info = 204 const struct pvclock_vcpu_time_info *info =
215 &HYPERVISOR_shared_info->vcpu_info[0].time; 205 &HYPERVISOR_shared_info->vcpu_info[0].time;
216 206
217 do_div(xen_khz, info->tsc_to_system_mul); 207 do_div(xen_khz, info->tsc_to_system_mul);
@@ -223,121 +213,26 @@ unsigned long xen_cpu_khz(void)
223 return xen_khz; 213 return xen_khz;
224} 214}
225 215
226/*
227 * Reads a consistent set of time-base values from Xen, into a shadow data
228 * area.
229 */
230static unsigned get_time_values_from_xen(void)
231{
232 struct vcpu_time_info *src;
233 struct shadow_time_info *dst;
234
235 /* src is shared memory with the hypervisor, so we need to
236 make sure we get a consistent snapshot, even in the face of
237 being preempted. */
238 src = &__get_cpu_var(xen_vcpu)->time;
239 dst = &__get_cpu_var(shadow_time);
240
241 do {
242 dst->version = src->version;
243 rmb(); /* fetch version before data */
244 dst->tsc_timestamp = src->tsc_timestamp;
245 dst->system_timestamp = src->system_time;
246 dst->tsc_to_nsec_mul = src->tsc_to_system_mul;
247 dst->tsc_shift = src->tsc_shift;
248 rmb(); /* test version after fetching data */
249 } while ((src->version & 1) | (dst->version ^ src->version));
250
251 return dst->version;
252}
253
254/*
255 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
256 * yielding a 64-bit result.
257 */
258static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
259{
260 u64 product;
261#ifdef __i386__
262 u32 tmp1, tmp2;
263#endif
264
265 if (shift < 0)
266 delta >>= -shift;
267 else
268 delta <<= shift;
269
270#ifdef __i386__
271 __asm__ (
272 "mul %5 ; "
273 "mov %4,%%eax ; "
274 "mov %%edx,%4 ; "
275 "mul %5 ; "
276 "xor %5,%5 ; "
277 "add %4,%%eax ; "
278 "adc %5,%%edx ; "
279 : "=A" (product), "=r" (tmp1), "=r" (tmp2)
280 : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
281#elif __x86_64__
282 __asm__ (
283 "mul %%rdx ; shrd $32,%%rdx,%%rax"
284 : "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
285#else
286#error implement me!
287#endif
288
289 return product;
290}
291
292static u64 get_nsec_offset(struct shadow_time_info *shadow)
293{
294 u64 now, delta;
295 now = native_read_tsc();
296 delta = now - shadow->tsc_timestamp;
297 return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
298}
299
300static cycle_t xen_clocksource_read(void) 216static cycle_t xen_clocksource_read(void)
301{ 217{
302 struct shadow_time_info *shadow = &get_cpu_var(shadow_time); 218 struct pvclock_vcpu_time_info *src;
303 cycle_t ret; 219 cycle_t ret;
304 unsigned version;
305
306 do {
307 version = get_time_values_from_xen();
308 barrier();
309 ret = shadow->system_timestamp + get_nsec_offset(shadow);
310 barrier();
311 } while (version != __get_cpu_var(xen_vcpu)->time.version);
312
313 put_cpu_var(shadow_time);
314 220
221 src = &get_cpu_var(xen_vcpu)->time;
222 ret = pvclock_clocksource_read(src);
223 put_cpu_var(xen_vcpu);
315 return ret; 224 return ret;
316} 225}
317 226
318static void xen_read_wallclock(struct timespec *ts) 227static void xen_read_wallclock(struct timespec *ts)
319{ 228{
320 const struct shared_info *s = HYPERVISOR_shared_info; 229 struct shared_info *s = HYPERVISOR_shared_info;
321 u32 version; 230 struct pvclock_wall_clock *wall_clock = &(s->wc);
322 u64 delta; 231 struct pvclock_vcpu_time_info *vcpu_time;
323 struct timespec now;
324
325 /* get wallclock at system boot */
326 do {
327 version = s->wc_version;
328 rmb(); /* fetch version before time */
329 now.tv_sec = s->wc_sec;
330 now.tv_nsec = s->wc_nsec;
331 rmb(); /* fetch time before checking version */
332 } while ((s->wc_version & 1) | (version ^ s->wc_version));
333 232
334 delta = xen_clocksource_read(); /* time since system boot */ 233 vcpu_time = &get_cpu_var(xen_vcpu)->time;
335 delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; 234 pvclock_read_wallclock(wall_clock, vcpu_time, ts);
336 235 put_cpu_var(xen_vcpu);
337 now.tv_nsec = do_div(delta, NSEC_PER_SEC);
338 now.tv_sec = delta;
339
340 set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
341} 236}
342 237
343unsigned long xen_get_wallclock(void) 238unsigned long xen_get_wallclock(void)
@@ -345,7 +240,6 @@ unsigned long xen_get_wallclock(void)
345 struct timespec ts; 240 struct timespec ts;
346 241
347 xen_read_wallclock(&ts); 242 xen_read_wallclock(&ts);
348
349 return ts.tv_sec; 243 return ts.tv_sec;
350} 244}
351 245
@@ -569,8 +463,6 @@ __init void xen_time_init(void)
569{ 463{
570 int cpu = smp_processor_id(); 464 int cpu = smp_processor_id();
571 465
572 get_time_values_from_xen();
573
574 clocksource_register(&xen_clocksource); 466 clocksource_register(&xen_clocksource);
575 467
576 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) { 468 if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587ce73c..6ec3b4f7719b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
17 17
18 __FINIT 18 __FINIT
19 19
20.pushsection .bss.page_aligned 20.pushsection .text
21 .align PAGE_SIZE_asm 21 .align PAGE_SIZE_asm
22ENTRY(hypercall_page) 22ENTRY(hypercall_page)
23 .skip 0x1000 23 .skip 0x1000
@@ -30,11 +30,7 @@ ENTRY(hypercall_page)
30 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) 30 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen)
31 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) 31 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page)
32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") 32 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb")
33#ifdef CONFIG_X86_PAE
34 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") 33 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes")
35#else
36 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "no")
37#endif
38 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") 34 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic")
39 35
40#endif /*CONFIG_XEN */ 36#endif /*CONFIG_XEN */
diff --git a/drivers/char/drm/i915_drv.c b/drivers/char/drm/i915_drv.c
index e8f3d682e3b1..93aed1c38bd2 100644
--- a/drivers/char/drm/i915_drv.c
+++ b/drivers/char/drm/i915_drv.c
@@ -389,6 +389,7 @@ static int i915_resume(struct drm_device *dev)
389 pci_restore_state(dev->pdev); 389 pci_restore_state(dev->pdev);
390 if (pci_enable_device(dev->pdev)) 390 if (pci_enable_device(dev->pdev))
391 return -1; 391 return -1;
392 pci_set_master(dev->pdev);
392 393
393 pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB); 394 pci_write_config_byte(dev->pdev, LBB, dev_priv->saveLBB);
394 395
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index b1a757a5ee27..8f81139d6194 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -981,16 +981,9 @@ EXPORT_SYMBOL_GPL(tty_perform_flush);
981int n_tty_ioctl(struct tty_struct *tty, struct file *file, 981int n_tty_ioctl(struct tty_struct *tty, struct file *file,
982 unsigned int cmd, unsigned long arg) 982 unsigned int cmd, unsigned long arg)
983{ 983{
984 struct tty_struct *real_tty;
985 unsigned long flags; 984 unsigned long flags;
986 int retval; 985 int retval;
987 986
988 if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
989 tty->driver->subtype == PTY_TYPE_MASTER)
990 real_tty = tty->link;
991 else
992 real_tty = tty;
993
994 switch (cmd) { 987 switch (cmd) {
995 case TCXONC: 988 case TCXONC:
996 retval = tty_check_change(tty); 989 retval = tty_check_change(tty);
diff --git a/drivers/hwmon/abituguru3.c b/drivers/hwmon/abituguru3.c
index ed33fddc4dee..f00f497b9ca9 100644
--- a/drivers/hwmon/abituguru3.c
+++ b/drivers/hwmon/abituguru3.c
@@ -30,6 +30,7 @@
30#include <linux/platform_device.h> 30#include <linux/platform_device.h>
31#include <linux/hwmon.h> 31#include <linux/hwmon.h>
32#include <linux/hwmon-sysfs.h> 32#include <linux/hwmon-sysfs.h>
33#include <linux/dmi.h>
33#include <asm/io.h> 34#include <asm/io.h>
34 35
35/* uGuru3 bank addresses */ 36/* uGuru3 bank addresses */
@@ -323,7 +324,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
323 { "AUX1 Fan", 36, 2, 60, 1, 0 }, 324 { "AUX1 Fan", 36, 2, 60, 1, 0 },
324 { NULL, 0, 0, 0, 0, 0 } } 325 { NULL, 0, 0, 0, 0, 0 } }
325 }, 326 },
326 { 0x0013, "unknown", { 327 { 0x0013, "Abit AW8D", {
327 { "CPU Core", 0, 0, 10, 1, 0 }, 328 { "CPU Core", 0, 0, 10, 1, 0 },
328 { "DDR", 1, 0, 10, 1, 0 }, 329 { "DDR", 1, 0, 10, 1, 0 },
329 { "DDR VTT", 2, 0, 10, 1, 0 }, 330 { "DDR VTT", 2, 0, 10, 1, 0 },
@@ -349,6 +350,7 @@ static const struct abituguru3_motherboard_info abituguru3_motherboards[] = {
349 { "AUX2 Fan", 36, 2, 60, 1, 0 }, 350 { "AUX2 Fan", 36, 2, 60, 1, 0 },
350 { "AUX3 Fan", 37, 2, 60, 1, 0 }, 351 { "AUX3 Fan", 37, 2, 60, 1, 0 },
351 { "AUX4 Fan", 38, 2, 60, 1, 0 }, 352 { "AUX4 Fan", 38, 2, 60, 1, 0 },
353 { "AUX5 Fan", 39, 2, 60, 1, 0 },
352 { NULL, 0, 0, 0, 0, 0 } } 354 { NULL, 0, 0, 0, 0, 0 } }
353 }, 355 },
354 { 0x0014, "Abit AB9 Pro", { 356 { 0x0014, "Abit AB9 Pro", {
@@ -1111,11 +1113,12 @@ static int __init abituguru3_detect(void)
1111{ 1113{
1112 /* See if there is an uguru3 there. An idle uGuru3 will hold 0x00 or 1114 /* See if there is an uguru3 there. An idle uGuru3 will hold 0x00 or
1113 0x08 at DATA and 0xAC at CMD. Sometimes the uGuru3 will hold 0x05 1115 0x08 at DATA and 0xAC at CMD. Sometimes the uGuru3 will hold 0x05
1114 at CMD instead, why is unknown. So we test for 0x05 too. */ 1116 or 0x55 at CMD instead, why is unknown. */
1115 u8 data_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_DATA); 1117 u8 data_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_DATA);
1116 u8 cmd_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_CMD); 1118 u8 cmd_val = inb_p(ABIT_UGURU3_BASE + ABIT_UGURU3_CMD);
1117 if (((data_val == 0x00) || (data_val == 0x08)) && 1119 if (((data_val == 0x00) || (data_val == 0x08)) &&
1118 ((cmd_val == 0xAC) || (cmd_val == 0x05))) 1120 ((cmd_val == 0xAC) || (cmd_val == 0x05) ||
1121 (cmd_val == 0x55)))
1119 return ABIT_UGURU3_BASE; 1122 return ABIT_UGURU3_BASE;
1120 1123
1121 ABIT_UGURU3_DEBUG("no Abit uGuru3 found, data = 0x%02X, cmd = " 1124 ABIT_UGURU3_DEBUG("no Abit uGuru3 found, data = 0x%02X, cmd = "
@@ -1138,6 +1141,15 @@ static int __init abituguru3_init(void)
1138 int address, err; 1141 int address, err;
1139 struct resource res = { .flags = IORESOURCE_IO }; 1142 struct resource res = { .flags = IORESOURCE_IO };
1140 1143
1144#ifdef CONFIG_DMI
1145 const char *board_vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
1146
1147 /* safety check, refuse to load on non Abit motherboards */
1148 if (!force && (!board_vendor ||
1149 strcmp(board_vendor, "http://www.abit.com.tw/")))
1150 return -ENODEV;
1151#endif
1152
1141 address = abituguru3_detect(); 1153 address = abituguru3_detect();
1142 if (address < 0) 1154 if (address < 0)
1143 return address; 1155 return address;
diff --git a/drivers/hwmon/adt7473.c b/drivers/hwmon/adt7473.c
index c1009d6f9796..93dbf5e7ff8a 100644
--- a/drivers/hwmon/adt7473.c
+++ b/drivers/hwmon/adt7473.c
@@ -309,6 +309,9 @@ no_sensor_update:
309 ADT7473_REG_PWM_BHVR(i)); 309 ADT7473_REG_PWM_BHVR(i));
310 } 310 }
311 311
312 i = i2c_smbus_read_byte_data(client, ADT7473_REG_CFG4);
313 data->max_duty_at_overheat = !!(i & ADT7473_CFG4_MAX_DUTY_AT_OVT);
314
312 data->limits_last_updated = local_jiffies; 315 data->limits_last_updated = local_jiffies;
313 data->limits_valid = 1; 316 data->limits_valid = 1;
314 317
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index fa7696905154..de698dc73020 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -251,10 +251,13 @@ static int lm75_detach_client(struct i2c_client *client)
251 the SMBus standard. */ 251 the SMBus standard. */
252static int lm75_read_value(struct i2c_client *client, u8 reg) 252static int lm75_read_value(struct i2c_client *client, u8 reg)
253{ 253{
254 int value;
255
254 if (reg == LM75_REG_CONF) 256 if (reg == LM75_REG_CONF)
255 return i2c_smbus_read_byte_data(client, reg); 257 return i2c_smbus_read_byte_data(client, reg);
256 else 258
257 return swab16(i2c_smbus_read_word_data(client, reg)); 259 value = i2c_smbus_read_word_data(client, reg);
260 return (value < 0) ? value : swab16(value);
258} 261}
259 262
260static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value) 263static int lm75_write_value(struct i2c_client *client, u8 reg, u16 value)
@@ -287,9 +290,16 @@ static struct lm75_data *lm75_update_device(struct device *dev)
287 int i; 290 int i;
288 dev_dbg(&client->dev, "Starting lm75 update\n"); 291 dev_dbg(&client->dev, "Starting lm75 update\n");
289 292
290 for (i = 0; i < ARRAY_SIZE(data->temp); i++) 293 for (i = 0; i < ARRAY_SIZE(data->temp); i++) {
291 data->temp[i] = lm75_read_value(client, 294 int status;
292 LM75_REG_TEMP[i]); 295
296 status = lm75_read_value(client, LM75_REG_TEMP[i]);
297 if (status < 0)
298 dev_dbg(&client->dev, "reg %d, err %d\n",
299 LM75_REG_TEMP[i], status);
300 else
301 data->temp[i] = status;
302 }
293 data->last_updated = jiffies; 303 data->last_updated = jiffies;
294 data->valid = 1; 304 data->valid = 1;
295 } 305 }
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 182fe6a5605f..ee5eca1c1921 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -192,23 +192,20 @@ static int RANGE_TO_REG( int range )
192{ 192{
193 int i; 193 int i;
194 194
195 if ( range < lm85_range_map[0] ) { 195 if (range >= lm85_range_map[15])
196 return 0 ;
197 } else if ( range > lm85_range_map[15] ) {
198 return 15 ; 196 return 15 ;
199 } else { /* find closest match */ 197
200 for ( i = 14 ; i >= 0 ; --i ) { 198 /* Find the closest match */
201 if ( range > lm85_range_map[i] ) { /* range bracketed */ 199 for (i = 14; i >= 0; --i) {
202 if ((lm85_range_map[i+1] - range) < 200 if (range >= lm85_range_map[i]) {
203 (range - lm85_range_map[i])) { 201 if ((lm85_range_map[i + 1] - range) <
204 i++; 202 (range - lm85_range_map[i]))
205 break; 203 return i + 1;
206 } 204 return i;
207 break;
208 }
209 } 205 }
210 } 206 }
211 return( i & 0x0f ); 207
208 return 0;
212} 209}
213#define RANGE_FROM_REG(val) (lm85_range_map[(val)&0x0f]) 210#define RANGE_FROM_REG(val) (lm85_range_map[(val)&0x0f])
214 211
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index b224079d4e1f..d5862e5d99a0 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
109{ 109{
110 struct page *page; 110 struct page *page;
111 111
112 page = alloc_pages(gfp_mask, order); 112 /*
113 * Use __GFP_ZERO because buggy firmware assumes ICM pages are
114 * cleared, and subtle failures are seen if they aren't.
115 */
116 page = alloc_pages(gfp_mask | __GFP_ZERO, order);
113 if (!page) 117 if (!page)
114 return -ENOMEM; 118 return -ENOMEM;
115 119
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 5126d5d9ea0e..2e554a4ab337 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
176 * we set it now, so we can trap and pass that trap to the Guest if it 176 * we set it now, so we can trap and pass that trap to the Guest if it
177 * uses the FPU. */ 177 * uses the FPU. */
178 if (cpu->ts) 178 if (cpu->ts)
179 lguest_set_ts(); 179 unlazy_fpu(current);
180 180
181 /* SYSENTER is an optimized way of doing system calls. We can't allow 181 /* SYSENTER is an optimized way of doing system calls. We can't allow
182 * it because it always jumps to privilege level 0. A normal Guest 182 * it because it always jumps to privilege level 0. A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
196 * trap made the switcher code come back, and an error code which some 196 * trap made the switcher code come back, and an error code which some
197 * traps set. */ 197 * traps set. */
198 198
199 /* Restore SYSENTER if it's supposed to be on. */
200 if (boot_cpu_has(X86_FEATURE_SEP))
201 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
202
199 /* If the Guest page faulted, then the cr2 register will tell us the 203 /* If the Guest page faulted, then the cr2 register will tell us the
200 * bad virtual address. We have to grab this now, because once we 204 * bad virtual address. We have to grab this now, because once we
201 * re-enable interrupts an interrupt could fault and thus overwrite 205 * re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
203 if (cpu->regs->trapnum == 14) 207 if (cpu->regs->trapnum == 14)
204 cpu->arch.last_pagefault = read_cr2(); 208 cpu->arch.last_pagefault = read_cr2();
205 /* Similarly, if we took a trap because the Guest used the FPU, 209 /* Similarly, if we took a trap because the Guest used the FPU,
206 * we have to restore the FPU it expects to see. */ 210 * we have to restore the FPU it expects to see.
211 * math_state_restore() may sleep and we may even move off to
212 * a different CPU. So all the critical stuff should be done
213 * before this. */
207 else if (cpu->regs->trapnum == 7) 214 else if (cpu->regs->trapnum == 7)
208 math_state_restore(); 215 math_state_restore();
209
210 /* Restore SYSENTER if it's supposed to be on. */
211 if (boot_cpu_has(X86_FEATURE_SEP))
212 wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
213} 216}
214 217
215/*H:130 Now we've examined the hypercall code; our Guest can make requests. 218/*H:130 Now we've examined the hypercall code; our Guest can make requests.
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index bafb69b6f7cb..fc6f4b8c64b3 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -942,7 +942,7 @@ static int pppoe_recvmsg(struct kiocb *iocb, struct socket *sock,
942 m->msg_namelen = 0; 942 m->msg_namelen = 0;
943 943
944 if (skb) { 944 if (skb) {
945 total_len = min(total_len, skb->len); 945 total_len = min_t(size_t, total_len, skb->len);
946 error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len); 946 error = skb_copy_datagram_iovec(skb, 0, m->msg_iov, total_len);
947 if (error == 0) 947 if (error == 0)
948 error = total_len; 948 error = total_len;
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index 8662a6b7a30b..25b352b664d9 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -68,7 +68,6 @@ obj-$(CONFIG_WAFER_WDT) += wafer5823wdt.o
68obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o 68obj-$(CONFIG_I6300ESB_WDT) += i6300esb.o
69obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o 69obj-$(CONFIG_ITCO_WDT) += iTCO_wdt.o iTCO_vendor_support.o
70obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o 70obj-$(CONFIG_IT8712F_WDT) += it8712f_wdt.o
71CFLAGS_hpwdt.o += -O
72obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o 71obj-$(CONFIG_HP_WATCHDOG) += hpwdt.o
73obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o 72obj-$(CONFIG_SC1200_WDT) += sc1200wdt.o
74obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o 73obj-$(CONFIG_SCx200_WDT) += scx200_wdt.o
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4f0f22b020ea..76e5b7386af9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
529 529
530#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */ 530#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
531 /* Clear master flag /before/ clearing selector flag. */ 531 /* Clear master flag /before/ clearing selector flag. */
532 rmb(); 532 wmb();
533#endif 533#endif
534 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0); 534 pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
535 while (pending_words != 0) { 535 while (pending_words != 0) {
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9ecb92f68543..9ff7b1c04239 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -855,7 +855,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
855 */ 855 */
856 856
857 /* Update group descriptor block for new group */ 857 /* Update group descriptor block for new group */
858 gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; 858 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
859 gdb_off * EXT4_DESC_SIZE(sb));
859 860
860 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ 861 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
861 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ 862 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index c19184f2e70e..bec76b1c2bb0 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -246,15 +246,11 @@ static void find_metapath(const struct gfs2_sbd *sdp, u64 block,
246 246
247} 247}
248 248
249static inline unsigned int zero_metapath_length(const struct metapath *mp, 249static inline unsigned int metapath_branch_start(const struct metapath *mp)
250 unsigned height)
251{ 250{
252 unsigned int i; 251 if (mp->mp_list[0] == 0)
253 for (i = 0; i < height - 1; i++) { 252 return 2;
254 if (mp->mp_list[i] != 0) 253 return 1;
255 return i;
256 }
257 return height;
258} 254}
259 255
260/** 256/**
@@ -436,7 +432,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
436 struct gfs2_sbd *sdp = GFS2_SB(inode); 432 struct gfs2_sbd *sdp = GFS2_SB(inode);
437 struct buffer_head *dibh = mp->mp_bh[0]; 433 struct buffer_head *dibh = mp->mp_bh[0];
438 u64 bn, dblock = 0; 434 u64 bn, dblock = 0;
439 unsigned n, i, blks, alloced = 0, iblks = 0, zmpl = 0; 435 unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
440 unsigned dblks = 0; 436 unsigned dblks = 0;
441 unsigned ptrs_per_blk; 437 unsigned ptrs_per_blk;
442 const unsigned end_of_metadata = height - 1; 438 const unsigned end_of_metadata = height - 1;
@@ -471,9 +467,8 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
471 /* Building up tree height */ 467 /* Building up tree height */
472 state = ALLOC_GROW_HEIGHT; 468 state = ALLOC_GROW_HEIGHT;
473 iblks = height - ip->i_height; 469 iblks = height - ip->i_height;
474 zmpl = zero_metapath_length(mp, height); 470 branch_start = metapath_branch_start(mp);
475 iblks -= zmpl; 471 iblks += (height - branch_start);
476 iblks += height;
477 } 472 }
478 } 473 }
479 474
@@ -509,13 +504,13 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
509 sizeof(struct gfs2_meta_header)); 504 sizeof(struct gfs2_meta_header));
510 *ptr = zero_bn; 505 *ptr = zero_bn;
511 state = ALLOC_GROW_DEPTH; 506 state = ALLOC_GROW_DEPTH;
512 for(i = zmpl; i < height; i++) { 507 for(i = branch_start; i < height; i++) {
513 if (mp->mp_bh[i] == NULL) 508 if (mp->mp_bh[i] == NULL)
514 break; 509 break;
515 brelse(mp->mp_bh[i]); 510 brelse(mp->mp_bh[i]);
516 mp->mp_bh[i] = NULL; 511 mp->mp_bh[i] = NULL;
517 } 512 }
518 i = zmpl; 513 i = branch_start;
519 } 514 }
520 if (n == 0) 515 if (n == 0)
521 break; 516 break;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 6387523a3153..3401628d742b 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -195,7 +195,7 @@ ulong_aligned:
195 depending on architecture. I've experimented with several ways 195 depending on architecture. I've experimented with several ways
196 of writing this section such as using an else before the goto 196 of writing this section such as using an else before the goto
197 but this one seems to be the fastest. */ 197 but this one seems to be the fastest. */
198 while ((unsigned char *)plong < end - 1) { 198 while ((unsigned char *)plong < end - sizeof(unsigned long)) {
199 prefetch(plong + 1); 199 prefetch(plong + 1);
200 if (((*plong) & LBITMASK) != lskipval) 200 if (((*plong) & LBITMASK) != lskipval)
201 break; 201 break;
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 49c7cd0502cc..779d2eb649c5 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -130,10 +130,11 @@ static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
130 struct mnt_fhstatus *res) 130 struct mnt_fhstatus *res)
131{ 131{
132 struct nfs_fh *fh = res->fh; 132 struct nfs_fh *fh = res->fh;
133 unsigned size;
133 134
134 if ((res->status = ntohl(*p++)) == 0) { 135 if ((res->status = ntohl(*p++)) == 0) {
135 int size = ntohl(*p++); 136 size = ntohl(*p++);
136 if (size <= NFS3_FHSIZE) { 137 if (size <= NFS3_FHSIZE && size != 0) {
137 fh->size = size; 138 fh->size = size;
138 memcpy(fh->data, p, size); 139 memcpy(fh->data, p, size);
139 } else 140 } else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2a4a024a4e7b..614efeed5437 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1216,8 +1216,6 @@ static int nfs_validate_mount_data(void *options,
1216{ 1216{
1217 struct nfs_mount_data *data = (struct nfs_mount_data *)options; 1217 struct nfs_mount_data *data = (struct nfs_mount_data *)options;
1218 1218
1219 memset(args, 0, sizeof(*args));
1220
1221 if (data == NULL) 1219 if (data == NULL)
1222 goto out_no_data; 1220 goto out_no_data;
1223 1221
@@ -1251,13 +1249,13 @@ static int nfs_validate_mount_data(void *options,
1251 case 5: 1249 case 5:
1252 memset(data->context, 0, sizeof(data->context)); 1250 memset(data->context, 0, sizeof(data->context));
1253 case 6: 1251 case 6:
1254 if (data->flags & NFS_MOUNT_VER3) 1252 if (data->flags & NFS_MOUNT_VER3) {
1253 if (data->root.size > NFS3_FHSIZE || data->root.size == 0)
1254 goto out_invalid_fh;
1255 mntfh->size = data->root.size; 1255 mntfh->size = data->root.size;
1256 else 1256 } else
1257 mntfh->size = NFS2_FHSIZE; 1257 mntfh->size = NFS2_FHSIZE;
1258 1258
1259 if (mntfh->size > sizeof(mntfh->data))
1260 goto out_invalid_fh;
1261 1259
1262 memcpy(mntfh->data, data->root.data, mntfh->size); 1260 memcpy(mntfh->data, data->root.data, mntfh->size);
1263 if (mntfh->size < sizeof(mntfh->data)) 1261 if (mntfh->size < sizeof(mntfh->data))
@@ -1585,24 +1583,29 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1585{ 1583{
1586 struct nfs_server *server = NULL; 1584 struct nfs_server *server = NULL;
1587 struct super_block *s; 1585 struct super_block *s;
1588 struct nfs_fh mntfh; 1586 struct nfs_parsed_mount_data *data;
1589 struct nfs_parsed_mount_data data; 1587 struct nfs_fh *mntfh;
1590 struct dentry *mntroot; 1588 struct dentry *mntroot;
1591 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 1589 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1592 struct nfs_sb_mountdata sb_mntdata = { 1590 struct nfs_sb_mountdata sb_mntdata = {
1593 .mntflags = flags, 1591 .mntflags = flags,
1594 }; 1592 };
1595 int error; 1593 int error = -ENOMEM;
1594
1595 data = kzalloc(sizeof(*data), GFP_KERNEL);
1596 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
1597 if (data == NULL || mntfh == NULL)
1598 goto out_free_fh;
1596 1599
1597 security_init_mnt_opts(&data.lsm_opts); 1600 security_init_mnt_opts(&data->lsm_opts);
1598 1601
1599 /* Validate the mount data */ 1602 /* Validate the mount data */
1600 error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); 1603 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
1601 if (error < 0) 1604 if (error < 0)
1602 goto out; 1605 goto out;
1603 1606
1604 /* Get a volume representation */ 1607 /* Get a volume representation */
1605 server = nfs_create_server(&data, &mntfh); 1608 server = nfs_create_server(data, mntfh);
1606 if (IS_ERR(server)) { 1609 if (IS_ERR(server)) {
1607 error = PTR_ERR(server); 1610 error = PTR_ERR(server);
1608 goto out; 1611 goto out;
@@ -1630,16 +1633,16 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1630 1633
1631 if (!s->s_root) { 1634 if (!s->s_root) {
1632 /* initial superblock/root creation */ 1635 /* initial superblock/root creation */
1633 nfs_fill_super(s, &data); 1636 nfs_fill_super(s, data);
1634 } 1637 }
1635 1638
1636 mntroot = nfs_get_root(s, &mntfh); 1639 mntroot = nfs_get_root(s, mntfh);
1637 if (IS_ERR(mntroot)) { 1640 if (IS_ERR(mntroot)) {
1638 error = PTR_ERR(mntroot); 1641 error = PTR_ERR(mntroot);
1639 goto error_splat_super; 1642 goto error_splat_super;
1640 } 1643 }
1641 1644
1642 error = security_sb_set_mnt_opts(s, &data.lsm_opts); 1645 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
1643 if (error) 1646 if (error)
1644 goto error_splat_root; 1647 goto error_splat_root;
1645 1648
@@ -1649,9 +1652,12 @@ static int nfs_get_sb(struct file_system_type *fs_type,
1649 error = 0; 1652 error = 0;
1650 1653
1651out: 1654out:
1652 kfree(data.nfs_server.hostname); 1655 kfree(data->nfs_server.hostname);
1653 kfree(data.mount_server.hostname); 1656 kfree(data->mount_server.hostname);
1654 security_free_mnt_opts(&data.lsm_opts); 1657 security_free_mnt_opts(&data->lsm_opts);
1658out_free_fh:
1659 kfree(mntfh);
1660 kfree(data);
1655 return error; 1661 return error;
1656 1662
1657out_err_nosb: 1663out_err_nosb:
@@ -1800,8 +1806,6 @@ static int nfs4_validate_mount_data(void *options,
1800 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; 1806 struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
1801 char *c; 1807 char *c;
1802 1808
1803 memset(args, 0, sizeof(*args));
1804
1805 if (data == NULL) 1809 if (data == NULL)
1806 goto out_no_data; 1810 goto out_no_data;
1807 1811
@@ -1959,26 +1963,31 @@ out_no_client_address:
1959static int nfs4_get_sb(struct file_system_type *fs_type, 1963static int nfs4_get_sb(struct file_system_type *fs_type,
1960 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 1964 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
1961{ 1965{
1962 struct nfs_parsed_mount_data data; 1966 struct nfs_parsed_mount_data *data;
1963 struct super_block *s; 1967 struct super_block *s;
1964 struct nfs_server *server; 1968 struct nfs_server *server;
1965 struct nfs_fh mntfh; 1969 struct nfs_fh *mntfh;
1966 struct dentry *mntroot; 1970 struct dentry *mntroot;
1967 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 1971 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
1968 struct nfs_sb_mountdata sb_mntdata = { 1972 struct nfs_sb_mountdata sb_mntdata = {
1969 .mntflags = flags, 1973 .mntflags = flags,
1970 }; 1974 };
1971 int error; 1975 int error = -ENOMEM;
1972 1976
1973 security_init_mnt_opts(&data.lsm_opts); 1977 data = kzalloc(sizeof(*data), GFP_KERNEL);
1978 mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
1979 if (data == NULL || mntfh == NULL)
1980 goto out_free_fh;
1981
1982 security_init_mnt_opts(&data->lsm_opts);
1974 1983
1975 /* Validate the mount data */ 1984 /* Validate the mount data */
1976 error = nfs4_validate_mount_data(raw_data, &data, dev_name); 1985 error = nfs4_validate_mount_data(raw_data, data, dev_name);
1977 if (error < 0) 1986 if (error < 0)
1978 goto out; 1987 goto out;
1979 1988
1980 /* Get a volume representation */ 1989 /* Get a volume representation */
1981 server = nfs4_create_server(&data, &mntfh); 1990 server = nfs4_create_server(data, mntfh);
1982 if (IS_ERR(server)) { 1991 if (IS_ERR(server)) {
1983 error = PTR_ERR(server); 1992 error = PTR_ERR(server);
1984 goto out; 1993 goto out;
@@ -2009,13 +2018,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2009 nfs4_fill_super(s); 2018 nfs4_fill_super(s);
2010 } 2019 }
2011 2020
2012 mntroot = nfs4_get_root(s, &mntfh); 2021 mntroot = nfs4_get_root(s, mntfh);
2013 if (IS_ERR(mntroot)) { 2022 if (IS_ERR(mntroot)) {
2014 error = PTR_ERR(mntroot); 2023 error = PTR_ERR(mntroot);
2015 goto error_splat_super; 2024 goto error_splat_super;
2016 } 2025 }
2017 2026
2018 error = security_sb_set_mnt_opts(s, &data.lsm_opts); 2027 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2019 if (error) 2028 if (error)
2020 goto error_splat_root; 2029 goto error_splat_root;
2021 2030
@@ -2025,10 +2034,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2025 error = 0; 2034 error = 0;
2026 2035
2027out: 2036out:
2028 kfree(data.client_address); 2037 kfree(data->client_address);
2029 kfree(data.nfs_server.export_path); 2038 kfree(data->nfs_server.export_path);
2030 kfree(data.nfs_server.hostname); 2039 kfree(data->nfs_server.hostname);
2031 security_free_mnt_opts(&data.lsm_opts); 2040 security_free_mnt_opts(&data->lsm_opts);
2041out_free_fh:
2042 kfree(mntfh);
2043 kfree(data);
2032 return error; 2044 return error;
2033 2045
2034out_free: 2046out_free:
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6d8ace3e3259..f333848fd3be 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -739,12 +739,13 @@ int nfs_updatepage(struct file *file, struct page *page,
739 } 739 }
740 740
741 status = nfs_writepage_setup(ctx, page, offset, count); 741 status = nfs_writepage_setup(ctx, page, offset, count);
742 __set_page_dirty_nobuffers(page); 742 if (status < 0)
743 nfs_set_pageerror(page);
744 else
745 __set_page_dirty_nobuffers(page);
743 746
744 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", 747 dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
745 status, (long long)i_size_read(inode)); 748 status, (long long)i_size_read(inode));
746 if (status < 0)
747 nfs_set_pageerror(page);
748 return status; 749 return status;
749} 750}
750 751
diff --git a/fs/select.c b/fs/select.c
index 8dda969614a9..da0e88201c3a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
249 retval++; 249 retval++;
250 } 250 }
251 } 251 }
252 cond_resched();
253 } 252 }
254 if (res_in) 253 if (res_in)
255 *rinp = res_in; 254 *rinp = res_in;
@@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
257 *routp = res_out; 256 *routp = res_out;
258 if (res_ex) 257 if (res_ex)
259 *rexp = res_ex; 258 *rexp = res_ex;
259 cond_resched();
260 } 260 }
261 wait = NULL; 261 wait = NULL;
262 if (retval || !*timeout || signal_pending(current)) 262 if (retval || !*timeout || signal_pending(current))
diff --git a/include/asm-alpha/core_mcpcia.h b/include/asm-alpha/core_mcpcia.h
index 525b4f6a7ace..acf55b483472 100644
--- a/include/asm-alpha/core_mcpcia.h
+++ b/include/asm-alpha/core_mcpcia.h
@@ -261,7 +261,7 @@ struct el_MCPCIA_uncorrected_frame_mcheck {
261 } 261 }
262#endif 262#endif
263 263
264static inline int __mcpcia_is_mmio(unsigned long addr) 264extern inline int __mcpcia_is_mmio(unsigned long addr)
265{ 265{
266 return (addr & 0x80000000UL) == 0; 266 return (addr & 0x80000000UL) == 0;
267} 267}
diff --git a/include/asm-alpha/core_t2.h b/include/asm-alpha/core_t2.h
index 90e6b5d6c214..46bfff58f670 100644
--- a/include/asm-alpha/core_t2.h
+++ b/include/asm-alpha/core_t2.h
@@ -356,13 +356,13 @@ struct el_t2_frame_corrected {
356#define vip volatile int * 356#define vip volatile int *
357#define vuip volatile unsigned int * 357#define vuip volatile unsigned int *
358 358
359static inline u8 t2_inb(unsigned long addr) 359extern inline u8 t2_inb(unsigned long addr)
360{ 360{
361 long result = *(vip) ((addr << 5) + T2_IO + 0x00); 361 long result = *(vip) ((addr << 5) + T2_IO + 0x00);
362 return __kernel_extbl(result, addr & 3); 362 return __kernel_extbl(result, addr & 3);
363} 363}
364 364
365static inline void t2_outb(u8 b, unsigned long addr) 365extern inline void t2_outb(u8 b, unsigned long addr)
366{ 366{
367 unsigned long w; 367 unsigned long w;
368 368
@@ -371,13 +371,13 @@ static inline void t2_outb(u8 b, unsigned long addr)
371 mb(); 371 mb();
372} 372}
373 373
374static inline u16 t2_inw(unsigned long addr) 374extern inline u16 t2_inw(unsigned long addr)
375{ 375{
376 long result = *(vip) ((addr << 5) + T2_IO + 0x08); 376 long result = *(vip) ((addr << 5) + T2_IO + 0x08);
377 return __kernel_extwl(result, addr & 3); 377 return __kernel_extwl(result, addr & 3);
378} 378}
379 379
380static inline void t2_outw(u16 b, unsigned long addr) 380extern inline void t2_outw(u16 b, unsigned long addr)
381{ 381{
382 unsigned long w; 382 unsigned long w;
383 383
@@ -386,12 +386,12 @@ static inline void t2_outw(u16 b, unsigned long addr)
386 mb(); 386 mb();
387} 387}
388 388
389static inline u32 t2_inl(unsigned long addr) 389extern inline u32 t2_inl(unsigned long addr)
390{ 390{
391 return *(vuip) ((addr << 5) + T2_IO + 0x18); 391 return *(vuip) ((addr << 5) + T2_IO + 0x18);
392} 392}
393 393
394static inline void t2_outl(u32 b, unsigned long addr) 394extern inline void t2_outl(u32 b, unsigned long addr)
395{ 395{
396 *(vuip) ((addr << 5) + T2_IO + 0x18) = b; 396 *(vuip) ((addr << 5) + T2_IO + 0x18) = b;
397 mb(); 397 mb();
@@ -435,7 +435,7 @@ static inline void t2_outl(u32 b, unsigned long addr)
435 set_hae(msb); \ 435 set_hae(msb); \
436} 436}
437 437
438static DEFINE_SPINLOCK(t2_hae_lock); 438extern spinlock_t t2_hae_lock;
439 439
440/* 440/*
441 * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since 441 * NOTE: take T2_DENSE_MEM off in each readX/writeX routine, since
diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h
index 38f18cf18c9d..e971ab000f95 100644
--- a/include/asm-alpha/io.h
+++ b/include/asm-alpha/io.h
@@ -35,7 +35,7 @@
35 * register not being up-to-date with respect to the hardware 35 * register not being up-to-date with respect to the hardware
36 * value. 36 * value.
37 */ 37 */
38static inline void __set_hae(unsigned long new_hae) 38extern inline void __set_hae(unsigned long new_hae)
39{ 39{
40 unsigned long flags; 40 unsigned long flags;
41 local_irq_save(flags); 41 local_irq_save(flags);
@@ -49,7 +49,7 @@ static inline void __set_hae(unsigned long new_hae)
49 local_irq_restore(flags); 49 local_irq_restore(flags);
50} 50}
51 51
52static inline void set_hae(unsigned long new_hae) 52extern inline void set_hae(unsigned long new_hae)
53{ 53{
54 if (new_hae != alpha_mv.hae_cache) 54 if (new_hae != alpha_mv.hae_cache)
55 __set_hae(new_hae); 55 __set_hae(new_hae);
@@ -176,7 +176,7 @@ REMAP2(u64, writeq, volatile)
176#undef REMAP1 176#undef REMAP1
177#undef REMAP2 177#undef REMAP2
178 178
179static inline void __iomem *generic_ioportmap(unsigned long a) 179extern inline void __iomem *generic_ioportmap(unsigned long a)
180{ 180{
181 return alpha_mv.mv_ioportmap(a); 181 return alpha_mv.mv_ioportmap(a);
182} 182}
diff --git a/include/asm-alpha/mmu_context.h b/include/asm-alpha/mmu_context.h
index 6a5be1f7debf..86c08a02d239 100644
--- a/include/asm-alpha/mmu_context.h
+++ b/include/asm-alpha/mmu_context.h
@@ -23,7 +23,7 @@
23#endif 23#endif
24 24
25 25
26extern inline unsigned long 26static inline unsigned long
27__reload_thread(struct pcb_struct *pcb) 27__reload_thread(struct pcb_struct *pcb)
28{ 28{
29 register unsigned long a0 __asm__("$16"); 29 register unsigned long a0 __asm__("$16");
@@ -114,7 +114,7 @@ extern unsigned long last_asn;
114#define __MMU_EXTERN_INLINE 114#define __MMU_EXTERN_INLINE
115#endif 115#endif
116 116
117static inline unsigned long 117extern inline unsigned long
118__get_new_mm_context(struct mm_struct *mm, long cpu) 118__get_new_mm_context(struct mm_struct *mm, long cpu)
119{ 119{
120 unsigned long asn = cpu_last_asn(cpu); 120 unsigned long asn = cpu_last_asn(cpu);
@@ -226,7 +226,7 @@ ev4_activate_mm(struct mm_struct *prev_mm, struct mm_struct *next_mm)
226# endif 226# endif
227#endif 227#endif
228 228
229extern inline int 229static inline int
230init_new_context(struct task_struct *tsk, struct mm_struct *mm) 230init_new_context(struct task_struct *tsk, struct mm_struct *mm)
231{ 231{
232 int i; 232 int i;
diff --git a/include/asm-alpha/percpu.h b/include/asm-alpha/percpu.h
index 48348fe34c19..3495e8e00d70 100644
--- a/include/asm-alpha/percpu.h
+++ b/include/asm-alpha/percpu.h
@@ -1,6 +1,78 @@
1#ifndef __ALPHA_PERCPU_H 1#ifndef __ALPHA_PERCPU_H
2#define __ALPHA_PERCPU_H 2#define __ALPHA_PERCPU_H
3#include <linux/compiler.h>
4#include <linux/threads.h>
3 5
4#include <asm-generic/percpu.h> 6/*
7 * Determine the real variable name from the name visible in the
8 * kernel sources.
9 */
10#define per_cpu_var(var) per_cpu__##var
11
12#ifdef CONFIG_SMP
13
14/*
15 * per_cpu_offset() is the offset that has to be added to a
16 * percpu variable to get to the instance for a certain processor.
17 */
18extern unsigned long __per_cpu_offset[NR_CPUS];
19
20#define per_cpu_offset(x) (__per_cpu_offset[x])
21
22#define __my_cpu_offset per_cpu_offset(raw_smp_processor_id())
23#ifdef CONFIG_DEBUG_PREEMPT
24#define my_cpu_offset per_cpu_offset(smp_processor_id())
25#else
26#define my_cpu_offset __my_cpu_offset
27#endif
28
29#ifndef MODULE
30#define SHIFT_PERCPU_PTR(var, offset) RELOC_HIDE(&per_cpu_var(var), (offset))
31#define PER_CPU_ATTRIBUTES
32#else
33/*
34 * To calculate addresses of locally defined variables, GCC uses 32-bit
35 * displacement from the GP. Which doesn't work for per cpu variables in
36 * modules, as an offset to the kernel per cpu area is way above 4G.
37 *
38 * This forces allocation of a GOT entry for per cpu variable using
39 * ldq instruction with a 'literal' relocation.
40 */
41#define SHIFT_PERCPU_PTR(var, offset) ({ \
42 extern int simple_identifier_##var(void); \
43 unsigned long __ptr, tmp_gp; \
44 asm ( "br %1, 1f \n\
45 1: ldgp %1, 0(%1) \n\
46 ldq %0, per_cpu__" #var"(%1)\t!literal" \
47 : "=&r"(__ptr), "=&r"(tmp_gp)); \
48 (typeof(&per_cpu_var(var)))(__ptr + (offset)); })
49
50#define PER_CPU_ATTRIBUTES __used
51
52#endif /* MODULE */
53
54/*
55 * A percpu variable may point to a discarded regions. The following are
56 * established ways to produce a usable pointer from the percpu variable
57 * offset.
58 */
59#define per_cpu(var, cpu) \
60 (*SHIFT_PERCPU_PTR(var, per_cpu_offset(cpu)))
61#define __get_cpu_var(var) \
62 (*SHIFT_PERCPU_PTR(var, my_cpu_offset))
63#define __raw_get_cpu_var(var) \
64 (*SHIFT_PERCPU_PTR(var, __my_cpu_offset))
65
66#else /* ! SMP */
67
68#define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var)))
69#define __get_cpu_var(var) per_cpu_var(var)
70#define __raw_get_cpu_var(var) per_cpu_var(var)
71
72#define PER_CPU_ATTRIBUTES
73
74#endif /* SMP */
75
76#define DECLARE_PER_CPU(type, name) extern __typeof__(type) per_cpu_var(name)
5 77
6#endif /* __ALPHA_PERCPU_H */ 78#endif /* __ALPHA_PERCPU_H */
diff --git a/include/asm-alpha/system.h b/include/asm-alpha/system.h
index ed221d6408fc..afe20fa58c99 100644
--- a/include/asm-alpha/system.h
+++ b/include/asm-alpha/system.h
@@ -184,7 +184,7 @@ enum amask_enum {
184 __amask; }) 184 __amask; })
185 185
186#define __CALL_PAL_R0(NAME, TYPE) \ 186#define __CALL_PAL_R0(NAME, TYPE) \
187static inline TYPE NAME(void) \ 187extern inline TYPE NAME(void) \
188{ \ 188{ \
189 register TYPE __r0 __asm__("$0"); \ 189 register TYPE __r0 __asm__("$0"); \
190 __asm__ __volatile__( \ 190 __asm__ __volatile__( \
@@ -196,7 +196,7 @@ static inline TYPE NAME(void) \
196} 196}
197 197
198#define __CALL_PAL_W1(NAME, TYPE0) \ 198#define __CALL_PAL_W1(NAME, TYPE0) \
199static inline void NAME(TYPE0 arg0) \ 199extern inline void NAME(TYPE0 arg0) \
200{ \ 200{ \
201 register TYPE0 __r16 __asm__("$16") = arg0; \ 201 register TYPE0 __r16 __asm__("$16") = arg0; \
202 __asm__ __volatile__( \ 202 __asm__ __volatile__( \
@@ -207,7 +207,7 @@ static inline void NAME(TYPE0 arg0) \
207} 207}
208 208
209#define __CALL_PAL_W2(NAME, TYPE0, TYPE1) \ 209#define __CALL_PAL_W2(NAME, TYPE0, TYPE1) \
210static inline void NAME(TYPE0 arg0, TYPE1 arg1) \ 210extern inline void NAME(TYPE0 arg0, TYPE1 arg1) \
211{ \ 211{ \
212 register TYPE0 __r16 __asm__("$16") = arg0; \ 212 register TYPE0 __r16 __asm__("$16") = arg0; \
213 register TYPE1 __r17 __asm__("$17") = arg1; \ 213 register TYPE1 __r17 __asm__("$17") = arg1; \
@@ -219,7 +219,7 @@ static inline void NAME(TYPE0 arg0, TYPE1 arg1) \
219} 219}
220 220
221#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0) \ 221#define __CALL_PAL_RW1(NAME, RTYPE, TYPE0) \
222static inline RTYPE NAME(TYPE0 arg0) \ 222extern inline RTYPE NAME(TYPE0 arg0) \
223{ \ 223{ \
224 register RTYPE __r0 __asm__("$0"); \ 224 register RTYPE __r0 __asm__("$0"); \
225 register TYPE0 __r16 __asm__("$16") = arg0; \ 225 register TYPE0 __r16 __asm__("$16") = arg0; \
@@ -232,7 +232,7 @@ static inline RTYPE NAME(TYPE0 arg0) \
232} 232}
233 233
234#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1) \ 234#define __CALL_PAL_RW2(NAME, RTYPE, TYPE0, TYPE1) \
235static inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \ 235extern inline RTYPE NAME(TYPE0 arg0, TYPE1 arg1) \
236{ \ 236{ \
237 register RTYPE __r0 __asm__("$0"); \ 237 register RTYPE __r0 __asm__("$0"); \
238 register TYPE0 __r16 __asm__("$16") = arg0; \ 238 register TYPE0 __r16 __asm__("$16") = arg0; \
diff --git a/include/asm-alpha/vga.h b/include/asm-alpha/vga.h
index e8df1e7aae6b..c00106bac521 100644
--- a/include/asm-alpha/vga.h
+++ b/include/asm-alpha/vga.h
@@ -13,7 +13,7 @@
13#define VT_BUF_HAVE_MEMSETW 13#define VT_BUF_HAVE_MEMSETW
14#define VT_BUF_HAVE_MEMCPYW 14#define VT_BUF_HAVE_MEMCPYW
15 15
16extern inline void scr_writew(u16 val, volatile u16 *addr) 16static inline void scr_writew(u16 val, volatile u16 *addr)
17{ 17{
18 if (__is_ioaddr(addr)) 18 if (__is_ioaddr(addr))
19 __raw_writew(val, (volatile u16 __iomem *) addr); 19 __raw_writew(val, (volatile u16 __iomem *) addr);
@@ -21,7 +21,7 @@ extern inline void scr_writew(u16 val, volatile u16 *addr)
21 *addr = val; 21 *addr = val;
22} 22}
23 23
24extern inline u16 scr_readw(volatile const u16 *addr) 24static inline u16 scr_readw(volatile const u16 *addr)
25{ 25{
26 if (__is_ioaddr(addr)) 26 if (__is_ioaddr(addr))
27 return __raw_readw((volatile const u16 __iomem *) addr); 27 return __raw_readw((volatile const u16 __iomem *) addr);
@@ -29,7 +29,7 @@ extern inline u16 scr_readw(volatile const u16 *addr)
29 return *addr; 29 return *addr;
30} 30}
31 31
32extern inline void scr_memsetw(u16 *s, u16 c, unsigned int count) 32static inline void scr_memsetw(u16 *s, u16 c, unsigned int count)
33{ 33{
34 if (__is_ioaddr(s)) 34 if (__is_ioaddr(s))
35 memsetw_io((u16 __iomem *) s, c, count); 35 memsetw_io((u16 __iomem *) s, c, count);
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1d8cd01fa514..844f2a89afbc 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -18,6 +18,7 @@
18#include <linux/kvm_para.h> 18#include <linux/kvm_para.h>
19#include <linux/kvm_types.h> 19#include <linux/kvm_types.h>
20 20
21#include <asm/pvclock-abi.h>
21#include <asm/desc.h> 22#include <asm/desc.h>
22 23
23#define KVM_MAX_VCPUS 16 24#define KVM_MAX_VCPUS 16
@@ -282,7 +283,8 @@ struct kvm_vcpu_arch {
282 struct x86_emulate_ctxt emulate_ctxt; 283 struct x86_emulate_ctxt emulate_ctxt;
283 284
284 gpa_t time; 285 gpa_t time;
285 struct kvm_vcpu_time_info hv_clock; 286 struct pvclock_vcpu_time_info hv_clock;
287 unsigned int hv_clock_tsc_khz;
286 unsigned int time_offset; 288 unsigned int time_offset;
287 struct page *time_page; 289 struct page *time_page;
288}; 290};
diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 509845942070..bfd9900742bf 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -48,24 +48,6 @@ struct kvm_mmu_op_release_pt {
48#ifdef __KERNEL__ 48#ifdef __KERNEL__
49#include <asm/processor.h> 49#include <asm/processor.h>
50 50
51/* xen binary-compatible interface. See xen headers for details */
52struct kvm_vcpu_time_info {
53 uint32_t version;
54 uint32_t pad0;
55 uint64_t tsc_timestamp;
56 uint64_t system_time;
57 uint32_t tsc_to_system_mul;
58 int8_t tsc_shift;
59 int8_t pad[3];
60} __attribute__((__packed__)); /* 32 bytes */
61
62struct kvm_wall_clock {
63 uint32_t wc_version;
64 uint32_t wc_sec;
65 uint32_t wc_nsec;
66} __attribute__((__packed__));
67
68
69extern void kvmclock_init(void); 51extern void kvmclock_init(void);
70 52
71 53
diff --git a/include/asm-x86/pvclock-abi.h b/include/asm-x86/pvclock-abi.h
new file mode 100644
index 000000000000..6857f840b243
--- /dev/null
+++ b/include/asm-x86/pvclock-abi.h
@@ -0,0 +1,42 @@
1#ifndef _ASM_X86_PVCLOCK_ABI_H_
2#define _ASM_X86_PVCLOCK_ABI_H_
3#ifndef __ASSEMBLY__
4
5/*
6 * These structs MUST NOT be changed.
7 * They are the ABI between hypervisor and guest OS.
8 * Both Xen and KVM are using this.
9 *
10 * pvclock_vcpu_time_info holds the system time and the tsc timestamp
11 * of the last update. So the guest can use the tsc delta to get a
12 * more precise system time. There is one per virtual cpu.
13 *
14 * pvclock_wall_clock references the point in time when the system
15 * time was zero (usually boot time), thus the guest calculates the
16 * current wall clock by adding the system time.
17 *
18 * Protocol for the "version" fields is: hypervisor raises it (making
19 * it uneven) before it starts updating the fields and raises it again
20 * (making it even) when it is done. Thus the guest can make sure the
21 * time values it got are consistent by checking the version before
22 * and after reading them.
23 */
24
25struct pvclock_vcpu_time_info {
26 u32 version;
27 u32 pad0;
28 u64 tsc_timestamp;
29 u64 system_time;
30 u32 tsc_to_system_mul;
31 s8 tsc_shift;
32 u8 pad[3];
33} __attribute__((__packed__)); /* 32 bytes */
34
35struct pvclock_wall_clock {
36 u32 version;
37 u32 sec;
38 u32 nsec;
39} __attribute__((__packed__));
40
41#endif /* __ASSEMBLY__ */
42#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
diff --git a/include/asm-x86/pvclock.h b/include/asm-x86/pvclock.h
new file mode 100644
index 000000000000..85b1bba8e0a3
--- /dev/null
+++ b/include/asm-x86/pvclock.h
@@ -0,0 +1,13 @@
1#ifndef _ASM_X86_PVCLOCK_H_
2#define _ASM_X86_PVCLOCK_H_
3
4#include <linux/clocksource.h>
5#include <asm/pvclock-abi.h>
6
7/* some helper functions for xen and kvm pv clock sources */
8cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src);
9void pvclock_read_wallclock(struct pvclock_wall_clock *wall,
10 struct pvclock_vcpu_time_info *vcpu,
11 struct timespec *ts);
12
13#endif /* _ASM_X86_PVCLOCK_H_ */
diff --git a/include/asm-x86/xen/page.h b/include/asm-x86/xen/page.h
index baf3a4dce28c..e11f24038b1d 100644
--- a/include/asm-x86/xen/page.h
+++ b/include/asm-x86/xen/page.h
@@ -150,13 +150,9 @@ static inline pte_t __pte_ma(pteval_t x)
150 return (pte_t) { .pte = x }; 150 return (pte_t) { .pte = x };
151} 151}
152 152
153#ifdef CONFIG_X86_PAE
154#define pmd_val_ma(v) ((v).pmd) 153#define pmd_val_ma(v) ((v).pmd)
155#define pud_val_ma(v) ((v).pgd.pgd) 154#define pud_val_ma(v) ((v).pgd.pgd)
156#define __pmd_ma(x) ((pmd_t) { (x) } ) 155#define __pmd_ma(x) ((pmd_t) { (x) } )
157#else /* !X86_PAE */
158#define pmd_val_ma(v) ((v).pud.pgd.pgd)
159#endif /* CONFIG_X86_PAE */
160 156
161#define pgd_val_ma(x) ((x).pgd) 157#define pgd_val_ma(x) ((x).pgd)
162 158
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 6a5dbdc8a7dc..686895bacd9d 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -94,7 +94,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
94 unsigned long freepfn, 94 unsigned long freepfn,
95 unsigned long startpfn, 95 unsigned long startpfn,
96 unsigned long endpfn); 96 unsigned long endpfn);
97extern void reserve_bootmem_node(pg_data_t *pgdat, 97extern int reserve_bootmem_node(pg_data_t *pgdat,
98 unsigned long physaddr, 98 unsigned long physaddr,
99 unsigned long size, 99 unsigned long size,
100 int flags); 100 int flags);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 092b1b25291d..de9d1df4bba2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -33,6 +33,7 @@
33#define KVM_REQ_REPORT_TPR_ACCESS 2 33#define KVM_REQ_REPORT_TPR_ACCESS 2
34#define KVM_REQ_MMU_RELOAD 3 34#define KVM_REQ_MMU_RELOAD 3
35#define KVM_REQ_TRIPLE_FAULT 4 35#define KVM_REQ_TRIPLE_FAULT 4
36#define KVM_REQ_PENDING_TIMER 5
36 37
37struct kvm_vcpu; 38struct kvm_vcpu;
38extern struct kmem_cache *kvm_vcpu_cache; 39extern struct kmem_cache *kvm_vcpu_cache;
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 59f1c0bd8f9c..d2a003586761 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -27,8 +27,7 @@
27 * This routine is called by the kernel to write a series of 27 * This routine is called by the kernel to write a series of
28 * characters to the tty device. The characters may come from 28 * characters to the tty device. The characters may come from
29 * user space or kernel space. This routine will return the 29 * user space or kernel space. This routine will return the
30 * number of characters actually accepted for writing. This 30 * number of characters actually accepted for writing.
31 * routine is mandatory.
32 * 31 *
33 * Optional: Required for writable devices. 32 * Optional: Required for writable devices.
34 * 33 *
@@ -134,7 +133,7 @@
134 * This routine notifies the tty driver that it should hangup the 133 * This routine notifies the tty driver that it should hangup the
135 * tty device. 134 * tty device.
136 * 135 *
137 * Required: 136 * Optional:
138 * 137 *
139 * void (*break_ctl)(struct tty_stuct *tty, int state); 138 * void (*break_ctl)(struct tty_stuct *tty, int state);
140 * 139 *
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e0a612bc9c4e..f422f7218e1c 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -367,6 +367,12 @@ static inline int ipv6_addr_any(const struct in6_addr *a)
367 a->s6_addr32[2] | a->s6_addr32[3] ) == 0); 367 a->s6_addr32[2] | a->s6_addr32[3] ) == 0);
368} 368}
369 369
370static inline int ipv6_addr_loopback(const struct in6_addr *a)
371{
372 return ((a->s6_addr32[0] | a->s6_addr32[1] |
373 a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0);
374}
375
370static inline int ipv6_addr_v4mapped(const struct in6_addr *a) 376static inline int ipv6_addr_v4mapped(const struct in6_addr *a)
371{ 377{
372 return ((a->s6_addr32[0] | a->s6_addr32[1] | 378 return ((a->s6_addr32[0] | a->s6_addr32[1] |
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index aa540e6be502..d9dd0f707296 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -95,6 +95,11 @@ extern struct list_head net_namespace_list;
95#ifdef CONFIG_NET_NS 95#ifdef CONFIG_NET_NS
96extern void __put_net(struct net *net); 96extern void __put_net(struct net *net);
97 97
98static inline int net_alive(struct net *net)
99{
100 return net && atomic_read(&net->count);
101}
102
98static inline struct net *get_net(struct net *net) 103static inline struct net *get_net(struct net *net)
99{ 104{
100 atomic_inc(&net->count); 105 atomic_inc(&net->count);
@@ -125,6 +130,12 @@ int net_eq(const struct net *net1, const struct net *net2)
125 return net1 == net2; 130 return net1 == net2;
126} 131}
127#else 132#else
133
134static inline int net_alive(struct net *net)
135{
136 return 1;
137}
138
128static inline struct net *get_net(struct net *net) 139static inline struct net *get_net(struct net *net)
129{ 140{
130 return net; 141 return net;
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 9b018da48cf3..819a0331cda9 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -10,6 +10,7 @@
10#define __XEN_PUBLIC_XEN_H__ 10#define __XEN_PUBLIC_XEN_H__
11 11
12#include <asm/xen/interface.h> 12#include <asm/xen/interface.h>
13#include <asm/pvclock-abi.h>
13 14
14/* 15/*
15 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). 16 * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
@@ -336,7 +337,7 @@ struct vcpu_info {
336 uint8_t evtchn_upcall_mask; 337 uint8_t evtchn_upcall_mask;
337 unsigned long evtchn_pending_sel; 338 unsigned long evtchn_pending_sel;
338 struct arch_vcpu_info arch; 339 struct arch_vcpu_info arch;
339 struct vcpu_time_info time; 340 struct pvclock_vcpu_time_info time;
340}; /* 64 bytes (x86) */ 341}; /* 64 bytes (x86) */
341 342
342/* 343/*
@@ -384,9 +385,7 @@ struct shared_info {
384 * Wallclock time: updated only by control software. Guests should base 385 * Wallclock time: updated only by control software. Guests should base
385 * their gettimeofday() syscall on this wallclock-base value. 386 * their gettimeofday() syscall on this wallclock-base value.
386 */ 387 */
387 uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ 388 struct pvclock_wall_clock wc;
388 uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */
389 uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */
390 389
391 struct arch_shared_info arch; 390 struct arch_shared_info arch;
392 391
diff --git a/kernel/futex.c b/kernel/futex.c
index 449def8074fe..7d1136e97c14 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1096,21 +1096,64 @@ static void unqueue_me_pi(struct futex_q *q)
1096 * private futexes. 1096 * private futexes.
1097 */ 1097 */
1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q, 1098static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1099 struct task_struct *newowner) 1099 struct task_struct *newowner,
1100 struct rw_semaphore *fshared)
1100{ 1101{
1101 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS; 1102 u32 newtid = task_pid_vnr(newowner) | FUTEX_WAITERS;
1102 struct futex_pi_state *pi_state = q->pi_state; 1103 struct futex_pi_state *pi_state = q->pi_state;
1104 struct task_struct *oldowner = pi_state->owner;
1103 u32 uval, curval, newval; 1105 u32 uval, curval, newval;
1104 int ret; 1106 int ret, attempt = 0;
1105 1107
1106 /* Owner died? */ 1108 /* Owner died? */
1109 if (!pi_state->owner)
1110 newtid |= FUTEX_OWNER_DIED;
1111
1112 /*
1113 * We are here either because we stole the rtmutex from the
1114 * pending owner or we are the pending owner which failed to
1115 * get the rtmutex. We have to replace the pending owner TID
1116 * in the user space variable. This must be atomic as we have
1117 * to preserve the owner died bit here.
1118 *
1119 * Note: We write the user space value _before_ changing the
1120 * pi_state because we can fault here. Imagine swapped out
1121 * pages or a fork, which was running right before we acquired
1122 * mmap_sem, that marked all the anonymous memory readonly for
1123 * cow.
1124 *
1125 * Modifying pi_state _before_ the user space value would
1126 * leave the pi_state in an inconsistent state when we fault
1127 * here, because we need to drop the hash bucket lock to
1128 * handle the fault. This might be observed in the PID check
1129 * in lookup_pi_state.
1130 */
1131retry:
1132 if (get_futex_value_locked(&uval, uaddr))
1133 goto handle_fault;
1134
1135 while (1) {
1136 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1137
1138 curval = cmpxchg_futex_value_locked(uaddr, uval, newval);
1139
1140 if (curval == -EFAULT)
1141 goto handle_fault;
1142 if (curval == uval)
1143 break;
1144 uval = curval;
1145 }
1146
1147 /*
1148 * We fixed up user space. Now we need to fix the pi_state
1149 * itself.
1150 */
1107 if (pi_state->owner != NULL) { 1151 if (pi_state->owner != NULL) {
1108 spin_lock_irq(&pi_state->owner->pi_lock); 1152 spin_lock_irq(&pi_state->owner->pi_lock);
1109 WARN_ON(list_empty(&pi_state->list)); 1153 WARN_ON(list_empty(&pi_state->list));
1110 list_del_init(&pi_state->list); 1154 list_del_init(&pi_state->list);
1111 spin_unlock_irq(&pi_state->owner->pi_lock); 1155 spin_unlock_irq(&pi_state->owner->pi_lock);
1112 } else 1156 }
1113 newtid |= FUTEX_OWNER_DIED;
1114 1157
1115 pi_state->owner = newowner; 1158 pi_state->owner = newowner;
1116 1159
@@ -1118,26 +1161,35 @@ static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
1118 WARN_ON(!list_empty(&pi_state->list)); 1161 WARN_ON(!list_empty(&pi_state->list));
1119 list_add(&pi_state->list, &newowner->pi_state_list); 1162 list_add(&pi_state->list, &newowner->pi_state_list);
1120 spin_unlock_irq(&newowner->pi_lock); 1163 spin_unlock_irq(&newowner->pi_lock);
1164 return 0;
1121 1165
1122 /* 1166 /*
1123 * We own it, so we have to replace the pending owner 1167 * To handle the page fault we need to drop the hash bucket
1124 * TID. This must be atomic as we have preserve the 1168 * lock here. That gives the other task (either the pending
1125 * owner died bit here. 1169 * owner itself or the task which stole the rtmutex) the
1170 * chance to try the fixup of the pi_state. So once we are
1171 * back from handling the fault we need to check the pi_state
1172 * after reacquiring the hash bucket lock and before trying to
1173 * do another fixup. When the fixup has been done already we
1174 * simply return.
1126 */ 1175 */
1127 ret = get_futex_value_locked(&uval, uaddr); 1176handle_fault:
1177 spin_unlock(q->lock_ptr);
1128 1178
1129 while (!ret) { 1179 ret = futex_handle_fault((unsigned long)uaddr, fshared, attempt++);
1130 newval = (uval & FUTEX_OWNER_DIED) | newtid;
1131 1180
1132 curval = cmpxchg_futex_value_locked(uaddr, uval, newval); 1181 spin_lock(q->lock_ptr);
1133 1182
1134 if (curval == -EFAULT) 1183 /*
1135 ret = -EFAULT; 1184 * Check if someone else fixed it for us:
1136 if (curval == uval) 1185 */
1137 break; 1186 if (pi_state->owner != oldowner)
1138 uval = curval; 1187 return 0;
1139 } 1188
1140 return ret; 1189 if (ret)
1190 return ret;
1191
1192 goto retry;
1141} 1193}
1142 1194
1143/* 1195/*
@@ -1507,7 +1559,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1507 * that case: 1559 * that case:
1508 */ 1560 */
1509 if (q.pi_state->owner != curr) 1561 if (q.pi_state->owner != curr)
1510 ret = fixup_pi_state_owner(uaddr, &q, curr); 1562 ret = fixup_pi_state_owner(uaddr, &q, curr, fshared);
1511 } else { 1563 } else {
1512 /* 1564 /*
1513 * Catch the rare case, where the lock was released 1565 * Catch the rare case, where the lock was released
@@ -1539,7 +1591,8 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
1539 int res; 1591 int res;
1540 1592
1541 owner = rt_mutex_owner(&q.pi_state->pi_mutex); 1593 owner = rt_mutex_owner(&q.pi_state->pi_mutex);
1542 res = fixup_pi_state_owner(uaddr, &q, owner); 1594 res = fixup_pi_state_owner(uaddr, &q, owner,
1595 fshared);
1543 1596
1544 /* propagate -EFAULT, if the fixup failed */ 1597 /* propagate -EFAULT, if the fixup failed */
1545 if (res) 1598 if (res)
diff --git a/kernel/kgdb.c b/kernel/kgdb.c
index 79e3c90113c2..3ec23c3ec97f 100644
--- a/kernel/kgdb.c
+++ b/kernel/kgdb.c
@@ -1499,7 +1499,8 @@ int kgdb_nmicallback(int cpu, void *regs)
1499 return 1; 1499 return 1;
1500} 1500}
1501 1501
1502void kgdb_console_write(struct console *co, const char *s, unsigned count) 1502static void kgdb_console_write(struct console *co, const char *s,
1503 unsigned count)
1503{ 1504{
1504 unsigned long flags; 1505 unsigned long flags;
1505 1506
diff --git a/kernel/sched.c b/kernel/sched.c
index b048ad8a11af..3aaa5c8cb421 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
4398 signal_pending(current)) || 4398 signal_pending(current)) ||
4399 (state == TASK_KILLABLE && 4399 (state == TASK_KILLABLE &&
4400 fatal_signal_pending(current))) { 4400 fatal_signal_pending(current))) {
4401 __remove_wait_queue(&x->wait, &wait); 4401 timeout = -ERESTARTSYS;
4402 return -ERESTARTSYS; 4402 break;
4403 } 4403 }
4404 __set_current_state(state); 4404 __set_current_state(state);
4405 spin_unlock_irq(&x->wait.lock); 4405 spin_unlock_irq(&x->wait.lock);
4406 timeout = schedule_timeout(timeout); 4406 timeout = schedule_timeout(timeout);
4407 spin_lock_irq(&x->wait.lock); 4407 spin_lock_irq(&x->wait.lock);
4408 if (!timeout) { 4408 } while (!x->done && timeout);
4409 __remove_wait_queue(&x->wait, &wait);
4410 return timeout;
4411 }
4412 } while (!x->done);
4413 __remove_wait_queue(&x->wait, &wait); 4409 __remove_wait_queue(&x->wait, &wait);
4410 if (!x->done)
4411 return timeout;
4414 } 4412 }
4415 x->done--; 4413 x->done--;
4416 return timeout; 4414 return timeout ?: 1;
4417} 4415}
4418 4416
4419static long __sched 4417static long __sched
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..0f3c19197fa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
250 if (rt_rq->rt_time || rt_rq->rt_nr_running) 250 if (rt_rq->rt_time || rt_rq->rt_nr_running)
251 idle = 0; 251 idle = 0;
252 spin_unlock(&rt_rq->rt_runtime_lock); 252 spin_unlock(&rt_rq->rt_runtime_lock);
253 } 253 } else if (rt_rq->rt_nr_running)
254 idle = 0;
254 255
255 if (enqueue) 256 if (enqueue)
256 sched_rt_rq_enqueue(rt_rq); 257 sched_rt_rq_enqueue(rt_rq);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index e8fb927392b9..8d9f60e06f62 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -442,15 +442,17 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
442 return init_bootmem_core(pgdat, freepfn, startpfn, endpfn); 442 return init_bootmem_core(pgdat, freepfn, startpfn, endpfn);
443} 443}
444 444
445void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 445int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
446 unsigned long size, int flags) 446 unsigned long size, int flags)
447{ 447{
448 int ret; 448 int ret;
449 449
450 ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); 450 ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
451 if (ret < 0) 451 if (ret < 0)
452 return; 452 return -ENOMEM;
453 reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); 453 reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
454
455 return 0;
454} 456}
455 457
456void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, 458void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
diff --git a/mm/memory.c b/mm/memory.c
index 9aefaae46858..d14b251a25a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1045,6 +1045,26 @@ no_page_table:
1045 return page; 1045 return page;
1046} 1046}
1047 1047
1048/* Can we do the FOLL_ANON optimization? */
1049static inline int use_zero_page(struct vm_area_struct *vma)
1050{
1051 /*
1052 * We don't want to optimize FOLL_ANON for make_pages_present()
1053 * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
1054 * we want to get the page from the page tables to make sure
1055 * that we serialize and update with any other user of that
1056 * mapping.
1057 */
1058 if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
1059 return 0;
1060 /*
1061 * And if we have a fault or a nopfn routine, it's not an
1062 * anonymous region.
1063 */
1064 return !vma->vm_ops ||
1065 (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
1066}
1067
1048int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, 1068int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1049 unsigned long start, int len, int write, int force, 1069 unsigned long start, int len, int write, int force,
1050 struct page **pages, struct vm_area_struct **vmas) 1070 struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
1119 foll_flags = FOLL_TOUCH; 1139 foll_flags = FOLL_TOUCH;
1120 if (pages) 1140 if (pages)
1121 foll_flags |= FOLL_GET; 1141 foll_flags |= FOLL_GET;
1122 if (!write && !(vma->vm_flags & VM_LOCKED) && 1142 if (!write && use_zero_page(vma))
1123 (!vma->vm_ops || !vma->vm_ops->fault))
1124 foll_flags |= FOLL_ANON; 1143 foll_flags |= FOLL_ANON;
1125 1144
1126 do { 1145 do {
@@ -1766,7 +1785,6 @@ gotten:
1766 page_table = pte_offset_map_lock(mm, pmd, address, &ptl); 1785 page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
1767 if (likely(pte_same(*page_table, orig_pte))) { 1786 if (likely(pte_same(*page_table, orig_pte))) {
1768 if (old_page) { 1787 if (old_page) {
1769 page_remove_rmap(old_page, vma);
1770 if (!PageAnon(old_page)) { 1788 if (!PageAnon(old_page)) {
1771 dec_mm_counter(mm, file_rss); 1789 dec_mm_counter(mm, file_rss);
1772 inc_mm_counter(mm, anon_rss); 1790 inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1806,32 @@ gotten:
1788 lru_cache_add_active(new_page); 1806 lru_cache_add_active(new_page);
1789 page_add_new_anon_rmap(new_page, vma, address); 1807 page_add_new_anon_rmap(new_page, vma, address);
1790 1808
1809 if (old_page) {
1810 /*
1811 * Only after switching the pte to the new page may
1812 * we remove the mapcount here. Otherwise another
1813 * process may come and find the rmap count decremented
1814 * before the pte is switched to the new page, and
1815 * "reuse" the old page writing into it while our pte
1816 * here still points into it and can be read by other
1817 * threads.
1818 *
1819 * The critical issue is to order this
1820 * page_remove_rmap with the ptp_clear_flush above.
1821 * Those stores are ordered by (if nothing else,)
1822 * the barrier present in the atomic_add_negative
1823 * in page_remove_rmap.
1824 *
1825 * Then the TLB flush in ptep_clear_flush ensures that
1826 * no process can access the old page before the
1827 * decremented mapcount is visible. And the old page
1828 * cannot be reused until after the decremented
1829 * mapcount is visible. So transitively, TLBs to
1830 * old page will be flushed before it can be reused.
1831 */
1832 page_remove_rmap(old_page, vma);
1833 }
1834
1791 /* Free the old page.. */ 1835 /* Free the old page.. */
1792 new_page = old_page; 1836 new_page = old_page;
1793 ret |= VM_FAULT_WRITE; 1837 ret |= VM_FAULT_WRITE;
diff --git a/mm/slab.c b/mm/slab.c
index 06236e4ddc1b..046607f05f3e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3263,9 +3263,12 @@ retry:
3263 3263
3264 if (cpuset_zone_allowed_hardwall(zone, flags) && 3264 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3265 cache->nodelists[nid] && 3265 cache->nodelists[nid] &&
3266 cache->nodelists[nid]->free_objects) 3266 cache->nodelists[nid]->free_objects) {
3267 obj = ____cache_alloc_node(cache, 3267 obj = ____cache_alloc_node(cache,
3268 flags | GFP_THISNODE, nid); 3268 flags | GFP_THISNODE, nid);
3269 if (obj)
3270 break;
3271 }
3269 } 3272 }
3270 3273
3271 if (!obj) { 3274 if (!obj) {
diff --git a/net/core/dev.c b/net/core/dev.c
index 68d8df0992ab..c421a1f8f0b9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2077,6 +2077,10 @@ int netif_receive_skb(struct sk_buff *skb)
2077 2077
2078 rcu_read_lock(); 2078 rcu_read_lock();
2079 2079
2080 /* Don't receive packets in an exiting network namespace */
2081 if (!net_alive(dev_net(skb->dev)))
2082 goto out;
2083
2080#ifdef CONFIG_NET_CLS_ACT 2084#ifdef CONFIG_NET_CLS_ACT
2081 if (skb->tc_verd & TC_NCLS) { 2085 if (skb->tc_verd & TC_NCLS) {
2082 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); 2086 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 72b4c184dd84..7c52fe277b62 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -140,6 +140,9 @@ static void cleanup_net(struct work_struct *work)
140 struct pernet_operations *ops; 140 struct pernet_operations *ops;
141 struct net *net; 141 struct net *net;
142 142
143 /* Be very certain incoming network packets will not find us */
144 rcu_barrier();
145
143 net = container_of(work, struct net, work); 146 net = container_of(work, struct net, work);
144 147
145 mutex_lock(&net_mutex); 148 mutex_lock(&net_mutex);
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 4e5c8615832c..17eb48b8e329 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -102,6 +102,15 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
102 if (hdr->version != 6) 102 if (hdr->version != 6)
103 goto err; 103 goto err;
104 104
105 /*
106 * RFC4291 2.5.3
107 * A packet received on an interface with a destination address
108 * of loopback must be dropped.
109 */
110 if (!(dev->flags & IFF_LOOPBACK) &&
111 ipv6_addr_loopback(&hdr->daddr))
112 goto err;
113
105 skb->transport_header = skb->network_header + sizeof(*hdr); 114 skb->transport_header = skb->network_header + sizeof(*hdr);
106 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); 115 IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
107 116
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c042ce19bd14..86e28a75267f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -345,18 +345,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
345 case IPV6_DSTOPTS: 345 case IPV6_DSTOPTS:
346 { 346 {
347 struct ipv6_txoptions *opt; 347 struct ipv6_txoptions *opt;
348
349 /* remove any sticky options header with a zero option
350 * length, per RFC3542.
351 */
348 if (optlen == 0) 352 if (optlen == 0)
349 optval = NULL; 353 optval = NULL;
354 else if (optlen < sizeof(struct ipv6_opt_hdr) ||
355 optlen & 0x7 || optlen > 8 * 255)
356 goto e_inval;
350 357
351 /* hop-by-hop / destination options are privileged option */ 358 /* hop-by-hop / destination options are privileged option */
352 retv = -EPERM; 359 retv = -EPERM;
353 if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW)) 360 if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
354 break; 361 break;
355 362
356 if (optlen < sizeof(struct ipv6_opt_hdr) ||
357 optlen & 0x7 || optlen > 8 * 255)
358 goto e_inval;
359
360 opt = ipv6_renew_options(sk, np->opt, optname, 363 opt = ipv6_renew_options(sk, np->opt, optname,
361 (struct ipv6_opt_hdr __user *)optval, 364 (struct ipv6_opt_hdr __user *)optval,
362 optlen); 365 optlen);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 28d8bd53bd3a..c80d5899f279 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1132,7 +1132,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
1132 ieee80211_tx_handler *handler; 1132 ieee80211_tx_handler *handler;
1133 struct ieee80211_tx_data tx; 1133 struct ieee80211_tx_data tx;
1134 ieee80211_tx_result res = TX_DROP, res_prepare; 1134 ieee80211_tx_result res = TX_DROP, res_prepare;
1135 int ret, i; 1135 int ret, i, retries = 0;
1136 1136
1137 WARN_ON(__ieee80211_queue_pending(local, control->queue)); 1137 WARN_ON(__ieee80211_queue_pending(local, control->queue));
1138 1138
@@ -1216,6 +1216,13 @@ retry:
1216 if (!__ieee80211_queue_stopped(local, control->queue)) { 1216 if (!__ieee80211_queue_stopped(local, control->queue)) {
1217 clear_bit(IEEE80211_LINK_STATE_PENDING, 1217 clear_bit(IEEE80211_LINK_STATE_PENDING,
1218 &local->state[control->queue]); 1218 &local->state[control->queue]);
1219 retries++;
1220 /*
1221 * Driver bug, it's rejecting packets but
1222 * not stopping queues.
1223 */
1224 if (WARN_ON_ONCE(retries > 5))
1225 goto drop;
1219 goto retry; 1226 goto retry;
1220 } 1227 }
1221 memcpy(&store->control, control, 1228 memcpy(&store->control, control,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e7e3baf7009e..0dbcde6758ea 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4401,7 +4401,9 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len,
4401 if (copy_from_user(&getaddrs, optval, len)) 4401 if (copy_from_user(&getaddrs, optval, len))
4402 return -EFAULT; 4402 return -EFAULT;
4403 4403
4404 if (getaddrs.addr_num <= 0) return -EINVAL; 4404 if (getaddrs.addr_num <= 0 ||
4405 getaddrs.addr_num >= (INT_MAX / sizeof(union sctp_addr)))
4406 return -EINVAL;
4405 /* 4407 /*
4406 * For UDP-style sockets, id specifies the association to query. 4408 * For UDP-style sockets, id specifies the association to query.
4407 * If the id field is set to the value '0' then the locally bound 4409 * If the id field is set to the value '0' then the locally bound
diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c
index 91d14224f6b3..73d4572d136b 100644
--- a/sound/isa/sb/sb_mixer.c
+++ b/sound/isa/sb/sb_mixer.c
@@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = {
925static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) 925static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
926{ 926{
927 unsigned char *val = chip->saved_regs; 927 unsigned char *val = chip->saved_regs;
928 snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return); 928 snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
929 for (; num_regs; num_regs--) 929 for (; num_regs; num_regs--)
930 *val++ = snd_sbmixer_read(chip, *regs++); 930 *val++ = snd_sbmixer_read(chip, *regs++);
931} 931}
@@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
933static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs) 933static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
934{ 934{
935 unsigned char *val = chip->saved_regs; 935 unsigned char *val = chip->saved_regs;
936 snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return); 936 snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
937 for (; num_regs; num_regs--) 937 for (; num_regs; num_regs--)
938 snd_sbmixer_write(chip, *regs++, *val++); 938 snd_sbmixer_write(chip, *regs++, *val++);
939} 939}
diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
index 56f87cd33c19..3f00ddf450f8 100644
--- a/sound/pci/aw2/aw2-alsa.c
+++ b/sound/pci/aw2/aw2-alsa.c
@@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card,
316 return -ENOMEM; 316 return -ENOMEM;
317 } 317 }
318 318
319 /* (2) initialization of the chip hardware */
320 snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
319 321
320 if (request_irq(pci->irq, snd_aw2_saa7146_interrupt, 322 if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
321 IRQF_SHARED, "Audiowerk2", chip)) { 323 IRQF_SHARED, "Audiowerk2", chip)) {
@@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card,
329 } 331 }
330 chip->irq = pci->irq; 332 chip->irq = pci->irq;
331 333
332 /* (2) initialization of the chip hardware */
333 snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
334 err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops); 334 err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
335 if (err < 0) { 335 if (err < 0) {
336 free_irq(chip->irq, (void *)chip); 336 free_irq(chip->irq, (void *)chip);
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 98778cb69c6e..1dcf9f3d1107 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -269,28 +269,9 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
269 } 269 }
270} 270}
271 271
272static int get_eoi_gsi(struct kvm_ioapic *ioapic, int vector) 272static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
273{ 273{
274 int i;
275
276 for (i = 0; i < IOAPIC_NUM_PINS; i++)
277 if (ioapic->redirtbl[i].fields.vector == vector)
278 return i;
279 return -1;
280}
281
282void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
283{
284 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
285 union ioapic_redir_entry *ent; 274 union ioapic_redir_entry *ent;
286 int gsi;
287
288 gsi = get_eoi_gsi(ioapic, vector);
289 if (gsi == -1) {
290 printk(KERN_WARNING "Can't find redir item for %d EOI\n",
291 vector);
292 return;
293 }
294 275
295 ent = &ioapic->redirtbl[gsi]; 276 ent = &ioapic->redirtbl[gsi];
296 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG); 277 ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -300,6 +281,16 @@ void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
300 ioapic_deliver(ioapic, gsi); 281 ioapic_deliver(ioapic, gsi);
301} 282}
302 283
284void kvm_ioapic_update_eoi(struct kvm *kvm, int vector)
285{
286 struct kvm_ioapic *ioapic = kvm->arch.vioapic;
287 int i;
288
289 for (i = 0; i < IOAPIC_NUM_PINS; i++)
290 if (ioapic->redirtbl[i].fields.vector == vector)
291 __kvm_ioapic_update_eoi(ioapic, i);
292}
293
303static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr) 294static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr)
304{ 295{
305 struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private; 296 struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;