aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/feature-removal-schedule.txt17
-rw-r--r--Documentation/usb/acm.txt4
-rw-r--r--Documentation/x86_64/boot-options.txt4
-rw-r--r--MAINTAINERS6
-rw-r--r--Makefile2
-rw-r--r--arch/i386/Kconfig41
-rw-r--r--arch/i386/kernel/cpu/common.c2
-rw-r--r--arch/i386/kernel/smpboot.c4
-rw-r--r--arch/i386/kernel/trampoline.S5
-rw-r--r--arch/x86_64/kernel/early-quirks.c5
-rw-r--r--arch/x86_64/kernel/io_apic.c124
-rw-r--r--drivers/acpi/toshiba_acpi.c9
-rw-r--r--drivers/ata/Kconfig2
-rw-r--r--drivers/ata/pata_hpt37x.c6
-rw-r--r--drivers/block/pktcdvd.c49
-rw-r--r--drivers/char/ip2/i2ellis.h4
-rw-r--r--drivers/connector/cn_proc.c11
-rw-r--r--drivers/i2c/busses/Kconfig9
-rw-r--r--drivers/i2c/busses/i2c-mv64xxx.c4
-rw-r--r--drivers/i2c/busses/i2c-pnx.c7
-rw-r--r--drivers/i2c/chips/m41t00.c1
-rw-r--r--drivers/i2c/i2c-core.c28
-rw-r--r--drivers/ide/pci/atiixp.c18
-rw-r--r--drivers/ide/pci/via82cxxx.c138
-rw-r--r--drivers/kvm/kvm.h106
-rw-r--r--drivers/kvm/kvm_main.c155
-rw-r--r--drivers/kvm/mmu.c1114
-rw-r--r--drivers/kvm/paging_tmpl.h260
-rw-r--r--drivers/kvm/svm.c113
-rw-r--r--drivers/kvm/vmx.c175
-rw-r--r--drivers/kvm/x86_emulate.c2
-rw-r--r--drivers/leds/leds-s3c24xx.c2
-rw-r--r--drivers/macintosh/via-pmu.c1
-rw-r--r--drivers/pci/Kconfig2
-rw-r--r--drivers/pci/search.c24
-rw-r--r--drivers/rtc/rtc-at91rm9200.c2
-rw-r--r--drivers/rtc/rtc-rs5c372.c535
-rw-r--r--drivers/usb/class/usblp.c1
-rw-r--r--drivers/usb/core/endpoint.c2
-rw-r--r--drivers/usb/gadget/omap_udc.c245
-rw-r--r--drivers/usb/gadget/omap_udc.h3
-rw-r--r--drivers/usb/host/uhci-hcd.c20
-rw-r--r--drivers/usb/misc/sisusbvga/sisusb_con.c12
-rw-r--r--drivers/usb/net/asix.c2
-rw-r--r--drivers/usb/serial/Kconfig2
-rw-r--r--drivers/usb/serial/option.c3
-rw-r--r--drivers/usb/storage/unusual_devs.h17
-rw-r--r--fs/adfs/dir_f.c2
-rw-r--r--fs/bad_inode.c330
-rw-r--r--fs/binfmt_elf.c8
-rw-r--r--fs/ufs/balloc.c25
-rw-r--r--fs/ufs/inode.c41
-rw-r--r--include/asm-i386/boot.h3
-rw-r--r--include/linux/kvm.h11
-rw-r--r--include/linux/magic.h1
-rw-r--r--include/linux/swap.h2
-rw-r--r--init/main.c5
-rw-r--r--kernel/module.c38
-rw-r--r--kernel/params.c6
-rw-r--r--kernel/power/swap.c9
-rw-r--r--kernel/power/user.c7
-rw-r--r--kernel/profile.c2
-rw-r--r--mm/oom_kill.c12
-rw-r--r--mm/page_alloc.c7
-rw-r--r--mm/slab.c4
-rw-r--r--mm/swapfile.c8
-rw-r--r--mm/vmscan.c33
-rw-r--r--scripts/kconfig/qconf.cc12
-rw-r--r--scripts/kconfig/qconf.h2
69 files changed, 3015 insertions, 851 deletions
diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 30f3c8c9c12a..f2024df7ebe5 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -226,6 +226,23 @@ Who: Jean Delvare <khali@linux-fr.org>
226 226
227--------------------------- 227---------------------------
228 228
229What: i2c_adapter.dev
230 i2c_adapter.list
231When: July 2007
232Why: Superfluous, given i2c_adapter.class_dev:
233 * The "dev" was a stand-in for the physical device node that legacy
234 drivers would not have; but now it's almost always present. Any
235 remaining legacy drivers must upgrade (they now trigger warnings).
236 * The "list" duplicates class device children.
237 The delay in removing this is so upgraded lm_sensors and libsensors
238 can get deployed. (Removal causes minor changes in the sysfs layout,
239 notably the location of the adapter type name and parenting the i2c
240 client hardware directly from their controller.)
241Who: Jean Delvare <khali@linux-fr.org>,
242 David Brownell <dbrownell@users.sourceforge.net>
243
244---------------------------
245
229What: IPv4 only connection tracking/NAT/helpers 246What: IPv4 only connection tracking/NAT/helpers
230When: 2.6.22 247When: 2.6.22
231Why: The new layer 3 independant connection tracking replaces the old 248Why: The new layer 3 independant connection tracking replaces the old
diff --git a/Documentation/usb/acm.txt b/Documentation/usb/acm.txt
index 737d6104c3f3..17f5c2e1a570 100644
--- a/Documentation/usb/acm.txt
+++ b/Documentation/usb/acm.txt
@@ -46,6 +46,10 @@ Abstract Control Model (USB CDC ACM) specification.
46 46
47 3Com USR ISDN Pro TA 47 3Com USR ISDN Pro TA
48 48
49 Some cell phones also connect via USB. I know the following phones work:
50
51 SonyEricsson K800i
52
49 Unfortunately many modems and most ISDN TAs use proprietary interfaces and 53 Unfortunately many modems and most ISDN TAs use proprietary interfaces and
50thus won't work with this drivers. Check for ACM compliance before buying. 54thus won't work with this drivers. Check for ACM compliance before buying.
51 55
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt
index dbdcaf68e3ea..5c86ed6f0448 100644
--- a/Documentation/x86_64/boot-options.txt
+++ b/Documentation/x86_64/boot-options.txt
@@ -52,6 +52,10 @@ APICs
52 apicmaintimer. Useful when your PIT timer is totally 52 apicmaintimer. Useful when your PIT timer is totally
53 broken. 53 broken.
54 54
55 disable_8254_timer / enable_8254_timer
56 Enable interrupt 0 timer routing over the 8254 in addition to over
57 the IO-APIC. The kernel tries to set a sensible default.
58
55Early Console 59Early Console
56 60
57 syntax: earlyprintk=vga 61 syntax: earlyprintk=vga
diff --git a/MAINTAINERS b/MAINTAINERS
index 6ecb29609cfc..4ccc5fa06d09 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2593,6 +2593,12 @@ P: Adam Belay
2593M: ambx1@neo.rr.com 2593M: ambx1@neo.rr.com
2594S: Maintained 2594S: Maintained
2595 2595
2596PNXxxxx I2C DRIVER
2597P: Vitaly Wool
2598M: vitalywool@gmail.com
2599L: i2c@lm-sensors.org
2600S: Maintained
2601
2596PPP PROTOCOL DRIVERS AND COMPRESSORS 2602PPP PROTOCOL DRIVERS AND COMPRESSORS
2597P: Paul Mackerras 2603P: Paul Mackerras
2598M: paulus@samba.org 2604M: paulus@samba.org
diff --git a/Makefile b/Makefile
index 0e9eee768288..fb5b3ef9ab11 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 20 3SUBLEVEL = 20
4EXTRAVERSION =-rc3 4EXTRAVERSION =-rc4
5NAME = Homicidal Dwarf Hamster 5NAME = Homicidal Dwarf Hamster
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 0d67a0a1151e..0dfee812811a 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -777,6 +777,47 @@ config CRASH_DUMP
777 PHYSICAL_START. 777 PHYSICAL_START.
778 For more details see Documentation/kdump/kdump.txt 778 For more details see Documentation/kdump/kdump.txt
779 779
780config PHYSICAL_START
781 hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
782 default "0x100000"
783 help
784 This gives the physical address where the kernel is loaded.
785
786 If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
787 bzImage will decompress itself to above physical address and
788 run from there. Otherwise, bzImage will run from the address where
789 it has been loaded by the boot loader and will ignore above physical
790 address.
791
792 In normal kdump cases one does not have to set/change this option
793 as now bzImage can be compiled as a completely relocatable image
794 (CONFIG_RELOCATABLE=y) and be used to load and run from a different
795 address. This option is mainly useful for the folks who don't want
796 to use a bzImage for capturing the crash dump and want to use a
797 vmlinux instead. vmlinux is not relocatable hence a kernel needs
798 to be specifically compiled to run from a specific memory area
799 (normally a reserved region) and this option comes handy.
800
801 So if you are using bzImage for capturing the crash dump, leave
802 the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y.
803 Otherwise if you plan to use vmlinux for capturing the crash dump
804 change this value to start of the reserved region (Typically 16MB
805 0x1000000). In other words, it can be set based on the "X" value as
806 specified in the "crashkernel=YM@XM" command line boot parameter
807 passed to the panic-ed kernel. Typically this parameter is set as
808 crashkernel=64M@16M. Please take a look at
809 Documentation/kdump/kdump.txt for more details about crash dumps.
810
811 Usage of bzImage for capturing the crash dump is recommended as
812 one does not have to build two kernels. Same kernel can be used
813 as production kernel and capture kernel. Above option should have
814 gone away after relocatable bzImage support is introduced. But it
815 is present because there are users out there who continue to use
816 vmlinux for dump capture. This option should go away down the
817 line.
818
819 Don't change this unless you know what you are doing.
820
780config RELOCATABLE 821config RELOCATABLE
781 bool "Build a relocatable kernel(EXPERIMENTAL)" 822 bool "Build a relocatable kernel(EXPERIMENTAL)"
782 depends on EXPERIMENTAL 823 depends on EXPERIMENTAL
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 1b34c56f8123..8689d62abd4a 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -54,7 +54,7 @@ static struct cpu_dev __cpuinitdata default_cpu = {
54 .c_init = default_init, 54 .c_init = default_init,
55 .c_vendor = "Unknown", 55 .c_vendor = "Unknown",
56}; 56};
57static struct cpu_dev * this_cpu = &default_cpu; 57static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
58 58
59static int __init cachesize_setup(char *str) 59static int __init cachesize_setup(char *str)
60{ 60{
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index aef39be81361..300d9b38d02e 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -227,7 +227,7 @@ static struct {
227 atomic_t count_start; 227 atomic_t count_start;
228 atomic_t count_stop; 228 atomic_t count_stop;
229 unsigned long long values[NR_CPUS]; 229 unsigned long long values[NR_CPUS];
230} tsc __initdata = { 230} tsc __cpuinitdata = {
231 .start_flag = ATOMIC_INIT(0), 231 .start_flag = ATOMIC_INIT(0),
232 .count_start = ATOMIC_INIT(0), 232 .count_start = ATOMIC_INIT(0),
233 .count_stop = ATOMIC_INIT(0), 233 .count_stop = ATOMIC_INIT(0),
@@ -332,7 +332,7 @@ static void __init synchronize_tsc_bp(void)
332 printk("passed.\n"); 332 printk("passed.\n");
333} 333}
334 334
335static void __init synchronize_tsc_ap(void) 335static void __cpuinit synchronize_tsc_ap(void)
336{ 336{
337 int i; 337 int i;
338 338
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S
index fcce0e61b0e7..2f1814c5cfd7 100644
--- a/arch/i386/kernel/trampoline.S
+++ b/arch/i386/kernel/trampoline.S
@@ -38,6 +38,11 @@
38 38
39.data 39.data
40 40
41/* We can free up trampoline after bootup if cpu hotplug is not supported. */
42#ifndef CONFIG_HOTPLUG_CPU
43.section ".init.data","aw",@progbits
44#endif
45
41.code16 46.code16
42 47
43ENTRY(trampoline_data) 48ENTRY(trampoline_data)
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 829698f6d049..49802f1bee94 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -69,6 +69,11 @@ static void nvidia_bugs(void)
69 69
70static void ati_bugs(void) 70static void ati_bugs(void)
71{ 71{
72 if (timer_over_8254 == 1) {
73 timer_over_8254 = 0;
74 printk(KERN_INFO
75 "ATI board detected. Disabling timer routing over 8254.\n");
76 }
72} 77}
73 78
74static void intel_bugs(void) 79static void intel_bugs(void)
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 2a1dcd5f69c2..d7bad90a5ad8 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -55,6 +55,10 @@ int sis_apic_bug; /* not actually supported, dummy for compile */
55 55
56static int no_timer_check; 56static int no_timer_check;
57 57
58static int disable_timer_pin_1 __initdata;
59
60int timer_over_8254 __initdata = 1;
61
58/* Where if anywhere is the i8259 connect in external int mode */ 62/* Where if anywhere is the i8259 connect in external int mode */
59static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; 63static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
60 64
@@ -350,6 +354,29 @@ static int __init disable_ioapic_setup(char *str)
350} 354}
351early_param("noapic", disable_ioapic_setup); 355early_param("noapic", disable_ioapic_setup);
352 356
357/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
358static int __init disable_timer_pin_setup(char *arg)
359{
360 disable_timer_pin_1 = 1;
361 return 1;
362}
363__setup("disable_timer_pin_1", disable_timer_pin_setup);
364
365static int __init setup_disable_8254_timer(char *s)
366{
367 timer_over_8254 = -1;
368 return 1;
369}
370static int __init setup_enable_8254_timer(char *s)
371{
372 timer_over_8254 = 2;
373 return 1;
374}
375
376__setup("disable_8254_timer", setup_disable_8254_timer);
377__setup("enable_8254_timer", setup_enable_8254_timer);
378
379
353/* 380/*
354 * Find the IRQ entry number of a certain pin. 381 * Find the IRQ entry number of a certain pin.
355 */ 382 */
@@ -1568,33 +1595,10 @@ static inline void unlock_ExtINT_logic(void)
1568 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ 1595 * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
1569 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast 1596 * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
1570 * fanatically on his truly buggy board. 1597 * fanatically on his truly buggy board.
1598 *
1599 * FIXME: really need to revamp this for modern platforms only.
1571 */ 1600 */
1572 1601static inline void check_timer(void)
1573static int try_apic_pin(int apic, int pin, char *msg)
1574{
1575 apic_printk(APIC_VERBOSE, KERN_INFO
1576 "..TIMER: trying IO-APIC=%d PIN=%d %s",
1577 apic, pin, msg);
1578
1579 /*
1580 * Ok, does IRQ0 through the IOAPIC work?
1581 */
1582 if (!no_timer_check && timer_irq_works()) {
1583 nmi_watchdog_default();
1584 if (nmi_watchdog == NMI_IO_APIC) {
1585 disable_8259A_irq(0);
1586 setup_nmi();
1587 enable_8259A_irq(0);
1588 }
1589 return 1;
1590 }
1591 clear_IO_APIC_pin(apic, pin);
1592 apic_printk(APIC_QUIET, KERN_ERR " .. failed\n");
1593 return 0;
1594}
1595
1596/* The function from hell */
1597static void check_timer(void)
1598{ 1602{
1599 int apic1, pin1, apic2, pin2; 1603 int apic1, pin1, apic2, pin2;
1600 int vector; 1604 int vector;
@@ -1615,43 +1619,61 @@ static void check_timer(void)
1615 */ 1619 */
1616 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 1620 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
1617 init_8259A(1); 1621 init_8259A(1);
1622 if (timer_over_8254 > 0)
1623 enable_8259A_irq(0);
1618 1624
1619 pin1 = find_isa_irq_pin(0, mp_INT); 1625 pin1 = find_isa_irq_pin(0, mp_INT);
1620 apic1 = find_isa_irq_apic(0, mp_INT); 1626 apic1 = find_isa_irq_apic(0, mp_INT);
1621 pin2 = ioapic_i8259.pin; 1627 pin2 = ioapic_i8259.pin;
1622 apic2 = ioapic_i8259.apic; 1628 apic2 = ioapic_i8259.apic;
1623 1629
1624 /* Do this first, otherwise we get double interrupts on ATI boards */ 1630 apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
1625 if ((pin1 != -1) && try_apic_pin(apic1, pin1,"with 8259 IRQ0 disabled")) 1631 vector, apic1, pin1, apic2, pin2);
1626 return;
1627 1632
1628 /* Now try again with IRQ0 8259A enabled. 1633 if (pin1 != -1) {
1629 Assumes timer is on IO-APIC 0 ?!? */ 1634 /*
1630 enable_8259A_irq(0); 1635 * Ok, does IRQ0 through the IOAPIC work?
1631 unmask_IO_APIC_irq(0); 1636 */
1632 if (try_apic_pin(apic1, pin1, "with 8259 IRQ0 enabled")) 1637 unmask_IO_APIC_irq(0);
1633 return; 1638 if (!no_timer_check && timer_irq_works()) {
1634 disable_8259A_irq(0); 1639 nmi_watchdog_default();
1635 1640 if (nmi_watchdog == NMI_IO_APIC) {
1636 /* Always try pin0 and pin2 on APIC 0 to handle buggy timer overrides 1641 disable_8259A_irq(0);
1637 on Nvidia boards */ 1642 setup_nmi();
1638 if (!(apic1 == 0 && pin1 == 0) && 1643 enable_8259A_irq(0);
1639 try_apic_pin(0, 0, "fallback with 8259 IRQ0 disabled")) 1644 }
1640 return; 1645 if (disable_timer_pin_1 > 0)
1641 if (!(apic1 == 0 && pin1 == 2) && 1646 clear_IO_APIC_pin(0, pin1);
1642 try_apic_pin(0, 2, "fallback with 8259 IRQ0 disabled")) 1647 return;
1643 return; 1648 }
1649 clear_IO_APIC_pin(apic1, pin1);
1650 apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
1651 "connected to IO-APIC\n");
1652 }
1644 1653
1645 /* Then try pure 8259A routing on the 8259 as reported by BIOS*/ 1654 apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
1646 enable_8259A_irq(0); 1655 "through the 8259A ... ");
1647 if (pin2 != -1) { 1656 if (pin2 != -1) {
1657 apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
1658 apic2, pin2);
1659 /*
1660 * legacy devices should be connected to IO APIC #0
1661 */
1648 setup_ExtINT_IRQ0_pin(apic2, pin2, vector); 1662 setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
1649 if (try_apic_pin(apic2,pin2,"8259A broadcast ExtINT from BIOS")) 1663 if (timer_irq_works()) {
1664 apic_printk(APIC_VERBOSE," works.\n");
1665 nmi_watchdog_default();
1666 if (nmi_watchdog == NMI_IO_APIC) {
1667 setup_nmi();
1668 }
1650 return; 1669 return;
1670 }
1671 /*
1672 * Cleanup, just in case ...
1673 */
1674 clear_IO_APIC_pin(apic2, pin2);
1651 } 1675 }
1652 1676 apic_printk(APIC_VERBOSE," failed.\n");
1653 /* Tried all possibilities to go through the IO-APIC. Now come the
1654 really cheesy fallbacks. */
1655 1677
1656 if (nmi_watchdog == NMI_IO_APIC) { 1678 if (nmi_watchdog == NMI_IO_APIC) {
1657 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); 1679 printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
diff --git a/drivers/acpi/toshiba_acpi.c b/drivers/acpi/toshiba_acpi.c
index 88aeccbafaaf..d9b651ffcdc0 100644
--- a/drivers/acpi/toshiba_acpi.c
+++ b/drivers/acpi/toshiba_acpi.c
@@ -321,13 +321,16 @@ static int set_lcd_status(struct backlight_device *bd)
321static unsigned long write_lcd(const char *buffer, unsigned long count) 321static unsigned long write_lcd(const char *buffer, unsigned long count)
322{ 322{
323 int value; 323 int value;
324 int ret = count; 324 int ret;
325 325
326 if (sscanf(buffer, " brightness : %i", &value) == 1 && 326 if (sscanf(buffer, " brightness : %i", &value) == 1 &&
327 value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS) 327 value >= 0 && value < HCI_LCD_BRIGHTNESS_LEVELS) {
328 ret = set_lcd(value); 328 ret = set_lcd(value);
329 else 329 if (ret == 0)
330 ret = count;
331 } else {
330 ret = -EINVAL; 332 ret = -EINVAL;
333 }
331 return ret; 334 return ret;
332} 335}
333 336
diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index b34e0a958d0f..da21552d2b1c 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -381,7 +381,7 @@ config PATA_OPTI
381 If unsure, say N. 381 If unsure, say N.
382 382
383config PATA_OPTIDMA 383config PATA_OPTIDMA
384 tristate "OPTI FireStar PATA support (Veyr Experimental)" 384 tristate "OPTI FireStar PATA support (Very Experimental)"
385 depends on PCI && EXPERIMENTAL 385 depends on PCI && EXPERIMENTAL
386 help 386 help
387 This option enables DMA/PIO support for the later OPTi 387 This option enables DMA/PIO support for the later OPTi
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 47082df7199e..dfb306057cf4 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -25,7 +25,7 @@
25#include <linux/libata.h> 25#include <linux/libata.h>
26 26
27#define DRV_NAME "pata_hpt37x" 27#define DRV_NAME "pata_hpt37x"
28#define DRV_VERSION "0.5.1" 28#define DRV_VERSION "0.5.2"
29 29
30struct hpt_clock { 30struct hpt_clock {
31 u8 xfer_speed; 31 u8 xfer_speed;
@@ -416,7 +416,7 @@ static const char *bad_ata100_5[] = {
416 416
417static unsigned long hpt370_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask) 417static unsigned long hpt370_filter(const struct ata_port *ap, struct ata_device *adev, unsigned long mask)
418{ 418{
419 if (adev->class != ATA_DEV_ATA) { 419 if (adev->class == ATA_DEV_ATA) {
420 if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33)) 420 if (hpt_dma_blacklisted(adev, "UDMA", bad_ata33))
421 mask &= ~ATA_MASK_UDMA; 421 mask &= ~ATA_MASK_UDMA;
422 if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5)) 422 if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
@@ -749,7 +749,7 @@ static void hpt37x_bmdma_stop(struct ata_queued_cmd *qc)
749{ 749{
750 struct ata_port *ap = qc->ap; 750 struct ata_port *ap = qc->ap;
751 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 751 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
752 int mscreg = 0x50 + 2 * ap->port_no; 752 int mscreg = 0x50 + 4 * ap->port_no;
753 u8 bwsr_stat, msc_stat; 753 u8 bwsr_stat, msc_stat;
754 754
755 pci_read_config_byte(pdev, 0x6A, &bwsr_stat); 755 pci_read_config_byte(pdev, 0x6A, &bwsr_stat);
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 7c95c762950f..62462190e07e 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -765,47 +765,34 @@ static inline struct bio *pkt_get_list_first(struct bio **list_head, struct bio
765 */ 765 */
766static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) 766static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
767{ 767{
768 char sense[SCSI_SENSE_BUFFERSIZE]; 768 request_queue_t *q = bdev_get_queue(pd->bdev);
769 request_queue_t *q;
770 struct request *rq; 769 struct request *rq;
771 DECLARE_COMPLETION_ONSTACK(wait); 770 int ret = 0;
772 int err = 0;
773 771
774 q = bdev_get_queue(pd->bdev); 772 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
773 WRITE : READ, __GFP_WAIT);
774
775 if (cgc->buflen) {
776 if (blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, __GFP_WAIT))
777 goto out;
778 }
779
780 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
781 memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
782 if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
783 memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
775 784
776 rq = blk_get_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? WRITE : READ,
777 __GFP_WAIT);
778 rq->errors = 0;
779 rq->rq_disk = pd->bdev->bd_disk;
780 rq->bio = NULL;
781 rq->buffer = NULL;
782 rq->timeout = 60*HZ; 785 rq->timeout = 60*HZ;
783 rq->data = cgc->buffer;
784 rq->data_len = cgc->buflen;
785 rq->sense = sense;
786 memset(sense, 0, sizeof(sense));
787 rq->sense_len = 0;
788 rq->cmd_type = REQ_TYPE_BLOCK_PC; 786 rq->cmd_type = REQ_TYPE_BLOCK_PC;
789 rq->cmd_flags |= REQ_HARDBARRIER; 787 rq->cmd_flags |= REQ_HARDBARRIER;
790 if (cgc->quiet) 788 if (cgc->quiet)
791 rq->cmd_flags |= REQ_QUIET; 789 rq->cmd_flags |= REQ_QUIET;
792 memcpy(rq->cmd, cgc->cmd, CDROM_PACKET_SIZE);
793 if (sizeof(rq->cmd) > CDROM_PACKET_SIZE)
794 memset(rq->cmd + CDROM_PACKET_SIZE, 0, sizeof(rq->cmd) - CDROM_PACKET_SIZE);
795 rq->cmd_len = COMMAND_SIZE(rq->cmd[0]);
796
797 rq->ref_count++;
798 rq->end_io_data = &wait;
799 rq->end_io = blk_end_sync_rq;
800 elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 1);
801 generic_unplug_device(q);
802 wait_for_completion(&wait);
803
804 if (rq->errors)
805 err = -EIO;
806 790
791 blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0);
792 ret = rq->errors;
793out:
807 blk_put_request(rq); 794 blk_put_request(rq);
808 return err; 795 return ret;
809} 796}
810 797
811/* 798/*
diff --git a/drivers/char/ip2/i2ellis.h b/drivers/char/ip2/i2ellis.h
index 5eabe47b0bc8..433305062fb8 100644
--- a/drivers/char/ip2/i2ellis.h
+++ b/drivers/char/ip2/i2ellis.h
@@ -606,9 +606,9 @@ static int iiDownloadAll(i2eBordStrPtr, loadHdrStrPtr, int, int);
606// code and returning. 606// code and returning.
607// 607//
608#define COMPLETE(pB,code) \ 608#define COMPLETE(pB,code) \
609 if(1){ \ 609 do { \
610 pB->i2eError = code; \ 610 pB->i2eError = code; \
611 return (code == I2EE_GOOD);\ 611 return (code == I2EE_GOOD);\
612 } 612 } while (0)
613 613
614#endif // I2ELLIS_H 614#endif // I2ELLIS_H
diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index 3ece69231343..5c9f67f98d10 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -28,6 +28,7 @@
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/connector.h> 29#include <linux/connector.h>
30#include <asm/atomic.h> 30#include <asm/atomic.h>
31#include <asm/unaligned.h>
31 32
32#include <linux/cn_proc.h> 33#include <linux/cn_proc.h>
33 34
@@ -60,7 +61,7 @@ void proc_fork_connector(struct task_struct *task)
60 ev = (struct proc_event*)msg->data; 61 ev = (struct proc_event*)msg->data;
61 get_seq(&msg->seq, &ev->cpu); 62 get_seq(&msg->seq, &ev->cpu);
62 ktime_get_ts(&ts); /* get high res monotonic timestamp */ 63 ktime_get_ts(&ts); /* get high res monotonic timestamp */
63 ev->timestamp_ns = timespec_to_ns(&ts); 64 put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
64 ev->what = PROC_EVENT_FORK; 65 ev->what = PROC_EVENT_FORK;
65 ev->event_data.fork.parent_pid = task->real_parent->pid; 66 ev->event_data.fork.parent_pid = task->real_parent->pid;
66 ev->event_data.fork.parent_tgid = task->real_parent->tgid; 67 ev->event_data.fork.parent_tgid = task->real_parent->tgid;
@@ -88,7 +89,7 @@ void proc_exec_connector(struct task_struct *task)
88 ev = (struct proc_event*)msg->data; 89 ev = (struct proc_event*)msg->data;
89 get_seq(&msg->seq, &ev->cpu); 90 get_seq(&msg->seq, &ev->cpu);
90 ktime_get_ts(&ts); /* get high res monotonic timestamp */ 91 ktime_get_ts(&ts); /* get high res monotonic timestamp */
91 ev->timestamp_ns = timespec_to_ns(&ts); 92 put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
92 ev->what = PROC_EVENT_EXEC; 93 ev->what = PROC_EVENT_EXEC;
93 ev->event_data.exec.process_pid = task->pid; 94 ev->event_data.exec.process_pid = task->pid;
94 ev->event_data.exec.process_tgid = task->tgid; 95 ev->event_data.exec.process_tgid = task->tgid;
@@ -124,7 +125,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
124 return; 125 return;
125 get_seq(&msg->seq, &ev->cpu); 126 get_seq(&msg->seq, &ev->cpu);
126 ktime_get_ts(&ts); /* get high res monotonic timestamp */ 127 ktime_get_ts(&ts); /* get high res monotonic timestamp */
127 ev->timestamp_ns = timespec_to_ns(&ts); 128 put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
128 129
129 memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); 130 memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
130 msg->ack = 0; /* not used */ 131 msg->ack = 0; /* not used */
@@ -146,7 +147,7 @@ void proc_exit_connector(struct task_struct *task)
146 ev = (struct proc_event*)msg->data; 147 ev = (struct proc_event*)msg->data;
147 get_seq(&msg->seq, &ev->cpu); 148 get_seq(&msg->seq, &ev->cpu);
148 ktime_get_ts(&ts); /* get high res monotonic timestamp */ 149 ktime_get_ts(&ts); /* get high res monotonic timestamp */
149 ev->timestamp_ns = timespec_to_ns(&ts); 150 put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
150 ev->what = PROC_EVENT_EXIT; 151 ev->what = PROC_EVENT_EXIT;
151 ev->event_data.exit.process_pid = task->pid; 152 ev->event_data.exit.process_pid = task->pid;
152 ev->event_data.exit.process_tgid = task->tgid; 153 ev->event_data.exit.process_tgid = task->tgid;
@@ -181,7 +182,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
181 ev = (struct proc_event*)msg->data; 182 ev = (struct proc_event*)msg->data;
182 msg->seq = rcvd_seq; 183 msg->seq = rcvd_seq;
183 ktime_get_ts(&ts); /* get high res monotonic timestamp */ 184 ktime_get_ts(&ts); /* get high res monotonic timestamp */
184 ev->timestamp_ns = timespec_to_ns(&ts); 185 put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns);
185 ev->cpu = -1; 186 ev->cpu = -1;
186 ev->what = PROC_EVENT_NONE; 187 ev->what = PROC_EVENT_NONE;
187 ev->event_data.ack.err = err; 188 ev->event_data.ack.err = err;
diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e1989f3a2684..9367c4cfe936 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -564,13 +564,4 @@ config I2C_PNX
564 This driver can also be built as a module. If so, the module 564 This driver can also be built as a module. If so, the module
565 will be called i2c-pnx. 565 will be called i2c-pnx.
566 566
567config I2C_PNX_EARLY
568 bool "Early initialization for I2C on PNXxxxx"
569 depends on I2C_PNX=y
570 help
571 Under certain circumstances one may need to make sure I2C on PNXxxxx
572 is initialized earlier than some other driver that depends on it
573 (for instance, that might be USB in case of PNX4008). With this
574 option turned on you can guarantee that.
575
576endmenu 567endmenu
diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index bbc8e3a7ff55..490173611d6b 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c
@@ -529,6 +529,8 @@ mv64xxx_i2c_probe(struct platform_device *pd)
529 platform_set_drvdata(pd, drv_data); 529 platform_set_drvdata(pd, drv_data);
530 i2c_set_adapdata(&drv_data->adapter, drv_data); 530 i2c_set_adapdata(&drv_data->adapter, drv_data);
531 531
532 mv64xxx_i2c_hw_init(drv_data);
533
532 if (request_irq(drv_data->irq, mv64xxx_i2c_intr, 0, 534 if (request_irq(drv_data->irq, mv64xxx_i2c_intr, 0,
533 MV64XXX_I2C_CTLR_NAME, drv_data)) { 535 MV64XXX_I2C_CTLR_NAME, drv_data)) {
534 dev_err(&drv_data->adapter.dev, 536 dev_err(&drv_data->adapter.dev,
@@ -542,8 +544,6 @@ mv64xxx_i2c_probe(struct platform_device *pd)
542 goto exit_free_irq; 544 goto exit_free_irq;
543 } 545 }
544 546
545 mv64xxx_i2c_hw_init(drv_data);
546
547 return 0; 547 return 0;
548 548
549 exit_free_irq: 549 exit_free_irq:
diff --git a/drivers/i2c/busses/i2c-pnx.c b/drivers/i2c/busses/i2c-pnx.c
index de0bca77e926..17376feb1acc 100644
--- a/drivers/i2c/busses/i2c-pnx.c
+++ b/drivers/i2c/busses/i2c-pnx.c
@@ -305,8 +305,7 @@ static int i2c_pnx_master_rcv(struct i2c_adapter *adap)
305 return 0; 305 return 0;
306} 306}
307 307
308static irqreturn_t 308static irqreturn_t i2c_pnx_interrupt(int irq, void *dev_id)
309i2c_pnx_interrupt(int irq, void *dev_id, struct pt_regs *regs)
310{ 309{
311 u32 stat, ctl; 310 u32 stat, ctl;
312 struct i2c_adapter *adap = dev_id; 311 struct i2c_adapter *adap = dev_id;
@@ -699,10 +698,6 @@ MODULE_AUTHOR("Vitaly Wool, Dennis Kovalev <source@mvista.com>");
699MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses"); 698MODULE_DESCRIPTION("I2C driver for Philips IP3204-based I2C busses");
700MODULE_LICENSE("GPL"); 699MODULE_LICENSE("GPL");
701 700
702#ifdef CONFIG_I2C_PNX_EARLY
703/* We need to make sure I2C is initialized before USB */ 701/* We need to make sure I2C is initialized before USB */
704subsys_initcall(i2c_adap_pnx_init); 702subsys_initcall(i2c_adap_pnx_init);
705#else
706mudule_init(i2c_adap_pnx_init);
707#endif
708module_exit(i2c_adap_pnx_exit); 703module_exit(i2c_adap_pnx_exit);
diff --git a/drivers/i2c/chips/m41t00.c b/drivers/i2c/chips/m41t00.c
index 420377c86422..3fcb646e2073 100644
--- a/drivers/i2c/chips/m41t00.c
+++ b/drivers/i2c/chips/m41t00.c
@@ -209,6 +209,7 @@ m41t00_set(void *arg)
209 buf[m41t00_chip->hour] = (buf[m41t00_chip->hour] & ~0x3f) | (hour& 0x3f); 209 buf[m41t00_chip->hour] = (buf[m41t00_chip->hour] & ~0x3f) | (hour& 0x3f);
210 buf[m41t00_chip->day] = (buf[m41t00_chip->day] & ~0x3f) | (day & 0x3f); 210 buf[m41t00_chip->day] = (buf[m41t00_chip->day] & ~0x3f) | (day & 0x3f);
211 buf[m41t00_chip->mon] = (buf[m41t00_chip->mon] & ~0x1f) | (mon & 0x1f); 211 buf[m41t00_chip->mon] = (buf[m41t00_chip->mon] & ~0x1f) | (mon & 0x1f);
212 buf[m41t00_chip->year] = year;
212 213
213 if (i2c_master_send(save_client, wbuf, 9) < 0) 214 if (i2c_master_send(save_client, wbuf, 9) < 0)
214 dev_err(&save_client->dev, "m41t00_set: Write error\n"); 215 dev_err(&save_client->dev, "m41t00_set: Write error\n");
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 3e31f1d265c9..b05378a3d673 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -95,16 +95,32 @@ struct device_driver i2c_adapter_driver = {
95 .bus = &i2c_bus_type, 95 .bus = &i2c_bus_type,
96}; 96};
97 97
98/* ------------------------------------------------------------------------- */
99
100/* I2C bus adapters -- one roots each I2C or SMBUS segment */
101
98static void i2c_adapter_class_dev_release(struct class_device *dev) 102static void i2c_adapter_class_dev_release(struct class_device *dev)
99{ 103{
100 struct i2c_adapter *adap = class_dev_to_i2c_adapter(dev); 104 struct i2c_adapter *adap = class_dev_to_i2c_adapter(dev);
101 complete(&adap->class_dev_released); 105 complete(&adap->class_dev_released);
102} 106}
103 107
108static ssize_t i2c_adapter_show_name(struct class_device *cdev, char *buf)
109{
110 struct i2c_adapter *adap = class_dev_to_i2c_adapter(cdev);
111 return sprintf(buf, "%s\n", adap->name);
112}
113
114static struct class_device_attribute i2c_adapter_attrs[] = {
115 __ATTR(name, S_IRUGO, i2c_adapter_show_name, NULL),
116 { },
117};
118
104struct class i2c_adapter_class = { 119struct class i2c_adapter_class = {
105 .owner = THIS_MODULE, 120 .owner = THIS_MODULE,
106 .name = "i2c-adapter", 121 .name = "i2c-adapter",
107 .release = &i2c_adapter_class_dev_release, 122 .class_dev_attrs = i2c_adapter_attrs,
123 .release = &i2c_adapter_class_dev_release,
108}; 124};
109 125
110static ssize_t show_adapter_name(struct device *dev, struct device_attribute *attr, char *buf) 126static ssize_t show_adapter_name(struct device *dev, struct device_attribute *attr, char *buf)
@@ -175,8 +191,12 @@ int i2c_add_adapter(struct i2c_adapter *adap)
175 * If the parent pointer is not set up, 191 * If the parent pointer is not set up,
176 * we add this adapter to the host bus. 192 * we add this adapter to the host bus.
177 */ 193 */
178 if (adap->dev.parent == NULL) 194 if (adap->dev.parent == NULL) {
179 adap->dev.parent = &platform_bus; 195 adap->dev.parent = &platform_bus;
196 printk(KERN_WARNING "**WARNING** I2C adapter driver [%s] "
197 "forgot to specify physical device; fix it!\n",
198 adap->name);
199 }
180 sprintf(adap->dev.bus_id, "i2c-%d", adap->nr); 200 sprintf(adap->dev.bus_id, "i2c-%d", adap->nr);
181 adap->dev.driver = &i2c_adapter_driver; 201 adap->dev.driver = &i2c_adapter_driver;
182 adap->dev.release = &i2c_adapter_dev_release; 202 adap->dev.release = &i2c_adapter_dev_release;
diff --git a/drivers/ide/pci/atiixp.c b/drivers/ide/pci/atiixp.c
index ffdffb6379ef..524e65de4398 100644
--- a/drivers/ide/pci/atiixp.c
+++ b/drivers/ide/pci/atiixp.c
@@ -46,6 +46,8 @@ static atiixp_ide_timing mdma_timing[] = {
46 46
47static int save_mdma_mode[4]; 47static int save_mdma_mode[4];
48 48
49static DEFINE_SPINLOCK(atiixp_lock);
50
49/** 51/**
50 * atiixp_ratemask - compute rate mask for ATIIXP IDE 52 * atiixp_ratemask - compute rate mask for ATIIXP IDE
51 * @drive: IDE drive to compute for 53 * @drive: IDE drive to compute for
@@ -105,7 +107,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive)
105 unsigned long flags; 107 unsigned long flags;
106 u16 tmp16; 108 u16 tmp16;
107 109
108 spin_lock_irqsave(&ide_lock, flags); 110 spin_lock_irqsave(&atiixp_lock, flags);
109 111
110 pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16); 112 pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16);
111 if (save_mdma_mode[drive->dn]) 113 if (save_mdma_mode[drive->dn])
@@ -114,7 +116,7 @@ static int atiixp_ide_dma_host_on(ide_drive_t *drive)
114 tmp16 |= (1 << drive->dn); 116 tmp16 |= (1 << drive->dn);
115 pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16); 117 pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16);
116 118
117 spin_unlock_irqrestore(&ide_lock, flags); 119 spin_unlock_irqrestore(&atiixp_lock, flags);
118 120
119 return __ide_dma_host_on(drive); 121 return __ide_dma_host_on(drive);
120} 122}
@@ -125,13 +127,13 @@ static int atiixp_ide_dma_host_off(ide_drive_t *drive)
125 unsigned long flags; 127 unsigned long flags;
126 u16 tmp16; 128 u16 tmp16;
127 129
128 spin_lock_irqsave(&ide_lock, flags); 130 spin_lock_irqsave(&atiixp_lock, flags);
129 131
130 pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16); 132 pci_read_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, &tmp16);
131 tmp16 &= ~(1 << drive->dn); 133 tmp16 &= ~(1 << drive->dn);
132 pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16); 134 pci_write_config_word(dev, ATIIXP_IDE_UDMA_CONTROL, tmp16);
133 135
134 spin_unlock_irqrestore(&ide_lock, flags); 136 spin_unlock_irqrestore(&atiixp_lock, flags);
135 137
136 return __ide_dma_host_off(drive); 138 return __ide_dma_host_off(drive);
137} 139}
@@ -152,7 +154,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio)
152 u32 pio_timing_data; 154 u32 pio_timing_data;
153 u16 pio_mode_data; 155 u16 pio_mode_data;
154 156
155 spin_lock_irqsave(&ide_lock, flags); 157 spin_lock_irqsave(&atiixp_lock, flags);
156 158
157 pci_read_config_word(dev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); 159 pci_read_config_word(dev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
158 pio_mode_data &= ~(0x07 << (drive->dn * 4)); 160 pio_mode_data &= ~(0x07 << (drive->dn * 4));
@@ -165,7 +167,7 @@ static void atiixp_tuneproc(ide_drive_t *drive, u8 pio)
165 (pio_timing[pio].command_width << (timing_shift + 4)); 167 (pio_timing[pio].command_width << (timing_shift + 4));
166 pci_write_config_dword(dev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); 168 pci_write_config_dword(dev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
167 169
168 spin_unlock_irqrestore(&ide_lock, flags); 170 spin_unlock_irqrestore(&atiixp_lock, flags);
169} 171}
170 172
171/** 173/**
@@ -189,7 +191,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed)
189 191
190 speed = ide_rate_filter(atiixp_ratemask(drive), xferspeed); 192 speed = ide_rate_filter(atiixp_ratemask(drive), xferspeed);
191 193
192 spin_lock_irqsave(&ide_lock, flags); 194 spin_lock_irqsave(&atiixp_lock, flags);
193 195
194 save_mdma_mode[drive->dn] = 0; 196 save_mdma_mode[drive->dn] = 0;
195 if (speed >= XFER_UDMA_0) { 197 if (speed >= XFER_UDMA_0) {
@@ -208,7 +210,7 @@ static int atiixp_speedproc(ide_drive_t *drive, u8 xferspeed)
208 } 210 }
209 } 211 }
210 212
211 spin_unlock_irqrestore(&ide_lock, flags); 213 spin_unlock_irqrestore(&atiixp_lock, flags);
212 214
213 if (speed >= XFER_SW_DMA_0) 215 if (speed >= XFER_SW_DMA_0)
214 pio = atiixp_dma_2_pio(speed); 216 pio = atiixp_dma_2_pio(speed);
diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 61f1a9665a7f..381cc6f101ce 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -123,7 +123,7 @@ struct via82cxxx_dev
123static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing) 123static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
124{ 124{
125 struct pci_dev *dev = hwif->pci_dev; 125 struct pci_dev *dev = hwif->pci_dev;
126 struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif); 126 struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
127 u8 t; 127 u8 t;
128 128
129 if (~vdev->via_config->flags & VIA_BAD_AST) { 129 if (~vdev->via_config->flags & VIA_BAD_AST) {
@@ -162,7 +162,7 @@ static void via_set_speed(ide_hwif_t *hwif, u8 dn, struct ide_timing *timing)
162static int via_set_drive(ide_drive_t *drive, u8 speed) 162static int via_set_drive(ide_drive_t *drive, u8 speed)
163{ 163{
164 ide_drive_t *peer = HWIF(drive)->drives + (~drive->dn & 1); 164 ide_drive_t *peer = HWIF(drive)->drives + (~drive->dn & 1);
165 struct via82cxxx_dev *vdev = ide_get_hwifdata(drive->hwif); 165 struct via82cxxx_dev *vdev = pci_get_drvdata(drive->hwif->pci_dev);
166 struct ide_timing t, p; 166 struct ide_timing t, p;
167 unsigned int T, UT; 167 unsigned int T, UT;
168 168
@@ -225,7 +225,7 @@ static void via82cxxx_tune_drive(ide_drive_t *drive, u8 pio)
225static int via82cxxx_ide_dma_check (ide_drive_t *drive) 225static int via82cxxx_ide_dma_check (ide_drive_t *drive)
226{ 226{
227 ide_hwif_t *hwif = HWIF(drive); 227 ide_hwif_t *hwif = HWIF(drive);
228 struct via82cxxx_dev *vdev = ide_get_hwifdata(hwif); 228 struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
229 u16 w80 = hwif->udma_four; 229 u16 w80 = hwif->udma_four;
230 230
231 u16 speed = ide_find_best_mode(drive, 231 u16 speed = ide_find_best_mode(drive,
@@ -262,6 +262,53 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
262 return via_config; 262 return via_config;
263} 263}
264 264
265/*
266 * Check and handle 80-wire cable presence
267 */
268static void __devinit via_cable_detect(struct via82cxxx_dev *vdev, u32 u)
269{
270 int i;
271
272 switch (vdev->via_config->flags & VIA_UDMA) {
273 case VIA_UDMA_66:
274 for (i = 24; i >= 0; i -= 8)
275 if (((u >> (i & 16)) & 8) &&
276 ((u >> i) & 0x20) &&
277 (((u >> i) & 7) < 2)) {
278 /*
279 * 2x PCI clock and
280 * UDMA w/ < 3T/cycle
281 */
282 vdev->via_80w |= (1 << (1 - (i >> 4)));
283 }
284 break;
285
286 case VIA_UDMA_100:
287 for (i = 24; i >= 0; i -= 8)
288 if (((u >> i) & 0x10) ||
289 (((u >> i) & 0x20) &&
290 (((u >> i) & 7) < 4))) {
291 /* BIOS 80-wire bit or
292 * UDMA w/ < 60ns/cycle
293 */
294 vdev->via_80w |= (1 << (1 - (i >> 4)));
295 }
296 break;
297
298 case VIA_UDMA_133:
299 for (i = 24; i >= 0; i -= 8)
300 if (((u >> i) & 0x10) ||
301 (((u >> i) & 0x20) &&
302 (((u >> i) & 7) < 6))) {
303 /* BIOS 80-wire bit or
304 * UDMA w/ < 60ns/cycle
305 */
306 vdev->via_80w |= (1 << (1 - (i >> 4)));
307 }
308 break;
309 }
310}
311
265/** 312/**
266 * init_chipset_via82cxxx - initialization handler 313 * init_chipset_via82cxxx - initialization handler
267 * @dev: PCI device 314 * @dev: PCI device
@@ -274,14 +321,22 @@ static struct via_isa_bridge *via_config_find(struct pci_dev **isa)
274static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name) 321static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const char *name)
275{ 322{
276 struct pci_dev *isa = NULL; 323 struct pci_dev *isa = NULL;
324 struct via82cxxx_dev *vdev;
277 struct via_isa_bridge *via_config; 325 struct via_isa_bridge *via_config;
278 u8 t, v; 326 u8 t, v;
279 unsigned int u; 327 u32 u;
328
329 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
330 if (!vdev) {
331 printk(KERN_ERR "VP_IDE: out of memory :(\n");
332 return -ENOMEM;
333 }
334 pci_set_drvdata(dev, vdev);
280 335
281 /* 336 /*
282 * Find the ISA bridge to see how good the IDE is. 337 * Find the ISA bridge to see how good the IDE is.
283 */ 338 */
284 via_config = via_config_find(&isa); 339 vdev->via_config = via_config = via_config_find(&isa);
285 340
286 /* We checked this earlier so if it fails here deeep badness 341 /* We checked this earlier so if it fails here deeep badness
287 is involved */ 342 is involved */
@@ -289,16 +344,17 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
289 BUG_ON(!via_config->id); 344 BUG_ON(!via_config->id);
290 345
291 /* 346 /*
292 * Setup or disable Clk66 if appropriate 347 * Detect cable and configure Clk66
293 */ 348 */
349 pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
350
351 via_cable_detect(vdev, u);
294 352
295 if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) { 353 if ((via_config->flags & VIA_UDMA) == VIA_UDMA_66) {
296 /* Enable Clk66 */ 354 /* Enable Clk66 */
297 pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
298 pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008); 355 pci_write_config_dword(dev, VIA_UDMA_TIMING, u|0x80008);
299 } else if (via_config->flags & VIA_BAD_CLK66) { 356 } else if (via_config->flags & VIA_BAD_CLK66) {
300 /* Would cause trouble on 596a and 686 */ 357 /* Would cause trouble on 596a and 686 */
301 pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
302 pci_write_config_dword(dev, VIA_UDMA_TIMING, u & ~0x80008); 358 pci_write_config_dword(dev, VIA_UDMA_TIMING, u & ~0x80008);
303 } 359 }
304 360
@@ -367,75 +423,11 @@ static unsigned int __devinit init_chipset_via82cxxx(struct pci_dev *dev, const
367 return 0; 423 return 0;
368} 424}
369 425
370/*
371 * Check and handle 80-wire cable presence
372 */
373static void __devinit via_cable_detect(struct pci_dev *dev, struct via82cxxx_dev *vdev)
374{
375 unsigned int u;
376 int i;
377 pci_read_config_dword(dev, VIA_UDMA_TIMING, &u);
378
379 switch (vdev->via_config->flags & VIA_UDMA) {
380
381 case VIA_UDMA_66:
382 for (i = 24; i >= 0; i -= 8)
383 if (((u >> (i & 16)) & 8) &&
384 ((u >> i) & 0x20) &&
385 (((u >> i) & 7) < 2)) {
386 /*
387 * 2x PCI clock and
388 * UDMA w/ < 3T/cycle
389 */
390 vdev->via_80w |= (1 << (1 - (i >> 4)));
391 }
392 break;
393
394 case VIA_UDMA_100:
395 for (i = 24; i >= 0; i -= 8)
396 if (((u >> i) & 0x10) ||
397 (((u >> i) & 0x20) &&
398 (((u >> i) & 7) < 4))) {
399 /* BIOS 80-wire bit or
400 * UDMA w/ < 60ns/cycle
401 */
402 vdev->via_80w |= (1 << (1 - (i >> 4)));
403 }
404 break;
405
406 case VIA_UDMA_133:
407 for (i = 24; i >= 0; i -= 8)
408 if (((u >> i) & 0x10) ||
409 (((u >> i) & 0x20) &&
410 (((u >> i) & 7) < 6))) {
411 /* BIOS 80-wire bit or
412 * UDMA w/ < 60ns/cycle
413 */
414 vdev->via_80w |= (1 << (1 - (i >> 4)));
415 }
416 break;
417
418 }
419}
420
421static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif) 426static void __devinit init_hwif_via82cxxx(ide_hwif_t *hwif)
422{ 427{
423 struct via82cxxx_dev *vdev = kmalloc(sizeof(struct via82cxxx_dev), 428 struct via82cxxx_dev *vdev = pci_get_drvdata(hwif->pci_dev);
424 GFP_KERNEL);
425 struct pci_dev *isa = NULL;
426 int i; 429 int i;
427 430
428 if (vdev == NULL) {
429 printk(KERN_ERR "VP_IDE: out of memory :(\n");
430 return;
431 }
432
433 memset(vdev, 0, sizeof(struct via82cxxx_dev));
434 ide_set_hwifdata(hwif, vdev);
435
436 vdev->via_config = via_config_find(&isa);
437 via_cable_detect(hwif->pci_dev, vdev);
438
439 hwif->autodma = 0; 431 hwif->autodma = 0;
440 432
441 hwif->tuneproc = &via82cxxx_tune_drive; 433 hwif->tuneproc = &via82cxxx_tune_drive;
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 100df6f38d92..91e0c75aca8f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -52,6 +52,8 @@
52#define KVM_MAX_VCPUS 1 52#define KVM_MAX_VCPUS 1
53#define KVM_MEMORY_SLOTS 4 53#define KVM_MEMORY_SLOTS 4
54#define KVM_NUM_MMU_PAGES 256 54#define KVM_NUM_MMU_PAGES 256
55#define KVM_MIN_FREE_MMU_PAGES 5
56#define KVM_REFILL_PAGES 25
55 57
56#define FX_IMAGE_SIZE 512 58#define FX_IMAGE_SIZE 512
57#define FX_IMAGE_ALIGN 16 59#define FX_IMAGE_ALIGN 16
@@ -89,14 +91,54 @@ typedef unsigned long hva_t;
89typedef u64 hpa_t; 91typedef u64 hpa_t;
90typedef unsigned long hfn_t; 92typedef unsigned long hfn_t;
91 93
94#define NR_PTE_CHAIN_ENTRIES 5
95
96struct kvm_pte_chain {
97 u64 *parent_ptes[NR_PTE_CHAIN_ENTRIES];
98 struct hlist_node link;
99};
100
101/*
102 * kvm_mmu_page_role, below, is defined as:
103 *
104 * bits 0:3 - total guest paging levels (2-4, or zero for real mode)
105 * bits 4:7 - page table level for this shadow (1-4)
106 * bits 8:9 - page table quadrant for 2-level guests
107 * bit 16 - "metaphysical" - gfn is not a real page (huge page/real mode)
108 */
109union kvm_mmu_page_role {
110 unsigned word;
111 struct {
112 unsigned glevels : 4;
113 unsigned level : 4;
114 unsigned quadrant : 2;
115 unsigned pad_for_nice_hex_output : 6;
116 unsigned metaphysical : 1;
117 };
118};
119
92struct kvm_mmu_page { 120struct kvm_mmu_page {
93 struct list_head link; 121 struct list_head link;
122 struct hlist_node hash_link;
123
124 /*
125 * The following two entries are used to key the shadow page in the
126 * hash table.
127 */
128 gfn_t gfn;
129 union kvm_mmu_page_role role;
130
94 hpa_t page_hpa; 131 hpa_t page_hpa;
95 unsigned long slot_bitmap; /* One bit set per slot which has memory 132 unsigned long slot_bitmap; /* One bit set per slot which has memory
96 * in this shadow page. 133 * in this shadow page.
97 */ 134 */
98 int global; /* Set if all ptes in this page are global */ 135 int global; /* Set if all ptes in this page are global */
99 u64 *parent_pte; 136 int multimapped; /* More than one parent_pte? */
137 int root_count; /* Currently serving as active root */
138 union {
139 u64 *parent_pte; /* !multimapped */
140 struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */
141 };
100}; 142};
101 143
102struct vmcs { 144struct vmcs {
@@ -117,14 +159,26 @@ struct kvm_vcpu;
117struct kvm_mmu { 159struct kvm_mmu {
118 void (*new_cr3)(struct kvm_vcpu *vcpu); 160 void (*new_cr3)(struct kvm_vcpu *vcpu);
119 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); 161 int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
120 void (*inval_page)(struct kvm_vcpu *vcpu, gva_t gva);
121 void (*free)(struct kvm_vcpu *vcpu); 162 void (*free)(struct kvm_vcpu *vcpu);
122 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva); 163 gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
123 hpa_t root_hpa; 164 hpa_t root_hpa;
124 int root_level; 165 int root_level;
125 int shadow_root_level; 166 int shadow_root_level;
167
168 u64 *pae_root;
169};
170
171#define KVM_NR_MEM_OBJS 20
172
173struct kvm_mmu_memory_cache {
174 int nobjs;
175 void *objects[KVM_NR_MEM_OBJS];
126}; 176};
127 177
178/*
179 * We don't want allocation failures within the mmu code, so we preallocate
180 * enough memory for a single page fault in a cache.
181 */
128struct kvm_guest_debug { 182struct kvm_guest_debug {
129 int enabled; 183 int enabled;
130 unsigned long bp[4]; 184 unsigned long bp[4];
@@ -173,6 +227,7 @@ struct kvm_vcpu {
173 struct mutex mutex; 227 struct mutex mutex;
174 int cpu; 228 int cpu;
175 int launched; 229 int launched;
230 int interrupt_window_open;
176 unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ 231 unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
177#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long) 232#define NR_IRQ_WORDS KVM_IRQ_BITMAP_SIZE(unsigned long)
178 unsigned long irq_pending[NR_IRQ_WORDS]; 233 unsigned long irq_pending[NR_IRQ_WORDS];
@@ -184,6 +239,7 @@ struct kvm_vcpu {
184 unsigned long cr3; 239 unsigned long cr3;
185 unsigned long cr4; 240 unsigned long cr4;
186 unsigned long cr8; 241 unsigned long cr8;
242 u64 pdptrs[4]; /* pae */
187 u64 shadow_efer; 243 u64 shadow_efer;
188 u64 apic_base; 244 u64 apic_base;
189 int nmsrs; 245 int nmsrs;
@@ -194,6 +250,12 @@ struct kvm_vcpu {
194 struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES]; 250 struct kvm_mmu_page page_header_buf[KVM_NUM_MMU_PAGES];
195 struct kvm_mmu mmu; 251 struct kvm_mmu mmu;
196 252
253 struct kvm_mmu_memory_cache mmu_pte_chain_cache;
254 struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
255
256 gfn_t last_pt_write_gfn;
257 int last_pt_write_count;
258
197 struct kvm_guest_debug guest_debug; 259 struct kvm_guest_debug guest_debug;
198 260
199 char fx_buf[FX_BUF_SIZE]; 261 char fx_buf[FX_BUF_SIZE];
@@ -231,10 +293,16 @@ struct kvm {
231 spinlock_t lock; /* protects everything except vcpus */ 293 spinlock_t lock; /* protects everything except vcpus */
232 int nmemslots; 294 int nmemslots;
233 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS]; 295 struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
296 /*
297 * Hash table of struct kvm_mmu_page.
298 */
234 struct list_head active_mmu_pages; 299 struct list_head active_mmu_pages;
300 int n_free_mmu_pages;
301 struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
235 struct kvm_vcpu vcpus[KVM_MAX_VCPUS]; 302 struct kvm_vcpu vcpus[KVM_MAX_VCPUS];
236 int memory_config_version; 303 int memory_config_version;
237 int busy; 304 int busy;
305 unsigned long rmap_overflow;
238}; 306};
239 307
240struct kvm_stat { 308struct kvm_stat {
@@ -247,6 +315,9 @@ struct kvm_stat {
247 u32 io_exits; 315 u32 io_exits;
248 u32 mmio_exits; 316 u32 mmio_exits;
249 u32 signal_exits; 317 u32 signal_exits;
318 u32 irq_window_exits;
319 u32 halt_exits;
320 u32 request_irq_exits;
250 u32 irq_exits; 321 u32 irq_exits;
251}; 322};
252 323
@@ -279,6 +350,7 @@ struct kvm_arch_ops {
279 void (*set_segment)(struct kvm_vcpu *vcpu, 350 void (*set_segment)(struct kvm_vcpu *vcpu,
280 struct kvm_segment *var, int seg); 351 struct kvm_segment *var, int seg);
281 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); 352 void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
353 void (*decache_cr0_cr4_guest_bits)(struct kvm_vcpu *vcpu);
282 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); 354 void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
283 void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu, 355 void (*set_cr0_no_modeswitch)(struct kvm_vcpu *vcpu,
284 unsigned long cr0); 356 unsigned long cr0);
@@ -323,7 +395,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu);
323int kvm_mmu_setup(struct kvm_vcpu *vcpu); 395int kvm_mmu_setup(struct kvm_vcpu *vcpu);
324 396
325int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); 397int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
326void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); 398void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot);
327 399
328hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa); 400hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
329#define HPA_MSB ((sizeof(hpa_t) * 8) - 1) 401#define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
@@ -396,6 +468,19 @@ int kvm_write_guest(struct kvm_vcpu *vcpu,
396 468
397unsigned long segment_base(u16 selector); 469unsigned long segment_base(u16 selector);
398 470
471void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
472void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes);
473int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
474void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
475
476static inline int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
477 u32 error_code)
478{
479 if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
480 kvm_mmu_free_some_pages(vcpu);
481 return vcpu->mmu.page_fault(vcpu, gva, error_code);
482}
483
399static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn) 484static inline struct page *_gfn_to_page(struct kvm *kvm, gfn_t gfn)
400{ 485{
401 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); 486 struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
@@ -541,19 +626,4 @@ static inline u32 get_rdx_init_val(void)
541#define TSS_REDIRECTION_SIZE (256 / 8) 626#define TSS_REDIRECTION_SIZE (256 / 8)
542#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) 627#define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
543 628
544#ifdef CONFIG_X86_64
545
546/*
547 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64. Therefore
548 * we need to allocate shadow page tables in the first 4GB of memory, which
549 * happens to fit the DMA32 zone.
550 */
551#define GFP_KVM_MMU (GFP_KERNEL | __GFP_DMA32)
552
553#else
554
555#define GFP_KVM_MMU GFP_KERNEL
556
557#endif
558
559#endif 629#endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ce7fe640f18d..67c1154960f0 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -58,6 +58,9 @@ static struct kvm_stats_debugfs_item {
58 { "io_exits", &kvm_stat.io_exits }, 58 { "io_exits", &kvm_stat.io_exits },
59 { "mmio_exits", &kvm_stat.mmio_exits }, 59 { "mmio_exits", &kvm_stat.mmio_exits },
60 { "signal_exits", &kvm_stat.signal_exits }, 60 { "signal_exits", &kvm_stat.signal_exits },
61 { "irq_window", &kvm_stat.irq_window_exits },
62 { "halt_exits", &kvm_stat.halt_exits },
63 { "request_irq", &kvm_stat.request_irq_exits },
61 { "irq_exits", &kvm_stat.irq_exits }, 64 { "irq_exits", &kvm_stat.irq_exits },
62 { 0, 0 } 65 { 0, 0 }
63}; 66};
@@ -227,6 +230,7 @@ static int kvm_dev_open(struct inode *inode, struct file *filp)
227 struct kvm_vcpu *vcpu = &kvm->vcpus[i]; 230 struct kvm_vcpu *vcpu = &kvm->vcpus[i];
228 231
229 mutex_init(&vcpu->mutex); 232 mutex_init(&vcpu->mutex);
233 vcpu->kvm = kvm;
230 vcpu->mmu.root_hpa = INVALID_PAGE; 234 vcpu->mmu.root_hpa = INVALID_PAGE;
231 INIT_LIST_HEAD(&vcpu->free_pages); 235 INIT_LIST_HEAD(&vcpu->free_pages);
232 } 236 }
@@ -268,8 +272,8 @@ static void kvm_free_physmem(struct kvm *kvm)
268 272
269static void kvm_free_vcpu(struct kvm_vcpu *vcpu) 273static void kvm_free_vcpu(struct kvm_vcpu *vcpu)
270{ 274{
271 kvm_arch_ops->vcpu_free(vcpu);
272 kvm_mmu_destroy(vcpu); 275 kvm_mmu_destroy(vcpu);
276 kvm_arch_ops->vcpu_free(vcpu);
273} 277}
274 278
275static void kvm_free_vcpus(struct kvm *kvm) 279static void kvm_free_vcpus(struct kvm *kvm)
@@ -295,14 +299,17 @@ static void inject_gp(struct kvm_vcpu *vcpu)
295 kvm_arch_ops->inject_gp(vcpu, 0); 299 kvm_arch_ops->inject_gp(vcpu, 0);
296} 300}
297 301
298static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu, 302/*
299 unsigned long cr3) 303 * Load the pae pdptrs. Return true is they are all valid.
304 */
305static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
300{ 306{
301 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT; 307 gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
302 unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5; 308 unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
303 int i; 309 int i;
304 u64 pdpte; 310 u64 pdpte;
305 u64 *pdpt; 311 u64 *pdpt;
312 int ret;
306 struct kvm_memory_slot *memslot; 313 struct kvm_memory_slot *memslot;
307 314
308 spin_lock(&vcpu->kvm->lock); 315 spin_lock(&vcpu->kvm->lock);
@@ -310,16 +317,23 @@ static int pdptrs_have_reserved_bits_set(struct kvm_vcpu *vcpu,
310 /* FIXME: !memslot - emulate? 0xff? */ 317 /* FIXME: !memslot - emulate? 0xff? */
311 pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0); 318 pdpt = kmap_atomic(gfn_to_page(memslot, pdpt_gfn), KM_USER0);
312 319
320 ret = 1;
313 for (i = 0; i < 4; ++i) { 321 for (i = 0; i < 4; ++i) {
314 pdpte = pdpt[offset + i]; 322 pdpte = pdpt[offset + i];
315 if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) 323 if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) {
316 break; 324 ret = 0;
325 goto out;
326 }
317 } 327 }
318 328
329 for (i = 0; i < 4; ++i)
330 vcpu->pdptrs[i] = pdpt[offset + i];
331
332out:
319 kunmap_atomic(pdpt, KM_USER0); 333 kunmap_atomic(pdpt, KM_USER0);
320 spin_unlock(&vcpu->kvm->lock); 334 spin_unlock(&vcpu->kvm->lock);
321 335
322 return i != 4; 336 return ret;
323} 337}
324 338
325void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 339void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
@@ -365,8 +379,7 @@ void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
365 } 379 }
366 } else 380 } else
367#endif 381#endif
368 if (is_pae(vcpu) && 382 if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->cr3)) {
369 pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) {
370 printk(KERN_DEBUG "set_cr0: #GP, pdptrs " 383 printk(KERN_DEBUG "set_cr0: #GP, pdptrs "
371 "reserved bits\n"); 384 "reserved bits\n");
372 inject_gp(vcpu); 385 inject_gp(vcpu);
@@ -387,6 +400,7 @@ EXPORT_SYMBOL_GPL(set_cr0);
387 400
388void lmsw(struct kvm_vcpu *vcpu, unsigned long msw) 401void lmsw(struct kvm_vcpu *vcpu, unsigned long msw)
389{ 402{
403 kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
390 set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f)); 404 set_cr0(vcpu, (vcpu->cr0 & ~0x0ful) | (msw & 0x0f));
391} 405}
392EXPORT_SYMBOL_GPL(lmsw); 406EXPORT_SYMBOL_GPL(lmsw);
@@ -407,7 +421,7 @@ void set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
407 return; 421 return;
408 } 422 }
409 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK) 423 } else if (is_paging(vcpu) && !is_pae(vcpu) && (cr4 & CR4_PAE_MASK)
410 && pdptrs_have_reserved_bits_set(vcpu, vcpu->cr3)) { 424 && !load_pdptrs(vcpu, vcpu->cr3)) {
411 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n"); 425 printk(KERN_DEBUG "set_cr4: #GP, pdptrs reserved bits\n");
412 inject_gp(vcpu); 426 inject_gp(vcpu);
413 } 427 }
@@ -439,7 +453,7 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
439 return; 453 return;
440 } 454 }
441 if (is_paging(vcpu) && is_pae(vcpu) && 455 if (is_paging(vcpu) && is_pae(vcpu) &&
442 pdptrs_have_reserved_bits_set(vcpu, cr3)) { 456 !load_pdptrs(vcpu, cr3)) {
443 printk(KERN_DEBUG "set_cr3: #GP, pdptrs " 457 printk(KERN_DEBUG "set_cr3: #GP, pdptrs "
444 "reserved bits\n"); 458 "reserved bits\n");
445 inject_gp(vcpu); 459 inject_gp(vcpu);
@@ -449,7 +463,19 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
449 463
450 vcpu->cr3 = cr3; 464 vcpu->cr3 = cr3;
451 spin_lock(&vcpu->kvm->lock); 465 spin_lock(&vcpu->kvm->lock);
452 vcpu->mmu.new_cr3(vcpu); 466 /*
467 * Does the new cr3 value map to physical memory? (Note, we
468 * catch an invalid cr3 even in real-mode, because it would
469 * cause trouble later on when we turn on paging anyway.)
470 *
471 * A real CPU would silently accept an invalid cr3 and would
472 * attempt to use it - with largely undefined (and often hard
473 * to debug) behavior on the guest side.
474 */
475 if (unlikely(!gfn_to_memslot(vcpu->kvm, cr3 >> PAGE_SHIFT)))
476 inject_gp(vcpu);
477 else
478 vcpu->mmu.new_cr3(vcpu);
453 spin_unlock(&vcpu->kvm->lock); 479 spin_unlock(&vcpu->kvm->lock);
454} 480}
455EXPORT_SYMBOL_GPL(set_cr3); 481EXPORT_SYMBOL_GPL(set_cr3);
@@ -517,7 +543,6 @@ static int kvm_dev_ioctl_create_vcpu(struct kvm *kvm, int n)
517 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE; 543 vcpu->guest_fx_image = vcpu->host_fx_image + FX_IMAGE_SIZE;
518 544
519 vcpu->cpu = -1; /* First load will set up TR */ 545 vcpu->cpu = -1; /* First load will set up TR */
520 vcpu->kvm = kvm;
521 r = kvm_arch_ops->vcpu_create(vcpu); 546 r = kvm_arch_ops->vcpu_create(vcpu);
522 if (r < 0) 547 if (r < 0)
523 goto out_free_vcpus; 548 goto out_free_vcpus;
@@ -634,6 +659,7 @@ raced:
634 | __GFP_ZERO); 659 | __GFP_ZERO);
635 if (!new.phys_mem[i]) 660 if (!new.phys_mem[i])
636 goto out_free; 661 goto out_free;
662 new.phys_mem[i]->private = 0;
637 } 663 }
638 } 664 }
639 665
@@ -688,6 +714,13 @@ out:
688 return r; 714 return r;
689} 715}
690 716
717static void do_remove_write_access(struct kvm_vcpu *vcpu, int slot)
718{
719 spin_lock(&vcpu->kvm->lock);
720 kvm_mmu_slot_remove_write_access(vcpu, slot);
721 spin_unlock(&vcpu->kvm->lock);
722}
723
691/* 724/*
692 * Get (and clear) the dirty memory log for a memory slot. 725 * Get (and clear) the dirty memory log for a memory slot.
693 */ 726 */
@@ -697,6 +730,7 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
697 struct kvm_memory_slot *memslot; 730 struct kvm_memory_slot *memslot;
698 int r, i; 731 int r, i;
699 int n; 732 int n;
733 int cleared;
700 unsigned long any = 0; 734 unsigned long any = 0;
701 735
702 spin_lock(&kvm->lock); 736 spin_lock(&kvm->lock);
@@ -727,15 +761,17 @@ static int kvm_dev_ioctl_get_dirty_log(struct kvm *kvm,
727 761
728 762
729 if (any) { 763 if (any) {
730 spin_lock(&kvm->lock); 764 cleared = 0;
731 kvm_mmu_slot_remove_write_access(kvm, log->slot);
732 spin_unlock(&kvm->lock);
733 memset(memslot->dirty_bitmap, 0, n);
734 for (i = 0; i < KVM_MAX_VCPUS; ++i) { 765 for (i = 0; i < KVM_MAX_VCPUS; ++i) {
735 struct kvm_vcpu *vcpu = vcpu_load(kvm, i); 766 struct kvm_vcpu *vcpu = vcpu_load(kvm, i);
736 767
737 if (!vcpu) 768 if (!vcpu)
738 continue; 769 continue;
770 if (!cleared) {
771 do_remove_write_access(vcpu, log->slot);
772 memset(memslot->dirty_bitmap, 0, n);
773 cleared = 1;
774 }
739 kvm_arch_ops->tlb_flush(vcpu); 775 kvm_arch_ops->tlb_flush(vcpu);
740 vcpu_put(vcpu); 776 vcpu_put(vcpu);
741 } 777 }
@@ -863,6 +899,27 @@ static int emulator_read_emulated(unsigned long addr,
863 } 899 }
864} 900}
865 901
902static int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
903 unsigned long val, int bytes)
904{
905 struct kvm_memory_slot *m;
906 struct page *page;
907 void *virt;
908
909 if (((gpa + bytes - 1) >> PAGE_SHIFT) != (gpa >> PAGE_SHIFT))
910 return 0;
911 m = gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT);
912 if (!m)
913 return 0;
914 page = gfn_to_page(m, gpa >> PAGE_SHIFT);
915 kvm_mmu_pre_write(vcpu, gpa, bytes);
916 virt = kmap_atomic(page, KM_USER0);
917 memcpy(virt + offset_in_page(gpa), &val, bytes);
918 kunmap_atomic(virt, KM_USER0);
919 kvm_mmu_post_write(vcpu, gpa, bytes);
920 return 1;
921}
922
866static int emulator_write_emulated(unsigned long addr, 923static int emulator_write_emulated(unsigned long addr,
867 unsigned long val, 924 unsigned long val,
868 unsigned int bytes, 925 unsigned int bytes,
@@ -874,6 +931,9 @@ static int emulator_write_emulated(unsigned long addr,
874 if (gpa == UNMAPPED_GVA) 931 if (gpa == UNMAPPED_GVA)
875 return X86EMUL_PROPAGATE_FAULT; 932 return X86EMUL_PROPAGATE_FAULT;
876 933
934 if (emulator_write_phys(vcpu, gpa, val, bytes))
935 return X86EMUL_CONTINUE;
936
877 vcpu->mmio_needed = 1; 937 vcpu->mmio_needed = 1;
878 vcpu->mmio_phys_addr = gpa; 938 vcpu->mmio_phys_addr = gpa;
879 vcpu->mmio_size = bytes; 939 vcpu->mmio_size = bytes;
@@ -898,6 +958,30 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
898 return emulator_write_emulated(addr, new, bytes, ctxt); 958 return emulator_write_emulated(addr, new, bytes, ctxt);
899} 959}
900 960
961#ifdef CONFIG_X86_32
962
963static int emulator_cmpxchg8b_emulated(unsigned long addr,
964 unsigned long old_lo,
965 unsigned long old_hi,
966 unsigned long new_lo,
967 unsigned long new_hi,
968 struct x86_emulate_ctxt *ctxt)
969{
970 static int reported;
971 int r;
972
973 if (!reported) {
974 reported = 1;
975 printk(KERN_WARNING "kvm: emulating exchange8b as write\n");
976 }
977 r = emulator_write_emulated(addr, new_lo, 4, ctxt);
978 if (r != X86EMUL_CONTINUE)
979 return r;
980 return emulator_write_emulated(addr+4, new_hi, 4, ctxt);
981}
982
983#endif
984
901static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg) 985static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
902{ 986{
903 return kvm_arch_ops->get_segment_base(vcpu, seg); 987 return kvm_arch_ops->get_segment_base(vcpu, seg);
@@ -905,18 +989,15 @@ static unsigned long get_segment_base(struct kvm_vcpu *vcpu, int seg)
905 989
906int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) 990int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address)
907{ 991{
908 spin_lock(&vcpu->kvm->lock);
909 vcpu->mmu.inval_page(vcpu, address);
910 spin_unlock(&vcpu->kvm->lock);
911 kvm_arch_ops->invlpg(vcpu, address);
912 return X86EMUL_CONTINUE; 992 return X86EMUL_CONTINUE;
913} 993}
914 994
915int emulate_clts(struct kvm_vcpu *vcpu) 995int emulate_clts(struct kvm_vcpu *vcpu)
916{ 996{
917 unsigned long cr0 = vcpu->cr0; 997 unsigned long cr0;
918 998
919 cr0 &= ~CR0_TS_MASK; 999 kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
1000 cr0 = vcpu->cr0 & ~CR0_TS_MASK;
920 kvm_arch_ops->set_cr0(vcpu, cr0); 1001 kvm_arch_ops->set_cr0(vcpu, cr0);
921 return X86EMUL_CONTINUE; 1002 return X86EMUL_CONTINUE;
922} 1003}
@@ -975,6 +1056,9 @@ struct x86_emulate_ops emulate_ops = {
975 .read_emulated = emulator_read_emulated, 1056 .read_emulated = emulator_read_emulated,
976 .write_emulated = emulator_write_emulated, 1057 .write_emulated = emulator_write_emulated,
977 .cmpxchg_emulated = emulator_cmpxchg_emulated, 1058 .cmpxchg_emulated = emulator_cmpxchg_emulated,
1059#ifdef CONFIG_X86_32
1060 .cmpxchg8b_emulated = emulator_cmpxchg8b_emulated,
1061#endif
978}; 1062};
979 1063
980int emulate_instruction(struct kvm_vcpu *vcpu, 1064int emulate_instruction(struct kvm_vcpu *vcpu,
@@ -1024,6 +1108,8 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
1024 } 1108 }
1025 1109
1026 if (r) { 1110 if (r) {
1111 if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
1112 return EMULATE_DONE;
1027 if (!vcpu->mmio_needed) { 1113 if (!vcpu->mmio_needed) {
1028 report_emulation_failure(&emulate_ctxt); 1114 report_emulation_failure(&emulate_ctxt);
1029 return EMULATE_FAIL; 1115 return EMULATE_FAIL;
@@ -1069,6 +1155,7 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
1069 1155
1070unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) 1156unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
1071{ 1157{
1158 kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
1072 switch (cr) { 1159 switch (cr) {
1073 case 0: 1160 case 0:
1074 return vcpu->cr0; 1161 return vcpu->cr0;
@@ -1403,6 +1490,7 @@ static int kvm_dev_ioctl_get_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
1403 sregs->gdt.limit = dt.limit; 1490 sregs->gdt.limit = dt.limit;
1404 sregs->gdt.base = dt.base; 1491 sregs->gdt.base = dt.base;
1405 1492
1493 kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
1406 sregs->cr0 = vcpu->cr0; 1494 sregs->cr0 = vcpu->cr0;
1407 sregs->cr2 = vcpu->cr2; 1495 sregs->cr2 = vcpu->cr2;
1408 sregs->cr3 = vcpu->cr3; 1496 sregs->cr3 = vcpu->cr3;
@@ -1467,11 +1555,15 @@ static int kvm_dev_ioctl_set_sregs(struct kvm *kvm, struct kvm_sregs *sregs)
1467#endif 1555#endif
1468 vcpu->apic_base = sregs->apic_base; 1556 vcpu->apic_base = sregs->apic_base;
1469 1557
1558 kvm_arch_ops->decache_cr0_cr4_guest_bits(vcpu);
1559
1470 mmu_reset_needed |= vcpu->cr0 != sregs->cr0; 1560 mmu_reset_needed |= vcpu->cr0 != sregs->cr0;
1471 kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0); 1561 kvm_arch_ops->set_cr0_no_modeswitch(vcpu, sregs->cr0);
1472 1562
1473 mmu_reset_needed |= vcpu->cr4 != sregs->cr4; 1563 mmu_reset_needed |= vcpu->cr4 != sregs->cr4;
1474 kvm_arch_ops->set_cr4(vcpu, sregs->cr4); 1564 kvm_arch_ops->set_cr4(vcpu, sregs->cr4);
1565 if (!is_long_mode(vcpu) && is_pae(vcpu))
1566 load_pdptrs(vcpu, vcpu->cr3);
1475 1567
1476 if (mmu_reset_needed) 1568 if (mmu_reset_needed)
1477 kvm_mmu_reset_context(vcpu); 1569 kvm_mmu_reset_context(vcpu);
@@ -1693,12 +1785,12 @@ static long kvm_dev_ioctl(struct file *filp,
1693 if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run)) 1785 if (copy_from_user(&kvm_run, (void *)arg, sizeof kvm_run))
1694 goto out; 1786 goto out;
1695 r = kvm_dev_ioctl_run(kvm, &kvm_run); 1787 r = kvm_dev_ioctl_run(kvm, &kvm_run);
1696 if (r < 0) 1788 if (r < 0 && r != -EINTR)
1697 goto out; 1789 goto out;
1698 r = -EFAULT; 1790 if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) {
1699 if (copy_to_user((void *)arg, &kvm_run, sizeof kvm_run)) 1791 r = -EFAULT;
1700 goto out; 1792 goto out;
1701 r = 0; 1793 }
1702 break; 1794 break;
1703 } 1795 }
1704 case KVM_GET_REGS: { 1796 case KVM_GET_REGS: {
@@ -1842,6 +1934,7 @@ static long kvm_dev_ioctl(struct file *filp,
1842 num_msrs_to_save * sizeof(u32))) 1934 num_msrs_to_save * sizeof(u32)))
1843 goto out; 1935 goto out;
1844 r = 0; 1936 r = 0;
1937 break;
1845 } 1938 }
1846 default: 1939 default:
1847 ; 1940 ;
@@ -1944,17 +2037,17 @@ int kvm_init_arch(struct kvm_arch_ops *ops, struct module *module)
1944 return -EEXIST; 2037 return -EEXIST;
1945 } 2038 }
1946 2039
1947 kvm_arch_ops = ops; 2040 if (!ops->cpu_has_kvm_support()) {
1948
1949 if (!kvm_arch_ops->cpu_has_kvm_support()) {
1950 printk(KERN_ERR "kvm: no hardware support\n"); 2041 printk(KERN_ERR "kvm: no hardware support\n");
1951 return -EOPNOTSUPP; 2042 return -EOPNOTSUPP;
1952 } 2043 }
1953 if (kvm_arch_ops->disabled_by_bios()) { 2044 if (ops->disabled_by_bios()) {
1954 printk(KERN_ERR "kvm: disabled by bios\n"); 2045 printk(KERN_ERR "kvm: disabled by bios\n");
1955 return -EOPNOTSUPP; 2046 return -EOPNOTSUPP;
1956 } 2047 }
1957 2048
2049 kvm_arch_ops = ops;
2050
1958 r = kvm_arch_ops->hardware_setup(); 2051 r = kvm_arch_ops->hardware_setup();
1959 if (r < 0) 2052 if (r < 0)
1960 return r; 2053 return r;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 790423c5f23d..c6f972914f08 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -26,7 +26,31 @@
26#include "vmx.h" 26#include "vmx.h"
27#include "kvm.h" 27#include "kvm.h"
28 28
29#undef MMU_DEBUG
30
31#undef AUDIT
32
33#ifdef AUDIT
34static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg);
35#else
36static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {}
37#endif
38
39#ifdef MMU_DEBUG
40
41#define pgprintk(x...) do { if (dbg) printk(x); } while (0)
42#define rmap_printk(x...) do { if (dbg) printk(x); } while (0)
43
44#else
45
29#define pgprintk(x...) do { } while (0) 46#define pgprintk(x...) do { } while (0)
47#define rmap_printk(x...) do { } while (0)
48
49#endif
50
51#if defined(MMU_DEBUG) || defined(AUDIT)
52static int dbg = 1;
53#endif
30 54
31#define ASSERT(x) \ 55#define ASSERT(x) \
32 if (!(x)) { \ 56 if (!(x)) { \
@@ -34,8 +58,10 @@
34 __FILE__, __LINE__, #x); \ 58 __FILE__, __LINE__, #x); \
35 } 59 }
36 60
37#define PT64_ENT_PER_PAGE 512 61#define PT64_PT_BITS 9
38#define PT32_ENT_PER_PAGE 1024 62#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
63#define PT32_PT_BITS 10
64#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
39 65
40#define PT_WRITABLE_SHIFT 1 66#define PT_WRITABLE_SHIFT 1
41 67
@@ -125,6 +151,13 @@
125#define PT_DIRECTORY_LEVEL 2 151#define PT_DIRECTORY_LEVEL 2
126#define PT_PAGE_TABLE_LEVEL 1 152#define PT_PAGE_TABLE_LEVEL 1
127 153
154#define RMAP_EXT 4
155
156struct kvm_rmap_desc {
157 u64 *shadow_ptes[RMAP_EXT];
158 struct kvm_rmap_desc *more;
159};
160
128static int is_write_protection(struct kvm_vcpu *vcpu) 161static int is_write_protection(struct kvm_vcpu *vcpu)
129{ 162{
130 return vcpu->cr0 & CR0_WP_MASK; 163 return vcpu->cr0 & CR0_WP_MASK;
@@ -150,32 +183,272 @@ static int is_io_pte(unsigned long pte)
150 return pte & PT_SHADOW_IO_MARK; 183 return pte & PT_SHADOW_IO_MARK;
151} 184}
152 185
186static int is_rmap_pte(u64 pte)
187{
188 return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
189 == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
190}
191
192static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
193 size_t objsize, int min)
194{
195 void *obj;
196
197 if (cache->nobjs >= min)
198 return 0;
199 while (cache->nobjs < ARRAY_SIZE(cache->objects)) {
200 obj = kzalloc(objsize, GFP_NOWAIT);
201 if (!obj)
202 return -ENOMEM;
203 cache->objects[cache->nobjs++] = obj;
204 }
205 return 0;
206}
207
208static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc)
209{
210 while (mc->nobjs)
211 kfree(mc->objects[--mc->nobjs]);
212}
213
214static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
215{
216 int r;
217
218 r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
219 sizeof(struct kvm_pte_chain), 4);
220 if (r)
221 goto out;
222 r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
223 sizeof(struct kvm_rmap_desc), 1);
224out:
225 return r;
226}
227
228static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
229{
230 mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
231 mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
232}
233
234static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
235 size_t size)
236{
237 void *p;
238
239 BUG_ON(!mc->nobjs);
240 p = mc->objects[--mc->nobjs];
241 memset(p, 0, size);
242 return p;
243}
244
245static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj)
246{
247 if (mc->nobjs < KVM_NR_MEM_OBJS)
248 mc->objects[mc->nobjs++] = obj;
249 else
250 kfree(obj);
251}
252
253static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
254{
255 return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
256 sizeof(struct kvm_pte_chain));
257}
258
259static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
260 struct kvm_pte_chain *pc)
261{
262 mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
263}
264
265static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
266{
267 return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
268 sizeof(struct kvm_rmap_desc));
269}
270
271static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
272 struct kvm_rmap_desc *rd)
273{
274 mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
275}
276
277/*
278 * Reverse mapping data structures:
279 *
280 * If page->private bit zero is zero, then page->private points to the
281 * shadow page table entry that points to page_address(page).
282 *
283 * If page->private bit zero is one, (then page->private & ~1) points
284 * to a struct kvm_rmap_desc containing more mappings.
285 */
286static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
287{
288 struct page *page;
289 struct kvm_rmap_desc *desc;
290 int i;
291
292 if (!is_rmap_pte(*spte))
293 return;
294 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
295 if (!page->private) {
296 rmap_printk("rmap_add: %p %llx 0->1\n", spte, *spte);
297 page->private = (unsigned long)spte;
298 } else if (!(page->private & 1)) {
299 rmap_printk("rmap_add: %p %llx 1->many\n", spte, *spte);
300 desc = mmu_alloc_rmap_desc(vcpu);
301 desc->shadow_ptes[0] = (u64 *)page->private;
302 desc->shadow_ptes[1] = spte;
303 page->private = (unsigned long)desc | 1;
304 } else {
305 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
306 desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
307 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more)
308 desc = desc->more;
309 if (desc->shadow_ptes[RMAP_EXT-1]) {
310 desc->more = mmu_alloc_rmap_desc(vcpu);
311 desc = desc->more;
312 }
313 for (i = 0; desc->shadow_ptes[i]; ++i)
314 ;
315 desc->shadow_ptes[i] = spte;
316 }
317}
318
319static void rmap_desc_remove_entry(struct kvm_vcpu *vcpu,
320 struct page *page,
321 struct kvm_rmap_desc *desc,
322 int i,
323 struct kvm_rmap_desc *prev_desc)
324{
325 int j;
326
327 for (j = RMAP_EXT - 1; !desc->shadow_ptes[j] && j > i; --j)
328 ;
329 desc->shadow_ptes[i] = desc->shadow_ptes[j];
330 desc->shadow_ptes[j] = 0;
331 if (j != 0)
332 return;
333 if (!prev_desc && !desc->more)
334 page->private = (unsigned long)desc->shadow_ptes[0];
335 else
336 if (prev_desc)
337 prev_desc->more = desc->more;
338 else
339 page->private = (unsigned long)desc->more | 1;
340 mmu_free_rmap_desc(vcpu, desc);
341}
342
343static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
344{
345 struct page *page;
346 struct kvm_rmap_desc *desc;
347 struct kvm_rmap_desc *prev_desc;
348 int i;
349
350 if (!is_rmap_pte(*spte))
351 return;
352 page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT);
353 if (!page->private) {
354 printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
355 BUG();
356 } else if (!(page->private & 1)) {
357 rmap_printk("rmap_remove: %p %llx 1->0\n", spte, *spte);
358 if ((u64 *)page->private != spte) {
359 printk(KERN_ERR "rmap_remove: %p %llx 1->BUG\n",
360 spte, *spte);
361 BUG();
362 }
363 page->private = 0;
364 } else {
365 rmap_printk("rmap_remove: %p %llx many->many\n", spte, *spte);
366 desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
367 prev_desc = NULL;
368 while (desc) {
369 for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i)
370 if (desc->shadow_ptes[i] == spte) {
371 rmap_desc_remove_entry(vcpu, page,
372 desc, i,
373 prev_desc);
374 return;
375 }
376 prev_desc = desc;
377 desc = desc->more;
378 }
379 BUG();
380 }
381}
382
383static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
384{
385 struct kvm *kvm = vcpu->kvm;
386 struct page *page;
387 struct kvm_memory_slot *slot;
388 struct kvm_rmap_desc *desc;
389 u64 *spte;
390
391 slot = gfn_to_memslot(kvm, gfn);
392 BUG_ON(!slot);
393 page = gfn_to_page(slot, gfn);
394
395 while (page->private) {
396 if (!(page->private & 1))
397 spte = (u64 *)page->private;
398 else {
399 desc = (struct kvm_rmap_desc *)(page->private & ~1ul);
400 spte = desc->shadow_ptes[0];
401 }
402 BUG_ON(!spte);
403 BUG_ON((*spte & PT64_BASE_ADDR_MASK) !=
404 page_to_pfn(page) << PAGE_SHIFT);
405 BUG_ON(!(*spte & PT_PRESENT_MASK));
406 BUG_ON(!(*spte & PT_WRITABLE_MASK));
407 rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
408 rmap_remove(vcpu, spte);
409 kvm_arch_ops->tlb_flush(vcpu);
410 *spte &= ~(u64)PT_WRITABLE_MASK;
411 }
412}
413
414static int is_empty_shadow_page(hpa_t page_hpa)
415{
416 u64 *pos;
417 u64 *end;
418
419 for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u64);
420 pos != end; pos++)
421 if (*pos != 0) {
422 printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
423 pos, *pos);
424 return 0;
425 }
426 return 1;
427}
428
153static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa) 429static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, hpa_t page_hpa)
154{ 430{
155 struct kvm_mmu_page *page_head = page_header(page_hpa); 431 struct kvm_mmu_page *page_head = page_header(page_hpa);
156 432
433 ASSERT(is_empty_shadow_page(page_hpa));
157 list_del(&page_head->link); 434 list_del(&page_head->link);
158 page_head->page_hpa = page_hpa; 435 page_head->page_hpa = page_hpa;
159 list_add(&page_head->link, &vcpu->free_pages); 436 list_add(&page_head->link, &vcpu->free_pages);
437 ++vcpu->kvm->n_free_mmu_pages;
160} 438}
161 439
162static int is_empty_shadow_page(hpa_t page_hpa) 440static unsigned kvm_page_table_hashfn(gfn_t gfn)
163{ 441{
164 u32 *pos; 442 return gfn;
165 u32 *end;
166 for (pos = __va(page_hpa), end = pos + PAGE_SIZE / sizeof(u32);
167 pos != end; pos++)
168 if (*pos != 0)
169 return 0;
170 return 1;
171} 443}
172 444
173static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte) 445static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
446 u64 *parent_pte)
174{ 447{
175 struct kvm_mmu_page *page; 448 struct kvm_mmu_page *page;
176 449
177 if (list_empty(&vcpu->free_pages)) 450 if (list_empty(&vcpu->free_pages))
178 return INVALID_PAGE; 451 return NULL;
179 452
180 page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link); 453 page = list_entry(vcpu->free_pages.next, struct kvm_mmu_page, link);
181 list_del(&page->link); 454 list_del(&page->link);
@@ -183,8 +456,239 @@ static hpa_t kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, u64 *parent_pte)
183 ASSERT(is_empty_shadow_page(page->page_hpa)); 456 ASSERT(is_empty_shadow_page(page->page_hpa));
184 page->slot_bitmap = 0; 457 page->slot_bitmap = 0;
185 page->global = 1; 458 page->global = 1;
459 page->multimapped = 0;
186 page->parent_pte = parent_pte; 460 page->parent_pte = parent_pte;
187 return page->page_hpa; 461 --vcpu->kvm->n_free_mmu_pages;
462 return page;
463}
464
465static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu,
466 struct kvm_mmu_page *page, u64 *parent_pte)
467{
468 struct kvm_pte_chain *pte_chain;
469 struct hlist_node *node;
470 int i;
471
472 if (!parent_pte)
473 return;
474 if (!page->multimapped) {
475 u64 *old = page->parent_pte;
476
477 if (!old) {
478 page->parent_pte = parent_pte;
479 return;
480 }
481 page->multimapped = 1;
482 pte_chain = mmu_alloc_pte_chain(vcpu);
483 INIT_HLIST_HEAD(&page->parent_ptes);
484 hlist_add_head(&pte_chain->link, &page->parent_ptes);
485 pte_chain->parent_ptes[0] = old;
486 }
487 hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link) {
488 if (pte_chain->parent_ptes[NR_PTE_CHAIN_ENTRIES-1])
489 continue;
490 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i)
491 if (!pte_chain->parent_ptes[i]) {
492 pte_chain->parent_ptes[i] = parent_pte;
493 return;
494 }
495 }
496 pte_chain = mmu_alloc_pte_chain(vcpu);
497 BUG_ON(!pte_chain);
498 hlist_add_head(&pte_chain->link, &page->parent_ptes);
499 pte_chain->parent_ptes[0] = parent_pte;
500}
501
502static void mmu_page_remove_parent_pte(struct kvm_vcpu *vcpu,
503 struct kvm_mmu_page *page,
504 u64 *parent_pte)
505{
506 struct kvm_pte_chain *pte_chain;
507 struct hlist_node *node;
508 int i;
509
510 if (!page->multimapped) {
511 BUG_ON(page->parent_pte != parent_pte);
512 page->parent_pte = NULL;
513 return;
514 }
515 hlist_for_each_entry(pte_chain, node, &page->parent_ptes, link)
516 for (i = 0; i < NR_PTE_CHAIN_ENTRIES; ++i) {
517 if (!pte_chain->parent_ptes[i])
518 break;
519 if (pte_chain->parent_ptes[i] != parent_pte)
520 continue;
521 while (i + 1 < NR_PTE_CHAIN_ENTRIES
522 && pte_chain->parent_ptes[i + 1]) {
523 pte_chain->parent_ptes[i]
524 = pte_chain->parent_ptes[i + 1];
525 ++i;
526 }
527 pte_chain->parent_ptes[i] = NULL;
528 if (i == 0) {
529 hlist_del(&pte_chain->link);
530 mmu_free_pte_chain(vcpu, pte_chain);
531 if (hlist_empty(&page->parent_ptes)) {
532 page->multimapped = 0;
533 page->parent_pte = NULL;
534 }
535 }
536 return;
537 }
538 BUG();
539}
540
541static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
542 gfn_t gfn)
543{
544 unsigned index;
545 struct hlist_head *bucket;
546 struct kvm_mmu_page *page;
547 struct hlist_node *node;
548
549 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
550 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
551 bucket = &vcpu->kvm->mmu_page_hash[index];
552 hlist_for_each_entry(page, node, bucket, hash_link)
553 if (page->gfn == gfn && !page->role.metaphysical) {
554 pgprintk("%s: found role %x\n",
555 __FUNCTION__, page->role.word);
556 return page;
557 }
558 return NULL;
559}
560
561static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
562 gfn_t gfn,
563 gva_t gaddr,
564 unsigned level,
565 int metaphysical,
566 u64 *parent_pte)
567{
568 union kvm_mmu_page_role role;
569 unsigned index;
570 unsigned quadrant;
571 struct hlist_head *bucket;
572 struct kvm_mmu_page *page;
573 struct hlist_node *node;
574
575 role.word = 0;
576 role.glevels = vcpu->mmu.root_level;
577 role.level = level;
578 role.metaphysical = metaphysical;
579 if (vcpu->mmu.root_level <= PT32_ROOT_LEVEL) {
580 quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
581 quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
582 role.quadrant = quadrant;
583 }
584 pgprintk("%s: looking gfn %lx role %x\n", __FUNCTION__,
585 gfn, role.word);
586 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
587 bucket = &vcpu->kvm->mmu_page_hash[index];
588 hlist_for_each_entry(page, node, bucket, hash_link)
589 if (page->gfn == gfn && page->role.word == role.word) {
590 mmu_page_add_parent_pte(vcpu, page, parent_pte);
591 pgprintk("%s: found\n", __FUNCTION__);
592 return page;
593 }
594 page = kvm_mmu_alloc_page(vcpu, parent_pte);
595 if (!page)
596 return page;
597 pgprintk("%s: adding gfn %lx role %x\n", __FUNCTION__, gfn, role.word);
598 page->gfn = gfn;
599 page->role = role;
600 hlist_add_head(&page->hash_link, bucket);
601 if (!metaphysical)
602 rmap_write_protect(vcpu, gfn);
603 return page;
604}
605
606static void kvm_mmu_page_unlink_children(struct kvm_vcpu *vcpu,
607 struct kvm_mmu_page *page)
608{
609 unsigned i;
610 u64 *pt;
611 u64 ent;
612
613 pt = __va(page->page_hpa);
614
615 if (page->role.level == PT_PAGE_TABLE_LEVEL) {
616 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
617 if (pt[i] & PT_PRESENT_MASK)
618 rmap_remove(vcpu, &pt[i]);
619 pt[i] = 0;
620 }
621 kvm_arch_ops->tlb_flush(vcpu);
622 return;
623 }
624
625 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
626 ent = pt[i];
627
628 pt[i] = 0;
629 if (!(ent & PT_PRESENT_MASK))
630 continue;
631 ent &= PT64_BASE_ADDR_MASK;
632 mmu_page_remove_parent_pte(vcpu, page_header(ent), &pt[i]);
633 }
634}
635
636static void kvm_mmu_put_page(struct kvm_vcpu *vcpu,
637 struct kvm_mmu_page *page,
638 u64 *parent_pte)
639{
640 mmu_page_remove_parent_pte(vcpu, page, parent_pte);
641}
642
643static void kvm_mmu_zap_page(struct kvm_vcpu *vcpu,
644 struct kvm_mmu_page *page)
645{
646 u64 *parent_pte;
647
648 while (page->multimapped || page->parent_pte) {
649 if (!page->multimapped)
650 parent_pte = page->parent_pte;
651 else {
652 struct kvm_pte_chain *chain;
653
654 chain = container_of(page->parent_ptes.first,
655 struct kvm_pte_chain, link);
656 parent_pte = chain->parent_ptes[0];
657 }
658 BUG_ON(!parent_pte);
659 kvm_mmu_put_page(vcpu, page, parent_pte);
660 *parent_pte = 0;
661 }
662 kvm_mmu_page_unlink_children(vcpu, page);
663 if (!page->root_count) {
664 hlist_del(&page->hash_link);
665 kvm_mmu_free_page(vcpu, page->page_hpa);
666 } else {
667 list_del(&page->link);
668 list_add(&page->link, &vcpu->kvm->active_mmu_pages);
669 }
670}
671
672static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
673{
674 unsigned index;
675 struct hlist_head *bucket;
676 struct kvm_mmu_page *page;
677 struct hlist_node *node, *n;
678 int r;
679
680 pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
681 r = 0;
682 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
683 bucket = &vcpu->kvm->mmu_page_hash[index];
684 hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
685 if (page->gfn == gfn && !page->role.metaphysical) {
686 pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
687 page->role.word);
688 kvm_mmu_zap_page(vcpu, page);
689 r = 1;
690 }
691 return r;
188} 692}
189 693
190static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa) 694static void page_header_update_slot(struct kvm *kvm, void *pte, gpa_t gpa)
@@ -225,35 +729,6 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
225 return gpa_to_hpa(vcpu, gpa); 729 return gpa_to_hpa(vcpu, gpa);
226} 730}
227 731
228
229static void release_pt_page_64(struct kvm_vcpu *vcpu, hpa_t page_hpa,
230 int level)
231{
232 ASSERT(vcpu);
233 ASSERT(VALID_PAGE(page_hpa));
234 ASSERT(level <= PT64_ROOT_LEVEL && level > 0);
235
236 if (level == 1)
237 memset(__va(page_hpa), 0, PAGE_SIZE);
238 else {
239 u64 *pos;
240 u64 *end;
241
242 for (pos = __va(page_hpa), end = pos + PT64_ENT_PER_PAGE;
243 pos != end; pos++) {
244 u64 current_ent = *pos;
245
246 *pos = 0;
247 if (is_present_pte(current_ent))
248 release_pt_page_64(vcpu,
249 current_ent &
250 PT64_BASE_ADDR_MASK,
251 level - 1);
252 }
253 }
254 kvm_mmu_free_page(vcpu, page_hpa);
255}
256
257static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) 732static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
258{ 733{
259} 734}
@@ -266,52 +741,109 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
266 for (; ; level--) { 741 for (; ; level--) {
267 u32 index = PT64_INDEX(v, level); 742 u32 index = PT64_INDEX(v, level);
268 u64 *table; 743 u64 *table;
744 u64 pte;
269 745
270 ASSERT(VALID_PAGE(table_addr)); 746 ASSERT(VALID_PAGE(table_addr));
271 table = __va(table_addr); 747 table = __va(table_addr);
272 748
273 if (level == 1) { 749 if (level == 1) {
750 pte = table[index];
751 if (is_present_pte(pte) && is_writeble_pte(pte))
752 return 0;
274 mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT); 753 mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
275 page_header_update_slot(vcpu->kvm, table, v); 754 page_header_update_slot(vcpu->kvm, table, v);
276 table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK | 755 table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
277 PT_USER_MASK; 756 PT_USER_MASK;
757 rmap_add(vcpu, &table[index]);
278 return 0; 758 return 0;
279 } 759 }
280 760
281 if (table[index] == 0) { 761 if (table[index] == 0) {
282 hpa_t new_table = kvm_mmu_alloc_page(vcpu, 762 struct kvm_mmu_page *new_table;
283 &table[index]); 763 gfn_t pseudo_gfn;
284 764
285 if (!VALID_PAGE(new_table)) { 765 pseudo_gfn = (v & PT64_DIR_BASE_ADDR_MASK)
766 >> PAGE_SHIFT;
767 new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
768 v, level - 1,
769 1, &table[index]);
770 if (!new_table) {
286 pgprintk("nonpaging_map: ENOMEM\n"); 771 pgprintk("nonpaging_map: ENOMEM\n");
287 return -ENOMEM; 772 return -ENOMEM;
288 } 773 }
289 774
290 if (level == PT32E_ROOT_LEVEL) 775 table[index] = new_table->page_hpa | PT_PRESENT_MASK
291 table[index] = new_table | PT_PRESENT_MASK; 776 | PT_WRITABLE_MASK | PT_USER_MASK;
292 else
293 table[index] = new_table | PT_PRESENT_MASK |
294 PT_WRITABLE_MASK | PT_USER_MASK;
295 } 777 }
296 table_addr = table[index] & PT64_BASE_ADDR_MASK; 778 table_addr = table[index] & PT64_BASE_ADDR_MASK;
297 } 779 }
298} 780}
299 781
300static void nonpaging_flush(struct kvm_vcpu *vcpu) 782static void mmu_free_roots(struct kvm_vcpu *vcpu)
301{ 783{
302 hpa_t root = vcpu->mmu.root_hpa; 784 int i;
785 struct kvm_mmu_page *page;
303 786
304 ++kvm_stat.tlb_flush; 787#ifdef CONFIG_X86_64
305 pgprintk("nonpaging_flush\n"); 788 if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
306 ASSERT(VALID_PAGE(root)); 789 hpa_t root = vcpu->mmu.root_hpa;
307 release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level); 790
308 root = kvm_mmu_alloc_page(vcpu, NULL); 791 ASSERT(VALID_PAGE(root));
309 ASSERT(VALID_PAGE(root)); 792 page = page_header(root);
310 vcpu->mmu.root_hpa = root; 793 --page->root_count;
311 if (is_paging(vcpu)) 794 vcpu->mmu.root_hpa = INVALID_PAGE;
312 root |= (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)); 795 return;
313 kvm_arch_ops->set_cr3(vcpu, root); 796 }
314 kvm_arch_ops->tlb_flush(vcpu); 797#endif
798 for (i = 0; i < 4; ++i) {
799 hpa_t root = vcpu->mmu.pae_root[i];
800
801 ASSERT(VALID_PAGE(root));
802 root &= PT64_BASE_ADDR_MASK;
803 page = page_header(root);
804 --page->root_count;
805 vcpu->mmu.pae_root[i] = INVALID_PAGE;
806 }
807 vcpu->mmu.root_hpa = INVALID_PAGE;
808}
809
810static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
811{
812 int i;
813 gfn_t root_gfn;
814 struct kvm_mmu_page *page;
815
816 root_gfn = vcpu->cr3 >> PAGE_SHIFT;
817
818#ifdef CONFIG_X86_64
819 if (vcpu->mmu.shadow_root_level == PT64_ROOT_LEVEL) {
820 hpa_t root = vcpu->mmu.root_hpa;
821
822 ASSERT(!VALID_PAGE(root));
823 page = kvm_mmu_get_page(vcpu, root_gfn, 0,
824 PT64_ROOT_LEVEL, 0, NULL);
825 root = page->page_hpa;
826 ++page->root_count;
827 vcpu->mmu.root_hpa = root;
828 return;
829 }
830#endif
831 for (i = 0; i < 4; ++i) {
832 hpa_t root = vcpu->mmu.pae_root[i];
833
834 ASSERT(!VALID_PAGE(root));
835 if (vcpu->mmu.root_level == PT32E_ROOT_LEVEL)
836 root_gfn = vcpu->pdptrs[i] >> PAGE_SHIFT;
837 else if (vcpu->mmu.root_level == 0)
838 root_gfn = 0;
839 page = kvm_mmu_get_page(vcpu, root_gfn, i << 30,
840 PT32_ROOT_LEVEL, !is_paging(vcpu),
841 NULL);
842 root = page->page_hpa;
843 ++page->root_count;
844 vcpu->mmu.pae_root[i] = root | PT_PRESENT_MASK;
845 }
846 vcpu->mmu.root_hpa = __pa(vcpu->mmu.pae_root);
315} 847}
316 848
317static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) 849static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -322,43 +854,29 @@ static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
322static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva, 854static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
323 u32 error_code) 855 u32 error_code)
324{ 856{
325 int ret;
326 gpa_t addr = gva; 857 gpa_t addr = gva;
858 hpa_t paddr;
859 int r;
860
861 r = mmu_topup_memory_caches(vcpu);
862 if (r)
863 return r;
327 864
328 ASSERT(vcpu); 865 ASSERT(vcpu);
329 ASSERT(VALID_PAGE(vcpu->mmu.root_hpa)); 866 ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
330 867
331 for (;;) {
332 hpa_t paddr;
333
334 paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
335 868
336 if (is_error_hpa(paddr)) 869 paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
337 return 1;
338 870
339 ret = nonpaging_map(vcpu, addr & PAGE_MASK, paddr); 871 if (is_error_hpa(paddr))
340 if (ret) { 872 return 1;
341 nonpaging_flush(vcpu);
342 continue;
343 }
344 break;
345 }
346 return ret;
347}
348 873
349static void nonpaging_inval_page(struct kvm_vcpu *vcpu, gva_t addr) 874 return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
350{
351} 875}
352 876
353static void nonpaging_free(struct kvm_vcpu *vcpu) 877static void nonpaging_free(struct kvm_vcpu *vcpu)
354{ 878{
355 hpa_t root; 879 mmu_free_roots(vcpu);
356
357 ASSERT(vcpu);
358 root = vcpu->mmu.root_hpa;
359 if (VALID_PAGE(root))
360 release_pt_page_64(vcpu, root, vcpu->mmu.shadow_root_level);
361 vcpu->mmu.root_hpa = INVALID_PAGE;
362} 880}
363 881
364static int nonpaging_init_context(struct kvm_vcpu *vcpu) 882static int nonpaging_init_context(struct kvm_vcpu *vcpu)
@@ -367,40 +885,31 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu)
367 885
368 context->new_cr3 = nonpaging_new_cr3; 886 context->new_cr3 = nonpaging_new_cr3;
369 context->page_fault = nonpaging_page_fault; 887 context->page_fault = nonpaging_page_fault;
370 context->inval_page = nonpaging_inval_page;
371 context->gva_to_gpa = nonpaging_gva_to_gpa; 888 context->gva_to_gpa = nonpaging_gva_to_gpa;
372 context->free = nonpaging_free; 889 context->free = nonpaging_free;
373 context->root_level = PT32E_ROOT_LEVEL; 890 context->root_level = 0;
374 context->shadow_root_level = PT32E_ROOT_LEVEL; 891 context->shadow_root_level = PT32E_ROOT_LEVEL;
375 context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); 892 mmu_alloc_roots(vcpu);
376 ASSERT(VALID_PAGE(context->root_hpa)); 893 ASSERT(VALID_PAGE(context->root_hpa));
377 kvm_arch_ops->set_cr3(vcpu, context->root_hpa); 894 kvm_arch_ops->set_cr3(vcpu, context->root_hpa);
378 return 0; 895 return 0;
379} 896}
380 897
381
382static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu) 898static void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu)
383{ 899{
384 struct kvm_mmu_page *page, *npage;
385
386 list_for_each_entry_safe(page, npage, &vcpu->kvm->active_mmu_pages,
387 link) {
388 if (page->global)
389 continue;
390
391 if (!page->parent_pte)
392 continue;
393
394 *page->parent_pte = 0;
395 release_pt_page_64(vcpu, page->page_hpa, 1);
396 }
397 ++kvm_stat.tlb_flush; 900 ++kvm_stat.tlb_flush;
398 kvm_arch_ops->tlb_flush(vcpu); 901 kvm_arch_ops->tlb_flush(vcpu);
399} 902}
400 903
401static void paging_new_cr3(struct kvm_vcpu *vcpu) 904static void paging_new_cr3(struct kvm_vcpu *vcpu)
402{ 905{
906 pgprintk("%s: cr3 %lx\n", __FUNCTION__, vcpu->cr3);
907 mmu_free_roots(vcpu);
908 if (unlikely(vcpu->kvm->n_free_mmu_pages < KVM_MIN_FREE_MMU_PAGES))
909 kvm_mmu_free_some_pages(vcpu);
910 mmu_alloc_roots(vcpu);
403 kvm_mmu_flush_tlb(vcpu); 911 kvm_mmu_flush_tlb(vcpu);
912 kvm_arch_ops->set_cr3(vcpu, vcpu->mmu.root_hpa);
404} 913}
405 914
406static void mark_pagetable_nonglobal(void *shadow_pte) 915static void mark_pagetable_nonglobal(void *shadow_pte)
@@ -412,7 +921,8 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
412 u64 *shadow_pte, 921 u64 *shadow_pte,
413 gpa_t gaddr, 922 gpa_t gaddr,
414 int dirty, 923 int dirty,
415 u64 access_bits) 924 u64 access_bits,
925 gfn_t gfn)
416{ 926{
417 hpa_t paddr; 927 hpa_t paddr;
418 928
@@ -420,13 +930,10 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
420 if (!dirty) 930 if (!dirty)
421 access_bits &= ~PT_WRITABLE_MASK; 931 access_bits &= ~PT_WRITABLE_MASK;
422 932
423 if (access_bits & PT_WRITABLE_MASK) 933 paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
424 mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
425 934
426 *shadow_pte |= access_bits; 935 *shadow_pte |= access_bits;
427 936
428 paddr = gpa_to_hpa(vcpu, gaddr & PT64_BASE_ADDR_MASK);
429
430 if (!(*shadow_pte & PT_GLOBAL_MASK)) 937 if (!(*shadow_pte & PT_GLOBAL_MASK))
431 mark_pagetable_nonglobal(shadow_pte); 938 mark_pagetable_nonglobal(shadow_pte);
432 939
@@ -434,10 +941,31 @@ static inline void set_pte_common(struct kvm_vcpu *vcpu,
434 *shadow_pte |= gaddr; 941 *shadow_pte |= gaddr;
435 *shadow_pte |= PT_SHADOW_IO_MARK; 942 *shadow_pte |= PT_SHADOW_IO_MARK;
436 *shadow_pte &= ~PT_PRESENT_MASK; 943 *shadow_pte &= ~PT_PRESENT_MASK;
437 } else { 944 return;
438 *shadow_pte |= paddr; 945 }
439 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr); 946
947 *shadow_pte |= paddr;
948
949 if (access_bits & PT_WRITABLE_MASK) {
950 struct kvm_mmu_page *shadow;
951
952 shadow = kvm_mmu_lookup_page(vcpu, gfn);
953 if (shadow) {
954 pgprintk("%s: found shadow page for %lx, marking ro\n",
955 __FUNCTION__, gfn);
956 access_bits &= ~PT_WRITABLE_MASK;
957 if (is_writeble_pte(*shadow_pte)) {
958 *shadow_pte &= ~PT_WRITABLE_MASK;
959 kvm_arch_ops->tlb_flush(vcpu);
960 }
961 }
440 } 962 }
963
964 if (access_bits & PT_WRITABLE_MASK)
965 mark_page_dirty(vcpu->kvm, gaddr >> PAGE_SHIFT);
966
967 page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
968 rmap_add(vcpu, shadow_pte);
441} 969}
442 970
443static void inject_page_fault(struct kvm_vcpu *vcpu, 971static void inject_page_fault(struct kvm_vcpu *vcpu,
@@ -474,41 +1002,6 @@ static int may_access(u64 pte, int write, int user)
474 return 1; 1002 return 1;
475} 1003}
476 1004
477/*
478 * Remove a shadow pte.
479 */
480static void paging_inval_page(struct kvm_vcpu *vcpu, gva_t addr)
481{
482 hpa_t page_addr = vcpu->mmu.root_hpa;
483 int level = vcpu->mmu.shadow_root_level;
484
485 ++kvm_stat.invlpg;
486
487 for (; ; level--) {
488 u32 index = PT64_INDEX(addr, level);
489 u64 *table = __va(page_addr);
490
491 if (level == PT_PAGE_TABLE_LEVEL ) {
492 table[index] = 0;
493 return;
494 }
495
496 if (!is_present_pte(table[index]))
497 return;
498
499 page_addr = table[index] & PT64_BASE_ADDR_MASK;
500
501 if (level == PT_DIRECTORY_LEVEL &&
502 (table[index] & PT_SHADOW_PS_MARK)) {
503 table[index] = 0;
504 release_pt_page_64(vcpu, page_addr, PT_PAGE_TABLE_LEVEL);
505
506 kvm_arch_ops->tlb_flush(vcpu);
507 return;
508 }
509 }
510}
511
512static void paging_free(struct kvm_vcpu *vcpu) 1005static void paging_free(struct kvm_vcpu *vcpu)
513{ 1006{
514 nonpaging_free(vcpu); 1007 nonpaging_free(vcpu);
@@ -522,37 +1015,40 @@ static void paging_free(struct kvm_vcpu *vcpu)
522#include "paging_tmpl.h" 1015#include "paging_tmpl.h"
523#undef PTTYPE 1016#undef PTTYPE
524 1017
525static int paging64_init_context(struct kvm_vcpu *vcpu) 1018static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level)
526{ 1019{
527 struct kvm_mmu *context = &vcpu->mmu; 1020 struct kvm_mmu *context = &vcpu->mmu;
528 1021
529 ASSERT(is_pae(vcpu)); 1022 ASSERT(is_pae(vcpu));
530 context->new_cr3 = paging_new_cr3; 1023 context->new_cr3 = paging_new_cr3;
531 context->page_fault = paging64_page_fault; 1024 context->page_fault = paging64_page_fault;
532 context->inval_page = paging_inval_page;
533 context->gva_to_gpa = paging64_gva_to_gpa; 1025 context->gva_to_gpa = paging64_gva_to_gpa;
534 context->free = paging_free; 1026 context->free = paging_free;
535 context->root_level = PT64_ROOT_LEVEL; 1027 context->root_level = level;
536 context->shadow_root_level = PT64_ROOT_LEVEL; 1028 context->shadow_root_level = level;
537 context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); 1029 mmu_alloc_roots(vcpu);
538 ASSERT(VALID_PAGE(context->root_hpa)); 1030 ASSERT(VALID_PAGE(context->root_hpa));
539 kvm_arch_ops->set_cr3(vcpu, context->root_hpa | 1031 kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
540 (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); 1032 (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
541 return 0; 1033 return 0;
542} 1034}
543 1035
1036static int paging64_init_context(struct kvm_vcpu *vcpu)
1037{
1038 return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL);
1039}
1040
544static int paging32_init_context(struct kvm_vcpu *vcpu) 1041static int paging32_init_context(struct kvm_vcpu *vcpu)
545{ 1042{
546 struct kvm_mmu *context = &vcpu->mmu; 1043 struct kvm_mmu *context = &vcpu->mmu;
547 1044
548 context->new_cr3 = paging_new_cr3; 1045 context->new_cr3 = paging_new_cr3;
549 context->page_fault = paging32_page_fault; 1046 context->page_fault = paging32_page_fault;
550 context->inval_page = paging_inval_page;
551 context->gva_to_gpa = paging32_gva_to_gpa; 1047 context->gva_to_gpa = paging32_gva_to_gpa;
552 context->free = paging_free; 1048 context->free = paging_free;
553 context->root_level = PT32_ROOT_LEVEL; 1049 context->root_level = PT32_ROOT_LEVEL;
554 context->shadow_root_level = PT32E_ROOT_LEVEL; 1050 context->shadow_root_level = PT32E_ROOT_LEVEL;
555 context->root_hpa = kvm_mmu_alloc_page(vcpu, NULL); 1051 mmu_alloc_roots(vcpu);
556 ASSERT(VALID_PAGE(context->root_hpa)); 1052 ASSERT(VALID_PAGE(context->root_hpa));
557 kvm_arch_ops->set_cr3(vcpu, context->root_hpa | 1053 kvm_arch_ops->set_cr3(vcpu, context->root_hpa |
558 (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK))); 1054 (vcpu->cr3 & (CR3_PCD_MASK | CR3_WPT_MASK)));
@@ -561,14 +1057,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu)
561 1057
562static int paging32E_init_context(struct kvm_vcpu *vcpu) 1058static int paging32E_init_context(struct kvm_vcpu *vcpu)
563{ 1059{
564 int ret; 1060 return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL);
565
566 if ((ret = paging64_init_context(vcpu)))
567 return ret;
568
569 vcpu->mmu.root_level = PT32E_ROOT_LEVEL;
570 vcpu->mmu.shadow_root_level = PT32E_ROOT_LEVEL;
571 return 0;
572} 1061}
573 1062
574static int init_kvm_mmu(struct kvm_vcpu *vcpu) 1063static int init_kvm_mmu(struct kvm_vcpu *vcpu)
@@ -597,41 +1086,161 @@ static void destroy_kvm_mmu(struct kvm_vcpu *vcpu)
597 1086
598int kvm_mmu_reset_context(struct kvm_vcpu *vcpu) 1087int kvm_mmu_reset_context(struct kvm_vcpu *vcpu)
599{ 1088{
1089 int r;
1090
600 destroy_kvm_mmu(vcpu); 1091 destroy_kvm_mmu(vcpu);
601 return init_kvm_mmu(vcpu); 1092 r = init_kvm_mmu(vcpu);
1093 if (r < 0)
1094 goto out;
1095 r = mmu_topup_memory_caches(vcpu);
1096out:
1097 return r;
602} 1098}
603 1099
604static void free_mmu_pages(struct kvm_vcpu *vcpu) 1100void kvm_mmu_pre_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
605{ 1101{
606 while (!list_empty(&vcpu->free_pages)) { 1102 gfn_t gfn = gpa >> PAGE_SHIFT;
1103 struct kvm_mmu_page *page;
1104 struct kvm_mmu_page *child;
1105 struct hlist_node *node, *n;
1106 struct hlist_head *bucket;
1107 unsigned index;
1108 u64 *spte;
1109 u64 pte;
1110 unsigned offset = offset_in_page(gpa);
1111 unsigned pte_size;
1112 unsigned page_offset;
1113 unsigned misaligned;
1114 int level;
1115 int flooded = 0;
1116
1117 pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
1118 if (gfn == vcpu->last_pt_write_gfn) {
1119 ++vcpu->last_pt_write_count;
1120 if (vcpu->last_pt_write_count >= 3)
1121 flooded = 1;
1122 } else {
1123 vcpu->last_pt_write_gfn = gfn;
1124 vcpu->last_pt_write_count = 1;
1125 }
1126 index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
1127 bucket = &vcpu->kvm->mmu_page_hash[index];
1128 hlist_for_each_entry_safe(page, node, n, bucket, hash_link) {
1129 if (page->gfn != gfn || page->role.metaphysical)
1130 continue;
1131 pte_size = page->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
1132 misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
1133 if (misaligned || flooded) {
1134 /*
1135 * Misaligned accesses are too much trouble to fix
1136 * up; also, they usually indicate a page is not used
1137 * as a page table.
1138 *
1139 * If we're seeing too many writes to a page,
1140 * it may no longer be a page table, or we may be
1141 * forking, in which case it is better to unmap the
1142 * page.
1143 */
1144 pgprintk("misaligned: gpa %llx bytes %d role %x\n",
1145 gpa, bytes, page->role.word);
1146 kvm_mmu_zap_page(vcpu, page);
1147 continue;
1148 }
1149 page_offset = offset;
1150 level = page->role.level;
1151 if (page->role.glevels == PT32_ROOT_LEVEL) {
1152 page_offset <<= 1; /* 32->64 */
1153 page_offset &= ~PAGE_MASK;
1154 }
1155 spte = __va(page->page_hpa);
1156 spte += page_offset / sizeof(*spte);
1157 pte = *spte;
1158 if (is_present_pte(pte)) {
1159 if (level == PT_PAGE_TABLE_LEVEL)
1160 rmap_remove(vcpu, spte);
1161 else {
1162 child = page_header(pte & PT64_BASE_ADDR_MASK);
1163 mmu_page_remove_parent_pte(vcpu, child, spte);
1164 }
1165 }
1166 *spte = 0;
1167 }
1168}
1169
1170void kvm_mmu_post_write(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes)
1171{
1172}
1173
1174int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
1175{
1176 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
1177
1178 return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
1179}
1180
1181void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
1182{
1183 while (vcpu->kvm->n_free_mmu_pages < KVM_REFILL_PAGES) {
607 struct kvm_mmu_page *page; 1184 struct kvm_mmu_page *page;
608 1185
1186 page = container_of(vcpu->kvm->active_mmu_pages.prev,
1187 struct kvm_mmu_page, link);
1188 kvm_mmu_zap_page(vcpu, page);
1189 }
1190}
1191EXPORT_SYMBOL_GPL(kvm_mmu_free_some_pages);
1192
1193static void free_mmu_pages(struct kvm_vcpu *vcpu)
1194{
1195 struct kvm_mmu_page *page;
1196
1197 while (!list_empty(&vcpu->kvm->active_mmu_pages)) {
1198 page = container_of(vcpu->kvm->active_mmu_pages.next,
1199 struct kvm_mmu_page, link);
1200 kvm_mmu_zap_page(vcpu, page);
1201 }
1202 while (!list_empty(&vcpu->free_pages)) {
609 page = list_entry(vcpu->free_pages.next, 1203 page = list_entry(vcpu->free_pages.next,
610 struct kvm_mmu_page, link); 1204 struct kvm_mmu_page, link);
611 list_del(&page->link); 1205 list_del(&page->link);
612 __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT)); 1206 __free_page(pfn_to_page(page->page_hpa >> PAGE_SHIFT));
613 page->page_hpa = INVALID_PAGE; 1207 page->page_hpa = INVALID_PAGE;
614 } 1208 }
1209 free_page((unsigned long)vcpu->mmu.pae_root);
615} 1210}
616 1211
617static int alloc_mmu_pages(struct kvm_vcpu *vcpu) 1212static int alloc_mmu_pages(struct kvm_vcpu *vcpu)
618{ 1213{
1214 struct page *page;
619 int i; 1215 int i;
620 1216
621 ASSERT(vcpu); 1217 ASSERT(vcpu);
622 1218
623 for (i = 0; i < KVM_NUM_MMU_PAGES; i++) { 1219 for (i = 0; i < KVM_NUM_MMU_PAGES; i++) {
624 struct page *page;
625 struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i]; 1220 struct kvm_mmu_page *page_header = &vcpu->page_header_buf[i];
626 1221
627 INIT_LIST_HEAD(&page_header->link); 1222 INIT_LIST_HEAD(&page_header->link);
628 if ((page = alloc_page(GFP_KVM_MMU)) == NULL) 1223 if ((page = alloc_page(GFP_KERNEL)) == NULL)
629 goto error_1; 1224 goto error_1;
630 page->private = (unsigned long)page_header; 1225 page->private = (unsigned long)page_header;
631 page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT; 1226 page_header->page_hpa = (hpa_t)page_to_pfn(page) << PAGE_SHIFT;
632 memset(__va(page_header->page_hpa), 0, PAGE_SIZE); 1227 memset(__va(page_header->page_hpa), 0, PAGE_SIZE);
633 list_add(&page_header->link, &vcpu->free_pages); 1228 list_add(&page_header->link, &vcpu->free_pages);
1229 ++vcpu->kvm->n_free_mmu_pages;
634 } 1230 }
1231
1232 /*
1233 * When emulating 32-bit mode, cr3 is only 32 bits even on x86_64.
1234 * Therefore we need to allocate shadow page tables in the first
1235 * 4GB of memory, which happens to fit the DMA32 zone.
1236 */
1237 page = alloc_page(GFP_KERNEL | __GFP_DMA32);
1238 if (!page)
1239 goto error_1;
1240 vcpu->mmu.pae_root = page_address(page);
1241 for (i = 0; i < 4; ++i)
1242 vcpu->mmu.pae_root[i] = INVALID_PAGE;
1243
635 return 0; 1244 return 0;
636 1245
637error_1: 1246error_1:
@@ -663,10 +1272,12 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu)
663 1272
664 destroy_kvm_mmu(vcpu); 1273 destroy_kvm_mmu(vcpu);
665 free_mmu_pages(vcpu); 1274 free_mmu_pages(vcpu);
1275 mmu_free_memory_caches(vcpu);
666} 1276}
667 1277
668void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) 1278void kvm_mmu_slot_remove_write_access(struct kvm_vcpu *vcpu, int slot)
669{ 1279{
1280 struct kvm *kvm = vcpu->kvm;
670 struct kvm_mmu_page *page; 1281 struct kvm_mmu_page *page;
671 1282
672 list_for_each_entry(page, &kvm->active_mmu_pages, link) { 1283 list_for_each_entry(page, &kvm->active_mmu_pages, link) {
@@ -679,8 +1290,169 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
679 pt = __va(page->page_hpa); 1290 pt = __va(page->page_hpa);
680 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) 1291 for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
681 /* avoid RMW */ 1292 /* avoid RMW */
682 if (pt[i] & PT_WRITABLE_MASK) 1293 if (pt[i] & PT_WRITABLE_MASK) {
1294 rmap_remove(vcpu, &pt[i]);
683 pt[i] &= ~PT_WRITABLE_MASK; 1295 pt[i] &= ~PT_WRITABLE_MASK;
1296 }
1297 }
1298}
1299
1300#ifdef AUDIT
1301
1302static const char *audit_msg;
1303
1304static gva_t canonicalize(gva_t gva)
1305{
1306#ifdef CONFIG_X86_64
1307 gva = (long long)(gva << 16) >> 16;
1308#endif
1309 return gva;
1310}
684 1311
1312static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
1313 gva_t va, int level)
1314{
1315 u64 *pt = __va(page_pte & PT64_BASE_ADDR_MASK);
1316 int i;
1317 gva_t va_delta = 1ul << (PAGE_SHIFT + 9 * (level - 1));
1318
1319 for (i = 0; i < PT64_ENT_PER_PAGE; ++i, va += va_delta) {
1320 u64 ent = pt[i];
1321
1322 if (!ent & PT_PRESENT_MASK)
1323 continue;
1324
1325 va = canonicalize(va);
1326 if (level > 1)
1327 audit_mappings_page(vcpu, ent, va, level - 1);
1328 else {
1329 gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
1330 hpa_t hpa = gpa_to_hpa(vcpu, gpa);
1331
1332 if ((ent & PT_PRESENT_MASK)
1333 && (ent & PT64_BASE_ADDR_MASK) != hpa)
1334 printk(KERN_ERR "audit error: (%s) levels %d"
1335 " gva %lx gpa %llx hpa %llx ent %llx\n",
1336 audit_msg, vcpu->mmu.root_level,
1337 va, gpa, hpa, ent);
1338 }
685 } 1339 }
686} 1340}
1341
1342static void audit_mappings(struct kvm_vcpu *vcpu)
1343{
1344 int i;
1345
1346 if (vcpu->mmu.root_level == 4)
1347 audit_mappings_page(vcpu, vcpu->mmu.root_hpa, 0, 4);
1348 else
1349 for (i = 0; i < 4; ++i)
1350 if (vcpu->mmu.pae_root[i] & PT_PRESENT_MASK)
1351 audit_mappings_page(vcpu,
1352 vcpu->mmu.pae_root[i],
1353 i << 30,
1354 2);
1355}
1356
1357static int count_rmaps(struct kvm_vcpu *vcpu)
1358{
1359 int nmaps = 0;
1360 int i, j, k;
1361
1362 for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
1363 struct kvm_memory_slot *m = &vcpu->kvm->memslots[i];
1364 struct kvm_rmap_desc *d;
1365
1366 for (j = 0; j < m->npages; ++j) {
1367 struct page *page = m->phys_mem[j];
1368
1369 if (!page->private)
1370 continue;
1371 if (!(page->private & 1)) {
1372 ++nmaps;
1373 continue;
1374 }
1375 d = (struct kvm_rmap_desc *)(page->private & ~1ul);
1376 while (d) {
1377 for (k = 0; k < RMAP_EXT; ++k)
1378 if (d->shadow_ptes[k])
1379 ++nmaps;
1380 else
1381 break;
1382 d = d->more;
1383 }
1384 }
1385 }
1386 return nmaps;
1387}
1388
1389static int count_writable_mappings(struct kvm_vcpu *vcpu)
1390{
1391 int nmaps = 0;
1392 struct kvm_mmu_page *page;
1393 int i;
1394
1395 list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
1396 u64 *pt = __va(page->page_hpa);
1397
1398 if (page->role.level != PT_PAGE_TABLE_LEVEL)
1399 continue;
1400
1401 for (i = 0; i < PT64_ENT_PER_PAGE; ++i) {
1402 u64 ent = pt[i];
1403
1404 if (!(ent & PT_PRESENT_MASK))
1405 continue;
1406 if (!(ent & PT_WRITABLE_MASK))
1407 continue;
1408 ++nmaps;
1409 }
1410 }
1411 return nmaps;
1412}
1413
1414static void audit_rmap(struct kvm_vcpu *vcpu)
1415{
1416 int n_rmap = count_rmaps(vcpu);
1417 int n_actual = count_writable_mappings(vcpu);
1418
1419 if (n_rmap != n_actual)
1420 printk(KERN_ERR "%s: (%s) rmap %d actual %d\n",
1421 __FUNCTION__, audit_msg, n_rmap, n_actual);
1422}
1423
1424static void audit_write_protection(struct kvm_vcpu *vcpu)
1425{
1426 struct kvm_mmu_page *page;
1427
1428 list_for_each_entry(page, &vcpu->kvm->active_mmu_pages, link) {
1429 hfn_t hfn;
1430 struct page *pg;
1431
1432 if (page->role.metaphysical)
1433 continue;
1434
1435 hfn = gpa_to_hpa(vcpu, (gpa_t)page->gfn << PAGE_SHIFT)
1436 >> PAGE_SHIFT;
1437 pg = pfn_to_page(hfn);
1438 if (pg->private)
1439 printk(KERN_ERR "%s: (%s) shadow page has writable"
1440 " mappings: gfn %lx role %x\n",
1441 __FUNCTION__, audit_msg, page->gfn,
1442 page->role.word);
1443 }
1444}
1445
1446static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg)
1447{
1448 int olddbg = dbg;
1449
1450 dbg = 0;
1451 audit_msg = msg;
1452 audit_rmap(vcpu);
1453 audit_write_protection(vcpu);
1454 audit_mappings(vcpu);
1455 dbg = olddbg;
1456}
1457
1458#endif
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 09bb9b4ed12d..2dbf4307ed9e 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -32,6 +32,11 @@
32 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 32 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
33 #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level) 33 #define PT_LEVEL_MASK(level) PT64_LEVEL_MASK(level)
34 #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK 34 #define PT_PTE_COPY_MASK PT64_PTE_COPY_MASK
35 #ifdef CONFIG_X86_64
36 #define PT_MAX_FULL_LEVELS 4
37 #else
38 #define PT_MAX_FULL_LEVELS 2
39 #endif
35#elif PTTYPE == 32 40#elif PTTYPE == 32
36 #define pt_element_t u32 41 #define pt_element_t u32
37 #define guest_walker guest_walker32 42 #define guest_walker guest_walker32
@@ -42,6 +47,7 @@
42 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level) 47 #define SHADOW_PT_INDEX(addr, level) PT64_INDEX(addr, level)
43 #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level) 48 #define PT_LEVEL_MASK(level) PT32_LEVEL_MASK(level)
44 #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK 49 #define PT_PTE_COPY_MASK PT32_PTE_COPY_MASK
50 #define PT_MAX_FULL_LEVELS 2
45#else 51#else
46 #error Invalid PTTYPE value 52 #error Invalid PTTYPE value
47#endif 53#endif
@@ -52,93 +58,126 @@
52 */ 58 */
53struct guest_walker { 59struct guest_walker {
54 int level; 60 int level;
61 gfn_t table_gfn[PT_MAX_FULL_LEVELS];
55 pt_element_t *table; 62 pt_element_t *table;
63 pt_element_t *ptep;
56 pt_element_t inherited_ar; 64 pt_element_t inherited_ar;
65 gfn_t gfn;
57}; 66};
58 67
59static void FNAME(init_walker)(struct guest_walker *walker, 68/*
60 struct kvm_vcpu *vcpu) 69 * Fetch a guest pte for a guest virtual address
70 */
71static void FNAME(walk_addr)(struct guest_walker *walker,
72 struct kvm_vcpu *vcpu, gva_t addr)
61{ 73{
62 hpa_t hpa; 74 hpa_t hpa;
63 struct kvm_memory_slot *slot; 75 struct kvm_memory_slot *slot;
76 pt_element_t *ptep;
77 pt_element_t root;
78 gfn_t table_gfn;
64 79
80 pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
65 walker->level = vcpu->mmu.root_level; 81 walker->level = vcpu->mmu.root_level;
66 slot = gfn_to_memslot(vcpu->kvm, 82 walker->table = NULL;
67 (vcpu->cr3 & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT); 83 root = vcpu->cr3;
68 hpa = safe_gpa_to_hpa(vcpu, vcpu->cr3 & PT64_BASE_ADDR_MASK); 84#if PTTYPE == 64
85 if (!is_long_mode(vcpu)) {
86 walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
87 root = *walker->ptep;
88 if (!(root & PT_PRESENT_MASK))
89 return;
90 --walker->level;
91 }
92#endif
93 table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
94 walker->table_gfn[walker->level - 1] = table_gfn;
95 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
96 walker->level - 1, table_gfn);
97 slot = gfn_to_memslot(vcpu->kvm, table_gfn);
98 hpa = safe_gpa_to_hpa(vcpu, root & PT64_BASE_ADDR_MASK);
69 walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0); 99 walker->table = kmap_atomic(pfn_to_page(hpa >> PAGE_SHIFT), KM_USER0);
70 100
71 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || 101 ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
72 (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0); 102 (vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) == 0);
73 103
74 walker->table = (pt_element_t *)( (unsigned long)walker->table |
75 (unsigned long)(vcpu->cr3 & ~(PAGE_MASK | CR3_FLAGS_MASK)) );
76 walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK; 104 walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
105
106 for (;;) {
107 int index = PT_INDEX(addr, walker->level);
108 hpa_t paddr;
109
110 ptep = &walker->table[index];
111 ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
112 ((unsigned long)ptep & PAGE_MASK));
113
114 if (is_present_pte(*ptep) && !(*ptep & PT_ACCESSED_MASK))
115 *ptep |= PT_ACCESSED_MASK;
116
117 if (!is_present_pte(*ptep))
118 break;
119
120 if (walker->level == PT_PAGE_TABLE_LEVEL) {
121 walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
122 >> PAGE_SHIFT;
123 break;
124 }
125
126 if (walker->level == PT_DIRECTORY_LEVEL
127 && (*ptep & PT_PAGE_SIZE_MASK)
128 && (PTTYPE == 64 || is_pse(vcpu))) {
129 walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
130 >> PAGE_SHIFT;
131 walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
132 break;
133 }
134
135 if (walker->level != 3 || is_long_mode(vcpu))
136 walker->inherited_ar &= walker->table[index];
137 table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
138 paddr = safe_gpa_to_hpa(vcpu, *ptep & PT_BASE_ADDR_MASK);
139 kunmap_atomic(walker->table, KM_USER0);
140 walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
141 KM_USER0);
142 --walker->level;
143 walker->table_gfn[walker->level - 1 ] = table_gfn;
144 pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
145 walker->level - 1, table_gfn);
146 }
147 walker->ptep = ptep;
148 pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
77} 149}
78 150
79static void FNAME(release_walker)(struct guest_walker *walker) 151static void FNAME(release_walker)(struct guest_walker *walker)
80{ 152{
81 kunmap_atomic(walker->table, KM_USER0); 153 if (walker->table)
154 kunmap_atomic(walker->table, KM_USER0);
82} 155}
83 156
84static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte, 157static void FNAME(set_pte)(struct kvm_vcpu *vcpu, u64 guest_pte,
85 u64 *shadow_pte, u64 access_bits) 158 u64 *shadow_pte, u64 access_bits, gfn_t gfn)
86{ 159{
87 ASSERT(*shadow_pte == 0); 160 ASSERT(*shadow_pte == 0);
88 access_bits &= guest_pte; 161 access_bits &= guest_pte;
89 *shadow_pte = (guest_pte & PT_PTE_COPY_MASK); 162 *shadow_pte = (guest_pte & PT_PTE_COPY_MASK);
90 set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK, 163 set_pte_common(vcpu, shadow_pte, guest_pte & PT_BASE_ADDR_MASK,
91 guest_pte & PT_DIRTY_MASK, access_bits); 164 guest_pte & PT_DIRTY_MASK, access_bits, gfn);
92} 165}
93 166
94static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde, 167static void FNAME(set_pde)(struct kvm_vcpu *vcpu, u64 guest_pde,
95 u64 *shadow_pte, u64 access_bits, 168 u64 *shadow_pte, u64 access_bits, gfn_t gfn)
96 int index)
97{ 169{
98 gpa_t gaddr; 170 gpa_t gaddr;
99 171
100 ASSERT(*shadow_pte == 0); 172 ASSERT(*shadow_pte == 0);
101 access_bits &= guest_pde; 173 access_bits &= guest_pde;
102 gaddr = (guest_pde & PT_DIR_BASE_ADDR_MASK) + PAGE_SIZE * index; 174 gaddr = (gpa_t)gfn << PAGE_SHIFT;
103 if (PTTYPE == 32 && is_cpuid_PSE36()) 175 if (PTTYPE == 32 && is_cpuid_PSE36())
104 gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) << 176 gaddr |= (guest_pde & PT32_DIR_PSE36_MASK) <<
105 (32 - PT32_DIR_PSE36_SHIFT); 177 (32 - PT32_DIR_PSE36_SHIFT);
106 *shadow_pte = guest_pde & PT_PTE_COPY_MASK; 178 *shadow_pte = guest_pde & PT_PTE_COPY_MASK;
107 set_pte_common(vcpu, shadow_pte, gaddr, 179 set_pte_common(vcpu, shadow_pte, gaddr,
108 guest_pde & PT_DIRTY_MASK, access_bits); 180 guest_pde & PT_DIRTY_MASK, access_bits, gfn);
109}
110
111/*
112 * Fetch a guest pte from a specific level in the paging hierarchy.
113 */
114static pt_element_t *FNAME(fetch_guest)(struct kvm_vcpu *vcpu,
115 struct guest_walker *walker,
116 int level,
117 gva_t addr)
118{
119
120 ASSERT(level > 0 && level <= walker->level);
121
122 for (;;) {
123 int index = PT_INDEX(addr, walker->level);
124 hpa_t paddr;
125
126 ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
127 ((unsigned long)&walker->table[index] & PAGE_MASK));
128 if (level == walker->level ||
129 !is_present_pte(walker->table[index]) ||
130 (walker->level == PT_DIRECTORY_LEVEL &&
131 (walker->table[index] & PT_PAGE_SIZE_MASK) &&
132 (PTTYPE == 64 || is_pse(vcpu))))
133 return &walker->table[index];
134 if (walker->level != 3 || is_long_mode(vcpu))
135 walker->inherited_ar &= walker->table[index];
136 paddr = safe_gpa_to_hpa(vcpu, walker->table[index] & PT_BASE_ADDR_MASK);
137 kunmap_atomic(walker->table, KM_USER0);
138 walker->table = kmap_atomic(pfn_to_page(paddr >> PAGE_SHIFT),
139 KM_USER0);
140 --walker->level;
141 }
142} 181}
143 182
144/* 183/*
@@ -150,15 +189,26 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
150 hpa_t shadow_addr; 189 hpa_t shadow_addr;
151 int level; 190 int level;
152 u64 *prev_shadow_ent = NULL; 191 u64 *prev_shadow_ent = NULL;
192 pt_element_t *guest_ent = walker->ptep;
193
194 if (!is_present_pte(*guest_ent))
195 return NULL;
153 196
154 shadow_addr = vcpu->mmu.root_hpa; 197 shadow_addr = vcpu->mmu.root_hpa;
155 level = vcpu->mmu.shadow_root_level; 198 level = vcpu->mmu.shadow_root_level;
199 if (level == PT32E_ROOT_LEVEL) {
200 shadow_addr = vcpu->mmu.pae_root[(addr >> 30) & 3];
201 shadow_addr &= PT64_BASE_ADDR_MASK;
202 --level;
203 }
156 204
157 for (; ; level--) { 205 for (; ; level--) {
158 u32 index = SHADOW_PT_INDEX(addr, level); 206 u32 index = SHADOW_PT_INDEX(addr, level);
159 u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index; 207 u64 *shadow_ent = ((u64 *)__va(shadow_addr)) + index;
160 pt_element_t *guest_ent; 208 struct kvm_mmu_page *shadow_page;
161 u64 shadow_pte; 209 u64 shadow_pte;
210 int metaphysical;
211 gfn_t table_gfn;
162 212
163 if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) { 213 if (is_present_pte(*shadow_ent) || is_io_pte(*shadow_ent)) {
164 if (level == PT_PAGE_TABLE_LEVEL) 214 if (level == PT_PAGE_TABLE_LEVEL)
@@ -168,21 +218,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
168 continue; 218 continue;
169 } 219 }
170 220
171 if (PTTYPE == 32 && level > PT32_ROOT_LEVEL) {
172 ASSERT(level == PT32E_ROOT_LEVEL);
173 guest_ent = FNAME(fetch_guest)(vcpu, walker,
174 PT32_ROOT_LEVEL, addr);
175 } else
176 guest_ent = FNAME(fetch_guest)(vcpu, walker,
177 level, addr);
178
179 if (!is_present_pte(*guest_ent))
180 return NULL;
181
182 /* Don't set accessed bit on PAE PDPTRs */
183 if (vcpu->mmu.root_level != 3 || walker->level != 3)
184 *guest_ent |= PT_ACCESSED_MASK;
185
186 if (level == PT_PAGE_TABLE_LEVEL) { 221 if (level == PT_PAGE_TABLE_LEVEL) {
187 222
188 if (walker->level == PT_DIRECTORY_LEVEL) { 223 if (walker->level == PT_DIRECTORY_LEVEL) {
@@ -190,21 +225,30 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
190 *prev_shadow_ent |= PT_SHADOW_PS_MARK; 225 *prev_shadow_ent |= PT_SHADOW_PS_MARK;
191 FNAME(set_pde)(vcpu, *guest_ent, shadow_ent, 226 FNAME(set_pde)(vcpu, *guest_ent, shadow_ent,
192 walker->inherited_ar, 227 walker->inherited_ar,
193 PT_INDEX(addr, PT_PAGE_TABLE_LEVEL)); 228 walker->gfn);
194 } else { 229 } else {
195 ASSERT(walker->level == PT_PAGE_TABLE_LEVEL); 230 ASSERT(walker->level == PT_PAGE_TABLE_LEVEL);
196 FNAME(set_pte)(vcpu, *guest_ent, shadow_ent, walker->inherited_ar); 231 FNAME(set_pte)(vcpu, *guest_ent, shadow_ent,
232 walker->inherited_ar,
233 walker->gfn);
197 } 234 }
198 return shadow_ent; 235 return shadow_ent;
199 } 236 }
200 237
201 shadow_addr = kvm_mmu_alloc_page(vcpu, shadow_ent); 238 if (level - 1 == PT_PAGE_TABLE_LEVEL
202 if (!VALID_PAGE(shadow_addr)) 239 && walker->level == PT_DIRECTORY_LEVEL) {
203 return ERR_PTR(-ENOMEM); 240 metaphysical = 1;
204 shadow_pte = shadow_addr | PT_PRESENT_MASK; 241 table_gfn = (*guest_ent & PT_BASE_ADDR_MASK)
205 if (vcpu->mmu.root_level > 3 || level != 3) 242 >> PAGE_SHIFT;
206 shadow_pte |= PT_ACCESSED_MASK 243 } else {
207 | PT_WRITABLE_MASK | PT_USER_MASK; 244 metaphysical = 0;
245 table_gfn = walker->table_gfn[level - 2];
246 }
247 shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
248 metaphysical, shadow_ent);
249 shadow_addr = shadow_page->page_hpa;
250 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
251 | PT_WRITABLE_MASK | PT_USER_MASK;
208 *shadow_ent = shadow_pte; 252 *shadow_ent = shadow_pte;
209 prev_shadow_ent = shadow_ent; 253 prev_shadow_ent = shadow_ent;
210 } 254 }
@@ -221,11 +265,13 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
221 u64 *shadow_ent, 265 u64 *shadow_ent,
222 struct guest_walker *walker, 266 struct guest_walker *walker,
223 gva_t addr, 267 gva_t addr,
224 int user) 268 int user,
269 int *write_pt)
225{ 270{
226 pt_element_t *guest_ent; 271 pt_element_t *guest_ent;
227 int writable_shadow; 272 int writable_shadow;
228 gfn_t gfn; 273 gfn_t gfn;
274 struct kvm_mmu_page *page;
229 275
230 if (is_writeble_pte(*shadow_ent)) 276 if (is_writeble_pte(*shadow_ent))
231 return 0; 277 return 0;
@@ -250,17 +296,35 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
250 *shadow_ent &= ~PT_USER_MASK; 296 *shadow_ent &= ~PT_USER_MASK;
251 } 297 }
252 298
253 guest_ent = FNAME(fetch_guest)(vcpu, walker, PT_PAGE_TABLE_LEVEL, addr); 299 guest_ent = walker->ptep;
254 300
255 if (!is_present_pte(*guest_ent)) { 301 if (!is_present_pte(*guest_ent)) {
256 *shadow_ent = 0; 302 *shadow_ent = 0;
257 return 0; 303 return 0;
258 } 304 }
259 305
260 gfn = (*guest_ent & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; 306 gfn = walker->gfn;
307
308 if (user) {
309 /*
310 * Usermode page faults won't be for page table updates.
311 */
312 while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
313 pgprintk("%s: zap %lx %x\n",
314 __FUNCTION__, gfn, page->role.word);
315 kvm_mmu_zap_page(vcpu, page);
316 }
317 } else if (kvm_mmu_lookup_page(vcpu, gfn)) {
318 pgprintk("%s: found shadow page for %lx, marking ro\n",
319 __FUNCTION__, gfn);
320 *guest_ent |= PT_DIRTY_MASK;
321 *write_pt = 1;
322 return 0;
323 }
261 mark_page_dirty(vcpu->kvm, gfn); 324 mark_page_dirty(vcpu->kvm, gfn);
262 *shadow_ent |= PT_WRITABLE_MASK; 325 *shadow_ent |= PT_WRITABLE_MASK;
263 *guest_ent |= PT_DIRTY_MASK; 326 *guest_ent |= PT_DIRTY_MASK;
327 rmap_add(vcpu, shadow_ent);
264 328
265 return 1; 329 return 1;
266} 330}
@@ -276,7 +340,8 @@ static int FNAME(fix_write_pf)(struct kvm_vcpu *vcpu,
276 * - normal guest page fault due to the guest pte marked not present, not 340 * - normal guest page fault due to the guest pte marked not present, not
277 * writable, or not executable 341 * writable, or not executable
278 * 342 *
279 * Returns: 1 if we need to emulate the instruction, 0 otherwise 343 * Returns: 1 if we need to emulate the instruction, 0 otherwise, or
344 * a negative value on error.
280 */ 345 */
281static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, 346static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
282 u32 error_code) 347 u32 error_code)
@@ -287,39 +352,47 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
287 struct guest_walker walker; 352 struct guest_walker walker;
288 u64 *shadow_pte; 353 u64 *shadow_pte;
289 int fixed; 354 int fixed;
355 int write_pt = 0;
356 int r;
357
358 pgprintk("%s: addr %lx err %x\n", __FUNCTION__, addr, error_code);
359 kvm_mmu_audit(vcpu, "pre page fault");
360
361 r = mmu_topup_memory_caches(vcpu);
362 if (r)
363 return r;
290 364
291 /* 365 /*
292 * Look up the shadow pte for the faulting address. 366 * Look up the shadow pte for the faulting address.
293 */ 367 */
294 for (;;) { 368 FNAME(walk_addr)(&walker, vcpu, addr);
295 FNAME(init_walker)(&walker, vcpu); 369 shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
296 shadow_pte = FNAME(fetch)(vcpu, addr, &walker);
297 if (IS_ERR(shadow_pte)) { /* must be -ENOMEM */
298 nonpaging_flush(vcpu);
299 FNAME(release_walker)(&walker);
300 continue;
301 }
302 break;
303 }
304 370
305 /* 371 /*
306 * The page is not mapped by the guest. Let the guest handle it. 372 * The page is not mapped by the guest. Let the guest handle it.
307 */ 373 */
308 if (!shadow_pte) { 374 if (!shadow_pte) {
375 pgprintk("%s: not mapped\n", __FUNCTION__);
309 inject_page_fault(vcpu, addr, error_code); 376 inject_page_fault(vcpu, addr, error_code);
310 FNAME(release_walker)(&walker); 377 FNAME(release_walker)(&walker);
311 return 0; 378 return 0;
312 } 379 }
313 380
381 pgprintk("%s: shadow pte %p %llx\n", __FUNCTION__,
382 shadow_pte, *shadow_pte);
383
314 /* 384 /*
315 * Update the shadow pte. 385 * Update the shadow pte.
316 */ 386 */
317 if (write_fault) 387 if (write_fault)
318 fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr, 388 fixed = FNAME(fix_write_pf)(vcpu, shadow_pte, &walker, addr,
319 user_fault); 389 user_fault, &write_pt);
320 else 390 else
321 fixed = fix_read_pf(shadow_pte); 391 fixed = fix_read_pf(shadow_pte);
322 392
393 pgprintk("%s: updated shadow pte %p %llx\n", __FUNCTION__,
394 shadow_pte, *shadow_pte);
395
323 FNAME(release_walker)(&walker); 396 FNAME(release_walker)(&walker);
324 397
325 /* 398 /*
@@ -331,20 +404,23 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
331 pgprintk("%s: io work, no access\n", __FUNCTION__); 404 pgprintk("%s: io work, no access\n", __FUNCTION__);
332 inject_page_fault(vcpu, addr, 405 inject_page_fault(vcpu, addr,
333 error_code | PFERR_PRESENT_MASK); 406 error_code | PFERR_PRESENT_MASK);
407 kvm_mmu_audit(vcpu, "post page fault (io)");
334 return 0; 408 return 0;
335 } 409 }
336 410
337 /* 411 /*
338 * pte not present, guest page fault. 412 * pte not present, guest page fault.
339 */ 413 */
340 if (pte_present && !fixed) { 414 if (pte_present && !fixed && !write_pt) {
341 inject_page_fault(vcpu, addr, error_code); 415 inject_page_fault(vcpu, addr, error_code);
416 kvm_mmu_audit(vcpu, "post page fault (guest)");
342 return 0; 417 return 0;
343 } 418 }
344 419
345 ++kvm_stat.pf_fixed; 420 ++kvm_stat.pf_fixed;
421 kvm_mmu_audit(vcpu, "post page fault (fixed)");
346 422
347 return 0; 423 return write_pt;
348} 424}
349 425
350static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) 426static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
@@ -353,9 +429,8 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
353 pt_element_t guest_pte; 429 pt_element_t guest_pte;
354 gpa_t gpa; 430 gpa_t gpa;
355 431
356 FNAME(init_walker)(&walker, vcpu); 432 FNAME(walk_addr)(&walker, vcpu, vaddr);
357 guest_pte = *FNAME(fetch_guest)(vcpu, &walker, PT_PAGE_TABLE_LEVEL, 433 guest_pte = *walker.ptep;
358 vaddr);
359 FNAME(release_walker)(&walker); 434 FNAME(release_walker)(&walker);
360 435
361 if (!is_present_pte(guest_pte)) 436 if (!is_present_pte(guest_pte))
@@ -389,3 +464,4 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
389#undef PT_PTE_COPY_MASK 464#undef PT_PTE_COPY_MASK
390#undef PT_NON_PTE_COPY_MASK 465#undef PT_NON_PTE_COPY_MASK
391#undef PT_DIR_BASE_ADDR_MASK 466#undef PT_DIR_BASE_ADDR_MASK
467#undef PT_MAX_FULL_LEVELS
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index fa0428735717..ccc06b1b91b5 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -235,6 +235,8 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
235 235
236 vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip; 236 vcpu->rip = vcpu->svm->vmcb->save.rip = vcpu->svm->next_rip;
237 vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; 237 vcpu->svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
238
239 vcpu->interrupt_window_open = 1;
238} 240}
239 241
240static int has_svm(void) 242static int has_svm(void)
@@ -495,7 +497,6 @@ static void init_vmcb(struct vmcb *vmcb)
495 /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */ 497 /* (1ULL << INTERCEPT_SELECTIVE_CR0) | */
496 (1ULL << INTERCEPT_CPUID) | 498 (1ULL << INTERCEPT_CPUID) |
497 (1ULL << INTERCEPT_HLT) | 499 (1ULL << INTERCEPT_HLT) |
498 (1ULL << INTERCEPT_INVLPG) |
499 (1ULL << INTERCEPT_INVLPGA) | 500 (1ULL << INTERCEPT_INVLPGA) |
500 (1ULL << INTERCEPT_IOIO_PROT) | 501 (1ULL << INTERCEPT_IOIO_PROT) |
501 (1ULL << INTERCEPT_MSR_PROT) | 502 (1ULL << INTERCEPT_MSR_PROT) |
@@ -700,6 +701,10 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct descriptor_table *dt)
700 vcpu->svm->vmcb->save.gdtr.base = dt->base ; 701 vcpu->svm->vmcb->save.gdtr.base = dt->base ;
701} 702}
702 703
704static void svm_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
705{
706}
707
703static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 708static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
704{ 709{
705#ifdef CONFIG_X86_64 710#ifdef CONFIG_X86_64
@@ -847,6 +852,7 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
847 u64 fault_address; 852 u64 fault_address;
848 u32 error_code; 853 u32 error_code;
849 enum emulation_result er; 854 enum emulation_result er;
855 int r;
850 856
851 if (is_external_interrupt(exit_int_info)) 857 if (is_external_interrupt(exit_int_info))
852 push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); 858 push_irq(vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
@@ -855,7 +861,12 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
855 861
856 fault_address = vcpu->svm->vmcb->control.exit_info_2; 862 fault_address = vcpu->svm->vmcb->control.exit_info_2;
857 error_code = vcpu->svm->vmcb->control.exit_info_1; 863 error_code = vcpu->svm->vmcb->control.exit_info_1;
858 if (!vcpu->mmu.page_fault(vcpu, fault_address, error_code)) { 864 r = kvm_mmu_page_fault(vcpu, fault_address, error_code);
865 if (r < 0) {
866 spin_unlock(&vcpu->kvm->lock);
867 return r;
868 }
869 if (!r) {
859 spin_unlock(&vcpu->kvm->lock); 870 spin_unlock(&vcpu->kvm->lock);
860 return 1; 871 return 1;
861 } 872 }
@@ -1031,10 +1042,11 @@ static int halt_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1031{ 1042{
1032 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1; 1043 vcpu->svm->next_rip = vcpu->svm->vmcb->save.rip + 1;
1033 skip_emulated_instruction(vcpu); 1044 skip_emulated_instruction(vcpu);
1034 if (vcpu->irq_summary && (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF)) 1045 if (vcpu->irq_summary)
1035 return 1; 1046 return 1;
1036 1047
1037 kvm_run->exit_reason = KVM_EXIT_HLT; 1048 kvm_run->exit_reason = KVM_EXIT_HLT;
1049 ++kvm_stat.halt_exits;
1038 return 0; 1050 return 0;
1039} 1051}
1040 1052
@@ -1186,6 +1198,23 @@ static int msr_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1186 return rdmsr_interception(vcpu, kvm_run); 1198 return rdmsr_interception(vcpu, kvm_run);
1187} 1199}
1188 1200
1201static int interrupt_window_interception(struct kvm_vcpu *vcpu,
1202 struct kvm_run *kvm_run)
1203{
1204 /*
1205 * If the user space waits to inject interrupts, exit as soon as
1206 * possible
1207 */
1208 if (kvm_run->request_interrupt_window &&
1209 !vcpu->irq_summary) {
1210 ++kvm_stat.irq_window_exits;
1211 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
1212 return 0;
1213 }
1214
1215 return 1;
1216}
1217
1189static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, 1218static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
1190 struct kvm_run *kvm_run) = { 1219 struct kvm_run *kvm_run) = {
1191 [SVM_EXIT_READ_CR0] = emulate_on_interception, 1220 [SVM_EXIT_READ_CR0] = emulate_on_interception,
@@ -1210,6 +1239,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
1210 [SVM_EXIT_NMI] = nop_on_interception, 1239 [SVM_EXIT_NMI] = nop_on_interception,
1211 [SVM_EXIT_SMI] = nop_on_interception, 1240 [SVM_EXIT_SMI] = nop_on_interception,
1212 [SVM_EXIT_INIT] = nop_on_interception, 1241 [SVM_EXIT_INIT] = nop_on_interception,
1242 [SVM_EXIT_VINTR] = interrupt_window_interception,
1213 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ 1243 /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */
1214 [SVM_EXIT_CPUID] = cpuid_interception, 1244 [SVM_EXIT_CPUID] = cpuid_interception,
1215 [SVM_EXIT_HLT] = halt_interception, 1245 [SVM_EXIT_HLT] = halt_interception,
@@ -1278,15 +1308,11 @@ static void pre_svm_run(struct kvm_vcpu *vcpu)
1278} 1308}
1279 1309
1280 1310
1281static inline void kvm_try_inject_irq(struct kvm_vcpu *vcpu) 1311static inline void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
1282{ 1312{
1283 struct vmcb_control_area *control; 1313 struct vmcb_control_area *control;
1284 1314
1285 if (!vcpu->irq_summary)
1286 return;
1287
1288 control = &vcpu->svm->vmcb->control; 1315 control = &vcpu->svm->vmcb->control;
1289
1290 control->int_vector = pop_irq(vcpu); 1316 control->int_vector = pop_irq(vcpu);
1291 control->int_ctl &= ~V_INTR_PRIO_MASK; 1317 control->int_ctl &= ~V_INTR_PRIO_MASK;
1292 control->int_ctl |= V_IRQ_MASK | 1318 control->int_ctl |= V_IRQ_MASK |
@@ -1301,6 +1327,59 @@ static void kvm_reput_irq(struct kvm_vcpu *vcpu)
1301 control->int_ctl &= ~V_IRQ_MASK; 1327 control->int_ctl &= ~V_IRQ_MASK;
1302 push_irq(vcpu, control->int_vector); 1328 push_irq(vcpu, control->int_vector);
1303 } 1329 }
1330
1331 vcpu->interrupt_window_open =
1332 !(control->int_state & SVM_INTERRUPT_SHADOW_MASK);
1333}
1334
1335static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1336 struct kvm_run *kvm_run)
1337{
1338 struct vmcb_control_area *control = &vcpu->svm->vmcb->control;
1339
1340 vcpu->interrupt_window_open =
1341 (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) &&
1342 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
1343
1344 if (vcpu->interrupt_window_open && vcpu->irq_summary)
1345 /*
1346 * If interrupts enabled, and not blocked by sti or mov ss. Good.
1347 */
1348 kvm_do_inject_irq(vcpu);
1349
1350 /*
1351 * Interrupts blocked. Wait for unblock.
1352 */
1353 if (!vcpu->interrupt_window_open &&
1354 (vcpu->irq_summary || kvm_run->request_interrupt_window)) {
1355 control->intercept |= 1ULL << INTERCEPT_VINTR;
1356 } else
1357 control->intercept &= ~(1ULL << INTERCEPT_VINTR);
1358}
1359
1360static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1361 struct kvm_run *kvm_run)
1362{
1363 kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
1364 vcpu->irq_summary == 0);
1365 kvm_run->if_flag = (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF) != 0;
1366 kvm_run->cr8 = vcpu->cr8;
1367 kvm_run->apic_base = vcpu->apic_base;
1368}
1369
1370/*
1371 * Check if userspace requested an interrupt window, and that the
1372 * interrupt window is open.
1373 *
1374 * No need to exit to userspace if we already have an interrupt queued.
1375 */
1376static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1377 struct kvm_run *kvm_run)
1378{
1379 return (!vcpu->irq_summary &&
1380 kvm_run->request_interrupt_window &&
1381 vcpu->interrupt_window_open &&
1382 (vcpu->svm->vmcb->save.rflags & X86_EFLAGS_IF));
1304} 1383}
1305 1384
1306static void save_db_regs(unsigned long *db_regs) 1385static void save_db_regs(unsigned long *db_regs)
@@ -1324,9 +1403,10 @@ static int svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1324 u16 fs_selector; 1403 u16 fs_selector;
1325 u16 gs_selector; 1404 u16 gs_selector;
1326 u16 ldt_selector; 1405 u16 ldt_selector;
1406 int r;
1327 1407
1328again: 1408again:
1329 kvm_try_inject_irq(vcpu); 1409 do_interrupt_requests(vcpu, kvm_run);
1330 1410
1331 clgi(); 1411 clgi();
1332 1412
@@ -1487,18 +1567,28 @@ again:
1487 if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) { 1567 if (vcpu->svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
1488 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; 1568 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
1489 kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code; 1569 kvm_run->exit_reason = vcpu->svm->vmcb->control.exit_code;
1570 post_kvm_run_save(vcpu, kvm_run);
1490 return 0; 1571 return 0;
1491 } 1572 }
1492 1573
1493 if (handle_exit(vcpu, kvm_run)) { 1574 r = handle_exit(vcpu, kvm_run);
1575 if (r > 0) {
1494 if (signal_pending(current)) { 1576 if (signal_pending(current)) {
1495 ++kvm_stat.signal_exits; 1577 ++kvm_stat.signal_exits;
1578 post_kvm_run_save(vcpu, kvm_run);
1579 return -EINTR;
1580 }
1581
1582 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
1583 ++kvm_stat.request_irq_exits;
1584 post_kvm_run_save(vcpu, kvm_run);
1496 return -EINTR; 1585 return -EINTR;
1497 } 1586 }
1498 kvm_resched(vcpu); 1587 kvm_resched(vcpu);
1499 goto again; 1588 goto again;
1500 } 1589 }
1501 return 0; 1590 post_kvm_run_save(vcpu, kvm_run);
1591 return r;
1502} 1592}
1503 1593
1504static void svm_flush_tlb(struct kvm_vcpu *vcpu) 1594static void svm_flush_tlb(struct kvm_vcpu *vcpu)
@@ -1565,6 +1655,7 @@ static struct kvm_arch_ops svm_arch_ops = {
1565 .get_segment = svm_get_segment, 1655 .get_segment = svm_get_segment,
1566 .set_segment = svm_set_segment, 1656 .set_segment = svm_set_segment,
1567 .get_cs_db_l_bits = svm_get_cs_db_l_bits, 1657 .get_cs_db_l_bits = svm_get_cs_db_l_bits,
1658 .decache_cr0_cr4_guest_bits = svm_decache_cr0_cr4_guest_bits,
1568 .set_cr0 = svm_set_cr0, 1659 .set_cr0 = svm_set_cr0,
1569 .set_cr0_no_modeswitch = svm_set_cr0, 1660 .set_cr0_no_modeswitch = svm_set_cr0,
1570 .set_cr3 = svm_set_cr3, 1661 .set_cr3 = svm_set_cr3,
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index d0a2c2d5342a..d4701cb4c654 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -116,7 +116,7 @@ static void vmcs_clear(struct vmcs *vmcs)
116static void __vcpu_clear(void *arg) 116static void __vcpu_clear(void *arg)
117{ 117{
118 struct kvm_vcpu *vcpu = arg; 118 struct kvm_vcpu *vcpu = arg;
119 int cpu = smp_processor_id(); 119 int cpu = raw_smp_processor_id();
120 120
121 if (vcpu->cpu == cpu) 121 if (vcpu->cpu == cpu)
122 vmcs_clear(vcpu->vmcs); 122 vmcs_clear(vcpu->vmcs);
@@ -152,15 +152,21 @@ static u64 vmcs_read64(unsigned long field)
152#endif 152#endif
153} 153}
154 154
155static noinline void vmwrite_error(unsigned long field, unsigned long value)
156{
157 printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n",
158 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
159 dump_stack();
160}
161
155static void vmcs_writel(unsigned long field, unsigned long value) 162static void vmcs_writel(unsigned long field, unsigned long value)
156{ 163{
157 u8 error; 164 u8 error;
158 165
159 asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" 166 asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0"
160 : "=q"(error) : "a"(value), "d"(field) : "cc" ); 167 : "=q"(error) : "a"(value), "d"(field) : "cc" );
161 if (error) 168 if (unlikely(error))
162 printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", 169 vmwrite_error(field, value);
163 field, value, vmcs_read32(VM_INSTRUCTION_ERROR));
164} 170}
165 171
166static void vmcs_write16(unsigned long field, u16 value) 172static void vmcs_write16(unsigned long field, u16 value)
@@ -263,6 +269,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
263 if (interruptibility & 3) 269 if (interruptibility & 3)
264 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 270 vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
265 interruptibility & ~3); 271 interruptibility & ~3);
272 vcpu->interrupt_window_open = 1;
266} 273}
267 274
268static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code) 275static void vmx_inject_gp(struct kvm_vcpu *vcpu, unsigned error_code)
@@ -541,7 +548,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu)
541 548
542static struct vmcs *alloc_vmcs(void) 549static struct vmcs *alloc_vmcs(void)
543{ 550{
544 return alloc_vmcs_cpu(smp_processor_id()); 551 return alloc_vmcs_cpu(raw_smp_processor_id());
545} 552}
546 553
547static void free_vmcs(struct vmcs *vmcs) 554static void free_vmcs(struct vmcs *vmcs)
@@ -736,6 +743,15 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
736 743
737#endif 744#endif
738 745
746static void vmx_decache_cr0_cr4_guest_bits(struct kvm_vcpu *vcpu)
747{
748 vcpu->cr0 &= KVM_GUEST_CR0_MASK;
749 vcpu->cr0 |= vmcs_readl(GUEST_CR0) & ~KVM_GUEST_CR0_MASK;
750
751 vcpu->cr4 &= KVM_GUEST_CR4_MASK;
752 vcpu->cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
753}
754
739static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) 755static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
740{ 756{
741 if (vcpu->rmode.active && (cr0 & CR0_PE_MASK)) 757 if (vcpu->rmode.active && (cr0 & CR0_PE_MASK))
@@ -1011,8 +1027,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1011 vmcs_writel(GUEST_RIP, 0xfff0); 1027 vmcs_writel(GUEST_RIP, 0xfff0);
1012 vmcs_writel(GUEST_RSP, 0); 1028 vmcs_writel(GUEST_RSP, 0);
1013 1029
1014 vmcs_writel(GUEST_CR3, 0);
1015
1016 //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0 1030 //todo: dr0 = dr1 = dr2 = dr3 = 0; dr6 = 0xffff0ff0
1017 vmcs_writel(GUEST_DR7, 0x400); 1031 vmcs_writel(GUEST_DR7, 0x400);
1018 1032
@@ -1049,7 +1063,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1049 | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */ 1063 | CPU_BASED_CR8_LOAD_EXITING /* 20.6.2 */
1050 | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */ 1064 | CPU_BASED_CR8_STORE_EXITING /* 20.6.2 */
1051 | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */ 1065 | CPU_BASED_UNCOND_IO_EXITING /* 20.6.2 */
1052 | CPU_BASED_INVDPG_EXITING
1053 | CPU_BASED_MOV_DR_EXITING 1066 | CPU_BASED_MOV_DR_EXITING
1054 | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */ 1067 | CPU_BASED_USE_TSC_OFFSETING /* 21.3 */
1055 ); 1068 );
@@ -1094,14 +1107,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1094 rdmsrl(MSR_IA32_SYSENTER_EIP, a); 1107 rdmsrl(MSR_IA32_SYSENTER_EIP, a);
1095 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ 1108 vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */
1096 1109
1097 ret = -ENOMEM;
1098 vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
1099 if (!vcpu->guest_msrs)
1100 goto out;
1101 vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
1102 if (!vcpu->host_msrs)
1103 goto out_free_guest_msrs;
1104
1105 for (i = 0; i < NR_VMX_MSR; ++i) { 1110 for (i = 0; i < NR_VMX_MSR; ++i) {
1106 u32 index = vmx_msr_index[i]; 1111 u32 index = vmx_msr_index[i];
1107 u32 data_low, data_high; 1112 u32 data_low, data_high;
@@ -1155,8 +1160,6 @@ static int vmx_vcpu_setup(struct kvm_vcpu *vcpu)
1155 1160
1156 return 0; 1161 return 0;
1157 1162
1158out_free_guest_msrs:
1159 kfree(vcpu->guest_msrs);
1160out: 1163out:
1161 return ret; 1164 return ret;
1162} 1165}
@@ -1224,21 +1227,34 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu)
1224 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); 1227 irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK);
1225} 1228}
1226 1229
1227static void kvm_try_inject_irq(struct kvm_vcpu *vcpu) 1230
1231static void do_interrupt_requests(struct kvm_vcpu *vcpu,
1232 struct kvm_run *kvm_run)
1228{ 1233{
1229 if ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) 1234 u32 cpu_based_vm_exec_control;
1230 && (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0) 1235
1236 vcpu->interrupt_window_open =
1237 ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
1238 (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0);
1239
1240 if (vcpu->interrupt_window_open &&
1241 vcpu->irq_summary &&
1242 !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
1231 /* 1243 /*
1232 * Interrupts enabled, and not blocked by sti or mov ss. Good. 1244 * If interrupts enabled, and not blocked by sti or mov ss. Good.
1233 */ 1245 */
1234 kvm_do_inject_irq(vcpu); 1246 kvm_do_inject_irq(vcpu);
1235 else 1247
1248 cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
1249 if (!vcpu->interrupt_window_open &&
1250 (vcpu->irq_summary || kvm_run->request_interrupt_window))
1236 /* 1251 /*
1237 * Interrupts blocked. Wait for unblock. 1252 * Interrupts blocked. Wait for unblock.
1238 */ 1253 */
1239 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1254 cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING;
1240 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) 1255 else
1241 | CPU_BASED_VIRTUAL_INTR_PENDING); 1256 cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING;
1257 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
1242} 1258}
1243 1259
1244static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu) 1260static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
@@ -1277,6 +1293,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1277 unsigned long cr2, rip; 1293 unsigned long cr2, rip;
1278 u32 vect_info; 1294 u32 vect_info;
1279 enum emulation_result er; 1295 enum emulation_result er;
1296 int r;
1280 1297
1281 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); 1298 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
1282 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); 1299 intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -1305,7 +1322,12 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1305 cr2 = vmcs_readl(EXIT_QUALIFICATION); 1322 cr2 = vmcs_readl(EXIT_QUALIFICATION);
1306 1323
1307 spin_lock(&vcpu->kvm->lock); 1324 spin_lock(&vcpu->kvm->lock);
1308 if (!vcpu->mmu.page_fault(vcpu, cr2, error_code)) { 1325 r = kvm_mmu_page_fault(vcpu, cr2, error_code);
1326 if (r < 0) {
1327 spin_unlock(&vcpu->kvm->lock);
1328 return r;
1329 }
1330 if (!r) {
1309 spin_unlock(&vcpu->kvm->lock); 1331 spin_unlock(&vcpu->kvm->lock);
1310 return 1; 1332 return 1;
1311 } 1333 }
@@ -1425,17 +1447,6 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1425 return 0; 1447 return 0;
1426} 1448}
1427 1449
1428static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1429{
1430 u64 address = vmcs_read64(EXIT_QUALIFICATION);
1431 int instruction_length = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
1432 spin_lock(&vcpu->kvm->lock);
1433 vcpu->mmu.inval_page(vcpu, address);
1434 spin_unlock(&vcpu->kvm->lock);
1435 vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) + instruction_length);
1436 return 1;
1437}
1438
1439static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1450static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1440{ 1451{
1441 u64 exit_qualification; 1452 u64 exit_qualification;
@@ -1575,23 +1586,40 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1575 return 1; 1586 return 1;
1576} 1587}
1577 1588
1589static void post_kvm_run_save(struct kvm_vcpu *vcpu,
1590 struct kvm_run *kvm_run)
1591{
1592 kvm_run->if_flag = (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) != 0;
1593 kvm_run->cr8 = vcpu->cr8;
1594 kvm_run->apic_base = vcpu->apic_base;
1595 kvm_run->ready_for_interrupt_injection = (vcpu->interrupt_window_open &&
1596 vcpu->irq_summary == 0);
1597}
1598
1578static int handle_interrupt_window(struct kvm_vcpu *vcpu, 1599static int handle_interrupt_window(struct kvm_vcpu *vcpu,
1579 struct kvm_run *kvm_run) 1600 struct kvm_run *kvm_run)
1580{ 1601{
1581 /* Turn off interrupt window reporting. */ 1602 /*
1582 vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, 1603 * If the user space waits to inject interrupts, exit as soon as
1583 vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) 1604 * possible
1584 & ~CPU_BASED_VIRTUAL_INTR_PENDING); 1605 */
1606 if (kvm_run->request_interrupt_window &&
1607 !vcpu->irq_summary) {
1608 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
1609 ++kvm_stat.irq_window_exits;
1610 return 0;
1611 }
1585 return 1; 1612 return 1;
1586} 1613}
1587 1614
1588static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1615static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1589{ 1616{
1590 skip_emulated_instruction(vcpu); 1617 skip_emulated_instruction(vcpu);
1591 if (vcpu->irq_summary && (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)) 1618 if (vcpu->irq_summary)
1592 return 1; 1619 return 1;
1593 1620
1594 kvm_run->exit_reason = KVM_EXIT_HLT; 1621 kvm_run->exit_reason = KVM_EXIT_HLT;
1622 ++kvm_stat.halt_exits;
1595 return 0; 1623 return 0;
1596} 1624}
1597 1625
@@ -1605,7 +1633,6 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
1605 [EXIT_REASON_EXCEPTION_NMI] = handle_exception, 1633 [EXIT_REASON_EXCEPTION_NMI] = handle_exception,
1606 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, 1634 [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
1607 [EXIT_REASON_IO_INSTRUCTION] = handle_io, 1635 [EXIT_REASON_IO_INSTRUCTION] = handle_io,
1608 [EXIT_REASON_INVLPG] = handle_invlpg,
1609 [EXIT_REASON_CR_ACCESS] = handle_cr, 1636 [EXIT_REASON_CR_ACCESS] = handle_cr,
1610 [EXIT_REASON_DR_ACCESS] = handle_dr, 1637 [EXIT_REASON_DR_ACCESS] = handle_dr,
1611 [EXIT_REASON_CPUID] = handle_cpuid, 1638 [EXIT_REASON_CPUID] = handle_cpuid,
@@ -1642,11 +1669,27 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
1642 return 0; 1669 return 0;
1643} 1670}
1644 1671
1672/*
1673 * Check if userspace requested an interrupt window, and that the
1674 * interrupt window is open.
1675 *
1676 * No need to exit to userspace if we already have an interrupt queued.
1677 */
1678static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu,
1679 struct kvm_run *kvm_run)
1680{
1681 return (!vcpu->irq_summary &&
1682 kvm_run->request_interrupt_window &&
1683 vcpu->interrupt_window_open &&
1684 (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF));
1685}
1686
1645static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) 1687static int vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
1646{ 1688{
1647 u8 fail; 1689 u8 fail;
1648 u16 fs_sel, gs_sel, ldt_sel; 1690 u16 fs_sel, gs_sel, ldt_sel;
1649 int fs_gs_ldt_reload_needed; 1691 int fs_gs_ldt_reload_needed;
1692 int r;
1650 1693
1651again: 1694again:
1652 /* 1695 /*
@@ -1673,9 +1716,7 @@ again:
1673 vmcs_writel(HOST_GS_BASE, segment_base(gs_sel)); 1716 vmcs_writel(HOST_GS_BASE, segment_base(gs_sel));
1674#endif 1717#endif
1675 1718
1676 if (vcpu->irq_summary && 1719 do_interrupt_requests(vcpu, kvm_run);
1677 !(vmcs_read32(VM_ENTRY_INTR_INFO_FIELD) & INTR_INFO_VALID_MASK))
1678 kvm_try_inject_irq(vcpu);
1679 1720
1680 if (vcpu->guest_debug.enabled) 1721 if (vcpu->guest_debug.enabled)
1681 kvm_guest_debug_pre(vcpu); 1722 kvm_guest_debug_pre(vcpu);
@@ -1812,6 +1853,7 @@ again:
1812 1853
1813 fx_save(vcpu->guest_fx_image); 1854 fx_save(vcpu->guest_fx_image);
1814 fx_restore(vcpu->host_fx_image); 1855 fx_restore(vcpu->host_fx_image);
1856 vcpu->interrupt_window_open = (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
1815 1857
1816#ifndef CONFIG_X86_64 1858#ifndef CONFIG_X86_64
1817 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); 1859 asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS));
@@ -1821,6 +1863,7 @@ again:
1821 if (fail) { 1863 if (fail) {
1822 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; 1864 kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY;
1823 kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR); 1865 kvm_run->exit_reason = vmcs_read32(VM_INSTRUCTION_ERROR);
1866 r = 0;
1824 } else { 1867 } else {
1825 if (fs_gs_ldt_reload_needed) { 1868 if (fs_gs_ldt_reload_needed) {
1826 load_ldt(ldt_sel); 1869 load_ldt(ldt_sel);
@@ -1840,17 +1883,28 @@ again:
1840 } 1883 }
1841 vcpu->launched = 1; 1884 vcpu->launched = 1;
1842 kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; 1885 kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT;
1843 if (kvm_handle_exit(kvm_run, vcpu)) { 1886 r = kvm_handle_exit(kvm_run, vcpu);
1887 if (r > 0) {
1844 /* Give scheduler a change to reschedule. */ 1888 /* Give scheduler a change to reschedule. */
1845 if (signal_pending(current)) { 1889 if (signal_pending(current)) {
1846 ++kvm_stat.signal_exits; 1890 ++kvm_stat.signal_exits;
1891 post_kvm_run_save(vcpu, kvm_run);
1892 return -EINTR;
1893 }
1894
1895 if (dm_request_for_irq_injection(vcpu, kvm_run)) {
1896 ++kvm_stat.request_irq_exits;
1897 post_kvm_run_save(vcpu, kvm_run);
1847 return -EINTR; 1898 return -EINTR;
1848 } 1899 }
1900
1849 kvm_resched(vcpu); 1901 kvm_resched(vcpu);
1850 goto again; 1902 goto again;
1851 } 1903 }
1852 } 1904 }
1853 return 0; 1905
1906 post_kvm_run_save(vcpu, kvm_run);
1907 return r;
1854} 1908}
1855 1909
1856static void vmx_flush_tlb(struct kvm_vcpu *vcpu) 1910static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
@@ -1906,13 +1960,33 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
1906{ 1960{
1907 struct vmcs *vmcs; 1961 struct vmcs *vmcs;
1908 1962
1963 vcpu->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
1964 if (!vcpu->guest_msrs)
1965 return -ENOMEM;
1966
1967 vcpu->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
1968 if (!vcpu->host_msrs)
1969 goto out_free_guest_msrs;
1970
1909 vmcs = alloc_vmcs(); 1971 vmcs = alloc_vmcs();
1910 if (!vmcs) 1972 if (!vmcs)
1911 return -ENOMEM; 1973 goto out_free_msrs;
1974
1912 vmcs_clear(vmcs); 1975 vmcs_clear(vmcs);
1913 vcpu->vmcs = vmcs; 1976 vcpu->vmcs = vmcs;
1914 vcpu->launched = 0; 1977 vcpu->launched = 0;
1978
1915 return 0; 1979 return 0;
1980
1981out_free_msrs:
1982 kfree(vcpu->host_msrs);
1983 vcpu->host_msrs = NULL;
1984
1985out_free_guest_msrs:
1986 kfree(vcpu->guest_msrs);
1987 vcpu->guest_msrs = NULL;
1988
1989 return -ENOMEM;
1916} 1990}
1917 1991
1918static struct kvm_arch_ops vmx_arch_ops = { 1992static struct kvm_arch_ops vmx_arch_ops = {
@@ -1936,6 +2010,7 @@ static struct kvm_arch_ops vmx_arch_ops = {
1936 .get_segment = vmx_get_segment, 2010 .get_segment = vmx_get_segment,
1937 .set_segment = vmx_set_segment, 2011 .set_segment = vmx_set_segment,
1938 .get_cs_db_l_bits = vmx_get_cs_db_l_bits, 2012 .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
2013 .decache_cr0_cr4_guest_bits = vmx_decache_cr0_cr4_guest_bits,
1939 .set_cr0 = vmx_set_cr0, 2014 .set_cr0 = vmx_set_cr0,
1940 .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch, 2015 .set_cr0_no_modeswitch = vmx_set_cr0_no_modeswitch,
1941 .set_cr3 = vmx_set_cr3, 2016 .set_cr3 = vmx_set_cr3,
diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 1bff3e925fda..be70795b4822 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1323,7 +1323,7 @@ twobyte_special_insn:
1323 ctxt)) != 0)) 1323 ctxt)) != 0))
1324 goto done; 1324 goto done;
1325 if ((old_lo != _regs[VCPU_REGS_RAX]) 1325 if ((old_lo != _regs[VCPU_REGS_RAX])
1326 || (old_hi != _regs[VCPU_REGS_RDI])) { 1326 || (old_hi != _regs[VCPU_REGS_RDX])) {
1327 _regs[VCPU_REGS_RAX] = old_lo; 1327 _regs[VCPU_REGS_RAX] = old_lo;
1328 _regs[VCPU_REGS_RDX] = old_hi; 1328 _regs[VCPU_REGS_RDX] = old_hi;
1329 _eflags &= ~EFLG_ZF; 1329 _eflags &= ~EFLG_ZF;
diff --git a/drivers/leds/leds-s3c24xx.c b/drivers/leds/leds-s3c24xx.c
index fb1edc1c9edb..50914439d861 100644
--- a/drivers/leds/leds-s3c24xx.c
+++ b/drivers/leds/leds-s3c24xx.c
@@ -16,7 +16,7 @@
16#include <linux/platform_device.h> 16#include <linux/platform_device.h>
17#include <linux/leds.h> 17#include <linux/leds.h>
18 18
19#include <asm/arch/hardware.h> 19#include <asm/hardware.h>
20#include <asm/arch/regs-gpio.h> 20#include <asm/arch/regs-gpio.h>
21#include <asm/arch/leds-gpio.h> 21#include <asm/arch/leds-gpio.h>
22 22
diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index c8558d4ed506..8ca75e52f637 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -44,6 +44,7 @@
44#include <linux/sysdev.h> 44#include <linux/sysdev.h>
45#include <linux/freezer.h> 45#include <linux/freezer.h>
46#include <linux/syscalls.h> 46#include <linux/syscalls.h>
47#include <linux/suspend.h>
47#include <linux/cpu.h> 48#include <linux/cpu.h>
48#include <asm/prom.h> 49#include <asm/prom.h>
49#include <asm/machdep.h> 50#include <asm/machdep.h>
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index f1dd81a1d592..3cfb0a3575e6 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -19,7 +19,7 @@ config PCI_MSI
19 19
20config PCI_MULTITHREAD_PROBE 20config PCI_MULTITHREAD_PROBE
21 bool "PCI Multi-threaded probe (EXPERIMENTAL)" 21 bool "PCI Multi-threaded probe (EXPERIMENTAL)"
22 depends on PCI && EXPERIMENTAL 22 depends on PCI && EXPERIMENTAL && BROKEN
23 help 23 help
24 Say Y here if you want the PCI core to spawn a new thread for 24 Say Y here if you want the PCI core to spawn a new thread for
25 every PCI device that is probed. This can cause a huge 25 every PCI device that is probed. This can cause a huge
diff --git a/drivers/pci/search.c b/drivers/pci/search.c
index 45f2b20ef513..fab381ed853c 100644
--- a/drivers/pci/search.c
+++ b/drivers/pci/search.c
@@ -193,6 +193,18 @@ static struct pci_dev * pci_find_subsys(unsigned int vendor,
193 struct pci_dev *dev; 193 struct pci_dev *dev;
194 194
195 WARN_ON(in_interrupt()); 195 WARN_ON(in_interrupt());
196
197 /*
198 * pci_find_subsys() can be called on the ide_setup() path, super-early
199 * in boot. But the down_read() will enable local interrupts, which
200 * can cause some machines to crash. So here we detect and flag that
201 * situation and bail out early.
202 */
203 if (unlikely(list_empty(&pci_devices))) {
204 printk(KERN_INFO "pci_find_subsys() called while pci_devices "
205 "is still empty\n");
206 return NULL;
207 }
196 down_read(&pci_bus_sem); 208 down_read(&pci_bus_sem);
197 n = from ? from->global_list.next : pci_devices.next; 209 n = from ? from->global_list.next : pci_devices.next;
198 210
@@ -259,6 +271,18 @@ pci_get_subsys(unsigned int vendor, unsigned int device,
259 struct pci_dev *dev; 271 struct pci_dev *dev;
260 272
261 WARN_ON(in_interrupt()); 273 WARN_ON(in_interrupt());
274
275 /*
276 * pci_get_subsys() can potentially be called by drivers super-early
277 * in boot. But the down_read() will enable local interrupts, which
278 * can cause some machines to crash. So here we detect and flag that
279 * situation and bail out early.
280 */
281 if (unlikely(list_empty(&pci_devices))) {
282 printk(KERN_NOTICE "pci_get_subsys() called while pci_devices "
283 "is still empty\n");
284 return NULL;
285 }
262 down_read(&pci_bus_sem); 286 down_read(&pci_bus_sem);
263 n = from ? from->global_list.next : pci_devices.next; 287 n = from ? from->global_list.next : pci_devices.next;
264 288
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 4f654c901c64..a724ab49a797 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -33,6 +33,8 @@
33 33
34#include <asm/mach/time.h> 34#include <asm/mach/time.h>
35 35
36#include <asm/arch/at91_rtc.h>
37
36 38
37#define AT91_RTC_FREQ 1 39#define AT91_RTC_FREQ 1
38#define AT91_RTC_EPOCH 1900UL /* just like arch/arm/common/rtctime.c */ 40#define AT91_RTC_EPOCH 1900UL /* just like arch/arm/common/rtctime.c */
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 1460f6b769f2..e7851e3739ab 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -1,5 +1,5 @@
1/* 1/*
2 * An I2C driver for the Ricoh RS5C372 RTC 2 * An I2C driver for Ricoh RS5C372 and RV5C38[67] RTCs
3 * 3 *
4 * Copyright (C) 2005 Pavel Mironchik <pmironchik@optifacio.net> 4 * Copyright (C) 2005 Pavel Mironchik <pmironchik@optifacio.net>
5 * Copyright (C) 2006 Tower Technologies 5 * Copyright (C) 2006 Tower Technologies
@@ -13,7 +13,7 @@
13#include <linux/rtc.h> 13#include <linux/rtc.h>
14#include <linux/bcd.h> 14#include <linux/bcd.h>
15 15
16#define DRV_VERSION "0.3" 16#define DRV_VERSION "0.4"
17 17
18/* Addresses to scan */ 18/* Addresses to scan */
19static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END }; 19static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END };
@@ -21,6 +21,13 @@ static unsigned short normal_i2c[] = { /* 0x32,*/ I2C_CLIENT_END };
21/* Insmod parameters */ 21/* Insmod parameters */
22I2C_CLIENT_INSMOD; 22I2C_CLIENT_INSMOD;
23 23
24
25/*
26 * Ricoh has a family of I2C based RTCs, which differ only slightly from
27 * each other. Differences center on pinout (e.g. how many interrupts,
28 * output clock, etc) and how the control registers are used. The '372
29 * is significant only because that's the one this driver first supported.
30 */
24#define RS5C372_REG_SECS 0 31#define RS5C372_REG_SECS 0
25#define RS5C372_REG_MINS 1 32#define RS5C372_REG_MINS 1
26#define RS5C372_REG_HOURS 2 33#define RS5C372_REG_HOURS 2
@@ -29,59 +36,142 @@ I2C_CLIENT_INSMOD;
29#define RS5C372_REG_MONTH 5 36#define RS5C372_REG_MONTH 5
30#define RS5C372_REG_YEAR 6 37#define RS5C372_REG_YEAR 6
31#define RS5C372_REG_TRIM 7 38#define RS5C372_REG_TRIM 7
39# define RS5C372_TRIM_XSL 0x80
40# define RS5C372_TRIM_MASK 0x7F
41
42#define RS5C_REG_ALARM_A_MIN 8 /* or ALARM_W */
43#define RS5C_REG_ALARM_A_HOURS 9
44#define RS5C_REG_ALARM_A_WDAY 10
45
46#define RS5C_REG_ALARM_B_MIN 11 /* or ALARM_D */
47#define RS5C_REG_ALARM_B_HOURS 12
48#define RS5C_REG_ALARM_B_WDAY 13 /* (ALARM_B only) */
49
50#define RS5C_REG_CTRL1 14
51# define RS5C_CTRL1_AALE (1 << 7) /* or WALE */
52# define RS5C_CTRL1_BALE (1 << 6) /* or DALE */
53# define RV5C387_CTRL1_24 (1 << 5)
54# define RS5C372A_CTRL1_SL1 (1 << 5)
55# define RS5C_CTRL1_CT_MASK (7 << 0)
56# define RS5C_CTRL1_CT0 (0 << 0) /* no periodic irq */
57# define RS5C_CTRL1_CT4 (4 << 0) /* 1 Hz level irq */
58#define RS5C_REG_CTRL2 15
59# define RS5C372_CTRL2_24 (1 << 5)
60# define RS5C_CTRL2_XSTP (1 << 4)
61# define RS5C_CTRL2_CTFG (1 << 2)
62# define RS5C_CTRL2_AAFG (1 << 1) /* or WAFG */
63# define RS5C_CTRL2_BAFG (1 << 0) /* or DAFG */
64
65
66/* to read (style 1) or write registers starting at R */
67#define RS5C_ADDR(R) (((R) << 4) | 0)
68
69
70enum rtc_type {
71 rtc_undef = 0,
72 rtc_rs5c372a,
73 rtc_rs5c372b,
74 rtc_rv5c386,
75 rtc_rv5c387a,
76};
32 77
33#define RS5C372_TRIM_XSL 0x80 78/* REVISIT: this assumes that:
34#define RS5C372_TRIM_MASK 0x7F 79 * - we're in the 21st century, so it's safe to ignore the century
80 * bit for rv5c38[67] (REG_MONTH bit 7);
81 * - we should use ALARM_A not ALARM_B (may be wrong on some boards)
82 */
83struct rs5c372 {
84 struct i2c_client *client;
85 struct rtc_device *rtc;
86 enum rtc_type type;
87 unsigned time24:1;
88 unsigned has_irq:1;
89 char buf[17];
90 char *regs;
91
92 /* on conversion to a "new style" i2c driver, this vanishes */
93 struct i2c_client dev;
94};
35 95
36#define RS5C372_REG_BASE 0 96static int rs5c_get_regs(struct rs5c372 *rs5c)
97{
98 struct i2c_client *client = rs5c->client;
99 struct i2c_msg msgs[] = {
100 { client->addr, I2C_M_RD, sizeof rs5c->buf, rs5c->buf },
101 };
102
103 /* This implements the third reading method from the datasheet, using
104 * an internal address that's reset after each transaction (by STOP)
105 * to 0x0f ... so we read extra registers, and skip the first one.
106 *
107 * The first method doesn't work with the iop3xx adapter driver, on at
108 * least 80219 chips; this works around that bug.
109 */
110 if ((i2c_transfer(client->adapter, msgs, 1)) != 1) {
111 pr_debug("%s: can't read registers\n", rs5c->rtc->name);
112 return -EIO;
113 }
37 114
38static int rs5c372_attach(struct i2c_adapter *adapter); 115 dev_dbg(&client->dev,
39static int rs5c372_detach(struct i2c_client *client); 116 "%02x %02x %02x (%02x) %02x %02x %02x (%02x), "
40static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind); 117 "%02x %02x %02x, %02x %02x %02x; %02x %02x\n",
118 rs5c->regs[0], rs5c->regs[1], rs5c->regs[2], rs5c->regs[3],
119 rs5c->regs[4], rs5c->regs[5], rs5c->regs[6], rs5c->regs[7],
120 rs5c->regs[8], rs5c->regs[9], rs5c->regs[10], rs5c->regs[11],
121 rs5c->regs[12], rs5c->regs[13], rs5c->regs[14], rs5c->regs[15]);
41 122
42struct rs5c372 { 123 return 0;
43 u8 reg_addr; 124}
44 u8 regs[17];
45 struct i2c_msg msg[1];
46 struct i2c_client client;
47 struct rtc_device *rtc;
48};
49 125
50static struct i2c_driver rs5c372_driver = { 126static unsigned rs5c_reg2hr(struct rs5c372 *rs5c, unsigned reg)
51 .driver = { 127{
52 .name = "rs5c372", 128 unsigned hour;
53 },
54 .attach_adapter = &rs5c372_attach,
55 .detach_client = &rs5c372_detach,
56};
57 129
58static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm) 130 if (rs5c->time24)
131 return BCD2BIN(reg & 0x3f);
132
133 hour = BCD2BIN(reg & 0x1f);
134 if (hour == 12)
135 hour = 0;
136 if (reg & 0x20)
137 hour += 12;
138 return hour;
139}
140
141static unsigned rs5c_hr2reg(struct rs5c372 *rs5c, unsigned hour)
59{ 142{
143 if (rs5c->time24)
144 return BIN2BCD(hour);
145
146 if (hour > 12)
147 return 0x20 | BIN2BCD(hour - 12);
148 if (hour == 12)
149 return 0x20 | BIN2BCD(12);
150 if (hour == 0)
151 return BIN2BCD(12);
152 return BIN2BCD(hour);
153}
60 154
61 struct rs5c372 *rs5c372 = i2c_get_clientdata(client); 155static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
62 u8 *buf = &(rs5c372->regs[1]); 156{
157 struct rs5c372 *rs5c = i2c_get_clientdata(client);
158 int status = rs5c_get_regs(rs5c);
63 159
64 /* this implements the 3rd reading method, according 160 if (status < 0)
65 * to the datasheet. rs5c372 defaults to internal 161 return status;
66 * address 0xF, so 0x0 is in regs[1]
67 */
68 162
69 if ((i2c_transfer(client->adapter, rs5c372->msg, 1)) != 1) { 163 tm->tm_sec = BCD2BIN(rs5c->regs[RS5C372_REG_SECS] & 0x7f);
70 dev_err(&client->dev, "%s: read error\n", __FUNCTION__); 164 tm->tm_min = BCD2BIN(rs5c->regs[RS5C372_REG_MINS] & 0x7f);
71 return -EIO; 165 tm->tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C372_REG_HOURS]);
72 }
73 166
74 tm->tm_sec = BCD2BIN(buf[RS5C372_REG_SECS] & 0x7f); 167 tm->tm_wday = BCD2BIN(rs5c->regs[RS5C372_REG_WDAY] & 0x07);
75 tm->tm_min = BCD2BIN(buf[RS5C372_REG_MINS] & 0x7f); 168 tm->tm_mday = BCD2BIN(rs5c->regs[RS5C372_REG_DAY] & 0x3f);
76 tm->tm_hour = BCD2BIN(buf[RS5C372_REG_HOURS] & 0x3f);
77 tm->tm_wday = BCD2BIN(buf[RS5C372_REG_WDAY] & 0x07);
78 tm->tm_mday = BCD2BIN(buf[RS5C372_REG_DAY] & 0x3f);
79 169
80 /* tm->tm_mon is zero-based */ 170 /* tm->tm_mon is zero-based */
81 tm->tm_mon = BCD2BIN(buf[RS5C372_REG_MONTH] & 0x1f) - 1; 171 tm->tm_mon = BCD2BIN(rs5c->regs[RS5C372_REG_MONTH] & 0x1f) - 1;
82 172
83 /* year is 1900 + tm->tm_year */ 173 /* year is 1900 + tm->tm_year */
84 tm->tm_year = BCD2BIN(buf[RS5C372_REG_YEAR]) + 100; 174 tm->tm_year = BCD2BIN(rs5c->regs[RS5C372_REG_YEAR]) + 100;
85 175
86 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, " 176 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d, "
87 "mday=%d, mon=%d, year=%d, wday=%d\n", 177 "mday=%d, mon=%d, year=%d, wday=%d\n",
@@ -89,22 +179,25 @@ static int rs5c372_get_datetime(struct i2c_client *client, struct rtc_time *tm)
89 tm->tm_sec, tm->tm_min, tm->tm_hour, 179 tm->tm_sec, tm->tm_min, tm->tm_hour,
90 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); 180 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
91 181
92 return 0; 182 /* rtc might need initialization */
183 return rtc_valid_tm(tm);
93} 184}
94 185
95static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm) 186static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
96{ 187{
97 unsigned char buf[8] = { RS5C372_REG_BASE }; 188 struct rs5c372 *rs5c = i2c_get_clientdata(client);
189 unsigned char buf[8];
98 190
99 dev_dbg(&client->dev, 191 dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d "
100 "%s: secs=%d, mins=%d, hours=%d "
101 "mday=%d, mon=%d, year=%d, wday=%d\n", 192 "mday=%d, mon=%d, year=%d, wday=%d\n",
102 __FUNCTION__, tm->tm_sec, tm->tm_min, tm->tm_hour, 193 __FUNCTION__,
194 tm->tm_sec, tm->tm_min, tm->tm_hour,
103 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday); 195 tm->tm_mday, tm->tm_mon, tm->tm_year, tm->tm_wday);
104 196
197 buf[0] = RS5C_ADDR(RS5C372_REG_SECS);
105 buf[1] = BIN2BCD(tm->tm_sec); 198 buf[1] = BIN2BCD(tm->tm_sec);
106 buf[2] = BIN2BCD(tm->tm_min); 199 buf[2] = BIN2BCD(tm->tm_min);
107 buf[3] = BIN2BCD(tm->tm_hour); 200 buf[3] = rs5c_hr2reg(rs5c, tm->tm_hour);
108 buf[4] = BIN2BCD(tm->tm_wday); 201 buf[4] = BIN2BCD(tm->tm_wday);
109 buf[5] = BIN2BCD(tm->tm_mday); 202 buf[5] = BIN2BCD(tm->tm_mday);
110 buf[6] = BIN2BCD(tm->tm_mon + 1); 203 buf[6] = BIN2BCD(tm->tm_mon + 1);
@@ -118,21 +211,43 @@ static int rs5c372_set_datetime(struct i2c_client *client, struct rtc_time *tm)
118 return 0; 211 return 0;
119} 212}
120 213
214#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
215#define NEED_TRIM
216#endif
217
218#if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE)
219#define NEED_TRIM
220#endif
221
222#ifdef NEED_TRIM
121static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim) 223static int rs5c372_get_trim(struct i2c_client *client, int *osc, int *trim)
122{ 224{
123 struct rs5c372 *rs5c372 = i2c_get_clientdata(client); 225 struct rs5c372 *rs5c372 = i2c_get_clientdata(client);
124 u8 tmp = rs5c372->regs[RS5C372_REG_TRIM + 1]; 226 u8 tmp = rs5c372->regs[RS5C372_REG_TRIM];
125 227
126 if (osc) 228 if (osc)
127 *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768; 229 *osc = (tmp & RS5C372_TRIM_XSL) ? 32000 : 32768;
128 230
129 if (trim) { 231 if (trim) {
130 *trim = tmp & RS5C372_TRIM_MASK; 232 dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, tmp);
131 dev_dbg(&client->dev, "%s: raw trim=%x\n", __FUNCTION__, *trim); 233 tmp &= RS5C372_TRIM_MASK;
234 if (tmp & 0x3e) {
235 int t = tmp & 0x3f;
236
237 if (tmp & 0x40)
238 t = (~t | (s8)0xc0) + 1;
239 else
240 t = t - 1;
241
242 tmp = t * 2;
243 } else
244 tmp = 0;
245 *trim = tmp;
132 } 246 }
133 247
134 return 0; 248 return 0;
135} 249}
250#endif
136 251
137static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm) 252static int rs5c372_rtc_read_time(struct device *dev, struct rtc_time *tm)
138{ 253{
@@ -144,25 +259,190 @@ static int rs5c372_rtc_set_time(struct device *dev, struct rtc_time *tm)
144 return rs5c372_set_datetime(to_i2c_client(dev), tm); 259 return rs5c372_set_datetime(to_i2c_client(dev), tm);
145} 260}
146 261
262#if defined(CONFIG_RTC_INTF_DEV) || defined(CONFIG_RTC_INTF_DEV_MODULE)
263
264static int
265rs5c_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
266{
267 struct i2c_client *client = to_i2c_client(dev);
268 struct rs5c372 *rs5c = i2c_get_clientdata(client);
269 unsigned char buf[2];
270 int status;
271
272 buf[1] = rs5c->regs[RS5C_REG_CTRL1];
273 switch (cmd) {
274 case RTC_UIE_OFF:
275 case RTC_UIE_ON:
276 /* some 327a modes use a different IRQ pin for 1Hz irqs */
277 if (rs5c->type == rtc_rs5c372a
278 && (buf[1] & RS5C372A_CTRL1_SL1))
279 return -ENOIOCTLCMD;
280 case RTC_AIE_OFF:
281 case RTC_AIE_ON:
282 /* these irq management calls only make sense for chips
283 * which are wired up to an IRQ.
284 */
285 if (!rs5c->has_irq)
286 return -ENOIOCTLCMD;
287 break;
288 default:
289 return -ENOIOCTLCMD;
290 }
291
292 status = rs5c_get_regs(rs5c);
293 if (status < 0)
294 return status;
295
296 buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
297 switch (cmd) {
298 case RTC_AIE_OFF: /* alarm off */
299 buf[1] &= ~RS5C_CTRL1_AALE;
300 break;
301 case RTC_AIE_ON: /* alarm on */
302 buf[1] |= RS5C_CTRL1_AALE;
303 break;
304 case RTC_UIE_OFF: /* update off */
305 buf[1] &= ~RS5C_CTRL1_CT_MASK;
306 break;
307 case RTC_UIE_ON: /* update on */
308 buf[1] &= ~RS5C_CTRL1_CT_MASK;
309 buf[1] |= RS5C_CTRL1_CT4;
310 break;
311 }
312 if ((i2c_master_send(client, buf, 2)) != 2) {
313 printk(KERN_WARNING "%s: can't update alarm\n",
314 rs5c->rtc->name);
315 status = -EIO;
316 } else
317 rs5c->regs[RS5C_REG_CTRL1] = buf[1];
318 return status;
319}
320
321#else
322#define rs5c_rtc_ioctl NULL
323#endif
324
325
326/* NOTE: Since RTC_WKALM_{RD,SET} were originally defined for EFI,
327 * which only exposes a polled programming interface; and since
328 * these calls map directly to those EFI requests; we don't demand
329 * we have an IRQ for this chip when we go through this API.
330 *
331 * The older x86_pc derived RTC_ALM_{READ,SET} calls require irqs
332 * though, managed through RTC_AIE_{ON,OFF} requests.
333 */
334
335static int rs5c_read_alarm(struct device *dev, struct rtc_wkalrm *t)
336{
337 struct i2c_client *client = to_i2c_client(dev);
338 struct rs5c372 *rs5c = i2c_get_clientdata(client);
339 int status;
340
341 status = rs5c_get_regs(rs5c);
342 if (status < 0)
343 return status;
344
345 /* report alarm time */
346 t->time.tm_sec = 0;
347 t->time.tm_min = BCD2BIN(rs5c->regs[RS5C_REG_ALARM_A_MIN] & 0x7f);
348 t->time.tm_hour = rs5c_reg2hr(rs5c, rs5c->regs[RS5C_REG_ALARM_A_HOURS]);
349 t->time.tm_mday = -1;
350 t->time.tm_mon = -1;
351 t->time.tm_year = -1;
352 t->time.tm_wday = -1;
353 t->time.tm_yday = -1;
354 t->time.tm_isdst = -1;
355
356 /* ... and status */
357 t->enabled = !!(rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE);
358 t->pending = !!(rs5c->regs[RS5C_REG_CTRL2] & RS5C_CTRL2_AAFG);
359
360 return 0;
361}
362
363static int rs5c_set_alarm(struct device *dev, struct rtc_wkalrm *t)
364{
365 struct i2c_client *client = to_i2c_client(dev);
366 struct rs5c372 *rs5c = i2c_get_clientdata(client);
367 int status;
368 unsigned char buf[4];
369
370 /* only handle up to 24 hours in the future, like RTC_ALM_SET */
371 if (t->time.tm_mday != -1
372 || t->time.tm_mon != -1
373 || t->time.tm_year != -1)
374 return -EINVAL;
375
376 /* REVISIT: round up tm_sec */
377
378 /* if needed, disable irq (clears pending status) */
379 status = rs5c_get_regs(rs5c);
380 if (status < 0)
381 return status;
382 if (rs5c->regs[RS5C_REG_CTRL1] & RS5C_CTRL1_AALE) {
383 buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
384 buf[1] = rs5c->regs[RS5C_REG_CTRL1] & ~RS5C_CTRL1_AALE;
385 if (i2c_master_send(client, buf, 2) != 2) {
386 pr_debug("%s: can't disable alarm\n", rs5c->rtc->name);
387 return -EIO;
388 }
389 rs5c->regs[RS5C_REG_CTRL1] = buf[1];
390 }
391
392 /* set alarm */
393 buf[0] = RS5C_ADDR(RS5C_REG_ALARM_A_MIN);
394 buf[1] = BIN2BCD(t->time.tm_min);
395 buf[2] = rs5c_hr2reg(rs5c, t->time.tm_hour);
396 buf[3] = 0x7f; /* any/all days */
397 if ((i2c_master_send(client, buf, 4)) != 4) {
398 pr_debug("%s: can't set alarm time\n", rs5c->rtc->name);
399 return -EIO;
400 }
401
402 /* ... and maybe enable its irq */
403 if (t->enabled) {
404 buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
405 buf[1] = rs5c->regs[RS5C_REG_CTRL1] | RS5C_CTRL1_AALE;
406 if ((i2c_master_send(client, buf, 2)) != 2)
407 printk(KERN_WARNING "%s: can't enable alarm\n",
408 rs5c->rtc->name);
409 rs5c->regs[RS5C_REG_CTRL1] = buf[1];
410 }
411
412 return 0;
413}
414
415#if defined(CONFIG_RTC_INTF_PROC) || defined(CONFIG_RTC_INTF_PROC_MODULE)
416
147static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq) 417static int rs5c372_rtc_proc(struct device *dev, struct seq_file *seq)
148{ 418{
149 int err, osc, trim; 419 int err, osc, trim;
150 420
151 err = rs5c372_get_trim(to_i2c_client(dev), &osc, &trim); 421 err = rs5c372_get_trim(to_i2c_client(dev), &osc, &trim);
152 if (err == 0) { 422 if (err == 0) {
153 seq_printf(seq, "%d.%03d KHz\n", osc / 1000, osc % 1000); 423 seq_printf(seq, "crystal\t\t: %d.%03d KHz\n",
154 seq_printf(seq, "trim\t: %d\n", trim); 424 osc / 1000, osc % 1000);
425 seq_printf(seq, "trim\t\t: %d\n", trim);
155 } 426 }
156 427
157 return 0; 428 return 0;
158} 429}
159 430
431#else
432#define rs5c372_rtc_proc NULL
433#endif
434
160static const struct rtc_class_ops rs5c372_rtc_ops = { 435static const struct rtc_class_ops rs5c372_rtc_ops = {
161 .proc = rs5c372_rtc_proc, 436 .proc = rs5c372_rtc_proc,
437 .ioctl = rs5c_rtc_ioctl,
162 .read_time = rs5c372_rtc_read_time, 438 .read_time = rs5c372_rtc_read_time,
163 .set_time = rs5c372_rtc_set_time, 439 .set_time = rs5c372_rtc_set_time,
440 .read_alarm = rs5c_read_alarm,
441 .set_alarm = rs5c_set_alarm,
164}; 442};
165 443
444#if defined(CONFIG_RTC_INTF_SYSFS) || defined(CONFIG_RTC_INTF_SYSFS_MODULE)
445
166static ssize_t rs5c372_sysfs_show_trim(struct device *dev, 446static ssize_t rs5c372_sysfs_show_trim(struct device *dev,
167 struct device_attribute *attr, char *buf) 447 struct device_attribute *attr, char *buf)
168{ 448{
@@ -172,7 +452,7 @@ static ssize_t rs5c372_sysfs_show_trim(struct device *dev,
172 if (err) 452 if (err)
173 return err; 453 return err;
174 454
175 return sprintf(buf, "0x%2x\n", trim); 455 return sprintf(buf, "%d\n", trim);
176} 456}
177static DEVICE_ATTR(trim, S_IRUGO, rs5c372_sysfs_show_trim, NULL); 457static DEVICE_ATTR(trim, S_IRUGO, rs5c372_sysfs_show_trim, NULL);
178 458
@@ -189,16 +469,35 @@ static ssize_t rs5c372_sysfs_show_osc(struct device *dev,
189} 469}
190static DEVICE_ATTR(osc, S_IRUGO, rs5c372_sysfs_show_osc, NULL); 470static DEVICE_ATTR(osc, S_IRUGO, rs5c372_sysfs_show_osc, NULL);
191 471
192static int rs5c372_attach(struct i2c_adapter *adapter) 472static int rs5c_sysfs_register(struct device *dev)
193{ 473{
194 return i2c_probe(adapter, &addr_data, rs5c372_probe); 474 int err;
475
476 err = device_create_file(dev, &dev_attr_trim);
477 if (err)
478 return err;
479 err = device_create_file(dev, &dev_attr_osc);
480 if (err)
481 device_remove_file(dev, &dev_attr_trim);
482
483 return err;
484}
485
486#else
487static int rs5c_sysfs_register(struct device *dev)
488{
489 return 0;
195} 490}
491#endif /* SYSFS */
492
493static struct i2c_driver rs5c372_driver;
196 494
197static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind) 495static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
198{ 496{
199 int err = 0; 497 int err = 0;
200 struct i2c_client *client; 498 struct i2c_client *client;
201 struct rs5c372 *rs5c372; 499 struct rs5c372 *rs5c372;
500 struct rtc_time tm;
202 501
203 dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__); 502 dev_dbg(adapter->class_dev.dev, "%s\n", __FUNCTION__);
204 503
@@ -211,7 +510,15 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
211 err = -ENOMEM; 510 err = -ENOMEM;
212 goto exit; 511 goto exit;
213 } 512 }
214 client = &rs5c372->client; 513
514 /* we read registers 0x0f then 0x00-0x0f; skip the first one */
515 rs5c372->regs=&rs5c372->buf[1];
516
517 /* On conversion to a "new style" i2c driver, we'll be handed
518 * the i2c_client (we won't create it)
519 */
520 client = &rs5c372->dev;
521 rs5c372->client = client;
215 522
216 /* I2C client */ 523 /* I2C client */
217 client->addr = address; 524 client->addr = address;
@@ -222,16 +529,99 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
222 529
223 i2c_set_clientdata(client, rs5c372); 530 i2c_set_clientdata(client, rs5c372);
224 531
225 rs5c372->msg[0].addr = address;
226 rs5c372->msg[0].flags = I2C_M_RD;
227 rs5c372->msg[0].len = sizeof(rs5c372->regs);
228 rs5c372->msg[0].buf = rs5c372->regs;
229
230 /* Inform the i2c layer */ 532 /* Inform the i2c layer */
231 if ((err = i2c_attach_client(client))) 533 if ((err = i2c_attach_client(client)))
232 goto exit_kfree; 534 goto exit_kfree;
233 535
234 dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n"); 536 err = rs5c_get_regs(rs5c372);
537 if (err < 0)
538 goto exit_detach;
539
540 /* For "new style" drivers, irq is in i2c_client and chip type
541 * info comes from i2c_client.dev.platform_data. Meanwhile:
542 *
543 * STICK BOARD-SPECIFIC SETUP CODE RIGHT HERE
544 */
545 if (rs5c372->type == rtc_undef) {
546 rs5c372->type = rtc_rs5c372b;
547 dev_warn(&client->dev, "assuming rs5c372b\n");
548 }
549
550 /* clock may be set for am/pm or 24 hr time */
551 switch (rs5c372->type) {
552 case rtc_rs5c372a:
553 case rtc_rs5c372b:
554 /* alarm uses ALARM_A; and nINTRA on 372a, nINTR on 372b.
555 * so does periodic irq, except some 327a modes.
556 */
557 if (rs5c372->regs[RS5C_REG_CTRL2] & RS5C372_CTRL2_24)
558 rs5c372->time24 = 1;
559 break;
560 case rtc_rv5c386:
561 case rtc_rv5c387a:
562 if (rs5c372->regs[RS5C_REG_CTRL1] & RV5C387_CTRL1_24)
563 rs5c372->time24 = 1;
564 /* alarm uses ALARM_W; and nINTRB for alarm and periodic
565 * irq, on both 386 and 387
566 */
567 break;
568 default:
569 dev_err(&client->dev, "unknown RTC type\n");
570 goto exit_detach;
571 }
572
573 /* if the oscillator lost power and no other software (like
574 * the bootloader) set it up, do it here.
575 */
576 if (rs5c372->regs[RS5C_REG_CTRL2] & RS5C_CTRL2_XSTP) {
577 unsigned char buf[3];
578
579 rs5c372->regs[RS5C_REG_CTRL2] &= ~RS5C_CTRL2_XSTP;
580
581 buf[0] = RS5C_ADDR(RS5C_REG_CTRL1);
582 buf[1] = rs5c372->regs[RS5C_REG_CTRL1];
583 buf[2] = rs5c372->regs[RS5C_REG_CTRL2];
584
585 /* use 24hr mode */
586 switch (rs5c372->type) {
587 case rtc_rs5c372a:
588 case rtc_rs5c372b:
589 buf[2] |= RS5C372_CTRL2_24;
590 rs5c372->time24 = 1;
591 break;
592 case rtc_rv5c386:
593 case rtc_rv5c387a:
594 buf[1] |= RV5C387_CTRL1_24;
595 rs5c372->time24 = 1;
596 break;
597 default:
598 /* impossible */
599 break;
600 }
601
602 if ((i2c_master_send(client, buf, 3)) != 3) {
603 dev_err(&client->dev, "setup error\n");
604 goto exit_detach;
605 }
606 rs5c372->regs[RS5C_REG_CTRL1] = buf[1];
607 rs5c372->regs[RS5C_REG_CTRL2] = buf[2];
608 }
609
610 if (rs5c372_get_datetime(client, &tm) < 0)
611 dev_warn(&client->dev, "clock needs to be set\n");
612
613 dev_info(&client->dev, "%s found, %s, driver version " DRV_VERSION "\n",
614 ({ char *s; switch (rs5c372->type) {
615 case rtc_rs5c372a: s = "rs5c372a"; break;
616 case rtc_rs5c372b: s = "rs5c372b"; break;
617 case rtc_rv5c386: s = "rv5c386"; break;
618 case rtc_rv5c387a: s = "rv5c387a"; break;
619 default: s = "chip"; break;
620 }; s;}),
621 rs5c372->time24 ? "24hr" : "am/pm"
622 );
623
624 /* FIXME when client->irq exists, use it to register alarm irq */
235 625
236 rs5c372->rtc = rtc_device_register(rs5c372_driver.driver.name, 626 rs5c372->rtc = rtc_device_register(rs5c372_driver.driver.name,
237 &client->dev, &rs5c372_rtc_ops, THIS_MODULE); 627 &client->dev, &rs5c372_rtc_ops, THIS_MODULE);
@@ -241,18 +631,12 @@ static int rs5c372_probe(struct i2c_adapter *adapter, int address, int kind)
241 goto exit_detach; 631 goto exit_detach;
242 } 632 }
243 633
244 err = device_create_file(&client->dev, &dev_attr_trim); 634 err = rs5c_sysfs_register(&client->dev);
245 if (err) 635 if (err)
246 goto exit_devreg; 636 goto exit_devreg;
247 err = device_create_file(&client->dev, &dev_attr_osc);
248 if (err)
249 goto exit_trim;
250 637
251 return 0; 638 return 0;
252 639
253exit_trim:
254 device_remove_file(&client->dev, &dev_attr_trim);
255
256exit_devreg: 640exit_devreg:
257 rtc_device_unregister(rs5c372->rtc); 641 rtc_device_unregister(rs5c372->rtc);
258 642
@@ -266,6 +650,11 @@ exit:
266 return err; 650 return err;
267} 651}
268 652
653static int rs5c372_attach(struct i2c_adapter *adapter)
654{
655 return i2c_probe(adapter, &addr_data, rs5c372_probe);
656}
657
269static int rs5c372_detach(struct i2c_client *client) 658static int rs5c372_detach(struct i2c_client *client)
270{ 659{
271 int err; 660 int err;
@@ -274,6 +663,8 @@ static int rs5c372_detach(struct i2c_client *client)
274 if (rs5c372->rtc) 663 if (rs5c372->rtc)
275 rtc_device_unregister(rs5c372->rtc); 664 rtc_device_unregister(rs5c372->rtc);
276 665
666 /* REVISIT properly destroy the sysfs files ... */
667
277 if ((err = i2c_detach_client(client))) 668 if ((err = i2c_detach_client(client)))
278 return err; 669 return err;
279 670
@@ -281,6 +672,14 @@ static int rs5c372_detach(struct i2c_client *client)
281 return 0; 672 return 0;
282} 673}
283 674
675static struct i2c_driver rs5c372_driver = {
676 .driver = {
677 .name = "rtc-rs5c372",
678 },
679 .attach_adapter = &rs5c372_attach,
680 .detach_client = &rs5c372_detach,
681};
682
284static __init int rs5c372_init(void) 683static __init int rs5c372_init(void)
285{ 684{
286 return i2c_add_driver(&rs5c372_driver); 685 return i2c_add_driver(&rs5c372_driver);
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 24ee8be359f5..6377db1b446d 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -217,6 +217,7 @@ static const struct quirk_printer_struct quirk_printers[] = {
217 { 0x0409, 0xbef4, USBLP_QUIRK_BIDIR }, /* NEC Picty760 (HP OEM) */ 217 { 0x0409, 0xbef4, USBLP_QUIRK_BIDIR }, /* NEC Picty760 (HP OEM) */
218 { 0x0409, 0xf0be, USBLP_QUIRK_BIDIR }, /* NEC Picty920 (HP OEM) */ 218 { 0x0409, 0xf0be, USBLP_QUIRK_BIDIR }, /* NEC Picty920 (HP OEM) */
219 { 0x0409, 0xf1be, USBLP_QUIRK_BIDIR }, /* NEC Picty800 (HP OEM) */ 219 { 0x0409, 0xf1be, USBLP_QUIRK_BIDIR }, /* NEC Picty800 (HP OEM) */
220 { 0x0482, 0x0010, USBLP_QUIRK_BIDIR }, /* Kyocera Mita FS 820, by zut <kernel@zut.de> */
220 { 0, 0 } 221 { 0, 0 }
221}; 222};
222 223
diff --git a/drivers/usb/core/endpoint.c b/drivers/usb/core/endpoint.c
index c505b767cee1..5e628ae3aec7 100644
--- a/drivers/usb/core/endpoint.c
+++ b/drivers/usb/core/endpoint.c
@@ -268,6 +268,7 @@ static void ep_device_release(struct device *dev)
268 struct ep_device *ep_dev = to_ep_device(dev); 268 struct ep_device *ep_dev = to_ep_device(dev);
269 269
270 dev_dbg(dev, "%s called for %s\n", __FUNCTION__, dev->bus_id); 270 dev_dbg(dev, "%s called for %s\n", __FUNCTION__, dev->bus_id);
271 endpoint_free_minor(ep_dev);
271 kfree(ep_dev); 272 kfree(ep_dev);
272} 273}
273 274
@@ -349,7 +350,6 @@ void usb_remove_ep_files(struct usb_host_endpoint *endpoint)
349 sprintf(name, "ep_%02x", endpoint->desc.bEndpointAddress); 350 sprintf(name, "ep_%02x", endpoint->desc.bEndpointAddress);
350 sysfs_remove_link(&ep_dev->dev.parent->kobj, name); 351 sysfs_remove_link(&ep_dev->dev.parent->kobj, name);
351 sysfs_remove_group(&ep_dev->dev.kobj, &ep_dev_attr_grp); 352 sysfs_remove_group(&ep_dev->dev.kobj, &ep_dev_attr_grp);
352 endpoint_free_minor(ep_dev);
353 device_unregister(&ep_dev->dev); 353 device_unregister(&ep_dev->dev);
354 endpoint->ep_dev = NULL; 354 endpoint->ep_dev = NULL;
355 destroy_endpoint_class(); 355 destroy_endpoint_class();
diff --git a/drivers/usb/gadget/omap_udc.c b/drivers/usb/gadget/omap_udc.c
index 15d77c307930..cdcfd42843d4 100644
--- a/drivers/usb/gadget/omap_udc.c
+++ b/drivers/usb/gadget/omap_udc.c
@@ -42,6 +42,7 @@
42#include <linux/usb_gadget.h> 42#include <linux/usb_gadget.h>
43#include <linux/usb/otg.h> 43#include <linux/usb/otg.h>
44#include <linux/dma-mapping.h> 44#include <linux/dma-mapping.h>
45#include <linux/clk.h>
45 46
46#include <asm/byteorder.h> 47#include <asm/byteorder.h>
47#include <asm/io.h> 48#include <asm/io.h>
@@ -60,6 +61,11 @@
60/* bulk DMA seems to be behaving for both IN and OUT */ 61/* bulk DMA seems to be behaving for both IN and OUT */
61#define USE_DMA 62#define USE_DMA
62 63
64/* FIXME: OMAP2 currently has some problem in DMA mode */
65#ifdef CONFIG_ARCH_OMAP2
66#undef USE_DMA
67#endif
68
63/* ISO too */ 69/* ISO too */
64#define USE_ISO 70#define USE_ISO
65 71
@@ -99,7 +105,7 @@ static unsigned fifo_mode = 0;
99 * boot parameter "omap_udc:fifo_mode=42" 105 * boot parameter "omap_udc:fifo_mode=42"
100 */ 106 */
101module_param (fifo_mode, uint, 0); 107module_param (fifo_mode, uint, 0);
102MODULE_PARM_DESC (fifo_mode, "endpoint setup (0 == default)"); 108MODULE_PARM_DESC (fifo_mode, "endpoint configuration");
103 109
104#ifdef USE_DMA 110#ifdef USE_DMA
105static unsigned use_dma = 1; 111static unsigned use_dma = 1;
@@ -122,7 +128,7 @@ static const char driver_desc [] = DRIVER_DESC;
122/*-------------------------------------------------------------------------*/ 128/*-------------------------------------------------------------------------*/
123 129
124/* there's a notion of "current endpoint" for modifying endpoint 130/* there's a notion of "current endpoint" for modifying endpoint
125 * state, and PIO access to its FIFO. 131 * state, and PIO access to its FIFO.
126 */ 132 */
127 133
128static void use_ep(struct omap_ep *ep, u16 select) 134static void use_ep(struct omap_ep *ep, u16 select)
@@ -391,7 +397,7 @@ done(struct omap_ep *ep, struct omap_req *req, int status)
391#define FIFO_EMPTY (UDC_NON_ISO_FIFO_EMPTY | UDC_ISO_FIFO_EMPTY) 397#define FIFO_EMPTY (UDC_NON_ISO_FIFO_EMPTY | UDC_ISO_FIFO_EMPTY)
392#define FIFO_UNREADABLE (UDC_EP_HALTED | FIFO_EMPTY) 398#define FIFO_UNREADABLE (UDC_EP_HALTED | FIFO_EMPTY)
393 399
394static inline int 400static inline int
395write_packet(u8 *buf, struct omap_req *req, unsigned max) 401write_packet(u8 *buf, struct omap_req *req, unsigned max)
396{ 402{
397 unsigned len; 403 unsigned len;
@@ -456,7 +462,7 @@ static int write_fifo(struct omap_ep *ep, struct omap_req *req)
456 return is_last; 462 return is_last;
457} 463}
458 464
459static inline int 465static inline int
460read_packet(u8 *buf, struct omap_req *req, unsigned avail) 466read_packet(u8 *buf, struct omap_req *req, unsigned avail)
461{ 467{
462 unsigned len; 468 unsigned len;
@@ -542,9 +548,9 @@ static inline dma_addr_t dma_csac(unsigned lch)
542 /* omap 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is 548 /* omap 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is
543 * read before the DMA controller finished disabling the channel. 549 * read before the DMA controller finished disabling the channel.
544 */ 550 */
545 csac = omap_readw(OMAP_DMA_CSAC(lch)); 551 csac = OMAP_DMA_CSAC_REG(lch);
546 if (csac == 0) 552 if (csac == 0)
547 csac = omap_readw(OMAP_DMA_CSAC(lch)); 553 csac = OMAP_DMA_CSAC_REG(lch);
548 return csac; 554 return csac;
549} 555}
550 556
@@ -555,9 +561,9 @@ static inline dma_addr_t dma_cdac(unsigned lch)
555 /* omap 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is 561 /* omap 3.2/3.3 erratum: sometimes 0 is returned if CSAC/CDAC is
556 * read before the DMA controller finished disabling the channel. 562 * read before the DMA controller finished disabling the channel.
557 */ 563 */
558 cdac = omap_readw(OMAP_DMA_CDAC(lch)); 564 cdac = OMAP_DMA_CDAC_REG(lch);
559 if (cdac == 0) 565 if (cdac == 0)
560 cdac = omap_readw(OMAP_DMA_CDAC(lch)); 566 cdac = OMAP_DMA_CDAC_REG(lch);
561 return cdac; 567 return cdac;
562} 568}
563 569
@@ -582,7 +588,7 @@ static u16 dma_src_len(struct omap_ep *ep, dma_addr_t start)
582} 588}
583 589
584#define DMA_DEST_LAST(x) (cpu_is_omap15xx() \ 590#define DMA_DEST_LAST(x) (cpu_is_omap15xx() \
585 ? omap_readw(OMAP_DMA_CSAC(x)) /* really: CPC */ \ 591 ? OMAP_DMA_CSAC_REG(x) /* really: CPC */ \
586 : dma_cdac(x)) 592 : dma_cdac(x))
587 593
588static u16 dma_dest_len(struct omap_ep *ep, dma_addr_t start) 594static u16 dma_dest_len(struct omap_ep *ep, dma_addr_t start)
@@ -620,17 +626,19 @@ static void next_in_dma(struct omap_ep *ep, struct omap_req *req)
620 || (cpu_is_omap15xx() && length < ep->maxpacket)) { 626 || (cpu_is_omap15xx() && length < ep->maxpacket)) {
621 txdma_ctrl = UDC_TXN_EOT | length; 627 txdma_ctrl = UDC_TXN_EOT | length;
622 omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S8, 628 omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S8,
623 length, 1, sync_mode); 629 length, 1, sync_mode, 0, 0);
624 } else { 630 } else {
625 length = min(length / ep->maxpacket, 631 length = min(length / ep->maxpacket,
626 (unsigned) UDC_TXN_TSC + 1); 632 (unsigned) UDC_TXN_TSC + 1);
627 txdma_ctrl = length; 633 txdma_ctrl = length;
628 omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S16, 634 omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S16,
629 ep->ep.maxpacket >> 1, length, sync_mode); 635 ep->ep.maxpacket >> 1, length, sync_mode,
636 0, 0);
630 length *= ep->maxpacket; 637 length *= ep->maxpacket;
631 } 638 }
632 omap_set_dma_src_params(ep->lch, OMAP_DMA_PORT_EMIFF, 639 omap_set_dma_src_params(ep->lch, OMAP_DMA_PORT_EMIFF,
633 OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual); 640 OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual,
641 0, 0);
634 642
635 omap_start_dma(ep->lch); 643 omap_start_dma(ep->lch);
636 ep->dma_counter = dma_csac(ep->lch); 644 ep->dma_counter = dma_csac(ep->lch);
@@ -675,9 +683,11 @@ static void next_out_dma(struct omap_ep *ep, struct omap_req *req)
675 req->dma_bytes = packets * ep->ep.maxpacket; 683 req->dma_bytes = packets * ep->ep.maxpacket;
676 omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S16, 684 omap_set_dma_transfer_params(ep->lch, OMAP_DMA_DATA_TYPE_S16,
677 ep->ep.maxpacket >> 1, packets, 685 ep->ep.maxpacket >> 1, packets,
678 OMAP_DMA_SYNC_ELEMENT); 686 OMAP_DMA_SYNC_ELEMENT,
687 0, 0);
679 omap_set_dma_dest_params(ep->lch, OMAP_DMA_PORT_EMIFF, 688 omap_set_dma_dest_params(ep->lch, OMAP_DMA_PORT_EMIFF,
680 OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual); 689 OMAP_DMA_AMODE_POST_INC, req->req.dma + req->req.actual,
690 0, 0);
681 ep->dma_counter = DMA_DEST_LAST(ep->lch); 691 ep->dma_counter = DMA_DEST_LAST(ep->lch);
682 692
683 UDC_RXDMA_REG(ep->dma_channel) = UDC_RXN_STOP | (packets - 1); 693 UDC_RXDMA_REG(ep->dma_channel) = UDC_RXN_STOP | (packets - 1);
@@ -820,7 +830,8 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel)
820 omap_set_dma_dest_params(ep->lch, 830 omap_set_dma_dest_params(ep->lch,
821 OMAP_DMA_PORT_TIPB, 831 OMAP_DMA_PORT_TIPB,
822 OMAP_DMA_AMODE_CONSTANT, 832 OMAP_DMA_AMODE_CONSTANT,
823 (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG)); 833 (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG),
834 0, 0);
824 } 835 }
825 } else { 836 } else {
826 status = omap_request_dma(OMAP_DMA_USB_W2FC_RX0 - 1 + channel, 837 status = omap_request_dma(OMAP_DMA_USB_W2FC_RX0 - 1 + channel,
@@ -831,7 +842,8 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel)
831 omap_set_dma_src_params(ep->lch, 842 omap_set_dma_src_params(ep->lch,
832 OMAP_DMA_PORT_TIPB, 843 OMAP_DMA_PORT_TIPB,
833 OMAP_DMA_AMODE_CONSTANT, 844 OMAP_DMA_AMODE_CONSTANT,
834 (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG)); 845 (unsigned long) io_v2p((u32)&UDC_DATA_DMA_REG),
846 0, 0);
835 /* EMIFF */ 847 /* EMIFF */
836 omap_set_dma_dest_burst_mode(ep->lch, 848 omap_set_dma_dest_burst_mode(ep->lch,
837 OMAP_DMA_DATA_BURST_4); 849 OMAP_DMA_DATA_BURST_4);
@@ -846,7 +858,7 @@ static void dma_channel_claim(struct omap_ep *ep, unsigned channel)
846 858
847 /* channel type P: hw synch (fifo) */ 859 /* channel type P: hw synch (fifo) */
848 if (!cpu_is_omap15xx()) 860 if (!cpu_is_omap15xx())
849 omap_writew(2, OMAP_DMA_LCH_CTRL(ep->lch)); 861 OMAP1_DMA_LCH_CTRL_REG(ep->lch) = 2;
850 } 862 }
851 863
852just_restart: 864just_restart:
@@ -893,7 +905,7 @@ static void dma_channel_release(struct omap_ep *ep)
893 else 905 else
894 req = NULL; 906 req = NULL;
895 907
896 active = ((1 << 7) & omap_readl(OMAP_DMA_CCR(ep->lch))) != 0; 908 active = ((1 << 7) & OMAP_DMA_CCR_REG(ep->lch)) != 0;
897 909
898 DBG("%s release %s %cxdma%d %p\n", ep->ep.name, 910 DBG("%s release %s %cxdma%d %p\n", ep->ep.name,
899 active ? "active" : "idle", 911 active ? "active" : "idle",
@@ -1117,7 +1129,7 @@ static int omap_ep_dequeue(struct usb_ep *_ep, struct usb_request *_req)
1117 */ 1129 */
1118 dma_channel_release(ep); 1130 dma_channel_release(ep);
1119 dma_channel_claim(ep, channel); 1131 dma_channel_claim(ep, channel);
1120 } else 1132 } else
1121 done(ep, req, -ECONNRESET); 1133 done(ep, req, -ECONNRESET);
1122 spin_unlock_irqrestore(&ep->udc->lock, flags); 1134 spin_unlock_irqrestore(&ep->udc->lock, flags);
1123 return 0; 1135 return 0;
@@ -1153,7 +1165,7 @@ static int omap_ep_set_halt(struct usb_ep *_ep, int value)
1153 1165
1154 /* IN endpoints must already be idle */ 1166 /* IN endpoints must already be idle */
1155 if ((ep->bEndpointAddress & USB_DIR_IN) 1167 if ((ep->bEndpointAddress & USB_DIR_IN)
1156 && !list_empty(&ep->queue)) { 1168 && !list_empty(&ep->queue)) {
1157 status = -EAGAIN; 1169 status = -EAGAIN;
1158 goto done; 1170 goto done;
1159 } 1171 }
@@ -1298,6 +1310,23 @@ static void pullup_disable(struct omap_udc *udc)
1298 UDC_SYSCON1_REG &= ~UDC_PULLUP_EN; 1310 UDC_SYSCON1_REG &= ~UDC_PULLUP_EN;
1299} 1311}
1300 1312
1313static struct omap_udc *udc;
1314
1315static void omap_udc_enable_clock(int enable)
1316{
1317 if (udc == NULL || udc->dc_clk == NULL || udc->hhc_clk == NULL)
1318 return;
1319
1320 if (enable) {
1321 clk_enable(udc->dc_clk);
1322 clk_enable(udc->hhc_clk);
1323 udelay(100);
1324 } else {
1325 clk_disable(udc->hhc_clk);
1326 clk_disable(udc->dc_clk);
1327 }
1328}
1329
1301/* 1330/*
1302 * Called by whatever detects VBUS sessions: external transceiver 1331 * Called by whatever detects VBUS sessions: external transceiver
1303 * driver, or maybe GPIO0 VBUS IRQ. May request 48 MHz clock. 1332 * driver, or maybe GPIO0 VBUS IRQ. May request 48 MHz clock.
@@ -1318,10 +1347,22 @@ static int omap_vbus_session(struct usb_gadget *gadget, int is_active)
1318 else 1347 else
1319 FUNC_MUX_CTRL_0_REG &= ~VBUS_CTRL_1510; 1348 FUNC_MUX_CTRL_0_REG &= ~VBUS_CTRL_1510;
1320 } 1349 }
1350 if (udc->dc_clk != NULL && is_active) {
1351 if (!udc->clk_requested) {
1352 omap_udc_enable_clock(1);
1353 udc->clk_requested = 1;
1354 }
1355 }
1321 if (can_pullup(udc)) 1356 if (can_pullup(udc))
1322 pullup_enable(udc); 1357 pullup_enable(udc);
1323 else 1358 else
1324 pullup_disable(udc); 1359 pullup_disable(udc);
1360 if (udc->dc_clk != NULL && !is_active) {
1361 if (udc->clk_requested) {
1362 omap_udc_enable_clock(0);
1363 udc->clk_requested = 0;
1364 }
1365 }
1325 spin_unlock_irqrestore(&udc->lock, flags); 1366 spin_unlock_irqrestore(&udc->lock, flags);
1326 return 0; 1367 return 0;
1327} 1368}
@@ -1441,7 +1482,7 @@ static void ep0_irq(struct omap_udc *udc, u16 irq_src)
1441 } 1482 }
1442 } 1483 }
1443 1484
1444 /* IN/OUT packets mean we're in the DATA or STATUS stage. 1485 /* IN/OUT packets mean we're in the DATA or STATUS stage.
1445 * This driver uses only uses protocol stalls (ep0 never halts), 1486 * This driver uses only uses protocol stalls (ep0 never halts),
1446 * and if we got this far the gadget driver already had a 1487 * and if we got this far the gadget driver already had a
1447 * chance to stall. Tries to be forgiving of host oddities. 1488 * chance to stall. Tries to be forgiving of host oddities.
@@ -1509,7 +1550,7 @@ static void ep0_irq(struct omap_udc *udc, u16 irq_src)
1509 } else if (stat == 0) 1550 } else if (stat == 0)
1510 UDC_CTRL_REG = UDC_SET_FIFO_EN; 1551 UDC_CTRL_REG = UDC_SET_FIFO_EN;
1511 UDC_EP_NUM_REG = 0; 1552 UDC_EP_NUM_REG = 0;
1512 1553
1513 /* activate status stage */ 1554 /* activate status stage */
1514 if (stat == 1) { 1555 if (stat == 1) {
1515 done(ep0, req, 0); 1556 done(ep0, req, 0);
@@ -1866,7 +1907,7 @@ static void pio_out_timer(unsigned long _ep)
1866 1907
1867 spin_lock_irqsave(&ep->udc->lock, flags); 1908 spin_lock_irqsave(&ep->udc->lock, flags);
1868 if (!list_empty(&ep->queue) && ep->ackwait) { 1909 if (!list_empty(&ep->queue) && ep->ackwait) {
1869 use_ep(ep, 0); 1910 use_ep(ep, UDC_EP_SEL);
1870 stat_flg = UDC_STAT_FLG_REG; 1911 stat_flg = UDC_STAT_FLG_REG;
1871 1912
1872 if ((stat_flg & UDC_ACK) && (!(stat_flg & UDC_FIFO_EN) 1913 if ((stat_flg & UDC_ACK) && (!(stat_flg & UDC_FIFO_EN)
@@ -1876,12 +1917,12 @@ static void pio_out_timer(unsigned long _ep)
1876 VDBG("%s: lose, %04x\n", ep->ep.name, stat_flg); 1917 VDBG("%s: lose, %04x\n", ep->ep.name, stat_flg);
1877 req = container_of(ep->queue.next, 1918 req = container_of(ep->queue.next,
1878 struct omap_req, queue); 1919 struct omap_req, queue);
1879 UDC_EP_NUM_REG = ep->bEndpointAddress | UDC_EP_SEL;
1880 (void) read_fifo(ep, req); 1920 (void) read_fifo(ep, req);
1881 UDC_EP_NUM_REG = ep->bEndpointAddress; 1921 UDC_EP_NUM_REG = ep->bEndpointAddress;
1882 UDC_CTRL_REG = UDC_SET_FIFO_EN; 1922 UDC_CTRL_REG = UDC_SET_FIFO_EN;
1883 ep->ackwait = 1 + ep->double_buf; 1923 ep->ackwait = 1 + ep->double_buf;
1884 } 1924 } else
1925 deselect_ep();
1885 } 1926 }
1886 mod_timer(&ep->timer, PIO_OUT_TIMEOUT); 1927 mod_timer(&ep->timer, PIO_OUT_TIMEOUT);
1887 spin_unlock_irqrestore(&ep->udc->lock, flags); 1928 spin_unlock_irqrestore(&ep->udc->lock, flags);
@@ -2028,7 +2069,17 @@ static irqreturn_t omap_udc_iso_irq(int irq, void *_dev)
2028 2069
2029/*-------------------------------------------------------------------------*/ 2070/*-------------------------------------------------------------------------*/
2030 2071
2031static struct omap_udc *udc; 2072static inline int machine_needs_vbus_session(void)
2073{
2074 return (machine_is_omap_innovator()
2075 || machine_is_omap_osk()
2076 || machine_is_omap_apollon()
2077#ifndef CONFIG_MACH_OMAP_H4_OTG
2078 || machine_is_omap_h4()
2079#endif
2080 || machine_is_sx1()
2081 );
2082}
2032 2083
2033int usb_gadget_register_driver (struct usb_gadget_driver *driver) 2084int usb_gadget_register_driver (struct usb_gadget_driver *driver)
2034{ 2085{
@@ -2070,6 +2121,9 @@ int usb_gadget_register_driver (struct usb_gadget_driver *driver)
2070 udc->gadget.dev.driver = &driver->driver; 2121 udc->gadget.dev.driver = &driver->driver;
2071 spin_unlock_irqrestore(&udc->lock, flags); 2122 spin_unlock_irqrestore(&udc->lock, flags);
2072 2123
2124 if (udc->dc_clk != NULL)
2125 omap_udc_enable_clock(1);
2126
2073 status = driver->bind (&udc->gadget); 2127 status = driver->bind (&udc->gadget);
2074 if (status) { 2128 if (status) {
2075 DBG("bind to %s --> %d\n", driver->driver.name, status); 2129 DBG("bind to %s --> %d\n", driver->driver.name, status);
@@ -2103,10 +2157,12 @@ int usb_gadget_register_driver (struct usb_gadget_driver *driver)
2103 /* boards that don't have VBUS sensing can't autogate 48MHz; 2157 /* boards that don't have VBUS sensing can't autogate 48MHz;
2104 * can't enter deep sleep while a gadget driver is active. 2158 * can't enter deep sleep while a gadget driver is active.
2105 */ 2159 */
2106 if (machine_is_omap_innovator() || machine_is_omap_osk()) 2160 if (machine_needs_vbus_session())
2107 omap_vbus_session(&udc->gadget, 1); 2161 omap_vbus_session(&udc->gadget, 1);
2108 2162
2109done: 2163done:
2164 if (udc->dc_clk != NULL)
2165 omap_udc_enable_clock(0);
2110 return status; 2166 return status;
2111} 2167}
2112EXPORT_SYMBOL(usb_gadget_register_driver); 2168EXPORT_SYMBOL(usb_gadget_register_driver);
@@ -2121,7 +2177,10 @@ int usb_gadget_unregister_driver (struct usb_gadget_driver *driver)
2121 if (!driver || driver != udc->driver || !driver->unbind) 2177 if (!driver || driver != udc->driver || !driver->unbind)
2122 return -EINVAL; 2178 return -EINVAL;
2123 2179
2124 if (machine_is_omap_innovator() || machine_is_omap_osk()) 2180 if (udc->dc_clk != NULL)
2181 omap_udc_enable_clock(1);
2182
2183 if (machine_needs_vbus_session())
2125 omap_vbus_session(&udc->gadget, 0); 2184 omap_vbus_session(&udc->gadget, 0);
2126 2185
2127 if (udc->transceiver) 2186 if (udc->transceiver)
@@ -2137,6 +2196,8 @@ int usb_gadget_unregister_driver (struct usb_gadget_driver *driver)
2137 udc->gadget.dev.driver = NULL; 2196 udc->gadget.dev.driver = NULL;
2138 udc->driver = NULL; 2197 udc->driver = NULL;
2139 2198
2199 if (udc->dc_clk != NULL)
2200 omap_udc_enable_clock(0);
2140 DBG("unregistered driver '%s'\n", driver->driver.name); 2201 DBG("unregistered driver '%s'\n", driver->driver.name);
2141 return status; 2202 return status;
2142} 2203}
@@ -2219,7 +2280,7 @@ static char *trx_mode(unsigned m, int enabled)
2219 case 0: return enabled ? "*6wire" : "unused"; 2280 case 0: return enabled ? "*6wire" : "unused";
2220 case 1: return "4wire"; 2281 case 1: return "4wire";
2221 case 2: return "3wire"; 2282 case 2: return "3wire";
2222 case 3: return "6wire"; 2283 case 3: return "6wire";
2223 default: return "unknown"; 2284 default: return "unknown";
2224 } 2285 }
2225} 2286}
@@ -2228,11 +2289,18 @@ static int proc_otg_show(struct seq_file *s)
2228{ 2289{
2229 u32 tmp; 2290 u32 tmp;
2230 u32 trans; 2291 u32 trans;
2292 char *ctrl_name;
2231 2293
2232 tmp = OTG_REV_REG; 2294 tmp = OTG_REV_REG;
2233 trans = USB_TRANSCEIVER_CTRL_REG; 2295 if (cpu_is_omap24xx()) {
2234 seq_printf(s, "\nOTG rev %d.%d, transceiver_ctrl %05x\n", 2296 ctrl_name = "control_devconf";
2235 tmp >> 4, tmp & 0xf, trans); 2297 trans = CONTROL_DEVCONF_REG;
2298 } else {
2299 ctrl_name = "tranceiver_ctrl";
2300 trans = USB_TRANSCEIVER_CTRL_REG;
2301 }
2302 seq_printf(s, "\nOTG rev %d.%d, %s %05x\n",
2303 tmp >> 4, tmp & 0xf, ctrl_name, trans);
2236 tmp = OTG_SYSCON_1_REG; 2304 tmp = OTG_SYSCON_1_REG;
2237 seq_printf(s, "otg_syscon1 %08x usb2 %s, usb1 %s, usb0 %s," 2305 seq_printf(s, "otg_syscon1 %08x usb2 %s, usb1 %s, usb0 %s,"
2238 FOURBITS "\n", tmp, 2306 FOURBITS "\n", tmp,
@@ -2307,7 +2375,7 @@ static int proc_udc_show(struct seq_file *s, void *_)
2307 driver_desc, 2375 driver_desc,
2308 use_dma ? " (dma)" : ""); 2376 use_dma ? " (dma)" : "");
2309 2377
2310 tmp = UDC_REV_REG & 0xff; 2378 tmp = UDC_REV_REG & 0xff;
2311 seq_printf(s, 2379 seq_printf(s,
2312 "UDC rev %d.%d, fifo mode %d, gadget %s\n" 2380 "UDC rev %d.%d, fifo mode %d, gadget %s\n"
2313 "hmc %d, transceiver %s\n", 2381 "hmc %d, transceiver %s\n",
@@ -2315,11 +2383,16 @@ static int proc_udc_show(struct seq_file *s, void *_)
2315 fifo_mode, 2383 fifo_mode,
2316 udc->driver ? udc->driver->driver.name : "(none)", 2384 udc->driver ? udc->driver->driver.name : "(none)",
2317 HMC, 2385 HMC,
2318 udc->transceiver ? udc->transceiver->label : "(none)"); 2386 udc->transceiver
2319 seq_printf(s, "ULPD control %04x req %04x status %04x\n", 2387 ? udc->transceiver->label
2320 __REG16(ULPD_CLOCK_CTRL), 2388 : ((cpu_is_omap1710() || cpu_is_omap24xx())
2321 __REG16(ULPD_SOFT_REQ), 2389 ? "external" : "(none)"));
2322 __REG16(ULPD_STATUS_REQ)); 2390 if (cpu_class_is_omap1()) {
2391 seq_printf(s, "ULPD control %04x req %04x status %04x\n",
2392 __REG16(ULPD_CLOCK_CTRL),
2393 __REG16(ULPD_SOFT_REQ),
2394 __REG16(ULPD_STATUS_REQ));
2395 }
2323 2396
2324 /* OTG controller registers */ 2397 /* OTG controller registers */
2325 if (!cpu_is_omap15xx()) 2398 if (!cpu_is_omap15xx())
@@ -2504,9 +2577,10 @@ omap_ep_setup(char *name, u8 addr, u8 type,
2504 dbuf = 1; 2577 dbuf = 1;
2505 } else { 2578 } else {
2506 /* double-buffering "not supported" on 15xx, 2579 /* double-buffering "not supported" on 15xx,
2507 * and ignored for PIO-IN on 16xx 2580 * and ignored for PIO-IN on newer chips
2581 * (for more reliable behavior)
2508 */ 2582 */
2509 if (!use_dma || cpu_is_omap15xx()) 2583 if (!use_dma || cpu_is_omap15xx() || cpu_is_omap24xx())
2510 dbuf = 0; 2584 dbuf = 0;
2511 2585
2512 switch (maxp) { 2586 switch (maxp) {
@@ -2549,7 +2623,7 @@ omap_ep_setup(char *name, u8 addr, u8 type,
2549 ep->bEndpointAddress = addr; 2623 ep->bEndpointAddress = addr;
2550 ep->bmAttributes = type; 2624 ep->bmAttributes = type;
2551 ep->double_buf = dbuf; 2625 ep->double_buf = dbuf;
2552 ep->udc = udc; 2626 ep->udc = udc;
2553 2627
2554 ep->ep.name = ep->name; 2628 ep->ep.name = ep->name;
2555 ep->ep.ops = &omap_ep_ops; 2629 ep->ep.ops = &omap_ep_ops;
@@ -2709,15 +2783,37 @@ static int __init omap_udc_probe(struct platform_device *pdev)
2709 struct otg_transceiver *xceiv = NULL; 2783 struct otg_transceiver *xceiv = NULL;
2710 const char *type = NULL; 2784 const char *type = NULL;
2711 struct omap_usb_config *config = pdev->dev.platform_data; 2785 struct omap_usb_config *config = pdev->dev.platform_data;
2786 struct clk *dc_clk;
2787 struct clk *hhc_clk;
2712 2788
2713 /* NOTE: "knows" the order of the resources! */ 2789 /* NOTE: "knows" the order of the resources! */
2714 if (!request_mem_region(pdev->resource[0].start, 2790 if (!request_mem_region(pdev->resource[0].start,
2715 pdev->resource[0].end - pdev->resource[0].start + 1, 2791 pdev->resource[0].end - pdev->resource[0].start + 1,
2716 driver_name)) { 2792 driver_name)) {
2717 DBG("request_mem_region failed\n"); 2793 DBG("request_mem_region failed\n");
2718 return -EBUSY; 2794 return -EBUSY;
2719 } 2795 }
2720 2796
2797 if (cpu_is_omap16xx()) {
2798 dc_clk = clk_get(&pdev->dev, "usb_dc_ck");
2799 hhc_clk = clk_get(&pdev->dev, "usb_hhc_ck");
2800 BUG_ON(IS_ERR(dc_clk) || IS_ERR(hhc_clk));
2801 /* can't use omap_udc_enable_clock yet */
2802 clk_enable(dc_clk);
2803 clk_enable(hhc_clk);
2804 udelay(100);
2805 }
2806
2807 if (cpu_is_omap24xx()) {
2808 dc_clk = clk_get(&pdev->dev, "usb_fck");
2809 hhc_clk = clk_get(&pdev->dev, "usb_l4_ick");
2810 BUG_ON(IS_ERR(dc_clk) || IS_ERR(hhc_clk));
2811 /* can't use omap_udc_enable_clock yet */
2812 clk_enable(dc_clk);
2813 clk_enable(hhc_clk);
2814 udelay(100);
2815 }
2816
2721 INFO("OMAP UDC rev %d.%d%s\n", 2817 INFO("OMAP UDC rev %d.%d%s\n",
2722 UDC_REV_REG >> 4, UDC_REV_REG & 0xf, 2818 UDC_REV_REG >> 4, UDC_REV_REG & 0xf,
2723 config->otg ? ", Mini-AB" : ""); 2819 config->otg ? ", Mini-AB" : "");
@@ -2727,7 +2823,7 @@ static int __init omap_udc_probe(struct platform_device *pdev)
2727 hmc = HMC_1510; 2823 hmc = HMC_1510;
2728 type = "(unknown)"; 2824 type = "(unknown)";
2729 2825
2730 if (machine_is_omap_innovator()) { 2826 if (machine_is_omap_innovator() || machine_is_sx1()) {
2731 /* just set up software VBUS detect, and then 2827 /* just set up software VBUS detect, and then
2732 * later rig it so we always report VBUS. 2828 * later rig it so we always report VBUS.
2733 * FIXME without really sensing VBUS, we can't 2829 * FIXME without really sensing VBUS, we can't
@@ -2756,6 +2852,15 @@ static int __init omap_udc_probe(struct platform_device *pdev)
2756 } 2852 }
2757 2853
2758 hmc = HMC_1610; 2854 hmc = HMC_1610;
2855
2856 if (cpu_is_omap24xx()) {
2857 /* this could be transceiverless in one of the
2858 * "we don't need to know" modes.
2859 */
2860 type = "external";
2861 goto known;
2862 }
2863
2759 switch (hmc) { 2864 switch (hmc) {
2760 case 0: /* POWERUP DEFAULT == 0 */ 2865 case 0: /* POWERUP DEFAULT == 0 */
2761 case 4: 2866 case 4:
@@ -2794,6 +2899,7 @@ bad_on_1710:
2794 goto cleanup0; 2899 goto cleanup0;
2795 } 2900 }
2796 } 2901 }
2902known:
2797 INFO("hmc mode %d, %s transceiver\n", hmc, type); 2903 INFO("hmc mode %d, %s transceiver\n", hmc, type);
2798 2904
2799 /* a "gadget" abstracts/virtualizes the controller */ 2905 /* a "gadget" abstracts/virtualizes the controller */
@@ -2818,8 +2924,8 @@ bad_on_1710:
2818 status = request_irq(pdev->resource[1].start, omap_udc_irq, 2924 status = request_irq(pdev->resource[1].start, omap_udc_irq,
2819 IRQF_SAMPLE_RANDOM, driver_name, udc); 2925 IRQF_SAMPLE_RANDOM, driver_name, udc);
2820 if (status != 0) { 2926 if (status != 0) {
2821 ERR( "can't get irq %ld, err %d\n", 2927 ERR("can't get irq %d, err %d\n",
2822 pdev->resource[1].start, status); 2928 (int) pdev->resource[1].start, status);
2823 goto cleanup1; 2929 goto cleanup1;
2824 } 2930 }
2825 2931
@@ -2827,24 +2933,41 @@ bad_on_1710:
2827 status = request_irq(pdev->resource[2].start, omap_udc_pio_irq, 2933 status = request_irq(pdev->resource[2].start, omap_udc_pio_irq,
2828 IRQF_SAMPLE_RANDOM, "omap_udc pio", udc); 2934 IRQF_SAMPLE_RANDOM, "omap_udc pio", udc);
2829 if (status != 0) { 2935 if (status != 0) {
2830 ERR( "can't get irq %ld, err %d\n", 2936 ERR("can't get irq %d, err %d\n",
2831 pdev->resource[2].start, status); 2937 (int) pdev->resource[2].start, status);
2832 goto cleanup2; 2938 goto cleanup2;
2833 } 2939 }
2834#ifdef USE_ISO 2940#ifdef USE_ISO
2835 status = request_irq(pdev->resource[3].start, omap_udc_iso_irq, 2941 status = request_irq(pdev->resource[3].start, omap_udc_iso_irq,
2836 IRQF_DISABLED, "omap_udc iso", udc); 2942 IRQF_DISABLED, "omap_udc iso", udc);
2837 if (status != 0) { 2943 if (status != 0) {
2838 ERR("can't get irq %ld, err %d\n", 2944 ERR("can't get irq %d, err %d\n",
2839 pdev->resource[3].start, status); 2945 (int) pdev->resource[3].start, status);
2840 goto cleanup3; 2946 goto cleanup3;
2841 } 2947 }
2842#endif 2948#endif
2949 if (cpu_is_omap16xx()) {
2950 udc->dc_clk = dc_clk;
2951 udc->hhc_clk = hhc_clk;
2952 clk_disable(hhc_clk);
2953 clk_disable(dc_clk);
2954 }
2955
2956 if (cpu_is_omap24xx()) {
2957 udc->dc_clk = dc_clk;
2958 udc->hhc_clk = hhc_clk;
2959 /* FIXME OMAP2 don't release hhc & dc clock */
2960#if 0
2961 clk_disable(hhc_clk);
2962 clk_disable(dc_clk);
2963#endif
2964 }
2843 2965
2844 create_proc_file(); 2966 create_proc_file();
2845 device_add(&udc->gadget.dev); 2967 status = device_add(&udc->gadget.dev);
2846 return 0; 2968 if (!status)
2847 2969 return status;
2970 /* If fail, fall through */
2848#ifdef USE_ISO 2971#ifdef USE_ISO
2849cleanup3: 2972cleanup3:
2850 free_irq(pdev->resource[2].start, udc); 2973 free_irq(pdev->resource[2].start, udc);
@@ -2860,8 +2983,17 @@ cleanup1:
2860cleanup0: 2983cleanup0:
2861 if (xceiv) 2984 if (xceiv)
2862 put_device(xceiv->dev); 2985 put_device(xceiv->dev);
2986
2987 if (cpu_is_omap16xx() || cpu_is_omap24xx()) {
2988 clk_disable(hhc_clk);
2989 clk_disable(dc_clk);
2990 clk_put(hhc_clk);
2991 clk_put(dc_clk);
2992 }
2993
2863 release_mem_region(pdev->resource[0].start, 2994 release_mem_region(pdev->resource[0].start,
2864 pdev->resource[0].end - pdev->resource[0].start + 1); 2995 pdev->resource[0].end - pdev->resource[0].start + 1);
2996
2865 return status; 2997 return status;
2866} 2998}
2867 2999
@@ -2891,6 +3023,13 @@ static int __exit omap_udc_remove(struct platform_device *pdev)
2891 free_irq(pdev->resource[2].start, udc); 3023 free_irq(pdev->resource[2].start, udc);
2892 free_irq(pdev->resource[1].start, udc); 3024 free_irq(pdev->resource[1].start, udc);
2893 3025
3026 if (udc->dc_clk) {
3027 if (udc->clk_requested)
3028 omap_udc_enable_clock(0);
3029 clk_put(udc->hhc_clk);
3030 clk_put(udc->dc_clk);
3031 }
3032
2894 release_mem_region(pdev->resource[0].start, 3033 release_mem_region(pdev->resource[0].start,
2895 pdev->resource[0].end - pdev->resource[0].start + 1); 3034 pdev->resource[0].end - pdev->resource[0].start + 1);
2896 3035
diff --git a/drivers/usb/gadget/omap_udc.h b/drivers/usb/gadget/omap_udc.h
index 652ee4627344..1dc398bb9ab2 100644
--- a/drivers/usb/gadget/omap_udc.h
+++ b/drivers/usb/gadget/omap_udc.h
@@ -175,6 +175,9 @@ struct omap_udc {
175 unsigned ep0_reset_config:1; 175 unsigned ep0_reset_config:1;
176 unsigned ep0_setup:1; 176 unsigned ep0_setup:1;
177 struct completion *done; 177 struct completion *done;
178 struct clk *dc_clk;
179 struct clk *hhc_clk;
180 unsigned clk_requested:1;
178}; 181};
179 182
180/*-------------------------------------------------------------------------*/ 183/*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index acd101caeeeb..e0d4c2358b39 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -209,24 +209,16 @@ static int resume_detect_interrupts_are_broken(struct uhci_hcd *uhci)
209 209
210static int remote_wakeup_is_broken(struct uhci_hcd *uhci) 210static int remote_wakeup_is_broken(struct uhci_hcd *uhci)
211{ 211{
212 static struct dmi_system_id broken_wakeup_table[] = {
213 {
214 .ident = "Asus A7V8X",
215 .matches = {
216 DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK"),
217 DMI_MATCH(DMI_BOARD_NAME, "A7V8X"),
218 DMI_MATCH(DMI_BOARD_VERSION, "REV 1.xx"),
219 }
220 },
221 { }
222 };
223 int port; 212 int port;
213 char *sys_info;
214 static char bad_Asus_board[] = "A7V8X";
224 215
225 /* One of Asus's motherboards has a bug which causes it to 216 /* One of Asus's motherboards has a bug which causes it to
226 * wake up immediately from suspend-to-RAM if any of the ports 217 * wake up immediately from suspend-to-RAM if any of the ports
227 * are connected. In such cases we will not set EGSM. 218 * are connected. In such cases we will not set EGSM.
228 */ 219 */
229 if (dmi_check_system(broken_wakeup_table)) { 220 sys_info = dmi_get_system_info(DMI_BOARD_NAME);
221 if (sys_info && !strcmp(sys_info, bad_Asus_board)) {
230 for (port = 0; port < uhci->rh_numports; ++port) { 222 for (port = 0; port < uhci->rh_numports; ++port) {
231 if (inw(uhci->io_addr + USBPORTSC1 + port * 2) & 223 if (inw(uhci->io_addr + USBPORTSC1 + port * 2) &
232 USBPORTSC_CCS) 224 USBPORTSC_CCS)
@@ -265,7 +257,9 @@ __acquires(uhci->lock)
265 int_enable = USBINTR_RESUME; 257 int_enable = USBINTR_RESUME;
266 if (remote_wakeup_is_broken(uhci)) 258 if (remote_wakeup_is_broken(uhci))
267 egsm_enable = 0; 259 egsm_enable = 0;
268 if (resume_detect_interrupts_are_broken(uhci) || !egsm_enable) 260 if (resume_detect_interrupts_are_broken(uhci) || !egsm_enable ||
261 !device_may_wakeup(
262 &uhci_to_hcd(uhci)->self.root_hub->dev))
269 uhci->working_RD = int_enable = 0; 263 uhci->working_RD = int_enable = 0;
270 264
271 outw(int_enable, uhci->io_addr + USBINTR); 265 outw(int_enable, uhci->io_addr + USBINTR);
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
index bf26c3c56990..9148694627d5 100644
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ b/drivers/usb/misc/sisusbvga/sisusb_con.c
@@ -403,7 +403,7 @@ sisusbcon_putc(struct vc_data *c, int ch, int y, int x)
403 403
404 404
405 sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(x, y), 405 sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(x, y),
406 (u32)SISUSB_HADDR(x, y), 2, &written); 406 (long)SISUSB_HADDR(x, y), 2, &written);
407 407
408 mutex_unlock(&sisusb->lock); 408 mutex_unlock(&sisusb->lock);
409} 409}
@@ -438,7 +438,7 @@ sisusbcon_putcs(struct vc_data *c, const unsigned short *s,
438 } 438 }
439 439
440 sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(x, y), 440 sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(x, y),
441 (u32)SISUSB_HADDR(x, y), count * 2, &written); 441 (long)SISUSB_HADDR(x, y), count * 2, &written);
442 442
443 mutex_unlock(&sisusb->lock); 443 mutex_unlock(&sisusb->lock);
444} 444}
@@ -492,7 +492,7 @@ sisusbcon_clear(struct vc_data *c, int y, int x, int height, int width)
492 492
493 493
494 sisusb_copy_memory(sisusb, (unsigned char *)SISUSB_VADDR(x, y), 494 sisusb_copy_memory(sisusb, (unsigned char *)SISUSB_VADDR(x, y),
495 (u32)SISUSB_HADDR(x, y), length, &written); 495 (long)SISUSB_HADDR(x, y), length, &written);
496 496
497 mutex_unlock(&sisusb->lock); 497 mutex_unlock(&sisusb->lock);
498} 498}
@@ -564,7 +564,7 @@ sisusbcon_bmove(struct vc_data *c, int sy, int sx,
564 564
565 565
566 sisusb_copy_memory(sisusb, (unsigned char *)SISUSB_VADDR(dx, dy), 566 sisusb_copy_memory(sisusb, (unsigned char *)SISUSB_VADDR(dx, dy),
567 (u32)SISUSB_HADDR(dx, dy), length, &written); 567 (long)SISUSB_HADDR(dx, dy), length, &written);
568 568
569 mutex_unlock(&sisusb->lock); 569 mutex_unlock(&sisusb->lock);
570} 570}
@@ -612,7 +612,7 @@ sisusbcon_switch(struct vc_data *c)
612 length); 612 length);
613 613
614 sisusb_copy_memory(sisusb, (unsigned char *)c->vc_origin, 614 sisusb_copy_memory(sisusb, (unsigned char *)c->vc_origin,
615 (u32)SISUSB_HADDR(0, 0), 615 (long)SISUSB_HADDR(0, 0),
616 length, &written); 616 length, &written);
617 617
618 mutex_unlock(&sisusb->lock); 618 mutex_unlock(&sisusb->lock);
@@ -939,7 +939,7 @@ sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb,
939 } 939 }
940 940
941 sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(0, t), 941 sisusb_copy_memory(sisusb, (char *)SISUSB_VADDR(0, t),
942 (u32)SISUSB_HADDR(0, t), length, &written); 942 (long)SISUSB_HADDR(0, t), length, &written);
943 943
944 mutex_unlock(&sisusb->lock); 944 mutex_unlock(&sisusb->lock);
945 945
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index 95e682e2c9d6..f538013965b0 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -920,7 +920,7 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf)
920 goto out2; 920 goto out2;
921 921
922 if ((ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 922 if ((ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT,
923 0x0000, 0, 0, buf)) < 0) { 923 1, 0, 0, buf)) < 0) {
924 dbg("Select PHY #1 failed: %d", ret); 924 dbg("Select PHY #1 failed: %d", ret);
925 goto out2; 925 goto out2;
926 } 926 }
diff --git a/drivers/usb/serial/Kconfig b/drivers/usb/serial/Kconfig
index 2f4d303ee36f..c8999ae58652 100644
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
@@ -170,7 +170,7 @@ config USB_SERIAL_FTDI_SIO
170 170
171config USB_SERIAL_FUNSOFT 171config USB_SERIAL_FUNSOFT
172 tristate "USB Fundamental Software Dongle Driver" 172 tristate "USB Fundamental Software Dongle Driver"
173 depends on USB_SERIAL 173 depends on USB_SERIAL && !(SPARC || SPARC64)
174 ---help--- 174 ---help---
175 Say Y here if you want to use the Fundamental Software dongle. 175 Say Y here if you want to use the Fundamental Software dongle.
176 176
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 819266b7e2f8..5ca04e82ea19 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -625,6 +625,9 @@ static int option_send_setup(struct usb_serial_port *port)
625 625
626 dbg("%s", __FUNCTION__); 626 dbg("%s", __FUNCTION__);
627 627
628 if (port->number != 0)
629 return 0;
630
628 portdata = usb_get_serial_port_data(port); 631 portdata = usb_get_serial_port_data(port);
629 632
630 if (port->tty) { 633 if (port->tty) {
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index 5fe7ff441a09..cddef3efba0a 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -728,7 +728,7 @@ UNUSUAL_DEV( 0x05ac, 0x1204, 0x0000, 0x9999,
728 "Apple", 728 "Apple",
729 "iPod", 729 "iPod",
730 US_SC_DEVICE, US_PR_DEVICE, NULL, 730 US_SC_DEVICE, US_PR_DEVICE, NULL,
731 US_FL_FIX_CAPACITY ), 731 US_FL_FIX_CAPACITY | US_FL_NOT_LOCKABLE ),
732 732
733UNUSUAL_DEV( 0x05ac, 0x1205, 0x0000, 0x9999, 733UNUSUAL_DEV( 0x05ac, 0x1205, 0x0000, 0x9999,
734 "Apple", 734 "Apple",
@@ -1358,6 +1358,21 @@ UNUSUAL_DEV( 0x1370, 0x6828, 0x0110, 0x0110,
1358 US_SC_DEVICE, US_PR_DEVICE, NULL, 1358 US_SC_DEVICE, US_PR_DEVICE, NULL,
1359 US_FL_IGNORE_RESIDUE ), 1359 US_FL_IGNORE_RESIDUE ),
1360 1360
1361/* Reported by Francesco Foresti <frafore@tiscali.it> */
1362UNUSUAL_DEV( 0x14cd, 0x6600, 0x0201, 0x0201,
1363 "Super Top",
1364 "IDE DEVICE",
1365 US_SC_DEVICE, US_PR_DEVICE, NULL,
1366 US_FL_IGNORE_RESIDUE ),
1367
1368/* Reported by Robert Schedel <r.schedel@yahoo.de>
1369 * Note: this is a 'super top' device like the above 14cd/6600 device */
1370UNUSUAL_DEV( 0x1652, 0x6600, 0x0201, 0x0201,
1371 "Teac",
1372 "HD-35PUK-B",
1373 US_SC_DEVICE, US_PR_DEVICE, NULL,
1374 US_FL_IGNORE_RESIDUE ),
1375
1361/* patch submitted by Davide Perini <perini.davide@dpsoftware.org> 1376/* patch submitted by Davide Perini <perini.davide@dpsoftware.org>
1362 * and Renato Perini <rperini@email.it> 1377 * and Renato Perini <rperini@email.it>
1363 */ 1378 */
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index bbfc86259272..b9b2b27b68c3 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -53,7 +53,7 @@ static inline int adfs_readname(char *buf, char *ptr, int maxlen)
53{ 53{
54 char *old_buf = buf; 54 char *old_buf = buf;
55 55
56 while (*ptr >= ' ' && maxlen--) { 56 while ((unsigned char)*ptr >= ' ' && maxlen--) {
57 if (*ptr == '/') 57 if (*ptr == '/')
58 *buf++ = '.'; 58 *buf++ = '.';
59 else 59 else
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index 34e6d7b220c3..869f5193ecc2 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -14,59 +14,307 @@
14#include <linux/time.h> 14#include <linux/time.h>
15#include <linux/smp_lock.h> 15#include <linux/smp_lock.h>
16#include <linux/namei.h> 16#include <linux/namei.h>
17#include <linux/poll.h>
17 18
18static int return_EIO(void) 19
20static loff_t bad_file_llseek(struct file *file, loff_t offset, int origin)
21{
22 return -EIO;
23}
24
25static ssize_t bad_file_read(struct file *filp, char __user *buf,
26 size_t size, loff_t *ppos)
27{
28 return -EIO;
29}
30
31static ssize_t bad_file_write(struct file *filp, const char __user *buf,
32 size_t siz, loff_t *ppos)
33{
34 return -EIO;
35}
36
37static ssize_t bad_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
38 unsigned long nr_segs, loff_t pos)
39{
40 return -EIO;
41}
42
43static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
44 unsigned long nr_segs, loff_t pos)
45{
46 return -EIO;
47}
48
49static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir)
50{
51 return -EIO;
52}
53
54static unsigned int bad_file_poll(struct file *filp, poll_table *wait)
55{
56 return POLLERR;
57}
58
59static int bad_file_ioctl (struct inode *inode, struct file *filp,
60 unsigned int cmd, unsigned long arg)
61{
62 return -EIO;
63}
64
65static long bad_file_unlocked_ioctl(struct file *file, unsigned cmd,
66 unsigned long arg)
67{
68 return -EIO;
69}
70
71static long bad_file_compat_ioctl(struct file *file, unsigned int cmd,
72 unsigned long arg)
73{
74 return -EIO;
75}
76
77static int bad_file_mmap(struct file *file, struct vm_area_struct *vma)
78{
79 return -EIO;
80}
81
82static int bad_file_open(struct inode *inode, struct file *filp)
83{
84 return -EIO;
85}
86
87static int bad_file_flush(struct file *file, fl_owner_t id)
88{
89 return -EIO;
90}
91
92static int bad_file_release(struct inode *inode, struct file *filp)
93{
94 return -EIO;
95}
96
97static int bad_file_fsync(struct file *file, struct dentry *dentry,
98 int datasync)
99{
100 return -EIO;
101}
102
103static int bad_file_aio_fsync(struct kiocb *iocb, int datasync)
104{
105 return -EIO;
106}
107
108static int bad_file_fasync(int fd, struct file *filp, int on)
109{
110 return -EIO;
111}
112
113static int bad_file_lock(struct file *file, int cmd, struct file_lock *fl)
114{
115 return -EIO;
116}
117
118static ssize_t bad_file_sendfile(struct file *in_file, loff_t *ppos,
119 size_t count, read_actor_t actor, void *target)
120{
121 return -EIO;
122}
123
124static ssize_t bad_file_sendpage(struct file *file, struct page *page,
125 int off, size_t len, loff_t *pos, int more)
126{
127 return -EIO;
128}
129
130static unsigned long bad_file_get_unmapped_area(struct file *file,
131 unsigned long addr, unsigned long len,
132 unsigned long pgoff, unsigned long flags)
133{
134 return -EIO;
135}
136
137static int bad_file_check_flags(int flags)
19{ 138{
20 return -EIO; 139 return -EIO;
21} 140}
22 141
23#define EIO_ERROR ((void *) (return_EIO)) 142static int bad_file_dir_notify(struct file *file, unsigned long arg)
143{
144 return -EIO;
145}
146
147static int bad_file_flock(struct file *filp, int cmd, struct file_lock *fl)
148{
149 return -EIO;
150}
151
152static ssize_t bad_file_splice_write(struct pipe_inode_info *pipe,
153 struct file *out, loff_t *ppos, size_t len,
154 unsigned int flags)
155{
156 return -EIO;
157}
158
159static ssize_t bad_file_splice_read(struct file *in, loff_t *ppos,
160 struct pipe_inode_info *pipe, size_t len,
161 unsigned int flags)
162{
163 return -EIO;
164}
24 165
25static const struct file_operations bad_file_ops = 166static const struct file_operations bad_file_ops =
26{ 167{
27 .llseek = EIO_ERROR, 168 .llseek = bad_file_llseek,
28 .aio_read = EIO_ERROR, 169 .read = bad_file_read,
29 .read = EIO_ERROR, 170 .write = bad_file_write,
30 .write = EIO_ERROR, 171 .aio_read = bad_file_aio_read,
31 .aio_write = EIO_ERROR, 172 .aio_write = bad_file_aio_write,
32 .readdir = EIO_ERROR, 173 .readdir = bad_file_readdir,
33 .poll = EIO_ERROR, 174 .poll = bad_file_poll,
34 .ioctl = EIO_ERROR, 175 .ioctl = bad_file_ioctl,
35 .mmap = EIO_ERROR, 176 .unlocked_ioctl = bad_file_unlocked_ioctl,
36 .open = EIO_ERROR, 177 .compat_ioctl = bad_file_compat_ioctl,
37 .flush = EIO_ERROR, 178 .mmap = bad_file_mmap,
38 .release = EIO_ERROR, 179 .open = bad_file_open,
39 .fsync = EIO_ERROR, 180 .flush = bad_file_flush,
40 .aio_fsync = EIO_ERROR, 181 .release = bad_file_release,
41 .fasync = EIO_ERROR, 182 .fsync = bad_file_fsync,
42 .lock = EIO_ERROR, 183 .aio_fsync = bad_file_aio_fsync,
43 .sendfile = EIO_ERROR, 184 .fasync = bad_file_fasync,
44 .sendpage = EIO_ERROR, 185 .lock = bad_file_lock,
45 .get_unmapped_area = EIO_ERROR, 186 .sendfile = bad_file_sendfile,
187 .sendpage = bad_file_sendpage,
188 .get_unmapped_area = bad_file_get_unmapped_area,
189 .check_flags = bad_file_check_flags,
190 .dir_notify = bad_file_dir_notify,
191 .flock = bad_file_flock,
192 .splice_write = bad_file_splice_write,
193 .splice_read = bad_file_splice_read,
46}; 194};
47 195
196static int bad_inode_create (struct inode *dir, struct dentry *dentry,
197 int mode, struct nameidata *nd)
198{
199 return -EIO;
200}
201
202static struct dentry *bad_inode_lookup(struct inode *dir,
203 struct dentry *dentry, struct nameidata *nd)
204{
205 return ERR_PTR(-EIO);
206}
207
208static int bad_inode_link (struct dentry *old_dentry, struct inode *dir,
209 struct dentry *dentry)
210{
211 return -EIO;
212}
213
214static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
215{
216 return -EIO;
217}
218
219static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
220 const char *symname)
221{
222 return -EIO;
223}
224
225static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
226 int mode)
227{
228 return -EIO;
229}
230
231static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
232{
233 return -EIO;
234}
235
236static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
237 int mode, dev_t rdev)
238{
239 return -EIO;
240}
241
242static int bad_inode_rename (struct inode *old_dir, struct dentry *old_dentry,
243 struct inode *new_dir, struct dentry *new_dentry)
244{
245 return -EIO;
246}
247
248static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
249 int buflen)
250{
251 return -EIO;
252}
253
254static int bad_inode_permission(struct inode *inode, int mask,
255 struct nameidata *nd)
256{
257 return -EIO;
258}
259
260static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry,
261 struct kstat *stat)
262{
263 return -EIO;
264}
265
266static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
267{
268 return -EIO;
269}
270
271static int bad_inode_setxattr(struct dentry *dentry, const char *name,
272 const void *value, size_t size, int flags)
273{
274 return -EIO;
275}
276
277static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name,
278 void *buffer, size_t size)
279{
280 return -EIO;
281}
282
283static ssize_t bad_inode_listxattr(struct dentry *dentry, char *buffer,
284 size_t buffer_size)
285{
286 return -EIO;
287}
288
289static int bad_inode_removexattr(struct dentry *dentry, const char *name)
290{
291 return -EIO;
292}
293
48static struct inode_operations bad_inode_ops = 294static struct inode_operations bad_inode_ops =
49{ 295{
50 .create = EIO_ERROR, 296 .create = bad_inode_create,
51 .lookup = EIO_ERROR, 297 .lookup = bad_inode_lookup,
52 .link = EIO_ERROR, 298 .link = bad_inode_link,
53 .unlink = EIO_ERROR, 299 .unlink = bad_inode_unlink,
54 .symlink = EIO_ERROR, 300 .symlink = bad_inode_symlink,
55 .mkdir = EIO_ERROR, 301 .mkdir = bad_inode_mkdir,
56 .rmdir = EIO_ERROR, 302 .rmdir = bad_inode_rmdir,
57 .mknod = EIO_ERROR, 303 .mknod = bad_inode_mknod,
58 .rename = EIO_ERROR, 304 .rename = bad_inode_rename,
59 .readlink = EIO_ERROR, 305 .readlink = bad_inode_readlink,
60 /* follow_link must be no-op, otherwise unmounting this inode 306 /* follow_link must be no-op, otherwise unmounting this inode
61 won't work */ 307 won't work */
62 .truncate = EIO_ERROR, 308 /* put_link returns void */
63 .permission = EIO_ERROR, 309 /* truncate returns void */
64 .getattr = EIO_ERROR, 310 .permission = bad_inode_permission,
65 .setattr = EIO_ERROR, 311 .getattr = bad_inode_getattr,
66 .setxattr = EIO_ERROR, 312 .setattr = bad_inode_setattr,
67 .getxattr = EIO_ERROR, 313 .setxattr = bad_inode_setxattr,
68 .listxattr = EIO_ERROR, 314 .getxattr = bad_inode_getxattr,
69 .removexattr = EIO_ERROR, 315 .listxattr = bad_inode_listxattr,
316 .removexattr = bad_inode_removexattr,
317 /* truncate_range returns void */
70}; 318};
71 319
72 320
@@ -88,7 +336,7 @@ static struct inode_operations bad_inode_ops =
88 * on it to fail from this point on. 336 * on it to fail from this point on.
89 */ 337 */
90 338
91void make_bad_inode(struct inode * inode) 339void make_bad_inode(struct inode *inode)
92{ 340{
93 remove_inode_hash(inode); 341 remove_inode_hash(inode);
94 342
@@ -113,7 +361,7 @@ EXPORT_SYMBOL(make_bad_inode);
113 * Returns true if the inode in question has been marked as bad. 361 * Returns true if the inode in question has been marked as bad.
114 */ 362 */
115 363
116int is_bad_inode(struct inode * inode) 364int is_bad_inode(struct inode *inode)
117{ 365{
118 return (inode->i_op == &bad_inode_ops); 366 return (inode->i_op == &bad_inode_ops);
119} 367}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index d3adfd353ff9..7cb28720f90e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -854,13 +854,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
854 * default mmap base, as well as whatever program they 854 * default mmap base, as well as whatever program they
855 * might try to exec. This is because the brk will 855 * might try to exec. This is because the brk will
856 * follow the loader, and is not movable. */ 856 * follow the loader, and is not movable. */
857 if (current->flags & PF_RANDOMIZE) 857 load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
858 load_bias = randomize_range(0x10000,
859 ELF_ET_DYN_BASE,
860 0);
861 else
862 load_bias = ELF_ET_DYN_BASE;
863 load_bias = ELF_PAGESTART(load_bias - vaddr);
864 } 858 }
865 859
866 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, 860 error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index b82381475779..2e0021e8f366 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -275,6 +275,25 @@ static void ufs_change_blocknr(struct inode *inode, unsigned int baseblk,
275 UFSD("EXIT\n"); 275 UFSD("EXIT\n");
276} 276}
277 277
278static void ufs_clear_frags(struct inode *inode, sector_t beg, unsigned int n,
279 int sync)
280{
281 struct buffer_head *bh;
282 sector_t end = beg + n;
283
284 for (; beg < end; ++beg) {
285 bh = sb_getblk(inode->i_sb, beg);
286 lock_buffer(bh);
287 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
288 set_buffer_uptodate(bh);
289 mark_buffer_dirty(bh);
290 unlock_buffer(bh);
291 if (IS_SYNC(inode) || sync)
292 sync_dirty_buffer(bh);
293 brelse(bh);
294 }
295}
296
278unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment, 297unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
279 unsigned goal, unsigned count, int * err, struct page *locked_page) 298 unsigned goal, unsigned count, int * err, struct page *locked_page)
280{ 299{
@@ -350,6 +369,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
350 *p = cpu_to_fs32(sb, result); 369 *p = cpu_to_fs32(sb, result);
351 *err = 0; 370 *err = 0;
352 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 371 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
372 ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
373 locked_page != NULL);
353 } 374 }
354 unlock_super(sb); 375 unlock_super(sb);
355 UFSD("EXIT, result %u\n", result); 376 UFSD("EXIT, result %u\n", result);
@@ -363,6 +384,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
363 if (result) { 384 if (result) {
364 *err = 0; 385 *err = 0;
365 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 386 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
387 ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
388 locked_page != NULL);
366 unlock_super(sb); 389 unlock_super(sb);
367 UFSD("EXIT, result %u\n", result); 390 UFSD("EXIT, result %u\n", result);
368 return result; 391 return result;
@@ -398,6 +421,8 @@ unsigned ufs_new_fragments(struct inode * inode, __fs32 * p, unsigned fragment,
398 *p = cpu_to_fs32(sb, result); 421 *p = cpu_to_fs32(sb, result);
399 *err = 0; 422 *err = 0;
400 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count); 423 UFS_I(inode)->i_lastfrag = max_t(u32, UFS_I(inode)->i_lastfrag, fragment + count);
424 ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
425 locked_page != NULL);
401 unlock_super(sb); 426 unlock_super(sb);
402 if (newcount < request) 427 if (newcount < request)
403 ufs_free_fragments (inode, result + newcount, request - newcount); 428 ufs_free_fragments (inode, result + newcount, request - newcount);
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index ee1eaa6f4ec2..2fbab0aab688 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -156,36 +156,6 @@ out:
156 return ret; 156 return ret;
157} 157}
158 158
159static void ufs_clear_frag(struct inode *inode, struct buffer_head *bh)
160{
161 lock_buffer(bh);
162 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
163 set_buffer_uptodate(bh);
164 mark_buffer_dirty(bh);
165 unlock_buffer(bh);
166 if (IS_SYNC(inode))
167 sync_dirty_buffer(bh);
168}
169
170static struct buffer_head *
171ufs_clear_frags(struct inode *inode, sector_t beg,
172 unsigned int n, sector_t want)
173{
174 struct buffer_head *res = NULL, *bh;
175 sector_t end = beg + n;
176
177 for (; beg < end; ++beg) {
178 bh = sb_getblk(inode->i_sb, beg);
179 ufs_clear_frag(inode, bh);
180 if (want != beg)
181 brelse(bh);
182 else
183 res = bh;
184 }
185 BUG_ON(!res);
186 return res;
187}
188
189/** 159/**
190 * ufs_inode_getfrag() - allocate new fragment(s) 160 * ufs_inode_getfrag() - allocate new fragment(s)
191 * @inode - pointer to inode 161 * @inode - pointer to inode
@@ -302,7 +272,7 @@ repeat:
302 } 272 }
303 273
304 if (!phys) { 274 if (!phys) {
305 result = ufs_clear_frags(inode, tmp, required, tmp + blockoff); 275 result = sb_getblk(sb, tmp + blockoff);
306 } else { 276 } else {
307 *phys = tmp + blockoff; 277 *phys = tmp + blockoff;
308 result = NULL; 278 result = NULL;
@@ -403,8 +373,7 @@ repeat:
403 373
404 374
405 if (!phys) { 375 if (!phys) {
406 result = ufs_clear_frags(inode, tmp, uspi->s_fpb, 376 result = sb_getblk(sb, tmp + blockoff);
407 tmp + blockoff);
408 } else { 377 } else {
409 *phys = tmp + blockoff; 378 *phys = tmp + blockoff;
410 *new = 1; 379 *new = 1;
@@ -471,13 +440,13 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head
471#define GET_INODE_DATABLOCK(x) \ 440#define GET_INODE_DATABLOCK(x) \
472 ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page) 441 ufs_inode_getfrag(inode, x, fragment, 1, &err, &phys, &new, bh_result->b_page)
473#define GET_INODE_PTR(x) \ 442#define GET_INODE_PTR(x) \
474 ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, bh_result->b_page) 443 ufs_inode_getfrag(inode, x, fragment, uspi->s_fpb, &err, NULL, NULL, NULL)
475#define GET_INDIRECT_DATABLOCK(x) \ 444#define GET_INDIRECT_DATABLOCK(x) \
476 ufs_inode_getblock(inode, bh, x, fragment, \ 445 ufs_inode_getblock(inode, bh, x, fragment, \
477 &err, &phys, &new, bh_result->b_page); 446 &err, &phys, &new, bh_result->b_page)
478#define GET_INDIRECT_PTR(x) \ 447#define GET_INDIRECT_PTR(x) \
479 ufs_inode_getblock(inode, bh, x, fragment, \ 448 ufs_inode_getblock(inode, bh, x, fragment, \
480 &err, NULL, NULL, bh_result->b_page); 449 &err, NULL, NULL, NULL)
481 450
482 if (ptr < UFS_NDIR_FRAGMENT) { 451 if (ptr < UFS_NDIR_FRAGMENT) {
483 bh = GET_INODE_DATABLOCK(ptr); 452 bh = GET_INODE_DATABLOCK(ptr);
diff --git a/include/asm-i386/boot.h b/include/asm-i386/boot.h
index 8ce79a6fa891..e7686d0a8413 100644
--- a/include/asm-i386/boot.h
+++ b/include/asm-i386/boot.h
@@ -13,7 +13,8 @@
13#define ASK_VGA 0xfffd /* ask for it at bootup */ 13#define ASK_VGA 0xfffd /* ask for it at bootup */
14 14
15/* Physical address where kenrel should be loaded. */ 15/* Physical address where kenrel should be loaded. */
16#define LOAD_PHYSICAL_ADDR ((0x100000 + CONFIG_PHYSICAL_ALIGN - 1) \ 16#define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \
17 + (CONFIG_PHYSICAL_ALIGN - 1)) \
17 & ~(CONFIG_PHYSICAL_ALIGN - 1)) 18 & ~(CONFIG_PHYSICAL_ALIGN - 1))
18 19
19#endif /* _LINUX_BOOT_H */ 20#endif /* _LINUX_BOOT_H */
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 28fdce1ac1db..bc8b4616bad7 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -11,7 +11,7 @@
11#include <asm/types.h> 11#include <asm/types.h>
12#include <linux/ioctl.h> 12#include <linux/ioctl.h>
13 13
14#define KVM_API_VERSION 1 14#define KVM_API_VERSION 2
15 15
16/* 16/*
17 * Architectural interrupt line count, and the size of the bitmap needed 17 * Architectural interrupt line count, and the size of the bitmap needed
@@ -45,6 +45,7 @@ enum kvm_exit_reason {
45 KVM_EXIT_DEBUG = 4, 45 KVM_EXIT_DEBUG = 4,
46 KVM_EXIT_HLT = 5, 46 KVM_EXIT_HLT = 5,
47 KVM_EXIT_MMIO = 6, 47 KVM_EXIT_MMIO = 6,
48 KVM_EXIT_IRQ_WINDOW_OPEN = 7,
48}; 49};
49 50
50/* for KVM_RUN */ 51/* for KVM_RUN */
@@ -53,11 +54,19 @@ struct kvm_run {
53 __u32 vcpu; 54 __u32 vcpu;
54 __u32 emulated; /* skip current instruction */ 55 __u32 emulated; /* skip current instruction */
55 __u32 mmio_completed; /* mmio request completed */ 56 __u32 mmio_completed; /* mmio request completed */
57 __u8 request_interrupt_window;
58 __u8 padding1[3];
56 59
57 /* out */ 60 /* out */
58 __u32 exit_type; 61 __u32 exit_type;
59 __u32 exit_reason; 62 __u32 exit_reason;
60 __u32 instruction_length; 63 __u32 instruction_length;
64 __u8 ready_for_interrupt_injection;
65 __u8 if_flag;
66 __u16 padding2;
67 __u64 cr8;
68 __u64 apic_base;
69
61 union { 70 union {
62 /* KVM_EXIT_UNKNOWN */ 71 /* KVM_EXIT_UNKNOWN */
63 struct { 72 struct {
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 156c40fc664e..b78bbf42135a 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -3,6 +3,7 @@
3 3
4#define ADFS_SUPER_MAGIC 0xadf5 4#define ADFS_SUPER_MAGIC 0xadf5
5#define AFFS_SUPER_MAGIC 0xadff 5#define AFFS_SUPER_MAGIC 0xadff
6#define AFS_SUPER_MAGIC 0x5346414F
6#define AUTOFS_SUPER_MAGIC 0x0187 7#define AUTOFS_SUPER_MAGIC 0x0187
7#define CODA_SUPER_MAGIC 0x73757245 8#define CODA_SUPER_MAGIC 0x73757245
8#define EFS_SUPER_MAGIC 0x414A53 9#define EFS_SUPER_MAGIC 0x414A53
diff --git a/include/linux/swap.h b/include/linux/swap.h
index add51cebc8d9..5423559a44a6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -245,7 +245,7 @@ extern int swap_duplicate(swp_entry_t);
245extern int valid_swaphandles(swp_entry_t, unsigned long *); 245extern int valid_swaphandles(swp_entry_t, unsigned long *);
246extern void swap_free(swp_entry_t); 246extern void swap_free(swp_entry_t);
247extern void free_swap_and_cache(swp_entry_t); 247extern void free_swap_and_cache(swp_entry_t);
248extern int swap_type_of(dev_t, sector_t); 248extern int swap_type_of(dev_t, sector_t, struct block_device **);
249extern unsigned int count_swap_pages(int, int); 249extern unsigned int count_swap_pages(int, int);
250extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t); 250extern sector_t map_swap_page(struct swap_info_struct *, pgoff_t);
251extern sector_t swapdev_block(int, pgoff_t); 251extern sector_t swapdev_block(int, pgoff_t);
diff --git a/init/main.c b/init/main.c
index 2b1cdaab45e6..bc27d72bbb19 100644
--- a/init/main.c
+++ b/init/main.c
@@ -538,6 +538,11 @@ asmlinkage void __init start_kernel(void)
538 parse_args("Booting kernel", command_line, __start___param, 538 parse_args("Booting kernel", command_line, __start___param,
539 __stop___param - __start___param, 539 __stop___param - __start___param,
540 &unknown_bootoption); 540 &unknown_bootoption);
541 if (!irqs_disabled()) {
542 printk(KERN_WARNING "start_kernel(): bug: interrupts were "
543 "enabled *very* early, fixing it\n");
544 local_irq_disable();
545 }
541 sort_main_extable(); 546 sort_main_extable();
542 trap_init(); 547 trap_init();
543 rcu_init(); 548 rcu_init();
diff --git a/kernel/module.c b/kernel/module.c
index dbce132b354c..d0f2260a0210 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1148,10 +1148,10 @@ static int mod_sysfs_setup(struct module *mod,
1148 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD); 1148 kobject_uevent(&mod->mkobj.kobj, KOBJ_ADD);
1149 return 0; 1149 return 0;
1150 1150
1151out_unreg_drivers:
1152 kobject_unregister(mod->drivers_dir);
1153out_unreg_param: 1151out_unreg_param:
1154 module_param_sysfs_remove(mod); 1152 module_param_sysfs_remove(mod);
1153out_unreg_drivers:
1154 kobject_unregister(mod->drivers_dir);
1155out_unreg: 1155out_unreg:
1156 kobject_del(&mod->mkobj.kobj); 1156 kobject_del(&mod->mkobj.kobj);
1157 kobject_put(&mod->mkobj.kobj); 1157 kobject_put(&mod->mkobj.kobj);
@@ -2327,8 +2327,22 @@ void print_modules(void)
2327 printk("\n"); 2327 printk("\n");
2328} 2328}
2329 2329
2330static char *make_driver_name(struct device_driver *drv)
2331{
2332 char *driver_name;
2333
2334 driver_name = kmalloc(strlen(drv->name) + strlen(drv->bus->name) + 2,
2335 GFP_KERNEL);
2336 if (!driver_name)
2337 return NULL;
2338
2339 sprintf(driver_name, "%s:%s", drv->bus->name, drv->name);
2340 return driver_name;
2341}
2342
2330void module_add_driver(struct module *mod, struct device_driver *drv) 2343void module_add_driver(struct module *mod, struct device_driver *drv)
2331{ 2344{
2345 char *driver_name;
2332 int no_warn; 2346 int no_warn;
2333 2347
2334 if (!mod || !drv) 2348 if (!mod || !drv)
@@ -2336,17 +2350,31 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
2336 2350
2337 /* Don't check return codes; these calls are idempotent */ 2351 /* Don't check return codes; these calls are idempotent */
2338 no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module"); 2352 no_warn = sysfs_create_link(&drv->kobj, &mod->mkobj.kobj, "module");
2339 no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj, drv->name); 2353 driver_name = make_driver_name(drv);
2354 if (driver_name) {
2355 no_warn = sysfs_create_link(mod->drivers_dir, &drv->kobj,
2356 driver_name);
2357 kfree(driver_name);
2358 }
2340} 2359}
2341EXPORT_SYMBOL(module_add_driver); 2360EXPORT_SYMBOL(module_add_driver);
2342 2361
2343void module_remove_driver(struct device_driver *drv) 2362void module_remove_driver(struct device_driver *drv)
2344{ 2363{
2364 char *driver_name;
2365
2345 if (!drv) 2366 if (!drv)
2346 return; 2367 return;
2368
2347 sysfs_remove_link(&drv->kobj, "module"); 2369 sysfs_remove_link(&drv->kobj, "module");
2348 if (drv->owner && drv->owner->drivers_dir) 2370 if (drv->owner && drv->owner->drivers_dir) {
2349 sysfs_remove_link(drv->owner->drivers_dir, drv->name); 2371 driver_name = make_driver_name(drv);
2372 if (driver_name) {
2373 sysfs_remove_link(drv->owner->drivers_dir,
2374 driver_name);
2375 kfree(driver_name);
2376 }
2377 }
2350} 2378}
2351EXPORT_SYMBOL(module_remove_driver); 2379EXPORT_SYMBOL(module_remove_driver);
2352 2380
diff --git a/kernel/params.c b/kernel/params.c
index f406655d6653..718945da8f58 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -143,9 +143,15 @@ int parse_args(const char *name,
143 143
144 while (*args) { 144 while (*args) {
145 int ret; 145 int ret;
146 int irq_was_disabled;
146 147
147 args = next_arg(args, &param, &val); 148 args = next_arg(args, &param, &val);
149 irq_was_disabled = irqs_disabled();
148 ret = parse_one(param, val, params, num, unknown); 150 ret = parse_one(param, val, params, num, unknown);
151 if (irq_was_disabled && !irqs_disabled()) {
152 printk(KERN_WARNING "parse_args(): option '%s' enabled "
153 "irq's!\n", param);
154 }
149 switch (ret) { 155 switch (ret) {
150 case -ENOENT: 156 case -ENOENT:
151 printk(KERN_ERR "%s: Unknown parameter `%s'\n", 157 printk(KERN_ERR "%s: Unknown parameter `%s'\n",
diff --git a/kernel/power/swap.c b/kernel/power/swap.c
index f133d4a6d817..3581f8f86acd 100644
--- a/kernel/power/swap.c
+++ b/kernel/power/swap.c
@@ -165,14 +165,15 @@ static int swsusp_swap_check(void) /* This is called before saving image */
165{ 165{
166 int res; 166 int res;
167 167
168 res = swap_type_of(swsusp_resume_device, swsusp_resume_block); 168 res = swap_type_of(swsusp_resume_device, swsusp_resume_block,
169 &resume_bdev);
169 if (res < 0) 170 if (res < 0)
170 return res; 171 return res;
171 172
172 root_swap = res; 173 root_swap = res;
173 resume_bdev = open_by_devnum(swsusp_resume_device, FMODE_WRITE); 174 res = blkdev_get(resume_bdev, FMODE_WRITE, O_RDWR);
174 if (IS_ERR(resume_bdev)) 175 if (res)
175 return PTR_ERR(resume_bdev); 176 return res;
176 177
177 res = set_blocksize(resume_bdev, PAGE_SIZE); 178 res = set_blocksize(resume_bdev, PAGE_SIZE);
178 if (res < 0) 179 if (res < 0)
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 89443b85163b..f7b7a785a5c6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -57,7 +57,7 @@ static int snapshot_open(struct inode *inode, struct file *filp)
57 memset(&data->handle, 0, sizeof(struct snapshot_handle)); 57 memset(&data->handle, 0, sizeof(struct snapshot_handle));
58 if ((filp->f_flags & O_ACCMODE) == O_RDONLY) { 58 if ((filp->f_flags & O_ACCMODE) == O_RDONLY) {
59 data->swap = swsusp_resume_device ? 59 data->swap = swsusp_resume_device ?
60 swap_type_of(swsusp_resume_device, 0) : -1; 60 swap_type_of(swsusp_resume_device, 0, NULL) : -1;
61 data->mode = O_RDONLY; 61 data->mode = O_RDONLY;
62 } else { 62 } else {
63 data->swap = -1; 63 data->swap = -1;
@@ -268,7 +268,8 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
268 * so we need to recode them 268 * so we need to recode them
269 */ 269 */
270 if (old_decode_dev(arg)) { 270 if (old_decode_dev(arg)) {
271 data->swap = swap_type_of(old_decode_dev(arg), 0); 271 data->swap = swap_type_of(old_decode_dev(arg),
272 0, NULL);
272 if (data->swap < 0) 273 if (data->swap < 0)
273 error = -ENODEV; 274 error = -ENODEV;
274 } else { 275 } else {
@@ -365,7 +366,7 @@ static int snapshot_ioctl(struct inode *inode, struct file *filp,
365 swdev = old_decode_dev(swap_area.dev); 366 swdev = old_decode_dev(swap_area.dev);
366 if (swdev) { 367 if (swdev) {
367 offset = swap_area.offset; 368 offset = swap_area.offset;
368 data->swap = swap_type_of(swdev, offset); 369 data->swap = swap_type_of(swdev, offset, NULL);
369 if (data->swap < 0) 370 if (data->swap < 0)
370 error = -ENODEV; 371 error = -ENODEV;
371 } else { 372 } else {
diff --git a/kernel/profile.c b/kernel/profile.c
index fb5e03d57e9d..11550b2290b6 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -63,7 +63,7 @@ static int __init profile_setup(char * str)
63 printk(KERN_INFO 63 printk(KERN_INFO
64 "kernel sleep profiling enabled (shift: %ld)\n", 64 "kernel sleep profiling enabled (shift: %ld)\n",
65 prof_shift); 65 prof_shift);
66 } else if (!strncmp(str, sleepstr, strlen(sleepstr))) { 66 } else if (!strncmp(str, schedstr, strlen(schedstr))) {
67 prof_on = SCHED_PROFILING; 67 prof_on = SCHED_PROFILING;
68 if (str[strlen(schedstr)] == ',') 68 if (str[strlen(schedstr)] == ',')
69 str += strlen(schedstr) + 1; 69 str += strlen(schedstr) + 1;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 6969cfb33901..b278b8d60eee 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -61,12 +61,6 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
61 } 61 }
62 62
63 /* 63 /*
64 * swapoff can easily use up all memory, so kill those first.
65 */
66 if (p->flags & PF_SWAPOFF)
67 return ULONG_MAX;
68
69 /*
70 * The memory size of the process is the basis for the badness. 64 * The memory size of the process is the basis for the badness.
71 */ 65 */
72 points = mm->total_vm; 66 points = mm->total_vm;
@@ -77,6 +71,12 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
77 task_unlock(p); 71 task_unlock(p);
78 72
79 /* 73 /*
74 * swapoff can easily use up all memory, so kill those first.
75 */
76 if (p->flags & PF_SWAPOFF)
77 return ULONG_MAX;
78
79 /*
80 * Processes which fork a lot of child processes are likely 80 * Processes which fork a lot of child processes are likely
81 * a good choice. We add half the vmsize of the children if they 81 * a good choice. We add half the vmsize of the children if they
82 * have an own mm. This prevents forking servers to flood the 82 * have an own mm. This prevents forking servers to flood the
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8c1a116875bc..a49f96b7ea43 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -711,6 +711,9 @@ static void __drain_pages(unsigned int cpu)
711 for_each_zone(zone) { 711 for_each_zone(zone) {
712 struct per_cpu_pageset *pset; 712 struct per_cpu_pageset *pset;
713 713
714 if (!populated_zone(zone))
715 continue;
716
714 pset = zone_pcp(zone, cpu); 717 pset = zone_pcp(zone, cpu);
715 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) { 718 for (i = 0; i < ARRAY_SIZE(pset->pcp); i++) {
716 struct per_cpu_pages *pcp; 719 struct per_cpu_pages *pcp;
@@ -3321,6 +3324,10 @@ void *__init alloc_large_system_hash(const char *tablename,
3321 numentries >>= (scale - PAGE_SHIFT); 3324 numentries >>= (scale - PAGE_SHIFT);
3322 else 3325 else
3323 numentries <<= (PAGE_SHIFT - scale); 3326 numentries <<= (PAGE_SHIFT - scale);
3327
3328 /* Make sure we've got at least a 0-order allocation.. */
3329 if (unlikely((numentries * bucketsize) < PAGE_SIZE))
3330 numentries = PAGE_SIZE / bucketsize;
3324 } 3331 }
3325 numentries = roundup_pow_of_two(numentries); 3332 numentries = roundup_pow_of_two(numentries);
3326 3333
diff --git a/mm/slab.c b/mm/slab.c
index 0d4e57431de4..c6100628a6ef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3281,7 +3281,7 @@ retry:
3281 flags | GFP_THISNODE, nid); 3281 flags | GFP_THISNODE, nid);
3282 } 3282 }
3283 3283
3284 if (!obj) { 3284 if (!obj && !(flags & __GFP_NO_GROW)) {
3285 /* 3285 /*
3286 * This allocation will be performed within the constraints 3286 * This allocation will be performed within the constraints
3287 * of the current cpuset / memory policy requirements. 3287 * of the current cpuset / memory policy requirements.
@@ -3310,7 +3310,7 @@ retry:
3310 */ 3310 */
3311 goto retry; 3311 goto retry;
3312 } else { 3312 } else {
3313 kmem_freepages(cache, obj); 3313 /* cache_grow already freed obj */
3314 obj = NULL; 3314 obj = NULL;
3315 } 3315 }
3316 } 3316 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b9fc0e5de6d5..a2d9bb4e80df 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -434,7 +434,7 @@ void free_swap_and_cache(swp_entry_t entry)
434 * 434 *
435 * This is needed for the suspend to disk (aka swsusp). 435 * This is needed for the suspend to disk (aka swsusp).
436 */ 436 */
437int swap_type_of(dev_t device, sector_t offset) 437int swap_type_of(dev_t device, sector_t offset, struct block_device **bdev_p)
438{ 438{
439 struct block_device *bdev = NULL; 439 struct block_device *bdev = NULL;
440 int i; 440 int i;
@@ -450,6 +450,9 @@ int swap_type_of(dev_t device, sector_t offset)
450 continue; 450 continue;
451 451
452 if (!bdev) { 452 if (!bdev) {
453 if (bdev_p)
454 *bdev_p = sis->bdev;
455
453 spin_unlock(&swap_lock); 456 spin_unlock(&swap_lock);
454 return i; 457 return i;
455 } 458 }
@@ -459,6 +462,9 @@ int swap_type_of(dev_t device, sector_t offset)
459 se = list_entry(sis->extent_list.next, 462 se = list_entry(sis->extent_list.next,
460 struct swap_extent, list); 463 struct swap_extent, list);
461 if (se->start_block == offset) { 464 if (se->start_block == offset) {
465 if (bdev_p)
466 *bdev_p = sis->bdev;
467
462 spin_unlock(&swap_lock); 468 spin_unlock(&swap_lock);
463 bdput(bdev); 469 bdput(bdev);
464 return i; 470 return i;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 40fea4918390..7430df68cb64 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1406,6 +1406,16 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
1406 return ret; 1406 return ret;
1407} 1407}
1408 1408
1409static unsigned long count_lru_pages(void)
1410{
1411 struct zone *zone;
1412 unsigned long ret = 0;
1413
1414 for_each_zone(zone)
1415 ret += zone->nr_active + zone->nr_inactive;
1416 return ret;
1417}
1418
1409/* 1419/*
1410 * Try to free `nr_pages' of memory, system-wide, and return the number of 1420 * Try to free `nr_pages' of memory, system-wide, and return the number of
1411 * freed pages. 1421 * freed pages.
@@ -1420,7 +1430,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1420 unsigned long ret = 0; 1430 unsigned long ret = 0;
1421 int pass; 1431 int pass;
1422 struct reclaim_state reclaim_state; 1432 struct reclaim_state reclaim_state;
1423 struct zone *zone;
1424 struct scan_control sc = { 1433 struct scan_control sc = {
1425 .gfp_mask = GFP_KERNEL, 1434 .gfp_mask = GFP_KERNEL,
1426 .may_swap = 0, 1435 .may_swap = 0,
@@ -1431,10 +1440,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1431 1440
1432 current->reclaim_state = &reclaim_state; 1441 current->reclaim_state = &reclaim_state;
1433 1442
1434 lru_pages = 0; 1443 lru_pages = count_lru_pages();
1435 for_each_zone(zone)
1436 lru_pages += zone->nr_active + zone->nr_inactive;
1437
1438 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); 1444 nr_slab = global_page_state(NR_SLAB_RECLAIMABLE);
1439 /* If slab caches are huge, it's better to hit them first */ 1445 /* If slab caches are huge, it's better to hit them first */
1440 while (nr_slab >= lru_pages) { 1446 while (nr_slab >= lru_pages) {
@@ -1461,13 +1467,6 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1461 for (pass = 0; pass < 5; pass++) { 1467 for (pass = 0; pass < 5; pass++) {
1462 int prio; 1468 int prio;
1463 1469
1464 /* Needed for shrinking slab caches later on */
1465 if (!lru_pages)
1466 for_each_zone(zone) {
1467 lru_pages += zone->nr_active;
1468 lru_pages += zone->nr_inactive;
1469 }
1470
1471 /* Force reclaiming mapped pages in the passes #3 and #4 */ 1470 /* Force reclaiming mapped pages in the passes #3 and #4 */
1472 if (pass > 2) { 1471 if (pass > 2) {
1473 sc.may_swap = 1; 1472 sc.may_swap = 1;
@@ -1483,7 +1482,8 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1483 goto out; 1482 goto out;
1484 1483
1485 reclaim_state.reclaimed_slab = 0; 1484 reclaim_state.reclaimed_slab = 0;
1486 shrink_slab(sc.nr_scanned, sc.gfp_mask, lru_pages); 1485 shrink_slab(sc.nr_scanned, sc.gfp_mask,
1486 count_lru_pages());
1487 ret += reclaim_state.reclaimed_slab; 1487 ret += reclaim_state.reclaimed_slab;
1488 if (ret >= nr_pages) 1488 if (ret >= nr_pages)
1489 goto out; 1489 goto out;
@@ -1491,20 +1491,19 @@ unsigned long shrink_all_memory(unsigned long nr_pages)
1491 if (sc.nr_scanned && prio < DEF_PRIORITY - 2) 1491 if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
1492 congestion_wait(WRITE, HZ / 10); 1492 congestion_wait(WRITE, HZ / 10);
1493 } 1493 }
1494
1495 lru_pages = 0;
1496 } 1494 }
1497 1495
1498 /* 1496 /*
1499 * If ret = 0, we could not shrink LRUs, but there may be something 1497 * If ret = 0, we could not shrink LRUs, but there may be something
1500 * in slab caches 1498 * in slab caches
1501 */ 1499 */
1502 if (!ret) 1500 if (!ret) {
1503 do { 1501 do {
1504 reclaim_state.reclaimed_slab = 0; 1502 reclaim_state.reclaimed_slab = 0;
1505 shrink_slab(nr_pages, sc.gfp_mask, lru_pages); 1503 shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages());
1506 ret += reclaim_state.reclaimed_slab; 1504 ret += reclaim_state.reclaimed_slab;
1507 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); 1505 } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
1506 }
1508 1507
1509out: 1508out:
1510 current->reclaim_state = NULL; 1509 current->reclaim_state = NULL;
diff --git a/scripts/kconfig/qconf.cc b/scripts/kconfig/qconf.cc
index 0b2fcc417f59..a8ffc329666a 100644
--- a/scripts/kconfig/qconf.cc
+++ b/scripts/kconfig/qconf.cc
@@ -925,6 +925,8 @@ ConfigInfoView::ConfigInfoView(QWidget* parent, const char *name)
925 configSettings->endGroup(); 925 configSettings->endGroup();
926 connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings())); 926 connect(configApp, SIGNAL(aboutToQuit()), SLOT(saveSettings()));
927 } 927 }
928
929 has_dbg_info = 0;
928} 930}
929 931
930void ConfigInfoView::saveSettings(void) 932void ConfigInfoView::saveSettings(void)
@@ -953,10 +955,13 @@ void ConfigInfoView::setInfo(struct menu *m)
953 if (menu == m) 955 if (menu == m)
954 return; 956 return;
955 menu = m; 957 menu = m;
956 if (!menu) 958 if (!menu) {
959 has_dbg_info = 0;
957 clear(); 960 clear();
958 else 961 } else {
962 has_dbg_info = 1;
959 menuInfo(); 963 menuInfo();
964 }
960} 965}
961 966
962void ConfigInfoView::setSource(const QString& name) 967void ConfigInfoView::setSource(const QString& name)
@@ -991,6 +996,9 @@ void ConfigInfoView::symbolInfo(void)
991{ 996{
992 QString str; 997 QString str;
993 998
999 if (!has_dbg_info)
1000 return;
1001
994 str += "<big>Symbol: <b>"; 1002 str += "<big>Symbol: <b>";
995 str += print_filter(sym->name); 1003 str += print_filter(sym->name);
996 str += "</b></big><br><br>value: "; 1004 str += "</b></big><br><br>value: ";
diff --git a/scripts/kconfig/qconf.h b/scripts/kconfig/qconf.h
index 6fc1c5f14425..a397edb5adcf 100644
--- a/scripts/kconfig/qconf.h
+++ b/scripts/kconfig/qconf.h
@@ -273,6 +273,8 @@ protected:
273 struct symbol *sym; 273 struct symbol *sym;
274 struct menu *menu; 274 struct menu *menu;
275 bool _showDebug; 275 bool _showDebug;
276
277 int has_dbg_info;
276}; 278};
277 279
278class ConfigSearchWindow : public QDialog { 280class ConfigSearchWindow : public QDialog {