From d596043d71ff0d7b3d0bead19b1d68c55f003093 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Wed, 30 Jun 2010 17:45:19 -0700
Subject: x86, Calgary: Limit the max PHB number to 256

The x3950 family can have as many as 256 PCI buses in a single system, so
change the limits to the maximum.  Since there can only be 256 PCI buses in one
domain, we no longer need the BUG_ON check.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
LKML-Reference: <20100701004519.GQ15515@tux1.beaverton.ibm.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/pci-calgary_64.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index 0b96b5589f08..078d4ec1a9d9 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -110,7 +110,7 @@ int use_calgary __read_mostly = 0;
  * x3950 (PCIE): 8 chassis, 32 PHBs per chassis   = 256
  * x3950 (PCIX): 8 chassis, 16 PHBs per chassis   = 128
  */
-#define MAX_PHB_BUS_NUM		384
+#define MAX_PHB_BUS_NUM		256
 
 #define PHBS_PER_CALGARY	  4
 
@@ -1056,8 +1056,6 @@ static int __init calgary_init_one(struct pci_dev *dev)
 	struct iommu_table *tbl;
 	int ret;
 
-	BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
-
 	bbar = busno_to_bbar(dev->bus->number);
 	ret = calgary_setup_tar(dev, bbar);
 	if (ret)
-- 
cgit v1.2.2


From b945d6b2554d550fe95caadc61e521c0ad71fb9c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 29 May 2010 15:31:43 +0200
Subject: rbtree: Undo augmented trees performance damage and regression

Reimplement augmented RB-trees without sprinkling extra branches
all over the RB-tree code (which lives in the scheduler hot path).

This approach is 'borrowed' from Fabio's BFQ implementation and
relies on traversing the rebalance path after the RB-tree-op to
correct the heap property for insertion/removal and make up for
the damage done by the tree rotations.

For insertion the rebalance path is trivially that from the new
node upwards to the root, for removal it is that from the deepest
node in the path from the to be removed node that will still
be around after the removal.

[ This patch also fixes a video driver regression reported by
  Ali Gholami Rudi - the memtype->subtree_max_end was updated
  incorrectly. ]

Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Acked-by: Venkatesh Pallipadi <venki@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Tested-by: Ali Gholami Rudi <ali@rudi.ir>
Cc: Fabio Checconi <fabio@gandalf.sssup.it>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
LKML-Reference: <1275414172.27810.27961.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat_rbtree.c | 34 ++++++----------------------------
 1 file changed, 6 insertions(+), 28 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index f20eeec85a86..8acaddd0fb21 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -34,8 +34,7 @@
  * memtype_lock protects the rbtree.
  */
 
-static void memtype_rb_augment_cb(struct rb_node *node);
-static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb);
+static struct rb_root memtype_rbroot = RB_ROOT;
 
 static int is_node_overlap(struct memtype *node, u64 start, u64 end)
 {
@@ -56,7 +55,7 @@ static u64 get_subtree_max_end(struct rb_node *node)
 }
 
 /* Update 'subtree_max_end' for a node, based on node and its children */
-static void update_node_max_end(struct rb_node *node)
+static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
 {
 	struct memtype *data;
 	u64 max_end, child_max_end;
@@ -78,25 +77,6 @@ static void update_node_max_end(struct rb_node *node)
 	data->subtree_max_end = max_end;
 }
 
-/* Update 'subtree_max_end' for a node and all its ancestors */
-static void update_path_max_end(struct rb_node *node)
-{
-	u64 old_max_end, new_max_end;
-
-	while (node) {
-		struct memtype *data = container_of(node, struct memtype, rb);
-
-		old_max_end = data->subtree_max_end;
-		update_node_max_end(node);
-		new_max_end = data->subtree_max_end;
-
-		if (new_max_end == old_max_end)
-			break;
-
-		node = rb_parent(node);
-	}
-}
-
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 				u64 start, u64 end)
@@ -190,12 +170,6 @@ failure:
 	return -EBUSY;
 }
 
-static void memtype_rb_augment_cb(struct rb_node *node)
-{
-	if (node)
-		update_path_max_end(node);
-}
-
 static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 {
 	struct rb_node **node = &(root->rb_node);
@@ -213,6 +187,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 
 	rb_link_node(&newdata->rb, parent, node);
 	rb_insert_color(&newdata->rb, root);
+	rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
 }
 
 int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -234,13 +209,16 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
 
 struct memtype *rbt_memtype_erase(u64 start, u64 end)
 {
+	struct rb_node *deepest;
 	struct memtype *data;
 
 	data = memtype_rb_exact_match(&memtype_rbroot, start, end);
 	if (!data)
 		goto out;
 
+	deepest = rb_augment_erase_begin(&data->rb);
 	rb_erase(&data->rb, &memtype_rbroot);
+	rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
 out:
 	return data;
 }
-- 
cgit v1.2.2


From da38f43859467a8048365b9e1cce99ccbc62b6e2 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 6 Jul 2010 11:30:49 +0300
Subject: KVM: VMX: Fix host MSR_KERNEL_GS_BASE corruption

enter_lmode() and exit_lmode() modify the guest's EFER.LMA before calling
vmx_set_efer().  However, the latter function depends on the value of EFER.LMA
to determine whether MSR_KERNEL_GS_BASE needs reloading, via
vmx_load_host_state().  With EFER.LMA changing under its feet, it took the
wrong choice and corrupted userspace's %gs.

This causes 32-on-64 host userspace to fault.

Fix not touching EFER.LMA; instead ask vmx_set_efer() to change it.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 859a01a07dbf..ee03679efe78 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1744,18 +1744,15 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 			     (guest_tr_ar & ~AR_TYPE_MASK)
 			     | AR_TYPE_BUSY_64_TSS);
 	}
-	vcpu->arch.efer |= EFER_LMA;
-	vmx_set_efer(vcpu, vcpu->arch.efer);
+	vmx_set_efer(vcpu, vcpu->arch.efer | EFER_LMA);
 }
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.efer &= ~EFER_LMA;
-
 	vmcs_write32(VM_ENTRY_CONTROLS,
 		     vmcs_read32(VM_ENTRY_CONTROLS)
 		     & ~VM_ENTRY_IA32E_MODE);
-	vmx_set_efer(vcpu, vcpu->arch.efer);
+	vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
 }
 
 #endif
-- 
cgit v1.2.2


From 72550b3ae545c75897c769d43d62d4be3f3d48fe Mon Sep 17 00:00:00 2001
From: "H. Peter Anvin" <hpa@linux.intel.com>
Date: Wed, 7 Jul 2010 16:57:46 -0700
Subject: x86, platform: Export x86_platform to modules

Export x86_platform to modules in preparation of using it for i8042
discovery control.

Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
LKML-Reference: <1278342202-10973-1-git-send-email-feng.tang@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Dmitry Torokhov <dtor@mail.ru>
---
 arch/x86/kernel/x86_init.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 61a1e8c7e19f..ebfb8e4c9f22 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -5,6 +5,7 @@
  */
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/module.h>
 
 #include <asm/bios_ebda.h>
 #include <asm/paravirt.h>
@@ -94,3 +95,5 @@ struct x86_platform_ops x86_platform = {
 	.is_untracked_pat_range		= is_ISA_range,
 	.nmi_init			= default_nmi_init
 };
+
+EXPORT_SYMBOL_GPL(x86_platform);
-- 
cgit v1.2.2


From c516ac583973196162b1ba7e4d597d6f6892dac0 Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Mon, 5 Jul 2010 23:03:18 +0800
Subject: x86: Add i8042 pre-detection hook to x86_platform_ops

Some x86 platforms like Intel MID platforms don't have i8042 controllers,
and i8042 driver's probe to some legacy IO ports may hang the MID
processor. With this hook, i8042 driver can runtime check and skip the
probe when the pretection fail which also saves some probe time

[ hpa note: this is currently a compile-time check, which breaks the
  i386 allyesconfig build.  This patch series thus does fix a regression. ]

Signed-off-by: Feng Tang <feng.tang@intel.com>
LKML-Reference: <1278342202-10973-2-git-send-email-feng.tang@intel.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/x86_init.h | 2 ++
 arch/x86/kernel/x86_init.c      | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 519b54327d75..baa579c8e038 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -142,6 +142,7 @@ struct x86_cpuinit_ops {
  * @set_wallclock:		set time back to HW clock
  * @is_untracked_pat_range	exclude from PAT logic
  * @nmi_init			enable NMI on cpus
+ * @i8042_detect		pre-detect if i8042 controller exists
  */
 struct x86_platform_ops {
 	unsigned long (*calibrate_tsc)(void);
@@ -150,6 +151,7 @@ struct x86_platform_ops {
 	void (*iommu_shutdown)(void);
 	bool (*is_untracked_pat_range)(u64 start, u64 end);
 	void (*nmi_init)(void);
+	int (*i8042_detect)(void);
 };
 
 extern struct x86_init_ops x86_init;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index ebfb8e4c9f22..cd6da6bf3eca 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -86,6 +86,7 @@ struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = {
 };
 
 static void default_nmi_init(void) { };
+static int default_i8042_detect(void) { return 1; };
 
 struct x86_platform_ops x86_platform = {
 	.calibrate_tsc			= native_calibrate_tsc,
@@ -93,7 +94,8 @@ struct x86_platform_ops x86_platform = {
 	.set_wallclock			= mach_set_rtc_mmss,
 	.iommu_shutdown			= iommu_shutdown_noop,
 	.is_untracked_pat_range		= is_ISA_range,
-	.nmi_init			= default_nmi_init
+	.nmi_init			= default_nmi_init,
+	.i8042_detect			= default_i8042_detect
 };
 
 EXPORT_SYMBOL_GPL(x86_platform);
-- 
cgit v1.2.2


From 6d2cce62017efe957e34cfcbba23861b7671980b Mon Sep 17 00:00:00 2001
From: Feng Tang <feng.tang@intel.com>
Date: Mon, 5 Jul 2010 23:03:19 +0800
Subject: x86, mrst: Add i8042_detect API for Moorestwon platform

It will just return 0 as there is no i8042 controller

Signed-off-by: Feng Tang <feng.tang@intel.com>
LKML-Reference: <1278342202-10973-3-git-send-email-feng.tang@intel.com>
Acked-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/mrst.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c
index e796448f0eb5..5915e0b33303 100644
--- a/arch/x86/kernel/mrst.c
+++ b/arch/x86/kernel/mrst.c
@@ -216,6 +216,12 @@ static void __init mrst_setup_boot_clock(void)
 		setup_boot_APIC_clock();
 };
 
+/* MID systems don't have i8042 controller */
+static int mrst_i8042_detect(void)
+{
+	return 0;
+}
+
 /*
  * Moorestown specific x86_init function overrides and early setup
  * calls.
@@ -233,6 +239,7 @@ void __init x86_mrst_early_setup(void)
 	x86_cpuinit.setup_percpu_clockev = mrst_setup_secondary_clock;
 
 	x86_platform.calibrate_tsc = mrst_calibrate_tsc;
+	x86_platform.i8042_detect = mrst_i8042_detect;
 	x86_init.pci.init = pci_mrst_init;
 	x86_init.pci.fixup_irqs = x86_init_noop;
 
-- 
cgit v1.2.2


From 91546356d0e550fa23abf7f4b04a903c2855761f Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Date: Wed, 30 Jun 2010 16:04:06 +0800
Subject: KVM: MMU: flush remote tlbs when overwriting spte with different pfn

After remove a rmap, we should flush all vcpu's tlb

Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a6f695d76928..3699613e8830 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1879,6 +1879,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 			pgprintk("hfn old %lx new %lx\n",
 				 spte_to_pfn(*sptep), pfn);
 			rmap_remove(vcpu->kvm, sptep);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
+			kvm_flush_remote_tlbs(vcpu->kvm);
 		} else
 			was_rmapped = 1;
 	}
-- 
cgit v1.2.2


From 08be97962bf338161325d4901642f956ce8c1adb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Wed, 14 Jul 2010 21:36:27 +0200
Subject: x86: Force HPET readback_cmp for all ATI chipsets

commit 30a564be (x86, hpet: Restrict read back to affected ATI
chipset) restricted the workaround for the HPET bug to SMX00
chipsets. This was reasonable as those were the only ones against
which we ever got a bug report.

Stephan Wolf reported now that this patch breaks his IXP400 based
machine. Though it's confirmed to work on other IXP400 based systems.

To error out on the safe side, we force the HPET readback workaround
for all ATI SMbus class chipsets.

Reported-by: Stephan Wolf <stephan@letzte-bankreihe.de>
LKML-Reference: <alpine.LFD.2.00.1007142134140.3321@localhost.localdomain>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Stephan Wolf <stephan@letzte-bankreihe.de>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/kernel/early-quirks.c | 18 ++++++++++++++++++
 arch/x86/kernel/quirks.c       |  5 -----
 2 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index ebdb85cf2686..e5cc7e82e60d 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -18,6 +18,7 @@
 #include <asm/apic.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
+#include <asm/hpet.h>
 
 static void __init fix_hypertransport_config(int num, int slot, int func)
 {
@@ -191,6 +192,21 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 }
 #endif
 
+/*
+ * Force the read back of the CMP register in hpet_next_event()
+ * to work around the problem that the CMP register write seems to be
+ * delayed. See hpet_next_event() for details.
+ *
+ * We do this on all SMBUS incarnations for now until we have more
+ * information about the affected chipsets.
+ */
+static void __init ati_hpet_bugs(int num, int slot, int func)
+{
+#ifdef CONFIG_HPET_TIMER
+	hpet_readback_cmp = 1;
+#endif
+}
+
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -220,6 +236,8 @@ static struct chipset early_qrk[] __initdata = {
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
 	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
+	{ PCI_VENDOR_ID_ATI, PCI_ANY_ID,
+	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs },
 	{}
 };
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index e72d3fc6547d..939b9e98245f 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -498,15 +498,10 @@ void force_hpet_resume(void)
  * See erratum #27 (Misinterpreted MSI Requests May Result in
  * Corrupted LPC DMA Data) in AMD Publication #46837,
  * "SB700 Family Product Errata", Rev. 1.0, March 2010.
- *
- * Also force the read back of the CMP register in hpet_next_event()
- * to work around the problem that the CMP register write seems to be
- * delayed. See hpet_next_event() for details.
  */
 static void force_disable_hpet_msi(struct pci_dev *unused)
 {
 	hpet_msi_disable = 1;
-	hpet_readback_cmp = 1;
 }
 
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
-- 
cgit v1.2.2


From 58c84eda07560a6b75b03e8d3b26d6eddfc14011 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Thu, 15 Jul 2010 09:41:42 -0600
Subject: PCI: fall back to original BIOS BAR addresses

If we fail to assign resources to a PCI BAR, this patch makes us try the
original address from BIOS rather than leaving it disabled.

Linux tries to make sure all PCI device BARs are inside the upstream
PCI host bridge or P2P bridge apertures, reassigning BARs if necessary.
Windows does similar reassignment.

Before this patch, if we could not move a BAR into an aperture, we left
the resource unassigned, i.e., at address zero.  Windows leaves such BARs
at the original BIOS addresses, and this patch makes Linux do the same.

This is a bit ugly because we disable the resource long before we try to
reassign it, so we have to keep track of the BIOS BAR address somewhere.
For lack of a better place, I put it in the struct pci_dev.

I think it would be cleaner to attempt the assignment immediately when the
claim fails, so we could easily remember the original address.  But we
currently claim motherboard resources in the middle, after attempting to
claim PCI resources and before assigning new PCI resources, and changing
that is a fairly big job.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16263

Reported-by: Andrew <nitr0@seti.kr.ua>
Tested-by: Andrew <nitr0@seti.kr.ua>
Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
 arch/x86/pci/i386.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 6fdb3ec30c31..55253095be84 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -184,6 +184,7 @@ static void __init pcibios_allocate_resources(int pass)
 					idx, r, disabled, pass);
 				if (pci_claim_resource(dev, idx) < 0) {
 					/* We'll assign a new address later */
+					dev->fw_addr[idx] = r->start;
 					r->end -= r->start;
 					r->start = 0;
 				}
-- 
cgit v1.2.2


From fd19dce7ac07973f700b0f13fb7f94b951414a4c Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Thu, 15 Jul 2010 00:00:59 -0700
Subject: x86: Fix x2apic preenabled system with kexec

Found one x2apic system kexec loop test failed
when CONFIG_NMI_WATCHDOG=y (old) or CONFIG_LOCKUP_DETECTOR=y (current tip)

first kernel can kexec second kernel, but second kernel can not kexec third one.

it can be duplicated on another system with BIOS preenabled x2apic.
First kernel can not kexec second kernel.

It turns out, when kernel boot with pre-enabled x2apic, it will not execute
disable_local_APIC on shutdown path.

when init_apic_mappings() is called in setup_arch, it will skip setting of
apic_phys when x2apic_mode is set. ( x2apic_mode is much early check_x2apic())
Then later, disable_local_APIC() will bail out early because !apic_phys.

So check !x2apic_mode in x2apic_mode in disable_local_APIC with !apic_phys.

another solution could be updating init_apic_mappings() to set apic_phys even
for preenabled x2apic system. Actually even for x2apic system, that lapic
address is mapped already in early stage.

BTW: is there any x2apic preenabled system with apicid of boot cpu > 255?

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
LKML-Reference: <4C3EB22B.3000701@kernel.org>
Acked-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: stable@kernel.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/apic/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c02cc692985c..a96489ee6cab 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -921,7 +921,7 @@ void disable_local_APIC(void)
 	unsigned int value;
 
 	/* APIC hasn't been mapped yet */
-	if (!apic_phys)
+	if (!x2apic_mode && !apic_phys)
 		return;
 
 	clear_local_APIC();
-- 
cgit v1.2.2


From f82c3d71d6fd2e6a3e3416f09099e29087e39abf Mon Sep 17 00:00:00 2001
From: Jacob Pan <jacob.jun.pan@linux.intel.com>
Date: Fri, 16 Jul 2010 11:58:26 -0700
Subject: x86, pci, mrst: Add extra sanity check in walking the PCI extended
 cap chain

The fixed bar capability structure is searched in PCI extended
configuration space.  We need to make sure there is a valid capability
ID to begin with otherwise, the search code may stuck in a infinite
loop which results in boot hang.  This patch adds additional check for
cap ID 0, which is also invalid, and indicates end of chain.

End of chain is supposed to have all fields zero, but that doesn't
seem to always be the case in the field.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Reviewed-by: Jesse Barnes <jbarnes@virtuousgeek.org>
LKML-Reference: <1279306706-27087-1-git-send-email-jacob.jun.pan@linux.intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/pci/mrst.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/mrst.c b/arch/x86/pci/mrst.c
index 7ef3a2735df3..cb29191cee58 100644
--- a/arch/x86/pci/mrst.c
+++ b/arch/x86/pci/mrst.c
@@ -66,8 +66,9 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
 					  devfn, pos, 4, &pcie_cap))
 			return 0;
 
-		if (pcie_cap == 0xffffffff)
-			return 0;
+		if (PCI_EXT_CAP_ID(pcie_cap) == 0x0000 ||
+			PCI_EXT_CAP_ID(pcie_cap) == 0xffff)
+			break;
 
 		if (PCI_EXT_CAP_ID(pcie_cap) == PCI_EXT_CAP_ID_VNDR) {
 			raw_pci_ext_ops->read(pci_domain_nr(bus), bus->number,
@@ -76,7 +77,7 @@ static int fixed_bar_cap(struct pci_bus *bus, unsigned int devfn)
 				return pos;
 		}
 
-		pos = pcie_cap >> 20;
+		pos = PCI_EXT_CAP_NEXT(pcie_cap);
 	}
 
 	return 0;
-- 
cgit v1.2.2


From a197479848a2f1a2a5c07cffa6c31ab5e8c82797 Mon Sep 17 00:00:00 2001
From: Roland McGrath <roland@redhat.com>
Date: Fri, 16 Jul 2010 18:17:12 -0700
Subject: x86: kprobes: fix swapped segment registers in kretprobe

In commit f007ea26, the order of the %es and %ds segment registers
got accidentally swapped, so synthesized 'struct pt_regs' frames
have the two values inverted.  It's almost sure that these values
never matter, and that they also never differ.  But wrong is wrong.

Signed-off-by: Roland McGrath <roland@redhat.com>
---
 arch/x86/kernel/kprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 345a4b1fe144..675879b65ce6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -640,8 +640,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	/* Skip cs, ip, orig_ax and gs. */	\
 	"	subl $16, %esp\n"	\
 	"	pushl %fs\n"		\
-	"	pushl %ds\n"		\
 	"	pushl %es\n"		\
+	"	pushl %ds\n"		\
 	"	pushl %eax\n"		\
 	"	pushl %ebp\n"		\
 	"	pushl %edi\n"		\
-- 
cgit v1.2.2


From 7f8275d0d660c146de6ee3017e1e2e594c49e820 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 19 Jul 2010 14:56:17 +1000
Subject: mm: add context argument to shrinker callback

The current shrinker implementation requires the registered callback
to have global state to work from. This makes it difficult to shrink
caches that are not global (e.g. per-filesystem caches). Pass the shrinker
structure to the callback so that users can embed the shrinker structure
in the context the shrinker needs to operate on and get back to it in the
callback via container_of().

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3699613e8830..b1ed0a1a5913 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2926,7 +2926,7 @@ static int kvm_mmu_remove_some_alloc_mmu_pages(struct kvm *kvm)
 	return kvm_mmu_zap_page(kvm, page) + 1;
 }
 
-static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
+static int mmu_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
 {
 	struct kvm *kvm;
 	struct kvm *kvm_freed = NULL;
-- 
cgit v1.2.2


From 9aebbdb637a73a6092e1456ebb4a2df32cc1f611 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Tue, 20 Jul 2010 13:24:30 -0700
Subject: x86, numa: fix boot without RAM on node0 again

Commit e534c7c5f8d6 ("numa: x86_64: use generic percpu var
numa_node_id() implementation") broke numa systems that don't have ram
on node0 when MEMORY_HOTPLUG is enabled, because cpu_up() will call
cpu_to_node() before per_cpu(numa_node) is setup for APs.

When Node0 doesn't have RAM, on x86, cpus already round it to nearest
node with RAM in x86_cpu_to_node_map.  and per_cpu(numa_node) is not set
up until in c_init for APs.

When later cpu_up() calling cpu_to_node() will get 0 again, and make it
online even there is no RAM on node0.  so later all APs can not booted up,
and later will have panic.

[    1.611101] On node 0 totalpages: 0
.........
[    2.608558] On node 0 totalpages: 0
[    2.612065] Brought up 1 CPUs
[    2.615199] Total of 1 processors activated (3990.31 BogoMIPS).
...
   93.225341] calling  loop_init+0x0/0x1a4 @ 1
[   93.229314] PERCPU: allocation failed, size=80 align=8, failed to populate
[   93.246539] Pid: 1, comm: swapper Tainted: G        W   2.6.35-rc4-tip-yh-04371-gd64e6c4-dirty #354
[   93.264621] Call Trace:
[   93.266533]  [<ffffffff81125e43>] pcpu_alloc+0x83a/0x8e7
[   93.270710]  [<ffffffff81125f15>] __alloc_percpu+0x10/0x12
[   93.285849]  [<ffffffff8140786c>] alloc_disk_node+0x94/0x16d
[   93.291811]  [<ffffffff81407956>] alloc_disk+0x11/0x13
[   93.306157]  [<ffffffff81503e51>] loop_alloc+0xa7/0x180
[   93.310538]  [<ffffffff8277ef48>] loop_init+0x9b/0x1a4
[   93.324909]  [<ffffffff8277eead>] ? loop_init+0x0/0x1a4
[   93.329650]  [<ffffffff810001f2>] do_one_initcall+0x57/0x136
[   93.345197]  [<ffffffff827486d0>] kernel_init+0x184/0x20e
[   93.348146]  [<ffffffff81034954>] kernel_thread_helper+0x4/0x10
[   93.365194]  [<ffffffff81c7cc3c>] ? restore_args+0x0/0x30
[   93.369305]  [<ffffffff8274854c>] ? kernel_init+0x0/0x20e
[   93.386011]  [<ffffffff81034950>] ? kernel_thread_helper+0x0/0x10
[   93.392047] loop: out of memory
...

Try to assign per_cpu(numa_node) early

[akpm@linux-foundation.org: tidy up code comment]
Signed-off-by: Yinghai <yinghai@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Denys Vlasenko <vda.linux@googlemail.com>
Acked-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/setup_percpu.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index de3b63ae3da2..690c2c09faf3 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -238,6 +238,15 @@ void __init setup_per_cpu_areas(void)
 #ifdef CONFIG_NUMA
 		per_cpu(x86_cpu_to_node_map, cpu) =
 			early_per_cpu_map(x86_cpu_to_node_map, cpu);
+		/*
+		 * Ensure taht the boot cpu numa_node is correct when the boot
+		 * cpu is on a node that doesn't have memory installed.
+		 * Also cpu_up() will call cpu_to_node() for APs when
+		 * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set
+		 * up later with c_init aka intel_init/amd_init.
+		 * So set them all (boot cpu and all APs).
+		 */
+		set_cpu_numa_node(cpu, early_cpu_to_node(cpu));
 #endif
 #endif
 		/*
@@ -257,14 +266,6 @@ void __init setup_per_cpu_areas(void)
 	early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
 #endif
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
-	/*
-	 * make sure boot cpu numa_node is right, when boot cpu is on the
-	 * node that doesn't have mem installed
-	 */
-	set_cpu_numa_node(boot_cpu_id, early_cpu_to_node(boot_cpu_id));
-#endif
-
 	/* Setup node to cpumask map */
 	setup_node_to_cpumask_map();
 
-- 
cgit v1.2.2


From a4ce96ac356e7024a7724ade9d18ba1bdf3c5c06 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 21 Jul 2010 09:25:42 -0700
Subject: Fix up trivial spelling errors ('taht' -> 'that')

Pointed out by Lucas who found the new one in a comment in
setup_percpu.c. And then I fixed the others that I grepped
for.

Reported-by: Lucas <canolucas@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/setup_percpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 690c2c09faf3..a60df9ae6454 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -239,7 +239,7 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(x86_cpu_to_node_map, cpu) =
 			early_per_cpu_map(x86_cpu_to_node_map, cpu);
 		/*
-		 * Ensure taht the boot cpu numa_node is correct when the boot
+		 * Ensure that the boot cpu numa_node is correct when the boot
 		 * cpu is on a node that doesn't have memory installed.
 		 * Also cpu_up() will call cpu_to_node() for APs when
 		 * MEMORY_HOTPLUG is defined, before per_cpu(numa_node) is set
-- 
cgit v1.2.2