From 323ec001c6bb98eeabb5abbdbb8c8055d9496554 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dbaryshkov@gmail.com>
Date: Sun, 29 Jun 2008 14:19:31 +0400
Subject: x86: use generic per-device dma coherent allocator

Signed-off-by: Dmitry Baryshkov <dbaryshkov@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/dma-mapping.h | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h
index a1a4dc7fe6ec..c68f360ef564 100644
--- a/include/asm-x86/dma-mapping.h
+++ b/include/asm-x86/dma-mapping.h
@@ -213,25 +213,5 @@ static inline int dma_get_cache_alignment(void)
 
 #define dma_is_consistent(d, h)	(1)
 
-#ifdef CONFIG_X86_32
-#  define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY
-struct dma_coherent_mem {
-	void		*virt_base;
-	u32		device_base;
-	int		size;
-	int		flags;
-	unsigned long	*bitmap;
-};
-
-extern int
-dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
-			    dma_addr_t device_addr, size_t size, int flags);
-
-extern void
-dma_release_declared_memory(struct device *dev);
-
-extern void *
-dma_mark_declared_memory_occupied(struct device *dev,
-				  dma_addr_t device_addr, size_t size);
-#endif /* CONFIG_X86_32 */
+#include <asm-generic/dma-coherent.h>
 #endif
-- 
cgit v1.2.2


From e930bffe95e1e886a1ede80726ea38df5838d067 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <andrea@qumranet.com>
Date: Fri, 25 Jul 2008 16:24:52 +0200
Subject: KVM: Synchronize guest physical memory map to host virtual memory map

Synchronize changes to host virtual addresses which are part of
a KVM memory slot to the KVM shadow mmu.  This allows pte operations
like swapping, page migration, and madvise() to transparently work
with KVM.

Signed-off-by: Andrea Arcangeli <andrea@qumranet.com>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 include/asm-x86/kvm_host.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index bc34dc21f178..0f3c53114614 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -13,6 +13,7 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
+#include <linux/mmu_notifier.h>
 
 #include <linux/kvm.h>
 #include <linux/kvm_para.h>
@@ -251,6 +252,7 @@ struct kvm_vcpu_arch {
 		gfn_t gfn;	/* presumed gfn during guest pte update */
 		pfn_t pfn;	/* pfn corresponding to that gfn */
 		int largepage;
+		unsigned long mmu_seq;
 	} update_pte;
 
 	struct i387_fxsave_struct host_fx_image;
@@ -729,4 +731,8 @@ asmlinkage void kvm_handle_fault_on_reboot(void);
 	KVM_EX_ENTRY " 666b, 667b \n\t" \
 	".popsection"
 
+#define KVM_ARCH_WANT_MMU_NOTIFIER
+int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_age_hva(struct kvm *kvm, unsigned long hva);
+
 #endif
-- 
cgit v1.2.2


From 8978b74253280d59e97cf49a3ec2c0cbccd5b801 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 29 Jul 2008 13:38:53 +0900
Subject: generic, x86: fix add iommu_num_pages helper function

This IOMMU helper function doesn't work for some architectures:

  http://marc.info/?l=linux-kernel&m=121699304403202&w=2

It also breaks POWER and SPARC builds:

  http://marc.info/?l=linux-kernel&m=121730388001890&w=2

Currently, only x86 IOMMUs use this so let's move it to x86 for
now.

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/iommu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h
index ecc8061904a9..5f888cc5be49 100644
--- a/include/asm-x86/iommu.h
+++ b/include/asm-x86/iommu.h
@@ -7,6 +7,8 @@ extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
 
+extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
+
 #ifdef CONFIG_GART_IOMMU
 extern int gart_iommu_aperture;
 extern int gart_iommu_aperture_allowed;
-- 
cgit v1.2.2


From d388e5fdc461344d04307a3fa83862b9ed429647 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sat, 9 Aug 2008 15:09:02 -0700
Subject: x86: Restore proper vector locking during cpu hotplug

Having cpu_online_map change during assign_irq_vector can result
in some really nasty and weird things happening.  The one that
bit me last time was accessing non existent per cpu memory for non
existent cpus.

This locking was removed in a sloppy x86_64 and x86_32 merge patch.

Guys can we please try and avoid subtly breaking x86 when we are
merging files together?

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-x86/hw_irq.h | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 77ba51df5668..edd0b95f14d0 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -98,9 +98,17 @@ extern void (*const interrupt[NR_IRQS])(void);
 #else
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern spinlock_t vector_lock;
 #endif
-extern void setup_vector_irq(int cpu);
+
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_X86_64)
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
+extern void __setup_vector_irq(int cpu);
+#else
+static inline void lock_vector_lock(void) {}
+static inline void unlock_vector_lock(void) {}
+static inline void __setup_vector_irq(int cpu) {}
+#endif
 
 #endif /* !ASSEMBLY_ */
 
-- 
cgit v1.2.2


From 3c7569b284e1be55d086b61a70d9f545326f6d74 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Sun, 10 Aug 2008 00:35:50 -0700
Subject: x86_64: restore the proper NR_IRQS define so larger systems work.

As pointed out and tracked by Yinghai Lu <yhlu.kernel@gmail.com>:

 Dhaval Giani got:
 kernel BUG at arch/x86/kernel/io_apic_64.c:357!
 invalid opcode: 0000 [1] SMP
 CPU 24
 ...

his system (x3950) has 8 ioapic, irq > 256

This was caused by:

       commit 9b7dc567d03d74a1fbae84e88949b6a60d922d82
       Author: Thomas Gleixner <tglx@linutronix.de>
       Date:   Fri May 2 20:10:09 2008 +0200

          x86: unify interrupt vector defines

          The interrupt vector defines are copied 4 times around with minimal
          differences. Move them all into asm-x86/irq_vectors.h

It appears that Thomas did not notice that x86_64 does something
completely different when he merge irq_vectors.h

We can solve this for 2.6.27 by simply reintroducing the old heuristic
for setting NR_IRQS on x86_64 to a usable value, which trivially removes
the regression.

Long term it would be nice to harmonize the handling of ioapic interrupts
of x86_32 and x86_64 so we don't have this kind of confusion.

Dhaval Giani <dhaval@linux.vnet.ibm.com> tested an earlier version of
this patch by YH which confirms simply increasing NR_IRQS fixes the
problem.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Dhaval Giani <dhaval@linux.vnet.ibm.com>
Cc: Mike Travis <travis@sgi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/irq_vectors.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h
index 90b1d1f12f08..b95d167b7fb2 100644
--- a/include/asm-x86/irq_vectors.h
+++ b/include/asm-x86/irq_vectors.h
@@ -109,7 +109,15 @@
 #define LAST_VM86_IRQ		15
 #define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
 
-#if !defined(CONFIG_X86_VOYAGER)
+#ifdef CONFIG_X86_64
+# if NR_CPUS < MAX_IO_APICS
+#  define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
+# else
+#  define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
+# endif
+# define NR_IRQ_VECTORS NR_IRQS
+
+#elif !defined(CONFIG_X86_VOYAGER)
 
 # if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS)
 
-- 
cgit v1.2.2


From b0fbaa6b5976962434349849673b9ff63631b6d4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 7 Aug 2008 15:12:39 -0700
Subject: EFI, x86: fix function prototype

Fix function prototype in header file to match source code:

linux-next-20080807/arch/x86/kernel/efi_64.c:100:14: error: symbol 'efi_ioremap' redeclared with different type (originally declared at include2/asm/efi.h:89) - different address spaces

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/asm-x86/efi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/efi.h b/include/asm-x86/efi.h
index 7ed2bd7a7f51..d4f2b0abe929 100644
--- a/include/asm-x86/efi.h
+++ b/include/asm-x86/efi.h
@@ -86,7 +86,7 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3,
 	efi_call6((void *)(efi.systab->runtime->f), (u64)(a1), (u64)(a2), \
 		  (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6))
 
-extern void *efi_ioremap(unsigned long addr, unsigned long size);
+extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
-- 
cgit v1.2.2


From e49140120c88eb99db1a9172d9ac224c0f2bbdd2 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Wed, 13 Aug 2008 22:02:26 +1000
Subject: crypto: padlock - fix VIA PadLock instruction usage with
 irq_ts_save/restore()

Wolfgang Walter reported this oops on his via C3 using padlock for
AES-encryption:

##################################################################

BUG: unable to handle kernel NULL pointer dereference at 000001f0
IP: [<c01028c5>] __switch_to+0x30/0x117
*pde = 00000000
Oops: 0002 [#1] PREEMPT
Modules linked in:

Pid: 2071, comm: sleep Not tainted (2.6.26 #11)
EIP: 0060:[<c01028c5>] EFLAGS: 00010002 CPU: 0
EIP is at __switch_to+0x30/0x117
EAX: 00000000 EBX: c0493300 ECX: dc48dd00 EDX: c0493300
ESI: dc48dd00 EDI: c0493530 EBP: c04cff8c ESP: c04cff7c
 DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068
Process sleep (pid: 2071, ti=c04ce000 task=dc48dd00 task.ti=d2fe6000)
Stack: dc48df30 c0493300 00000000 00000000 d2fe7f44 c03b5b43 c04cffc8 00000046
       c0131856 0000005a dc472d3c c0493300 c0493470 d983ae00 00002696 00000000
       c0239f54 00000000 c04c4000 c04cffd8 c01025fe c04f3740 00049800 c04cffe0
Call Trace:
 [<c03b5b43>] ? schedule+0x285/0x2ff
 [<c0131856>] ? pm_qos_requirement+0x3c/0x53
 [<c0239f54>] ? acpi_processor_idle+0x0/0x434
 [<c01025fe>] ? cpu_idle+0x73/0x7f
 [<c03a4dcd>] ? rest_init+0x61/0x63
 =======================

Wolfgang also found out that adding kernel_fpu_begin() and kernel_fpu_end()
around the padlock instructions fix the oops.

Suresh wrote:

These padlock instructions though don't use/touch SSE registers, but it behaves
similar to other SSE instructions. For example, it might cause DNA faults
when cr0.ts is set. While this is a spurious DNA trap, it might cause
oops with the recent fpu code changes.

This is the code sequence  that is probably causing this problem:

a) new app is getting exec'd and it is somewhere in between
   start_thread() and flush_old_exec() in the load_xyz_binary()

b) At pont "a", task's fpu state (like TS_USEDFPU, used_math() etc) is
   cleared.

c) Now we get an interrupt/softirq which starts using these encrypt/decrypt
   routines in the network stack. This generates a math fault (as
   cr0.ts is '1') which sets TS_USEDFPU and restores the math that is
   in the task's xstate.

d) Return to exec code path, which does start_thread() which does
   free_thread_xstate() and sets xstate pointer to NULL while
   the TS_USEDFPU is still set.

e) At the next context switch from the new exec'd task to another task,
   we have a scenarios where TS_USEDFPU is set but xstate pointer is null.
   This can cause an oops during unlazy_fpu() in __switch_to()

Now:

1) This should happen with or with out pre-emption. Viro also encountered
   similar problem with out CONFIG_PREEMPT.

2) kernel_fpu_begin() and kernel_fpu_end() will fix this problem, because
   kernel_fpu_begin() will manually do a clts() and won't run in to the
   situation of setting TS_USEDFPU in step "c" above.

3) This was working before the fpu changes, because its a spurious
   math fault  which doesn't corrupt any fpu/sse registers and the task's
   math state was always in an allocated state.

With out the recent lazy fpu allocation changes, while we don't see oops,
there is a possible race still present in older kernels(for example,
while kernel is using kernel_fpu_begin() in some optimized clear/copy
page and an interrupt/softirq happens which uses these padlock
instructions generating DNA fault).

This is the failing scenario that existed even before the lazy fpu allocation
changes:

0. CPU's TS flag is set

1. kernel using FPU in some optimized copy  routine and while doing
kernel_fpu_begin() takes an interrupt just before doing clts()

2. Takes an interrupt and ipsec uses padlock instruction. And we
take a DNA fault as TS flag is still set.

3. We handle the DNA fault and set TS_USEDFPU and clear cr0.ts

4. We complete the padlock routine

5. Go back to step-1, which resumes clts() in kernel_fpu_begin(), finishes
the optimized copy routine and does kernel_fpu_end(). At this point,
we have cr0.ts again set to '1' but the task's TS_USEFPU is stilll
set and not cleared.

6. Now kernel resumes its user operation. And at the next context
switch, kernel sees it has do a FP save as TS_USEDFPU is still set
and then will do a unlazy_fpu() in __switch_to(). unlazy_fpu()
will take a DNA fault, as cr0.ts is '1' and now, because we are
in __switch_to(), math_state_restore() will get confused and will
restore the next task's FP state and will save it in prev tasks's FP state.
Remember, in __switch_to() we are already on the stack of the next task
but take a DNA fault for the prev task.

This causes the fpu leakage.

Fix the padlock instruction usage by calling them inside the
context of new routines irq_ts_save/restore(), which clear/restore cr0.ts
manually in the interrupt context. This will not generate spurious DNA
in the  context of the interrupt which will fix the oops encountered and
the possible FPU leakage issue.

Reported-and-bisected-by: Wolfgang Walter <wolfgang.walter@stwm.de>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/asm-x86/i387.h | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'include/asm-x86')

diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 96fa8449ff11..6d3b21063419 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -13,6 +13,7 @@
 #include <linux/sched.h>
 #include <linux/kernel_stat.h>
 #include <linux/regset.h>
+#include <linux/hardirq.h>
 #include <asm/asm.h>
 #include <asm/processor.h>
 #include <asm/sigcontext.h>
@@ -236,6 +237,37 @@ static inline void kernel_fpu_end(void)
 	preempt_enable();
 }
 
+/*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+ * get used from interrupt context aswell. To prevent these kernel instructions
+ * in interrupt context interact wrongly with other user/kernel fpu usage, we
+ * should use them only in the context of irq_ts_save/restore()
+ */
+static inline int irq_ts_save(void)
+{
+	/*
+	 * If we are in process context, we are ok to take a spurious DNA fault.
+	 * Otherwise, doing clts() in process context require pre-emption to
+	 * be disabled or some heavy lifting like kernel_fpu_begin()
+	 */
+	if (!in_interrupt())
+		return 0;
+
+	if (read_cr0() & X86_CR0_TS) {
+		clts();
+		return 1;
+	}
+
+	return 0;
+}
+
+static inline void irq_ts_restore(int TS_state)
+{
+	if (TS_state)
+		stts();
+}
+
 #ifdef CONFIG_X86_64
 
 static inline void save_init_fpu(struct task_struct *tsk)
-- 
cgit v1.2.2