From ae531c26c5c2a28ca1b35a75b39b3b256850f2c8 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 24 Apr 2008 23:40:47 +0200
Subject: x86: introduce /dev/mem restrictions with a config option

This patch introduces a restriction on /dev/mem: Only non-memory can be
read or written unless the newly introduced config option is set.

The X server needs access to /dev/mem for the PCI space, but it doesn't need
access to memory; both the file permissions and SELinux permissions of /dev/mem
just make X effectively super-super powerful. With the exception of the
BIOS area, there's just no valid app that uses /dev/mem on actual memory.
Other popular users of /dev/mem are rootkits and the like.
(note: mmap access of memory via /dev/mem was already not allowed since
a really long time)

People who want to use /dev/mem for kernel debugging can enable the config
option.

The restrictions of this patch have been in the Fedora and RHEL kernels for
at least 4 years without any problems.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/Kconfig.debug | 12 ++++++++++++
 arch/x86/mm/init_32.c  | 19 +++++++++++++++++++
 arch/x86/mm/init_64.c  | 20 ++++++++++++++++++++
 3 files changed, 51 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 610aaecc19f8..0c1890c41279 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -5,6 +5,18 @@ config TRACE_IRQFLAGS_SUPPORT
 
 source "lib/Kconfig.debug"
 
+config NONPROMISC_DEVMEM
+	bool "Disable promiscuous /dev/mem"
+	default y
+	help
+	  The /dev/mem file by default only allows userspace access to PCI
+	  space and the BIOS code and data regions. This is sufficient for
+	  dosemu and X and all common users of /dev/mem. With this config
+	  option, you allow userspace access to all of memory, including
+	  kernel and userspace memory. Accidental access to this is
+	  obviously disasterous, but specific access can be used by people
+	  debugging the kernel.
+
 config EARLY_PRINTK
 	bool "Early printk" if EMBEDDED
 	default y
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..39852d539018 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr)
 	return 0;
 }
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr <= 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
 #ifdef CONFIG_HIGHMEM
 pte_t *kmap_pte;
 pgprot_t kmap_prot;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..49c274ee2fba 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -664,6 +664,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
+/*
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
+ * is valid. The argument is a physical page number.
+ *
+ *
+ * On x86, access has to be given to the first megabyte of ram because that area
+ * contains bios code and data regions used by X and dosemu and similar apps.
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
+ * mmio resources as well as potential bios/acpi data regions.
+ */
+int devmem_is_allowed(unsigned long pagenr)
+{
+	if (pagenr <= 256)
+		return 1;
+	if (!page_is_ram(pagenr))
+		return 1;
+	return 0;
+}
+
+
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
 			 kcore_modules, kcore_vsyscall;
 
-- 
cgit v1.2.2


From e045fb2a988a9a1964059b0d33dbaf18d12f925f Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Tue, 18 Mar 2008 17:00:15 -0700
Subject: x86: PAT avoid aliasing in /dev/mem read/write

Add xlate and unxlate around /dev/mem read/write. This sets up the mapping
that can be used for /dev/mem read and write without aliasing worries.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/ioremap.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..caac7d5699a7 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -336,6 +336,35 @@ void iounmap(volatile void __iomem *addr)
 }
 EXPORT_SYMBOL(iounmap);
 
+/*
+ * Convert a physical pointer to a virtual kernel pointer for /dev/mem
+ * access
+ */
+void *xlate_dev_mem_ptr(unsigned long phys)
+{
+	void *addr;
+	unsigned long start = phys & PAGE_MASK;
+
+	/* If page is RAM, we can use __va. Otherwise ioremap and unmap. */
+	if (page_is_ram(start >> PAGE_SHIFT))
+		return __va(phys);
+
+	addr = (void *)ioremap(start, PAGE_SIZE);
+	if (addr)
+		addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
+
+	return addr;
+}
+
+void unxlate_dev_mem_ptr(unsigned long phys, void *addr)
+{
+	if (page_is_ram(phys >> PAGE_SHIFT))
+		return;
+
+	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
+	return;
+}
+
 #ifdef CONFIG_X86_32
 
 int __initdata early_ioremap_debug;
-- 
cgit v1.2.2


From f0970c13b6a5b01189aeb196ebb573cf87d95839 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Tue, 18 Mar 2008 17:00:20 -0700
Subject: x86: PAT phys_mem_access_prot_allowed for dev/mem mmap

Introduce phys_mem_access_prot_allowed(), which checks whether the mapping
is possible, without any conflicts and returns success or failure based on that.
phys_mem_access_prot() by itself does not allow failure case. This ability
to return error is needed for PAT where we may have aliasing conflicts.

x86 setup __HAVE_PHYS_MEM_ACCESS_PROT and move x86 specific code out of
/dev/mem into arch specific area.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 72c0f6097402..64cc0c18233e 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -419,3 +419,42 @@ int free_memtype(u64 start, u64 end)
 	return err;
 }
 
+
+/* /dev/mem interface. Use the previous mapping */
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t vma_prot)
+{
+	return vma_prot;
+}
+
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
+				unsigned long size, pgprot_t *vma_prot)
+{
+
+	if (file->f_flags & O_SYNC) {
+		*vma_prot = pgprot_noncached(*vma_prot);
+		return 1;
+	}
+
+#ifdef CONFIG_X86_32
+	/*
+	 * On the PPro and successors, the MTRRs are used to set
+	 * memory types for physical addresses outside main memory,
+	 * so blindly setting UC or PWT on those pages is wrong.
+	 * For Pentiums and earlier, the surround logic should disable
+	 * caching for the high addresses through the KEN pin, but
+	 * we maintain the tradition of paranoia in this code.
+	 */
+	if (!pat_wc_enabled &&
+	    ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
+		test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
+	   (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
+		*vma_prot = pgprot_noncached(*vma_prot);
+		return 1;
+	}
+#endif
+
+	return 1;
+}
-- 
cgit v1.2.2


From e7f260a276f2c9184fe753732d834b1f6fbe9f17 Mon Sep 17 00:00:00 2001
From: "venkatesh.pallipadi@intel.com" <venkatesh.pallipadi@intel.com>
Date: Tue, 18 Mar 2008 17:00:21 -0700
Subject: x86: PAT use reserve free memtype in mmap of /dev/mem

Use reserve_memtype and free_memtype wrappers for /dev/mem mmaps. The memtype
is slightly complicated here, given that we have to support existing X mappings.
We fallback on UC_MINUS for that.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 118 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 64cc0c18233e..1489aafbfa71 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/gfp.h>
 #include <linux/fs.h>
+#include <linux/bootmem.h>
 
 #include <asm/msr.h>
 #include <asm/tlbflush.h>
@@ -21,6 +22,7 @@
 #include <asm/cacheflush.h>
 #include <asm/fcntl.h>
 #include <asm/mtrr.h>
+#include <asm/io.h>
 
 int pat_wc_enabled = 1;
 
@@ -190,6 +192,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
 	return 0;
 }
 
+/*
+ * req_type typically has one of the:
+ * - _PAGE_CACHE_WB
+ * - _PAGE_CACHE_WC
+ * - _PAGE_CACHE_UC_MINUS
+ * - _PAGE_CACHE_UC
+ *
+ * req_type will have a special case value '-1', when requester want to inherit
+ * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
+ *
+ * If ret_type is NULL, function will return an error if it cannot reserve the
+ * region with req_type. If ret_type is non-null, function will return
+ * available type in ret_type in case of no error. In case of any error
+ * it will return a negative return value.
+ */
 int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 			unsigned long *ret_type)
 {
@@ -200,9 +217,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 
 	/* Only track when pat_wc_enabled */
 	if (!pat_wc_enabled) {
-		if (ret_type)
-			*ret_type = req_type;
-
+		/* This is identical to page table setting without PAT */
+		if (ret_type) {
+			if (req_type == -1) {
+				*ret_type = _PAGE_CACHE_WB;
+			} else {
+				*ret_type = req_type;
+			}
+		}
 		return 0;
 	}
 
@@ -214,8 +236,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		return 0;
 	}
 
-	req_type &= _PAGE_CACHE_MASK;
-	err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+	if (req_type == -1) {
+		/*
+		 * Special case where caller wants to inherit from mtrr or
+		 * existing pat mapping, defaulting to UC_MINUS in case of
+		 * no match.
+		 */
+		u8 mtrr_type = mtrr_type_lookup(start, end);
+		if (mtrr_type == 0xFE) { /* MTRR match error */
+			err = -1;
+		}
+
+		if (mtrr_type == MTRR_TYPE_WRBACK) {
+			req_type = _PAGE_CACHE_WB;
+			actual_type = _PAGE_CACHE_WB;
+		} else {
+			req_type = _PAGE_CACHE_UC_MINUS;
+			actual_type = _PAGE_CACHE_UC_MINUS;
+		}
+	} else {
+		req_type &= _PAGE_CACHE_MASK;
+		err = pat_x_mtrr_type(start, end, req_type, &actual_type);
+	}
+
 	if (err) {
 		if (ret_type)
 			*ret_type = actual_type;
@@ -420,7 +463,14 @@ int free_memtype(u64 start, u64 end)
 }
 
 
-/* /dev/mem interface. Use the previous mapping */
+/*
+ * /dev/mem mmap interface. The memtype used for mapping varies:
+ * - Use UC for mappings with O_SYNC flag
+ * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
+ *   inherit the memtype from existing mapping.
+ * - Else use UC_MINUS memtype (for backward compatibility with existing
+ *   X drivers.
+ */
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t vma_prot)
 {
@@ -430,10 +480,13 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t *vma_prot)
 {
+	u64 offset = ((u64) pfn) << PAGE_SHIFT;
+	unsigned long flags = _PAGE_CACHE_UC_MINUS;
+	unsigned long ret_flags;
+	int retval;
 
 	if (file->f_flags & O_SYNC) {
-		*vma_prot = pgprot_noncached(*vma_prot);
-		return 1;
+		flags = _PAGE_CACHE_UC;
 	}
 
 #ifdef CONFIG_X86_32
@@ -451,10 +504,65 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 		test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
 		test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
 	   (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
-		*vma_prot = pgprot_noncached(*vma_prot);
-		return 1;
+		flags = _PAGE_CACHE_UC;
 	}
 #endif
 
+	/*
+	 * With O_SYNC, we can only take UC mapping. Fail if we cannot.
+	 * Without O_SYNC, we want to get
+	 * - WB for WB-able memory and no other conflicting mappings
+	 * - UC_MINUS for non-WB-able memory with no other conflicting mappings
+	 * - Inherit from confliting mappings otherwise
+	 */
+	if (flags != _PAGE_CACHE_UC_MINUS) {
+		retval = reserve_memtype(offset, offset + size, flags, NULL);
+	} else {
+		retval = reserve_memtype(offset, offset + size, -1, &ret_flags);
+	}
+
+	if (retval < 0)
+		return 0;
+
+	flags = ret_flags;
+
+	if (pfn <= max_pfn_mapped &&
+            ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
+		free_memtype(offset, offset + size);
+		printk(KERN_DEBUG
+		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
+			current->comm, current->pid,
+			cattr_name(flags),
+			offset, offset + size);
+		return 0;
+	}
+
+	*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
+			     flags);
 	return 1;
 }
+
+void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
+{
+	u64 addr = (u64)pfn << PAGE_SHIFT;
+	unsigned long flags;
+	unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
+
+	reserve_memtype(addr, addr + size, want_flags, &flags);
+	if (flags != want_flags) {
+		printk(KERN_DEBUG
+		"%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
+			current->comm, current->pid,
+			cattr_name(want_flags),
+			addr, addr + size,
+			cattr_name(flags));
+	}
+}
+
+void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
+{
+	u64 addr = (u64)pfn << PAGE_SHIFT;
+
+	free_memtype(addr, addr + size);
+}
+
-- 
cgit v1.2.2


From 28eb559b5b0b9b51b9165a9b8faa75b0bb91ca8d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Thu, 3 Apr 2008 10:14:33 +0200
Subject: pat: cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/pat.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 1489aafbfa71..ef8b64b89c7d 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -284,7 +284,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 		struct memtype *saved_ptr;
 
 		if (parse->start >= end) {
-			printk("New Entry\n");
+			pr_debug("New Entry\n");
 			list_add(&new_entry->nd, parse->nd.prev);
 			new_entry = NULL;
 			break;
@@ -386,7 +386,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 				break;
 			}
 
-			printk("Overlap at 0x%Lx-0x%Lx\n",
+			printk(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
 			       saved_ptr->start, saved_ptr->end);
 			/* No conflict. Go ahead and add this new entry */
 			list_add(&new_entry->nd, &saved_ptr->nd);
@@ -396,7 +396,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	}
 
 	if (err) {
-		printk(
+		printk(KERN_INFO
 	"reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
 			start, end, cattr_name(new_entry->type),
 			cattr_name(req_type));
@@ -408,16 +408,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
 	if (new_entry) {
 		/* No conflict. Not yet added to the list. Add to the tail */
 		list_add_tail(&new_entry->nd, &memtype_list);
-		printk("New Entry\n");
-  	}
+		pr_debug("New Entry\n");
+	}
 
 	if (ret_type) {
-		printk(
+		pr_debug(
 	"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
 			start, end, cattr_name(actual_type),
 			cattr_name(req_type), cattr_name(*ret_type));
 	} else {
-		printk(
+		pr_debug(
 	"reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
 			start, end, cattr_name(actual_type),
 			cattr_name(req_type));
@@ -454,11 +454,11 @@ int free_memtype(u64 start, u64 end)
 	spin_unlock(&memtype_lock);
 
 	if (err) {
-		printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n",
+		printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n",
 			current->comm, current->pid, start, end);
 	}
 
-	printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end);
+	pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
 	return err;
 }
 
@@ -529,7 +529,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
 	if (pfn <= max_pfn_mapped &&
             ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
 		free_memtype(offset, offset + size);
-		printk(KERN_DEBUG
+		printk(KERN_INFO
 		"%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
 			current->comm, current->pid,
 			cattr_name(flags),
@@ -550,7 +550,7 @@ void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
 
 	reserve_memtype(addr, addr + size, want_flags, &flags);
 	if (flags != want_flags) {
-		printk(KERN_DEBUG
+		printk(KERN_INFO
 		"%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
 			current->comm, current->pid,
 			cattr_name(want_flags),
-- 
cgit v1.2.2


From 1f56cf1c58c81f7ecf16f5e99ac4a333d9dc9aea Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 18 Apr 2008 21:42:36 +0200
Subject: /dev/mem: make promisc the default

default to the old semantics.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.debug | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0c1890c41279..239fd9fba0a5 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -7,7 +7,6 @@ source "lib/Kconfig.debug"
 
 config NONPROMISC_DEVMEM
 	bool "Disable promiscuous /dev/mem"
-	default y
 	help
 	  The /dev/mem file by default only allows userspace access to PCI
 	  space and the BIOS code and data regions. This is sufficient for
-- 
cgit v1.2.2


From 79bf6d66abb5a20813a19dd365dfc49104f0bb88 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:36:54 -0700
Subject: x86: convert pgalloc_64.h from macros to inlines

Convert asm-x86/pgalloc_64.h from macros into functions (#include hell
prevents __*_free_tlb from being inline, but they're probably a bit
big to inline anyway).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..2d89b7abbc54 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -662,6 +662,22 @@ int memory_add_physaddr_to_nid(u64 start)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	tlb_remove_page(tlb, pte);
+}
+
+void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+	tlb_remove_page(tlb, virt_to_page(pmd));
+}
+
+void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+{
+	tlb_remove_page(tlb, virt_to_page(pud));
+}
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
-- 
cgit v1.2.2


From 4f76cd382213b29dd3658e3e1ea47c0c2be06f3c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:36:55 -0700
Subject: x86: add common mm/pgtable.c

Add a common arch/x86/mm/pgtable.c file for common pagetable functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/Makefile     |   2 +-
 arch/x86/mm/pgtable.c    | 239 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/pgtable_32.c | 187 ------------------------------------
 3 files changed, 240 insertions(+), 188 deletions(-)
 create mode 100644 arch/x86/mm/pgtable.c

(limited to 'arch/x86')

diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 20941d2954e2..b7b3e4c7cfc9 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,5 +1,5 @@
 obj-y	:=  init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-	    pat.o
+	    pat.o pgtable.o
 
 obj-$(CONFIG_X86_32)		+= pgtable_32.o
 
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
new file mode 100644
index 000000000000..d526b46ae188
--- /dev/null
+++ b/arch/x86/mm/pgtable.c
@@ -0,0 +1,239 @@
+#include <linux/mm.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
+
+pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+}
+
+pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
+{
+	struct page *pte;
+
+#ifdef CONFIG_HIGHPTE
+	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+#else
+	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+#endif
+	if (pte)
+		pgtable_page_ctor(pte);
+	return pte;
+}
+
+#ifdef CONFIG_X86_64
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	list_add(&page->lru, &pgd_list);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+	unsigned long flags;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	list_del(&page->lru);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	unsigned boundary;
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	if (!pgd)
+		return NULL;
+	pgd_list_add(pgd);
+	/*
+	 * Copy kernel pointers in from init.
+	 * Could keep a freelist or slab cache of those because the kernel
+	 * part never changes.
+	 */
+	boundary = pgd_index(__PAGE_OFFSET);
+	memset(pgd, 0, boundary * sizeof(pgd_t));
+	memcpy(pgd + boundary,
+	       init_level4_pgt + boundary,
+	       (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
+	return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
+	pgd_list_del(pgd);
+	free_page((unsigned long)pgd);
+}
+#else
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * -- wli
+ */
+static inline void pgd_list_add(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_add(&page->lru, &pgd_list);
+}
+
+static inline void pgd_list_del(pgd_t *pgd)
+{
+	struct page *page = virt_to_page(pgd);
+
+	list_del(&page->lru);
+}
+
+#define UNSHARED_PTRS_PER_PGD				\
+	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+
+static void pgd_ctor(void *p)
+{
+	pgd_t *pgd = p;
+	unsigned long flags;
+
+	/* Clear usermode parts of PGD */
+	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+
+	spin_lock_irqsave(&pgd_lock, flags);
+
+	/* If the pgd points to a shared pagetable level (either the
+	   ptes in non-PAE, or shared PMD in PAE), then just copy the
+	   references from swapper_pg_dir. */
+	if (PAGETABLE_LEVELS == 2 ||
+	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
+				swapper_pg_dir + USER_PTRS_PER_PGD,
+				KERNEL_PGD_PTRS);
+		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
+					__pa(swapper_pg_dir) >> PAGE_SHIFT,
+					USER_PTRS_PER_PGD,
+					KERNEL_PGD_PTRS);
+	}
+
+	/* list required to sync kernel mapping updates */
+	if (!SHARED_KERNEL_PMD)
+		pgd_list_add(pgd);
+
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+static void pgd_dtor(void *pgd)
+{
+	unsigned long flags; /* can be called from interrupt context */
+
+	if (SHARED_KERNEL_PMD)
+		return;
+
+	spin_lock_irqsave(&pgd_lock, flags);
+	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
+}
+
+#ifdef CONFIG_X86_PAE
+/*
+ * Mop up any pmd pages which may still be attached to the pgd.
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
+ * preallocate which never got a corresponding vma will need to be
+ * freed manually.
+ */
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
+{
+	int i;
+
+	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
+		pgd_t pgd = pgdp[i];
+
+		if (pgd_val(pgd) != 0) {
+			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
+
+			pgdp[i] = native_make_pgd(0);
+
+			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+			pmd_free(mm, pmd);
+		}
+	}
+}
+
+/*
+ * In PAE mode, we need to do a cr3 reload (=tlb flush) when
+ * updating the top-level pagetable entries to guarantee the
+ * processor notices the update.  Since this is expensive, and
+ * all 4 top-level entries are used almost immediately in a
+ * new process's life, we just pre-populate them here.
+ *
+ * Also, if we're in a paravirt environment where the kernel pmd is
+ * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
+ * and initialize the kernel pmds here.
+ */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	pud_t *pud;
+	unsigned long addr;
+	int i;
+
+	pud = pud_offset(pgd, 0);
+ 	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
+	     i++, pud++, addr += PUD_SIZE) {
+		pmd_t *pmd = pmd_alloc_one(mm, addr);
+
+		if (!pmd) {
+			pgd_mop_up_pmds(mm, pgd);
+			return 0;
+		}
+
+		if (i >= USER_PTRS_PER_PGD)
+			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
+			       sizeof(pmd_t) * PTRS_PER_PMD);
+
+		pud_populate(mm, pud, pmd);
+	}
+
+	return 1;
+}
+#else  /* !CONFIG_X86_PAE */
+/* No need to prepopulate any pagetable entries in non-PAE modes. */
+static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
+{
+	return 1;
+}
+
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
+{
+}
+#endif	/* CONFIG_X86_PAE */
+
+pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+	/* so that alloc_pd can use it */
+	mm->pgd = pgd;
+	if (pgd)
+		pgd_ctor(pgd);
+
+	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
+		pgd_dtor(pgd);
+		free_page((unsigned long)pgd);
+		pgd = NULL;
+	}
+
+	return pgd;
+}
+
+void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+	pgd_mop_up_pmds(mm, pgd);
+	pgd_dtor(pgd);
+	free_page((unsigned long)pgd);
+}
+#endif
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 6fb9e7c6893f..b46893e45d02 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,193 +173,6 @@ void reserve_top_address(unsigned long reserve)
 	__VMALLOC_RESERVE += reserve;
 }
 
-pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
-{
-	return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
-{
-	struct page *pte;
-
-#ifdef CONFIG_HIGHPTE
-	pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
-#else
-	pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
-#endif
-	if (pte)
-		pgtable_page_ctor(pte);
-	return pte;
-}
-
-/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
- * against pageattr.c; it is the unique case in which a valid change
- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
- * vmalloc faults work because attached pagetables are never freed.
- * -- wli
- */
-static inline void pgd_list_add(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_add(&page->lru, &pgd_list);
-}
-
-static inline void pgd_list_del(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_del(&page->lru);
-}
-
-#define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
-
-static void pgd_ctor(void *p)
-{
-	pgd_t *pgd = p;
-	unsigned long flags;
-
-	/* Clear usermode parts of PGD */
-	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
-
-	spin_lock_irqsave(&pgd_lock, flags);
-
-	/* If the pgd points to a shared pagetable level (either the
-	   ptes in non-PAE, or shared PMD in PAE), then just copy the
-	   references from swapper_pg_dir. */
-	if (PAGETABLE_LEVELS == 2 ||
-	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
-		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
-				swapper_pg_dir + USER_PTRS_PER_PGD,
-				KERNEL_PGD_PTRS);
-		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-					__pa(swapper_pg_dir) >> PAGE_SHIFT,
-					USER_PTRS_PER_PGD,
-					KERNEL_PGD_PTRS);
-	}
-
-	/* list required to sync kernel mapping updates */
-	if (!SHARED_KERNEL_PMD)
-		pgd_list_add(pgd);
-
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-static void pgd_dtor(void *pgd)
-{
-	unsigned long flags; /* can be called from interrupt context */
-
-	if (SHARED_KERNEL_PMD)
-		return;
-
-	spin_lock_irqsave(&pgd_lock, flags);
-	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-}
-
-#ifdef CONFIG_X86_PAE
-/*
- * Mop up any pmd pages which may still be attached to the pgd.
- * Normally they will be freed by munmap/exit_mmap, but any pmd we
- * preallocate which never got a corresponding vma will need to be
- * freed manually.
- */
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
-{
-	int i;
-
-	for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
-		pgd_t pgd = pgdp[i];
-
-		if (pgd_val(pgd) != 0) {
-			pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
-
-			pgdp[i] = native_make_pgd(0);
-
-			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
-			pmd_free(mm, pmd);
-		}
-	}
-}
-
-/*
- * In PAE mode, we need to do a cr3 reload (=tlb flush) when
- * updating the top-level pagetable entries to guarantee the
- * processor notices the update.  Since this is expensive, and
- * all 4 top-level entries are used almost immediately in a
- * new process's life, we just pre-populate them here.
- *
- * Also, if we're in a paravirt environment where the kernel pmd is
- * not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
- * and initialize the kernel pmds here.
- */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	pud_t *pud;
-	unsigned long addr;
-	int i;
-
-	pud = pud_offset(pgd, 0);
- 	for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
-	     i++, pud++, addr += PUD_SIZE) {
-		pmd_t *pmd = pmd_alloc_one(mm, addr);
-
-		if (!pmd) {
-			pgd_mop_up_pmds(mm, pgd);
-			return 0;
-		}
-
-		if (i >= USER_PTRS_PER_PGD)
-			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
-			       sizeof(pmd_t) * PTRS_PER_PMD);
-
-		pud_populate(mm, pud, pmd);
-	}
-
-	return 1;
-}
-#else  /* !CONFIG_X86_PAE */
-/* No need to prepopulate any pagetable entries in non-PAE modes. */
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-{
-	return 1;
-}
-
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
-{
-}
-#endif	/* CONFIG_X86_PAE */
-
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-
-	/* so that alloc_pd can use it */
-	mm->pgd = pgd;
-	if (pgd)
-		pgd_ctor(pgd);
-
-	if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
-		pgd_dtor(pgd);
-		free_page((unsigned long)pgd);
-		pgd = NULL;
-	}
-
-	return pgd;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	pgd_mop_up_pmds(mm, pgd);
-	pgd_dtor(pgd);
-	free_page((unsigned long)pgd);
-}
-
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
-- 
cgit v1.2.2


From 1ec1fe73dfb711f9ea5a0ef8a7e3af5b6ac8b653 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Wed, 19 Mar 2008 20:30:40 +0100
Subject: x86: xen unify x86 add common mm pgtable c fix

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index d526b46ae188..ed16b7704a3c 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -200,6 +200,24 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 
 	return 1;
 }
+
+void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
+{
+	paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
+
+	/* Note: almost everything apart from _PAGE_PRESENT is
+	   reserved at the pmd (PDPT) level. */
+	set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT));
+
+	/*
+	 * According to Intel App note "TLBs, Paging-Structure Caches,
+	 * and Their Invalidation", April 2007, document 317080-001,
+	 * section 8.1: in PAE mode we explicitly have to flush the
+	 * TLB via cr3 if the top-level pgd is changed...
+	 */
+	if (mm == current->active_mm)
+		write_cr3(read_cr3());
+}
 #else  /* !CONFIG_X86_PAE */
 /* No need to prepopulate any pagetable entries in non-PAE modes. */
 static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
-- 
cgit v1.2.2


From 1d262d3a4932b5ae7222c8d9900696650ee95188 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:36:56 -0700
Subject: x86: put paravirt stubs into common asm/pgalloc.h

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pageattr.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index f7823a172868..938130d49b76 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,9 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		goto out_unlock;
 
 	pbase = (pte_t *)page_address(base);
-#ifdef CONFIG_X86_32
 	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
-#endif
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.2


From 397f687ab7f840dbe50353c4b60108672b653d0c Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:36:57 -0700
Subject: x86: move pte functions into common asm/pgalloc.h

Common definitions for 2-level pagetable functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c    | 6 ------
 arch/x86/mm/pgtable.c    | 7 +++++++
 arch/x86/mm/pgtable_32.c | 7 -------
 3 files changed, 7 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 2d89b7abbc54..9c09893e54fb 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -662,12 +662,6 @@ int memory_add_physaddr_to_nid(u64 start)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
-{
-	pgtable_page_dtor(pte);
-	tlb_remove_page(tlb, pte);
-}
-
 void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
 	tlb_remove_page(tlb, virt_to_page(pmd));
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index ed16b7704a3c..83765328e265 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -21,6 +21,13 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 	return pte;
 }
 
+void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+{
+	pgtable_page_dtor(pte);
+	paravirt_release_pt(page_to_pfn(pte));
+	tlb_remove_page(tlb, pte);
+}
+
 #ifdef CONFIG_X86_64
 static inline void pgd_list_add(pgd_t *pgd)
 {
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b46893e45d02..b9e3dac32391 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,13 +173,6 @@ void reserve_top_address(unsigned long reserve)
 	__VMALLOC_RESERVE += reserve;
 }
 
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
-{
-	pgtable_page_dtor(pte);
-	paravirt_release_pt(page_to_pfn(pte));
-	tlb_remove_page(tlb, pte);
-}
-
 #ifdef CONFIG_X86_PAE
 
 void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-- 
cgit v1.2.2


From 170fdff7057d4247e3f28cca96d0db1fbc854e3b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:36:58 -0700
Subject: x86: move pmd functions into common asm/pgalloc.h

Common definitions for 3-level pagetable functions.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c    |  5 -----
 arch/x86/mm/pgtable.c    |  8 ++++++++
 arch/x86/mm/pgtable_32.c | 10 ----------
 3 files changed, 8 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9c09893e54fb..4465104f5514 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -662,11 +662,6 @@ int memory_add_physaddr_to_nid(u64 start)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-{
-	tlb_remove_page(tlb, virt_to_page(pmd));
-}
-
 void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
 	tlb_remove_page(tlb, virt_to_page(pud));
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 83765328e265..1c41efedf6d0 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -28,6 +28,14 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 	tlb_remove_page(tlb, pte);
 }
 
+#if PAGETABLE_LEVELS > 2
+void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+{
+	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+	tlb_remove_page(tlb, virt_to_page(pmd));
+}
+#endif	/* PAGETABLE_LEVELS > 2 */
+
 #ifdef CONFIG_X86_64
 static inline void pgd_list_add(pgd_t *pgd)
 {
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index b9e3dac32391..9ee007be9142 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -173,16 +173,6 @@ void reserve_top_address(unsigned long reserve)
 	__VMALLOC_RESERVE += reserve;
 }
 
-#ifdef CONFIG_X86_PAE
-
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
-{
-	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
-	tlb_remove_page(tlb, virt_to_page(pmd));
-}
-
-#endif
-
 int pmd_bad(pmd_t pmd)
 {
 	WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
-- 
cgit v1.2.2


From 5a5f8f42241cf09caec5530a7639cfa8dccc3a7b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:36:59 -0700
Subject: x86: move pgalloc pud and pgd operations into common place

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/init_64.c | 5 -----
 arch/x86/mm/pgtable.c | 7 +++++++
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4465104f5514..1ff7906a9a4d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -662,11 +662,6 @@ int memory_add_physaddr_to_nid(u64 start)
 EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
-void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
-{
-	tlb_remove_page(tlb, virt_to_page(pud));
-}
-
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1c41efedf6d0..c67966e10a95 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -34,6 +34,13 @@ void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
+
+#if PAGETABLE_LEVELS > 3
+void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+{
+	tlb_remove_page(tlb, virt_to_page(pud));
+}
+#endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.2


From 394158559d4c912cc58c311b6346cdea0ed2b1de Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:00 -0700
Subject: x86: move all the pgd_list handling to one place

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 28 +++++++---------------------
 1 file changed, 7 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index c67966e10a95..0d2866b8f425 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -43,34 +43,31 @@ void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 #endif	/* PAGETABLE_LEVELS > 3 */
 #endif	/* PAGETABLE_LEVELS > 2 */
 
-#ifdef CONFIG_X86_64
 static inline void pgd_list_add(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
-	unsigned long flags;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	list_add(&page->lru, &pgd_list);
-	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
 static inline void pgd_list_del(pgd_t *pgd)
 {
 	struct page *page = virt_to_page(pgd);
-	unsigned long flags;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	list_del(&page->lru);
-	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+#ifdef CONFIG_X86_64
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	unsigned boundary;
 	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
+	unsigned long flags;
 	if (!pgd)
 		return NULL;
+	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_add(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 	/*
 	 * Copy kernel pointers in from init.
 	 * Could keep a freelist or slab cache of those because the kernel
@@ -86,8 +83,11 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 {
+	unsigned long flags;
 	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
+	spin_lock_irqsave(&pgd_lock, flags);
 	pgd_list_del(pgd);
+	spin_unlock_irqrestore(&pgd_lock, flags);
 	free_page((unsigned long)pgd);
 }
 #else
@@ -101,20 +101,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
  * vmalloc faults work because attached pagetables are never freed.
  * -- wli
  */
-static inline void pgd_list_add(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_add(&page->lru, &pgd_list);
-}
-
-static inline void pgd_list_del(pgd_t *pgd)
-{
-	struct page *page = virt_to_page(pgd);
-
-	list_del(&page->lru);
-}
-
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
 
-- 
cgit v1.2.2


From 6944a9c8945212a0cc1de3589736d59ec542c539 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:01 -0700
Subject: x86: rename paravirt_alloc_pt etc after the pagetable structure

Rename (alloc|release)_(pt|pd) to pte/pmd to explicitly match the name
of the appropriate pagetable level structure.

[ x86.git merge work by Mark McLoughlin <markmc@redhat.com> ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 10 +++++-----
 arch/x86/kernel/vmi_32.c   | 20 ++++++++++----------
 arch/x86/mm/init_32.c      |  6 +++---
 arch/x86/mm/ioremap.c      |  2 +-
 arch/x86/mm/pageattr.c     |  2 +-
 arch/x86/mm/pgtable.c      | 18 +++++++++---------
 arch/x86/xen/enlighten.c   | 32 ++++++++++++++++----------------
 7 files changed, 45 insertions(+), 45 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 3733412d1357..362653da003f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -366,11 +366,11 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.flush_tlb_single = native_flush_tlb_single,
 	.flush_tlb_others = native_flush_tlb_others,
 
-	.alloc_pt = paravirt_nop,
-	.alloc_pd = paravirt_nop,
-	.alloc_pd_clone = paravirt_nop,
-	.release_pt = paravirt_nop,
-	.release_pd = paravirt_nop,
+	.alloc_pte = paravirt_nop,
+	.alloc_pmd = paravirt_nop,
+	.alloc_pmd_clone = paravirt_nop,
+	.release_pte = paravirt_nop,
+	.release_pmd = paravirt_nop,
 
 	.set_pte = native_set_pte,
 	.set_pte_at = native_set_pte_at,
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 12affe1f9bce..44f7ca153b71 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -392,13 +392,13 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type)
 }
 #endif
 
-static void vmi_allocate_pt(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pte(struct mm_struct *mm, u32 pfn)
 {
 	vmi_set_page_type(pfn, VMI_PAGE_L1);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0);
 }
 
-static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
+static void vmi_allocate_pmd(struct mm_struct *mm, u32 pfn)
 {
  	/*
 	 * This call comes in very early, before mem_map is setup.
@@ -409,20 +409,20 @@ static void vmi_allocate_pd(struct mm_struct *mm, u32 pfn)
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0);
 }
 
-static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
+static void vmi_allocate_pmd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count)
 {
  	vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE);
 	vmi_check_page_type(clonepfn, VMI_PAGE_L2);
 	vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count);
 }
 
-static void vmi_release_pt(u32 pfn)
+static void vmi_release_pte(u32 pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L1);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
 }
 
-static void vmi_release_pd(u32 pfn)
+static void vmi_release_pmd(u32 pfn)
 {
 	vmi_ops.release_page(pfn, VMI_PAGE_L2);
 	vmi_set_page_type(pfn, VMI_PAGE_NORMAL);
@@ -871,15 +871,15 @@ static inline int __init activate_vmi(void)
 
 	vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage);
 	if (vmi_ops.allocate_page) {
-		pv_mmu_ops.alloc_pt = vmi_allocate_pt;
-		pv_mmu_ops.alloc_pd = vmi_allocate_pd;
-		pv_mmu_ops.alloc_pd_clone = vmi_allocate_pd_clone;
+		pv_mmu_ops.alloc_pte = vmi_allocate_pte;
+		pv_mmu_ops.alloc_pmd = vmi_allocate_pmd;
+		pv_mmu_ops.alloc_pmd_clone = vmi_allocate_pmd_clone;
 	}
 
 	vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage);
 	if (vmi_ops.release_page) {
-		pv_mmu_ops.release_pt = vmi_release_pt;
-		pv_mmu_ops.release_pd = vmi_release_pd;
+		pv_mmu_ops.release_pte = vmi_release_pte;
+		pv_mmu_ops.release_pmd = vmi_release_pmd;
 	}
 
 	/* Set linear is needed in all cases */
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 9ec62da85fd7..df490905f377 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -71,7 +71,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
 	if (!(pgd_val(*pgd) & _PAGE_PRESENT)) {
 		pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
 
-		paravirt_alloc_pd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
+		paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
 		set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
 		pud = pud_offset(pgd, 0);
 		BUG_ON(pmd_table != pmd_offset(pud, 0));
@@ -100,7 +100,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
 				(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
 		}
 
-		paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT);
+		paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
 		set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
 		BUG_ON(page_table != pte_offset_kernel(pmd, 0));
 	}
@@ -365,7 +365,7 @@ void __init native_pagetable_setup_start(pgd_t *base)
 
 		pte_clear(NULL, va, pte);
 	}
-	paravirt_alloc_pd(&init_mm, __pa(base) >> PAGE_SHIFT);
+	paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 3a4baf95e24d..36a3f7ded626 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -407,7 +407,7 @@ void __init early_ioremap_clear(void)
 
 	pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
 	pmd_clear(pmd);
-	paravirt_release_pt(__pa(bm_pte) >> PAGE_SHIFT);
+	paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
 	__flush_tlb_all();
 }
 
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 938130d49b76..57e762c141f7 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -483,7 +483,7 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 		goto out_unlock;
 
 	pbase = (pte_t *)page_address(base);
-	paravirt_alloc_pt(&init_mm, page_to_pfn(base));
+	paravirt_alloc_pte(&init_mm, page_to_pfn(base));
 	ref_prot = pte_pgprot(pte_clrhuge(*kpte));
 
 #ifdef CONFIG_X86_64
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 0d2866b8f425..1d44d6dd4c9f 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -24,14 +24,14 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
 void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
 {
 	pgtable_page_dtor(pte);
-	paravirt_release_pt(page_to_pfn(pte));
+	paravirt_release_pte(page_to_pfn(pte));
 	tlb_remove_page(tlb, pte);
 }
 
 #if PAGETABLE_LEVELS > 2
 void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 {
-	paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
+	paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pmd));
 }
 
@@ -122,10 +122,10 @@ static void pgd_ctor(void *p)
 		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
 				swapper_pg_dir + USER_PTRS_PER_PGD,
 				KERNEL_PGD_PTRS);
-		paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
-					__pa(swapper_pg_dir) >> PAGE_SHIFT,
-					USER_PTRS_PER_PGD,
-					KERNEL_PGD_PTRS);
+		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
+					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
+					 USER_PTRS_PER_PGD,
+					 KERNEL_PGD_PTRS);
 	}
 
 	/* list required to sync kernel mapping updates */
@@ -166,7 +166,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
 
 			pgdp[i] = native_make_pgd(0);
 
-			paravirt_release_pd(pgd_val(pgd) >> PAGE_SHIFT);
+			paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
 			pmd_free(mm, pmd);
 		}
 	}
@@ -211,7 +211,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 
 void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
 {
-	paravirt_alloc_pd(mm, __pa(pmd) >> PAGE_SHIFT);
+	paravirt_alloc_pmd(mm, __pa(pmd) >> PAGE_SHIFT);
 
 	/* Note: almost everything apart from _PAGE_PRESENT is
 	   reserved at the pmd (PDPT) level. */
@@ -242,7 +242,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 
-	/* so that alloc_pd can use it */
+	/* so that alloc_pmd can use it */
 	mm->pgd = pgd;
 	if (pgd)
 		pgd_ctor(pgd);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0388220cf97..36f36e6b0874 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -655,15 +655,15 @@ static void xen_write_cr3(unsigned long cr3)
 
 /* Early in boot, while setting up the initial pagetable, assume
    everything is pinned. */
-static __init void xen_alloc_pt_init(struct mm_struct *mm, u32 pfn)
+static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
 {
 	BUG_ON(mem_map);	/* should only be used early */
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
-/* Early release_pt assumes that all pts are pinned, since there's
+/* Early release_pte assumes that all pts are pinned, since there's
    only init_mm and anything attached to that is pinned. */
-static void xen_release_pt_init(u32 pfn)
+static void xen_release_pte_init(u32 pfn)
 {
 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
 }
@@ -697,12 +697,12 @@ static void xen_alloc_ptpage(struct mm_struct *mm, u32 pfn, unsigned level)
 	}
 }
 
-static void xen_alloc_pt(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pte(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PTE);
 }
 
-static void xen_alloc_pd(struct mm_struct *mm, u32 pfn)
+static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PMD);
 }
@@ -722,12 +722,12 @@ static void xen_release_ptpage(u32 pfn, unsigned level)
 	}
 }
 
-static void xen_release_pt(u32 pfn)
+static void xen_release_pte(u32 pfn)
 {
 	xen_release_ptpage(pfn, PT_PTE);
 }
 
-static void xen_release_pd(u32 pfn)
+static void xen_release_pmd(u32 pfn)
 {
 	xen_release_ptpage(pfn, PT_PMD);
 }
@@ -849,10 +849,10 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
 {
 	/* This will work as long as patching hasn't happened yet
 	   (which it hasn't) */
-	pv_mmu_ops.alloc_pt = xen_alloc_pt;
-	pv_mmu_ops.alloc_pd = xen_alloc_pd;
-	pv_mmu_ops.release_pt = xen_release_pt;
-	pv_mmu_ops.release_pd = xen_release_pd;
+	pv_mmu_ops.alloc_pte = xen_alloc_pte;
+	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
+	pv_mmu_ops.release_pte = xen_release_pte;
+	pv_mmu_ops.release_pmd = xen_release_pmd;
 	pv_mmu_ops.set_pte = xen_set_pte;
 
 	setup_shared_info();
@@ -1059,11 +1059,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
 	.pte_update = paravirt_nop,
 	.pte_update_defer = paravirt_nop,
 
-	.alloc_pt = xen_alloc_pt_init,
-	.release_pt = xen_release_pt_init,
-	.alloc_pd = xen_alloc_pt_init,
-	.alloc_pd_clone = paravirt_nop,
-	.release_pd = xen_release_pt_init,
+	.alloc_pte = xen_alloc_pte_init,
+	.release_pte = xen_release_pte_init,
+	.alloc_pmd = xen_alloc_pte_init,
+	.alloc_pmd_clone = paravirt_nop,
+	.release_pmd = xen_release_pte_init,
 
 #ifdef CONFIG_HIGHPTE
 	.kmap_atomic_pte = xen_kmap_atomic_pte,
-- 
cgit v1.2.2


From 2761fa0920756dc471d297843646a4a9bca6656f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:02 -0700
Subject: x86: add pud_alloc for 4-level pagetables

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/paravirt.c | 2 ++
 arch/x86/mm/pgtable.c      | 1 +
 2 files changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 362653da003f..74f0c5ea2a03 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -369,8 +369,10 @@ struct pv_mmu_ops pv_mmu_ops = {
 	.alloc_pte = paravirt_nop,
 	.alloc_pmd = paravirt_nop,
 	.alloc_pmd_clone = paravirt_nop,
+	.alloc_pud = paravirt_nop,
 	.release_pte = paravirt_nop,
 	.release_pmd = paravirt_nop,
+	.release_pud = paravirt_nop,
 
 	.set_pte = native_set_pte,
 	.set_pte_at = native_set_pte_at,
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 1d44d6dd4c9f..5accc08683c7 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -38,6 +38,7 @@ void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
 #if PAGETABLE_LEVELS > 3
 void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
 {
+	paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
 	tlb_remove_page(tlb, virt_to_page(pud));
 }
 #endif	/* PAGETABLE_LEVELS > 3 */
-- 
cgit v1.2.2


From ee5aa8d3ba65d76157f22b7afedd089d8acfe524 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:03 -0700
Subject: x86/pgtable.h: demacro ptep_set_access_flags

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 5accc08683c7..e7cda2057e1d 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,5 +1,6 @@
 #include <linux/mm.h>
 #include <asm/pgalloc.h>
+#include <asm/pgtable.h>
 #include <asm/tlb.h>
 
 pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
@@ -264,3 +265,18 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	free_page((unsigned long)pgd);
 }
 #endif
+
+int ptep_set_access_flags(struct vm_area_struct *vma,
+			  unsigned long address, pte_t *ptep,
+			  pte_t entry, int dirty)
+{
+	int changed = !pte_same(*ptep, entry);
+
+	if (changed && dirty) {
+		*ptep = entry;
+		pte_update_defer(vma->vm_mm, address, ptep);
+		flush_tlb_page(vma, address);
+	}
+
+	return changed;
+}
-- 
cgit v1.2.2


From f9fbf1a36a6bb6a639459802bccee01185ee3220 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:04 -0700
Subject: x86/pgtable.h: demacro ptep_test_and_clear_young

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e7cda2057e1d..54bd77a7eee0 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -280,3 +280,18 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 
 	return changed;
 }
+
+int ptep_test_and_clear_young(struct vm_area_struct *vma,
+			      unsigned long addr, pte_t *ptep)
+{
+	int ret = 0;
+
+	if (pte_young(*ptep))
+		ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
+					 &ptep->pte);
+
+	if (ret)
+		pte_update(vma->vm_mm, addr, ptep);
+
+	return ret;
+}
-- 
cgit v1.2.2


From c20311e165eb94f5ef12b15e452cc6ec24bd7813 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:05 -0700
Subject: x86/pgtable.h: demacro ptep_clear_flush_young

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 54bd77a7eee0..af0c50161d95 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -295,3 +295,15 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma,
 
 	return ret;
 }
+
+int ptep_clear_flush_young(struct vm_area_struct *vma,
+			   unsigned long address, pte_t *ptep)
+{
+	int young;
+
+	young = ptep_test_and_clear_young(vma, address, ptep);
+	if (young)
+		flush_tlb_page(vma, address);
+
+	return young;
+}
-- 
cgit v1.2.2


From abf33038ffa65097939d86d2a90f93adc6115aa0 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:07 -0700
Subject: xen: use appropriate pte types

Convert Xen pagetable handling to use appropriate *val_t types.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 2a054ef2a3da..363f7a3b67fb 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -214,35 +214,35 @@ void xen_pmd_clear(pmd_t *pmdp)
 	xen_set_pmd(pmdp, __pmd(0));
 }
 
-unsigned long long xen_pte_val(pte_t pte)
+pteval_t xen_pte_val(pte_t pte)
 {
-	unsigned long long ret = 0;
+	pteval_t ret = 0;
 
 	if (pte.pte_low) {
-		ret = ((unsigned long long)pte.pte_high << 32) | pte.pte_low;
+		ret = ((pteval_t)pte.pte_high << 32) | pte.pte_low;
 		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
 	}
 
 	return ret;
 }
 
-unsigned long long xen_pmd_val(pmd_t pmd)
+pmdval_t xen_pmd_val(pmd_t pmd)
 {
-	unsigned long long ret = pmd.pmd;
+	pmdval_t ret = pmd.pmd;
 	if (ret)
 		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
 	return ret;
 }
 
-unsigned long long xen_pgd_val(pgd_t pgd)
+pgdval_t xen_pgd_val(pgd_t pgd)
 {
-	unsigned long long ret = pgd.pgd;
+	pgdval_t ret = pgd.pgd;
 	if (ret)
 		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
 	return ret;
 }
 
-pte_t xen_make_pte(unsigned long long pte)
+pte_t xen_make_pte(pteval_t pte)
 {
 	if (pte & _PAGE_PRESENT) {
 		pte = phys_to_machine(XPADDR(pte)).maddr;
@@ -252,7 +252,7 @@ pte_t xen_make_pte(unsigned long long pte)
 	return (pte_t){ .pte = pte };
 }
 
-pmd_t xen_make_pmd(unsigned long long pmd)
+pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	if (pmd & 1)
 		pmd = phys_to_machine(XPADDR(pmd)).maddr;
@@ -260,7 +260,7 @@ pmd_t xen_make_pmd(unsigned long long pmd)
 	return (pmd_t){ pmd };
 }
 
-pgd_t xen_make_pgd(unsigned long long pgd)
+pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	if (pgd & _PAGE_PRESENT)
 		pgd = phys_to_machine(XPADDR(pgd)).maddr;
@@ -273,9 +273,9 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 	*ptep = pte;
 }
 
-unsigned long xen_pte_val(pte_t pte)
+pteval_t xen_pte_val(pte_t pte)
 {
-	unsigned long ret = pte.pte_low;
+	pteval_t ret = pte.pte_low;
 
 	if (ret & _PAGE_PRESENT)
 		ret = machine_to_phys(XMADDR(ret)).paddr;
@@ -283,15 +283,15 @@ unsigned long xen_pte_val(pte_t pte)
 	return ret;
 }
 
-unsigned long xen_pgd_val(pgd_t pgd)
+pgdval_t xen_pgd_val(pgd_t pgd)
 {
-	unsigned long ret = pgd.pgd;
+	pteval_t ret = pgd.pgd;
 	if (ret)
 		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
 	return ret;
 }
 
-pte_t xen_make_pte(unsigned long pte)
+pte_t xen_make_pte(pteval_t pte)
 {
 	if (pte & _PAGE_PRESENT) {
 		pte = phys_to_machine(XPADDR(pte)).maddr;
@@ -301,7 +301,7 @@ pte_t xen_make_pte(unsigned long pte)
 	return (pte_t){ pte };
 }
 
-pgd_t xen_make_pgd(unsigned long pgd)
+pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	if (pgd & _PAGE_PRESENT)
 		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-- 
cgit v1.2.2


From 430442e38e7f049841c5838f8c7027bd9e170045 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:08 -0700
Subject: xen: make use of pte_t union

pte_t always contains a "pte" field for the whole pte value, so make
use of it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 363f7a3b67fb..3148db8e794e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -216,12 +216,10 @@ void xen_pmd_clear(pmd_t *pmdp)
 
 pteval_t xen_pte_val(pte_t pte)
 {
-	pteval_t ret = 0;
+	pteval_t ret = pte.pte;
 
-	if (pte.pte_low) {
-		ret = ((pteval_t)pte.pte_high << 32) | pte.pte_low;
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
-	}
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 
 	return ret;
 }
@@ -229,16 +227,16 @@ pteval_t xen_pte_val(pte_t pte)
 pmdval_t xen_pmd_val(pmd_t pmd)
 {
 	pmdval_t ret = pmd.pmd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 	return ret;
 }
 
 pgdval_t xen_pgd_val(pgd_t pgd)
 {
 	pgdval_t ret = pgd.pgd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 	return ret;
 }
 
@@ -254,7 +252,7 @@ pte_t xen_make_pte(pteval_t pte)
 
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
-	if (pmd & 1)
+	if (pmd & _PAGE_PRESENT)
 		pmd = phys_to_machine(XPADDR(pmd)).maddr;
 
 	return (pmd_t){ pmd };
@@ -275,7 +273,7 @@ void xen_set_pte(pte_t *ptep, pte_t pte)
 
 pteval_t xen_pte_val(pte_t pte)
 {
-	pteval_t ret = pte.pte_low;
+	pteval_t ret = pte.pte;
 
 	if (ret & _PAGE_PRESENT)
 		ret = machine_to_phys(XMADDR(ret)).paddr;
@@ -286,8 +284,8 @@ pteval_t xen_pte_val(pte_t pte)
 pgdval_t xen_pgd_val(pgd_t pgd)
 {
 	pteval_t ret = pgd.pgd;
-	if (ret)
-		ret = machine_to_phys(XMADDR(ret)).paddr | 1;
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
 	return ret;
 }
 
-- 
cgit v1.2.2


From 947a69c90c0d07ac7f214e46dabbe49f2a230e00 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:09 -0700
Subject: xen: unify pte operations

We can fold the essentially common pte functions together now.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c | 125 +++++++++++++++++++----------------------------------
 1 file changed, 44 insertions(+), 81 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3148db8e794e..272635aaef88 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -171,6 +171,49 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 	xen_set_pte(ptep, pteval);
 }
 
+pteval_t xen_pte_val(pte_t pte)
+{
+	pteval_t ret = pte.pte;
+
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+
+	return ret;
+}
+
+pgdval_t xen_pgd_val(pgd_t pgd)
+{
+	pgdval_t ret = pgd.pgd;
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return ret;
+}
+
+pte_t xen_make_pte(pteval_t pte)
+{
+	if (pte & _PAGE_PRESENT) {
+		pte = phys_to_machine(XPADDR(pte)).maddr;
+		pte &= ~(_PAGE_PCD | _PAGE_PWT);
+	}
+
+	return (pte_t){ .pte = pte };
+}
+
+pgd_t xen_make_pgd(pgdval_t pgd)
+{
+	if (pgd & _PAGE_PRESENT)
+		pgd = phys_to_machine(XPADDR(pgd)).maddr;
+
+	return (pgd_t){ pgd };
+}
+
+pmdval_t xen_pmd_val(pmd_t pmd)
+{
+	pmdval_t ret = native_pmd_val(pmd);
+	if (ret & _PAGE_PRESENT)
+		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+	return ret;
+}
 #ifdef CONFIG_X86_PAE
 void xen_set_pud(pud_t *ptr, pud_t val)
 {
@@ -214,98 +257,18 @@ void xen_pmd_clear(pmd_t *pmdp)
 	xen_set_pmd(pmdp, __pmd(0));
 }
 
-pteval_t xen_pte_val(pte_t pte)
-{
-	pteval_t ret = pte.pte;
-
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-
-	return ret;
-}
-
-pmdval_t xen_pmd_val(pmd_t pmd)
-{
-	pmdval_t ret = pmd.pmd;
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
-}
-
-pgdval_t xen_pgd_val(pgd_t pgd)
-{
-	pgdval_t ret = pgd.pgd;
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
-}
-
-pte_t xen_make_pte(pteval_t pte)
-{
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ .pte = pte };
-}
-
 pmd_t xen_make_pmd(pmdval_t pmd)
 {
 	if (pmd & _PAGE_PRESENT)
 		pmd = phys_to_machine(XPADDR(pmd)).maddr;
 
-	return (pmd_t){ pmd };
-}
-
-pgd_t xen_make_pgd(pgdval_t pgd)
-{
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
+	return native_make_pmd(pmd);
 }
 #else  /* !PAE */
 void xen_set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
 }
-
-pteval_t xen_pte_val(pte_t pte)
-{
-	pteval_t ret = pte.pte;
-
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr;
-
-	return ret;
-}
-
-pgdval_t xen_pgd_val(pgd_t pgd)
-{
-	pteval_t ret = pgd.pgd;
-	if (ret & _PAGE_PRESENT)
-		ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
-	return ret;
-}
-
-pte_t xen_make_pte(pteval_t pte)
-{
-	if (pte & _PAGE_PRESENT) {
-		pte = phys_to_machine(XPADDR(pte)).maddr;
-		pte &= ~(_PAGE_PCD | _PAGE_PWT);
-	}
-
-	return (pte_t){ pte };
-}
-
-pgd_t xen_make_pgd(pgdval_t pgd)
-{
-	if (pgd & _PAGE_PRESENT)
-		pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
-	return (pgd_t){ pgd };
-}
 #endif	/* CONFIG_X86_PAE */
 
 /*
-- 
cgit v1.2.2


From 90e9f53662826db3cdd6d99bd394d727b05160c1 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:12 -0700
Subject: xen: make sure iret faults are trapped

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_32.S | 2 +-
 arch/x86/xen/xen-asm.S     | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index f0f8934fc303..568c6ccd7ae2 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -409,7 +409,7 @@ restore_nocheck_notrace:
 irq_return:
 	INTERRUPT_RETURN
 .section .fixup,"ax"
-iret_exc:
+ENTRY(iret_exc)
 	pushl $0			# no error code
 	pushl $do_iret_error
 	jmp error_code
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index fe161ed4b01e..99223cc323be 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -184,8 +184,12 @@ iret_restore_end:
 	   region is OK. */
 	je xen_hypervisor_callback
 
-	iret
+1:	iret
 xen_iret_end_crit:
+.section __ex_table,"a"
+	.align 4
+	.long 1b,iret_exc
+.previous
 
 hyper_iret:
 	/* put this out of line since its very rarely used */
-- 
cgit v1.2.2


From 68db065c845bd9d0eb96946ab104b4c82d0ae9da Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:13 -0700
Subject: x86: unify KERNEL_PGD_PTRS

Make KERNEL_PGD_PTRS common, as previously it was only being defined
for 32-bit.

There are a couple of follow-on changes from this:
 - KERNEL_PGD_PTRS was being defined in terms of USER_PGD_PTRS.  The
   definition of USER_PGD_PTRS doesn't really make much sense on x86-64,
   since it can have two different user address-space configurations.
   I renamed USER_PGD_PTRS to KERNEL_PGD_BOUNDARY, which is meaningful
   for all of 32/32, 32/64 and 64/64 process configurations.

 - USER_PTRS_PER_PGD was also defined and was being used for similar
   purposes.  Converting its users to KERNEL_PGD_BOUNDARY left it
   completely unused, and so I removed it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Zach Amsden <zach@vmware.com>

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/reboot.c            |  4 ++--
 arch/x86/kernel/smpboot.c           |  4 ++--
 arch/x86/kernel/vmi_32.c            |  2 +-
 arch/x86/mach-voyager/voyager_smp.c |  4 ++--
 arch/x86/mm/init_32.c               |  2 +-
 arch/x86/mm/pgtable.c               | 12 ++++++------
 6 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 19c9386ac118..1791a751a772 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -8,6 +8,7 @@
 #include <asm/apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
+#include <asm/pgtable.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
 
@@ -15,7 +16,6 @@
 # include <linux/dmi.h>
 # include <linux/ctype.h>
 # include <linux/mc146818rtc.h>
-# include <asm/pgtable.h>
 #else
 # include <asm/iommu.h>
 #endif
@@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length)
 	/* Remap the kernel at virtual address zero, as well as offset zero
 	   from the kernel segment.  This assumes the kernel segment starts at
 	   virtual address PAGE_OFFSET. */
-	memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
+	memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 		sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS);
 
 	/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6a925394bc7e..2de2f7a2ed5d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1039,8 +1039,8 @@ int __cpuinit native_cpu_up(unsigned int cpu)
 
 #ifdef CONFIG_X86_32
 	/* init low mem mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
 #endif
 
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 44f7ca153b71..956f38927aa7 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -320,7 +320,7 @@ static void check_zeroed_page(u32 pfn, int type, struct page *page)
 	 * pdes need to be zeroed.
 	 */
 	if (type & VMI_PAGE_CLONE)
-		limit = USER_PTRS_PER_PGD;
+		limit = KERNEL_PGD_BOUNDARY;
 	for (i = 0; i < limit; i++)
 		BUG_ON(ptr[i]);
 }
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 96f60c7cd124..394046effd78 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -560,8 +560,8 @@ static void __init do_boot_cpu(__u8 cpu)
 		hijack_source.idt.Offset, stack_start.sp));
 
 	/* init lowmem identity mapping */
-	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS,
-			min_t(unsigned long, KERNEL_PGD_PTRS, USER_PGD_PTRS));
+	clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+			min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 	flush_tlb_all();
 
 	if (quad_boot) {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index df490905f377..08aa1878fad4 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -457,7 +457,7 @@ void zap_low_mappings(void)
 	 * Note that "pgd_clear()" doesn't do it for
 	 * us, because pgd_clear() is a no-op on i386.
 	 */
-	for (i = 0; i < USER_PTRS_PER_PGD; i++) {
+	for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
 #ifdef CONFIG_X86_PAE
 		set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
 #else
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index af0c50161d95..e2ac320e6151 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -104,7 +104,7 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
  * -- wli
  */
 #define UNSHARED_PTRS_PER_PGD				\
-	(SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
+	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
 static void pgd_ctor(void *p)
 {
@@ -112,7 +112,7 @@ static void pgd_ctor(void *p)
 	unsigned long flags;
 
 	/* Clear usermode parts of PGD */
-	memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
+	memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
 
 	spin_lock_irqsave(&pgd_lock, flags);
 
@@ -121,12 +121,12 @@ static void pgd_ctor(void *p)
 	   references from swapper_pg_dir. */
 	if (PAGETABLE_LEVELS == 2 ||
 	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
-		clone_pgd_range(pgd + USER_PTRS_PER_PGD,
-				swapper_pg_dir + USER_PTRS_PER_PGD,
+		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
+				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
 		paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
 					 __pa(swapper_pg_dir) >> PAGE_SHIFT,
-					 USER_PTRS_PER_PGD,
+					 KERNEL_PGD_BOUNDARY,
 					 KERNEL_PGD_PTRS);
 	}
 
@@ -201,7 +201,7 @@ static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
 			return 0;
 		}
 
-		if (i >= USER_PTRS_PER_PGD)
+		if (i >= KERNEL_PGD_BOUNDARY)
 			memcpy(pmd, (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
 			       sizeof(pmd_t) * PTRS_PER_PMD);
 
-- 
cgit v1.2.2


From 85958b465c2e0de315575b1d3d7e7c2ce7126880 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:14 -0700
Subject: x86: unify pgd ctor/dtor

All pagetables need fundamentally the same setup and destruction, so
just use the same code for everything.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andi Kleen <ak@suse.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/mm/pgtable.c | 59 ++++++++++++---------------------------------------
 1 file changed, 13 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e2ac320e6151..50159764f694 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -59,50 +59,6 @@ static inline void pgd_list_del(pgd_t *pgd)
 	list_del(&page->lru);
 }
 
-#ifdef CONFIG_X86_64
-pgd_t *pgd_alloc(struct mm_struct *mm)
-{
-	unsigned boundary;
-	pgd_t *pgd = (pgd_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT);
-	unsigned long flags;
-	if (!pgd)
-		return NULL;
-	spin_lock_irqsave(&pgd_lock, flags);
-	pgd_list_add(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-	/*
-	 * Copy kernel pointers in from init.
-	 * Could keep a freelist or slab cache of those because the kernel
-	 * part never changes.
-	 */
-	boundary = pgd_index(__PAGE_OFFSET);
-	memset(pgd, 0, boundary * sizeof(pgd_t));
-	memcpy(pgd + boundary,
-	       init_level4_pgt + boundary,
-	       (PTRS_PER_PGD - boundary) * sizeof(pgd_t));
-	return pgd;
-}
-
-void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-	unsigned long flags;
-	BUG_ON((unsigned long)pgd & (PAGE_SIZE-1));
-	spin_lock_irqsave(&pgd_lock, flags);
-	pgd_list_del(pgd);
-	spin_unlock_irqrestore(&pgd_lock, flags);
-	free_page((unsigned long)pgd);
-}
-#else
-/*
- * List of all pgd's needed for non-PAE so it can invalidate entries
- * in both cached and uncached pgd's; not needed for PAE since the
- * kernel pmd is shared. If PAE were not to share the pmd a similar
- * tactic would be needed. This is essentially codepath-based locking
- * against pageattr.c; it is the unique case in which a valid change
- * of kernel pagetables can't be lazily synchronized by vmalloc faults.
- * vmalloc faults work because attached pagetables are never freed.
- * -- wli
- */
 #define UNSHARED_PTRS_PER_PGD				\
 	(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
 
@@ -120,7 +76,8 @@ static void pgd_ctor(void *p)
 	   ptes in non-PAE, or shared PMD in PAE), then just copy the
 	   references from swapper_pg_dir. */
 	if (PAGETABLE_LEVELS == 2 ||
-	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD)) {
+	    (PAGETABLE_LEVELS == 3 && SHARED_KERNEL_PMD) ||
+	    PAGETABLE_LEVELS == 4) {
 		clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
 				swapper_pg_dir + KERNEL_PGD_BOUNDARY,
 				KERNEL_PGD_PTRS);
@@ -149,6 +106,17 @@ static void pgd_dtor(void *pgd)
 	spin_unlock_irqrestore(&pgd_lock, flags);
 }
 
+/*
+ * List of all pgd's needed for non-PAE so it can invalidate entries
+ * in both cached and uncached pgd's; not needed for PAE since the
+ * kernel pmd is shared. If PAE were not to share the pmd a similar
+ * tactic would be needed. This is essentially codepath-based locking
+ * against pageattr.c; it is the unique case in which a valid change
+ * of kernel pagetables can't be lazily synchronized by vmalloc faults.
+ * vmalloc faults work because attached pagetables are never freed.
+ * -- wli
+ */
+
 #ifdef CONFIG_X86_PAE
 /*
  * Mop up any pmd pages which may still be attached to the pgd.
@@ -264,7 +232,6 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 	pgd_dtor(pgd);
 	free_page((unsigned long)pgd);
 }
-#endif
 
 int ptep_set_access_flags(struct vm_area_struct *vma,
 			  unsigned long address, pte_t *ptep,
-- 
cgit v1.2.2


From e2a81baf6604a2e08e10c7405b0349106f77c8af Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:17 -0700
Subject: xen: support sysenter/sysexit if hypervisor does

64-bit Xen supports sysenter for 32-bit guests, so support its
use.  (sysenter is faster than int $0x80 in 32-on-64.)

sysexit is still not supported, so we fake it up using iret.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_32.S | 18 ++++++++++++++-
 arch/x86/xen/enlighten.c   |  3 +--
 arch/x86/xen/setup.c       | 21 +++++++++++++++++
 arch/x86/xen/smp.c         |  1 +
 arch/x86/xen/xen-asm.S     | 56 ++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h     |  3 +++
 6 files changed, 99 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 568c6ccd7ae2..5d80d53eaff8 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 #ifdef CONFIG_XEN
+/* Xen doesn't set %esp to be precisely what the normal sysenter
+   entrypoint expects, so fix it up before using the normal path. */
+ENTRY(xen_sysenter_target)
+	RING0_INT_FRAME
+	addl $5*4, %esp		/* remove xen-provided frame */
+	jmp sysenter_past_esp
+
 ENTRY(xen_hypervisor_callback)
 	CFI_STARTPROC
 	pushl $0
@@ -1036,8 +1043,17 @@ ENTRY(xen_hypervisor_callback)
 	jae  1f
 
 	call xen_iret_crit_fixup
+	jmp  2f
+
+1:	cmpl $xen_sysexit_start_crit,%eax
+	jb   2f
+	cmpl $xen_sysexit_end_crit,%eax
+	jae  2f
+
+	jmp xen_sysexit_crit_fixup
 
-1:	mov %esp, %eax
+ENTRY(xen_do_upcall)
+2:	mov %esp, %eax
 	call xen_evtchn_do_upcall
 	jmp  ret_from_intr
 	CFI_ENDPROC
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 36f36e6b0874..943684566ebe 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -155,7 +155,6 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	if (*ax == 1)
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
-			    (1 << X86_FEATURE_SEP)  |  /* disable SEP */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
@@ -994,7 +993,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 	.read_pmc = native_read_pmc,
 
 	.iret = xen_iret,
-	.irq_enable_syscall_ret = NULL,  /* never called */
+	.irq_enable_syscall_ret = xen_sysexit,
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 2341492bf7a0..82517e4a752a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -16,6 +16,7 @@
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
+#include <xen/interface/callback.h>
 #include <xen/interface/physdev.h>
 #include <xen/features.h>
 
@@ -68,6 +69,24 @@ static void __init fiddle_vdso(void)
 	*mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
 }
 
+void xen_enable_sysenter(void)
+{
+	int cpu = smp_processor_id();
+	extern void xen_sysenter_target(void);
+	/* Mask events on entry, even though they get enabled immediately */
+	static struct callback_register sysenter = {
+		.type = CALLBACKTYPE_sysenter,
+		.address = { __KERNEL_CS, (unsigned long)xen_sysenter_target },
+		.flags = CALLBACKF_mask_events,
+	};
+
+	if (!boot_cpu_has(X86_FEATURE_SEP) ||
+	    HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) {
+		clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP);
+		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP);
+	}
+}
+
 void __init xen_arch_setup(void)
 {
 	struct physdev_set_iopl set_iopl;
@@ -82,6 +101,8 @@ void __init xen_arch_setup(void)
 	HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback,
 				 __KERNEL_CS, (unsigned long)xen_failsafe_callback);
 
+	xen_enable_sysenter();
+
 	set_iopl.iopl = 1;
 	rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
 	if (rc != 0)
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index e340ff92f6b6..d61e4f8b07c7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -72,6 +72,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 	int cpu = smp_processor_id();
 
 	cpu_init();
+	xen_enable_sysenter();
 
 	preempt_disable();
 	per_cpu(cpu_state, cpu) = CPU_ONLINE;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 99223cc323be..1ac08082a4b4 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -280,6 +280,62 @@ ENTRY(xen_iret_crit_fixup)
 2:	ret
 
 
+ENTRY(xen_sysexit)
+	/* Store vcpu_info pointer for easy access.  Do it this
+	   way to avoid having to reload %fs */
+#ifdef CONFIG_SMP
+	GET_THREAD_INFO(%eax)
+	movl TI_cpu(%eax),%eax
+	movl __per_cpu_offset(,%eax,4),%eax
+	mov per_cpu__xen_vcpu(%eax),%eax
+#else
+	movl per_cpu__xen_vcpu, %eax
+#endif
+
+	/* We can't actually use sysexit in a pv guest,
+	   so fake it up with iret */
+	pushl $__USER_DS		/* user stack segment */
+	pushl %ecx			/* user esp */
+	pushl PT_EFLAGS+2*4(%esp)	/* user eflags */
+	pushl $__USER_CS		/* user code segment */
+	pushl %edx			/* user eip */
+
+xen_sysexit_start_crit:
+	/* Unmask events... */
+	movb $0, XEN_vcpu_info_mask(%eax)
+	/* ...and test for pending.
+	   There's a preempt window here, but it doesn't
+	   matter because we're within the critical section. */
+	testb $0xff, XEN_vcpu_info_pending(%eax)
+
+	/* If there's something pending, mask events again so we
+	   can directly inject it back into the kernel. */
+	jnz   1f
+
+	movl PT_EAX+5*4(%esp),%eax
+2:	iret
+1:	movb $1, XEN_vcpu_info_mask(%eax)
+xen_sysexit_end_crit:
+	addl $5*4, %esp		/* remove iret frame */
+	/* no need to re-save regs, but need to restore kernel %fs */
+	mov $__KERNEL_PERCPU, %eax
+	mov %eax, %fs
+	jmp xen_do_upcall
+.section __ex_table,"a"
+	.align 4
+	.long 2b,iret_exc
+.previous
+
+	.globl xen_sysexit_start_crit, xen_sysexit_end_crit
+/*
+	sysexit fixup is easy, since the old frame is still sitting there
+	on the stack.  We just need to remove the new recursive
+	interrupt and return.
+ */
+ENTRY(xen_sysexit_crit_fixup)
+	addl $PT_OLDESP+5*4, %esp		/* remove frame+iret */
+	jmp xen_do_upcall
+
 /*
 	Force an event check by making a hypercall,
 	but preserve regs before making the call.
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 956a491ea998..01d4ff2ce404 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -19,6 +19,7 @@ extern struct shared_info *HYPERVISOR_shared_info;
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
 void __init xen_init_IRQ(void);
+void xen_enable_sysenter(void);
 
 void xen_setup_timer(int cpu);
 void xen_setup_cpu_clockevents(void);
@@ -64,4 +65,6 @@ DECL_ASM(unsigned long, xen_save_fl_direct, void);
 DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 
 void xen_iret(void);
+void xen_sysexit(void);
+
 #endif /* XEN_OPS_H */
-- 
cgit v1.2.2


From ee523ca1e456d754d66be6deab910131e4e1dbf8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:18 -0700
Subject: xen: implement a debug-interrupt handler

Xen supports the notion of a debug interrupt which can be triggered
from the console.  For now this is implemented to show pending events,
masks and each CPU's pending event set.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/events.c  | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/smp.c     | 19 ++++++++++++++-----
 arch/x86/xen/xen-ops.h |  3 +++
 3 files changed, 64 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
index dcf613e17581..0140981e93c4 100644
--- a/arch/x86/xen/events.c
+++ b/arch/x86/xen/events.c
@@ -455,6 +455,53 @@ void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
 	notify_remote_via_irq(irq);
 }
 
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
+{
+	struct shared_info *sh = HYPERVISOR_shared_info;
+	int cpu = smp_processor_id();
+	int i;
+	unsigned long flags;
+	static DEFINE_SPINLOCK(debug_lock);
+
+	spin_lock_irqsave(&debug_lock, flags);
+
+	printk("vcpu %d\n  ", cpu);
+
+	for_each_online_cpu(i) {
+		struct vcpu_info *v = per_cpu(xen_vcpu, i);
+		printk("%d: masked=%d pending=%d event_sel %08lx\n  ", i,
+			(get_irq_regs() && i == cpu) ? !(get_irq_regs()->flags & X86_EFLAGS_IF) : v->evtchn_upcall_mask,
+			v->evtchn_upcall_pending,
+			v->evtchn_pending_sel);
+	}
+	printk("pending:\n   ");
+	for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
+		printk("%08lx%s", sh->evtchn_pending[i],
+			i % 8 == 0 ? "\n   " : " ");
+	printk("\nmasks:\n   ");
+	for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+		printk("%08lx%s", sh->evtchn_mask[i],
+			i % 8 == 0 ? "\n   " : " ");
+
+	printk("\nunmasked:\n   ");
+	for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
+		printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
+			i % 8 == 0 ? "\n   " : " ");
+
+	printk("\npending list:\n");
+	for(i = 0; i < NR_EVENT_CHANNELS; i++) {
+		if (sync_test_bit(i, sh->evtchn_pending)) {
+			printk("  %d: event %d -> irq %d\n",
+				cpu_evtchn[i], i,
+				evtchn_to_irq[i]);
+		}
+	}
+
+	spin_unlock_irqrestore(&debug_lock, flags);
+
+	return IRQ_HANDLED;
+}
+
 
 /*
  * Search the CPUs pending events bitmasks.  For each one found, map
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d61e4f8b07c7..92dd3dbf3ffb 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -36,8 +36,9 @@
 #include "mmu.h"
 
 static cpumask_t xen_cpu_initialized_map;
-static DEFINE_PER_CPU(int, resched_irq);
-static DEFINE_PER_CPU(int, callfunc_irq);
+static DEFINE_PER_CPU(int, resched_irq) = -1;
+static DEFINE_PER_CPU(int, callfunc_irq) = -1;
+static DEFINE_PER_CPU(int, debug_irq) = -1;
 
 /*
  * Structure and data for smp_call_function(). This is designed to minimise
@@ -89,9 +90,7 @@ static __cpuinit void cpu_bringup_and_idle(void)
 static int xen_smp_intr_init(unsigned int cpu)
 {
 	int rc;
-	const char *resched_name, *callfunc_name;
-
-	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
+	const char *resched_name, *callfunc_name, *debug_name;
 
 	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
 	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
@@ -115,6 +114,14 @@ static int xen_smp_intr_init(unsigned int cpu)
 		goto fail;
 	per_cpu(callfunc_irq, cpu) = rc;
 
+	debug_name = kasprintf(GFP_KERNEL, "debug%d", cpu);
+	rc = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, xen_debug_interrupt,
+				     IRQF_DISABLED | IRQF_PERCPU | IRQF_NOBALANCING,
+				     debug_name, NULL);
+	if (rc < 0)
+		goto fail;
+	per_cpu(debug_irq, cpu) = rc;
+
 	return 0;
 
  fail:
@@ -122,6 +129,8 @@ static int xen_smp_intr_init(unsigned int cpu)
 		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
 	if (per_cpu(callfunc_irq, cpu) >= 0)
 		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
+	if (per_cpu(debug_irq, cpu) >= 0)
+		unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL);
 	return rc;
 }
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 01d4ff2ce404..22395d20dd6e 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -2,6 +2,7 @@
 #define XEN_OPS_H
 
 #include <linux/init.h>
+#include <linux/irqreturn.h>
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -29,6 +30,8 @@ unsigned long xen_get_wallclock(void);
 int xen_set_wallclock(unsigned long time);
 unsigned long long xen_sched_clock(void);
 
+irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+
 bool xen_vcpu_stolen(int vcpu);
 
 void xen_mark_init_mm_pinned(void);
-- 
cgit v1.2.2


From ee8fa1c67f0b873a324960f0ca9fa1d7e49aa86b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:19 -0700
Subject: xen: make sure retriggered events are set pending

retrigger_dynirq() was incomplete, and didn't properly set the event
to be pending again.  It doesn't seem to actually get used.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/events.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
index 0140981e93c4..f73b53bd65b7 100644
--- a/arch/x86/xen/events.c
+++ b/arch/x86/xen/events.c
@@ -601,10 +601,16 @@ static void ack_dynirq(unsigned int irq)
 static int retrigger_dynirq(unsigned int irq)
 {
 	int evtchn = evtchn_from_irq(irq);
+	struct shared_info *sh = HYPERVISOR_shared_info;
 	int ret = 0;
 
 	if (VALID_EVTCHN(evtchn)) {
-		set_evtchn(evtchn);
+		int masked;
+
+		masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
+		sync_set_bit(evtchn, sh->evtchn_pending);
+		if (!masked)
+			unmask_evtchn(evtchn);
 		ret = 1;
 	}
 
-- 
cgit v1.2.2


From 229664bee6126e01f8662976a5fe2e79813b77c8 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:20 -0700
Subject: xen: short-cut for recursive event handling

If an event comes in while events are currently being processed, then
just increment the counter and have the outer event loop reprocess the
pending events.  This prevents unbounded recursion on heavy event
loads (of course massive event storms will cause infinite loops).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/events.c | 46 ++++++++++++++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
index f73b53bd65b7..85bac298b3cb 100644
--- a/arch/x86/xen/events.c
+++ b/arch/x86/xen/events.c
@@ -517,29 +517,43 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 	int cpu = get_cpu();
 	struct shared_info *s = HYPERVISOR_shared_info;
 	struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-	unsigned long pending_words;
+	static DEFINE_PER_CPU(unsigned, nesting_count);
+ 	unsigned count;
 
-	vcpu_info->evtchn_upcall_pending = 0;
+	do {
+		unsigned long pending_words;
 
-	/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
-	pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
-	while (pending_words != 0) {
-		unsigned long pending_bits;
-		int word_idx = __ffs(pending_words);
-		pending_words &= ~(1UL << word_idx);
+		vcpu_info->evtchn_upcall_pending = 0;
 
-		while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
-			int bit_idx = __ffs(pending_bits);
-			int port = (word_idx * BITS_PER_LONG) + bit_idx;
-			int irq = evtchn_to_irq[port];
+		if (__get_cpu_var(nesting_count)++)
+			goto out;
 
-			if (irq != -1) {
-				regs->orig_ax = ~irq;
-				do_IRQ(regs);
+		/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
+		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
+		while (pending_words != 0) {
+			unsigned long pending_bits;
+			int word_idx = __ffs(pending_words);
+			pending_words &= ~(1UL << word_idx);
+
+			while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
+				int bit_idx = __ffs(pending_bits);
+				int port = (word_idx * BITS_PER_LONG) + bit_idx;
+				int irq = evtchn_to_irq[port];
+
+				if (irq != -1) {
+					regs->orig_ax = ~irq;
+					do_IRQ(regs);
+				}
 			}
 		}
-	}
 
+		BUG_ON(!irqs_disabled());
+
+		count = __get_cpu_var(nesting_count);
+		__get_cpu_var(nesting_count) = 0;
+	} while(count != 1);
+
+out:
 	put_cpu();
 }
 
-- 
cgit v1.2.2


From dbe9e994c99ac9ac12d2b66ea42f44558f54fa52 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:21 -0700
Subject: xen: no need for domU to worry about MCE/MCA

Mask MCE/MCA out of cpu caps.  Its harmless to leave them there, but
it does prevent the kernel from starting an unnecessary thread.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/enlighten.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 943684566ebe..8c5ff24a492b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -155,6 +155,8 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 	if (*ax == 1)
 		maskedx = ~((1 << X86_FEATURE_APIC) |  /* disable APIC */
 			    (1 << X86_FEATURE_ACPI) |  /* disable ACPI */
+			    (1 << X86_FEATURE_MCE)  |  /* disable MCE */
+			    (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 			    (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
 	asm(XEN_EMULATE_PREFIX "cpuid"
-- 
cgit v1.2.2


From 0f2c87695219b1129ccf93e0f58acdcdd49724b9 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Mon, 17 Mar 2008 16:37:22 -0700
Subject: xen: jump to iret fixup

Use jmp rather than call for the iret fixup, so its consistent with
the sysexit fixup, and it simplifies the stack (which is already
complex).

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_32.S |  3 +--
 arch/x86/xen/xen-asm.S     | 22 +++++++++-------------
 2 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5d80d53eaff8..209c334bb920 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1042,8 +1042,7 @@ ENTRY(xen_hypervisor_callback)
 	cmpl $xen_iret_end_crit,%eax
 	jae  1f
 
-	call xen_iret_crit_fixup
-	jmp  2f
+	jmp  xen_iret_crit_fixup
 
 1:	cmpl $xen_sysexit_start_crit,%eax
 	jb   2f
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 1ac08082a4b4..53cae923e148 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -223,9 +223,7 @@ hyper_iret:
 	 ds		}  SAVE_ALL state
 	 eax		}
 	  :		:
-	 ebx		}
-	----------------
-	 return addr	 <- esp
+	 ebx		}<- esp
 	----------------
 
    In order to deliver the nested exception properly, we need to shift
@@ -240,10 +238,8 @@ hyper_iret:
    it's usermode state which we eventually need to restore.
  */
 ENTRY(xen_iret_crit_fixup)
-	/* offsets +4 for return address */
-
 	/*
-	   Paranoia: Make sure we're really coming from userspace.
+	   Paranoia: Make sure we're really coming from kernel space.
 	   One could imagine a case where userspace jumps into the
 	   critical range address, but just before the CPU delivers a GP,
 	   it decides to deliver an interrupt instead.  Unlikely?
@@ -252,32 +248,32 @@ ENTRY(xen_iret_crit_fixup)
 	   jump instruction itself, not the destination, but some virtual
 	   environments get this wrong.
 	 */
-	movl PT_CS+4(%esp), %ecx
+	movl PT_CS(%esp), %ecx
 	andl $SEGMENT_RPL_MASK, %ecx
 	cmpl $USER_RPL, %ecx
 	je 2f
 
-	lea PT_ORIG_EAX+4(%esp), %esi
-	lea PT_EFLAGS+4(%esp), %edi
+	lea PT_ORIG_EAX(%esp), %esi
+	lea PT_EFLAGS(%esp), %edi
 
 	/* If eip is before iret_restore_end then stack
 	   hasn't been restored yet. */
 	cmp $iret_restore_end, %eax
 	jae 1f
 
-	movl 0+4(%edi),%eax		/* copy EAX */
-	movl %eax, PT_EAX+4(%esp)
+	movl 0+4(%edi),%eax		/* copy EAX (just above top of frame) */
+	movl %eax, PT_EAX(%esp)
 
 	lea ESP_OFFSET(%edi),%edi	/* move dest up over saved regs */
 
 	/* set up the copy */
 1:	std
-	mov $(PT_EIP+4) / 4, %ecx	/* copy ret+saved regs up to orig_eax */
+	mov $PT_EIP / 4, %ecx		/* saved regs up to orig_eax */
 	rep movsl
 	cld
 
 	lea 4(%edi),%esp		/* point esp to new frame */
-2:	ret
+2:	jmp xen_do_upcall
 
 
 ENTRY(xen_sysexit)
-- 
cgit v1.2.2


From af711cda4f94b5fddcdc5eb4134387ae026e3171 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Wed, 2 Apr 2008 10:53:54 -0700
Subject: xen: move features.c from arch/x86/xen/features.c to drivers/xen

ia64/xen also uses it too. Move it into common place so that
ia64/xen can share the code.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/Makefile   |  2 +-
 arch/x86/xen/features.c | 29 -----------------------------
 2 files changed, 1 insertion(+), 30 deletions(-)
 delete mode 100644 arch/x86/xen/features.c

(limited to 'arch/x86')

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 343df246bd3e..c5e9aa489900 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
-obj-y		:= enlighten.o setup.o features.o multicalls.o mmu.o \
+obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
 			events.o time.o manage.o xen-asm.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/features.c b/arch/x86/xen/features.c
deleted file mode 100644
index 0707714e40d6..000000000000
--- a/arch/x86/xen/features.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/******************************************************************************
- * features.c
- *
- * Xen feature flags.
- *
- * Copyright (c) 2006, Ian Campbell, XenSource Inc.
- */
-#include <linux/types.h>
-#include <linux/cache.h>
-#include <linux/module.h>
-#include <asm/xen/hypervisor.h>
-#include <xen/features.h>
-
-u8 xen_features[XENFEAT_NR_SUBMAPS * 32] __read_mostly;
-EXPORT_SYMBOL_GPL(xen_features);
-
-void xen_setup_features(void)
-{
-	struct xen_feature_info fi;
-	int i, j;
-
-	for (i = 0; i < XENFEAT_NR_SUBMAPS; i++) {
-		fi.submap_idx = i;
-		if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
-			break;
-		for (j = 0; j < 32; j++)
-			xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
-	}
-}
-- 
cgit v1.2.2


From e04d0d0767a9c272d3c7300fb7a5221c5e3a71eb Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Wed, 2 Apr 2008 10:53:55 -0700
Subject: xen: move events.c to drivers/xen for IA64/Xen support

move arch/x86/xen/events.c undedr drivers/xen to share codes
with x86 and ia64. And minor adjustment to compile.
ia64/xen also uses events.c

Signed-off-by: Yaozu (Eddie) Dong <eddie.dong@intel.com>
Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/Makefile  |   2 +-
 arch/x86/xen/events.c  | 658 -------------------------------------------------
 arch/x86/xen/xen-ops.h |   2 +-
 3 files changed, 2 insertions(+), 660 deletions(-)
 delete mode 100644 arch/x86/xen/events.c

(limited to 'arch/x86')

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index c5e9aa489900..95c59260dccb 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			events.o time.o manage.o xen-asm.o
+			time.o manage.o xen-asm.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/events.c b/arch/x86/xen/events.c
deleted file mode 100644
index 85bac298b3cb..000000000000
--- a/arch/x86/xen/events.c
+++ /dev/null
@@ -1,658 +0,0 @@
-/*
- * Xen event channels
- *
- * Xen models interrupts with abstract event channels.  Because each
- * domain gets 1024 event channels, but NR_IRQ is not that large, we
- * must dynamically map irqs<->event channels.  The event channels
- * interface with the rest of the kernel by defining a xen interrupt
- * chip.  When an event is recieved, it is mapped to an irq and sent
- * through the normal interrupt processing path.
- *
- * There are four kinds of events which can be mapped to an event
- * channel:
- *
- * 1. Inter-domain notifications.  This includes all the virtual
- *    device events, since they're driven by front-ends in another domain
- *    (typically dom0).
- * 2. VIRQs, typically used for timers.  These are per-cpu events.
- * 3. IPIs.
- * 4. Hardware interrupts. Not supported at present.
- *
- * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
- */
-
-#include <linux/linkage.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-#include <asm/ptrace.h>
-#include <asm/irq.h>
-#include <asm/sync_bitops.h>
-#include <asm/xen/hypercall.h>
-#include <asm/xen/hypervisor.h>
-
-#include <xen/events.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/event_channel.h>
-
-#include "xen-ops.h"
-
-/*
- * This lock protects updates to the following mapping and reference-count
- * arrays. The lock does not need to be acquired to read the mapping tables.
- */
-static DEFINE_SPINLOCK(irq_mapping_update_lock);
-
-/* IRQ <-> VIRQ mapping. */
-static DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
-
-/* IRQ <-> IPI mapping */
-static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1};
-
-/* Packed IRQ information: binding type, sub-type index, and event channel. */
-struct packed_irq
-{
-	unsigned short evtchn;
-	unsigned char index;
-	unsigned char type;
-};
-
-static struct packed_irq irq_info[NR_IRQS];
-
-/* Binding types. */
-enum {
-	IRQT_UNBOUND,
-	IRQT_PIRQ,
-	IRQT_VIRQ,
-	IRQT_IPI,
-	IRQT_EVTCHN
-};
-
-/* Convenient shorthand for packed representation of an unbound IRQ. */
-#define IRQ_UNBOUND	mk_irq_info(IRQT_UNBOUND, 0, 0)
-
-static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
-	[0 ... NR_EVENT_CHANNELS-1] = -1
-};
-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
-static u8 cpu_evtchn[NR_EVENT_CHANNELS];
-
-/* Reference counts for bindings to IRQs. */
-static int irq_bindcount[NR_IRQS];
-
-/* Xen will never allocate port zero for any purpose. */
-#define VALID_EVTCHN(chn)	((chn) != 0)
-
-/*
- * Force a proper event-channel callback from Xen after clearing the
- * callback mask. We do this in a very simple manner, by making a call
- * down into Xen. The pending flag will be checked by Xen on return.
- */
-void force_evtchn_callback(void)
-{
-	(void)HYPERVISOR_xen_version(0, NULL);
-}
-EXPORT_SYMBOL_GPL(force_evtchn_callback);
-
-static struct irq_chip xen_dynamic_chip;
-
-/* Constructor for packed IRQ information. */
-static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn)
-{
-	return (struct packed_irq) { evtchn, index, type };
-}
-
-/*
- * Accessors for packed IRQ information.
- */
-static inline unsigned int evtchn_from_irq(int irq)
-{
-	return irq_info[irq].evtchn;
-}
-
-static inline unsigned int index_from_irq(int irq)
-{
-	return irq_info[irq].index;
-}
-
-static inline unsigned int type_from_irq(int irq)
-{
-	return irq_info[irq].type;
-}
-
-static inline unsigned long active_evtchns(unsigned int cpu,
-					   struct shared_info *sh,
-					   unsigned int idx)
-{
-	return (sh->evtchn_pending[idx] &
-		cpu_evtchn_mask[cpu][idx] &
-		~sh->evtchn_mask[idx]);
-}
-
-static void bind_evtchn_to_cpu(unsigned int chn, unsigned int cpu)
-{
-	int irq = evtchn_to_irq[chn];
-
-	BUG_ON(irq == -1);
-#ifdef CONFIG_SMP
-	irq_desc[irq].affinity = cpumask_of_cpu(cpu);
-#endif
-
-	__clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]);
-	__set_bit(chn, cpu_evtchn_mask[cpu]);
-
-	cpu_evtchn[chn] = cpu;
-}
-
-static void init_evtchn_cpu_bindings(void)
-{
-#ifdef CONFIG_SMP
-	int i;
-	/* By default all event channels notify CPU#0. */
-	for (i = 0; i < NR_IRQS; i++)
-		irq_desc[i].affinity = cpumask_of_cpu(0);
-#endif
-
-	memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
-	memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
-}
-
-static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
-{
-	return cpu_evtchn[evtchn];
-}
-
-static inline void clear_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_clear_bit(port, &s->evtchn_pending[0]);
-}
-
-static inline void set_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, &s->evtchn_pending[0]);
-}
-
-
-/**
- * notify_remote_via_irq - send event to remote end of event channel via irq
- * @irq: irq of event channel to send event to
- *
- * Unlike notify_remote_via_evtchn(), this is safe to use across
- * save/restore. Notifications on a broken connection are silently
- * dropped.
- */
-void notify_remote_via_irq(int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		notify_remote_via_evtchn(evtchn);
-}
-EXPORT_SYMBOL_GPL(notify_remote_via_irq);
-
-static void mask_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	sync_set_bit(port, &s->evtchn_mask[0]);
-}
-
-static void unmask_evtchn(int port)
-{
-	struct shared_info *s = HYPERVISOR_shared_info;
-	unsigned int cpu = get_cpu();
-
-	BUG_ON(!irqs_disabled());
-
-	/* Slow path (hypercall) if this is a non-local port. */
-	if (unlikely(cpu != cpu_from_evtchn(port))) {
-		struct evtchn_unmask unmask = { .port = port };
-		(void)HYPERVISOR_event_channel_op(EVTCHNOP_unmask, &unmask);
-	} else {
-		struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-
-		sync_clear_bit(port, &s->evtchn_mask[0]);
-
-		/*
-		 * The following is basically the equivalent of
-		 * 'hw_resend_irq'. Just like a real IO-APIC we 'lose
-		 * the interrupt edge' if the channel is masked.
-		 */
-		if (sync_test_bit(port, &s->evtchn_pending[0]) &&
-		    !sync_test_and_set_bit(port / BITS_PER_LONG,
-					   &vcpu_info->evtchn_pending_sel))
-			vcpu_info->evtchn_upcall_pending = 1;
-	}
-
-	put_cpu();
-}
-
-static int find_unbound_irq(void)
-{
-	int irq;
-
-	/* Only allocate from dynirq range */
-	for (irq = 0; irq < NR_IRQS; irq++)
-		if (irq_bindcount[irq] == 0)
-			break;
-
-	if (irq == NR_IRQS)
-		panic("No available IRQ to bind to: increase NR_IRQS!\n");
-
-	return irq;
-}
-
-int bind_evtchn_to_irq(unsigned int evtchn)
-{
-	int irq;
-
-	spin_lock(&irq_mapping_update_lock);
-
-	irq = evtchn_to_irq[evtchn];
-
-	if (irq == -1) {
-		irq = find_unbound_irq();
-
-		dynamic_irq_init(irq);
-		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_level_irq, "event");
-
-		evtchn_to_irq[evtchn] = irq;
-		irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn);
-	}
-
-	irq_bindcount[irq]++;
-
-	spin_unlock(&irq_mapping_update_lock);
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irq);
-
-static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
-{
-	struct evtchn_bind_ipi bind_ipi;
-	int evtchn, irq;
-
-	spin_lock(&irq_mapping_update_lock);
-
-	irq = per_cpu(ipi_to_irq, cpu)[ipi];
-	if (irq == -1) {
-		irq = find_unbound_irq();
-		if (irq < 0)
-			goto out;
-
-		dynamic_irq_init(irq);
-		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_level_irq, "ipi");
-
-		bind_ipi.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
-						&bind_ipi) != 0)
-			BUG();
-		evtchn = bind_ipi.port;
-
-		evtchn_to_irq[evtchn] = irq;
-		irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
-
-		per_cpu(ipi_to_irq, cpu)[ipi] = irq;
-
-		bind_evtchn_to_cpu(evtchn, cpu);
-	}
-
-	irq_bindcount[irq]++;
-
- out:
-	spin_unlock(&irq_mapping_update_lock);
-	return irq;
-}
-
-
-static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
-{
-	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
-
-	spin_lock(&irq_mapping_update_lock);
-
-	irq = per_cpu(virq_to_irq, cpu)[virq];
-
-	if (irq == -1) {
-		bind_virq.virq = virq;
-		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
-
-		irq = find_unbound_irq();
-
-		dynamic_irq_init(irq);
-		set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-					      handle_level_irq, "virq");
-
-		evtchn_to_irq[evtchn] = irq;
-		irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
-
-		per_cpu(virq_to_irq, cpu)[virq] = irq;
-
-		bind_evtchn_to_cpu(evtchn, cpu);
-	}
-
-	irq_bindcount[irq]++;
-
-	spin_unlock(&irq_mapping_update_lock);
-
-	return irq;
-}
-
-static void unbind_from_irq(unsigned int irq)
-{
-	struct evtchn_close close;
-	int evtchn = evtchn_from_irq(irq);
-
-	spin_lock(&irq_mapping_update_lock);
-
-	if (VALID_EVTCHN(evtchn) && (--irq_bindcount[irq] == 0)) {
-		close.port = evtchn;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
-			BUG();
-
-		switch (type_from_irq(irq)) {
-		case IRQT_VIRQ:
-			per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
-				[index_from_irq(irq)] = -1;
-			break;
-		default:
-			break;
-		}
-
-		/* Closed ports are implicitly re-bound to VCPU0. */
-		bind_evtchn_to_cpu(evtchn, 0);
-
-		evtchn_to_irq[evtchn] = -1;
-		irq_info[irq] = IRQ_UNBOUND;
-
-		dynamic_irq_init(irq);
-	}
-
-	spin_unlock(&irq_mapping_update_lock);
-}
-
-int bind_evtchn_to_irqhandler(unsigned int evtchn,
-			      irq_handler_t handler,
-			      unsigned long irqflags,
-			      const char *devname, void *dev_id)
-{
-	unsigned int irq;
-	int retval;
-
-	irq = bind_evtchn_to_irq(evtchn);
-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (retval != 0) {
-		unbind_from_irq(irq);
-		return retval;
-	}
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(bind_evtchn_to_irqhandler);
-
-int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
-			    irq_handler_t handler,
-			    unsigned long irqflags, const char *devname, void *dev_id)
-{
-	unsigned int irq;
-	int retval;
-
-	irq = bind_virq_to_irq(virq, cpu);
-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (retval != 0) {
-		unbind_from_irq(irq);
-		return retval;
-	}
-
-	return irq;
-}
-EXPORT_SYMBOL_GPL(bind_virq_to_irqhandler);
-
-int bind_ipi_to_irqhandler(enum ipi_vector ipi,
-			   unsigned int cpu,
-			   irq_handler_t handler,
-			   unsigned long irqflags,
-			   const char *devname,
-			   void *dev_id)
-{
-	int irq, retval;
-
-	irq = bind_ipi_to_irq(ipi, cpu);
-	if (irq < 0)
-		return irq;
-
-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
-	if (retval != 0) {
-		unbind_from_irq(irq);
-		return retval;
-	}
-
-	return irq;
-}
-
-void unbind_from_irqhandler(unsigned int irq, void *dev_id)
-{
-	free_irq(irq, dev_id);
-	unbind_from_irq(irq);
-}
-EXPORT_SYMBOL_GPL(unbind_from_irqhandler);
-
-void xen_send_IPI_one(unsigned int cpu, enum ipi_vector vector)
-{
-	int irq = per_cpu(ipi_to_irq, cpu)[vector];
-	BUG_ON(irq < 0);
-	notify_remote_via_irq(irq);
-}
-
-irqreturn_t xen_debug_interrupt(int irq, void *dev_id)
-{
-	struct shared_info *sh = HYPERVISOR_shared_info;
-	int cpu = smp_processor_id();
-	int i;
-	unsigned long flags;
-	static DEFINE_SPINLOCK(debug_lock);
-
-	spin_lock_irqsave(&debug_lock, flags);
-
-	printk("vcpu %d\n  ", cpu);
-
-	for_each_online_cpu(i) {
-		struct vcpu_info *v = per_cpu(xen_vcpu, i);
-		printk("%d: masked=%d pending=%d event_sel %08lx\n  ", i,
-			(get_irq_regs() && i == cpu) ? !(get_irq_regs()->flags & X86_EFLAGS_IF) : v->evtchn_upcall_mask,
-			v->evtchn_upcall_pending,
-			v->evtchn_pending_sel);
-	}
-	printk("pending:\n   ");
-	for(i = ARRAY_SIZE(sh->evtchn_pending)-1; i >= 0; i--)
-		printk("%08lx%s", sh->evtchn_pending[i],
-			i % 8 == 0 ? "\n   " : " ");
-	printk("\nmasks:\n   ");
-	for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-		printk("%08lx%s", sh->evtchn_mask[i],
-			i % 8 == 0 ? "\n   " : " ");
-
-	printk("\nunmasked:\n   ");
-	for(i = ARRAY_SIZE(sh->evtchn_mask)-1; i >= 0; i--)
-		printk("%08lx%s", sh->evtchn_pending[i] & ~sh->evtchn_mask[i],
-			i % 8 == 0 ? "\n   " : " ");
-
-	printk("\npending list:\n");
-	for(i = 0; i < NR_EVENT_CHANNELS; i++) {
-		if (sync_test_bit(i, sh->evtchn_pending)) {
-			printk("  %d: event %d -> irq %d\n",
-				cpu_evtchn[i], i,
-				evtchn_to_irq[i]);
-		}
-	}
-
-	spin_unlock_irqrestore(&debug_lock, flags);
-
-	return IRQ_HANDLED;
-}
-
-
-/*
- * Search the CPUs pending events bitmasks.  For each one found, map
- * the event number to an irq, and feed it into do_IRQ() for
- * handling.
- *
- * Xen uses a two-level bitmap to speed searching.  The first level is
- * a bitset of words which contain pending event bits.  The second
- * level is a bitset of pending events themselves.
- */
-void xen_evtchn_do_upcall(struct pt_regs *regs)
-{
-	int cpu = get_cpu();
-	struct shared_info *s = HYPERVISOR_shared_info;
-	struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu);
-	static DEFINE_PER_CPU(unsigned, nesting_count);
- 	unsigned count;
-
-	do {
-		unsigned long pending_words;
-
-		vcpu_info->evtchn_upcall_pending = 0;
-
-		if (__get_cpu_var(nesting_count)++)
-			goto out;
-
-		/* NB. No need for a barrier here -- XCHG is a barrier on x86. */
-		pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
-		while (pending_words != 0) {
-			unsigned long pending_bits;
-			int word_idx = __ffs(pending_words);
-			pending_words &= ~(1UL << word_idx);
-
-			while ((pending_bits = active_evtchns(cpu, s, word_idx)) != 0) {
-				int bit_idx = __ffs(pending_bits);
-				int port = (word_idx * BITS_PER_LONG) + bit_idx;
-				int irq = evtchn_to_irq[port];
-
-				if (irq != -1) {
-					regs->orig_ax = ~irq;
-					do_IRQ(regs);
-				}
-			}
-		}
-
-		BUG_ON(!irqs_disabled());
-
-		count = __get_cpu_var(nesting_count);
-		__get_cpu_var(nesting_count) = 0;
-	} while(count != 1);
-
-out:
-	put_cpu();
-}
-
-/* Rebind an evtchn so that it gets delivered to a specific cpu */
-static void rebind_irq_to_cpu(unsigned irq, unsigned tcpu)
-{
-	struct evtchn_bind_vcpu bind_vcpu;
-	int evtchn = evtchn_from_irq(irq);
-
-	if (!VALID_EVTCHN(evtchn))
-		return;
-
-	/* Send future instances of this interrupt to other vcpu. */
-	bind_vcpu.port = evtchn;
-	bind_vcpu.vcpu = tcpu;
-
-	/*
-	 * If this fails, it usually just indicates that we're dealing with a
-	 * virq or IPI channel, which don't actually need to be rebound. Ignore
-	 * it, but don't do the xenlinux-level rebind in that case.
-	 */
-	if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
-		bind_evtchn_to_cpu(evtchn, tcpu);
-}
-
-
-static void set_affinity_irq(unsigned irq, cpumask_t dest)
-{
-	unsigned tcpu = first_cpu(dest);
-	rebind_irq_to_cpu(irq, tcpu);
-}
-
-static void enable_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		unmask_evtchn(evtchn);
-}
-
-static void disable_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		mask_evtchn(evtchn);
-}
-
-static void ack_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-
-	move_native_irq(irq);
-
-	if (VALID_EVTCHN(evtchn))
-		clear_evtchn(evtchn);
-}
-
-static int retrigger_dynirq(unsigned int irq)
-{
-	int evtchn = evtchn_from_irq(irq);
-	struct shared_info *sh = HYPERVISOR_shared_info;
-	int ret = 0;
-
-	if (VALID_EVTCHN(evtchn)) {
-		int masked;
-
-		masked = sync_test_and_set_bit(evtchn, sh->evtchn_mask);
-		sync_set_bit(evtchn, sh->evtchn_pending);
-		if (!masked)
-			unmask_evtchn(evtchn);
-		ret = 1;
-	}
-
-	return ret;
-}
-
-static struct irq_chip xen_dynamic_chip __read_mostly = {
-	.name		= "xen-dyn",
-	.mask		= disable_dynirq,
-	.unmask		= enable_dynirq,
-	.ack		= ack_dynirq,
-	.set_affinity	= set_affinity_irq,
-	.retrigger	= retrigger_dynirq,
-};
-
-void __init xen_init_IRQ(void)
-{
-	int i;
-
-	init_evtchn_cpu_bindings();
-
-	/* No event channels are 'live' right now. */
-	for (i = 0; i < NR_EVENT_CHANNELS; i++)
-		mask_evtchn(i);
-
-	/* Dynamic IRQ space is currently unbound. Zero the refcnts. */
-	for (i = 0; i < NR_IRQS; i++)
-		irq_bindcount[i] = 0;
-
-	irq_ctx_init(smp_processor_id());
-}
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 22395d20dd6e..f1063ae08037 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -3,6 +3,7 @@
 
 #include <linux/init.h>
 #include <linux/irqreturn.h>
+#include <xen/xen-ops.h>
 
 /* These are code, but not functions.  Defined in entry.S */
 extern const char xen_hypervisor_callback[];
@@ -10,7 +11,6 @@ extern const char xen_failsafe_callback[];
 
 void xen_copy_trap_info(struct trap_info *traps);
 
-DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu);
 DECLARE_PER_CPU(unsigned long, xen_cr3);
 DECLARE_PER_CPU(unsigned long, xen_current_cr3);
 
-- 
cgit v1.2.2


From 8d3d2106c19f4e69f208f59fe484ca113fbb48b3 Mon Sep 17 00:00:00 2001
From: Isaku Yamahata <yamahata@valinux.co.jp>
Date: Wed, 2 Apr 2008 10:54:00 -0700
Subject: xen: make grant table arch portable

split out x86 specific part from grant-table.c and
allow ia64/xen specific initialization.
ia64/xen grant table is based on pseudo physical address
(guest physical address) unlike x86/xen. On ia64 init_mm
doesn't map identity straight mapped area.
ia64/xen specific grant table initialization is necessary.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/Makefile      |  2 +-
 arch/x86/xen/grant-table.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/xen/grant-table.c

(limited to 'arch/x86')

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 95c59260dccb..3d8df981d5fd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -1,4 +1,4 @@
 obj-y		:= enlighten.o setup.o multicalls.o mmu.o \
-			time.o manage.o xen-asm.o
+			time.o manage.o xen-asm.o grant-table.o
 
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
new file mode 100644
index 000000000000..49ba9b5224d1
--- /dev/null
+++ b/arch/x86/xen/grant-table.c
@@ -0,0 +1,91 @@
+/******************************************************************************
+ * grant_table.c
+ * x86 specific part
+ *
+ * Granting foreign access to our memory reservation.
+ *
+ * Copyright (c) 2005-2006, Christopher Clark
+ * Copyright (c) 2004-2005, K A Fraser
+ * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp>
+ *                    VA Linux Systems Japan. Split out x86 specific part.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+
+#include <xen/interface/xen.h>
+#include <xen/page.h>
+#include <xen/grant_table.h>
+
+#include <asm/pgtable.h>
+
+static int map_pte_fn(pte_t *pte, struct page *pmd_page,
+		      unsigned long addr, void *data)
+{
+	unsigned long **frames = (unsigned long **)data;
+
+	set_pte_at(&init_mm, addr, pte, mfn_pte((*frames)[0], PAGE_KERNEL));
+	(*frames)++;
+	return 0;
+}
+
+static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
+			unsigned long addr, void *data)
+{
+
+	set_pte_at(&init_mm, addr, pte, __pte(0));
+	return 0;
+}
+
+int arch_gnttab_map_shared(unsigned long *frames, unsigned long nr_gframes,
+			   unsigned long max_nr_gframes,
+			   struct grant_entry **__shared)
+{
+	int rc;
+	struct grant_entry *shared = *__shared;
+
+	if (shared == NULL) {
+		struct vm_struct *area =
+			xen_alloc_vm_area(PAGE_SIZE * max_nr_gframes);
+		BUG_ON(area == NULL);
+		shared = area->addr;
+		*__shared = shared;
+	}
+
+	rc = apply_to_page_range(&init_mm, (unsigned long)shared,
+				 PAGE_SIZE * nr_gframes,
+				 map_pte_fn, &frames);
+	return rc;
+}
+
+void arch_gnttab_unmap_shared(struct grant_entry *shared,
+			      unsigned long nr_gframes)
+{
+	apply_to_page_range(&init_mm, (unsigned long)shared,
+			    PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL);
+}
-- 
cgit v1.2.2


From 41e332b2a2dfe514cd441ed0ce1096ed1863e378 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 Apr 2008 10:54:09 -0700
Subject: xen: disable preemption during tlb flush

Various places in the kernel flush the tlb even though preemption doens't
guarantee the tlb flush is happening on any particular CPU.  In many cases
this doesn't seem to matter, so don't make a fuss about it.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/enlighten.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 8c5ff24a492b..bc129146f99f 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -532,26 +532,37 @@ static void xen_apic_write(unsigned long reg, u32 val)
 static void xen_flush_tlb(void)
 {
 	struct mmuext_op *op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+	struct multicall_space mcs;
+
+	preempt_disable();
+
+	mcs = xen_mc_entry(sizeof(*op));
 
 	op = mcs.args;
 	op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
 }
 
 static void xen_flush_tlb_single(unsigned long addr)
 {
 	struct mmuext_op *op;
-	struct multicall_space mcs = xen_mc_entry(sizeof(*op));
+	struct multicall_space mcs;
+
+	preempt_disable();
 
+	mcs = xen_mc_entry(sizeof(*op));
 	op = mcs.args;
 	op->cmd = MMUEXT_INVLPG_LOCAL;
 	op->arg1.linear_addr = addr & PAGE_MASK;
 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+	preempt_enable();
 }
 
 static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
-- 
cgit v1.2.2


From 2bd50036b5dfc929390ddc48be7f6314447b2be3 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 Apr 2008 10:54:10 -0700
Subject: xen: allow set_pte_at on init_mm to be lockless

The usual pagetable locking protocol doesn't seem to apply to updates
to init_mm, so don't rely on preemption being disabled in xen_set_pte_at
on init_mm.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/mmu.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 272635aaef88..6cbcf65609ad 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -156,6 +156,10 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
 void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 		    pte_t *ptep, pte_t pteval)
 {
+	/* updates to init_mm may be done without lock */
+	if (mm == &init_mm)
+		preempt_disable();
+
 	if (mm == current->mm || mm == &init_mm) {
 		if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
 			struct multicall_space mcs;
@@ -163,12 +167,16 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
 
 			MULTI_update_va_mapping(mcs.mc, addr, pteval, 0);
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
-			return;
+			goto out;
 		} else
 			if (HYPERVISOR_update_va_mapping(addr, pteval, 0) == 0)
-				return;
+				goto out;
 	}
 	xen_set_pte(ptep, pteval);
+
+out:
+	if (mm == &init_mm)
+		preempt_enable();
 }
 
 pteval_t xen_pte_val(pte_t pte)
-- 
cgit v1.2.2


From b77797fb2bf31bf076e6b69736119bc6a077525b Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 Apr 2008 10:54:11 -0700
Subject: xen: fold xen_sysexit into xen_iret

xen_sysexit and xen_iret were doing essentially the same thing.  Rather
than having a separate implementation for xen_sysexit, we can just strip
the stack back to an iret frame and jump into xen_iret.  This removes
a lot of code and complexity - specifically, another critical region.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/entry_32.S |  9 +-----
 arch/x86/xen/xen-asm.S     | 70 ++++++++++------------------------------------
 2 files changed, 15 insertions(+), 64 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 209c334bb920..2a609dc3271c 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1044,15 +1044,8 @@ ENTRY(xen_hypervisor_callback)
 
 	jmp  xen_iret_crit_fixup
 
-1:	cmpl $xen_sysexit_start_crit,%eax
-	jb   2f
-	cmpl $xen_sysexit_end_crit,%eax
-	jae  2f
-
-	jmp xen_sysexit_crit_fixup
-
 ENTRY(xen_do_upcall)
-2:	mov %esp, %eax
+1:	mov %esp, %eax
 	call xen_evtchn_do_upcall
 	jmp  ret_from_intr
 	CFI_ENDPROC
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index 53cae923e148..2497a30f41de 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -107,6 +107,20 @@ ENDPATCH(xen_restore_fl_direct)
 	ENDPROC(xen_restore_fl_direct)
 	RELOC(xen_restore_fl_direct, 2b+1)
 
+/*
+	We can't use sysexit directly, because we're not running in ring0.
+	But we can easily fake it up using iret.  Assuming xen_sysexit
+	is jumped to with a standard stack frame, we can just strip it
+	back to a standard iret frame and use iret.
+ */
+ENTRY(xen_sysexit)
+	movl PT_EAX(%esp), %eax			/* Shouldn't be necessary? */
+	orl $X86_EFLAGS_IF, PT_EFLAGS(%esp)
+	lea PT_EIP(%esp), %esp
+
+	jmp xen_iret
+ENDPROC(xen_sysexit)
+
 /*
 	This is run where a normal iret would be run, with the same stack setup:
 	      8: eflags
@@ -276,62 +290,6 @@ ENTRY(xen_iret_crit_fixup)
 2:	jmp xen_do_upcall
 
 
-ENTRY(xen_sysexit)
-	/* Store vcpu_info pointer for easy access.  Do it this
-	   way to avoid having to reload %fs */
-#ifdef CONFIG_SMP
-	GET_THREAD_INFO(%eax)
-	movl TI_cpu(%eax),%eax
-	movl __per_cpu_offset(,%eax,4),%eax
-	mov per_cpu__xen_vcpu(%eax),%eax
-#else
-	movl per_cpu__xen_vcpu, %eax
-#endif
-
-	/* We can't actually use sysexit in a pv guest,
-	   so fake it up with iret */
-	pushl $__USER_DS		/* user stack segment */
-	pushl %ecx			/* user esp */
-	pushl PT_EFLAGS+2*4(%esp)	/* user eflags */
-	pushl $__USER_CS		/* user code segment */
-	pushl %edx			/* user eip */
-
-xen_sysexit_start_crit:
-	/* Unmask events... */
-	movb $0, XEN_vcpu_info_mask(%eax)
-	/* ...and test for pending.
-	   There's a preempt window here, but it doesn't
-	   matter because we're within the critical section. */
-	testb $0xff, XEN_vcpu_info_pending(%eax)
-
-	/* If there's something pending, mask events again so we
-	   can directly inject it back into the kernel. */
-	jnz   1f
-
-	movl PT_EAX+5*4(%esp),%eax
-2:	iret
-1:	movb $1, XEN_vcpu_info_mask(%eax)
-xen_sysexit_end_crit:
-	addl $5*4, %esp		/* remove iret frame */
-	/* no need to re-save regs, but need to restore kernel %fs */
-	mov $__KERNEL_PERCPU, %eax
-	mov %eax, %fs
-	jmp xen_do_upcall
-.section __ex_table,"a"
-	.align 4
-	.long 2b,iret_exc
-.previous
-
-	.globl xen_sysexit_start_crit, xen_sysexit_end_crit
-/*
-	sysexit fixup is easy, since the old frame is still sitting there
-	on the stack.  We just need to remove the new recursive
-	interrupt and return.
- */
-ENTRY(xen_sysexit_crit_fixup)
-	addl $PT_OLDESP+5*4, %esp		/* remove frame+iret */
-	jmp xen_do_upcall
-
 /*
 	Force an event check by making a hypercall,
 	but preserve regs before making the call.
-- 
cgit v1.2.2


From af7ae3b9c4a4c1337903f31131d58e3c0d2b6d55 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Wed, 2 Apr 2008 10:54:12 -0700
Subject: xen: allow compilation with non-flat memory

There's no real reason we can't support sparsemem/discontigmem, so do so.
This is mostly useful to support hotplug memory.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/xen/Kconfig     | 2 +-
 arch/x86/xen/enlighten.c | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 4d5f2649bee4..2e641be2737e 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -6,7 +6,7 @@ config XEN
 	bool "Xen guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on X86_CMPXCHG && X86_TSC && !NEED_MULTIPLE_NODES && !(X86_VISWS || X86_VOYAGER)
+	depends on X86_CMPXCHG && X86_TSC && !(X86_VISWS || X86_VOYAGER)
 	help
 	  This is the Linux Xen port.  Enabling this will allow the
 	  kernel to boot in a paravirtualized environment under the
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bc129146f99f..c8a56e457d61 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -669,7 +669,9 @@ static void xen_write_cr3(unsigned long cr3)
    everything is pinned. */
 static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn)
 {
+#ifdef CONFIG_FLATMEM
 	BUG_ON(mem_map);	/* should only be used early */
+#endif
 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
 }
 
-- 
cgit v1.2.2


From 82a355f5a2fdc203e5a32626d667ec43fc76b8b1 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 25 Apr 2008 18:28:21 +0200
Subject: x86: make __set_fixmap() non-init

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 1ff7906a9a4d..7a81dd020693 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -135,7 +135,7 @@ static __init void *spp_getpage(void)
 	return ptr;
 }
 
-static __init void
+static void
 set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 {
 	pgd_t *pgd;
@@ -214,8 +214,7 @@ void __init cleanup_highmap(void)
 }
 
 /* NOTE: this is meant to be run only at boot */
-void __init
-__set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+void __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
 {
 	unsigned long address = __fix_to_virt(idx);
 
-- 
cgit v1.2.2


From 70c9f590ffc3f959cc81c1a3cecb6b8133caf35d Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 25 Apr 2008 18:05:57 +0200
Subject: x86: remove set_fixmap() warning

set_fixmap()+clear_fixmap() is safe.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/mm/init_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7a81dd020693..b798e7b92b17 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -173,7 +173,7 @@ set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot)
 	new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
 
 	pte = pte_offset_kernel(pmd, vaddr);
-	if (!pte_none(*pte) &&
+	if (!pte_none(*pte) && pte_val(new_pte) &&
 	    pte_val(*pte) != (pte_val(new_pte) & __supported_pte_mask))
 		pte_ERROR(*pte);
 	set_pte(pte, new_pte);
-- 
cgit v1.2.2


From 8b132ecbcfea8b1b556a832df7290379df79ad79 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Mon, 28 Apr 2008 02:51:23 +0200
Subject: x86: fix text_poke()

kernel_text_address returns true even for modules which is not wanted
in text_poke. Use core_kernel_text instead.

This is a regression introduced in e587cadd8f47e202a30712e2906a65a0606d5865
which caused occasionaly crashes after suspend/resume.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
CC: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
CC: Andi Kleen <andi@firstfloor.org>
CC: pageexec@freemail.hu
CC: H. Peter Anvin <hpa@zytor.com>
CC: Jeremy Fitzhardinge <jeremy@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/alternative.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index df4099dc1c68..7ab3a9774763 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -515,7 +515,7 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
 	BUG_ON(len > sizeof(long));
 	BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
 		- ((long)addr & ~(sizeof(long) - 1)));
-	if (kernel_text_address((unsigned long)addr)) {
+	if (core_kernel_text((unsigned long)addr)) {
 		struct page *pages[2] = { virt_to_page(addr),
 			virt_to_page(addr + PAGE_SIZE) };
 		if (!pages[1])
-- 
cgit v1.2.2


From b7b66baa8bc3f8e0cda6576e31e9bde09382565d Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Date: Thu, 24 Apr 2008 11:03:33 -0400
Subject: x86: clean up text_poke()

Clean up the codepath, remove alignment restrictions and do sanity
checking of the end result, to make sure we patched the right site.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/alternative.c | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 7ab3a9774763..60299f61843f 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -511,31 +511,29 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
 	unsigned long flags;
 	char *vaddr;
 	int nr_pages = 2;
+	struct page *pages[2];
+	int i;
 
-	BUG_ON(len > sizeof(long));
-	BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1))
-		- ((long)addr & ~(sizeof(long) - 1)));
-	if (core_kernel_text((unsigned long)addr)) {
-		struct page *pages[2] = { virt_to_page(addr),
-			virt_to_page(addr + PAGE_SIZE) };
-		if (!pages[1])
-			nr_pages = 1;
-		vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
-		BUG_ON(!vaddr);
-		local_irq_save(flags);
-		memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
-		local_irq_restore(flags);
-		vunmap(vaddr);
+	if (!core_kernel_text((unsigned long)addr)) {
+		pages[0] = vmalloc_to_page(addr);
+		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 	} else {
-		/*
-		 * modules are in vmalloc'ed memory, always writable.
-		 */
-		local_irq_save(flags);
-		memcpy(addr, opcode, len);
-		local_irq_restore(flags);
+		pages[0] = virt_to_page(addr);
+		pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
+	BUG_ON(!pages[0]);
+	if (!pages[1])
+		nr_pages = 1;
+	vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	BUG_ON(!vaddr);
+	local_irq_save(flags);
+	memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
+	local_irq_restore(flags);
+	vunmap(vaddr);
 	sync_core();
 	/* Could also do a CLFLUSH here to speed up CPU recovery; but
 	   that causes hangs on some VIA CPUs. */
+	for (i = 0; i < len; i++)
+		BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
 	return addr;
 }
-- 
cgit v1.2.2


From 00c6b2d5d7b2414bd46c620d6a8c37fa7a716f29 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 25 Apr 2008 17:07:03 +0200
Subject: x86: harden kernel code patching

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/alternative.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 60299f61843f..65c7857a90dd 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -519,6 +519,7 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
 		pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
 	} else {
 		pages[0] = virt_to_page(addr);
+		WARN_ON(!PageReserved(pages[0]));
 		pages[1] = virt_to_page(addr + PAGE_SIZE);
 	}
 	BUG_ON(!pages[0]);
-- 
cgit v1.2.2


From 2cfed60cc24676d65e01278dbf10d0069de02592 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Fri, 25 Apr 2008 04:21:11 -0600
Subject: Update .gitignore files

Add some autogenerated files to various .gitignore files

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/boot/.gitignore                 | 5 +++--
 arch/x86/kernel/acpi/realmode/.gitignore | 3 +++
 2 files changed, 6 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kernel/acpi/realmode/.gitignore

(limited to 'arch/x86')

diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index b1bdc4c6f9f2..172cf8a98bdd 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -1,7 +1,8 @@
 bootsect
 bzImage
+cpustr.h
+mkcpustr
+offsets.h
 setup
 setup.bin
 setup.elf
-cpustr.h
-mkcpustr
diff --git a/arch/x86/kernel/acpi/realmode/.gitignore b/arch/x86/kernel/acpi/realmode/.gitignore
new file mode 100644
index 000000000000..58f1f48a58f8
--- /dev/null
+++ b/arch/x86/kernel/acpi/realmode/.gitignore
@@ -0,0 +1,3 @@
+wakeup.bin
+wakeup.elf
+wakeup.lds
-- 
cgit v1.2.2