From c41b93fb8551148a93d3bba870365e8489317f02 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 8 Feb 2011 23:41:35 +0100
Subject: ACPI / PM: Drop acpi_restore_state_mem()

The function acpi_restore_state_mem() has never been and most likely
never will be used, so remove it.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/acpi/sleep.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 68d1537b8c81..c27a483094b1 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -110,14 +110,6 @@ int acpi_save_state_mem(void)
 	return 0;
 }
 
-/*
- * acpi_restore_state - undo effects of acpi_save_state_mem
- */
-void acpi_restore_state_mem(void)
-{
-}
-
-
 /**
  * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
  *
-- 
cgit v1.2.2


From f1a2003e22f6b50ea21f7f4b38b38c5ebc9c8017 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Tue, 8 Feb 2011 23:42:22 +0100
Subject: ACPI / PM: Merge do_suspend_lowlevel() into acpi_save_state_mem()

The function do_suspend_lowlevel() is specific to x86 and defined in
assembly code, so it should be called from the x86 low-level suspend
code rather than from acpi_suspend_enter().

Merge do_suspend_lowlevel() into the x86's acpi_save_state_mem() and
change the name of the latter to acpi_suspend_lowlevel(), so that the
function's purpose is better reflected by its name.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/kernel/acpi/sleep.c | 5 +++--
 arch/x86/kernel/acpi/sleep.h | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index c27a483094b1..5f1b747f6ef1 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -29,14 +29,14 @@ static char temp_stack[4096];
 #endif
 
 /**
- * acpi_save_state_mem - save kernel state
+ * acpi_suspend_lowlevel - save kernel state
  *
  * Create an identity mapped page table and copy the wakeup routine to
  * low memory.
  *
  * Note that this is too late to change acpi_wakeup_address.
  */
-int acpi_save_state_mem(void)
+int acpi_suspend_lowlevel(void)
 {
 	struct wakeup_header *header;
 
@@ -107,6 +107,7 @@ int acpi_save_state_mem(void)
        saved_magic = 0x123456789abcdef0L;
 #endif /* CONFIG_64BIT */
 
+	do_suspend_lowlevel();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index adbcbaa6f1df..31ce13f20297 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -14,3 +14,5 @@ extern char swsusp_pg_dir[PAGE_SIZE];
 
 extern unsigned long acpi_copy_wakeup_routine(unsigned long);
 extern void wakeup_long64(void);
+
+extern void do_suspend_lowlevel(void);
-- 
cgit v1.2.2


From fc66c5210ec2539e800e87d7b3a985323c7be96e Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Sat, 19 Mar 2011 18:20:05 +0100
Subject: perf, x86: Fix Intel fixed counters base initialization

The following patch solves the problems introduced by Robert's
commit 41bf498 and reported by Arun Sharma. This commit gets rid
of the base + index notation for reading and writing PMU msrs.

The problem is that for fixed counters, the new calculation for
the base did not take into account the fixed counter indexes,
thus all fixed counters were read/written from fixed counter 0.
Although all fixed counters share the same config MSR, they each
have their own counter register.

Without:

 $ task -e unhalted_core_cycles -e instructions_retired -e baclears noploop 1 noploop for 1 seconds

  242202299 unhalted_core_cycles (0.00% scaling, ena=1000790892, run=1000790892)
 2389685946 instructions_retired (0.00% scaling, ena=1000790892, run=1000790892)
      49473 baclears             (0.00% scaling, ena=1000790892, run=1000790892)

With:

 $ task -e unhalted_core_cycles -e instructions_retired -e baclears noploop 1 noploop for 1 seconds

 2392703238 unhalted_core_cycles (0.00% scaling, ena=1000840809, run=1000840809)
 2389793744 instructions_retired (0.00% scaling, ena=1000840809, run=1000840809)
      47863 baclears             (0.00% scaling, ena=1000840809, run=1000840809)

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ming.m.lin@intel.com
Cc: robert.richter@amd.com
Cc: asharma@fb.com
Cc: perfmon2-devel@lists.sf.net
LKML-Reference: <20110319172005.GB4978@quad>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e8dbe179587f..ec46eea0c4ed 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -912,7 +912,7 @@ static inline void x86_assign_hw_event(struct perf_event *event,
 		hwc->event_base	= 0;
 	} else if (hwc->idx >= X86_PMC_IDX_FIXED) {
 		hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
-		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0;
+		hwc->event_base = MSR_ARCH_PERFMON_FIXED_CTR0 + (hwc->idx - X86_PMC_IDX_FIXED);
 	} else {
 		hwc->config_base = x86_pmu_config_addr(hwc->idx);
 		hwc->event_base  = x86_pmu_event_addr(hwc->idx);
-- 
cgit v1.2.2


From e5f15b45ddf3afa2bbbb10c7ea34fb32b6de0a0e Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Fri, 18 Feb 2011 11:30:30 +0000
Subject: x86: Cleanup highmap after brk is concluded

Now cleanup_highmap actually is in two steps: one is early in head64.c
and only clears above _end; a second one is in init_memory_mapping() and
tries to clean from _brk_end to _end.
It should check if those boundaries are PMD_SIZE aligned but currently
does not.
Also init_memory_mapping() is called several times for numa or memory
hotplug, so we really should not handle initial kernel mappings there.

This patch moves cleanup_highmap() down after _brk_end is settled so
we can do everything in one step.
Also we honor max_pfn_mapped in the implementation of cleanup_highmap.

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
LKML-Reference: <alpine.DEB.2.00.1103171739050.3382@kaball-desktop>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/head64.c |  3 ---
 arch/x86/kernel/setup.c  | 25 +++----------------------
 2 files changed, 3 insertions(+), 25 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 2d2673c28aff..5655c2272adb 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -77,9 +77,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	/* Make NULL pointers segfault */
 	zap_identity_mappings();
 
-	/* Cleanup the over mapped high alias */
-	cleanup_highmap();
-
 	max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
 
 	for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b176f2b1f45d..4a52a5f9afcb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -294,30 +294,11 @@ static void __init init_gbpages(void)
 	else
 		direct_gbpages = 0;
 }
-
-static void __init cleanup_highmap_brk_end(void)
-{
-	pud_t *pud;
-	pmd_t *pmd;
-
-	mmu_cr4_features = read_cr4();
-
-	/*
-	 * _brk_end cannot change anymore, but it and _end may be
-	 * located on different 2M pages. cleanup_highmap(), however,
-	 * can only consider _end when it runs, so destroy any
-	 * mappings beyond _brk_end here.
-	 */
-	pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
-	pmd = pmd_offset(pud, _brk_end - 1);
-	while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
-		pmd_clear(pmd);
-}
 #else
 static inline void init_gbpages(void)
 {
 }
-static inline void cleanup_highmap_brk_end(void)
+static void __init cleanup_highmap(void)
 {
 }
 #endif
@@ -330,8 +311,6 @@ static void __init reserve_brk(void)
 	/* Mark brk area as locked down and no longer taking any
 	   new allocations */
 	_brk_start = 0;
-
-	cleanup_highmap_brk_end();
 }
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -950,6 +929,8 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	reserve_brk();
 
+	cleanup_highmap();
+
 	memblock.current_limit = get_max_mapped();
 	memblock_x86_fill();
 
-- 
cgit v1.2.2


From b7ed78f56575074f29ec99d8984f347f6c99c914 Mon Sep 17 00:00:00 2001
From: Sage Weil <sage@newdream.net>
Date: Thu, 10 Mar 2011 11:31:30 -0800
Subject: introduce sys_syncfs to sync a single file system

It is frequently useful to sync a single file system, instead of all
mounted file systems via sync(2):

 - On machines with many mounts, it is not at all uncommon for some of
   them to hang (e.g. unresponsive NFS server).  sync(2) will get stuck on
   those and may never get to the one you do care about (e.g., /).
 - Some applications write lots of data to the file system and then
   want to make sure it is flushed to disk.  Calling fsync(2) on each
   file introduces unnecessary ordering constraints that result in a large
   amount of sub-optimal writeback/flush/commit behavior by the file
   system.

There are currently two ways (that I know of) to sync a single super_block:

 - BLKFLSBUF ioctl on the block device: That also invalidates the bdev
   mapping, which isn't usually desirable, and doesn't work for non-block
   file systems.
 - 'mount -o remount,rw' will call sync_filesystem as an artifact of the
   current implemention.  Relying on this little-known side effect for
   something like data safety sounds foolish.

Both of these approaches require root privileges, which some applications
do not have (nor should they need?) given that sync(2) is an unprivileged
operation.

This patch introduces a new system call syncfs(2) that takes an fd and
syncs only the file system it references.  Maybe someday we can

 $ sync /some/path

and not get

 sync: ignoring all arguments

The syscall is motivated by comments by Al and Christoph at the last LSF.
syncfs(2) seems like an appropriate name given statfs(2).

A similar ioctl was also proposed a while back, see
	http://marc.info/?l=linux-fsdevel&m=127970513829285&w=2

Signed-off-by: Sage Weil <sage@newdream.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/syscall_table_32.S | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 5f181742e8f9..abce34d5c79d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -343,3 +343,4 @@ ENTRY(sys_call_table)
 	.long sys_name_to_handle_at
 	.long sys_open_by_handle_at
 	.long sys_clock_adjtime
+	.long sys_syncfs
-- 
cgit v1.2.2


From 885b976fada5bc6595a9fd3e67e3cb1a3d11f50b Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Mon, 21 Feb 2011 13:54:41 +0800
Subject: ACPI, APEI, Add ERST record ID cache

APEI ERST firmware interface and implementation has no multiple users
in mind.  For example, if there is four records in storage with ID: 1,
2, 3 and 4, if two ERST readers enumerate the records via
GET_NEXT_RECORD_ID as follow,

reader 1		reader 2
1
			2
3
			4
-1
			-1

where -1 signals there is no more record ID.

Reader 1 has no chance to check record 2 and 4, while reader 2 has no
chance to check record 1 and 3.  And any other GET_NEXT_RECORD_ID will
return -1, that is, other readers will has no chance to check any
record even they are not cleared by anyone.

This makes raw GET_NEXT_RECORD_ID not suitable for used by multiple
users.

To solve the issue, an in-memory ERST record ID cache is designed and
implemented.  When enumerating record ID, the ID returned by
GET_NEXT_RECORD_ID is added into cache in addition to be returned to
caller.  So other readers can check the cache to get all record ID
available.

Signed-off-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-apei.c | 42 ++++++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 16 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 8209472b27a5..83930deec3c6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
 ssize_t apei_read_mce(struct mce *m, u64 *record_id)
 {
 	struct cper_mce_record rcd;
-	ssize_t len;
-
-	len = erst_read_next(&rcd.hdr, sizeof(rcd));
-	if (len <= 0)
-		return len;
-	/* Can not skip other records in storage via ERST unless clear them */
-	else if (len != sizeof(rcd) ||
-		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
-		if (printk_ratelimit())
-			pr_warning(
-			"MCE-APEI: Can not skip the unknown record in ERST");
-		return -EIO;
-	}
-
+	int rc, pos;
+
+	rc = erst_get_record_id_begin(&pos);
+	if (rc)
+		return rc;
+retry:
+	rc = erst_get_record_id_next(&pos, record_id);
+	if (rc)
+		goto out;
+	/* no more record */
+	if (*record_id == APEI_ERST_INVALID_RECORD_ID)
+		goto out;
+	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+	/* someone else has cleared the record, try next one */
+	if (rc == -ENOENT)
+		goto retry;
+	else if (rc < 0)
+		goto out;
+	/* try to skip other type records in storage */
+	else if (rc != sizeof(rcd) ||
+		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+		goto retry;
 	memcpy(m, &rcd.mce, sizeof(*m));
-	*record_id = rcd.hdr.record_id;
+	rc = sizeof(*m);
+out:
+	erst_get_record_id_end();
 
-	return sizeof(*m);
+	return rc;
 }
 
 /* Check whether there is record in ERST */
-- 
cgit v1.2.2


From cbb84c4cc1ad0ab8faaffd899ccc9b14a88c91be Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Tue, 22 Mar 2011 15:24:54 +0600
Subject: x86, mpparse: Move check_slot into CONFIG_X86_IO_APIC context

When CONFIG_X86_MPPARSE=y and CONFIG_X86_IO_APIC=n, then we get
the following warning:

  arch/x86/kernel/mpparse.c:723: warning: 'check_slot' defined but not used

So, put check_slot into CONFIG_X86_IO_APIC context. Its only
called from CONFIG_X86_IO_APIC=y context.

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
LKML-Reference: <AANLkTinsUfGc=NG_GeH_B+zFVu+DXJzZbJKdQLscqfuH@mail.gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 6f789a887c06..5a532ce646bf 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -714,10 +714,6 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
 		*nr_m_spare += 1;
 	}
 }
-#else /* CONFIG_X86_IO_APIC */
-static
-inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
-#endif /* CONFIG_X86_IO_APIC */
 
 static int
 check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
@@ -731,6 +727,10 @@ check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
 
 	return ret;
 }
+#else /* CONFIG_X86_IO_APIC */
+static
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
+#endif /* CONFIG_X86_IO_APIC */
 
 static int  __init replace_intsrc_all(struct mpc_table *mpc,
 					unsigned long mpc_new_phys,
-- 
cgit v1.2.2


From d404ab0a1133e95557bb7deab2a49b348dfeba85 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Tue, 22 Mar 2011 16:34:04 -0700
Subject: move x86 specific oops=panic to generic code

The oops=panic cmdline option is not x86 specific, move it to generic code.
Update documentation.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/dumpstack.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 999e2793590b..81ac6c78c01c 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -322,16 +322,6 @@ void die(const char *str, struct pt_regs *regs, long err)
 	oops_end(flags, regs, sig);
 }
 
-static int __init oops_setup(char *s)
-{
-	if (!s)
-		return -EINVAL;
-	if (!strcmp(s, "panic"))
-		panic_on_oops = 1;
-	return 0;
-}
-early_param("oops", oops_setup);
-
 static int __init kstack_setup(char *s)
 {
 	if (!s)
-- 
cgit v1.2.2


From 4061d68e1a59ffacdf216a55f254cb01d4d1f840 Mon Sep 17 00:00:00 2001
From: David Rientjes <rientjes@google.com>
Date: Tue, 22 Mar 2011 16:34:57 -0700
Subject: x86: only compile 8237A if CONFIG_ISA_DMA_API is enabled

8237A utilizes the interface provided by CONFIG_ISA_DMA_API, specifically
claim_dma_lock() and release_dma_lock().  Thus, there's a strict
dependency on the config option and the module should only be loaded if
the kernel supports ISA-style DMA.

Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Bjorn Helgaas <bjorn.helgaas@hp.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: "Luck, Tony" <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 743642f1a36c..7338ef2218bc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-$(CONFIG_X86_64)	+= syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
-obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
+obj-y			+= pci-dma.o quirks.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o hw_breakpoint.o
 obj-y			+= tsc.o io_delay.o rtc.o
 obj-y			+= pci-iommu_table.o
@@ -55,6 +55,7 @@ obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
 obj-$(CONFIG_INTEL_TXT)		+= tboot.o
+obj-$(CONFIG_ISA_DMA_API)	+= i8237.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-y				+= cpu/
 obj-y				+= acpi/
-- 
cgit v1.2.2


From 375906f8765e131a4a159b1ffebf78c15db7b3bf Mon Sep 17 00:00:00 2001
From: Stephen Wilson <wilsons@start.ca>
Date: Sun, 13 Mar 2011 15:49:14 -0400
Subject: x86: mark associated mm when running a task in 32 bit compatibility
 mode

This patch simply follows the same practice as for setting the TIF_IA32 flag.
In particular, an mm is marked as holding 32-bit tasks when a 32-bit binary is
exec'ed.  Both ELF and a.out formats are updated.

Signed-off-by: Stephen Wilson <wilsons@start.ca>
Reviewed-by: Michel Lespinasse <walken@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/process_64.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index bd387e8f73b4..6c9dd922ac0d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -501,6 +501,10 @@ void set_personality_64bit(void)
 	/* Make sure to be in 64bit mode */
 	clear_thread_flag(TIF_IA32);
 
+	/* Ensure the corresponding mm is not marked. */
+	if (current->mm)
+		current->mm->context.ia32_compat = 0;
+
 	/* TBD: overwrites user setup. Should have two bits.
 	   But 64bit processes have always behaved this way,
 	   so it's not too bad. The main problem is just that
@@ -516,6 +520,10 @@ void set_personality_ia32(void)
 	set_thread_flag(TIF_IA32);
 	current->personality |= force_personality32;
 
+	/* Mark the associated mm as containing 32-bit tasks. */
+	if (current->mm)
+		current->mm->context.ia32_compat = 1;
+
 	/* Prepare the first "return" to user space */
 	current_thread_info()->status |= TS_COMPAT;
 }
-- 
cgit v1.2.2


From f3c6ea1b06c71b43f751b36bd99345369fe911af Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 23 Mar 2011 22:15:54 +0100
Subject: x86: Use syscore_ops instead of sysdev classes and sysdevs

Some subsystems in the x86 tree need to carry out suspend/resume and
shutdown operations with one CPU on-line and interrupts disabled and
they define sysdev classes and sysdevs or sysdev drivers for this
purpose.  This leads to unnecessarily complicated code and excessive
memory usage, so switch them to using struct syscore_ops objects for
this purpose instead.

Generally, there are three categories of subsystems that use
sysdevs for implementing PM operations: (1) subsystems whose
suspend/resume callbacks ignore their arguments entirely (the
majority), (2) subsystems whose suspend/resume callbacks use their
struct sys_device argument, but don't really need to do that,
because they can be implemented differently in an arguably simpler
way (io_apic.c), and (3) subsystems whose suspend/resume callbacks
use their struct sys_device argument, but the value of that argument
is always the same and could be ignored (microcode_core.c).  In all
of these cases the subsystems in question may be readily converted to
using struct syscore_ops objects for power management and shutdown.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu_init.c | 26 +++--------
 arch/x86/kernel/apic/apic.c      | 33 ++++----------
 arch/x86/kernel/apic/io_apic.c   | 97 +++++++++++++++++++---------------------
 arch/x86/kernel/cpu/mcheck/mce.c | 21 +++++----
 arch/x86/kernel/cpu/mtrr/main.c  | 10 ++---
 arch/x86/kernel/i8237.c          | 30 +++----------
 arch/x86/kernel/i8259.c          | 33 +++++---------
 arch/x86/kernel/microcode_core.c | 34 ++++++--------
 arch/x86/kernel/pci-gart_64.c    | 32 +++----------
 9 files changed, 116 insertions(+), 200 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 6e11c8134158..246d727b65b7 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -21,7 +21,7 @@
 #include <linux/acpi.h>
 #include <linux/list.h>
 #include <linux/slab.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/interrupt.h>
 #include <linux/msi.h>
 #include <asm/pci-direct.h>
@@ -1260,7 +1260,7 @@ static void disable_iommus(void)
  * disable suspend until real resume implemented
  */
 
-static int amd_iommu_resume(struct sys_device *dev)
+static void amd_iommu_resume(void)
 {
 	struct amd_iommu *iommu;
 
@@ -1276,11 +1276,9 @@ static int amd_iommu_resume(struct sys_device *dev)
 	 */
 	amd_iommu_flush_all_devices();
 	amd_iommu_flush_all_domains();
-
-	return 0;
 }
 
-static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
+static int amd_iommu_suspend(void)
 {
 	/* disable IOMMUs to go out of the way for BIOS */
 	disable_iommus();
@@ -1288,17 +1286,11 @@ static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static struct sysdev_class amd_iommu_sysdev_class = {
-	.name = "amd_iommu",
+static struct syscore_ops amd_iommu_syscore_ops = {
 	.suspend = amd_iommu_suspend,
 	.resume = amd_iommu_resume,
 };
 
-static struct sys_device device_amd_iommu = {
-	.id = 0,
-	.cls = &amd_iommu_sysdev_class,
-};
-
 /*
  * This is the core init function for AMD IOMMU hardware in the system.
  * This function is called from the generic x86 DMA layer initialization
@@ -1415,14 +1407,6 @@ static int __init amd_iommu_init(void)
 		goto free;
 	}
 
-	ret = sysdev_class_register(&amd_iommu_sysdev_class);
-	if (ret)
-		goto free;
-
-	ret = sysdev_register(&device_amd_iommu);
-	if (ret)
-		goto free;
-
 	ret = amd_iommu_init_devices();
 	if (ret)
 		goto free;
@@ -1441,6 +1425,8 @@ static int __init amd_iommu_init(void)
 
 	amd_iommu_init_notifier();
 
+	register_syscore_ops(&amd_iommu_syscore_ops);
+
 	if (iommu_pass_through)
 		goto out;
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 966673f44141..fabf01eff771 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -24,7 +24,7 @@
 #include <linux/ftrace.h>
 #include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/timex.h>
 #include <linux/dmar.h>
@@ -2046,7 +2046,7 @@ static struct {
 	unsigned int apic_thmr;
 } apic_pm_state;
 
-static int lapic_suspend(struct sys_device *dev, pm_message_t state)
+static int lapic_suspend(void)
 {
 	unsigned long flags;
 	int maxlvt;
@@ -2084,23 +2084,21 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
 	return 0;
 }
 
-static int lapic_resume(struct sys_device *dev)
+static void lapic_resume(void)
 {
 	unsigned int l, h;
 	unsigned long flags;
-	int maxlvt;
-	int ret = 0;
+	int maxlvt, ret;
 	struct IO_APIC_route_entry **ioapic_entries = NULL;
 
 	if (!apic_pm_state.active)
-		return 0;
+		return;
 
 	local_irq_save(flags);
 	if (intr_remapping_enabled) {
 		ioapic_entries = alloc_ioapic_entries();
 		if (!ioapic_entries) {
 			WARN(1, "Alloc ioapic_entries in lapic resume failed.");
-			ret = -ENOMEM;
 			goto restore;
 		}
 
@@ -2162,8 +2160,6 @@ static int lapic_resume(struct sys_device *dev)
 	}
 restore:
 	local_irq_restore(flags);
-
-	return ret;
 }
 
 /*
@@ -2171,17 +2167,11 @@ restore:
  * are needed on every CPU up until machine_halt/restart/poweroff.
  */
 
-static struct sysdev_class lapic_sysclass = {
-	.name		= "lapic",
+static struct syscore_ops lapic_syscore_ops = {
 	.resume		= lapic_resume,
 	.suspend	= lapic_suspend,
 };
 
-static struct sys_device device_lapic = {
-	.id	= 0,
-	.cls	= &lapic_sysclass,
-};
-
 static void __cpuinit apic_pm_activate(void)
 {
 	apic_pm_state.active = 1;
@@ -2189,16 +2179,11 @@ static void __cpuinit apic_pm_activate(void)
 
 static int __init init_lapic_sysfs(void)
 {
-	int error;
-
-	if (!cpu_has_apic)
-		return 0;
 	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+	if (cpu_has_apic)
+		register_syscore_ops(&lapic_syscore_ops);
 
-	error = sysdev_class_register(&lapic_sysclass);
-	if (!error)
-		error = sysdev_register(&device_lapic);
-	return error;
+	return 0;
 }
 
 /* local apic needs to resume before other devices access its registers. */
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 180ca240e03c..68df09bba92e 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -30,7 +30,7 @@
 #include <linux/compiler.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/msi.h>
 #include <linux/htirq.h>
 #include <linux/freezer.h>
@@ -2918,89 +2918,84 @@ static int __init io_apic_bug_finalize(void)
 
 late_initcall(io_apic_bug_finalize);
 
-struct sysfs_ioapic_data {
-	struct sys_device dev;
-	struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+static struct IO_APIC_route_entry *ioapic_saved_data[MAX_IO_APICS];
 
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
+static void suspend_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
-		*entry = ioapic_read_entry(dev->id, i);
+	if (!saved_data)
+		return;
+
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		saved_data[i] = ioapic_read_entry(ioapic_id, i);
+}
+
+static int ioapic_suspend(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = 0; ioapic_id < nr_ioapics; ioapic_id++)
+		suspend_ioapic(ioapic_id);
 
 	return 0;
 }
 
-static int ioapic_resume(struct sys_device *dev)
+static void resume_ioapic(int ioapic_id)
 {
-	struct IO_APIC_route_entry *entry;
-	struct sysfs_ioapic_data *data;
+	struct IO_APIC_route_entry *saved_data = ioapic_saved_data[ioapic_id];
 	unsigned long flags;
 	union IO_APIC_reg_00 reg_00;
 	int i;
 
-	data = container_of(dev, struct sysfs_ioapic_data, dev);
-	entry = data->entry;
+	if (!saved_data)
+		return;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	reg_00.raw = io_apic_read(dev->id, 0);
-	if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
-		reg_00.bits.ID = mp_ioapics[dev->id].apicid;
-		io_apic_write(dev->id, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(ioapic_id, 0);
+	if (reg_00.bits.ID != mp_ioapics[ioapic_id].apicid) {
+		reg_00.bits.ID = mp_ioapics[ioapic_id].apicid;
+		io_apic_write(ioapic_id, 0, reg_00.raw);
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-	for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
-		ioapic_write_entry(dev->id, i, entry[i]);
+	for (i = 0; i < nr_ioapic_registers[ioapic_id]; i++)
+		ioapic_write_entry(ioapic_id, i, saved_data[i]);
+}
 
-	return 0;
+static void ioapic_resume(void)
+{
+	int ioapic_id;
+
+	for (ioapic_id = nr_ioapics - 1; ioapic_id >= 0; ioapic_id--)
+		resume_ioapic(ioapic_id);
 }
 
-static struct sysdev_class ioapic_sysdev_class = {
-	.name = "ioapic",
+static struct syscore_ops ioapic_syscore_ops = {
 	.suspend = ioapic_suspend,
 	.resume = ioapic_resume,
 };
 
-static int __init ioapic_init_sysfs(void)
+static int __init ioapic_init_ops(void)
 {
-	struct sys_device * dev;
-	int i, size, error;
+	int i;
 
-	error = sysdev_class_register(&ioapic_sysdev_class);
-	if (error)
-		return error;
+	for (i = 0; i < nr_ioapics; i++) {
+		unsigned int size;
 
-	for (i = 0; i < nr_ioapics; i++ ) {
-		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+		size = nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
-		if (!mp_ioapic_data[i]) {
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
-		dev = &mp_ioapic_data[i]->dev;
-		dev->id = i;
-		dev->cls = &ioapic_sysdev_class;
-		error = sysdev_register(dev);
-		if (error) {
-			kfree(mp_ioapic_data[i]);
-			mp_ioapic_data[i] = NULL;
-			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
-			continue;
-		}
+		ioapic_saved_data[i] = kzalloc(size, GFP_KERNEL);
+		if (!ioapic_saved_data[i])
+			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
 	}
 
+	register_syscore_ops(&ioapic_syscore_ops);
+
 	return 0;
 }
 
-device_initcall(ioapic_init_sysfs);
+device_initcall(ioapic_init_ops);
 
 /*
  * Dynamic irq allocate and deallocation
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ab1122998dba..5a05ef63eb4a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/string.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
@@ -1749,14 +1750,14 @@ static int mce_disable_error_reporting(void)
 	return 0;
 }
 
-static int mce_suspend(struct sys_device *dev, pm_message_t state)
+static int mce_suspend(void)
 {
 	return mce_disable_error_reporting();
 }
 
-static int mce_shutdown(struct sys_device *dev)
+static void mce_shutdown(void)
 {
-	return mce_disable_error_reporting();
+	mce_disable_error_reporting();
 }
 
 /*
@@ -1764,14 +1765,18 @@ static int mce_shutdown(struct sys_device *dev)
  * Only one CPU is active at this time, the others get re-added later using
  * CPU hotplug:
  */
-static int mce_resume(struct sys_device *dev)
+static void mce_resume(void)
 {
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
-
-	return 0;
 }
 
+static struct syscore_ops mce_syscore_ops = {
+	.suspend	= mce_suspend,
+	.shutdown	= mce_shutdown,
+	.resume		= mce_resume,
+};
+
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1808,9 +1813,6 @@ static void mce_enable_ce(void *all)
 }
 
 static struct sysdev_class mce_sysclass = {
-	.suspend	= mce_suspend,
-	.shutdown	= mce_shutdown,
-	.resume		= mce_resume,
 	.name		= "machinecheck",
 };
 
@@ -2139,6 +2141,7 @@ static __init int mcheck_init_device(void)
 			return err;
 	}
 
+	register_syscore_ops(&mce_syscore_ops);
 	register_hotcpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
 
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index bebabec5b448..307dfbbf4a8e 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -45,6 +45,7 @@
 #include <linux/cpu.h>
 #include <linux/pci.h>
 #include <linux/smp.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/processor.h>
 #include <asm/e820.h>
@@ -630,7 +631,7 @@ struct mtrr_value {
 
 static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
 
-static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
+static int mtrr_save(void)
 {
 	int i;
 
@@ -642,7 +643,7 @@ static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
 	return 0;
 }
 
-static int mtrr_restore(struct sys_device *sysdev)
+static void mtrr_restore(void)
 {
 	int i;
 
@@ -653,12 +654,11 @@ static int mtrr_restore(struct sys_device *sysdev)
 				    mtrr_value[i].ltype);
 		}
 	}
-	return 0;
 }
 
 
-static struct sysdev_driver mtrr_sysdev_driver = {
+static struct syscore_ops mtrr_syscore_ops = {
 	.suspend	= mtrr_save,
 	.resume		= mtrr_restore,
 };
@@ -839,7 +839,7 @@ static int __init mtrr_init_finialize(void)
 	 * TBD: is there any system with such CPU which supports
 	 * suspend/resume? If no, we should remove the code.
 	 */
-	sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
+	register_syscore_ops(&mtrr_syscore_ops);
 
 	return 0;
 }
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index b42ca694dc68..8eeaa81de066 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -10,7 +10,7 @@
  */
 
 #include <linux/init.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/dma.h>
 
@@ -21,7 +21,7 @@
  * in asm/dma.h.
  */
 
-static int i8237A_resume(struct sys_device *dev)
+static void i8237A_resume(void)
 {
 	unsigned long flags;
 	int i;
@@ -41,31 +41,15 @@ static int i8237A_resume(struct sys_device *dev)
 	enable_dma(4);
 
 	release_dma_lock(flags);
-
-	return 0;
 }
 
-static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
-{
-	return 0;
-}
-
-static struct sysdev_class i8237_sysdev_class = {
-	.name		= "i8237",
-	.suspend	= i8237A_suspend,
+static struct syscore_ops i8237_syscore_ops = {
 	.resume		= i8237A_resume,
 };
 
-static struct sys_device device_i8237A = {
-	.id		= 0,
-	.cls		= &i8237_sysdev_class,
-};
-
-static int __init i8237A_init_sysfs(void)
+static int __init i8237A_init_ops(void)
 {
-	int error = sysdev_class_register(&i8237_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8237A);
-	return error;
+	register_syscore_ops(&i8237_syscore_ops);
+	return 0;
 }
-device_initcall(i8237A_init_sysfs);
+device_initcall(i8237A_init_ops);
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index d9ca749c123b..65b8f5c2eebf 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -8,7 +8,7 @@
 #include <linux/random.h>
 #include <linux/init.h>
 #include <linux/kernel_stat.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/bitops.h>
 #include <linux/acpi.h>
 #include <linux/io.h>
@@ -245,20 +245,19 @@ static void save_ELCR(char *trigger)
 	trigger[1] = inb(0x4d1) & 0xDE;
 }
 
-static int i8259A_resume(struct sys_device *dev)
+static void i8259A_resume(void)
 {
 	init_8259A(i8259A_auto_eoi);
 	restore_ELCR(irq_trigger);
-	return 0;
 }
 
-static int i8259A_suspend(struct sys_device *dev, pm_message_t state)
+static int i8259A_suspend(void)
 {
 	save_ELCR(irq_trigger);
 	return 0;
 }
 
-static int i8259A_shutdown(struct sys_device *dev)
+static void i8259A_shutdown(void)
 {
 	/* Put the i8259A into a quiescent state that
 	 * the kernel initialization code can get it
@@ -266,21 +265,14 @@ static int i8259A_shutdown(struct sys_device *dev)
 	 */
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
-	return 0;
 }
 
-static struct sysdev_class i8259_sysdev_class = {
-	.name = "i8259",
+static struct syscore_ops i8259_syscore_ops = {
 	.suspend = i8259A_suspend,
 	.resume = i8259A_resume,
 	.shutdown = i8259A_shutdown,
 };
 
-static struct sys_device device_i8259A = {
-	.id	= 0,
-	.cls	= &i8259_sysdev_class,
-};
-
 static void mask_8259A(void)
 {
 	unsigned long flags;
@@ -399,17 +391,12 @@ struct legacy_pic default_legacy_pic = {
 
 struct legacy_pic *legacy_pic = &default_legacy_pic;
 
-static int __init i8259A_init_sysfs(void)
+static int __init i8259A_init_ops(void)
 {
-	int error;
-
-	if (legacy_pic != &default_legacy_pic)
-		return 0;
+	if (legacy_pic == &default_legacy_pic)
+		register_syscore_ops(&i8259_syscore_ops);
 
-	error = sysdev_class_register(&i8259_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_i8259A);
-	return error;
+	return 0;
 }
 
-device_initcall(i8259A_init_sysfs);
+device_initcall(i8259A_init_ops);
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 87af68e0e1e1..5ed0ab549eb8 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -82,6 +82,7 @@
 #include <linux/cpu.h>
 #include <linux/fs.h>
 #include <linux/mm.h>
+#include <linux/syscore_ops.h>
 
 #include <asm/microcode.h>
 #include <asm/processor.h>
@@ -438,33 +439,25 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 	return 0;
 }
 
-static int mc_sysdev_resume(struct sys_device *dev)
+static struct sysdev_driver mc_sysdev_driver = {
+	.add			= mc_sysdev_add,
+	.remove			= mc_sysdev_remove,
+};
+
+/**
+ * mc_bp_resume - Update boot CPU microcode during resume.
+ */
+static void mc_bp_resume(void)
 {
-	int cpu = dev->id;
+	int cpu = smp_processor_id();
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 
-	if (!cpu_online(cpu))
-		return 0;
-
-	/*
-	 * All non-bootup cpus are still disabled,
-	 * so only CPU 0 will apply ucode here.
-	 *
-	 * Moreover, there can be no concurrent
-	 * updates from any other places at this point.
-	 */
-	WARN_ON(cpu != 0);
-
 	if (uci->valid && uci->mc)
 		microcode_ops->apply_microcode(cpu);
-
-	return 0;
 }
 
-static struct sysdev_driver mc_sysdev_driver = {
-	.add			= mc_sysdev_add,
-	.remove			= mc_sysdev_remove,
-	.resume			= mc_sysdev_resume,
+static struct syscore_ops mc_syscore_ops = {
+	.resume			= mc_bp_resume,
 };
 
 static __cpuinit int
@@ -542,6 +535,7 @@ static int __init microcode_init(void)
 	if (error)
 		return error;
 
+	register_syscore_ops(&mc_syscore_ops);
 	register_hotcpu_notifier(&mc_cpu_notifier);
 
 	pr_info("Microcode Update Driver: v" MICROCODE_VERSION
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index c01ffa5b9b87..82ada01625b9 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -27,7 +27,7 @@
 #include <linux/kdebug.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/io.h>
 #include <linux/gfp.h>
 #include <asm/atomic.h>
@@ -589,7 +589,7 @@ void set_up_gart_resume(u32 aper_order, u32 aper_alloc)
 	aperture_alloc = aper_alloc;
 }
 
-static void gart_fixup_northbridges(struct sys_device *dev)
+static void gart_fixup_northbridges(void)
 {
 	int i;
 
@@ -613,33 +613,20 @@ static void gart_fixup_northbridges(struct sys_device *dev)
 	}
 }
 
-static int gart_resume(struct sys_device *dev)
+static void gart_resume(void)
 {
 	pr_info("PCI-DMA: Resuming GART IOMMU\n");
 
-	gart_fixup_northbridges(dev);
+	gart_fixup_northbridges();
 
 	enable_gart_translations();
-
-	return 0;
 }
 
-static int gart_suspend(struct sys_device *dev, pm_message_t state)
-{
-	return 0;
-}
-
-static struct sysdev_class gart_sysdev_class = {
-	.name		= "gart",
-	.suspend	= gart_suspend,
+static struct syscore_ops gart_syscore_ops = {
 	.resume		= gart_resume,
 
 };
 
-static struct sys_device device_gart = {
-	.cls		= &gart_sysdev_class,
-};
-
 /*
  * Private Northbridge GATT initialization in case we cannot use the
  * AGP driver for some reason.
@@ -650,7 +637,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
 	unsigned aper_base, new_aper_base;
 	struct pci_dev *dev;
 	void *gatt;
-	int i, error;
+	int i;
 
 	pr_info("PCI-DMA: Disabling AGP.\n");
 
@@ -685,12 +672,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
 
 	agp_gatt_table = gatt;
 
-	error = sysdev_class_register(&gart_sysdev_class);
-	if (!error)
-		error = sysdev_register(&device_gart);
-	if (error)
-		panic("Could not register gart_sysdev -- "
-		      "would corrupt data on next suspend");
+	register_syscore_ops(&gart_syscore_ops);
 
 	flush_gart();
 
-- 
cgit v1.2.2


From 93a72052be81823fa1584b9be037d51924f9efa4 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Wed, 23 Mar 2011 16:43:29 -0700
Subject: crash_dump: export is_kdump_kernel to modules, consolidate
 elfcorehdr_addr, setup_elfcorehdr and saved_max_pfn

The Xen PV drivers in a crashed HVM guest can not connect to the dom0
backend drivers because both frontend and backend drivers are still in
connected state.  To run the connection reset function only in case of a
crashdump, the is_kdump_kernel() function needs to be available for the PV
driver modules.

Consolidate elfcorehdr_addr, setup_elfcorehdr and saved_max_pfn into
kernel/crash_dump.c Also export elfcorehdr_addr to make is_kdump_kernel()
usable for modules.

Leave 'elfcorehdr' as early_param().  This changes powerpc from __setup()
to early_param().  It adds an address range check from x86 also on ia64
and powerpc.

[akpm@linux-foundation.org: additional #includes]
[akpm@linux-foundation.org: remove elfcorehdr_addr export]
[akpm@linux-foundation.org: fix for Tejun's mm/nobootmem.c changes]
Signed-off-by: Olaf Hering <olaf@aepfle.de>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/crash_dump_32.c |  3 ---
 arch/x86/kernel/crash_dump_64.c |  3 ---
 arch/x86/kernel/e820.c          |  1 +
 arch/x86/kernel/setup.c         | 22 ----------------------
 4 files changed, 1 insertion(+), 28 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/crash_dump_32.c b/arch/x86/kernel/crash_dump_32.c
index d5cd13945d5a..642f75a68cd5 100644
--- a/arch/x86/kernel/crash_dump_32.c
+++ b/arch/x86/kernel/crash_dump_32.c
@@ -14,9 +14,6 @@
 
 static void *kdump_buf_page;
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 static inline bool is_crashed_pfn_valid(unsigned long pfn)
 {
 #ifndef CONFIG_X86_PAE
diff --git a/arch/x86/kernel/crash_dump_64.c b/arch/x86/kernel/crash_dump_64.c
index 994828899e09..afa64adb75ee 100644
--- a/arch/x86/kernel/crash_dump_64.c
+++ b/arch/x86/kernel/crash_dump_64.c
@@ -10,9 +10,6 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 
-/* Stores the physical address of elf header of crash image. */
-unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
-
 /**
  * copy_oldmem_page - copy one page from "oldmem"
  * @pfn: page frame number to be copied
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index cdf5bfd9d4d5..3e2ef8425316 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 #include <linux/init.h>
+#include <linux/crash_dump.h>
 #include <linux/bootmem.h>
 #include <linux/pfn.h>
 #include <linux/suspend.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 32bd87cbf982..5a0484a95ad6 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -619,28 +619,6 @@ void __init reserve_standard_io_resources(void)
 
 }
 
-/*
- * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
- * is_kdump_kernel() to determine if we are booting after a panic. Hence
- * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
- */
-
-#ifdef CONFIG_CRASH_DUMP
-/* elfcorehdr= specifies the location of elf core header
- * stored by the crashed kernel. This option will be passed
- * by kexec loader to the capture kernel.
- */
-static int __init setup_elfcorehdr(char *arg)
-{
-	char *end;
-	if (!arg)
-		return -EINVAL;
-	elfcorehdr_addr = memparse(arg, &end);
-	return end > arg ? 0 : -EINVAL;
-}
-early_param("elfcorehdr", setup_elfcorehdr);
-#endif
-
 static __init void reserve_ibft_region(void)
 {
 	unsigned long addr, size = 0;
-- 
cgit v1.2.2


From 71f9e59800e5ad4e6b683348424c9fe54306cd43 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@gmail.com>
Date: Thu, 24 Mar 2011 11:42:30 +0900
Subject: x86, dumpstack: Use %pB format specifier for stack trace

Improve noreturn function entries in call traces:

Before:

 Call Trace:
  [<ffffffff812a8502>] panic+0x8c/0x18d
  [<ffffffffa000012a>] deep01+0x0/0x38 [test_panic]  <--- bad
  [<ffffffff81104666>] proc_file_write+0x73/0x8d
  [<ffffffff811000b3>] proc_reg_write+0x8d/0xac
  [<ffffffff810c7d32>] vfs_write+0xa1/0xc5
  [<ffffffff810c7e0f>] sys_write+0x45/0x6c
  [<ffffffff8f02943b>] system_call_fastpath+0x16/0x1b

After:

 Call Trace:
  [<ffffffff812bce69>] panic+0x8c/0x18d
  [<ffffffffa000012a>] panic_write+0x20/0x20 [test_panic] <--- good
  [<ffffffff81115fab>] proc_file_write+0x73/0x8d
  [<ffffffff81111a5f>] proc_reg_write+0x8d/0xac
  [<ffffffff810d90ee>] vfs_write+0xa1/0xc5
  [<ffffffff810d91cb>] sys_write+0x45/0x6c
  [<ffffffff812c07fb>] system_call_fastpath+0x16/0x1b

Signed-off-by: Namhyung Kim <namhyung@gmail.com>
Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <1300934550-21394-2-git-send-email-namhyung@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/dumpstack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 999e2793590b..24d0479025f9 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -27,7 +27,7 @@ static int die_counter;
 
 void printk_address(unsigned long address, int reliable)
 {
-	printk(" [<%p>] %s%pS\n", (void *) address,
+	printk(" [<%p>] %s%pB\n", (void *) address,
 			reliable ? "" : "? ", (void *) address);
 }
 
-- 
cgit v1.2.2


From 242214f9c1eeaae40eca11e3b4d37bfce960a7cd Mon Sep 17 00:00:00 2001
From: Don Zickus <dzickus@redhat.com>
Date: Thu, 24 Mar 2011 23:36:25 +0300
Subject: perf, x86: P4 PMU - Read proper MSR register to catch unflagged
 overflows

The read of a proper MSR register was missed and instead of
counter the configration register was tested (it has
ARCH_P4_UNFLAGGED_BIT always cleared) leading to unknown NMI
hitting the system. As result the user may obtain "Dazed and
confused, but trying to continue" message. Fix it by reading a
proper MSR register.

When an NMI happens on a P4, the perf nmi handler checks the
configuration register to see if the overflow bit is set or not
before taking appropriate action.  Unfortunately, various P4
machines had a broken overflow bit, so a backup mechanism was
implemented.  This mechanism checked to see if the counter
rolled over or not.

A previous commit that implemented this backup mechanism was
broken. Instead of reading the counter register, it used the
configuration register to determine if the counter rolled over
or not. Reading that bit would give incorrect results.

This would lead to 'Dazed and confused' messages for the end
user when using the perf tool (or if the nmi watchdog is
running).

The fix is to read the counter register before determining if
the counter rolled over or not.

Signed-off-by: Don Zickus <dzickus@redhat.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Lin Ming <ming.m.lin@intel.com>
LKML-Reference: <4D8BAB49.3080701@openvz.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_p4.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 3769ac822f96..d3d7b59841e5 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -777,6 +777,7 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
 	 * the counter has reached zero value and continued counting before
 	 * real NMI signal was received:
 	 */
+	rdmsrl(hwc->event_base, v);
 	if (!(v & ARCH_P4_UNFLAGGED_BIT))
 		return 1;
 
-- 
cgit v1.2.2


From 00a30b254b88d2d4f5af00835a9b7f70326def9b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 24 Mar 2011 22:53:10 +0100
Subject: x86: DT: Fix return condition in irq_create_of_mapping()

The xlate() function returns 0 or a negative error code. Returning the
error code blindly will be seen as an huge irq number by the calling
function because irq_create_of_mapping() returns an unsigned value.

Return 0 (NO_IRQ) as required.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/kernel/devicetree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 7a8cebc9ff29..9c91badb6ca9 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -65,7 +65,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller,
 		return 0;
 	ret = ih->xlate(ih, intspec, intsize, &virq, &type);
 	if (ret)
-		return ret;
+		return 0;
 	if (type == IRQ_TYPE_NONE)
 		return virq;
 	/* set the mask if it is different from current */
-- 
cgit v1.2.2


From 07611dbda5ccbd9a628f29686d62bafdd007db7b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 24 Mar 2011 21:41:57 +0100
Subject: x86: DT: Cleanup namespace and call irq_set_irq_type() unconditional

That call escaped the name space cleanup. Fix it up.

We really want to call there. The chip might have changed since the
irq was setup initially. So let the core code and the chip decide what
to do. The status is just an unreliable snapshot.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 arch/x86/kernel/devicetree.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 9c91badb6ca9..706a9fb46a58 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -68,9 +68,7 @@ unsigned int irq_create_of_mapping(struct device_node *controller,
 		return 0;
 	if (type == IRQ_TYPE_NONE)
 		return virq;
-	/* set the mask if it is different from current */
-	if (type == (irq_to_desc(virq)->status & IRQF_TRIGGER_MASK))
-		set_irq_type(virq, type);
+	irq_set_irq_type(virq, type);
 	return virq;
 }
 EXPORT_SYMBOL_GPL(irq_create_of_mapping);
-- 
cgit v1.2.2


From 45daae575e08bbf7405c5a3633e956fa364d1b4f Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 25 Mar 2011 10:24:23 +0100
Subject: perf, x86: Complain louder about BIOSen corrupting CPU/PMU state and
 continue

Eric Dumazet reported that hardware PMU events do not work on his
system, due to the BIOS corrupting PMU state:

    Performance Events: PEBS fmt0+, Core2 events, Broken BIOS detected, using software events only.
    [Firmware Bug]: the BIOS has corrupted hw-PMU resources (MSR 186 is 43003c)

Linus suggested that we continue in the face of such BIOS-induced CPU
state corruption:

   http://lkml.org/lkml/2011/3/24/608

Such BIOSes will have to be fixed - Linux developers rely on a working and
fully capable PMU and the BIOS interfering with the CPU's PMU state is simply
not acceptable.

So this patch changes perf to continue when it detects such BIOS
interaction, some hardware events may be unreliable due to the BIOS
writing and re-writing them - there's not much the kernel can do
about that but to detect the corruption and report it.

Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ec46eea0c4ed..eb00677ee2ae 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -500,12 +500,17 @@ static bool check_hw_exists(void)
 	return true;
 
 bios_fail:
-	printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+	/*
+	 * We still allow the PMU driver to operate:
+	 */
+	printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n");
 	printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
-	return false;
+
+	return true;
 
 msr_fail:
 	printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+
 	return false;
 }
 
-- 
cgit v1.2.2


From 21431c2900a0b669080b5bfaae2a7d9d9c026e9b Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 15 Mar 2010 07:28:00 -0500
Subject: kgdb,x86_64: fix compile warning found with sparse

Fix sparse warning:

arch/x86/kernel/kgdb.c:123:9: warning: switch with no cases

Reported-by: Namhyung Kim <namhyung@gmail.com>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/kernel/kgdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86/kernel')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index a4130005028a..3c2fb0f25abd 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -121,8 +121,8 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 		memcpy(mem, (void *)regs + dbg_reg_def[regno].offset,
 		       dbg_reg_def[regno].size);
 
-	switch (regno) {
 #ifdef CONFIG_X86_32
+	switch (regno) {
 	case GDB_SS:
 		if (!user_mode_vm(regs))
 			*(unsigned long *)mem = __KERNEL_DS;
@@ -135,8 +135,8 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
 	case GDB_FS:
 		*(unsigned long *)mem = 0xFFFF;
 		break;
-#endif
 	}
+#endif
 	return dbg_reg_def[regno].name;
 }
 
-- 
cgit v1.2.2