author     Ingo Molnar <mingo@elte.hu>  2009-01-04 04:59:36 -0500
committer  Ingo Molnar <mingo@elte.hu>  2009-01-04 04:59:36 -0500
commit     4010b0192ddf6ec7ec1b9feb9b0953692aeb7329 (patch)
tree       188a36186f6ce580b479a9f90404fa7bfd8b22d7 /arch
parent     79ff56ebd3edfb16f8badc558cb439b203a3298f (diff)
parent     7d3b56ba37a95f1f370f50258ed3954c304c524b (diff)
Merge branch 'linus' into core/urgent
Diffstat (limited to 'arch')
-rw-r--r--  arch/alpha/include/asm/topology.h | 17
-rw-r--r--  arch/alpha/kernel/Makefile | 2
-rw-r--r--  arch/alpha/kernel/binfmt_loader.c | 51
-rw-r--r--  arch/alpha/kernel/irq.c | 3
-rw-r--r--  arch/alpha/kernel/setup.c | 5
-rw-r--r--  arch/avr32/include/asm/bitops.h | 5
-rw-r--r--  arch/blackfin/include/asm/bitops.h | 1
-rw-r--r--  arch/cris/include/asm/bitops.h | 1
-rw-r--r--  arch/h8300/include/asm/bitops.h | 1
-rw-r--r--  arch/ia64/Kconfig | 3
-rw-r--r--  arch/ia64/include/asm/irq.h | 2
-rw-r--r--  arch/ia64/include/asm/kvm_host.h | 2
-rw-r--r--  arch/ia64/include/asm/topology.h | 9
-rw-r--r--  arch/ia64/kernel/acpi.c | 3
-rw-r--r--  arch/ia64/kernel/iosapic.c | 23
-rw-r--r--  arch/ia64/kernel/irq.c | 4
-rw-r--r--  arch/ia64/kernel/time.c | 18
-rw-r--r--  arch/ia64/kvm/Makefile | 4
-rw-r--r--  arch/ia64/kvm/kvm-ia64.c | 3
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn_hwperf.c | 27
-rw-r--r--  arch/m32r/kernel/smpboot.c | 2
-rw-r--r--  arch/m68k/Kconfig | 1
-rw-r--r--  arch/m68knommu/include/asm/bitops.h | 1
-rw-r--r--  arch/mips/include/asm/mach-ip27/topology.h | 4
-rw-r--r--  arch/parisc/include/asm/smp.h | 2
-rw-r--r--  arch/powerpc/include/asm/topology.h | 12
-rw-r--r--  arch/powerpc/kernel/process.c | 1
-rw-r--r--  arch/powerpc/kernel/time.c | 18
-rw-r--r--  arch/powerpc/platforms/cell/spu_priv1_mmio.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/spufs/sched.c | 4
-rw-r--r--  arch/s390/include/asm/cpu.h | 7
-rw-r--r--  arch/s390/include/asm/cputime.h | 42
-rw-r--r--  arch/s390/include/asm/lowcore.h | 49
-rw-r--r--  arch/s390/include/asm/system.h | 4
-rw-r--r--  arch/s390/include/asm/thread_info.h | 2
-rw-r--r--  arch/s390/include/asm/timer.h | 16
-rw-r--r--  arch/s390/include/asm/topology.h | 2
-rw-r--r--  arch/s390/include/asm/vdso.h | 15
-rw-r--r--  arch/s390/kernel/asm-offsets.c | 5
-rw-r--r--  arch/s390/kernel/entry.S | 5
-rw-r--r--  arch/s390/kernel/entry64.S | 50
-rw-r--r--  arch/s390/kernel/head64.S | 2
-rw-r--r--  arch/s390/kernel/process.c | 43
-rw-r--r--  arch/s390/kernel/s390_ext.c | 2
-rw-r--r--  arch/s390/kernel/setup.c | 2
-rw-r--r--  arch/s390/kernel/smp.c | 34
-rw-r--r--  arch/s390/kernel/topology.c | 5
-rw-r--r--  arch/s390/kernel/vdso.c | 123
-rw-r--r--  arch/s390/kernel/vdso64/clock_getres.S | 5
-rw-r--r--  arch/s390/kernel/vdso64/clock_gettime.S | 39
-rw-r--r--  arch/s390/kernel/vtime.c | 486
-rw-r--r--  arch/sh/include/asm/topology.h | 1
-rw-r--r--  arch/sparc/include/asm/topology_64.h | 13
-rw-r--r--  arch/sparc/kernel/of_device_64.c | 2
-rw-r--r--  arch/sparc/kernel/pci_msi.c | 2
-rw-r--r--  arch/x86/Kconfig | 13
-rw-r--r--  arch/x86/ia32/ia32_signal.c | 3
-rw-r--r--  arch/x86/ia32/ipc32.c | 1
-rw-r--r--  arch/x86/ia32/sys_ia32.c | 2
-rw-r--r--  arch/x86/include/asm/amd_iommu_types.h | 61
-rw-r--r--  arch/x86/include/asm/apic.h | 3
-rw-r--r--  arch/x86/include/asm/efi.h | 1
-rw-r--r--  arch/x86/include/asm/es7000/apic.h | 32
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 2
-rw-r--r--  arch/x86/include/asm/lguest.h | 2
-rw-r--r--  arch/x86/include/asm/mpspec.h | 2
-rw-r--r--  arch/x86/include/asm/numaq/apic.h | 4
-rw-r--r--  arch/x86/include/asm/pci.h | 10
-rw-r--r--  arch/x86/include/asm/pci_x86.h (renamed from arch/x86/pci/pci.h) | 17
-rw-r--r--  arch/x86/include/asm/summit/apic.h | 42
-rw-r--r--  arch/x86/include/asm/sys_ia32.h | 101
-rw-r--r--  arch/x86/include/asm/topology.h | 36
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h | 46
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 31
-rw-r--r--  arch/x86/kernel/amd_iommu.c | 666
-rw-r--r--  arch/x86/kernel/amd_iommu_init.c | 19
-rw-r--r--  arch/x86/kernel/apic.c | 12
-rw-r--r--  arch/x86/kernel/bios_uv.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 2
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | 28
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k7.c | 9
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 24
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 2
-rw-r--r--  arch/x86/kernel/cpu/mtrr/main.c | 6
-rw-r--r--  arch/x86/kernel/cpuid.c | 8
-rw-r--r--  arch/x86/kernel/early_printk.c | 2
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 4
-rw-r--r--  arch/x86/kernel/head64.c | 2
-rw-r--r--  arch/x86/kernel/io_apic.c | 8
-rw-r--r--  arch/x86/kernel/ldt.c | 4
-rw-r--r--  arch/x86/kernel/mmconf-fam10h_64.c | 3
-rw-r--r--  arch/x86/kernel/mpparse.c | 10
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 3
-rw-r--r--  arch/x86/kernel/pci-gart_64.c | 2
-rw-r--r--  arch/x86/kernel/reboot.c | 6
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 33
-rw-r--r--  arch/x86/kernel/smpboot.c | 15
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 9
-rw-r--r--  arch/x86/kernel/traps.c | 33
-rw-r--r--  arch/x86/kernel/xsave.c | 2
-rw-r--r--  arch/x86/kvm/Makefile | 4
-rw-r--r--  arch/x86/kvm/x86.c | 3
-rw-r--r--  arch/x86/mach-default/setup.c | 15
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c | 7
-rw-r--r--  arch/x86/pci/acpi.c | 2
-rw-r--r--  arch/x86/pci/amd_bus.c | 2
-rw-r--r--  arch/x86/pci/common.c | 3
-rw-r--r--  arch/x86/pci/direct.c | 2
-rw-r--r--  arch/x86/pci/early.c | 2
-rw-r--r--  arch/x86/pci/fixup.c | 3
-rw-r--r--  arch/x86/pci/i386.c | 2
-rw-r--r--  arch/x86/pci/init.c | 2
-rw-r--r--  arch/x86/pci/irq.c | 3
-rw-r--r--  arch/x86/pci/legacy.c | 2
-rw-r--r--  arch/x86/pci/mmconfig-shared.c | 3
-rw-r--r--  arch/x86/pci/mmconfig_32.c | 2
-rw-r--r--  arch/x86/pci/mmconfig_64.c | 3
-rw-r--r--  arch/x86/pci/numaq_32.c | 2
-rw-r--r--  arch/x86/pci/olpc.c | 2
-rw-r--r--  arch/x86/pci/pcbios.c | 5
-rw-r--r--  arch/x86/pci/visws.c | 3
-rw-r--r--  arch/x86/xen/time.c | 10
123 files changed, 1795 insertions(+), 803 deletions(-)
diff --git a/arch/alpha/include/asm/topology.h b/arch/alpha/include/asm/topology.h
index 149532e162c4..b4f284c72ff3 100644
--- a/arch/alpha/include/asm/topology.h
+++ b/arch/alpha/include/asm/topology.h
@@ -39,7 +39,24 @@ static inline cpumask_t node_to_cpumask(int node)
 	return node_cpu_mask;
 }
 
+extern struct cpumask node_to_cpumask_map[];
+/* FIXME: This is dumb, recalculating every time. But simple. */
+static const struct cpumask *cpumask_of_node(int node)
+{
+	int cpu;
+
+	cpumask_clear(&node_to_cpumask_map[node]);
+
+	for_each_online_cpu(cpu) {
+		if (cpu_to_node(cpu) == node)
+			cpumask_set_cpu(cpu, &node_to_cpumask_map[node]);
+	}
+
+	return &node_to_cpumask_map[node];
+}
+
 #define pcibus_to_cpumask(bus)	(cpu_online_map)
+#define cpumask_of_pcibus(bus)	(cpu_online_mask)
 
 #endif /* !CONFIG_NUMA */
 # include <asm-generic/topology.h>
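Note: this alpha hunk is part of a tree-wide move from the value-returning node_to_cpumask(node) to the pointer-returning cpumask_of_node(node). A minimal sketch of the caller-side difference, assuming a NUMA-enabled build; the surrounding variables are hypothetical, the accessor names are the ones in the hunk above:

	/* old style: copies an entire NR_CPUS-bit mask onto the stack */
	cpumask_t mask = node_to_cpumask(node);
	int cpu = first_cpu(mask);

	/* new style: borrows a pointer to a mask that already exists */
	const struct cpumask *maskp = cpumask_of_node(node);
	int cpu2 = cpumask_first(maskp);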
diff --git a/arch/alpha/kernel/Makefile b/arch/alpha/kernel/Makefile
index ac706c1d7ada..b4697759a123 100644
--- a/arch/alpha/kernel/Makefile
+++ b/arch/alpha/kernel/Makefile
@@ -8,7 +8,7 @@ EXTRA_CFLAGS := -Werror -Wno-sign-compare
 
 obj-y    := entry.o traps.o process.o init_task.o osf_sys.o irq.o \
 	    irq_alpha.o signal.o setup.o ptrace.o time.o \
-	    alpha_ksyms.o systbls.o err_common.o io.o
+	    alpha_ksyms.o systbls.o err_common.o io.o binfmt_loader.o
 
 obj-$(CONFIG_VGA_HOSE)	+= console.o
 obj-$(CONFIG_SMP)	+= smp.o
diff --git a/arch/alpha/kernel/binfmt_loader.c b/arch/alpha/kernel/binfmt_loader.c
new file mode 100644
index 000000000000..4a0af906b00a
--- /dev/null
+++ b/arch/alpha/kernel/binfmt_loader.c
@@ -0,0 +1,51 @@
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm_types.h>
+#include <linux/binfmts.h>
+#include <linux/a.out.h>
+
+static int load_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+{
+	struct exec *eh = (struct exec *)bprm->buf;
+	unsigned long loader;
+	struct file *file;
+	int retval;
+
+	if (eh->fh.f_magic != 0x183 || (eh->fh.f_flags & 0x3000) != 0x3000)
+		return -ENOEXEC;
+
+	if (bprm->loader)
+		return -ENOEXEC;
+
+	allow_write_access(bprm->file);
+	fput(bprm->file);
+	bprm->file = NULL;
+
+	loader = bprm->vma->vm_end - sizeof(void *);
+
+	file = open_exec("/sbin/loader");
+	retval = PTR_ERR(file);
+	if (IS_ERR(file))
+		return retval;
+
+	/* Remember if the application is TASO. */
+	bprm->taso = eh->ah.entry < 0x100000000UL;
+
+	bprm->file = file;
+	bprm->loader = loader;
+	retval = prepare_binprm(bprm);
+	if (retval < 0)
+		return retval;
+	return search_binary_handler(bprm,regs);
+}
+
+static struct linux_binfmt loader_format = {
+	.load_binary = load_binary,
+};
+
+static int __init init_loader_binfmt(void)
+{
+	return register_binfmt(&loader_format);
+}
+arch_initcall(init_loader_binfmt);
diff --git a/arch/alpha/kernel/irq.c b/arch/alpha/kernel/irq.c
index d0f1620007f7..703731accda6 100644
--- a/arch/alpha/kernel/irq.c
+++ b/arch/alpha/kernel/irq.c
@@ -50,7 +50,8 @@ int irq_select_affinity(unsigned int irq)
 	if (!irq_desc[irq].chip->set_affinity || irq_user_affinity[irq])
 		return 1;
 
-	while (!cpu_possible(cpu) || !cpu_isset(cpu, irq_default_affinity))
+	while (!cpu_possible(cpu) ||
+	       !cpumask_test_cpu(cpu, irq_default_affinity))
 		cpu = (cpu < (NR_CPUS-1) ? cpu + 1 : 0);
 	last_cpu = cpu;
 
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index a449e999027c..02bee6983ce2 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -79,6 +79,11 @@ int alpha_l3_cacheshape;
 unsigned long alpha_verbose_mcheck = CONFIG_VERBOSE_MCHECK_ON;
 #endif
 
+#ifdef CONFIG_NUMA
+struct cpumask node_to_cpumask_map[MAX_NUMNODES] __read_mostly;
+EXPORT_SYMBOL(node_to_cpumask_map);
+#endif
+
 /* Which processor we booted from. */
 int boot_cpuid;
 
diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h
index 1a50b69b1a19..f7dd5f71edf7 100644
--- a/arch/avr32/include/asm/bitops.h
+++ b/arch/avr32/include/asm/bitops.h
@@ -263,6 +263,11 @@ static inline int fls(unsigned long word)
 	return 32 - result;
 }
 
+static inline int __fls(unsigned long word)
+{
+	return fls(word) - 1;
+}
+
 unsigned long find_first_zero_bit(const unsigned long *addr,
 				  unsigned long size);
 unsigned long find_next_zero_bit(const unsigned long *addr,
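Note: the asm-generic __fls() being pulled in by the following hunks (and open-coded for avr32 above) returns the 0-based index of the most significant set bit and is undefined for a zero argument, while fls() returns a 1-based index and 0 when no bit is set. Hence __fls(word) == fls(word) - 1 for any non-zero word. A hypothetical illustration:

	unsigned long word = 0x90;	/* bits 7 and 4 set */
	fls(word);			/* == 8, 1-based */
	__fls(word);			/* == 7, 0-based; word must be non-zero */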
diff --git a/arch/blackfin/include/asm/bitops.h b/arch/blackfin/include/asm/bitops.h
index b39a175c79c1..c428e4106f89 100644
--- a/arch/blackfin/include/asm/bitops.h
+++ b/arch/blackfin/include/asm/bitops.h
@@ -213,6 +213,7 @@ static __inline__ int __test_bit(int nr, const void *addr)
 #endif /* __KERNEL__ */
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif /* _BLACKFIN_BITOPS_H */
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index c0e62f811e09..9e69cfb7f134 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -148,6 +148,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 #define ffs kernel_ffs
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 #include <asm-generic/bitops/hweight.h>
 #include <asm-generic/bitops/find.h>
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index cb18e3b0aa94..cb9ddf5fc54f 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -207,6 +207,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
 #endif /* __KERNEL__ */
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif /* _H8300_BITOPS_H */
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 7fa8f615ba6e..3d31636cbafb 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -687,3 +687,6 @@ config IRQ_PER_CPU
 
 config IOMMU_HELPER
 	def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB)
+
+config IOMMU_API
+	def_bool (DMAR)
diff --git a/arch/ia64/include/asm/irq.h b/arch/ia64/include/asm/irq.h
index 3627116fb0e2..36429a532630 100644
--- a/arch/ia64/include/asm/irq.h
+++ b/arch/ia64/include/asm/irq.h
@@ -27,7 +27,7 @@ irq_canonicalize (int irq)
 }
 
 extern void set_irq_affinity_info (unsigned int irq, int dest, int redir);
-bool is_affinity_mask_valid(cpumask_t cpumask);
+bool is_affinity_mask_valid(cpumask_var_t cpumask);
 
 #define is_affinity_mask_valid is_affinity_mask_valid
 
diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
index 0560f3fae538..348663661659 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -467,7 +467,7 @@ struct kvm_arch {
 	struct kvm_sal_data rdv_sal_data;
 
 	struct list_head assigned_dev_head;
-	struct dmar_domain *intel_iommu_domain;
+	struct iommu_domain *iommu_domain;
 	struct hlist_head irq_ack_notifier_list;
 
 	unsigned long irq_sources_bitmap;
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index a3cc9f65f954..76a33a91ca69 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -34,6 +34,7 @@
  * Returns a bitmask of CPUs on Node 'node'.
  */
 #define node_to_cpumask(node) (node_to_cpu_mask[node])
+#define cpumask_of_node(node) (&node_to_cpu_mask[node])
 
 /*
  * Returns the number of the node containing Node 'nid'.
@@ -45,7 +46,7 @@
 /*
  * Returns the number of the first CPU on Node 'node'.
  */
-#define node_to_first_cpu(node) (first_cpu(node_to_cpumask(node)))
+#define node_to_first_cpu(node) (cpumask_first(cpumask_of_node(node)))
 
 /*
  * Determines the node for a given pci bus
@@ -109,6 +110,8 @@ void build_cpu_to_node_map(void);
 #define topology_core_id(cpu)			(cpu_data(cpu)->core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #define smt_capable()				(smp_num_siblings > 1)
 #endif
 
@@ -119,6 +122,10 @@ extern void arch_fix_phys_package_id(int num, u32 slot);
 				 node_to_cpumask(pcibus_to_node(bus)) \
 				)
 
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?	\
+				 cpu_all_mask :			\
+				 cpumask_from_node(pcibus_to_node(bus)))
+
 #include <asm-generic/topology.h>
 
 #endif /* _ASM_IA64_TOPOLOGY_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index bd7acc71e8a9..0553648b7595 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -202,7 +202,6 @@ char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
    Boot-time Table Parsing
    -------------------------------------------------------------------------- */
 
-static int total_cpus __initdata;
 static int available_cpus __initdata;
 struct acpi_table_madt *acpi_madt __initdata;
 static u8 has_8259;
@@ -1001,7 +1000,7 @@ acpi_map_iosapic(acpi_handle handle, u32 depth, void *context, void **ret)
 	node = pxm_to_node(pxm);
 
 	if (node >= MAX_NUMNODES || !node_online(node) ||
-	    cpus_empty(node_to_cpumask(node)))
+	    cpumask_empty(cpumask_of_node(node)))
 		return AE_OK;
 
 	/* We know a gsi to node mapping! */
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index c8adecd5b416..5cfd3d91001a 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -695,32 +695,31 @@ get_target_cpu (unsigned int gsi, int irq)
 #ifdef CONFIG_NUMA
 	{
 		int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
-		cpumask_t cpu_mask;
+		const struct cpumask *cpu_mask;
 
 		iosapic_index = find_iosapic(gsi);
 		if (iosapic_index < 0 ||
 		    iosapic_lists[iosapic_index].node == MAX_NUMNODES)
 			goto skip_numa_setup;
 
-		cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
-		cpus_and(cpu_mask, cpu_mask, domain);
-		for_each_cpu_mask(numa_cpu, cpu_mask) {
-			if (!cpu_online(numa_cpu))
-				cpu_clear(numa_cpu, cpu_mask);
+		cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node);
+		num_cpus = 0;
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain) {
+			if (cpu_online(numa_cpu))
+				num_cpus++;
 		}
 
-		num_cpus = cpus_weight(cpu_mask);
-
 		if (!num_cpus)
 			goto skip_numa_setup;
 
 		/* Use irq assignment to distribute across cpus in node */
 		cpu_index = irq % num_cpus;
 
-		for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
-			numa_cpu = next_cpu(numa_cpu, cpu_mask);
+		for_each_cpu_and(numa_cpu, cpu_mask, &domain)
+			if (cpu_online(numa_cpu) && i++ >= cpu_index)
+				break;
 
-		if (numa_cpu != NR_CPUS)
+		if (numa_cpu < nr_cpu_ids)
 			return cpu_physical_id(numa_cpu);
 	}
 skip_numa_setup:
@@ -731,7 +730,7 @@ skip_numa_setup:
 	 * case of NUMA.)
 	 */
 	do {
-		if (++cpu >= NR_CPUS)
+		if (++cpu >= nr_cpu_ids)
 			cpu = 0;
 	} while (!cpu_online(cpu) || !cpu_isset(cpu, domain));
 
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index 0b6db53fedcf..95ff16cb05d8 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -112,11 +112,11 @@ void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
 	}
 }
 
-bool is_affinity_mask_valid(cpumask_t cpumask)
+bool is_affinity_mask_valid(cpumask_var_t cpumask)
 {
 	if (ia64_platform_is("sn2")) {
 		/* Only allow one CPU to be specified in the smp_affinity mask */
-		if (cpus_weight(cpumask) != 1)
+		if (cpumask_weight(cpumask) != 1)
 			return false;
 	}
 	return true;
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 65c10a42c88f..f0ebb342409d 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -93,13 +93,14 @@ void ia64_account_on_switch(struct task_struct *prev, struct task_struct *next)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(pi->ac_stime + (now - pi->ac_stamp));
-	account_system_time(prev, 0, delta_stime);
-	account_system_time_scaled(prev, delta_stime);
+	if (idle_task(smp_processor_id()) != prev)
+		account_system_time(prev, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
 
 	if (pi->ac_utime) {
 		delta_utime = cycle_to_cputime(pi->ac_utime);
-		account_user_time(prev, delta_utime);
-		account_user_time_scaled(prev, delta_utime);
+		account_user_time(prev, delta_utime, delta_utime);
 	}
 
 	pi->ac_stamp = ni->ac_stamp = now;
@@ -122,8 +123,10 @@ void account_system_vtime(struct task_struct *tsk)
 	now = ia64_get_itc();
 
 	delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp));
-	account_system_time(tsk, 0, delta_stime);
-	account_system_time_scaled(tsk, delta_stime);
+	if (irq_count() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta_stime, delta_stime);
+	else
+		account_idle_time(delta_stime);
 	ti->ac_stime = 0;
 
 	ti->ac_stamp = now;
@@ -143,8 +146,7 @@ void account_process_tick(struct task_struct *p, int user_tick)
 
 	if (ti->ac_utime) {
 		delta_utime = cycle_to_cputime(ti->ac_utime);
-		account_user_time(p, delta_utime);
-		account_user_time_scaled(p, delta_utime);
+		account_user_time(p, delta_utime, delta_utime);
 		ti->ac_utime = 0;
 	}
 }
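Note: the ia64 hunks above track a cputime accounting API change: the separate *_scaled() helpers are folded into account_user_time() and account_system_time() as a trailing scaled-time argument, and time consumed by the idle task is now routed through account_idle_time(). A sketch of the caller-side pattern these hunks share (ia64 passes the same value for raw and scaled time); tsk and delta stand in for whatever the caller has at hand:

	if (irq_count() || idle_task(smp_processor_id()) != tsk)
		account_system_time(tsk, 0, delta, delta);
	else
		account_idle_time(delta);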
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 76464dc312e6..0bb99b732908 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -51,8 +51,8 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 		coalesced_mmio.o irq_comm.o)
 
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0f5ebd948437..4e586f6110aa 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
 #include <linux/bitops.h>
 #include <linux/hrtimer.h>
 #include <linux/uaccess.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>
 
 #include <asm/pgtable.h>
@@ -188,7 +189,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
index 636588e7e068..be339477f906 100644
--- a/arch/ia64/sn/kernel/sn2/sn_hwperf.c
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -385,7 +385,6 @@ static int sn_topology_show(struct seq_file *s, void *d)
 	int j;
 	const char *slabname;
 	int ordinal;
-	cpumask_t cpumask;
 	char slice;
 	struct cpuinfo_ia64 *c;
 	struct sn_hwperf_port_info *ptdata;
@@ -473,23 +472,21 @@ static int sn_topology_show(struct seq_file *s, void *d)
 		 * CPUs on this node, if any
 		 */
 		if (!SN_HWPERF_IS_IONODE(obj)) {
-			cpumask = node_to_cpumask(ordinal);
-			for_each_online_cpu(i) {
-				if (cpu_isset(i, cpumask)) {
-					slice = 'a' + cpuid_to_slice(i);
-					c = cpu_data(i);
-					seq_printf(s, "cpu %d %s%c local"
-						" freq %luMHz, arch ia64",
-						i, obj->location, slice,
-						c->proc_freq / 1000000);
-					for_each_online_cpu(j) {
-						seq_printf(s, j ? ":%d" : ", dist %d",
-							node_distance(
+			for_each_cpu_and(i, cpu_online_mask,
+					 cpumask_of_node(ordinal)) {
+				slice = 'a' + cpuid_to_slice(i);
+				c = cpu_data(i);
+				seq_printf(s, "cpu %d %s%c local"
+					   " freq %luMHz, arch ia64",
+					   i, obj->location, slice,
+					   c->proc_freq / 1000000);
+				for_each_online_cpu(j) {
+					seq_printf(s, j ? ":%d" : ", dist %d",
+						   node_distance(
 						cpu_to_node(i),
 						cpu_to_node(j)));
-					}
-					seq_putc(s, '\n');
 				}
+				seq_putc(s, '\n');
 			}
 		}
 	}
diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c
index 0f06b3722e96..2547d6c4a827 100644
--- a/arch/m32r/kernel/smpboot.c
+++ b/arch/m32r/kernel/smpboot.c
@@ -592,7 +592,7 @@ int setup_profiling_timer(unsigned int multiplier)
 	 * accounting. At that time they also adjust their APIC timers
 	 * accordingly.
 	 */
-	for (i = 0; i < NR_CPUS; ++i)
+	for_each_possible_cpu(i)
 		per_cpu(prof_multiplier, i) = multiplier;
 
 	return 0;
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 836fb66f080d..c825bde17cb3 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -280,7 +280,6 @@ config M68060
 
 config MMU_MOTOROLA
 	bool
-	depends on MMU && !MMU_SUN3
 
 config MMU_SUN3
 	bool
diff --git a/arch/m68knommu/include/asm/bitops.h b/arch/m68knommu/include/asm/bitops.h
index 6f3685eab44c..9d3cbe5fad1e 100644
--- a/arch/m68knommu/include/asm/bitops.h
+++ b/arch/m68knommu/include/asm/bitops.h
@@ -331,6 +331,7 @@ found_middle:
 #endif /* __KERNEL__ */
 
 #include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls64.h>
 
 #endif /* _M68KNOMMU_BITOPS_H */
diff --git a/arch/mips/include/asm/mach-ip27/topology.h b/arch/mips/include/asm/mach-ip27/topology.h
index 1fb959f98982..55d481569a1f 100644
--- a/arch/mips/include/asm/mach-ip27/topology.h
+++ b/arch/mips/include/asm/mach-ip27/topology.h
@@ -25,11 +25,13 @@ extern struct cpuinfo_ip27 sn_cpu_info[NR_CPUS];
 #define cpu_to_node(cpu)	(sn_cpu_info[(cpu)].p_nodeid)
 #define parent_node(node)	(node)
 #define node_to_cpumask(node)	(hub_data(node)->h_cpus)
-#define node_to_first_cpu(node)	(first_cpu(node_to_cpumask(node)))
+#define cpumask_of_node(node)	(&hub_data(node)->h_cpus)
+#define node_to_first_cpu(node)	(cpumask_first(cpumask_of_node(node)))
 struct pci_bus;
 extern int pcibus_to_node(struct pci_bus *);
 
 #define pcibus_to_cpumask(bus)	(cpu_online_map)
+#define cpumask_of_pcibus(bus)	(cpu_online_mask)
 
 extern unsigned char __node_distances[MAX_COMPACT_NODES][MAX_COMPACT_NODES];
 
diff --git a/arch/parisc/include/asm/smp.h b/arch/parisc/include/asm/smp.h
index 409e698f4361..6ef4b7867b1b 100644
--- a/arch/parisc/include/asm/smp.h
+++ b/arch/parisc/include/asm/smp.h
@@ -16,8 +16,6 @@
 #include <linux/cpumask.h>
 typedef unsigned long address_t;
 
-extern cpumask_t cpu_online_map;
-
 
 /*
  * Private routines/data
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 373fca394a54..375258559ae6 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -22,11 +22,11 @@ static inline cpumask_t node_to_cpumask(int node)
 	return numa_cpumask_lookup_table[node];
 }
 
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
+
 static inline int node_to_first_cpu(int node)
 {
-	cpumask_t tmp;
-	tmp = node_to_cpumask(node);
-	return first_cpu(tmp);
+	return cpumask_first(cpumask_of_node(node));
 }
 
 int of_node_to_nid(struct device_node *device);
@@ -46,6 +46,10 @@ static inline int pcibus_to_node(struct pci_bus *bus)
 			 node_to_cpumask(pcibus_to_node(bus)) \
 			)
 
+#define cpumask_of_pcibus(bus)	(pcibus_to_node(bus) == -1 ?	\
+				 cpu_all_mask :			\
+				 cpumask_of_node(pcibus_to_node(bus)))
+
 /* sched_domains SD_NODE_INIT for PPC64 machines */
 #define SD_NODE_INIT (struct sched_domain) {		\
 	.parent = NULL,					\
@@ -108,6 +112,8 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev,
 
 #define topology_thread_siblings(cpu)	(per_cpu(cpu_sibling_map, cpu))
 #define topology_core_siblings(cpu)	(per_cpu(cpu_core_map, cpu))
+#define topology_thread_cpumask(cpu)	(&per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)	(&per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)		(cpu_to_core_id(cpu))
 #endif
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 51b201ddf9a1..fb7049c054c0 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -33,6 +33,7 @@
 #include <linux/mqueue.h>
 #include <linux/hardirq.h>
 #include <linux/utsname.h>
+#include <linux/kernel_stat.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 99f1ddd68582..c9564031a2a9 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -256,8 +256,10 @@ void account_system_vtime(struct task_struct *tsk)
 		delta += sys_time;
 		get_paca()->system_time = 0;
 	}
-	account_system_time(tsk, 0, delta);
-	account_system_time_scaled(tsk, deltascaled);
+	if (in_irq() || idle_task(smp_processor_id()) != tsk)
+		account_system_time(tsk, 0, delta, deltascaled);
+	else
+		account_idle_time(delta);
 	per_cpu(cputime_last_delta, smp_processor_id()) = delta;
 	per_cpu(cputime_scaled_last_delta, smp_processor_id()) = deltascaled;
 	local_irq_restore(flags);
@@ -275,10 +277,8 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 
 	utime = get_paca()->user_time;
 	get_paca()->user_time = 0;
-	account_user_time(tsk, utime);
-
 	utimescaled = cputime_to_scaled(utime);
-	account_user_time_scaled(tsk, utimescaled);
+	account_user_time(tsk, utime, utimescaled);
 }
 
 /*
@@ -338,8 +338,12 @@ void calculate_steal_time(void)
 	tb = mftb();
 	purr = mfspr(SPRN_PURR);
 	stolen = (tb - pme->tb) - (purr - pme->purr);
-	if (stolen > 0)
-		account_steal_time(current, stolen);
+	if (stolen > 0) {
+		if (idle_task(smp_processor_id()) != current)
+			account_steal_time(stolen);
+		else
+			account_idle_time(stolen);
+	}
 	pme->tb = tb;
 	pme->purr = purr;
 }
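Note: on shared-processor LPARs the PURR only advances while this partition is actually dispatched, so the hunk above derives steal time by comparing deltas. As a worked example, if the timebase advanced 1000 ticks since the last sample but PURR advanced only 700, the hypervisor ran something else for 300 ticks; after this patch those 300 ticks go to account_steal_time() when a real task owns the CPU, or to account_idle_time() when the idle task does:

	stolen = (tb - pme->tb) - (purr - pme->purr);	/* e.g. 1000 - 700 = 300 */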
diff --git a/arch/powerpc/platforms/cell/spu_priv1_mmio.c b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
index 906a0a2a9fe1..1410443731eb 100644
--- a/arch/powerpc/platforms/cell/spu_priv1_mmio.c
+++ b/arch/powerpc/platforms/cell/spu_priv1_mmio.c
@@ -80,10 +80,10 @@ static void cpu_affinity_set(struct spu *spu, int cpu)
 	u64 route;
 
 	if (nr_cpus_node(spu->node)) {
-		cpumask_t spumask = node_to_cpumask(spu->node);
-		cpumask_t cpumask = node_to_cpumask(cpu_to_node(cpu));
+		const struct cpumask *spumask = cpumask_of_node(spu->node),
+			*cpumask = cpumask_of_node(cpu_to_node(cpu));
 
-		if (!cpus_intersects(spumask, cpumask))
+		if (!cpumask_intersects(spumask, cpumask))
 			return;
 	}
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 2ad914c47493..6a0ad196aeb3 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -166,9 +166,9 @@ void spu_update_sched_info(struct spu_context *ctx)
 static int __node_allowed(struct spu_context *ctx, int node)
 {
 	if (nr_cpus_node(node)) {
-		cpumask_t mask = node_to_cpumask(node);
+		const struct cpumask *mask = cpumask_of_node(node);
 
-		if (cpus_intersects(mask, ctx->cpus_allowed))
+		if (cpumask_intersects(mask, &ctx->cpus_allowed))
 			return 1;
 	}
 
diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index e5a6a9ba3adf..d60a2eefb17b 100644
--- a/arch/s390/include/asm/cpu.h
+++ b/arch/s390/include/asm/cpu.h
@@ -14,7 +14,6 @@
 
 struct s390_idle_data {
 	spinlock_t lock;
-	unsigned int in_idle;
 	unsigned long long idle_count;
 	unsigned long long idle_enter;
 	unsigned long long idle_time;
@@ -22,12 +21,12 @@ struct s390_idle_data {
 
 DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
 
-void s390_idle_leave(void);
+void vtime_start_cpu(void);
 
 static inline void s390_idle_check(void)
 {
-	if ((&__get_cpu_var(s390_idle))->in_idle)
-		s390_idle_leave();
+	if ((&__get_cpu_var(s390_idle))->idle_enter != 0ULL)
+		vtime_start_cpu();
 }
 
 #endif /* _ASM_S390_CPU_H_ */
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index 133ce054fc89..521726430afa 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -11,7 +11,7 @@
 
 #include <asm/div64.h>
 
-/* We want to use micro-second resolution. */
+/* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */
 
 typedef unsigned long long cputime_t;
 typedef unsigned long long cputime64_t;
@@ -53,9 +53,9 @@ __div(unsigned long long n, unsigned int base)
 #define cputime_ge(__a, __b)		((__a) >= (__b))
 #define cputime_lt(__a, __b)		((__a) <  (__b))
 #define cputime_le(__a, __b)		((__a) <= (__b))
-#define cputime_to_jiffies(__ct)	(__div((__ct), 1000000 / HZ))
+#define cputime_to_jiffies(__ct)	(__div((__ct), 4096000000ULL / HZ))
 #define cputime_to_scaled(__ct)		(__ct)
-#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (1000000 / HZ))
+#define jiffies_to_cputime(__hz)	((cputime_t)(__hz) * (4096000000ULL / HZ))
 
 #define cputime64_zero			(0ULL)
 #define cputime64_add(__a, __b)		((__a) + (__b))
@@ -64,7 +64,7 @@ __div(unsigned long long n, unsigned int base)
 static inline u64
 cputime64_to_jiffies64(cputime64_t cputime)
 {
-	do_div(cputime, 1000000 / HZ);
+	do_div(cputime, 4096000000ULL / HZ);
 	return cputime;
 }
 
@@ -74,13 +74,13 @@ cputime64_to_jiffies64(cputime64_t cputime)
 static inline unsigned int
 cputime_to_msecs(const cputime_t cputime)
 {
-	return __div(cputime, 1000);
+	return __div(cputime, 4096000);
 }
 
 static inline cputime_t
 msecs_to_cputime(const unsigned int m)
 {
-	return (cputime_t) m * 1000;
+	return (cputime_t) m * 4096000;
 }
 
 /*
@@ -89,13 +89,13 @@ msecs_to_cputime(const unsigned int m)
 static inline unsigned int
 cputime_to_secs(const cputime_t cputime)
 {
-	return __div(cputime, 1000000);
+	return __div(cputime, 2048000000) >> 1;
 }
 
 static inline cputime_t
 secs_to_cputime(const unsigned int s)
 {
-	return (cputime_t) s * 1000000;
+	return (cputime_t) s * 4096000000ULL;
 }
 
 /*
@@ -104,7 +104,7 @@ secs_to_cputime(const unsigned int s)
 static inline cputime_t
 timespec_to_cputime(const struct timespec *value)
 {
-	return value->tv_nsec / 1000 + (u64) value->tv_sec * 1000000;
+	return value->tv_nsec * 4096 / 1000 + (u64) value->tv_sec * 4096000000ULL;
 }
 
 static inline void
@@ -114,12 +114,12 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
 	register_pair rp;
 
 	rp.pair = cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
-	value->tv_nsec = rp.subreg.even * 1000;
+	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+	value->tv_nsec = rp.subreg.even * 1000 / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_nsec = (cputime % 1000000) * 1000;
-	value->tv_sec = cputime / 1000000;
+	value->tv_nsec = (cputime % 4096000000ULL) * 1000 / 4096;
+	value->tv_sec = cputime / 4096000000ULL;
 #endif
 }
 
@@ -131,7 +131,7 @@ cputime_to_timespec(const cputime_t cputime, struct timespec *value)
 static inline cputime_t
 timeval_to_cputime(const struct timeval *value)
 {
-	return value->tv_usec + (u64) value->tv_sec * 1000000;
+	return value->tv_usec * 4096 + (u64) value->tv_sec * 4096000000ULL;
 }
 
 static inline void
@@ -141,12 +141,12 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
 	register_pair rp;
 
 	rp.pair = cputime >> 1;
-	asm ("dr %0,%1" : "+d" (rp) : "d" (1000000 >> 1));
-	value->tv_usec = rp.subreg.even;
+	asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL));
+	value->tv_usec = rp.subreg.even / 4096;
 	value->tv_sec = rp.subreg.odd;
 #else
-	value->tv_usec = cputime % 1000000;
-	value->tv_sec = cputime / 1000000;
+	value->tv_usec = cputime % 4096000000ULL;
+	value->tv_sec = cputime / 4096000000ULL;
 #endif
 }
 
@@ -156,13 +156,13 @@ cputime_to_timeval(const cputime_t cputime, struct timeval *value)
 static inline clock_t
 cputime_to_clock_t(cputime_t cputime)
 {
-	return __div(cputime, 1000000 / USER_HZ);
+	return __div(cputime, 4096000000ULL / USER_HZ);
 }
 
 static inline cputime_t
 clock_t_to_cputime(unsigned long x)
 {
-	return (cputime_t) x * (1000000 / USER_HZ);
+	return (cputime_t) x * (4096000000ULL / USER_HZ);
 }
 
 /*
@@ -171,7 +171,7 @@ clock_t_to_cputime(unsigned long x)
 static inline clock_t
 cputime64_to_clock_t(cputime64_t cputime)
 {
-	return __div(cputime, 1000000 / USER_HZ);
+	return __div(cputime, 4096000000ULL / USER_HZ);
 }
 
 #endif /* _S390_CPUTIME_H */
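Note: every constant in the hunks above follows from the change of cputime unit. The s390 CPU timer ticks in units of 2**-12 microseconds, so 1 microsecond = 4096 units and 1 second = 4096 * 10**6 = 4096000000 units; cputime_to_secs() divides by 2048000000 and shifts right once, presumably to keep the divisor below 2**31 for the 32-bit divide instruction. A hypothetical helper spelling out the arithmetic (names are illustrative, not from the patch):

	#define CPUTIME_UNITS_PER_USEC	4096ULL
	#define CPUTIME_UNITS_PER_SEC	(CPUTIME_UNITS_PER_USEC * 1000000)  /* 4096000000 */

	static inline unsigned long long usecs_to_cpu_timer_units(unsigned long usec)
	{
		return (unsigned long long)usec * CPUTIME_UNITS_PER_USEC;
	}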
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 0bc51d52a899..ffdef5fe8587 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -67,11 +67,11 @@
 #define __LC_SYNC_ENTER_TIMER		0x248
 #define __LC_ASYNC_ENTER_TIMER		0x250
 #define __LC_EXIT_TIMER			0x258
-#define __LC_LAST_UPDATE_TIMER		0x260
-#define __LC_USER_TIMER			0x268
-#define __LC_SYSTEM_TIMER		0x270
-#define __LC_LAST_UPDATE_CLOCK		0x278
-#define __LC_STEAL_CLOCK		0x280
+#define __LC_USER_TIMER			0x260
+#define __LC_SYSTEM_TIMER		0x268
+#define __LC_STEAL_TIMER		0x270
+#define __LC_LAST_UPDATE_TIMER		0x278
+#define __LC_LAST_UPDATE_CLOCK		0x280
 #define __LC_RETURN_MCCK_PSW		0x288
 #define __LC_KERNEL_STACK		0xC40
 #define __LC_THREAD_INFO		0xC44
@@ -89,11 +89,11 @@
 #define __LC_SYNC_ENTER_TIMER		0x250
 #define __LC_ASYNC_ENTER_TIMER		0x258
 #define __LC_EXIT_TIMER			0x260
-#define __LC_LAST_UPDATE_TIMER		0x268
-#define __LC_USER_TIMER			0x270
-#define __LC_SYSTEM_TIMER		0x278
-#define __LC_LAST_UPDATE_CLOCK		0x280
-#define __LC_STEAL_CLOCK		0x288
+#define __LC_USER_TIMER			0x268
+#define __LC_SYSTEM_TIMER		0x270
+#define __LC_STEAL_TIMER		0x278
+#define __LC_LAST_UPDATE_TIMER		0x280
+#define __LC_LAST_UPDATE_CLOCK		0x288
 #define __LC_RETURN_MCCK_PSW		0x290
 #define __LC_KERNEL_STACK		0xD40
 #define __LC_THREAD_INFO		0xD48
@@ -106,8 +106,10 @@
 #define __LC_IPLDEV			0xDB8
 #define __LC_CURRENT			0xDD8
 #define __LC_INT_CLOCK			0xDE8
+#define __LC_VDSO_PER_CPU		0xE38
 #endif /* __s390x__ */
 
+#define __LC_PASTE			0xE40
 
 #define __LC_PANIC_MAGIC		0xE00
 #ifndef __s390x__
@@ -252,11 +254,11 @@ struct _lowcore
 	__u64	sync_enter_timer;	/* 0x248 */
 	__u64	async_enter_timer;	/* 0x250 */
 	__u64	exit_timer;		/* 0x258 */
-	__u64	last_update_timer;	/* 0x260 */
-	__u64	user_timer;		/* 0x268 */
-	__u64	system_timer;		/* 0x270 */
-	__u64	last_update_clock;	/* 0x278 */
-	__u64	steal_clock;		/* 0x280 */
+	__u64	user_timer;		/* 0x260 */
+	__u64	system_timer;		/* 0x268 */
+	__u64	steal_timer;		/* 0x270 */
+	__u64	last_update_timer;	/* 0x278 */
+	__u64	last_update_clock;	/* 0x280 */
 	psw_t	return_mcck_psw;	/* 0x288 */
 	__u8	pad8[0xc00-0x290];	/* 0x290 */
 
@@ -343,11 +345,11 @@ struct _lowcore
 	__u64	sync_enter_timer;	/* 0x250 */
 	__u64	async_enter_timer;	/* 0x258 */
 	__u64	exit_timer;		/* 0x260 */
-	__u64	last_update_timer;	/* 0x268 */
-	__u64	user_timer;		/* 0x270 */
-	__u64	system_timer;		/* 0x278 */
-	__u64	last_update_clock;	/* 0x280 */
-	__u64	steal_clock;		/* 0x288 */
+	__u64	user_timer;		/* 0x268 */
+	__u64	system_timer;		/* 0x270 */
+	__u64	steal_timer;		/* 0x278 */
+	__u64	last_update_timer;	/* 0x280 */
+	__u64	last_update_clock;	/* 0x288 */
 	psw_t	return_mcck_psw;	/* 0x290 */
 	__u8	pad8[0xc00-0x2a0];	/* 0x2a0 */
 	/* System info area */
@@ -381,7 +383,12 @@ struct _lowcore
 	/* whether the kernel died with panic() or not */
 	__u32	panic_magic;		/* 0xe00 */
 
-	__u8	pad13[0x11b8-0xe04];	/* 0xe04 */
+	/* Per cpu primary space access list */
+	__u8	pad_0xe04[0xe3c-0xe04];	/* 0xe04 */
+	__u32	vdso_per_cpu_data;	/* 0xe3c */
+	__u32	paste[16];		/* 0xe40 */
+
+	__u8	pad13[0x11b8-0xe80];	/* 0xe80 */
 
 	/* 64 bit extparam used for pfault, diag 250 etc */
 	__u64	ext_params2;		/* 0x11B8 */
diff --git a/arch/s390/include/asm/system.h b/arch/s390/include/asm/system.h
index 024ef42ed6d7..3a8b26eb1f2e 100644
--- a/arch/s390/include/asm/system.h
+++ b/arch/s390/include/asm/system.h
@@ -99,7 +99,7 @@ static inline void restore_access_regs(unsigned int *acrs)
 	prev = __switch_to(prev,next);					\
 } while (0)
 
-extern void account_vtime(struct task_struct *);
+extern void account_vtime(struct task_struct *, struct task_struct *);
 extern void account_tick_vtime(struct task_struct *);
 extern void account_system_vtime(struct task_struct *);
 
@@ -121,7 +121,7 @@ static inline void cmma_init(void) { }
 
 #define finish_arch_switch(prev) do {					\
 	set_fs(current->thread.mm_segment);				\
-	account_vtime(prev);						\
+	account_vtime(prev, current);					\
 } while (0)
 
 #define nop() asm volatile("nop")
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index c1eaf9604da7..c544aa524535 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -47,6 +47,8 @@ struct thread_info {
 	unsigned int		cpu;		/* current CPU */
 	int			preempt_count;	/* 0 => preemptable, <0 => BUG */
 	struct restart_block	restart_block;
+	__u64			user_timer;
+	__u64			system_timer;
 };
 
 /*
diff --git a/arch/s390/include/asm/timer.h b/arch/s390/include/asm/timer.h
index 61705d60f995..e4bcab739c19 100644
--- a/arch/s390/include/asm/timer.h
+++ b/arch/s390/include/asm/timer.h
@@ -23,20 +23,18 @@ struct vtimer_list {
 	__u64 expires;
 	__u64 interval;
 
-	spinlock_t lock;
-	unsigned long magic;
-
 	void (*function)(unsigned long);
 	unsigned long data;
 };
 
-/* the offset value will wrap after ca. 71 years */
+/* the vtimer value will wrap after ca. 71 years */
 struct vtimer_queue {
 	struct list_head list;
 	spinlock_t lock;
-	__u64 to_expire;	  /* current event expire time */
-	__u64 offset;		  /* list offset to zero */
+	__u64 timer;		  /* last programmed timer */
+	__u64 elapsed;		  /* elapsed time of timer expire values */
 	__u64 idle;		  /* temp var for idle */
+	int do_spt;		  /* =1: reprogram cpu timer in idle */
 };
 
 extern void init_virt_timer(struct vtimer_list *timer);
@@ -48,8 +46,8 @@ extern int del_virt_timer(struct vtimer_list *timer);
 extern void init_cpu_vtimer(void);
 extern void vtime_init(void);
 
-extern void vtime_start_cpu_timer(void);
-extern void vtime_stop_cpu_timer(void);
+extern void vtime_stop_cpu(void);
+extern void vtime_start_leave(void);
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/s390/include/asm/topology.h b/arch/s390/include/asm/topology.h
index d96c91643458..c93eb50e1d09 100644
--- a/arch/s390/include/asm/topology.h
+++ b/arch/s390/include/asm/topology.h
@@ -6,10 +6,12 @@
 #define mc_capable()	(1)
 
 cpumask_t cpu_coregroup_map(unsigned int cpu);
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu);
 
 extern cpumask_t cpu_core_map[NR_CPUS];
 
 #define topology_core_siblings(cpu)	(cpu_core_map[cpu])
+#define topology_core_cpumask(cpu)	(&cpu_core_map[cpu])
 
 int topology_set_cpu_management(int fc);
 void topology_schedule_update(void);
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index a44f4fe16a35..7bdd7c8ebc91 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -12,9 +12,9 @@
 #ifndef __ASSEMBLY__
 
 /*
- * Note about this structure:
+ * Note about the vdso_data and vdso_per_cpu_data structures:
  *
- * NEVER USE THIS IN USERSPACE CODE DIRECTLY. The layout of this
+ * NEVER USE THEM IN USERSPACE CODE DIRECTLY. The layout of the
  * structure is supposed to be known only to the function in the vdso
  * itself and may change without notice.
  */
@@ -28,10 +28,21 @@ struct vdso_data {
 	__u64 wtom_clock_nsec;		/*				0x28 */
 	__u32 tz_minuteswest;		/* Minutes west of Greenwich	0x30 */
 	__u32 tz_dsttime;		/* Type of dst correction	0x34 */
+	__u32 ectg_available;
+};
+
+struct vdso_per_cpu_data {
+	__u64 ectg_timer_base;
+	__u64 ectg_user_time;
 };
 
 extern struct vdso_data *vdso_data;
 
+#ifdef CONFIG_64BIT
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore);
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e641f60bac99..67a60016babb 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -48,6 +48,11 @@ int main(void)
 	DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
 	DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
 	DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
+	DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
+	DEFINE(__VDSO_ECTG_BASE,
+	       offsetof(struct vdso_per_cpu_data, ectg_timer_base));
+	DEFINE(__VDSO_ECTG_USER,
+	       offsetof(struct vdso_per_cpu_data, ectg_user_time));
 	/* constants used by the vdso */
 	DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
 	DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 55de521aef77..1268aa2991bf 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -583,8 +583,8 @@ kernel_per:
 
 	.globl io_int_handler
 io_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+16
 	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
 	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+16
@@ -723,8 +723,8 @@ io_notify_resume:
 
 	.globl ext_int_handler
 ext_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+16
 	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
 	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+16
@@ -750,6 +750,7 @@ __critical_end:
 
 	.globl mcck_int_handler
 mcck_int_handler:
+	stck	__LC_INT_CLOCK
 	spt	__LC_CPU_TIMER_SAVE_AREA	# revalidate cpu timer
 	lm	%r0,%r15,__LC_GPREGS_SAVE_AREA	# revalidate gprs
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 16bb4fd1a403..c6fbde13971a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -177,8 +177,11 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
 	.if !\sync
 	ni	\psworg+1,0xfd		# clear wait state bit
 	.endif
-	lmg	%r0,%r15,SP_R0(%r15)	# load gprs 0-15 of user
+	lg	%r14,__LC_VDSO_PER_CPU
+	lmg	%r0,%r13,SP_R0(%r15)	# load gprs 0-13 of user
 	stpt	__LC_EXIT_TIMER
+	mvc	__VDSO_ECTG_BASE(16,%r14),__LC_EXIT_TIMER
+	lmg	%r14,%r15,SP_R14(%r15)	# load grps 14-15 of user
 	lpswe	\psworg			# back to caller
 	.endm
 
@@ -559,8 +562,8 @@ kernel_per:
  */
 	.globl io_int_handler
 io_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
 	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
@@ -721,8 +724,8 @@ io_notify_resume:
  */
 	.globl ext_int_handler
 ext_int_handler:
-	stpt	__LC_ASYNC_ENTER_TIMER
 	stck	__LC_INT_CLOCK
+	stpt	__LC_ASYNC_ENTER_TIMER
 	SAVE_ALL_BASE __LC_SAVE_AREA+32
 	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
 	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
@@ -746,6 +749,7 @@ __critical_end:
  */
 	.globl mcck_int_handler
 mcck_int_handler:
+	stck	__LC_INT_CLOCK
 	la	%r1,4095		# revalidate r1
 	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
@@ -979,23 +983,23 @@ cleanup_sysc_return:
 
 cleanup_sysc_leave:
 	clc	8(8,%r12),BASED(cleanup_sysc_leave_insn)
-	je	2f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	je	3f
 	clc	8(8,%r12),BASED(cleanup_sysc_leave_insn+8)
-	je	2f
-	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
+	jhe	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	0f
+	jne	1f
 	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	1f
-0:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-1:	lmg	%r0,%r11,SP_R0(%r15)
+	j	2f
+1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
+2:	lmg	%r0,%r11,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-2:	la	%r12,__LC_RETURN_PSW
+3:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_sysc_leave_insn:
 	.quad	sysc_done - 4
-	.quad	sysc_done - 8
+	.quad	sysc_done - 16
 
 cleanup_io_return:
 	mvc	__LC_RETURN_PSW(8),0(%r12)
@@ -1005,23 +1009,23 @@ cleanup_io_return:
 
 cleanup_io_leave:
 	clc	8(8,%r12),BASED(cleanup_io_leave_insn)
-	je	2f
-	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	je	3f
 	clc	8(8,%r12),BASED(cleanup_io_leave_insn+8)
-	je	2f
-	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
+	jhe	0f
+	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
+0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	jne	0f
+	jne	1f
 	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	j	1f
-0:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
-1:	lmg	%r0,%r11,SP_R0(%r15)
+	j	2f
+1:	mvc	__LC_SAVE_AREA+32(32),SP_R12(%r15)
+2:	lmg	%r0,%r11,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
-2:	la	%r12,__LC_RETURN_PSW
+3:	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_io_leave_insn:
 	.quad	io_done - 4
-	.quad	io_done - 8
+	.quad	io_done - 16
 
 /*
  * Integer constants
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 3ccd36b24b8f..f9f70aa15244 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -87,6 +87,8 @@ startup_continue:
 	lg	%r12,.Lparmaddr-.LPG1(%r13)	# pointer to parameter area
 					# move IPL device to lowcore
 	mvc	__LC_IPLDEV(4),IPL_DEVICE+4-PARMAREA(%r12)
+	lghi	%r0,__LC_PASTE
+	stg	%r0,__LC_VDSO_PER_CPU
 #
 # Setup stack
 #
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 04f8c67a6101..b6110bdf8dc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -38,6 +38,7 @@
 #include <linux/utsname.h>
 #include <linux/tick.h>
 #include <linux/elfcore.h>
+#include <linux/kernel_stat.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -45,7 +46,6 @@
 #include <asm/processor.h>
 #include <asm/irq.h>
 #include <asm/timer.h>
-#include <asm/cpu.h>
 #include "entry.h"
 
 asmlinkage void ret_from_fork(void) asm ("ret_from_fork");
@@ -75,36 +75,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
 	return sf->gprs[8];
 }
 
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
-	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
-};
-
-static int s390_idle_enter(void)
-{
-	struct s390_idle_data *idle;
-
-	idle = &__get_cpu_var(s390_idle);
-	spin_lock(&idle->lock);
-	idle->idle_count++;
-	idle->in_idle = 1;
-	idle->idle_enter = get_clock();
-	spin_unlock(&idle->lock);
-	vtime_stop_cpu_timer();
-	return NOTIFY_OK;
-}
-
-void s390_idle_leave(void)
-{
-	struct s390_idle_data *idle;
-
-	vtime_start_cpu_timer();
-	idle = &__get_cpu_var(s390_idle);
-	spin_lock(&idle->lock);
-	idle->idle_time += get_clock() - idle->idle_enter;
-	idle->in_idle = 0;
-	spin_unlock(&idle->lock);
-}
-
 extern void s390_handle_mcck(void);
 /*
  * The idle loop on a S390...
@@ -117,10 +87,6 @@ static void default_idle(void)
 		local_irq_enable();
 		return;
 	}
-	if (s390_idle_enter() == NOTIFY_BAD) {
-		local_irq_enable();
-		return;
-	}
 #ifdef CONFIG_HOTPLUG_CPU
 	if (cpu_is_offline(smp_processor_id())) {
 		preempt_enable_no_resched();
@@ -130,7 +96,6 @@ static void default_idle(void)
 	local_mcck_disable();
 	if (test_thread_flag(TIF_MCCK_PENDING)) {
 		local_mcck_enable();
-		s390_idle_leave();
 		local_irq_enable();
 		s390_handle_mcck();
 		return;
@@ -138,9 +103,9 @@ static void default_idle(void)
 	trace_hardirqs_on();
 	/* Don't trace preempt off for idle. */
 	stop_critical_timings();
-	/* Wait for external, I/O or machine check interrupt. */
-	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
-			PSW_MASK_IO | PSW_MASK_EXT);
+	/* Stop virtual timer and halt the cpu. */
+	vtime_stop_cpu();
+	/* Reenable preemption tracer. */
 	start_critical_timings();
 }
 
diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
index e019b419efc6..a0d2d55d7fb3 100644
--- a/arch/s390/kernel/s390_ext.c
+++ b/arch/s390/kernel/s390_ext.c
@@ -119,8 +119,8 @@ void do_extint(struct pt_regs *regs, unsigned short code)
 	struct pt_regs *old_regs;
 
 	old_regs = set_irq_regs(regs);
-	irq_enter();
 	s390_idle_check();
+	irq_enter();
 	if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
 		/* Serve timer interrupts first. */
 		clock_comparator_work();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b7a1efd5522c..d825f4950e4e 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,6 +427,8 @@ setup_lowcore(void)
 		/* enable extended save area */
 		__ctl_set_bit(14, 29);
 	}
+#else
+	lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
 #endif
 	set_prefix((u32)(unsigned long) lc);
 }
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 3ed5c7a83c6c..9c0ccb532a45 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -47,6 +47,7 @@
 #include <asm/lowcore.h>
 #include <asm/sclp.h>
 #include <asm/cpu.h>
+#include <asm/vdso.h>
 #include "entry.h"
 
 /*
@@ -500,6 +501,9 @@ static int __cpuinit smp_alloc_lowcore(int cpu)
 			goto out;
 		lowcore->extended_save_area_addr = (u32) save_area;
 	}
+#else
+	if (vdso_alloc_per_cpu(cpu, lowcore))
+		goto out;
 #endif
 	lowcore_ptr[cpu] = lowcore;
 	return 0;
@@ -522,6 +526,8 @@ static void smp_free_lowcore(int cpu)
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		free_page((unsigned long) lowcore->extended_save_area_addr);
+#else
+	vdso_free_per_cpu(cpu, lowcore);
 #endif
 	free_page(lowcore->panic_stack - PAGE_SIZE);
 	free_pages(lowcore->async_stack - ASYNC_SIZE, ASYNC_ORDER);
@@ -664,6 +670,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	lowcore = (void *) __get_free_pages(GFP_KERNEL | GFP_DMA, lc_order);
 	panic_stack = __get_free_page(GFP_KERNEL);
 	async_stack = __get_free_pages(GFP_KERNEL, ASYNC_ORDER);
+	BUG_ON(!lowcore || !panic_stack || !async_stack);
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		save_area = get_zeroed_page(GFP_KERNEL);
@@ -677,6 +684,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 #ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		lowcore->extended_save_area_addr = (u32) save_area;
+#else
+	BUG_ON(vdso_alloc_per_cpu(smp_processor_id(), lowcore));
 #endif
 	set_prefix((u32)(unsigned long) lowcore);
 	local_mcck_enable();
@@ -845,9 +854,11 @@ static ssize_t show_idle_count(struct sys_device *dev,
 	unsigned long long idle_count;
 
 	idle = &per_cpu(s390_idle, dev->id);
-	spin_lock_irq(&idle->lock);
+	spin_lock(&idle->lock);
 	idle_count = idle->idle_count;
-	spin_unlock_irq(&idle->lock);
+	if (idle->idle_enter)
+		idle_count++;
+	spin_unlock(&idle->lock);
 	return sprintf(buf, "%llu\n", idle_count);
 }
 static SYSDEV_ATTR(idle_count, 0444, show_idle_count, NULL);
@@ -856,18 +867,17 @@ static ssize_t show_idle_time(struct sys_device *dev,
 			       struct sysdev_attribute *attr, char *buf)
 {
 	struct s390_idle_data *idle;
-	unsigned long long new_time;
+	unsigned long long now, idle_time, idle_enter;
 
 	idle = &per_cpu(s390_idle, dev->id);
-	spin_lock_irq(&idle->lock);
-	if (idle->in_idle) {
-		new_time = get_clock();
-		idle->idle_time += new_time - idle->idle_enter;
-		idle->idle_enter = new_time;
-	}
-	new_time = idle->idle_time;
-	spin_unlock_irq(&idle->lock);
-	return sprintf(buf, "%llu\n", new_time >> 12);
+	spin_lock(&idle->lock);
+	now = get_clock();
+	idle_time = idle->idle_time;
+	idle_enter = idle->idle_enter;
+	if (idle_enter != 0ULL && idle_enter < now)
+		idle_time += now - idle_enter;
+	spin_unlock(&idle->lock);
+	return sprintf(buf, "%llu\n", idle_time >> 12);
 }
 static SYSDEV_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
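The show_idle_time() rework above turns the sysfs reader into a pure observer: when the cpu is currently idle (idle_enter is non-zero), the elapsed part of the running idle period is folded into the reported value without touching the accumulator, which only vtime_start_cpu() updates. A stand-alone C sketch of that read path (the struct, the "now" clock and the sample values are illustrative stand-ins, not kernel API):

#include <stdio.h>
#include <stdint.h>

struct s390_idle_model {
	uint64_t idle_time;	/* accumulated idle time in TOD units */
	uint64_t idle_enter;	/* 0 while running, timestamp while idle */
};

/* Mirrors the locked section of show_idle_time(): fold a still-running
 * idle period into the result, leave the accumulator untouched. */
static uint64_t read_idle_time(const struct s390_idle_model *idle,
			       uint64_t now)
{
	uint64_t idle_time = idle->idle_time;

	if (idle->idle_enter != 0ULL && idle->idle_enter < now)
		idle_time += now - idle->idle_enter;
	return idle_time >> 12;		/* TOD units -> microseconds */
}

int main(void)
{
	struct s390_idle_model idle = { .idle_time = 1 << 20,
					.idle_enter = 5000 };

	printf("%llu us\n", (unsigned long long)read_idle_time(&idle, 9096));
	return 0;
}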
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 90e9ba11eba1..cc362c9ea8f1 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -97,6 +97,11 @@ cpumask_t cpu_coregroup_map(unsigned int cpu)
 	return mask;
 }
 
+const struct cpumask *cpu_coregroup_mask(unsigned int cpu)
+{
+	return &cpu_core_map[cpu];
+}
+
 static void add_cpus_to_core(struct tl_cpu *tl_cpu, struct core_info *core)
 {
 	unsigned int cpu;
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 10a6ccef4412..25a6a82f1c02 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -31,9 +31,6 @@
 #include <asm/sections.h>
 #include <asm/vdso.h>
 
-/* Max supported size for symbol names */
-#define MAX_SYMNAME 64
-
 #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
 extern char vdso32_start, vdso32_end;
 static void *vdso32_kbase = &vdso32_start;
@@ -71,6 +68,119 @@ static union {
 struct vdso_data *vdso_data = &vdso_data_store.data;
 
 /*
+ * Setup vdso data page.
+ */
+static void vdso_init_data(struct vdso_data *vd)
+{
+	unsigned int facility_list;
+
+	facility_list = stfl();
+	vd->ectg_available = switch_amode && (facility_list & 1);
+}
+
+#ifdef CONFIG_64BIT
+/*
+ * Setup per cpu vdso data page.
+ */
+static void vdso_init_per_cpu_data(int cpu, struct vdso_per_cpu_data *vpcd)
+{
+}
+
+/*
+ * Allocate/free per cpu vdso data.
+ */
+#ifdef CONFIG_64BIT
+#define SEGMENT_ORDER	2
+#else
+#define SEGMENT_ORDER	1
+#endif
+
+int vdso_alloc_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+	unsigned long segment_table, page_table, page_frame;
+	u32 *psal, *aste;
+	int i;
+
+	lowcore->vdso_per_cpu_data = __LC_PASTE;
+
+	if (!switch_amode || !vdso_enabled)
+		return 0;
+
+	segment_table = __get_free_pages(GFP_KERNEL, SEGMENT_ORDER);
+	page_table = get_zeroed_page(GFP_KERNEL | GFP_DMA);
+	page_frame = get_zeroed_page(GFP_KERNEL);
+	if (!segment_table || !page_table || !page_frame)
+		goto out;
+
+	clear_table((unsigned long *) segment_table, _SEGMENT_ENTRY_EMPTY,
+		    PAGE_SIZE << SEGMENT_ORDER);
+	clear_table((unsigned long *) page_table, _PAGE_TYPE_EMPTY,
+		    256*sizeof(unsigned long));
+
+	*(unsigned long *) segment_table = _SEGMENT_ENTRY + page_table;
+	*(unsigned long *) page_table = _PAGE_RO + page_frame;
+
+	psal = (u32 *) (page_table + 256*sizeof(unsigned long));
+	aste = psal + 32;
+
+	for (i = 4; i < 32; i += 4)
+		psal[i] = 0x80000000;
+
+	lowcore->paste[4] = (u32)(addr_t) psal;
+	psal[0] = 0x20000000;
+	psal[2] = (u32)(addr_t) aste;
+	*(unsigned long *) (aste + 2) = segment_table +
+		_ASCE_TABLE_LENGTH + _ASCE_USER_BITS + _ASCE_TYPE_SEGMENT;
+	aste[4] = (u32)(addr_t) psal;
+	lowcore->vdso_per_cpu_data = page_frame;
+
+	vdso_init_per_cpu_data(cpu, (struct vdso_per_cpu_data *) page_frame);
+	return 0;
+
+out:
+	free_page(page_frame);
+	free_page(page_table);
+	free_pages(segment_table, SEGMENT_ORDER);
+	return -ENOMEM;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+void vdso_free_per_cpu(int cpu, struct _lowcore *lowcore)
+{
+	unsigned long segment_table, page_table, page_frame;
+	u32 *psal, *aste;
+
+	if (!switch_amode || !vdso_enabled)
+		return;
+
+	psal = (u32 *)(addr_t) lowcore->paste[4];
+	aste = (u32 *)(addr_t) psal[2];
+	segment_table = *(unsigned long *)(aste + 2) & PAGE_MASK;
+	page_table = *(unsigned long *) segment_table;
+	page_frame = *(unsigned long *) page_table;
+
+	free_page(page_frame);
+	free_page(page_table);
+	free_pages(segment_table, SEGMENT_ORDER);
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+static void __vdso_init_cr5(void *dummy)
+{
+	unsigned long cr5;
+
+	cr5 = offsetof(struct _lowcore, paste);
+	__ctl_load(cr5, 5, 5);
+}
+
+static void vdso_init_cr5(void)
+{
+	if (switch_amode && vdso_enabled)
+		on_each_cpu(__vdso_init_cr5, NULL, 1);
+}
+#endif /* CONFIG_64BIT */
+
+/*
  * This is called from binfmt_elf, we create the special vma for the
  * vDSO and insert it into the mm struct tree
  */
@@ -172,6 +282,9 @@ static int __init vdso_init(void)
 {
 	int i;
 
+	if (!vdso_enabled)
+		return 0;
+	vdso_init_data(vdso_data);
 #if defined(CONFIG_32BIT) || defined(CONFIG_COMPAT)
 	/* Calculate the size of the 32 bit vDSO */
 	vdso32_pages = ((&vdso32_end - &vdso32_start
@@ -208,6 +321,10 @@ static int __init vdso_init(void)
 	}
 	vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
 	vdso64_pagelist[vdso64_pages] = NULL;
+#ifndef CONFIG_SMP
+	BUG_ON(vdso_alloc_per_cpu(0, &S390_lowcore));
+#endif
+	vdso_init_cr5();
 #endif /* CONFIG_64BIT */
 
 	get_page(virt_to_page(vdso_data));
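The net effect of the per-cpu vdso plumbing above: on 64-bit, with switch_amode set and the ECTG facility present, reading the calling thread's CPU clock no longer needs a system call. Portable user code does not change; a plain clock_gettime() exercises the fast path where the C library maps CLOCK_THREAD_CPUTIME_ID onto the vdso (assumed glibc behavior on such kernels; older glibc needs -lrt at link time):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;
	volatile unsigned long spin;

	for (spin = 0; spin < 100000000UL; spin++)
		;	/* burn some thread cpu time */

	if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts))
		return 1;
	printf("thread cputime: %ld.%09ld s\n", ts.tv_sec, ts.tv_nsec);
	return 0;
}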
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 488e31a3c0e7..9ce8caafdb4e 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -22,7 +22,12 @@ __kernel_clock_getres:
 	cghi	%r2,CLOCK_REALTIME
 	je	0f
 	cghi	%r2,CLOCK_MONOTONIC
+	je	0f
+	cghi	%r2,-2		/* CLOCK_THREAD_CPUTIME_ID for this thread */
 	jne	2f
+	larl	%r5,_vdso_data
+	icm	%r0,15,__LC_ECTG_OK(%r5)
+	jz	2f
 0:	ltgr	%r3,%r3
 	jz	1f			/* res == NULL */
 	larl	%r1,3f
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 738a410b7eb2..79dbfee831ec 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -22,8 +22,10 @@ __kernel_clock_gettime:
 	larl	%r5,_vdso_data
 	cghi	%r2,CLOCK_REALTIME
 	je	4f
+	cghi	%r2,-2		/* CLOCK_THREAD_CPUTIME_ID for this thread */
+	je	9f
 	cghi	%r2,CLOCK_MONOTONIC
-	jne	9f
+	jne	12f
 
 	/* CLOCK_MONOTONIC */
 	ltgr	%r3,%r3
@@ -42,7 +44,7 @@ __kernel_clock_gettime:
 	alg	%r0,__VDSO_WTOM_SEC(%r5)
 	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
 	jne	0b
-	larl	%r5,10f
+	larl	%r5,13f
 1:	clg	%r1,0(%r5)
 	jl	2f
 	slg	%r1,0(%r5)
@@ -68,7 +70,7 @@ __kernel_clock_gettime:
 	lg	%r0,__VDSO_XTIME_SEC(%r5)
 	clg	%r4,__VDSO_UPD_COUNT(%r5)	/* check update counter */
 	jne	5b
-	larl	%r5,10f
+	larl	%r5,13f
 6:	clg	%r1,0(%r5)
 	jl	7f
 	slg	%r1,0(%r5)
@@ -79,11 +81,38 @@ __kernel_clock_gettime:
 8:	lghi	%r2,0
 	br	%r14
 
+	/* CLOCK_THREAD_CPUTIME_ID for this thread */
+9:	icm	%r0,15,__VDSO_ECTG_OK(%r5)
+	jz	12f
+	ear	%r2,%a4
+	llilh	%r4,0x0100
+	sar	%a4,%r4
+	lghi	%r4,0
+	sacf	512			/* Magic ectg instruction */
+	.insn	ssf,0xc80100000000,__VDSO_ECTG_BASE(4),__VDSO_ECTG_USER(4),4
+	sacf	0
+	sar	%a4,%r2
+	algr	%r1,%r0			/* r1 = cputime as TOD value */
+	mghi	%r1,1000		/* convert to nanoseconds */
+	srlg	%r1,%r1,12		/* r1 = cputime in nanosec */
+	lgr	%r4,%r1
+	larl	%r5,13f
+	srlg	%r1,%r1,9		/* divide by 1000000000 */
+	mlg	%r0,8(%r5)
+	srlg	%r0,%r0,11		/* r0 = tv_sec */
+	stg	%r0,0(%r3)
+	msg	%r0,0(%r5)		/* calculate tv_nsec */
+	slgr	%r4,%r0			/* r4 = tv_nsec */
+	stg	%r4,8(%r3)
+	lghi	%r2,0
+	br	%r14
+
 	/* Fallback to system call */
-9:	lghi	%r1,__NR_clock_gettime
+12:	lghi	%r1,__NR_clock_gettime
 	svc	0
 	br	%r14
 
-10:	.quad	1000000000
+13:	.quad	1000000000
+14:	.quad	19342813113834067
 	.cfi_endproc
 	.size	__kernel_clock_gettime,.-__kernel_clock_gettime
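The constant added at label 14 is a reciprocal for dividing by one billion: 19342813113834067 is 2^84/10^9 rounded up, and the shift chain in the fast path (srlg by 9, the implicit 64-bit shift of keeping only mlg's high half, then srlg by 11) adds up to 84 bits, so the split is a multiply instead of a divide. A small C model of that tv_sec/tv_nsec computation, with a made-up cputime value:

#include <stdio.h>
#include <stdint.h>

/* Models the assembly above: tv_sec = ((ns >> 9) * 19342813113834067)
 * >> (64 + 11), i.e. roughly ns * C / 2^84 with C ~= 2^84 / 1000000000. */
int main(void)
{
	uint64_t ns = 12345678987654321ULL;	/* illustrative cputime, ns */
	uint64_t c = 19342813113834067ULL;
	uint64_t sec = (uint64_t)(((unsigned __int128)(ns >> 9) * c) >> 75);
	uint64_t nsec = ns - sec * 1000000000ULL;

	printf("%llu.%09llu\n", (unsigned long long)sec,
	       (unsigned long long)nsec);
	return 0;
}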
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 75a6e62ea973..2fb36e462194 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -23,19 +23,43 @@
 #include <asm/s390_ext.h>
 #include <asm/timer.h>
 #include <asm/irq_regs.h>
+#include <asm/cpu.h>
 
 static ext_int_info_t ext_int_info_timer;
+
 static DEFINE_PER_CPU(struct vtimer_queue, virt_cpu_timer);
 
+DEFINE_PER_CPU(struct s390_idle_data, s390_idle) = {
+	.lock = __SPIN_LOCK_UNLOCKED(s390_idle.lock)
+};
+
+static inline __u64 get_vtimer(void)
+{
+	__u64 timer;
+
+	asm volatile("STPT %0" : "=m" (timer));
+	return timer;
+}
+
+static inline void set_vtimer(__u64 expires)
+{
+	__u64 timer;
+
+	asm volatile ("  STPT %0\n"	/* Store current cpu timer value */
+		      "  SPT %1"	/* Set new value immediatly afterwards */
+		      : "=m" (timer) : "m" (expires) );
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
+	S390_lowcore.last_update_timer = expires;
+}
+
 /*
  * Update process times based on virtual cpu times stored by entry.S
  * to the lowcore fields user_timer, system_timer & steal_clock.
  */
-void account_process_tick(struct task_struct *tsk, int user_tick)
+static void do_account_vtime(struct task_struct *tsk, int hardirq_offset)
 {
-	cputime_t cputime;
-	__u64 timer, clock;
-	int rcu_user_flag;
+	struct thread_info *ti = task_thread_info(tsk);
+	__u64 timer, clock, user, system, steal;
 
 	timer = S390_lowcore.last_update_timer;
 	clock = S390_lowcore.last_update_clock;
@@ -44,50 +68,41 @@ void account_process_tick(struct task_struct *tsk, int user_tick)
 		      : "=m" (S390_lowcore.last_update_timer),
 			"=m" (S390_lowcore.last_update_clock) );
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-	S390_lowcore.steal_clock += S390_lowcore.last_update_clock - clock;
+	S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock;
 
-	cputime = S390_lowcore.user_timer >> 12;
-	rcu_user_flag = cputime != 0;
-	S390_lowcore.user_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime);
-
-	cputime = S390_lowcore.system_timer >> 12;
-	S390_lowcore.system_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, HARDIRQ_OFFSET, cputime);
-
-	cputime = S390_lowcore.steal_clock;
-	if ((__s64) cputime > 0) {
-		cputime >>= 12;
-		S390_lowcore.steal_clock -= cputime << 12;
-		account_steal_time(tsk, cputime);
+	user = S390_lowcore.user_timer - ti->user_timer;
+	S390_lowcore.steal_timer -= user;
+	ti->user_timer = S390_lowcore.user_timer;
+	account_user_time(tsk, user, user);
+
+	system = S390_lowcore.system_timer - ti->system_timer;
+	S390_lowcore.steal_timer -= system;
+	ti->system_timer = S390_lowcore.system_timer;
+	account_system_time(tsk, hardirq_offset, system, system);
+
+	steal = S390_lowcore.steal_timer;
+	if ((s64) steal > 0) {
+		S390_lowcore.steal_timer = 0;
+		account_steal_time(steal);
 	}
 }
 
-/*
- * Update process times based on virtual cpu times stored by entry.S
- * to the lowcore fields user_timer, system_timer & steal_clock.
- */
-void account_vtime(struct task_struct *tsk)
+void account_vtime(struct task_struct *prev, struct task_struct *next)
 {
-	cputime_t cputime;
-	__u64 timer;
-
-	timer = S390_lowcore.last_update_timer;
-	asm volatile ("  STPT %0"	/* Store current cpu timer value */
-		      : "=m" (S390_lowcore.last_update_timer) );
-	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
-
-	cputime = S390_lowcore.user_timer >> 12;
-	S390_lowcore.user_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_user_time(tsk, cputime);
+	struct thread_info *ti;
+
+	do_account_vtime(prev, 0);
+	ti = task_thread_info(prev);
+	ti->user_timer = S390_lowcore.user_timer;
+	ti->system_timer = S390_lowcore.system_timer;
+	ti = task_thread_info(next);
+	S390_lowcore.user_timer = ti->user_timer;
+	S390_lowcore.system_timer = ti->system_timer;
+}
 
-	cputime = S390_lowcore.system_timer >> 12;
-	S390_lowcore.system_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime);
+void account_process_tick(struct task_struct *tsk, int user_tick)
+{
+	do_account_vtime(tsk, HARDIRQ_OFFSET);
 }
 
 /*
@@ -96,80 +111,131 @@ void account_vtime(struct task_struct *tsk)
  */
 void account_system_vtime(struct task_struct *tsk)
 {
-	cputime_t cputime;
-	__u64 timer;
+	struct thread_info *ti = task_thread_info(tsk);
+	__u64 timer, system;
 
 	timer = S390_lowcore.last_update_timer;
-	asm volatile ("  STPT %0"	/* Store current cpu timer value */
-		      : "=m" (S390_lowcore.last_update_timer) );
+	S390_lowcore.last_update_timer = get_vtimer();
 	S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer;
 
-	cputime = S390_lowcore.system_timer >> 12;
-	S390_lowcore.system_timer -= cputime << 12;
-	S390_lowcore.steal_clock -= cputime << 12;
-	account_system_time(tsk, 0, cputime);
+	system = S390_lowcore.system_timer - ti->system_timer;
+	S390_lowcore.steal_timer -= system;
+	ti->system_timer = S390_lowcore.system_timer;
+	account_system_time(tsk, 0, system, system);
 }
 EXPORT_SYMBOL_GPL(account_system_vtime);
 
-static inline void set_vtimer(__u64 expires)
-{
-	__u64 timer;
-
-	asm volatile ("  STPT %0\n"	/* Store current cpu timer value */
-		      "  SPT %1"	/* Set new value immediatly afterwards */
-		      : "=m" (timer) : "m" (expires) );
-	S390_lowcore.system_timer += S390_lowcore.last_update_timer - timer;
-	S390_lowcore.last_update_timer = expires;
-
-	/* store expire time for this CPU timer */
-	__get_cpu_var(virt_cpu_timer).to_expire = expires;
-}
-
-void vtime_start_cpu_timer(void)
+void vtime_start_cpu(void)
 {
-	struct vtimer_queue *vt_list;
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
+	__u64 idle_time, expires;
 
-	/* CPU timer interrupt is pending, don't reprogramm it */
-	if (vt_list->idle & 1LL<<63)
-		return;
+	/* Account time spent with enabled wait psw loaded as idle time. */
+	idle_time = S390_lowcore.int_clock - idle->idle_enter;
+	account_idle_time(idle_time);
+	S390_lowcore.last_update_clock = S390_lowcore.int_clock;
+
+	/* Account system time spent going idle. */
+	S390_lowcore.system_timer += S390_lowcore.last_update_timer - vq->idle;
+	S390_lowcore.last_update_timer = S390_lowcore.async_enter_timer;
+
+	/* Restart vtime CPU timer */
+	if (vq->do_spt) {
+		/* Program old expire value but first save progress. */
+		expires = vq->idle - S390_lowcore.async_enter_timer;
+		expires += get_vtimer();
+		set_vtimer(expires);
+	} else {
+		/* Don't account the CPU timer delta while the cpu was idle. */
+		vq->elapsed -= vq->idle - S390_lowcore.async_enter_timer;
+	}
 
-	if (!list_empty(&vt_list->list))
-		set_vtimer(vt_list->idle);
+	spin_lock(&idle->lock);
+	idle->idle_time += idle_time;
+	idle->idle_enter = 0ULL;
+	idle->idle_count++;
+	spin_unlock(&idle->lock);
 }
 
-void vtime_stop_cpu_timer(void)
+void vtime_stop_cpu(void)
 {
-	struct vtimer_queue *vt_list;
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
+	struct vtimer_queue *vq = &__get_cpu_var(virt_cpu_timer);
+	psw_t psw;
 
-	/* nothing to do */
-	if (list_empty(&vt_list->list)) {
-		vt_list->idle = VTIMER_MAX_SLICE;
-		goto fire;
+	/* Wait for external, I/O or machine check interrupt. */
+	psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
+
+	/* Check if the CPU timer needs to be reprogrammed. */
+	if (vq->do_spt) {
+		__u64 vmax = VTIMER_MAX_SLICE;
+		/*
+		 * The inline assembly is equivalent to
+		 *	vq->idle = get_cpu_timer();
+		 *	set_cpu_timer(VTIMER_MAX_SLICE);
+		 *	idle->idle_enter = get_clock();
+		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 * The difference is that the inline assembly makes sure that
+		 * the last three instruction are stpt, stck and lpsw in that
+		 * order. This is done to increase the precision.
+		 */
+		asm volatile(
+#ifndef CONFIG_64BIT
+			"	basr	1,0\n"
+			"0:	ahi	1,1f-0b\n"
+			"	st	1,4(%2)\n"
+#else /* CONFIG_64BIT */
+			"	larl	1,1f\n"
+			"	stg	1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+			"	stpt	0(%4)\n"
+			"	spt	0(%5)\n"
+			"	stck	0(%3)\n"
+#ifndef CONFIG_64BIT
+			"	lpsw	0(%2)\n"
+#else /* CONFIG_64BIT */
+			"	lpswe	0(%2)\n"
+#endif /* CONFIG_64BIT */
+			"1:"
+			: "=m" (idle->idle_enter), "=m" (vq->idle)
+			: "a" (&psw), "a" (&idle->idle_enter),
+			  "a" (&vq->idle), "a" (&vmax), "m" (vmax), "m" (psw)
+			: "memory", "cc", "1");
+	} else {
+		/*
+		 * The inline assembly is equivalent to
+		 *	vq->idle = get_cpu_timer();
+		 *	idle->idle_enter = get_clock();
+		 *	__load_psw_mask(psw_kernel_bits | PSW_MASK_WAIT |
+		 *			   PSW_MASK_IO | PSW_MASK_EXT);
+		 * The difference is that the inline assembly makes sure that
+		 * the last three instruction are stpt, stck and lpsw in that
+		 * order. This is done to increase the precision.
+		 */
+		asm volatile(
+#ifndef CONFIG_64BIT
+			"	basr	1,0\n"
+			"0:	ahi	1,1f-0b\n"
+			"	st	1,4(%2)\n"
+#else /* CONFIG_64BIT */
+			"	larl	1,1f\n"
+			"	stg	1,8(%2)\n"
+#endif /* CONFIG_64BIT */
+			"	stpt	0(%4)\n"
+			"	stck	0(%3)\n"
+#ifndef CONFIG_64BIT
+			"	lpsw	0(%2)\n"
+#else /* CONFIG_64BIT */
+			"	lpswe	0(%2)\n"
+#endif /* CONFIG_64BIT */
+			"1:"
+			: "=m" (idle->idle_enter), "=m" (vq->idle)
+			: "a" (&psw), "a" (&idle->idle_enter),
+			  "a" (&vq->idle), "m" (psw)
+			: "memory", "cc", "1");
 	}
-
-	/* store the actual expire value */
-	asm volatile ("STPT %0" : "=m" (vt_list->idle));
-
-	/*
-	 * If the CPU timer is negative we don't reprogramm
-	 * it because we will get instantly an interrupt.
-	 */
-	if (vt_list->idle & 1LL<<63)
-		return;
-
-	vt_list->offset += vt_list->to_expire - vt_list->idle;
-
-	/*
-	 * We cannot halt the CPU timer, we just write a value that
-	 * nearly never expires (only after 71 years) and re-write
-	 * the stored expire value if we continue the timer
-	 */
-fire:
-	set_vtimer(VTIMER_MAX_SLICE);
 }
 
 /*
@@ -195,30 +261,23 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head)
  */
 static void do_callbacks(struct list_head *cb_list)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	struct vtimer_list *event, *tmp;
-	void (*fn)(unsigned long);
-	unsigned long data;
 
 	if (list_empty(cb_list))
 		return;
 
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	vq = &__get_cpu_var(virt_cpu_timer);
 
 	list_for_each_entry_safe(event, tmp, cb_list, entry) {
-		fn = event->function;
-		data = event->data;
-		fn(data);
-
-		if (!event->interval)
-			/* delete one shot timer */
-			list_del_init(&event->entry);
-		else {
-			/* move interval timer back to list */
-			spin_lock(&vt_list->lock);
-			list_del_init(&event->entry);
-			list_add_sorted(event, &vt_list->list);
-			spin_unlock(&vt_list->lock);
+		list_del_init(&event->entry);
+		(event->function)(event->data);
+		if (event->interval) {
+			/* Recharge interval timer */
+			event->expires = event->interval + vq->elapsed;
+			spin_lock(&vq->lock);
+			list_add_sorted(event, &vq->list);
+			spin_unlock(&vq->lock);
 		}
 	}
 }
@@ -228,64 +287,57 @@ static void do_callbacks(struct list_head *cb_list)
  */
 static void do_cpu_timer_interrupt(__u16 error_code)
 {
-	__u64 next, delta;
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	struct vtimer_list *event, *tmp;
-	struct list_head *ptr;
-	/* the callback queue */
-	struct list_head cb_list;
+	struct list_head cb_list;	/* the callback queue */
+	__u64 elapsed, next;
 
 	INIT_LIST_HEAD(&cb_list);
-	vt_list = &__get_cpu_var(virt_cpu_timer);
+	vq = &__get_cpu_var(virt_cpu_timer);
 
 	/* walk timer list, fire all expired events */
-	spin_lock(&vt_list->lock);
+	spin_lock(&vq->lock);
 
-	if (vt_list->to_expire < VTIMER_MAX_SLICE)
-		vt_list->offset += vt_list->to_expire;
-
-	list_for_each_entry_safe(event, tmp, &vt_list->list, entry) {
-		if (event->expires > vt_list->offset)
-			/* found first unexpired event, leave */
-			break;
-
-		/* re-charge interval timer, we have to add the offset */
-		if (event->interval)
-			event->expires = event->interval + vt_list->offset;
-
-		/* move expired timer to the callback queue */
-		list_move_tail(&event->entry, &cb_list);
+	elapsed = vq->elapsed + (vq->timer - S390_lowcore.async_enter_timer);
+	BUG_ON((s64) elapsed < 0);
+	vq->elapsed = 0;
+	list_for_each_entry_safe(event, tmp, &vq->list, entry) {
+		if (event->expires < elapsed)
+			/* move expired timer to the callback queue */
+			list_move_tail(&event->entry, &cb_list);
+		else
+			event->expires -= elapsed;
 	}
-	spin_unlock(&vt_list->lock);
+	spin_unlock(&vq->lock);
+
+	vq->do_spt = list_empty(&cb_list);
 	do_callbacks(&cb_list);
 
 	/* next event is first in list */
-	spin_lock(&vt_list->lock);
-	if (!list_empty(&vt_list->list)) {
-		ptr = vt_list->list.next;
-		event = list_entry(ptr, struct vtimer_list, entry);
-		next = event->expires - vt_list->offset;
-
-		/* add the expired time from this interrupt handler
-		 * and the callback functions
-		 */
-		asm volatile ("STPT %0" : "=m" (delta));
-		delta = 0xffffffffffffffffLL - delta + 1;
-		vt_list->offset += delta;
-		next -= delta;
-	} else {
-		vt_list->offset = 0;
-		next = VTIMER_MAX_SLICE;
-	}
-	spin_unlock(&vt_list->lock);
-	set_vtimer(next);
+	next = VTIMER_MAX_SLICE;
+	spin_lock(&vq->lock);
+	if (!list_empty(&vq->list)) {
+		event = list_first_entry(&vq->list, struct vtimer_list, entry);
+		next = event->expires;
+	} else
+		vq->do_spt = 0;
+	spin_unlock(&vq->lock);
+	/*
+	 * To improve precision add the time spent by the
+	 * interrupt handler to the elapsed time.
+	 * Note: CPU timer counts down and we got an interrupt,
+	 * the current content is negative
+	 */
+	elapsed = S390_lowcore.async_enter_timer - get_vtimer();
+	set_vtimer(next - elapsed);
+	vq->timer = next - elapsed;
+	vq->elapsed = elapsed;
 }
 
 void init_virt_timer(struct vtimer_list *timer)
 {
 	timer->function = NULL;
 	INIT_LIST_HEAD(&timer->entry);
-	spin_lock_init(&timer->lock);
 }
 EXPORT_SYMBOL(init_virt_timer);
 
@@ -299,44 +351,40 @@ static inline int vtimer_pending(struct vtimer_list *timer)
  */
 static void internal_add_vtimer(struct vtimer_list *timer)
 {
+	struct vtimer_queue *vq;
 	unsigned long flags;
-	__u64 done;
-	struct vtimer_list *event;
-	struct vtimer_queue *vt_list;
+	__u64 left, expires;
 
-	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
-	spin_lock_irqsave(&vt_list->lock, flags);
+	vq = &per_cpu(virt_cpu_timer, timer->cpu);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	BUG_ON(timer->cpu != smp_processor_id());
 
-	/* if list is empty we only have to set the timer */
-	if (list_empty(&vt_list->list)) {
-		/* reset the offset, this may happen if the last timer was
-		 * just deleted by mod_virt_timer and the interrupt
-		 * didn't happen until here
-		 */
-		vt_list->offset = 0;
-		goto fire;
+	if (list_empty(&vq->list)) {
+		/* First timer on this cpu, just program it. */
+		list_add(&timer->entry, &vq->list);
+		set_vtimer(timer->expires);
+		vq->timer = timer->expires;
+		vq->elapsed = 0;
+	} else {
+		/* Check progress of old timers. */
+		expires = timer->expires;
+		left = get_vtimer();
+		if (likely((s64) expires < (s64) left)) {
+			/* The new timer expires before the current timer. */
+			set_vtimer(expires);
+			vq->elapsed += vq->timer - left;
+			vq->timer = expires;
+		} else {
+			vq->elapsed += vq->timer - left;
+			vq->timer = left;
+		}
+		/* Insert new timer into per cpu list. */
+		timer->expires += vq->elapsed;
+		list_add_sorted(timer, &vq->list);
 	}
 
-	/* save progress */
-	asm volatile ("STPT %0" : "=m" (done));
-
-	/* calculate completed work */
-	done = vt_list->to_expire - done + vt_list->offset;
-	vt_list->offset = 0;
-
-	list_for_each_entry(event, &vt_list->list, entry)
-		event->expires -= done;
-
-fire:
-	list_add_sorted(timer, &vt_list->list);
-
-	/* get first element, which is the next vtimer slice */
-	event = list_entry(vt_list->list.next, struct vtimer_list, entry);
-
-	set_vtimer(event->expires);
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	/* release CPU acquired in prepare_vtimer or mod_virt_timer() */
 	put_cpu();
 }
@@ -381,14 +429,15 @@ EXPORT_SYMBOL(add_virt_timer_periodic);
  * If we change a pending timer the function must be called on the CPU
  * where the timer is running on, e.g. by smp_call_function_single()
  *
- * The original mod_timer adds the timer if it is not pending. For compatibility
- * we do the same. The timer will be added on the current CPU as a oneshot timer.
+ * The original mod_timer adds the timer if it is not pending. For
+ * compatibility we do the same. The timer will be added on the current
+ * CPU as a oneshot timer.
  *
  * returns whether it has modified a pending timer (1) or not (0)
  */
 int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 	unsigned long flags;
 	int cpu;
 
@@ -404,17 +453,17 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 		return 1;
 
 	cpu = get_cpu();
-	vt_list = &per_cpu(virt_cpu_timer, cpu);
+	vq = &per_cpu(virt_cpu_timer, cpu);
 
 	/* check if we run on the right CPU */
 	BUG_ON(timer->cpu != cpu);
 
 	/* disable interrupts before test if timer is pending */
-	spin_lock_irqsave(&vt_list->lock, flags);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	/* if timer isn't pending add it on the current CPU */
 	if (!vtimer_pending(timer)) {
-		spin_unlock_irqrestore(&vt_list->lock, flags);
+		spin_unlock_irqrestore(&vq->lock, flags);
 		/* we do not activate an interval timer with mod_virt_timer */
 		timer->interval = 0;
 		timer->expires = expires;
@@ -431,7 +480,7 @@ int mod_virt_timer(struct vtimer_list *timer, __u64 expires)
 		timer->interval = expires;
 
 	/* the timer can't expire anymore so we can release the lock */
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	internal_add_vtimer(timer);
 	return 1;
 }
@@ -445,25 +494,19 @@ EXPORT_SYMBOL(mod_virt_timer);
 int del_virt_timer(struct vtimer_list *timer)
 {
 	unsigned long flags;
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 
 	/* check if timer is pending */
 	if (!vtimer_pending(timer))
 		return 0;
 
-	vt_list = &per_cpu(virt_cpu_timer, timer->cpu);
-	spin_lock_irqsave(&vt_list->lock, flags);
+	vq = &per_cpu(virt_cpu_timer, timer->cpu);
+	spin_lock_irqsave(&vq->lock, flags);
 
 	/* we don't interrupt a running timer, just let it expire! */
 	list_del_init(&timer->entry);
 
-	/* last timer removed */
-	if (list_empty(&vt_list->list)) {
-		vt_list->to_expire = 0;
-		vt_list->offset = 0;
-	}
-
-	spin_unlock_irqrestore(&vt_list->lock, flags);
+	spin_unlock_irqrestore(&vq->lock, flags);
 	return 1;
 }
 EXPORT_SYMBOL(del_virt_timer);
@@ -473,24 +516,19 @@ EXPORT_SYMBOL(del_virt_timer);
  */
 void init_cpu_vtimer(void)
 {
-	struct vtimer_queue *vt_list;
+	struct vtimer_queue *vq;
 
 	/* kick the virtual timer */
-	S390_lowcore.exit_timer = VTIMER_MAX_SLICE;
-	S390_lowcore.last_update_timer = VTIMER_MAX_SLICE;
-	asm volatile ("SPT %0" : : "m" (S390_lowcore.last_update_timer));
 	asm volatile ("STCK %0" : "=m" (S390_lowcore.last_update_clock));
+	asm volatile ("STPT %0" : "=m" (S390_lowcore.last_update_timer));
+
+	/* initialize per cpu vtimer structure */
+	vq = &__get_cpu_var(virt_cpu_timer);
+	INIT_LIST_HEAD(&vq->list);
+	spin_lock_init(&vq->lock);
 
 	/* enable cpu timer interrupts */
 	__ctl_set_bit(0,10);
-
-	vt_list = &__get_cpu_var(virt_cpu_timer);
-	INIT_LIST_HEAD(&vt_list->list);
-	spin_lock_init(&vt_list->lock);
-	vt_list->to_expire = 0;
-	vt_list->offset = 0;
-	vt_list->idle = 0;
-
 }
 
 void __init vtime_init(void)
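The accounting rewrite above switches from draining the lowcore timers in 4096ths to pure delta bookkeeping: thread_info keeps a snapshot of the per-cpu user/system timers, each tick or context switch accounts only the growth since the snapshot, and whatever wall-clock advance is left over feeds the steal estimate. A minimal user-space model of do_account_vtime() under those assumptions (field names follow the patch, the printf calls stand in for the account_*_time() kernel hooks, and all numbers are invented):

#include <stdio.h>
#include <stdint.h>

struct lowcore_model { uint64_t user_timer, system_timer, steal_timer; };
struct thread_info_model { uint64_t user_timer, system_timer; };

/* Mirrors do_account_vtime(): report only the delta since the last
 * snapshot, move the snapshot forward, and bleed the accounted time
 * out of the steal estimate. */
static void account(struct lowcore_model *lc, struct thread_info_model *ti)
{
	uint64_t user = lc->user_timer - ti->user_timer;
	uint64_t system;

	lc->steal_timer -= user;
	ti->user_timer = lc->user_timer;
	printf("account_user_time:   %llu\n", (unsigned long long)user);

	system = lc->system_timer - ti->system_timer;
	lc->steal_timer -= system;
	ti->system_timer = lc->system_timer;
	printf("account_system_time: %llu\n", (unsigned long long)system);

	if ((int64_t)lc->steal_timer > 0) {
		printf("account_steal_time:  %llu\n",
		       (unsigned long long)lc->steal_timer);
		lc->steal_timer = 0;
	}
}

int main(void)
{
	struct lowcore_model lc = { 1500, 700, 2500 };
	struct thread_info_model ti = { 1000, 500 };

	account(&lc, &ti);	/* user 500, system 200, steal 1800 */
	return 0;
}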
diff --git a/arch/sh/include/asm/topology.h b/arch/sh/include/asm/topology.h
index 279d9cc4a007..066f0fba590e 100644
--- a/arch/sh/include/asm/topology.h
+++ b/arch/sh/include/asm/topology.h
@@ -32,6 +32,7 @@
 #define parent_node(node)	((void)(node),0)
 
 #define node_to_cpumask(node)	((void)node, cpu_online_map)
+#define cpumask_of_node(node)	((void)node, cpu_online_mask)
 #define node_to_first_cpu(node)	((void)(node),0)
 
 #define pcibus_to_node(bus)	((void)(bus), -1)
diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h
index 001c04027c82..b8a65b64e1df 100644
--- a/arch/sparc/include/asm/topology_64.h
+++ b/arch/sparc/include/asm/topology_64.h
@@ -16,8 +16,12 @@ static inline cpumask_t node_to_cpumask(int node)
 {
 	return numa_cpumask_lookup_table[node];
 }
+#define cpumask_of_node(node) (&numa_cpumask_lookup_table[node])
 
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+/*
+ * Returns a pointer to the cpumask of CPUs on Node 'node'.
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)		\
 		cpumask_t *v = &(numa_cpumask_lookup_table[node])
 
@@ -26,9 +30,7 @@ static inline cpumask_t node_to_cpumask(int node)
 
 static inline int node_to_first_cpu(int node)
 {
-	cpumask_t tmp;
-	tmp = node_to_cpumask(node);
-	return first_cpu(tmp);
+	return cpumask_first(cpumask_of_node(node));
 }
 
 struct pci_bus;
@@ -77,10 +79,13 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #define topology_core_id(cpu)			(cpu_data(cpu).core_id)
 #define topology_core_siblings(cpu)		(cpu_core_map[cpu])
 #define topology_thread_siblings(cpu)		(per_cpu(cpu_sibling_map, cpu))
+#define topology_core_cpumask(cpu)		(&cpu_core_map[cpu])
+#define topology_thread_cpumask(cpu)		(&per_cpu(cpu_sibling_map, cpu))
 #define mc_capable()				(sparc64_multi_core)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
 
 #define cpu_coregroup_map(cpu)			(cpu_core_map[cpu])
+#define cpu_coregroup_mask(cpu)			(&cpu_core_map[cpu])
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */
diff --git a/arch/sparc/kernel/of_device_64.c b/arch/sparc/kernel/of_device_64.c
index 322046cdf85f..4873f28905b0 100644
--- a/arch/sparc/kernel/of_device_64.c
+++ b/arch/sparc/kernel/of_device_64.c
@@ -778,7 +778,7 @@ static unsigned int __init build_one_device_irq(struct of_device *op,
 out:
 	nid = of_node_to_nid(dp);
 	if (nid != -1) {
-		cpumask_t numa_mask = node_to_cpumask(nid);
+		cpumask_t numa_mask = *cpumask_of_node(nid);
 
 		irq_set_affinity(irq, &numa_mask);
 	}
diff --git a/arch/sparc/kernel/pci_msi.c b/arch/sparc/kernel/pci_msi.c
index 0d0cd815e83e..4ef282e81912 100644
--- a/arch/sparc/kernel/pci_msi.c
+++ b/arch/sparc/kernel/pci_msi.c
@@ -286,7 +286,7 @@ static int bringup_one_msi_queue(struct pci_pbm_info *pbm,
 
 	nid = pbm->numa_node;
 	if (nid != -1) {
-		cpumask_t numa_mask = node_to_cpumask(nid);
+		cpumask_t numa_mask = *cpumask_of_node(nid);
 
 		irq_set_affinity(irq, &numa_mask);
 	}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 249d1e0824b5..862adb9bf0d4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -586,6 +586,16 @@ config AMD_IOMMU
 	  your BIOS for an option to enable it or if you have an IVRS ACPI
 	  table.
 
+config AMD_IOMMU_STATS
+	bool "Export AMD IOMMU statistics to debugfs"
+	depends on AMD_IOMMU
+	select DEBUG_FS
+	help
+	  This option enables code in the AMD IOMMU driver to collect various
+	  statistics about whats happening in the driver and exports that
+	  information to userspace via debugfs.
+	  If unsure, say N.
+
 # need this always selected by IOMMU for the VIA workaround
 config SWIOTLB
 	def_bool y if X86_64
@@ -599,6 +609,9 @@ config SWIOTLB
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 
+config IOMMU_API
+	def_bool (AMD_IOMMU || DMAR)
+
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index b195f85526e3..9dabd00e9805 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -24,15 +24,14 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
-#include <asm/ia32.h>
 #include <asm/ptrace.h>
 #include <asm/ia32_unistd.h>
 #include <asm/user32.h>
 #include <asm/sigcontext32.h>
 #include <asm/proto.h>
 #include <asm/vdso.h>
-
 #include <asm/sigframe.h>
+#include <asm/sys_ia32.h>
 
 #define DEBUG_SIG 0
 
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c
index d21991ce606c..29cdcd02ead3 100644
--- a/arch/x86/ia32/ipc32.c
+++ b/arch/x86/ia32/ipc32.c
@@ -8,6 +8,7 @@
 #include <linux/shm.h>
 #include <linux/ipc.h>
 #include <linux/compat.h>
+#include <asm/sys_ia32.h>
 
 asmlinkage long sys32_ipc(u32 call, int first, int second, int third,
 			  compat_uptr_t ptr, u32 fifth)
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 2e09dcd3c0a6..6c0d7f6231af 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -44,8 +44,8 @@
 #include <asm/types.h>
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
-#include <asm/ia32.h>
 #include <asm/vgtod.h>
+#include <asm/sys_ia32.h>
 
 #define AA(__x) ((unsigned long)(__x))
 
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index ac302a2fa339..95c8cd9d22b5 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -190,16 +190,23 @@
190/* FIXME: move this macro to <linux/pci.h> */ 190/* FIXME: move this macro to <linux/pci.h> */
191#define PCI_BUS(x) (((x) >> 8) & 0xff) 191#define PCI_BUS(x) (((x) >> 8) & 0xff)
192 192
193/* Protection domain flags */
194#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
195#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
196 domain for an IOMMU */
197
193/* 198/*
194 * This structure contains generic data for IOMMU protection domains 199 * This structure contains generic data for IOMMU protection domains
195 * independent of their use. 200 * independent of their use.
196 */ 201 */
197struct protection_domain { 202struct protection_domain {
198 spinlock_t lock; /* mostly used to lock the page table*/ 203 spinlock_t lock; /* mostly used to lock the page table*/
199 u16 id; /* the domain id written to the device table */ 204 u16 id; /* the domain id written to the device table */
200 int mode; /* paging mode (0-6 levels) */ 205 int mode; /* paging mode (0-6 levels) */
201 u64 *pt_root; /* page table root pointer */ 206 u64 *pt_root; /* page table root pointer */
202 void *priv; /* private data */ 207 unsigned long flags; /* flags to find out type of domain */
208 unsigned dev_cnt; /* devices assigned to this domain */
209 void *priv; /* private data */
203}; 210};
204 211
205/* 212/*
@@ -295,7 +302,7 @@ struct amd_iommu {
 	bool int_enabled;
 
 	/* if one, we need to send a completion wait command */
-	int need_sync;
+	bool need_sync;
 
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
 extern unsigned long *amd_iommu_pd_alloc_bitmap;
 
 /* will be 1 if device isolation is enabled */
-extern int amd_iommu_isolate;
+extern bool amd_iommu_isolate;
 
 /*
  * If true, the addresses will be flushed on unmap time, not when
@@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
  */
 extern bool amd_iommu_unmap_flush;
 
-/* takes a PCI device id and prints it out in a readable form */
-static inline void print_devid(u16 devid, int nl)
-{
-	int bus = devid >> 8;
-	int dev = devid >> 3 & 0x1f;
-	int fn = devid & 0x07;
-
-	printk("%02x:%02x.%x", bus, dev, fn);
-	if (nl)
-		printk("\n");
-}
-
 /* takes bus and device/function and returns the device id
  * FIXME: should that be in generic PCI code? */
 static inline u16 calc_devid(u8 bus, u8 devfn)
@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
         return (((u16)bus) << 8) | devfn;
 }
 
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+struct __iommu_counter {
+        char *name;
+        struct dentry *dent;
+        u64 value;
+};
+
+#define DECLARE_STATS_COUNTER(nm) \
+        static struct __iommu_counter nm = {    \
+                .name = #nm,                    \
+        }
+
+#define INC_STATS_COUNTER(name)         name.value += 1
+#define ADD_STATS_COUNTER(name, x)      name.value += (x)
+#define SUB_STATS_COUNTER(name, x)      name.value -= (x)
+
+#else /* CONFIG_AMD_IOMMU_STATS */
+
+#define DECLARE_STATS_COUNTER(name)
+#define INC_STATS_COUNTER(name)
+#define ADD_STATS_COUNTER(name, x)
+#define SUB_STATS_COUNTER(name, x)
+
+static inline void amd_iommu_stats_init(void) { }
+
+#endif /* CONFIG_AMD_IOMMU_STATS */
+
 #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */
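The statistics macros above compile to nothing when CONFIG_AMD_IOMMU_STATS is off, so the counters cost nothing in production builds. A minimal user-space sketch of the same pattern (the STATS define and all names here are illustrative, not part of the patch):

    #include <stdio.h>

    #ifdef STATS    /* build with -DSTATS to mirror CONFIG_AMD_IOMMU_STATS=y */
    struct counter { const char *name; unsigned long long value; };
    #define DECLARE_COUNTER(nm)  static struct counter nm = { .name = #nm }
    #define INC_COUNTER(nm)      ((nm).value += 1)
    #define PRINT_COUNTER(nm)    printf("%s = %llu\n", (nm).name, (nm).value)
    #else           /* the !STATS build emits no code for any of these */
    #define DECLARE_COUNTER(nm)
    #define INC_COUNTER(nm)      do { } while (0)
    #define PRINT_COUNTER(nm)    do { } while (0)
    #endif

    DECLARE_COUNTER(map_requests);

    int main(void)
    {
            INC_COUNTER(map_requests);
            PRINT_COUNTER(map_requests); /* prints "map_requests = 1" with -DSTATS */
            return 0;
    }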
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index 25caa0738af5..ab1d51a8855e 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -54,7 +54,6 @@ extern int disable_apic;
 extern int is_vsmp_box(void);
 extern void xapic_wait_icr_idle(void);
 extern u32 safe_xapic_wait_icr_idle(void);
-extern u64 xapic_icr_read(void);
 extern void xapic_icr_write(u32, u32);
 extern int setup_profiling_timer(unsigned int);
 
@@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg)
 }
 
 #ifndef CONFIG_X86_32
-extern int x2apic, x2apic_preenabled;
+extern int x2apic;
 extern void check_x2apic(void);
 extern void enable_x2apic(void);
 extern void enable_IR_x2apic(void);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index a2e545c91c35..ca5ffb2856b6 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size);
 
 #endif /* CONFIG_X86_32 */
 
+extern int add_efi_memmap;
 extern void efi_reserve_early(void);
 extern void efi_call_phys_prelog(void);
 extern void efi_call_phys_epilog(void);
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h
index 51ac1230294e..bc53d5ef1386 100644
--- a/arch/x86/include/asm/es7000/apic.h
+++ b/arch/x86/include/asm/es7000/apic.h
@@ -157,7 +157,7 @@ cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
 
         num_bits_set = cpumask_weight(cpumask);
         /* Return id to all */
-        if (num_bits_set == NR_CPUS)
+        if (num_bits_set == nr_cpu_ids)
                 return 0xFF;
         /*
          * The cpus in the mask must all be on the apic cluster. If they are not
@@ -190,7 +190,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 
         num_bits_set = cpus_weight(*cpumask);
         /* Return id to all */
-        if (num_bits_set == NR_CPUS)
+        if (num_bits_set == nr_cpu_ids)
                 return cpu_to_logical_apicid(0);
         /*
          * The cpus in the mask must all be on the apic cluster. If they are not
@@ -218,9 +218,6 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
                                                   const struct cpumask *andmask)
 {
-        int num_bits_set;
-        int cpus_found = 0;
-        int cpu;
         int apicid = cpu_to_logical_apicid(0);
         cpumask_var_t cpumask;
 
@@ -229,31 +226,8 @@ static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
 
         cpumask_and(cpumask, inmask, andmask);
         cpumask_and(cpumask, cpumask, cpu_online_mask);
+        apicid = cpu_mask_to_apicid(cpumask);
 
-        num_bits_set = cpumask_weight(cpumask);
-        /* Return id to all */
-        if (num_bits_set == NR_CPUS)
-                goto exit;
-        /*
-         * The cpus in the mask must all be on the apic cluster. If are not
-         * on the same apicid cluster return default value of TARGET_CPUS.
-         */
-        cpu = cpumask_first(cpumask);
-        apicid = cpu_to_logical_apicid(cpu);
-        while (cpus_found < num_bits_set) {
-                if (cpumask_test_cpu(cpu, cpumask)) {
-                        int new_apicid = cpu_to_logical_apicid(cpu);
-                        if (apicid_cluster(apicid) !=
-                                        apicid_cluster(new_apicid)){
-                                printk ("%s: Not a valid mask!\n", __func__);
-                                return cpu_to_logical_apicid(0);
-                        }
-                        apicid = new_apicid;
-                        cpus_found++;
-                }
-                cpu++;
-        }
-exit:
         free_cpumask_var(cpumask);
         return apicid;
 }
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 97215a458e5f..730843d1d2fb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -360,7 +360,7 @@ struct kvm_arch{
         struct list_head active_mmu_pages;
         struct list_head assigned_dev_head;
         struct list_head oos_global_pages;
-        struct dmar_domain *intel_iommu_domain;
+        struct iommu_domain *iommu_domain;
         struct kvm_pic *vpic;
         struct kvm_ioapic *vioapic;
         struct kvm_pit *vpit;
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h
index d28a507cef39..1caf57628b9c 100644
--- a/arch/x86/include/asm/lguest.h
+++ b/arch/x86/include/asm/lguest.h
@@ -15,7 +15,7 @@
 #define SHARED_SWITCHER_PAGES \
         DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
 /* Pages for switcher itself, then two pages per cpu */
-#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS)
+#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
 
 /* We map at -4M for ease of mapping into the guest (one PTE page). */
 #define SWITCHER_ADDR 0xFFC00000
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h
index 91885c28f66b..62d14ce3cd00 100644
--- a/arch/x86/include/asm/mpspec.h
+++ b/arch/x86/include/asm/mpspec.h
@@ -6,13 +6,13 @@
 #include <asm/mpspec_def.h>
 
 extern int apic_version[MAX_APICS];
+extern int pic_mode;
 
 #ifdef CONFIG_X86_32
 #include <mach_mpspec.h>
 
 extern unsigned int def_to_bigsmp;
 extern u8 apicid_2_node[];
-extern int pic_mode;
 
 #ifdef CONFIG_X86_NUMAQ
 extern int mp_bus_id_to_node[MAX_MP_BUSSES];
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h
index c80f00d29965..bf37bc49bd8e 100644
--- a/arch/x86/include/asm/numaq/apic.h
+++ b/arch/x86/include/asm/numaq/apic.h
@@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map)
 extern u8 cpu_2_logical_apicid[];
 static inline int cpu_to_logical_apicid(int cpu)
 {
-        if (cpu >= NR_CPUS)
+        if (cpu >= nr_cpu_ids)
                 return BAD_APICID;
         return (int)cpu_2_logical_apicid[cpu];
 }
 
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 66834c41c049..a977de23cb4d 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -102,9 +102,9 @@ extern void pci_iommu_alloc(void);
 
 #ifdef CONFIG_NUMA
 /* Returns the node based on pci bus */
-static inline int __pcibus_to_node(struct pci_bus *bus)
+static inline int __pcibus_to_node(const struct pci_bus *bus)
 {
-        struct pci_sysdata *sd = bus->sysdata;
+        const struct pci_sysdata *sd = bus->sysdata;
 
         return sd->node;
 }
@@ -113,6 +113,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
 {
         return node_to_cpumask(__pcibus_to_node(bus));
 }
+
+static inline const struct cpumask *
+cpumask_of_pcibus(const struct pci_bus *bus)
+{
+        return cpumask_of_node(__pcibus_to_node(bus));
+}
 #endif
 
 #endif /* _ASM_X86_PCI_H */
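cpumask_of_pcibus() follows the same pointer-returning convention as cpumask_of_node() elsewhere in this merge: with a large NR_CPUS a cpumask_t is hundreds of bytes, so handing out a pointer instead of a by-value copy keeps big masks off the stack. A user-space sketch of the difference (struct mask and both function names are stand-ins, not kernel APIs):

    #include <stdio.h>

    struct mask { unsigned long bits[64]; }; /* stand-in for a 4096-bit cpumask_t */

    static struct mask node_masks[4];

    /* old style: the whole 512-byte structure is copied to the caller */
    static struct mask mask_of_node_byval(int node) { return node_masks[node]; }

    /* new style: only a pointer crosses the call, whatever NR_CPUS is */
    static const struct mask *mask_of_node(int node) { return &node_masks[node]; }

    int main(void)
    {
            struct mask copy = mask_of_node_byval(0);
            const struct mask *ref = mask_of_node(0);

            printf("copy: %zu bytes, pointer: %zu bytes\n",
                   sizeof(copy), sizeof(ref));
            return 0;
    }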
diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h
index 1959018aac02..e60fd3e14bdf 100644
--- a/arch/x86/pci/pci.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops;
 struct irq_info {
         u8 bus, devfn;                  /* Bus, device and function */
         struct {
-                u8 link;                /* IRQ line ID, chipset dependent, 0=not routed */
+                u8 link;                /* IRQ line ID, chipset dependent,
+                                           0 = not routed */
                 u16 bitmap;             /* Available IRQs */
         } __attribute__((packed)) irq[4];
         u8 slot;                        /* Slot number, 0=onboard */
@@ -69,11 +70,13 @@ struct irq_routing_table {
         u16 version;                    /* PIRQ_VERSION */
         u16 size;                       /* Table size in bytes */
         u8 rtr_bus, rtr_devfn;          /* Where the interrupt router lies */
-        u16 exclusive_irqs;             /* IRQs devoted exclusively to PCI usage */
-        u16 rtr_vendor, rtr_device;     /* Vendor and device ID of interrupt router */
+        u16 exclusive_irqs;             /* IRQs devoted exclusively to
+                                           PCI usage */
+        u16 rtr_vendor, rtr_device;     /* Vendor and device ID of
+                                           interrupt router */
         u32 miniport_data;              /* Crap */
         u8 rfu[11];
-        u8 checksum;                    /* Modulo 256 checksum must give zero */
+        u8 checksum;                    /* Modulo 256 checksum must give 0 */
         struct irq_info slots[0];
 } __attribute__((packed));
 
@@ -148,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos)
 
 static inline void mmio_config_writeb(void __iomem *pos, u8 val)
 {
-        asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory");
+        asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
 }
 
 static inline void mmio_config_writew(void __iomem *pos, u16 val)
 {
-        asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory");
+        asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory");
 }
 
 static inline void mmio_config_writel(void __iomem *pos, u32 val)
 {
-        asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory");
+        asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory");
 }
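The mmio_config_write* change is purely cosmetic: in GNU C inline assembly the sections after the template are outputs : inputs : clobbers, and writing ": :" instead of "::" makes the empty output-operand list visible. A runnable x86 sketch (the function name is illustrative, not the kernel accessor itself):

    #include <stdio.h>

    static inline void write_byte(volatile unsigned char *pos, unsigned char val)
    {
            /* outputs : inputs : clobbers -- the output list is empty */
            asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory");
    }

    int main(void)
    {
            unsigned char buf = 0;
            write_byte(&buf, 0x42);
            printf("0x%02x\n", buf);  /* prints 0x42 (x86/x86-64 with GCC only) */
            return 0;
    }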
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h
index 99327d1be49f..4bb5fb34f030 100644
--- a/arch/x86/include/asm/summit/apic.h
+++ b/arch/x86/include/asm/summit/apic.h
@@ -52,7 +52,7 @@ static inline void init_apic_ldr(void)
52 int i; 52 int i;
53 53
54 /* Create logical APIC IDs by counting CPUs already in cluster. */ 54 /* Create logical APIC IDs by counting CPUs already in cluster. */
55 for (count = 0, i = NR_CPUS; --i >= 0; ) { 55 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
56 lid = cpu_2_logical_apicid[i]; 56 lid = cpu_2_logical_apicid[i];
57 if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) 57 if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
58 ++count; 58 ++count;
@@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid)
 static inline int cpu_to_logical_apicid(int cpu)
 {
 #ifdef CONFIG_SMP
-        if (cpu >= NR_CPUS)
+        if (cpu >= nr_cpu_ids)
                 return BAD_APICID;
         return (int)cpu_2_logical_apicid[cpu];
 #else
         return logical_smp_processor_id();
@@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu)
 
 static inline int cpu_present_to_apicid(int mps_cpu)
 {
-        if (mps_cpu < NR_CPUS)
+        if (mps_cpu < nr_cpu_ids)
                 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
         else
                 return BAD_APICID;
@@ -146,7 +146,7 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 
         num_bits_set = cpus_weight(*cpumask);
         /* Return id to all */
-        if (num_bits_set == NR_CPUS)
+        if (num_bits_set >= nr_cpu_ids)
                 return (int) 0xFF;
         /*
          * The cpus in the mask must all be on the apic cluster. If they are not
@@ -173,42 +173,16 @@ static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask)
 static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask,
                                                   const struct cpumask *andmask)
 {
-        int num_bits_set;
-        int cpus_found = 0;
-        int cpu;
-        int apicid = 0xFF;
+        int apicid = cpu_to_logical_apicid(0);
         cpumask_var_t cpumask;
 
         if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
-                return (int) 0xFF;
+                return apicid;
 
         cpumask_and(cpumask, inmask, andmask);
         cpumask_and(cpumask, cpumask, cpu_online_mask);
+        apicid = cpu_mask_to_apicid(cpumask);
 
-        num_bits_set = cpumask_weight(cpumask);
-        /* Return id to all */
-        if (num_bits_set == nr_cpu_ids)
-                goto exit;
-        /*
-         * The cpus in the mask must all be on the apic cluster. If are not
-         * on the same apicid cluster return default value of TARGET_CPUS.
-         */
-        cpu = cpumask_first(cpumask);
-        apicid = cpu_to_logical_apicid(cpu);
-        while (cpus_found < num_bits_set) {
-                if (cpumask_test_cpu(cpu, cpumask)) {
-                        int new_apicid = cpu_to_logical_apicid(cpu);
-                        if (apicid_cluster(apicid) !=
-                                        apicid_cluster(new_apicid)){
-                                printk ("%s: Not a valid mask!\n", __func__);
-                                return 0xFF;
-                        }
-                        apicid = apicid | new_apicid;
-                        cpus_found++;
-                }
-                cpu++;
-        }
-exit:
         free_cpumask_var(cpumask);
         return apicid;
 }
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
new file mode 100644
index 000000000000..ffb08be2a530
--- /dev/null
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -0,0 +1,101 @@
+/*
+ * sys_ia32.h - Linux ia32 syscall interfaces
+ *
+ * Copyright (c) 2008 Jaswinder Singh Rajput
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#ifndef _ASM_X86_SYS_IA32_H
+#define _ASM_X86_SYS_IA32_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <linux/types.h>
+#include <linux/signal.h>
+#include <asm/compat.h>
+#include <asm/ia32.h>
+
+/* ia32/sys_ia32.c */
+asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long);
+asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long);
+
+asmlinkage long sys32_stat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *);
+asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *);
+asmlinkage long sys32_fstatat(unsigned int, char __user *,
+                              struct stat64 __user *, int);
+struct mmap_arg_struct;
+asmlinkage long sys32_mmap(struct mmap_arg_struct __user *);
+asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long);
+
+asmlinkage long sys32_pipe(int __user *);
+struct sigaction32;
+struct old_sigaction32;
+asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *,
+                                   struct sigaction32 __user *, unsigned int);
+asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *,
+                                struct old_sigaction32 __user *);
+asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *,
+                                     compat_sigset_t __user *, unsigned int);
+asmlinkage long sys32_alarm(unsigned int);
+
+struct sel_arg_struct;
+asmlinkage long sys32_old_select(struct sel_arg_struct __user *);
+asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int);
+asmlinkage long sys32_sysfs(int, u32, u32);
+
+asmlinkage long sys32_sched_rr_get_interval(compat_pid_t,
+                                            struct compat_timespec __user *);
+asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t);
+asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *);
+
+#ifdef CONFIG_SYSCTL_SYSCALL
+struct sysctl_ia32;
+asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *);
+#endif
+
+asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32);
+asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32);
+
+asmlinkage long sys32_personality(unsigned long);
+asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
+
+asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long,
+                            unsigned long, unsigned long, unsigned long);
+
+struct oldold_utsname;
+struct old_utsname;
+asmlinkage long sys32_olduname(struct oldold_utsname __user *);
+long sys32_uname(struct old_utsname __user *);
+
+long sys32_ustat(unsigned, struct ustat32 __user *);
+
+asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *,
+                             compat_uptr_t __user *, struct pt_regs *);
+asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
+
+long sys32_lseek(unsigned int, int, unsigned int);
+long sys32_kill(int, int);
+long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int);
+long sys32_vm86_warning(void);
+long sys32_lookup_dcookie(u32, u32, char __user *, size_t);
+
+asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t);
+asmlinkage long sys32_sync_file_range(int, unsigned, unsigned,
+                                      unsigned, unsigned, int);
+asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int);
+asmlinkage long sys32_fallocate(int, int, unsigned,
+                                unsigned, unsigned, unsigned);
+
+/* ia32/ia32_signal.c */
+asmlinkage long sys32_sigsuspend(int, int, old_sigset_t);
+asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *,
+                                  stack_ia32_t __user *, struct pt_regs *);
+asmlinkage long sys32_sigreturn(struct pt_regs *);
+asmlinkage long sys32_rt_sigreturn(struct pt_regs *);
+
+/* ia32/ipc32.c */
+asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32);
+#endif /* _ASM_X86_SYS_IA32_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 79e31e9dcdda..4e2f2e0aab27 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu)
61 * 61 *
62 * Side note: this function creates the returned cpumask on the stack 62 * Side note: this function creates the returned cpumask on the stack
63 * so with a high NR_CPUS count, excessive stack space is used. The 63 * so with a high NR_CPUS count, excessive stack space is used. The
64 * node_to_cpumask_ptr function should be used whenever possible. 64 * cpumask_of_node function should be used whenever possible.
65 */ 65 */
66static inline cpumask_t node_to_cpumask(int node) 66static inline cpumask_t node_to_cpumask(int node)
67{ 67{
68 return node_to_cpumask_map[node]; 68 return node_to_cpumask_map[node];
69} 69}
70 70
71/* Returns a bitmask of CPUs on Node 'node'. */
72static inline const struct cpumask *cpumask_of_node(int node)
73{
74 return &node_to_cpumask_map[node];
75}
76
71#else /* CONFIG_X86_64 */ 77#else /* CONFIG_X86_64 */
72 78
73/* Mappings between node number and cpus on that node. */ 79/* Mappings between node number and cpus on that node. */
@@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
 #ifdef CONFIG_DEBUG_PER_CPU_MAPS
 extern int cpu_to_node(int cpu);
 extern int early_cpu_to_node(int cpu);
-extern const cpumask_t *_node_to_cpumask_ptr(int node);
+extern const cpumask_t *cpumask_of_node(int node);
 extern cpumask_t node_to_cpumask(int node);
 
 #else /* !CONFIG_DEBUG_PER_CPU_MAPS */
@@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu)
 }
 
 /* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
 {
         return &node_to_cpumask_map[node];
 }
@@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node)
 
 #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
 
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)            \
-                const cpumask_t *v = _node_to_cpumask_ptr(node)
+                const cpumask_t *v = cpumask_of_node(node)
 
 #define node_to_cpumask_ptr_next(v, node)       \
-                        v = _node_to_cpumask_ptr(node)
+                        v = cpumask_of_node(node)
 
 #endif /* CONFIG_X86_64 */
 
@@ -187,7 +196,7 @@ extern int __node_distance(int, int);
 #define cpu_to_node(cpu)        0
 #define early_cpu_to_node(cpu)  0
 
-static inline const cpumask_t *_node_to_cpumask_ptr(int node)
+static inline const cpumask_t *cpumask_of_node(int node)
 {
         return &cpu_online_map;
 }
@@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node)
         return first_cpu(cpu_online_map);
 }
 
-/* Replace default node_to_cpumask_ptr with optimized version */
+/*
+ * Replace default node_to_cpumask_ptr with optimized version
+ * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
+ */
 #define node_to_cpumask_ptr(v, node)            \
-                const cpumask_t *v = _node_to_cpumask_ptr(node)
+                const cpumask_t *v = cpumask_of_node(node)
 
 #define node_to_cpumask_ptr_next(v, node)       \
-                        v = _node_to_cpumask_ptr(node)
+                        v = cpumask_of_node(node)
 #endif
 
 #include <asm-generic/topology.h>
@@ -214,12 +226,12 @@ static inline int node_to_first_cpu(int node)
 /* Returns the number of the first CPU on Node 'node'. */
 static inline int node_to_first_cpu(int node)
 {
-        node_to_cpumask_ptr(mask, node);
-        return first_cpu(*mask);
+        return cpumask_first(cpumask_of_node(node));
 }
 #endif
 
 extern cpumask_t cpu_coregroup_map(int cpu);
+extern const struct cpumask *cpu_coregroup_mask(int cpu);
 
 #ifdef ENABLE_TOPO_DEFINES
 #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id)
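For callers, the conversion this file asks for is mechanical; a kernel-style sketch of a caller updated to the new interface (first_cpu_on_node is a hypothetical example, not a function from the patch):

    /* old, now deprecated:
     *         node_to_cpumask_ptr(mask, node);
     *         cpu = first_cpu(*mask);
     * new: */
    static int first_cpu_on_node(int node)
    {
            const struct cpumask *mask = cpumask_of_node(node);

            return cpumask_first(mask);
    }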
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index e2363253bbbf..50423c7b56b2 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -133,61 +133,61 @@ struct bau_msg_payload {
  * see table 4.2.3.0.1 in broadcast_assist spec.
  */
 struct bau_msg_header {
-        int dest_subnodeid:6;   /* must be zero */
+        unsigned int dest_subnodeid:6;  /* must be zero */
         /* bits 5:0 */
-        int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
-        /* bits 20:6 */
-        int command:8;          /* message type */
+        unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */
+        /* bits 20:6 */                   /* first bit in node_map */
+        unsigned int command:8; /* message type */
         /* bits 28:21 */
         /* 0x38: SN3net EndPoint Message */
-        int rsvd_1:3;           /* must be zero */
+        unsigned int rsvd_1:3;  /* must be zero */
         /* bits 31:29 */
         /* int will align on 32 bits */
-        int rsvd_2:9;           /* must be zero */
+        unsigned int rsvd_2:9;  /* must be zero */
         /* bits 40:32 */
         /* Suppl_A is 56-41 */
-        int payload_2a:8;       /* becomes byte 16 of msg */
+        unsigned int payload_2a:8;/* becomes byte 16 of msg */
         /* bits 48:41 */        /* not currently using */
-        int payload_2b:8;       /* becomes byte 17 of msg */
+        unsigned int payload_2b:8;/* becomes byte 17 of msg */
         /* bits 56:49 */        /* not currently using */
         /* Address field (96:57) is never used as an
            address (these are address bits 42:3) */
-        int rsvd_3:1;           /* must be zero */
+        unsigned int rsvd_3:1;  /* must be zero */
         /* bit 57 */
         /* address bits 27:4 are payload */
         /* these 24 bits become bytes 12-14 of msg */
-        int replied_to:1;       /* sent as 0 by the source to byte 12 */
+        unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */
         /* bit 58 */
 
-        int payload_1a:5;       /* not currently used */
+        unsigned int payload_1a:5;/* not currently used */
         /* bits 63:59 */
-        int payload_1b:8;       /* not currently used */
+        unsigned int payload_1b:8;/* not currently used */
         /* bits 71:64 */
-        int payload_1c:8;       /* not currently used */
+        unsigned int payload_1c:8;/* not currently used */
         /* bits 79:72 */
-        int payload_1d:2;       /* not currently used */
+        unsigned int payload_1d:2;/* not currently used */
         /* bits 81:80 */
 
-        int rsvd_4:7;           /* must be zero */
+        unsigned int rsvd_4:7;  /* must be zero */
         /* bits 88:82 */
-        int sw_ack_flag:1;      /* software acknowledge flag */
+        unsigned int sw_ack_flag:1;/* software acknowledge flag */
         /* bit 89 */
         /* INTD transactions at destination are to
            wait for software acknowledge */
-        int rsvd_5:6;           /* must be zero */
+        unsigned int rsvd_5:6;  /* must be zero */
         /* bits 95:90 */
-        int rsvd_6:5;           /* must be zero */
+        unsigned int rsvd_6:5;  /* must be zero */
         /* bits 100:96 */
-        int int_both:1;         /* if 1, interrupt both sockets on the blade */
+        unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */
         /* bit 101*/
-        int fairness:3;         /* usually zero */
+        unsigned int fairness:3;/* usually zero */
         /* bits 104:102 */
-        int multilevel:1;       /* multi-level multicast format */
+        unsigned int multilevel:1; /* multi-level multicast format */
         /* bit 105 */
         /* 0 for TLB: endpoint multi-unicast messages */
-        int chaining:1;         /* next descriptor is part of this activation*/
+        unsigned int chaining:1;/* next descriptor is part of this activation*/
         /* bit 106 */
-        int rsvd_7:21;          /* must be zero */
+        unsigned int rsvd_7:21; /* must be zero */
         /* bits 127:107 */
 };
 
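The reason every field switches to unsigned int: a signed bitfield of width 1 can only hold 0 and -1, so a test like replied_to == 1 is never true, and sign extension corrupts values extracted from a hardware-defined layout. A small runnable demonstration:

    #include <stdio.h>

    struct flags {
            int s:1;            /* signed: the only values are 0 and -1 */
            unsigned int u:1;   /* unsigned: the values are 0 and 1 */
    };

    int main(void)
    {
            struct flags f = { .s = 1, .u = 1 };

            printf("s=%d u=%u\n", f.s, f.u);  /* prints: s=-1 u=1 */
            printf("s==1? %d\n", f.s == 1);   /* prints: s==1? 0 */
            return 0;
    }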
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 65d0b72777ea..29dc0c89d4af 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
         struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
         union acpi_object *obj;
         struct acpi_madt_local_apic *lapic;
-        cpumask_t tmp_map, new_map;
+        cpumask_var_t tmp_map, new_map;
         u8 physid;
         int cpu;
+        int retval = -ENOMEM;
 
         if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
                 return -EINVAL;
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
         buffer.length = ACPI_ALLOCATE_BUFFER;
         buffer.pointer = NULL;
 
-        tmp_map = cpu_present_map;
+        if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL))
+                goto out;
+
+        if (!alloc_cpumask_var(&new_map, GFP_KERNEL))
+                goto free_tmp_map;
+
+        cpumask_copy(tmp_map, cpu_present_mask);
         acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED);
 
         /*
          * If mp_register_lapic successfully generates a new logical cpu
          * number, then the following will get us exactly what was mapped
          */
-        cpus_andnot(new_map, cpu_present_map, tmp_map);
-        if (cpus_empty(new_map)) {
+        cpumask_andnot(new_map, cpu_present_mask, tmp_map);
+        if (cpumask_empty(new_map)) {
                 printk ("Unable to map lapic to logical cpu number\n");
-                return -EINVAL;
+                retval = -EINVAL;
+                goto free_new_map;
         }
 
-        cpu = first_cpu(new_map);
+        cpu = cpumask_first(new_map);
 
         *pcpu = cpu;
-        return 0;
+        retval = 0;
+
+free_new_map:
+        free_cpumask_var(new_map);
+free_tmp_map:
+        free_cpumask_var(tmp_map);
+out:
+        return retval;
 }
 
 /* wrapper to silence section mismatch warning */
@@ -598,7 +613,7 @@ EXPORT_SYMBOL(acpi_map_lsapic);
 int acpi_unmap_lsapic(int cpu)
 {
         per_cpu(x86_cpu_to_apicid, cpu) = -1;
-        cpu_clear(cpu, cpu_present_map);
+        set_cpu_present(cpu, false);
         num_processors--;
 
         return (0);
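With CONFIG_CPUMASK_OFFSTACK, cpumask_var_t is heap-allocated, so every exit path must free what was allocated; the goto ladder above guarantees that, and the success path deliberately falls through the same frees. The same unwind shape in a runnable user-space sketch (malloc stands in for alloc_cpumask_var, -1 for -ENOMEM):

    #include <stdlib.h>

    static int do_work(int fail)
    {
            int retval = -1;   /* pessimistic default, like -ENOMEM above */
            char *tmp, *new;

            tmp = malloc(64);
            if (!tmp)
                    goto out;
            new = malloc(64);
            if (!new)
                    goto free_tmp;

            if (fail) {        /* analogue of the -EINVAL path */
                    retval = -2;
                    goto free_new;
            }

            retval = 0;        /* success falls through the frees too */

    free_new:
            free(new);
    free_tmp:
            free(tmp);
    out:
            return retval;
    }

    int main(void) { return do_work(0); }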
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2e2da717b350..5113c080f0c4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -20,8 +20,12 @@
 #include <linux/pci.h>
 #include <linux/gfp.h>
 #include <linux/bitops.h>
+#include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/iommu-helper.h>
+#ifdef CONFIG_IOMMU_API
+#include <linux/iommu.h>
+#endif
 #include <asm/proto.h>
 #include <asm/iommu.h>
 #include <asm/gart.h>
@@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
 static LIST_HEAD(iommu_pd_list);
 static DEFINE_SPINLOCK(iommu_pd_list_lock);
 
+#ifdef CONFIG_IOMMU_API
+static struct iommu_ops amd_iommu_ops;
+#endif
+
 /*
  * general struct to manage commands sent to an IOMMU
  */
@@ -47,6 +55,68 @@ struct iommu_cmd {
 
 static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
                              struct unity_map_entry *e);
+static struct dma_ops_domain *find_protection_domain(u16 devid);
+
+
+#ifdef CONFIG_AMD_IOMMU_STATS
+
+/*
+ * Initialization code for statistics collection
+ */
+
+DECLARE_STATS_COUNTER(compl_wait);
+DECLARE_STATS_COUNTER(cnt_map_single);
+DECLARE_STATS_COUNTER(cnt_unmap_single);
+DECLARE_STATS_COUNTER(cnt_map_sg);
+DECLARE_STATS_COUNTER(cnt_unmap_sg);
+DECLARE_STATS_COUNTER(cnt_alloc_coherent);
+DECLARE_STATS_COUNTER(cnt_free_coherent);
+DECLARE_STATS_COUNTER(cross_page);
+DECLARE_STATS_COUNTER(domain_flush_single);
+DECLARE_STATS_COUNTER(domain_flush_all);
+DECLARE_STATS_COUNTER(alloced_io_mem);
+DECLARE_STATS_COUNTER(total_map_requests);
+
+static struct dentry *stats_dir;
+static struct dentry *de_isolate;
+static struct dentry *de_fflush;
+
+static void amd_iommu_stats_add(struct __iommu_counter *cnt)
+{
+        if (stats_dir == NULL)
+                return;
+
+        cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
+                                       &cnt->value);
+}
+
+static void amd_iommu_stats_init(void)
+{
+        stats_dir = debugfs_create_dir("amd-iommu", NULL);
+        if (stats_dir == NULL)
+                return;
+
+        de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
+                                         (u32 *)&amd_iommu_isolate);
+
+        de_fflush  = debugfs_create_bool("fullflush", 0444, stats_dir,
+                                         (u32 *)&amd_iommu_unmap_flush);
+
+        amd_iommu_stats_add(&compl_wait);
+        amd_iommu_stats_add(&cnt_map_single);
+        amd_iommu_stats_add(&cnt_unmap_single);
+        amd_iommu_stats_add(&cnt_map_sg);
+        amd_iommu_stats_add(&cnt_unmap_sg);
+        amd_iommu_stats_add(&cnt_alloc_coherent);
+        amd_iommu_stats_add(&cnt_free_coherent);
+        amd_iommu_stats_add(&cross_page);
+        amd_iommu_stats_add(&domain_flush_single);
+        amd_iommu_stats_add(&domain_flush_all);
+        amd_iommu_stats_add(&alloced_io_mem);
+        amd_iommu_stats_add(&total_map_requests);
+}
+
+#endif
 
 /* returns !0 if the IOMMU is caching non-present entries in its TLB */
 static int iommu_has_npcache(struct amd_iommu *iommu)
@@ -189,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
         spin_lock_irqsave(&iommu->lock, flags);
         ret = __iommu_queue_command(iommu, cmd);
         if (!ret)
-                iommu->need_sync = 1;
+                iommu->need_sync = true;
         spin_unlock_irqrestore(&iommu->lock, flags);
 
         return ret;
 }
 
 /*
+ * This function waits until an IOMMU has completed a completion
+ * wait command
+ */
+static void __iommu_wait_for_completion(struct amd_iommu *iommu)
+{
+        int ready = 0;
+        unsigned status = 0;
+        unsigned long i = 0;
+
+        INC_STATS_COUNTER(compl_wait);
+
+        while (!ready && (i < EXIT_LOOP_COUNT)) {
+                ++i;
+                /* wait for the bit to become one */
+                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
+                ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
+        }
+
+        /* set bit back to zero */
+        status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
+        writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
+
+        if (unlikely(i == EXIT_LOOP_COUNT))
+                panic("AMD IOMMU: Completion wait loop failed\n");
+}
+
+/*
+ * This function queues a completion wait command into the command
+ * buffer of an IOMMU
+ */
+static int __iommu_completion_wait(struct amd_iommu *iommu)
+{
+        struct iommu_cmd cmd;
+
+        memset(&cmd, 0, sizeof(cmd));
+        cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
+        CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+
+        return __iommu_queue_command(iommu, &cmd);
+}
+
+/*
  * This function is called whenever we need to ensure that the IOMMU has
  * completed execution of all commands we sent. It sends a
  * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
@@ -204,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
  */
 static int iommu_completion_wait(struct amd_iommu *iommu)
 {
-        int ret = 0, ready = 0;
-        unsigned status = 0;
-        struct iommu_cmd cmd;
-        unsigned long flags, i = 0;
-
-        memset(&cmd, 0, sizeof(cmd));
-        cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
-        CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
+        int ret = 0;
+        unsigned long flags;
 
         spin_lock_irqsave(&iommu->lock, flags);
 
         if (!iommu->need_sync)
                 goto out;
 
-        iommu->need_sync = 0;
+        ret = __iommu_completion_wait(iommu);
 
-        ret = __iommu_queue_command(iommu, &cmd);
+        iommu->need_sync = false;
 
         if (ret)
                 goto out;
 
-        while (!ready && (i < EXIT_LOOP_COUNT)) {
-                ++i;
-                /* wait for the bit to become one */
-                status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
-                ready = status & MMIO_STATUS_COM_WAIT_INT_MASK;
-        }
-
-        /* set bit back to zero */
-        status &= ~MMIO_STATUS_COM_WAIT_INT_MASK;
-        writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET);
-
-        if (unlikely(i == EXIT_LOOP_COUNT))
-                panic("AMD IOMMU: Completion wait loop failed\n");
+        __iommu_wait_for_completion(iommu);
 
 out:
         spin_unlock_irqrestore(&iommu->lock, flags);
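The refactoring splits iommu_completion_wait() into __-prefixed helpers so the new iommu_flush_domain() further down can queue a command and wait for it while already holding iommu->lock. This follows the usual kernel convention: __foo() assumes the caller holds the lock, foo() takes it itself. A sketch of the shape (do_sync/__do_sync are hypothetical names, not functions from the patch):

    static void __do_sync(struct amd_iommu *iommu)
    {
            /* caller must hold iommu->lock */
            __iommu_completion_wait(iommu);
            __iommu_wait_for_completion(iommu);
    }

    static void do_sync(struct amd_iommu *iommu)
    {
            unsigned long flags;

            spin_lock_irqsave(&iommu->lock, flags);
            __do_sync(iommu);
            spin_unlock_irqrestore(&iommu->lock, flags);
    }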
@@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
         return ret;
 }
 
+static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+                                          u16 domid, int pde, int s)
+{
+        memset(cmd, 0, sizeof(*cmd));
+        address &= PAGE_MASK;
+        CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
+        cmd->data[1] |= domid;
+        cmd->data[2] = lower_32_bits(address);
+        cmd->data[3] = upper_32_bits(address);
+        if (s) /* size bit - we flush more than one 4kb page */
+                cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
+        if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
+                cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+}
+
 /*
  * Generic command send function for invalidating TLB entries
  */
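Factoring the command construction into __iommu_build_inv_iommu_pages() lets both the per-IOMMU path and the new iommu_flush_domain() build identical invalidate commands. The 64-bit address is split across two 32-bit command words; a runnable user-space sketch of that split (the macros mimic the kernel's lower_32_bits()/upper_32_bits(), simplified for 64-bit hosts):

    #include <stdio.h>
    #include <stdint.h>

    #define lower_32_bits(n) ((uint32_t)(n))
    #define upper_32_bits(n) ((uint32_t)((n) >> 32))

    int main(void)
    {
            uint64_t address = 0x123456789000ULL;  /* page-aligned I/O address */
            uint32_t data2 = lower_32_bits(address);
            uint32_t data3 = upper_32_bits(address);

            printf("data[2]=0x%08x data[3]=0x%08x\n", data2, data3);
            /* prints: data[2]=0x56789000 data[3]=0x00001234 */
            return 0;
    }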
@@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
         struct iommu_cmd cmd;
         int ret;
 
-        memset(&cmd, 0, sizeof(cmd));
-        address &= PAGE_MASK;
-        CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
-        cmd.data[1] |= domid;
-        cmd.data[2] = lower_32_bits(address);
-        cmd.data[3] = upper_32_bits(address);
-        if (s) /* size bit - we flush more than one 4kb page */
-                cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
-        if (pde) /* PDE bit - we wan't flush everything not only the PTEs */
-                cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
+        __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
 
         ret = iommu_queue_command(iommu, &cmd);
 
@@ -321,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
 {
         u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
 
+        INC_STATS_COUNTER(domain_flush_single);
+
         iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
 }
 
+/*
+ * This function is used to flush the IO/TLB for a given protection domain
+ * on every IOMMU in the system
+ */
+static void iommu_flush_domain(u16 domid)
+{
+        unsigned long flags;
+        struct amd_iommu *iommu;
+        struct iommu_cmd cmd;
+
+        INC_STATS_COUNTER(domain_flush_all);
+
+        __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
+                                      domid, 1, 1);
+
+        list_for_each_entry(iommu, &amd_iommu_list, list) {
+                spin_lock_irqsave(&iommu->lock, flags);
+                __iommu_queue_command(iommu, &cmd);
+                __iommu_completion_wait(iommu);
+                __iommu_wait_for_completion(iommu);
+                spin_unlock_irqrestore(&iommu->lock, flags);
+        }
+}
+
 /****************************************************************************
  *
  * The functions below are used to create the page table mappings for
@@ -338,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
  * supporting all features of AMD IOMMU page tables like level skipping
  * and full 64 bit address spaces.
  */
-static int iommu_map(struct protection_domain *dom,
-                     unsigned long bus_addr,
-                     unsigned long phys_addr,
-                     int prot)
+static int iommu_map_page(struct protection_domain *dom,
+                          unsigned long bus_addr,
+                          unsigned long phys_addr,
+                          int prot)
 {
         u64 __pte, *pte, *page;
 
@@ -388,6 +514,28 @@ static int iommu_map(struct protection_domain *dom,
         return 0;
 }
 
+static void iommu_unmap_page(struct protection_domain *dom,
+                             unsigned long bus_addr)
+{
+        u64 *pte;
+
+        pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
+
+        if (!IOMMU_PTE_PRESENT(*pte))
+                return;
+
+        pte = IOMMU_PTE_PAGE(*pte);
+        pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+        if (!IOMMU_PTE_PRESENT(*pte))
+                return;
+
+        pte = IOMMU_PTE_PAGE(*pte);
+        pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
+
+        *pte = 0;
+}
+
 /*
  * This function checks if a specific unity mapping entry is needed for
  * this specific IOMMU.
@@ -440,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
 
         for (addr = e->address_start; addr < e->address_end;
              addr += PAGE_SIZE) {
-                ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
+                ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
                 if (ret)
                         return ret;
                 /*
@@ -571,6 +719,16 @@ static u16 domain_id_alloc(void)
         return id;
 }
 
+static void domain_id_free(int id)
+{
+        unsigned long flags;
+
+        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+        if (id > 0 && id < MAX_DOMAIN_ID)
+                __clear_bit(id, amd_iommu_pd_alloc_bitmap);
+        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
 /*
  * Used to reserve address ranges in the aperture (e.g. for exclusion
  * ranges).
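domain_id_free() is the counterpart to domain_id_alloc(): domain ids live in a bitmap, where bit 0 is presumably pre-set at init so id 0 is never handed out. A toy user-space version of the allocator pair, without the locking (all names here are illustrative):

    #include <stdio.h>

    #define MAX_ID 64
    static unsigned long long id_bitmap = 1ULL; /* bit 0 pre-set: id 0 is invalid */

    static int id_alloc(void)
    {
            int id;

            for (id = 1; id < MAX_ID; id++)
                    if (!(id_bitmap & (1ULL << id))) {
                            id_bitmap |= 1ULL << id;
                            return id;
                    }
            return -1;
    }

    static void id_free(int id)
    {
            if (id > 0 && id < MAX_ID) /* same range check as domain_id_free() */
                    id_bitmap &= ~(1ULL << id);
    }

    int main(void)
    {
            int id = id_alloc();

            printf("allocated id %d\n", id);  /* prints: allocated id 1 */
            id_free(id);
            return 0;
    }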
@@ -587,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
         iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
-static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
+static void free_pagetable(struct protection_domain *domain)
 {
         int i, j;
         u64 *p1, *p2, *p3;
 
-        p1 = dma_dom->domain.pt_root;
+        p1 = domain->pt_root;
 
         if (!p1)
                 return;
@@ -613,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
         }
 
         free_page((unsigned long)p1);
+
+        domain->pt_root = NULL;
 }
 
 /*
@@ -624,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
         if (!dom)
                 return;
 
-        dma_ops_free_pagetable(dom);
+        free_pagetable(&dom->domain);
 
         kfree(dom->pte_pages);
 
@@ -663,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
                 goto free_dma_dom;
         dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
         dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+        dma_dom->domain.flags = PD_DMA_OPS_MASK;
         dma_dom->domain.priv = dma_dom;
         if (!dma_dom->domain.pt_root)
                 goto free_dma_dom;
@@ -725,6 +886,15 @@ free_dma_dom:
 }
 
 /*
+ * little helper function to check whether a given protection domain is a
+ * dma_ops domain
+ */
+static bool dma_ops_domain(struct protection_domain *domain)
+{
+        return domain->flags & PD_DMA_OPS_MASK;
+}
+
+/*
  * Find out the protection domain structure for a given PCI device. This
  * will give us the pointer to the page table root for example.
  */
@@ -744,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid)
  * If a device is not yet associated with a domain, this function
  * assigns it to one and makes it visible to the hardware
  */
-static void set_device_domain(struct amd_iommu *iommu,
-                              struct protection_domain *domain,
-                              u16 devid)
+static void attach_device(struct amd_iommu *iommu,
+                          struct protection_domain *domain,
+                          u16 devid)
 {
         unsigned long flags;
-
         u64 pte_root = virt_to_phys(domain->pt_root);
 
+        domain->dev_cnt += 1;
+
         pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
                     << DEV_ENTRY_MODE_SHIFT;
         pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@@ -767,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu,
         iommu_queue_inv_dev_entry(iommu, devid);
 }
 
+/*
+ * Removes a device from a protection domain (unlocked)
+ */
+static void __detach_device(struct protection_domain *domain, u16 devid)
+{
+
+        /* lock domain */
+        spin_lock(&domain->lock);
+
+        /* remove domain from the lookup table */
+        amd_iommu_pd_table[devid] = NULL;
+
+        /* remove entry from the device table seen by the hardware */
+        amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
+        amd_iommu_dev_table[devid].data[1] = 0;
+        amd_iommu_dev_table[devid].data[2] = 0;
+
+        /* decrease reference counter */
+        domain->dev_cnt -= 1;
+
+        /* ready */
+        spin_unlock(&domain->lock);
+}
+
+/*
+ * Removes a device from a protection domain (with devtable_lock held)
+ */
+static void detach_device(struct protection_domain *domain, u16 devid)
+{
+        unsigned long flags;
+
+        /* lock device table */
+        write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+        __detach_device(domain, devid);
+        write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int device_change_notifier(struct notifier_block *nb,
+                                  unsigned long action, void *data)
+{
+        struct device *dev = data;
+        struct pci_dev *pdev = to_pci_dev(dev);
+        u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
+        struct protection_domain *domain;
+        struct dma_ops_domain *dma_domain;
+        struct amd_iommu *iommu;
+        int order = amd_iommu_aperture_order;
+        unsigned long flags;
+
+        if (devid > amd_iommu_last_bdf)
+                goto out;
+
+        devid = amd_iommu_alias_table[devid];
+
+        iommu = amd_iommu_rlookup_table[devid];
+        if (iommu == NULL)
+                goto out;
+
+        domain = domain_for_device(devid);
+
+        if (domain && !dma_ops_domain(domain))
+                WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
+                          "to a non-dma-ops domain\n", dev_name(dev));
+
+        switch (action) {
+        case BUS_NOTIFY_BOUND_DRIVER:
+                if (domain)
+                        goto out;
+                dma_domain = find_protection_domain(devid);
+                if (!dma_domain)
+                        dma_domain = iommu->default_dom;
+                attach_device(iommu, &dma_domain->domain, devid);
+                printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
+                       "device %s\n", dma_domain->domain.id, dev_name(dev));
+                break;
+        case BUS_NOTIFY_UNBIND_DRIVER:
+                if (!domain)
+                        goto out;
+                detach_device(domain, devid);
+                break;
+        case BUS_NOTIFY_ADD_DEVICE:
+                /* allocate a protection domain if a device is added */
+                dma_domain = find_protection_domain(devid);
+                if (dma_domain)
+                        goto out;
+                dma_domain = dma_ops_domain_alloc(iommu, order);
+                if (!dma_domain)
+                        goto out;
+                dma_domain->target_dev = devid;
+
+                spin_lock_irqsave(&iommu_pd_list_lock, flags);
+                list_add_tail(&dma_domain->list, &iommu_pd_list);
+                spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
+
+                break;
+        default:
+                goto out;
+        }
+
+        iommu_queue_inv_dev_entry(iommu, devid);
+        iommu_completion_wait(iommu);
+
+out:
+        return 0;
+}
+
+struct notifier_block device_nb = {
+        .notifier_call = device_change_notifier,
+};
+
 /*****************************************************************************
  *
  * The next functions belong to the dma_ops mapping/unmapping code.
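device_nb makes the driver react to driver bind/unbind and device-add events. The block does nothing until it is registered with a bus; that registration is not in this hunk, so the following is only a sketch of how such a notifier is normally wired up (the init function name is made up, and attaching to pci_bus_type is an assumption based on the PCI-only devid handling above):

    static int __init my_notifier_setup(void)
    {
            /* assumption: the AMD IOMMU cares about PCI devices only */
            return bus_register_notifier(&pci_bus_type, &device_nb);
    }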
@@ -802,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
         list_for_each_entry(entry, &iommu_pd_list, list) {
                 if (entry->target_dev == devid) {
                         ret = entry;
-                        list_del(&ret->list);
                         break;
                 }
         }
@@ -853,14 +1133,13 @@ static int get_device_resources(struct device *dev,
         if (!dma_dom)
                 dma_dom = (*iommu)->default_dom;
         *domain = &dma_dom->domain;
-        set_device_domain(*iommu, *domain, *bdf);
+        attach_device(*iommu, *domain, *bdf);
         printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
-                        "device ", (*domain)->id);
-        print_devid(_bdf, 1);
+                        "device %s\n", (*domain)->id, dev_name(dev));
         }
 
         if (domain_for_device(_bdf) == NULL)
-                set_device_domain(*iommu, *domain, _bdf);
+                attach_device(*iommu, *domain, _bdf);
 
         return 1;
 }
@@ -946,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev,
         pages = iommu_num_pages(paddr, size, PAGE_SIZE);
         paddr &= PAGE_MASK;
 
+        INC_STATS_COUNTER(total_map_requests);
+
+        if (pages > 1)
+                INC_STATS_COUNTER(cross_page);
+
         if (align)
                 align_mask = (1UL << get_order(size)) - 1;
 
@@ -962,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev,
         }
         address += offset;
 
+        ADD_STATS_COUNTER(alloced_io_mem, size);
+
         if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
                 iommu_flush_tlb(iommu, dma_dom->domain.id);
                 dma_dom->need_flush = false;
@@ -998,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu,
                 start += PAGE_SIZE;
         }
 
+        SUB_STATS_COUNTER(alloced_io_mem, size);
+
         dma_ops_free_addresses(dma_dom, dma_addr, pages);
 
         if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@@ -1019,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
         dma_addr_t addr;
         u64 dma_mask;
 
+        INC_STATS_COUNTER(cnt_map_single);
+
         if (!check_device(dev))
                 return bad_dma_address;
 
@@ -1030,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
                 /* device not handled by any AMD IOMMU */
                 return (dma_addr_t)paddr;
 
+        if (!dma_ops_domain(domain))
+                return bad_dma_address;
+
         spin_lock_irqsave(&domain->lock, flags);
         addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
                             dma_mask);
@@ -1055,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
1055 struct protection_domain *domain; 1348 struct protection_domain *domain;
1056 u16 devid; 1349 u16 devid;
1057 1350
1351 INC_STATS_COUNTER(cnt_unmap_single);
1352
1058 if (!check_device(dev) || 1353 if (!check_device(dev) ||
1059 !get_device_resources(dev, &iommu, &domain, &devid)) 1354 !get_device_resources(dev, &iommu, &domain, &devid))
1060 /* device not handled by any AMD IOMMU */ 1355 /* device not handled by any AMD IOMMU */
1061 return; 1356 return;
1062 1357
1358 if (!dma_ops_domain(domain))
1359 return;
1360
1063 spin_lock_irqsave(&domain->lock, flags); 1361 spin_lock_irqsave(&domain->lock, flags);
1064 1362
1065 __unmap_single(iommu, domain->priv, dma_addr, size, dir); 1363 __unmap_single(iommu, domain->priv, dma_addr, size, dir);
@@ -1104,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1104 int mapped_elems = 0; 1402 int mapped_elems = 0;
1105 u64 dma_mask; 1403 u64 dma_mask;
1106 1404
1405 INC_STATS_COUNTER(cnt_map_sg);
1406
1107 if (!check_device(dev)) 1407 if (!check_device(dev))
1108 return 0; 1408 return 0;
1109 1409
@@ -1114,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
1114 if (!iommu || !domain) 1414 if (!iommu || !domain)
1115 return map_sg_no_iommu(dev, sglist, nelems, dir); 1415 return map_sg_no_iommu(dev, sglist, nelems, dir);
1116 1416
1417 if (!dma_ops_domain(domain))
1418 return 0;
1419
1117 spin_lock_irqsave(&domain->lock, flags); 1420 spin_lock_irqsave(&domain->lock, flags);
1118 1421
1119 for_each_sg(sglist, s, nelems, i) { 1422 for_each_sg(sglist, s, nelems, i) {
@@ -1163,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
1163 u16 devid; 1466 u16 devid;
1164 int i; 1467 int i;
1165 1468
1469 INC_STATS_COUNTER(cnt_unmap_sg);
1470
1166 if (!check_device(dev) || 1471 if (!check_device(dev) ||
1167 !get_device_resources(dev, &iommu, &domain, &devid)) 1472 !get_device_resources(dev, &iommu, &domain, &devid))
1168 return; 1473 return;
1169 1474
1475 if (!dma_ops_domain(domain))
1476 return;
1477
1170 spin_lock_irqsave(&domain->lock, flags); 1478 spin_lock_irqsave(&domain->lock, flags);
1171 1479
1172 for_each_sg(sglist, s, nelems, i) { 1480 for_each_sg(sglist, s, nelems, i) {
@@ -1194,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
1194 phys_addr_t paddr; 1502 phys_addr_t paddr;
1195 u64 dma_mask = dev->coherent_dma_mask; 1503 u64 dma_mask = dev->coherent_dma_mask;
1196 1504
1505 INC_STATS_COUNTER(cnt_alloc_coherent);
1506
1197 if (!check_device(dev)) 1507 if (!check_device(dev))
1198 return NULL; 1508 return NULL;
1199 1509
@@ -1212,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
1212 return virt_addr; 1522 return virt_addr;
1213 } 1523 }
1214 1524
1525 if (!dma_ops_domain(domain))
1526 goto out_free;
1527
1215 if (!dma_mask) 1528 if (!dma_mask)
1216 dma_mask = *dev->dma_mask; 1529 dma_mask = *dev->dma_mask;
1217 1530
@@ -1220,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
1220 *dma_addr = __map_single(dev, iommu, domain->priv, paddr, 1533 *dma_addr = __map_single(dev, iommu, domain->priv, paddr,
1221 size, DMA_BIDIRECTIONAL, true, dma_mask); 1534 size, DMA_BIDIRECTIONAL, true, dma_mask);
1222 1535
1223 if (*dma_addr == bad_dma_address) { 1536 if (*dma_addr == bad_dma_address)
1224 free_pages((unsigned long)virt_addr, get_order(size)); 1537 goto out_free;
1225 virt_addr = NULL;
1226 goto out;
1227 }
1228 1538
1229 iommu_completion_wait(iommu); 1539 iommu_completion_wait(iommu);
1230 1540
1231out:
1232 spin_unlock_irqrestore(&domain->lock, flags); 1541 spin_unlock_irqrestore(&domain->lock, flags);
1233 1542
1234 return virt_addr; 1543 return virt_addr;
1544
1545out_free:
1546
1547 free_pages((unsigned long)virt_addr, get_order(size));
1548
1549 return NULL;
1235} 1550}
1236 1551
1237/* 1552/*
@@ -1245,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size,
1245 struct protection_domain *domain; 1560 struct protection_domain *domain;
1246 u16 devid; 1561 u16 devid;
1247 1562
1563 INC_STATS_COUNTER(cnt_free_coherent);
1564
1248 if (!check_device(dev)) 1565 if (!check_device(dev))
1249 return; 1566 return;
1250 1567
@@ -1253,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size,
1253 if (!iommu || !domain) 1570 if (!iommu || !domain)
1254 goto free_mem; 1571 goto free_mem;
1255 1572
1573 if (!dma_ops_domain(domain))
1574 goto free_mem;
1575
1256 spin_lock_irqsave(&domain->lock, flags); 1576 spin_lock_irqsave(&domain->lock, flags);
1257 1577
1258 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); 1578 __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@@ -1296,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
1296 * we don't need to preallocate the protection domains anymore. 1616 * we don't need to preallocate the protection domains anymore.
1297 * For now we have to. 1617 * For now we have to.
1298 */ 1618 */
1299void prealloc_protection_domains(void) 1619static void prealloc_protection_domains(void)
1300{ 1620{
1301 struct pci_dev *dev = NULL; 1621 struct pci_dev *dev = NULL;
1302 struct dma_ops_domain *dma_dom; 1622 struct dma_ops_domain *dma_dom;
@@ -1305,7 +1625,7 @@ void prealloc_protection_domains(void)
1305 u16 devid; 1625 u16 devid;
1306 1626
1307 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { 1627 while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1308 devid = (dev->bus->number << 8) | dev->devfn; 1628 devid = calc_devid(dev->bus->number, dev->devfn);
1309 if (devid > amd_iommu_last_bdf) 1629 if (devid > amd_iommu_last_bdf)
1310 continue; 1630 continue;
1311 devid = amd_iommu_alias_table[devid]; 1631 devid = amd_iommu_alias_table[devid];
@@ -1352,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void)
1352 iommu->default_dom = dma_ops_domain_alloc(iommu, order); 1672 iommu->default_dom = dma_ops_domain_alloc(iommu, order);
1353 if (iommu->default_dom == NULL) 1673 if (iommu->default_dom == NULL)
1354 return -ENOMEM; 1674 return -ENOMEM;
1675 iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
1355 ret = iommu_init_unity_mappings(iommu); 1676 ret = iommu_init_unity_mappings(iommu);
1356 if (ret) 1677 if (ret)
1357 goto free_domains; 1678 goto free_domains;
@@ -1375,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void)
1375 /* Make the driver finally visible to the drivers */ 1696 /* Make the driver finally visible to the drivers */
1376 dma_ops = &amd_iommu_dma_ops; 1697 dma_ops = &amd_iommu_dma_ops;
1377 1698
1699 register_iommu(&amd_iommu_ops);
1700
1701 bus_register_notifier(&pci_bus_type, &device_nb);
1702
1703 amd_iommu_stats_init();
1704
1378 return 0; 1705 return 0;
1379 1706
1380free_domains: 1707free_domains:
@@ -1386,3 +1713,224 @@ free_domains:
1386 1713
1387 return ret; 1714 return ret;
1388} 1715}
+
+/*****************************************************************************
+ *
+ * The following functions belong to the exported interface of AMD IOMMU
+ *
+ * This interface allows access to lower level functions of the IOMMU
+ * like protection domain handling and assignment of devices to domains
+ * which is not possible with the dma_ops interface.
+ *
+ *****************************************************************************/
+
+static void cleanup_domain(struct protection_domain *domain)
+{
+	unsigned long flags;
+	u16 devid;
+
+	write_lock_irqsave(&amd_iommu_devtable_lock, flags);
+
+	for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
+		if (amd_iommu_pd_table[devid] == domain)
+			__detach_device(domain, devid);
+
+	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
+}
+
+static int amd_iommu_domain_init(struct iommu_domain *dom)
+{
+	struct protection_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return -ENOMEM;
+
+	spin_lock_init(&domain->lock);
+	domain->mode = PAGE_MODE_3_LEVEL;
+	domain->id = domain_id_alloc();
+	if (!domain->id)
+		goto out_free;
+	domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!domain->pt_root)
+		goto out_free;
+
+	dom->priv = domain;
+
+	return 0;
+
+out_free:
+	kfree(domain);
+
+	return -ENOMEM;
+}
+
+static void amd_iommu_domain_destroy(struct iommu_domain *dom)
+{
+	struct protection_domain *domain = dom->priv;
+
+	if (!domain)
+		return;
+
+	if (domain->dev_cnt > 0)
+		cleanup_domain(domain);
+
+	BUG_ON(domain->dev_cnt != 0);
+
+	free_pagetable(domain);
+
+	domain_id_free(domain->id);
+
+	kfree(domain);
+
+	dom->priv = NULL;
+}
+
+static void amd_iommu_detach_device(struct iommu_domain *dom,
+				    struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid > 0)
+		detach_device(domain, devid);
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return;
+
+	iommu_queue_inv_dev_entry(iommu, devid);
+	iommu_completion_wait(iommu);
+}
+
+static int amd_iommu_attach_device(struct iommu_domain *dom,
+				   struct device *dev)
+{
+	struct protection_domain *domain = dom->priv;
+	struct protection_domain *old_domain;
+	struct amd_iommu *iommu;
+	struct pci_dev *pdev;
+	u16 devid;
+
+	if (dev->bus != &pci_bus_type)
+		return -EINVAL;
+
+	pdev = to_pci_dev(dev);
+
+	devid = calc_devid(pdev->bus->number, pdev->devfn);
+
+	if (devid >= amd_iommu_last_bdf ||
+	    devid != amd_iommu_alias_table[devid])
+		return -EINVAL;
+
+	iommu = amd_iommu_rlookup_table[devid];
+	if (!iommu)
+		return -EINVAL;
+
+	old_domain = domain_for_device(devid);
+	if (old_domain)
+		return -EBUSY;
+
+	attach_device(iommu, domain, devid);
+
+	iommu_completion_wait(iommu);
+
+	return 0;
+}
+
+static int amd_iommu_map_range(struct iommu_domain *dom,
+			       unsigned long iova, phys_addr_t paddr,
+			       size_t size, int iommu_prot)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
+	int prot = 0;
+	int ret;
+
+	if (iommu_prot & IOMMU_READ)
+		prot |= IOMMU_PROT_IR;
+	if (iommu_prot & IOMMU_WRITE)
+		prot |= IOMMU_PROT_IW;
+
+	iova &= PAGE_MASK;
+	paddr &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		ret = iommu_map_page(domain, iova, paddr, prot);
+		if (ret)
+			return ret;
+
+		iova += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static void amd_iommu_unmap_range(struct iommu_domain *dom,
+				  unsigned long iova, size_t size)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
+
+	iova &= PAGE_MASK;
+
+	for (i = 0; i < npages; ++i) {
+		iommu_unmap_page(domain, iova);
+		iova += PAGE_SIZE;
+	}
+
+	iommu_flush_domain(domain->id);
+}
+
+static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
+					  unsigned long iova)
+{
+	struct protection_domain *domain = dom->priv;
+	unsigned long offset = iova & ~PAGE_MASK;
+	phys_addr_t paddr;
+	u64 *pte;
+
+	pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	pte = IOMMU_PTE_PAGE(*pte);
+	pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
+
+	if (!IOMMU_PTE_PRESENT(*pte))
+		return 0;
+
+	paddr = *pte & IOMMU_PAGE_MASK;
+	paddr |= offset;
+
+	return paddr;
+}
+
+static struct iommu_ops amd_iommu_ops = {
+	.domain_init = amd_iommu_domain_init,
+	.domain_destroy = amd_iommu_domain_destroy,
+	.attach_dev = amd_iommu_attach_device,
+	.detach_dev = amd_iommu_detach_device,
+	.map = amd_iommu_map_range,
+	.unmap = amd_iommu_unmap_range,
+	.iova_to_phys = amd_iommu_iova_to_phys,
+};
+
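The iommu_ops table above is what register_iommu() publishes from amd_iommu_init_dma_ops(). A minimal sketch of how a kernel-side consumer could drive it through the generic IOMMU API of this release (linux/iommu.h); the function name, device pointer, and error handling here are illustrative assumptions, not part of the patch:

#include <linux/iommu.h>
#include <linux/pci.h>

/* hypothetical helper: give one PCI device a direct mapping */
static int example_direct_map(struct pci_dev *pdev, unsigned long iova,
			      phys_addr_t paddr, size_t size)
{
	struct iommu_domain *dom;
	int ret;

	if (!iommu_found())		/* no driver called register_iommu() */
		return -ENODEV;

	dom = iommu_domain_alloc();	/* ends up in ->domain_init() */
	if (!dom)
		return -ENOMEM;

	ret = iommu_attach_device(dom, &pdev->dev);	/* ->attach_dev() */
	if (ret)
		goto out_free;

	/* ->map() above walks this range page by page */
	ret = iommu_map_range(dom, iova, paddr, size,
			      IOMMU_READ | IOMMU_WRITE);
	if (ret)
		goto out_detach;

	return 0;	/* caller now owns the domain */

out_detach:
	iommu_detach_device(dom, &pdev->dev);
out_free:
	iommu_domain_free(dom);
	return ret;
}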
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index c625800c55ca..42c33cebf00f 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf;	/* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
+bool amd_iommu_isolate = true;		/* if true, device isolation is
+					   enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */

 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
@@ -243,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
 }

 /* Function to enable the hardware */
-void __init iommu_enable(struct amd_iommu *iommu)
+static void __init iommu_enable(struct amd_iommu *iommu)
 {
-	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
-	       "at %02x:%02x.%x cap 0x%hx\n",
-	       iommu->dev->bus->number,
-	       PCI_SLOT(iommu->dev->devfn),
-	       PCI_FUNC(iommu->dev->devfn),
-	       iommu->cap_ptr);
+	printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
+	       dev_name(&iommu->dev->dev), iommu->cap_ptr);

 	iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
 }

 /* Function to enable IOMMU event logging and event interrupts */
-void __init iommu_enable_event_logging(struct amd_iommu *iommu)
+static void __init iommu_enable_event_logging(struct amd_iommu *iommu)
 {
 	iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN);
 	iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
@@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
 {
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
-			amd_iommu_isolate = 1;
+			amd_iommu_isolate = true;
 		if (strncmp(str, "share", 5) == 0)
-			amd_iommu_isolate = 0;
+			amd_iommu_isolate = false;
 		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
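The option string above is scanned keyword by keyword with strncmp(), so combined settings such as "amd_iommu=isolate,fullflush" work without tokenizing. A minimal sketch of the same __setup() pattern, for a hypothetical "example_opt=" parameter that is not part of this patch:

#include <linux/init.h>
#include <linux/string.h>
#include <linux/types.h>

static bool example_flag;

static int __init parse_example_options(char *str)
{
	/* scan for keywords anywhere in the option string */
	for (; *str; ++str) {
		if (strncmp(str, "on", 2) == 0)
			example_flag = true;
		if (strncmp(str, "off", 3) == 0)
			example_flag = false;
	}
	return 1;	/* option consumed */
}
__setup("example_opt=", parse_example_options);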
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 6b7f824db160..b13d3c4dbd42 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -98,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer);
 #ifdef HAVE_X2APIC
 int x2apic;
 /* x2apic enabled before OS handover */
-int x2apic_preenabled;
-int disable_x2apic;
+static int x2apic_preenabled;
+static int disable_x2apic;
 static __init int setup_nox2apic(char *str)
 {
 	disable_x2apic = 1;
@@ -140,7 +140,7 @@ static int lapic_next_event(unsigned long delta,
 			    struct clock_event_device *evt);
 static void lapic_timer_setup(enum clock_event_mode mode,
 			      struct clock_event_device *evt);
-static void lapic_timer_broadcast(const cpumask_t *mask);
+static void lapic_timer_broadcast(const struct cpumask *mask);
 static void apic_pm_activate(void);

 /*
@@ -226,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id)
 	apic_write(APIC_ICR, low);
 }

-u64 xapic_icr_read(void)
+static u64 xapic_icr_read(void)
 {
 	u32 icr1, icr2;

@@ -266,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id)
 	wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
 }

-u64 x2apic_icr_read(void)
+static u64 x2apic_icr_read(void)
 {
 	unsigned long val;

@@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
 /*
  * Local APIC timer broadcast function
  */
-static void lapic_timer_broadcast(const cpumask_t *mask)
+static void lapic_timer_broadcast(const struct cpumask *mask)
 {
 #ifdef CONFIG_SMP
 	send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
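These hunks are part of the cpumask_t-to-struct-cpumask migration: passing a const struct cpumask * avoids copying an NR_CPUS-bit bitmap through function arguments. A sketch of the convention, with send_example_IPI() as a stand-in for the real mask-based IPI primitive:

#include <linux/cpumask.h>

static void send_example_IPI(const struct cpumask *mask, int vector)
{
	/* hardware-specific delivery would go here */
}

static void ipi_one_cpu(int cpu, int vector)
{
	/* cpumask_of() returns a pointer into a static table; no copy */
	send_example_IPI(cpumask_of(cpu), vector);
}

static void ipi_all_online(int vector)
{
	send_example_IPI(cpu_online_mask, vector);
}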
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c
index 2a0a2a3cac26..f63882728d91 100644
--- a/arch/x86/kernel/bios_uv.c
+++ b/arch/x86/kernel/bios_uv.c
@@ -25,7 +25,7 @@
 #include <asm/uv/bios.h>
 #include <asm/uv/uv_hub.h>

-struct uv_systab uv_systab;
+static struct uv_systab uv_systab;

 s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 42e0853030cb..3f95a40f718a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -355,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
 		printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
 	} else if (smp_num_siblings > 1) {

-		if (smp_num_siblings > NR_CPUS) {
+		if (smp_num_siblings > nr_cpu_ids) {
 			printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
 			       smp_num_siblings);
 			smp_num_siblings = 1;
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 88ea02dcb622..28102ad1a363 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -517,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
 	}
 }

+static void free_acpi_perf_data(void)
+{
+	unsigned int i;
+
+	/* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */
+	for_each_possible_cpu(i)
+		free_cpumask_var(per_cpu_ptr(acpi_perf_data, i)
+				 ->shared_cpu_map);
+	free_percpu(acpi_perf_data);
+}
+
 /*
  * acpi_cpufreq_early_init - initialize ACPI P-States library
  *
@@ -527,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu)
  */
 static int __init acpi_cpufreq_early_init(void)
 {
+	unsigned int i;
 	dprintk("acpi_cpufreq_early_init\n");

 	acpi_perf_data = alloc_percpu(struct acpi_processor_performance);
@@ -534,6 +546,16 @@ static int __init acpi_cpufreq_early_init(void)
 		dprintk("Memory allocation error for acpi_perf_data.\n");
 		return -ENOMEM;
 	}
+	for_each_possible_cpu(i) {
+		if (!alloc_cpumask_var_node(
+			&per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map,
+			GFP_KERNEL, cpu_to_node(i))) {
+
+			/* Freeing a NULL pointer is OK: alloc_percpu zeroes. */
+			free_acpi_perf_data();
+			return -ENOMEM;
+		}
+	}

 	/* Do initialization in ACPI core */
 	acpi_processor_preregister_performance(acpi_perf_data);
@@ -604,9 +626,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 	 */
 	if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL ||
 	    policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) {
-		policy->cpus = perf->shared_cpu_map;
+		cpumask_copy(&policy->cpus, perf->shared_cpu_map);
 	}
-	policy->related_cpus = perf->shared_cpu_map;
+	cpumask_copy(&policy->related_cpus, perf->shared_cpu_map);

 #ifdef CONFIG_SMP
 	dmi_check_system(sw_any_bug_dmi_table);
@@ -795,7 +817,7 @@ static int __init acpi_cpufreq_init(void)

 	ret = cpufreq_register_driver(&acpi_cpufreq_driver);
 	if (ret)
-		free_percpu(acpi_perf_data);
+		free_acpi_perf_data();

 	return ret;
 }
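shared_cpu_map became an off-stack cpumask_var_t in this series, which is why every possible CPU's map must now be explicitly allocated and freed as above. A minimal sketch of the alloc_cpumask_var() idiom in isolation; with CONFIG_CPUMASK_OFFSTACK=y the variable is a pointer that must be allocated, otherwise it is a plain array and the calls are nearly free:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static int example_use_cpumask(void)
{
	cpumask_var_t mask;

	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
		return -ENOMEM;

	cpumask_copy(mask, cpu_online_mask);
	/* ... use mask ... */

	free_cpumask_var(mask);	/* must pair with the allocation */
	return 0;
}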
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
index 7c7d56b43136..1b446d79a8fd 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void)
 		goto err0;
 	}

+	if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map,
+			       GFP_KERNEL)) {
+		retval = -ENOMEM;
+		goto err05;
+	}
+
 	if (acpi_processor_register_performance(acpi_processor_perf, 0)) {
 		retval = -EIO;
 		goto err1;
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void)
 err2:
 	acpi_processor_unregister_performance(acpi_processor_perf, 0);
 err1:
+	free_cpumask_var(acpi_processor_perf->shared_cpu_map);
+err05:
 	kfree(acpi_processor_perf);
 err0:
 	printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n");
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) {
 #ifdef CONFIG_X86_POWERNOW_K7_ACPI
 	if (acpi_processor_perf) {
 		acpi_processor_unregister_performance(acpi_processor_perf, 0);
+		free_cpumask_var(acpi_processor_perf->shared_cpu_map);
 		kfree(acpi_processor_perf);
 	}
 #endif
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 7f05f44b97e9..c3c9adbaa26f 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned
 static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 {
 	struct cpufreq_frequency_table *powernow_table;
-	int ret_val;
+	int ret_val = -ENODEV;

 	if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) {
 		dprintk("register performance failed: bad ACPI data\n");
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data)
 	/* notify BIOS that we exist */
 	acpi_processor_notify_smm(THIS_MODULE);

+	if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) {
+		printk(KERN_ERR PFX
+				"unable to alloc powernow_k8_data cpumask\n");
+		ret_val = -ENOMEM;
+		goto err_out_mem;
+	}
+
 	return 0;

 err_out_mem:
@@ -826,7 +833,7 @@ err_out:
 	/* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */
 	data->acpi_data.state_count = 0;

-	return -ENODEV;
+	return ret_val;
 }

 static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table)
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
 {
 	if (data->acpi_data.state_count)
 		acpi_processor_unregister_performance(&data->acpi_data, data->cpu);
+	free_cpumask_var(data->acpi_data.shared_cpu_map);
 }

 #else
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	data->cpu = pol->cpu;
 	data->currpstate = HW_PSTATE_INVALID;

-	if (powernow_k8_cpu_init_acpi(data)) {
+	rc = powernow_k8_cpu_init_acpi(data);
+	if (rc) {
 		/*
 		 * Use the PSB BIOS structure. This is only available on
 		 * an UP version, and is deprecated by AMD.
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 			       "ACPI maintainers and complain to your BIOS "
 			       "vendor.\n");
 #endif
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		if (pol->cpu != 0) {
 			printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for "
 			       "CPU other than CPU0. Complain to your BIOS "
 			       "vendor.\n");
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 		rc = find_psb_table(data);
 		if (rc) {
-			kfree(data);
-			return -ENODEV;
+			goto err_out;
 		}
 	}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index c6ecda64f5f1..48533d77be78 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -534,7 +534,7 @@ static void __cpuinit free_cache_attributes(unsigned int cpu)
 	per_cpu(cpuid4_info, cpu) = NULL;
 }

-static void get_cpu_leaves(void *_retval)
+static void __cpuinit get_cpu_leaves(void *_retval)
 {
 	int j, *retval = _retval, cpu = smp_processor_id();

diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index d6ec7ec30274..d259e5d2e054 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -824,16 +824,14 @@ static int enable_mtrr_cleanup __initdata =

 static int __init disable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
+	enable_mtrr_cleanup = 0;
 	return 0;
 }
 early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);

 static int __init enable_mtrr_cleanup_setup(char *str)
 {
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
+	enable_mtrr_cleanup = 1;
 	return 0;
 }
 early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup);
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 72cefd1e649b..2ac1f0c2beb3 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -39,10 +39,10 @@
 #include <linux/device.h>
 #include <linux/cpu.h>
 #include <linux/notifier.h>
+#include <linux/uaccess.h>

 #include <asm/processor.h>
 #include <asm/msr.h>
-#include <asm/uaccess.h>
 #include <asm/system.h>

 static struct class *cpuid_class;
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig)
 }

 static ssize_t cpuid_read(struct file *file, char __user *buf,
-			  size_t count, loff_t * ppos)
+			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
 	struct cpuid_regs cmd;
@@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file)
 	unsigned int cpu;
 	struct cpuinfo_x86 *c;
 	int ret = 0;

 	lock_kernel();

 	cpu = iminor(file->f_path.dentry->d_inode);
-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 23b138e31e9c..504ad198e4ad 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -886,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...)
 	va_list ap;

 	va_start(ap, fmt);
-	n = vscnprintf(buf, 512, fmt, ap);
+	n = vscnprintf(buf, sizeof(buf), fmt, ap);
 	early_console->write(early_console, buf, n);
 	va_end(ap);
 }
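The early_printk() fix replaces a hard-coded 512 with sizeof(buf), so the bound can never drift from the buffer definition if the array is resized. The same idiom in isolation, with an illustrative wrapper name:

#include <linux/kernel.h>

static char buf[512];

static int format_example(const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vscnprintf(buf, sizeof(buf), fmt, ap);	/* not a literal 512 */
	va_end(ap);

	return n;	/* bytes actually written, excluding the NUL */
}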
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 62895cf315ff..21bcc0e098ba 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -161,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb)
 	return current_cpu_data.initial_apicid >> index_msb;
 }

-void x2apic_send_IPI_self(int vector)
+static void x2apic_send_IPI_self(int vector)
 {
 	apic_write(APIC_SELF_IPI, vector);
 }

-void init_x2apic_ldr(void)
+static void init_x2apic_ldr(void)
 {
 	return;
 }
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 388e05a5fc17..b9a4d8c4b935 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -27,7 +27,7 @@
 #include <asm/trampoline.h>

 /* boot cpu pda */
-static struct x8664_pda _boot_cpu_pda __read_mostly;
+static struct x8664_pda _boot_cpu_pda;

 #ifdef CONFIG_SMP
 /*
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 62ecfc991e1e..3639442aa7a4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -214,11 +214,11 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu)

 	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
 	if (cfg) {
-		/* FIXME: needs alloc_cpumask_var_node() */
-		if (!alloc_cpumask_var(&cfg->domain, GFP_ATOMIC)) {
+		if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
 			kfree(cfg);
 			cfg = NULL;
-		} else if (!alloc_cpumask_var(&cfg->old_domain, GFP_ATOMIC)) {
+		} else if (!alloc_cpumask_var_node(&cfg->old_domain,
+						   GFP_ATOMIC, node)) {
 			free_cpumask_var(cfg->domain);
 			kfree(cfg);
 			cfg = NULL;
@@ -706,7 +706,7 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
 }

 #ifdef CONFIG_X86_64
-void io_apic_sync(struct irq_pin_list *entry)
+static void io_apic_sync(struct irq_pin_list *entry)
 {
 	/*
	 * Synchronize the IO-APIC and the CPU by doing
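alloc_cpumask_var_node() resolves the FIXME above by allocating the off-stack cpumask on the same NUMA node as the structure it belongs to. A sketch of that pairing; irq_cfg_like and the helper name are made up for illustration:

#include <linux/cpumask.h>
#include <linux/slab.h>

struct irq_cfg_like {
	cpumask_var_t domain;
};

static struct irq_cfg_like *example_alloc_cfg(int node)
{
	struct irq_cfg_like *cfg;

	cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
	if (!cfg)
		return NULL;

	/* keep the bitmap on the same node as the structure */
	if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
		kfree(cfg);
		return NULL;
	}
	return cfg;
}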
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index eee32b43fee3..71f1d99a635d 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -12,8 +12,8 @@
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/vmalloc.h>
+#include <linux/uaccess.h>

-#include <asm/uaccess.h>
 #include <asm/system.h>
 #include <asm/ldt.h>
 #include <asm/desc.h>
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old)
 	if (err < 0)
 		return err;

-	for(i = 0; i < old->size; i++)
+	for (i = 0; i < old->size; i++)
 		write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE);
 	return 0;
 }
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c
index efc2f361fe85..666e43df51f9 100644
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -13,8 +13,7 @@
 #include <asm/msr.h>
 #include <asm/acpi.h>
 #include <asm/mmconfig.h>
-
-#include "../pci/pci.h"
+#include <asm/pci_x86.h>

 struct pci_hostbridge_probe {
 	u32 bus;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 45e3b69808ba..c5c5b8df1dbc 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -16,14 +16,14 @@
 #include <linux/bitops.h>
 #include <linux/acpi.h>
 #include <linux/module.h>
+#include <linux/smp.h>
+#include <linux/acpi.h>

-#include <asm/smp.h>
 #include <asm/mtrr.h>
 #include <asm/mpspec.h>
 #include <asm/pgalloc.h>
 #include <asm/io_apic.h>
 #include <asm/proto.h>
-#include <asm/acpi.h>
 #include <asm/bios_ebda.h>
 #include <asm/e820.h>
 #include <asm/trampoline.h>
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 #endif

 	if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
 		set_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
 #endif
 	} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m)
 		x86_quirks->mpc_oem_pci_bus(m);

 		clear_bit(m->mpc_busid, mp_bus_not_pci);
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
 	} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
 		mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 82a7c7ed6d45..726266695b2c 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file)
 	lock_kernel();
 	cpu = iminor(file->f_path.dentry->d_inode);

-	if (cpu >= NR_CPUS || !cpu_online(cpu)) {
+	if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 		ret = -ENXIO;	/* No such CPU */
 		goto out;
 	}
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8bd1bf9622a7..45a09ccdc214 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -26,11 +26,10 @@
 #include <linux/kernel_stat.h>
 #include <linux/kdebug.h>
 #include <linux/smp.h>
+#include <linux/nmi.h>

 #include <asm/i8259.h>
 #include <asm/io_apic.h>
-#include <asm/smp.h>
-#include <asm/nmi.h>
 #include <asm/proto.h>
 #include <asm/timer.h>

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a35eaa379ff6..00c2bcd41463 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base;	/* Remapping table */
  * to trigger bugs with some popular PCI cards, in particular 3ware (but
  * has also been seen with Qlogic at least).
  */
-int iommu_fullflush = 1;
+static int iommu_fullflush = 1;

 /* Allocation bitmap for the remapping area: */
 static DEFINE_SPINLOCK(iommu_bitmap_lock);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 39643b1df061..2b46eb41643b 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -12,6 +12,7 @@
 #include <asm/proto.h>
 #include <asm/reboot_fixups.h>
 #include <asm/reboot.h>
+#include <asm/pci_x86.h>
 #include <asm/virtext.h>

 #ifdef CONFIG_X86_32
@@ -24,7 +25,6 @@

 #include <mach_ipi.h>

-
 /*
  * Power off function, if any
  */
@@ -501,7 +501,7 @@ void native_machine_shutdown(void)

 #ifdef CONFIG_X86_32
 	/* See if there has been given a command line override */
-	if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) &&
+	if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) &&
 	    cpu_online(reboot_cpu))
 		reboot_cpu_id = reboot_cpu;
 #endif
@@ -511,7 +511,7 @@ void native_machine_shutdown(void)
 		reboot_cpu_id = smp_processor_id();

 	/* Make certain I only run on the appropriate processor */
-	set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id));
+	set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id));

 	/* O.K Now that I'm on the appropriate processor,
	 * stop all of the others.
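cpumask_of() returns a pointer into a prebuilt table of single-CPU masks, where the old &cpumask_of_cpu(cpu) materialized a full NR_CPUS-bit mask first. The resulting pattern in isolation:

#include <linux/cpumask.h>
#include <linux/sched.h>

static void pin_to_cpu(int cpu)
{
	/* no mask is built on the stack; the pointer aliases a table */
	set_cpus_allowed_ptr(current, cpumask_of(cpu));
}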
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 0b63b08e7530..a4b619c33106 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -153,12 +153,10 @@ void __init setup_per_cpu_areas(void)
 	align = max_t(unsigned long, PAGE_SIZE, align);
 	size = roundup(old_size, align);

-	printk(KERN_INFO
-		"NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
+	pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
 		NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);

-	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
-			  size);
+	pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size);

 	for_each_possible_cpu(cpu) {
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -169,22 +167,15 @@ void __init setup_per_cpu_areas(void)
 		if (!node_online(node) || !NODE_DATA(node)) {
 			ptr = __alloc_bootmem(size, align,
					 __pa(MAX_DMA_ADDRESS));
-			printk(KERN_INFO
-			       "cpu %d has no node %d or node-local memory\n",
+			pr_info("cpu %d has no node %d or node-local memory\n",
				cpu, node);
-			if (ptr)
-				printk(KERN_DEBUG
-					"per cpu data for cpu%d at %016lx\n",
-					cpu, __pa(ptr));
-		}
-		else {
+			pr_debug("per cpu data for cpu%d at %016lx\n",
+				 cpu, __pa(ptr));
+		} else {
			ptr = __alloc_bootmem_node(NODE_DATA(node), size, align,
							__pa(MAX_DMA_ADDRESS));
-			if (ptr)
-				printk(KERN_DEBUG
-					"per cpu data for cpu%d on node%d "
-					"at %016lx\n",
-					cpu, node, __pa(ptr));
+			pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
+				 cpu, node, __pa(ptr));
		}
 #endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
@@ -339,25 +330,25 @@ static const cpumask_t cpu_mask_none;
 /*
  * Returns a pointer to the bitmask of CPUs on Node 'node'.
  */
-const cpumask_t *_node_to_cpumask_ptr(int node)
+const cpumask_t *cpumask_of_node(int node)
 {
	if (node_to_cpumask_map == NULL) {
		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
+			"cpumask_of_node(%d): no node_to_cpumask_map!\n",
			node);
		dump_stack();
		return (const cpumask_t *)&cpu_online_map;
	}
	if (node >= nr_node_ids) {
		printk(KERN_WARNING
-			"_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
+			"cpumask_of_node(%d): node > nr_node_ids(%d)\n",
			node, nr_node_ids);
		dump_stack();
		return &cpu_mask_none;
	}
	return &node_to_cpumask_map[node];
 }
-EXPORT_SYMBOL(_node_to_cpumask_ptr);
+EXPORT_SYMBOL(cpumask_of_node);

 /*
  * Returns a bitmask of CPUs on Node 'node'.
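The setup_per_cpu_areas() hunks switch to the pr_*() helpers, which fold the log level into the call; pr_debug() additionally compiles away unless DEBUG (or dynamic debug) is enabled. A sketch of the usage, with an illustrative function name:

#include <linux/kernel.h>

static void report_cpu(int cpu, unsigned long addr)
{
	pr_info("cpu %d initialised\n", cpu);		/* KERN_INFO */
	pr_debug("per cpu data for cpu%d at %016lx\n",	/* no-op w/o DEBUG */
		 cpu, addr);
}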
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 31869bf5fabd..6bd4d9b73870 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -496,7 +496,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
 }

 /* maps the cpu to the sched domain representing multi-core */
-cpumask_t cpu_coregroup_map(int cpu)
+const struct cpumask *cpu_coregroup_mask(int cpu)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	/*
@@ -504,9 +504,14 @@ cpumask_t cpu_coregroup_map(int cpu)
	 * And for power savings, we return cpu_core_map
	 */
 	if (sched_mc_power_savings || sched_smt_power_savings)
-		return per_cpu(cpu_core_map, cpu);
+		return &per_cpu(cpu_core_map, cpu);
 	else
-		return c->llc_shared_map;
+		return &c->llc_shared_map;
+}
+
+cpumask_t cpu_coregroup_map(int cpu)
+{
+	return *cpu_coregroup_mask(cpu);
 }

 static void impress_friends(void)
@@ -1149,7 +1154,7 @@ static void __init smp_cpu_index_default(void)
 	for_each_possible_cpu(i) {
 		c = &cpu_data(i);
 		/* mark all to hotplug */
-		c->cpu_index = NR_CPUS;
+		c->cpu_index = nr_cpu_ids;
 	}
 }

@@ -1293,6 +1298,8 @@ __init void prefill_possible_map(void)
 	else
 		possible = setup_possible_cpus;

+	total_cpus = max_t(int, possible, num_processors + disabled_cpus);
+
 	if (possible > CONFIG_NR_CPUS) {
 		printk(KERN_WARNING
 			"%d Processors exceeds NR_CPUS limit of %d\n",
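cpu_coregroup_mask() hands the scheduler a pointer to an existing mask, while the old by-value cpu_coregroup_map() is kept as a wrapper so legacy callers keep compiling during the transition. The shape of that pattern, with hypothetical names:

#include <linux/cpumask.h>

static cpumask_t example_group_map;

const struct cpumask *example_group_mask(int cpu)
{
	return &example_group_map;	/* no copy; caller must not write */
}

cpumask_t example_group(int cpu)
{
	return *example_group_mask(cpu);	/* legacy by-value API */
}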
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6a00e5faaa74..f885023167e0 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -582,7 +582,6 @@ static int __init uv_ptc_init(void)
 static struct bau_control * __init uv_table_bases_init(int blade, int node)
 {
 	int i;
-	int *ip;
 	struct bau_msg_status *msp;
 	struct bau_control *bau_tabp;

@@ -599,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node)
 		bau_cpubits_clear(&msp->seen_by, (int)
 				  uv_blade_nr_possible_cpus(blade));

-	bau_tabp->watching =
-	    kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
-	BUG_ON(!bau_tabp->watching);
-
-	for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
-		*ip = 0;
-
 	uv_bau_table_bases[blade] = bau_tabp;

 	return bau_tabp;
@@ -628,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu,
 		bcp->bau_msg_head = bau_tablesp->va_queue_first;
 		bcp->va_queue_first = bau_tablesp->va_queue_first;
 		bcp->va_queue_last = bau_tablesp->va_queue_last;
-		bcp->watching = bau_tablesp->watching;
 		bcp->msg_statuses = bau_tablesp->msg_statuses;
 		bcp->descriptor_base = adp;
 	}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 2d1f4c7e4052..ce6650eb64e9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -292,8 +292,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 8;

-	/* This is always a kernel trap and never fixable (and thus must
-	   never return). */
+	/*
+	 * This is always a kernel trap and never fixable (and thus must
+	 * never return).
+	 */
 	for (;;)
 		die(str, regs, error_code);
 }
@@ -520,9 +522,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 }

 #ifdef CONFIG_X86_64
-/* Help handler running on IST stack to switch back to user stack
-   for scheduling or signal handling. The actual stack switch is done in
-   entry.S */
+/*
+ * Help handler running on IST stack to switch back to user stack
+ * for scheduling or signal handling. The actual stack switch is done in
+ * entry.S
+ */
 asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
 	struct pt_regs *regs = eregs;
@@ -532,8 +536,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
 	/* Exception from user space */
 	else if (user_mode(eregs))
 		regs = task_pt_regs(current);
-	/* Exception from kernel and interrupts are enabled. Move to
-	   kernel process stack. */
+	/*
+	 * Exception from kernel and interrupts are enabled. Move to
+	 * kernel process stack.
+	 */
 	else if (eregs->flags & X86_EFLAGS_IF)
 		regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
 	if (eregs != regs)
@@ -685,12 +691,7 @@ void math_error(void __user *ip)
 	cwd = get_fpu_cwd(task);
 	swd = get_fpu_swd(task);

-	err = swd & ~cwd & 0x3f;
-
-#ifdef CONFIG_X86_32
-	if (!err)
-		return;
-#endif
+	err = swd & ~cwd;

 	if (err & 0x001) {	/* Invalid op */
 		/*
@@ -708,7 +709,11 @@ void math_error(void __user *ip)
 	} else if (err & 0x020) {	/* Precision */
 		info.si_code = FPE_FLTRES;
 	} else {
-		info.si_code = __SI_FAULT|SI_KERNEL; /* WTF? */
+		/*
+		 * If we're using IRQ 13, or supposedly even some trap 16
+		 * implementations, it's possible we get a spurious trap...
+		 */
+		return;	/* Spurious trap, no error */
 	}
 	force_sig_info(SIGFPE, &info, task);
 }
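In math_error() the x87 control word masks exceptions and the status word reports them, so swd & ~cwd isolates exactly the pending, unmasked exceptions; the 0x3f clamp and the X86_32 early return become unnecessary, and an empty result is now treated as a spurious trap. A sketch of the decode using the standard x87 exception bits (the helper name is illustrative; the FPE_* mapping mirrors the switch above):

#include <linux/types.h>
#include <asm/siginfo.h>

/* returns an FPE_* si_code, or 0 for a spurious exception */
static int fpu_si_code(u16 cwd, u16 swd)
{
	u16 err = swd & ~cwd;	/* pending AND unmasked */

	if (err & 0x001)	/* IE: invalid operation */
		return FPE_FLTINV;
	if (err & 0x004)	/* ZE: divide by zero */
		return FPE_FLTDIV;
	if (err & 0x008)	/* OE: overflow */
		return FPE_FLTOVF;
	if (err & 0x010)	/* UE: underflow */
		return FPE_FLTUND;
	if (err & 0x020)	/* PE: precision loss */
		return FPE_FLTRES;
	return 0;		/* e.g. a stray IRQ 13 */
}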
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 15c3e6999182..2b54fe002e94 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf)
  * Restore the extended state if present. Otherwise, restore the FP/SSE
  * state.
  */
-int restore_user_xstate(void __user *buf)
+static int restore_user_xstate(void __user *buf)
 {
 	struct _fpx_sw_bytes fx_sw_user;
 	u64 mask;
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index c02343594b4d..d3ec292f00f2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
-ifeq ($(CONFIG_DMAR),y)
-common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+ifeq ($(CONFIG_IOMMU_API),y)
+common-objs += $(addprefix ../../../virt/kvm/, iommu.o)
 endif

 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0e6aa8141dcd..cc17546a2406 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -34,6 +34,7 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/highmem.h>
+#include <linux/iommu.h>
 #include <linux/intel-iommu.h>

 #include <asm/uaccess.h>
@@ -989,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 		r = !tdp_enabled;
 		break;
 	case KVM_CAP_IOMMU:
-		r = intel_iommu_found();
+		r = iommu_found();
 		break;
 	default:
 		r = 0;
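With the generic layer in place, KVM's device-assignment capability check no longer hard-codes VT-d: iommu_found() is true whenever any driver (AMD IOMMU or VT-d) has called register_iommu(). A sketch of the check, assuming the linux/iommu.h of this release:

#include <linux/iommu.h>

/* hypothetical helper: nonzero if device assignment can work at all */
static int example_has_iommu(void)
{
	return iommu_found();
}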
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 37b9ae4d44c5..df167f265622 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -133,29 +133,28 @@ void __init time_init_hook(void)
  **/
 void mca_nmi_hook(void)
 {
-	/* If I recall correctly, there's a whole bunch of other things that
+	/*
+	 * If I recall correctly, there's a whole bunch of other things that
 	 * we can do to check for NMI problems, but that's all I know about
 	 * at the moment.
 	 */
-
-	printk("NMI generated from unknown source!\n");
+	pr_warning("NMI generated from unknown source!\n");
 }
 #endif

 static __init int no_ipi_broadcast(char *str)
 {
 	get_option(&str, &no_broadcast);
-	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
-						  "IPI Broadcast");
+	pr_info("Using %s mode\n",
+		no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
 	return 1;
 }
-
 __setup("no_ipi_broadcast=", no_ipi_broadcast);

 static int __init print_ipi_mode(void)
 {
-	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
-						      "Shortcut");
+	pr_info("Using IPI %s mode\n",
		no_broadcast ? "No-Shortcut" : "Shortcut");
 	return 0;
 }

diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index a5bc05492b1e..9840b7ec749a 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -357,9 +357,8 @@ void __init find_smp_config(void)
357 printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); 357 printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id);
358 358
359 /* initialize the CPU structures (moved from smp_boot_cpus) */ 359 /* initialize the CPU structures (moved from smp_boot_cpus) */
360 for (i = 0; i < NR_CPUS; i++) { 360 for (i = 0; i < nr_cpu_ids; i++)
361 cpu_irq_affinity[i] = ~0; 361 cpu_irq_affinity[i] = ~0;
362 }
363 cpu_online_map = cpumask_of_cpu(boot_cpu_id); 362 cpu_online_map = cpumask_of_cpu(boot_cpu_id);
364 363
365 /* The boot CPU must be extended */ 364 /* The boot CPU must be extended */
@@ -1227,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier)
  * new values until the next timer interrupt in which they do process
  * accounting.
  */
-	for (i = 0; i < NR_CPUS; ++i)
+	for (i = 0; i < nr_cpu_ids; ++i)
 		per_cpu(prof_multiplier, i) = multiplier;
 
 	return 0;
@@ -1257,7 +1256,7 @@ void __init voyager_smp_intr_init(void)
 	int i;
 
 	/* initialize the per cpu irq mask to all disabled */
-	for (i = 0; i < NR_CPUS; i++)
+	for (i = 0; i < nr_cpu_ids; i++)
 		vic_irq_mask[i] = 0xFFFF;
 
 	VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt);
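
All three voyager hunks apply the same substitution: loop bounds move
from NR_CPUS, the compile-time maximum used to size per-CPU arrays, to
nr_cpu_ids, the runtime number of possible CPU ids, so the loops stop
at slots that can actually correspond to a CPU. A minimal sketch of
the idiom; the array name example_mask is illustrative:

	#include <linux/cpumask.h>

	/* Storage is still sized for the compile-time maximum ... */
	static unsigned long example_mask[NR_CPUS];

	static void init_example_masks(void)
	{
		int i;

		/* ... but iteration stops at the runtime bound. */
		for (i = 0; i < nr_cpu_ids; i++)
			example_mask[i] = ~0UL;
	}
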
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 1d88d2b39771..9e5752fe4d15 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -4,7 +4,7 @@
 #include <linux/irq.h>
 #include <linux/dmi.h>
 #include <asm/numa.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 struct pci_root_info {
 	char *name;
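
This and the following arch/x86/pci hunks are one mechanical change:
the private header arch/x86/pci/pci.h has been moved into the exported
asm include space as <asm/pci_x86.h>, so every user swaps the local
include for the asm one:

	/* before: only visible inside arch/x86/pci/ */
	#include "pci.h"

	/* after: reachable from anywhere in the x86 tree */
	#include <asm/pci_x86.h>
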
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c
index 22e057665e55..9bb09823b362 100644
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <linux/topology.h>
 #include <linux/cpu.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #ifdef CONFIG_X86_64
 #include <asm/pci-direct.h>
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index bb1a01f089e2..62ddb73e09ed 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -14,8 +14,7 @@
 #include <asm/segment.h>
 #include <asm/io.h>
 #include <asm/smp.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 |
 				PCI_PROBE_MMCONF;
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c
index 9a5af6c8fbe9..bd13c3e4c6db 100644
--- a/arch/x86/pci/direct.c
+++ b/arch/x86/pci/direct.c
@@ -5,7 +5,7 @@
 #include <linux/pci.h>
 #include <linux/init.h>
 #include <linux/dmi.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Functions for accessing PCI base (first 256 bytes) and extended
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index 86631ccbc25a..f6adf2c6d751 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -2,7 +2,7 @@
 #include <linux/pci.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Direct PCI access. This is used for PCI accesses in early boot before
    the PCI subsystem works. */
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 2051dc96b8e9..7d388d5cf548 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -6,8 +6,7 @@
 #include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
-
+#include <asm/pci_x86.h>
 
 static void __devinit pci_fixup_i450nx(struct pci_dev *d)
 {
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 844df0cbbd3e..e51bf2cda4b0 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -34,8 +34,8 @@
 
 #include <asm/pat.h>
 #include <asm/e820.h>
+#include <asm/pci_x86.h>
 
-#include "pci.h"
 
 static int
 skip_isa_ioresource_align(struct pci_dev *dev) {
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
index d6c950f81858..bec3b048e72b 100644
--- a/arch/x86/pci/init.c
+++ b/arch/x86/pci/init.c
@@ -1,6 +1,6 @@
 #include <linux/pci.h>
 #include <linux/init.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* arch_initcall has too random ordering, so call the initializers
    in the right sequence from here. */
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index bf69dbe08bff..373b9afe6d44 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -16,8 +16,7 @@
 #include <asm/io_apic.h>
 #include <linux/irq.h>
 #include <linux/acpi.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define PIRQ_SIGNATURE	(('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
 #define PIRQ_VERSION 0x0100
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c
index b722dd481b39..f1065b129e9c 100644
--- a/arch/x86/pci/legacy.c
+++ b/arch/x86/pci/legacy.c
@@ -3,7 +3,7 @@
  */
 #include <linux/init.h>
 #include <linux/pci.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * Discover remaining PCI buses in case there are peer host bridges.
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 654a2234f8f3..89bf9242c80a 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -15,8 +15,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* aperture is up to 256MB but BIOS may reserve less */
 #define MMCONFIG_APER_MIN	(2 * 1024*1024)
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c
index f3c761dce695..8b2d561046a3 100644
--- a/arch/x86/pci/mmconfig_32.c
+++ b/arch/x86/pci/mmconfig_32.c
@@ -13,7 +13,7 @@
 #include <linux/init.h>
 #include <linux/acpi.h>
 #include <asm/e820.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Assume systems with more busses have correct MCFG */
 #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG))
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c
index a1994163c99d..30007ffc8e11 100644
--- a/arch/x86/pci/mmconfig_64.c
+++ b/arch/x86/pci/mmconfig_64.c
@@ -10,8 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/bitmap.h>
 #include <asm/e820.h>
-
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /* Static virtual mapping of the MMCONFIG aperture */
 struct mmcfg_virt {
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c
index 1177845d3186..2089354968a2 100644
--- a/arch/x86/pci/numaq_32.c
+++ b/arch/x86/pci/numaq_32.c
@@ -7,7 +7,7 @@
 #include <linux/nodemask.h>
 #include <mach_apic.h>
 #include <asm/mpspec.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 #define XQUAD_PORTIO_BASE 0xfe400000
 #define XQUAD_PORTIO_QUAD 0x40000  /* 256k per quad. */
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c
index e11e9e803d5f..b889d824f7c6 100644
--- a/arch/x86/pci/olpc.c
+++ b/arch/x86/pci/olpc.c
@@ -29,7 +29,7 @@
 #include <linux/init.h>
 #include <asm/olpc.h>
 #include <asm/geode.h>
-#include "pci.h"
+#include <asm/pci_x86.h>
 
 /*
  * In the tables below, the first two line (8 longwords) are the
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c
index 37472fc6f729..b82cae970dfd 100644
--- a/arch/x86/pci/pcbios.c
+++ b/arch/x86/pci/pcbios.c
@@ -6,9 +6,8 @@
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
-#include "pci.h"
-#include "pci-functions.h"
-
+#include <asm/pci_x86.h>
+#include <asm/mach-default/pci-functions.h>
 
 /* BIOS32 signature: "_32_" */
 #define BIOS32_SIGNATURE	(('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24))
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 42f4cb19faca..16d0c0eb0d19 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -9,11 +9,10 @@
 #include <linux/init.h>
 
 #include <asm/setup.h>
+#include <asm/pci_x86.h>
 #include <asm/visws/cobalt.h>
 #include <asm/visws/lithium.h>
 
-#include "pci.h"
-
 static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; }
 static void pci_visws_disable_irq(struct pci_dev *dev) { }
 
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 65d75a6be0ba..14f240623497 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void)
 	*snap = state;
 
 	/* Add the appropriate number of ticks of stolen time,
-	   including any left-overs from last time.  Passing NULL to
-	   account_steal_time accounts the time as stolen. */
+	   including any left-overs from last time. */
 	stolen = runnable + offline + __get_cpu_var(residual_stolen);
 
 	if (stolen < 0)
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
-	account_steal_time(NULL, ticks);
+	account_steal_ticks(ticks);
 
 	/* Add the appropriate number of ticks of blocked time,
-	   including any left-overs from last time.  Passing idle to
-	   account_steal_time accounts the time as idle/wait. */
+	   including any left-overs from last time. */
 	blocked += __get_cpu_var(residual_blocked);
 
 	if (blocked < 0)
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void)
 
 	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
-	account_steal_time(idle_task(smp_processor_id()), ticks);
+	account_idle_ticks(ticks);
 }
 
 /*
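
The xen hunks retire the overloaded account_steal_time(task, ticks)
call, whose meaning depended on whether the task argument was NULL or
the idle task, in favour of helpers that name the accounted class
explicitly at the call site. A minimal sketch of the new calls; the
wrapper name xen_account_gap is illustrative:

	#include <linux/kernel_stat.h>

	static void xen_account_gap(unsigned long stolen_ticks,
				    unsigned long blocked_ticks)
	{
		/* ticks the hypervisor ran someone else on this pcpu */
		account_steal_ticks(stolen_ticks);

		/* ticks this vcpu spent blocked, accounted as idle */
		account_idle_ticks(blocked_ticks);
	}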