diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-08-11 06:57:01 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-08-11 06:57:01 -0400 |
commit | 6de9c70882ecdee63a652d493bf2353963bd4c22 (patch) | |
tree | 9d219e705492331c97f5f7dccce3b0b1a29251bf /arch/x86 | |
parent | d406d21d90dce2e66c7eb4a44605aac947fe55fb (diff) | |
parent | 796aadeb1b2db9b5d463946766c5bbfd7717158c (diff) |
Merge branch 'linus' into x86/cleanups
Diffstat (limited to 'arch/x86')
46 files changed, 959 insertions, 642 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e3cba0b45600..3d0f2b6a5a16 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -22,13 +22,15 @@ config X86 | |||
22 | select HAVE_IDE | 22 | select HAVE_IDE |
23 | select HAVE_OPROFILE | 23 | select HAVE_OPROFILE |
24 | select HAVE_IOREMAP_PROT | 24 | select HAVE_IOREMAP_PROT |
25 | select HAVE_GET_USER_PAGES_FAST | ||
25 | select HAVE_KPROBES | 26 | select HAVE_KPROBES |
26 | select ARCH_WANT_OPTIONAL_GPIOLIB if !X86_RDC321X | 27 | select ARCH_WANT_OPTIONAL_GPIOLIB |
27 | select HAVE_KRETPROBES | 28 | select HAVE_KRETPROBES |
28 | select HAVE_DYNAMIC_FTRACE | 29 | select HAVE_DYNAMIC_FTRACE |
29 | select HAVE_FTRACE | 30 | select HAVE_FTRACE |
30 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) | 31 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) |
31 | select HAVE_ARCH_KGDB if !X86_VOYAGER | 32 | select HAVE_ARCH_KGDB if !X86_VOYAGER |
33 | select HAVE_GENERIC_DMA_COHERENT if X86_32 | ||
32 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | 34 | select HAVE_EFFICIENT_UNALIGNED_ACCESS |
33 | 35 | ||
34 | config ARCH_DEFCONFIG | 36 | config ARCH_DEFCONFIG |
@@ -332,20 +334,6 @@ config X86_BIGSMP | |||
332 | 334 | ||
333 | endif | 335 | endif |
334 | 336 | ||
335 | config X86_RDC321X | ||
336 | bool "RDC R-321x SoC" | ||
337 | depends on X86_32 | ||
338 | select M486 | ||
339 | select X86_REBOOTFIXUPS | ||
340 | select GENERIC_GPIO | ||
341 | select LEDS_CLASS | ||
342 | select LEDS_GPIO | ||
343 | select NEW_LEDS | ||
344 | help | ||
345 | This option is needed for RDC R-321x system-on-chip, also known | ||
346 | as R-8610-(G). | ||
347 | If you don't have one of these chips, you should say N here. | ||
348 | |||
349 | config X86_VSMP | 337 | config X86_VSMP |
350 | bool "Support for ScaleMP vSMP" | 338 | bool "Support for ScaleMP vSMP" |
351 | select PARAVIRT | 339 | select PARAVIRT |
@@ -369,6 +357,16 @@ config X86_VISWS | |||
369 | A kernel compiled for the Visual Workstation will run on general | 357 | A kernel compiled for the Visual Workstation will run on general |
370 | PCs as well. See <file:Documentation/sgi-visws.txt> for details. | 358 | PCs as well. See <file:Documentation/sgi-visws.txt> for details. |
371 | 359 | ||
360 | config X86_RDC321X | ||
361 | bool "RDC R-321x SoC" | ||
362 | depends on X86_32 | ||
363 | select M486 | ||
364 | select X86_REBOOTFIXUPS | ||
365 | help | ||
366 | This option is needed for RDC R-321x system-on-chip, also known | ||
367 | as R-8610-(G). | ||
368 | If you don't have one of these chips, you should say N here. | ||
369 | |||
372 | config SCHED_NO_NO_OMIT_FRAME_POINTER | 370 | config SCHED_NO_NO_OMIT_FRAME_POINTER |
373 | def_bool y | 371 | def_bool y |
374 | prompt "Single-depth WCHAN output" | 372 | prompt "Single-depth WCHAN output" |
@@ -1279,6 +1277,14 @@ config CRASH_DUMP | |||
1279 | (CONFIG_RELOCATABLE=y). | 1277 | (CONFIG_RELOCATABLE=y). |
1280 | For more details see Documentation/kdump/kdump.txt | 1278 | For more details see Documentation/kdump/kdump.txt |
1281 | 1279 | ||
1280 | config KEXEC_JUMP | ||
1281 | bool "kexec jump (EXPERIMENTAL)" | ||
1282 | depends on EXPERIMENTAL | ||
1283 | depends on KEXEC && HIBERNATION && X86_32 | ||
1284 | help | ||
1285 | Jump between original kernel and kexeced kernel and invoke | ||
1286 | code in physical address mode via KEXEC | ||
1287 | |||
1282 | config PHYSICAL_START | 1288 | config PHYSICAL_START |
1283 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | 1289 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) |
1284 | default "0x1000000" if X86_NUMAQ | 1290 | default "0x1000000" if X86_NUMAQ |
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 919ce21ea654..f5631da585b6 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -118,11 +118,6 @@ mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic | |||
118 | fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ | 118 | fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ |
119 | mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ | 119 | mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ |
120 | 120 | ||
121 | # RDC R-321x subarch support | ||
122 | mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x | ||
123 | mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ | ||
124 | core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/ | ||
125 | |||
126 | # default subarch .h files | 121 | # default subarch .h files |
127 | mflags-y += -Iinclude/asm-x86/mach-default | 122 | mflags-y += -Iinclude/asm-x86/mach-default |
128 | 123 | ||
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 58cccb6483b0..a0e1dbe67dc1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c | |||
@@ -441,12 +441,6 @@ beyond_if: | |||
441 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = | 441 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = |
442 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; | 442 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; |
443 | set_fs(USER_DS); | 443 | set_fs(USER_DS); |
444 | if (unlikely(current->ptrace & PT_PTRACED)) { | ||
445 | if (current->ptrace & PT_TRACE_EXEC) | ||
446 | ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); | ||
447 | else | ||
448 | send_sig(SIGTRAP, current, 0); | ||
449 | } | ||
450 | return 0; | 444 | return 0; |
451 | } | 445 | } |
452 | 446 | ||
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 9220cf46aa10..c2502eb9aa83 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -73,7 +73,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
73 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 73 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
74 | 74 | ||
75 | cpumask_t saved_mask; | 75 | cpumask_t saved_mask; |
76 | cpumask_of_cpu_ptr(new_mask, cpu); | ||
77 | int retval; | 76 | int retval; |
78 | unsigned int eax, ebx, ecx, edx; | 77 | unsigned int eax, ebx, ecx, edx; |
79 | unsigned int edx_part; | 78 | unsigned int edx_part; |
@@ -92,7 +91,7 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
92 | 91 | ||
93 | /* Make sure we are running on right CPU */ | 92 | /* Make sure we are running on right CPU */ |
94 | saved_mask = current->cpus_allowed; | 93 | saved_mask = current->cpus_allowed; |
95 | retval = set_cpus_allowed_ptr(current, new_mask); | 94 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
96 | if (retval) | 95 | if (retval) |
97 | return -1; | 96 | return -1; |
98 | 97 | ||
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index c25210e6ac88..22d7d050905d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -29,9 +29,6 @@ | |||
29 | 29 | ||
30 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) | 30 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) |
31 | 31 | ||
32 | #define to_pages(addr, size) \ | ||
33 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
34 | |||
35 | #define EXIT_LOOP_COUNT 10000000 | 32 | #define EXIT_LOOP_COUNT 10000000 |
36 | 33 | ||
37 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
@@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
185 | u64 address, size_t size) | 182 | u64 address, size_t size) |
186 | { | 183 | { |
187 | int s = 0; | 184 | int s = 0; |
188 | unsigned pages = to_pages(address, size); | 185 | unsigned pages = iommu_num_pages(address, size); |
189 | 186 | ||
190 | address &= PAGE_MASK; | 187 | address &= PAGE_MASK; |
191 | 188 | ||
@@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
557 | if (iommu->exclusion_start && | 554 | if (iommu->exclusion_start && |
558 | iommu->exclusion_start < dma_dom->aperture_size) { | 555 | iommu->exclusion_start < dma_dom->aperture_size) { |
559 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 556 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; |
560 | int pages = to_pages(iommu->exclusion_start, | 557 | int pages = iommu_num_pages(iommu->exclusion_start, |
561 | iommu->exclusion_length); | 558 | iommu->exclusion_length); |
562 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 559 | dma_ops_reserve_addresses(dma_dom, startpage, pages); |
563 | } | 560 | } |
564 | 561 | ||
@@ -667,7 +664,7 @@ static int get_device_resources(struct device *dev, | |||
667 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); | 664 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
668 | 665 | ||
669 | /* device not translated by any IOMMU in the system? */ | 666 | /* device not translated by any IOMMU in the system? */ |
670 | if (_bdf >= amd_iommu_last_bdf) { | 667 | if (_bdf > amd_iommu_last_bdf) { |
671 | *iommu = NULL; | 668 | *iommu = NULL; |
672 | *domain = NULL; | 669 | *domain = NULL; |
673 | *bdf = 0xffff; | 670 | *bdf = 0xffff; |
@@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev, | |||
767 | unsigned int pages; | 764 | unsigned int pages; |
768 | int i; | 765 | int i; |
769 | 766 | ||
770 | pages = to_pages(paddr, size); | 767 | pages = iommu_num_pages(paddr, size); |
771 | paddr &= PAGE_MASK; | 768 | paddr &= PAGE_MASK; |
772 | 769 | ||
773 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); | 770 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); |
@@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
802 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) | 799 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) |
803 | return; | 800 | return; |
804 | 801 | ||
805 | pages = to_pages(dma_addr, size); | 802 | pages = iommu_num_pages(dma_addr, size); |
806 | dma_addr &= PAGE_MASK; | 803 | dma_addr &= PAGE_MASK; |
807 | start = dma_addr; | 804 | start = dma_addr; |
808 | 805 | ||
@@ -1085,7 +1082,7 @@ void prealloc_protection_domains(void) | |||
1085 | 1082 | ||
1086 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1083 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
1087 | devid = (dev->bus->number << 8) | dev->devfn; | 1084 | devid = (dev->bus->number << 8) | dev->devfn; |
1088 | if (devid >= amd_iommu_last_bdf) | 1085 | if (devid > amd_iommu_last_bdf) |
1089 | continue; | 1086 | continue; |
1090 | devid = amd_iommu_alias_table[devid]; | 1087 | devid = amd_iommu_alias_table[devid]; |
1091 | if (domain_for_device(devid)) | 1088 | if (domain_for_device(devid)) |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c9d8ff2eb130..d9a9da597e79 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -732,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
732 | set_device_exclusion_range(m->devid, m); | 732 | set_device_exclusion_range(m->devid, m); |
733 | break; | 733 | break; |
734 | case ACPI_IVMD_TYPE_ALL: | 734 | case ACPI_IVMD_TYPE_ALL: |
735 | for (i = 0; i < amd_iommu_last_bdf; ++i) | 735 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
736 | set_device_exclusion_range(i, m); | 736 | set_device_exclusion_range(i, m); |
737 | break; | 737 | break; |
738 | case ACPI_IVMD_TYPE_RANGE: | 738 | case ACPI_IVMD_TYPE_RANGE: |
@@ -934,7 +934,7 @@ int __init amd_iommu_init(void) | |||
934 | /* | 934 | /* |
935 | * let all alias entries point to itself | 935 | * let all alias entries point to itself |
936 | */ | 936 | */ |
937 | for (i = 0; i < amd_iommu_last_bdf; ++i) | 937 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
938 | amd_iommu_alias_table[i] = i; | 938 | amd_iommu_alias_table[i] = i; |
939 | 939 | ||
940 | /* | 940 | /* |
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index cb7a5715596d..efae3b22a0ff 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
@@ -235,9 +235,9 @@ config X86_LONGHAUL | |||
235 | If in doubt, say N. | 235 | If in doubt, say N. |
236 | 236 | ||
237 | config X86_E_POWERSAVER | 237 | config X86_E_POWERSAVER |
238 | tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" | 238 | tristate "VIA C7 Enhanced PowerSaver" |
239 | select CPU_FREQ_TABLE | 239 | select CPU_FREQ_TABLE |
240 | depends on X86_32 && EXPERIMENTAL | 240 | depends on X86_32 |
241 | help | 241 | help |
242 | This adds the CPUFreq driver for VIA C7 processors. | 242 | This adds the CPUFreq driver for VIA C7 processors. |
243 | 243 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ff2fff56f0a8..dd097b835839 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -200,12 +200,10 @@ static void drv_read(struct drv_cmd *cmd) | |||
200 | static void drv_write(struct drv_cmd *cmd) | 200 | static void drv_write(struct drv_cmd *cmd) |
201 | { | 201 | { |
202 | cpumask_t saved_mask = current->cpus_allowed; | 202 | cpumask_t saved_mask = current->cpus_allowed; |
203 | cpumask_of_cpu_ptr_declare(cpu_mask); | ||
204 | unsigned int i; | 203 | unsigned int i; |
205 | 204 | ||
206 | for_each_cpu_mask_nr(i, cmd->mask) { | 205 | for_each_cpu_mask_nr(i, cmd->mask) { |
207 | cpumask_of_cpu_ptr_next(cpu_mask, i); | 206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); |
208 | set_cpus_allowed_ptr(current, cpu_mask); | ||
209 | do_drv_write(cmd); | 207 | do_drv_write(cmd); |
210 | } | 208 | } |
211 | 209 | ||
@@ -269,12 +267,11 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
269 | } aperf_cur, mperf_cur; | 267 | } aperf_cur, mperf_cur; |
270 | 268 | ||
271 | cpumask_t saved_mask; | 269 | cpumask_t saved_mask; |
272 | cpumask_of_cpu_ptr(cpu_mask, cpu); | ||
273 | unsigned int perf_percent; | 270 | unsigned int perf_percent; |
274 | unsigned int retval; | 271 | unsigned int retval; |
275 | 272 | ||
276 | saved_mask = current->cpus_allowed; | 273 | saved_mask = current->cpus_allowed; |
277 | set_cpus_allowed_ptr(current, cpu_mask); | 274 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
278 | if (get_cpu() != cpu) { | 275 | if (get_cpu() != cpu) { |
279 | /* We were not able to run on requested processor */ | 276 | /* We were not able to run on requested processor */ |
280 | put_cpu(); | 277 | put_cpu(); |
@@ -340,7 +337,6 @@ static unsigned int get_measured_perf(unsigned int cpu) | |||
340 | 337 | ||
341 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | 338 | static unsigned int get_cur_freq_on_cpu(unsigned int cpu) |
342 | { | 339 | { |
343 | cpumask_of_cpu_ptr(cpu_mask, cpu); | ||
344 | struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); | 340 | struct acpi_cpufreq_data *data = per_cpu(drv_data, cpu); |
345 | unsigned int freq; | 341 | unsigned int freq; |
346 | unsigned int cached_freq; | 342 | unsigned int cached_freq; |
@@ -353,7 +349,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) | |||
353 | } | 349 | } |
354 | 350 | ||
355 | cached_freq = data->freq_table[data->acpi_data->state].frequency; | 351 | cached_freq = data->freq_table[data->acpi_data->state].frequency; |
356 | freq = extract_freq(get_cur_val(cpu_mask), data); | 352 | freq = extract_freq(get_cur_val(&cpumask_of_cpu(cpu)), data); |
357 | if (freq != cached_freq) { | 353 | if (freq != cached_freq) { |
358 | /* | 354 | /* |
359 | * The dreaded BIOS frequency change behind our back. | 355 | * The dreaded BIOS frequency change behind our back. |
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 94619c22f563..e4a4bf870e94 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c | |||
@@ -44,7 +44,7 @@ struct s_elan_multiplier { | |||
44 | * It is important that the frequencies | 44 | * It is important that the frequencies |
45 | * are listed in ascending order here! | 45 | * are listed in ascending order here! |
46 | */ | 46 | */ |
47 | struct s_elan_multiplier elan_multiplier[] = { | 47 | static struct s_elan_multiplier elan_multiplier[] = { |
48 | {1000, 0x02, 0x18}, | 48 | {1000, 0x02, 0x18}, |
49 | {2000, 0x02, 0x10}, | 49 | {2000, 0x02, 0x10}, |
50 | {4000, 0x02, 0x08}, | 50 | {4000, 0x02, 0x08}, |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 53c7b6936973..4e7271999a74 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid) | |||
66 | return 800 + (fid * 100); | 66 | return 800 + (fid * 100); |
67 | } | 67 | } |
68 | 68 | ||
69 | |||
70 | /* Return a frequency in KHz, given an input fid */ | 69 | /* Return a frequency in KHz, given an input fid */ |
71 | static u32 find_khz_freq_from_fid(u32 fid) | 70 | static u32 find_khz_freq_from_fid(u32 fid) |
72 | { | 71 | { |
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p | |||
78 | return data[pstate].frequency; | 77 | return data[pstate].frequency; |
79 | } | 78 | } |
80 | 79 | ||
81 | |||
82 | /* Return the vco fid for an input fid | 80 | /* Return the vco fid for an input fid |
83 | * | 81 | * |
84 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids | 82 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids |
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void) | |||
166 | wrmsr(MSR_FIDVID_CTL, lo, hi); | 164 | wrmsr(MSR_FIDVID_CTL, lo, hi); |
167 | } | 165 | } |
168 | 166 | ||
169 | |||
170 | /* write the new fid value along with the other control fields to the msr */ | 167 | /* write the new fid value along with the other control fields to the msr */ |
171 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | 168 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) |
172 | { | 169 | { |
@@ -479,12 +476,11 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, u32 reqvi | |||
479 | static int check_supported_cpu(unsigned int cpu) | 476 | static int check_supported_cpu(unsigned int cpu) |
480 | { | 477 | { |
481 | cpumask_t oldmask; | 478 | cpumask_t oldmask; |
482 | cpumask_of_cpu_ptr(cpu_mask, cpu); | ||
483 | u32 eax, ebx, ecx, edx; | 479 | u32 eax, ebx, ecx, edx; |
484 | unsigned int rc = 0; | 480 | unsigned int rc = 0; |
485 | 481 | ||
486 | oldmask = current->cpus_allowed; | 482 | oldmask = current->cpus_allowed; |
487 | set_cpus_allowed_ptr(current, cpu_mask); | 483 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
488 | 484 | ||
489 | if (smp_processor_id() != cpu) { | 485 | if (smp_processor_id() != cpu) { |
490 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); | 486 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); |
@@ -741,44 +737,63 @@ static int find_psb_table(struct powernow_k8_data *data) | |||
741 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | 737 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI |
742 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) | 738 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) |
743 | { | 739 | { |
744 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 740 | if (!data->acpi_data->state_count || (cpu_family == CPU_HW_PSTATE)) |
745 | return; | 741 | return; |
746 | 742 | ||
747 | data->irt = (data->acpi_data.states[index].control >> IRT_SHIFT) & IRT_MASK; | 743 | data->irt = (data->acpi_data->states[index].control >> IRT_SHIFT) & IRT_MASK; |
748 | data->rvo = (data->acpi_data.states[index].control >> RVO_SHIFT) & RVO_MASK; | 744 | data->rvo = (data->acpi_data->states[index].control >> RVO_SHIFT) & RVO_MASK; |
749 | data->exttype = (data->acpi_data.states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; | 745 | data->exttype = (data->acpi_data->states[index].control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; |
750 | data->plllock = (data->acpi_data.states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; | 746 | data->plllock = (data->acpi_data->states[index].control >> PLL_L_SHIFT) & PLL_L_MASK; |
751 | data->vidmvs = 1 << ((data->acpi_data.states[index].control >> MVS_SHIFT) & MVS_MASK); | 747 | data->vidmvs = 1 << ((data->acpi_data->states[index].control >> MVS_SHIFT) & MVS_MASK); |
752 | data->vstable = (data->acpi_data.states[index].control >> VST_SHIFT) & VST_MASK; | 748 | data->vstable = (data->acpi_data->states[index].control >> VST_SHIFT) & VST_MASK; |
749 | } | ||
750 | |||
751 | |||
752 | static struct acpi_processor_performance *acpi_perf_data; | ||
753 | static int preregister_valid; | ||
754 | |||
755 | static int powernow_k8_cpu_preinit_acpi(void) | ||
756 | { | ||
757 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); | ||
758 | if (!acpi_perf_data) | ||
759 | return -ENODEV; | ||
760 | |||
761 | if (acpi_processor_preregister_performance(acpi_perf_data)) | ||
762 | return -ENODEV; | ||
763 | else | ||
764 | preregister_valid = 1; | ||
765 | return 0; | ||
753 | } | 766 | } |
754 | 767 | ||
755 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | 768 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) |
756 | { | 769 | { |
757 | struct cpufreq_frequency_table *powernow_table; | 770 | struct cpufreq_frequency_table *powernow_table; |
758 | int ret_val; | 771 | int ret_val; |
772 | int cpu = 0; | ||
759 | 773 | ||
760 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 774 | data->acpi_data = percpu_ptr(acpi_perf_data, cpu); |
775 | if (acpi_processor_register_performance(data->acpi_data, data->cpu)) { | ||
761 | dprintk("register performance failed: bad ACPI data\n"); | 776 | dprintk("register performance failed: bad ACPI data\n"); |
762 | return -EIO; | 777 | return -EIO; |
763 | } | 778 | } |
764 | 779 | ||
765 | /* verify the data contained in the ACPI structures */ | 780 | /* verify the data contained in the ACPI structures */ |
766 | if (data->acpi_data.state_count <= 1) { | 781 | if (data->acpi_data->state_count <= 1) { |
767 | dprintk("No ACPI P-States\n"); | 782 | dprintk("No ACPI P-States\n"); |
768 | goto err_out; | 783 | goto err_out; |
769 | } | 784 | } |
770 | 785 | ||
771 | if ((data->acpi_data.control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || | 786 | if ((data->acpi_data->control_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE) || |
772 | (data->acpi_data.status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { | 787 | (data->acpi_data->status_register.space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) { |
773 | dprintk("Invalid control/status registers (%x - %x)\n", | 788 | dprintk("Invalid control/status registers (%x - %x)\n", |
774 | data->acpi_data.control_register.space_id, | 789 | data->acpi_data->control_register.space_id, |
775 | data->acpi_data.status_register.space_id); | 790 | data->acpi_data->status_register.space_id); |
776 | goto err_out; | 791 | goto err_out; |
777 | } | 792 | } |
778 | 793 | ||
779 | /* fill in data->powernow_table */ | 794 | /* fill in data->powernow_table */ |
780 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) | 795 | powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) |
781 | * (data->acpi_data.state_count + 1)), GFP_KERNEL); | 796 | * (data->acpi_data->state_count + 1)), GFP_KERNEL); |
782 | if (!powernow_table) { | 797 | if (!powernow_table) { |
783 | dprintk("powernow_table memory alloc failure\n"); | 798 | dprintk("powernow_table memory alloc failure\n"); |
784 | goto err_out; | 799 | goto err_out; |
@@ -791,12 +806,12 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
791 | if (ret_val) | 806 | if (ret_val) |
792 | goto err_out_mem; | 807 | goto err_out_mem; |
793 | 808 | ||
794 | powernow_table[data->acpi_data.state_count].frequency = CPUFREQ_TABLE_END; | 809 | powernow_table[data->acpi_data->state_count].frequency = CPUFREQ_TABLE_END; |
795 | powernow_table[data->acpi_data.state_count].index = 0; | 810 | powernow_table[data->acpi_data->state_count].index = 0; |
796 | data->powernow_table = powernow_table; | 811 | data->powernow_table = powernow_table; |
797 | 812 | ||
798 | /* fill in data */ | 813 | /* fill in data */ |
799 | data->numps = data->acpi_data.state_count; | 814 | data->numps = data->acpi_data->state_count; |
800 | if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) | 815 | if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) |
801 | print_basics(data); | 816 | print_basics(data); |
802 | powernow_k8_acpi_pst_values(data, 0); | 817 | powernow_k8_acpi_pst_values(data, 0); |
@@ -804,16 +819,31 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
804 | /* notify BIOS that we exist */ | 819 | /* notify BIOS that we exist */ |
805 | acpi_processor_notify_smm(THIS_MODULE); | 820 | acpi_processor_notify_smm(THIS_MODULE); |
806 | 821 | ||
822 | /* determine affinity, from ACPI if available */ | ||
823 | if (preregister_valid) { | ||
824 | if ((data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ALL) || | ||
825 | (data->acpi_data->shared_type == CPUFREQ_SHARED_TYPE_ANY)) | ||
826 | data->starting_core_affinity = data->acpi_data->shared_cpu_map; | ||
827 | else | ||
828 | data->starting_core_affinity = cpumask_of_cpu(data->cpu); | ||
829 | } else { | ||
830 | /* best guess from family if not */ | ||
831 | if (cpu_family == CPU_HW_PSTATE) | ||
832 | data->starting_core_affinity = cpumask_of_cpu(data->cpu); | ||
833 | else | ||
834 | data->starting_core_affinity = per_cpu(cpu_core_map, data->cpu); | ||
835 | } | ||
836 | |||
807 | return 0; | 837 | return 0; |
808 | 838 | ||
809 | err_out_mem: | 839 | err_out_mem: |
810 | kfree(powernow_table); | 840 | kfree(powernow_table); |
811 | 841 | ||
812 | err_out: | 842 | err_out: |
813 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | 843 | acpi_processor_unregister_performance(data->acpi_data, data->cpu); |
814 | 844 | ||
815 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ | 845 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ |
816 | data->acpi_data.state_count = 0; | 846 | data->acpi_data->state_count = 0; |
817 | 847 | ||
818 | return -ENODEV; | 848 | return -ENODEV; |
819 | } | 849 | } |
@@ -825,10 +855,10 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf | |||
825 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); | 855 | rdmsr(MSR_PSTATE_CUR_LIMIT, hi, lo); |
826 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; | 856 | data->max_hw_pstate = (hi & HW_PSTATE_MAX_MASK) >> HW_PSTATE_MAX_SHIFT; |
827 | 857 | ||
828 | for (i = 0; i < data->acpi_data.state_count; i++) { | 858 | for (i = 0; i < data->acpi_data->state_count; i++) { |
829 | u32 index; | 859 | u32 index; |
830 | 860 | ||
831 | index = data->acpi_data.states[i].control & HW_PSTATE_MASK; | 861 | index = data->acpi_data->states[i].control & HW_PSTATE_MASK; |
832 | if (index > data->max_hw_pstate) { | 862 | if (index > data->max_hw_pstate) { |
833 | printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); | 863 | printk(KERN_ERR PFX "invalid pstate %d - bad value %d.\n", i, index); |
834 | printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); | 864 | printk(KERN_ERR PFX "Please report to BIOS manufacturer\n"); |
@@ -844,7 +874,7 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf | |||
844 | 874 | ||
845 | powernow_table[i].index = index; | 875 | powernow_table[i].index = index; |
846 | 876 | ||
847 | powernow_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; | 877 | powernow_table[i].frequency = data->acpi_data->states[i].core_frequency * 1000; |
848 | } | 878 | } |
849 | return 0; | 879 | return 0; |
850 | } | 880 | } |
@@ -853,16 +883,16 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf | |||
853 | { | 883 | { |
854 | int i; | 884 | int i; |
855 | int cntlofreq = 0; | 885 | int cntlofreq = 0; |
856 | for (i = 0; i < data->acpi_data.state_count; i++) { | 886 | for (i = 0; i < data->acpi_data->state_count; i++) { |
857 | u32 fid; | 887 | u32 fid; |
858 | u32 vid; | 888 | u32 vid; |
859 | 889 | ||
860 | if (data->exttype) { | 890 | if (data->exttype) { |
861 | fid = data->acpi_data.states[i].status & EXT_FID_MASK; | 891 | fid = data->acpi_data->states[i].status & EXT_FID_MASK; |
862 | vid = (data->acpi_data.states[i].status >> VID_SHIFT) & EXT_VID_MASK; | 892 | vid = (data->acpi_data->states[i].status >> VID_SHIFT) & EXT_VID_MASK; |
863 | } else { | 893 | } else { |
864 | fid = data->acpi_data.states[i].control & FID_MASK; | 894 | fid = data->acpi_data->states[i].control & FID_MASK; |
865 | vid = (data->acpi_data.states[i].control >> VID_SHIFT) & VID_MASK; | 895 | vid = (data->acpi_data->states[i].control >> VID_SHIFT) & VID_MASK; |
866 | } | 896 | } |
867 | 897 | ||
868 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); | 898 | dprintk(" %d : fid 0x%x, vid 0x%x\n", i, fid, vid); |
@@ -903,10 +933,10 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf | |||
903 | cntlofreq = i; | 933 | cntlofreq = i; |
904 | } | 934 | } |
905 | 935 | ||
906 | if (powernow_table[i].frequency != (data->acpi_data.states[i].core_frequency * 1000)) { | 936 | if (powernow_table[i].frequency != (data->acpi_data->states[i].core_frequency * 1000)) { |
907 | printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", | 937 | printk(KERN_INFO PFX "invalid freq entries %u kHz vs. %u kHz\n", |
908 | powernow_table[i].frequency, | 938 | powernow_table[i].frequency, |
909 | (unsigned int) (data->acpi_data.states[i].core_frequency * 1000)); | 939 | (unsigned int) (data->acpi_data->states[i].core_frequency * 1000)); |
910 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; | 940 | powernow_table[i].frequency = CPUFREQ_ENTRY_INVALID; |
911 | continue; | 941 | continue; |
912 | } | 942 | } |
@@ -916,11 +946,12 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpuf | |||
916 | 946 | ||
917 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | 947 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) |
918 | { | 948 | { |
919 | if (data->acpi_data.state_count) | 949 | if (data->acpi_data->state_count) |
920 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | 950 | acpi_processor_unregister_performance(data->acpi_data, data->cpu); |
921 | } | 951 | } |
922 | 952 | ||
923 | #else | 953 | #else |
954 | static int powernow_k8_cpu_preinit_acpi(void) { return -ENODEV; } | ||
924 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } | 955 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } |
925 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } | 956 | static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } |
926 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } | 957 | static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } |
@@ -1017,7 +1048,6 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
1017 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) | 1048 | static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsigned relation) |
1018 | { | 1049 | { |
1019 | cpumask_t oldmask; | 1050 | cpumask_t oldmask; |
1020 | cpumask_of_cpu_ptr(cpu_mask, pol->cpu); | ||
1021 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); | 1051 | struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu); |
1022 | u32 checkfid; | 1052 | u32 checkfid; |
1023 | u32 checkvid; | 1053 | u32 checkvid; |
@@ -1032,7 +1062,7 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi | |||
1032 | 1062 | ||
1033 | /* only run on specific CPU from here on */ | 1063 | /* only run on specific CPU from here on */ |
1034 | oldmask = current->cpus_allowed; | 1064 | oldmask = current->cpus_allowed; |
1035 | set_cpus_allowed_ptr(current, cpu_mask); | 1065 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); |
1036 | 1066 | ||
1037 | if (smp_processor_id() != pol->cpu) { | 1067 | if (smp_processor_id() != pol->cpu) { |
1038 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1068 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
@@ -1106,8 +1136,7 @@ static int powernowk8_verify(struct cpufreq_policy *pol) | |||
1106 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | 1136 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) |
1107 | { | 1137 | { |
1108 | struct powernow_k8_data *data; | 1138 | struct powernow_k8_data *data; |
1109 | cpumask_t oldmask; | 1139 | cpumask_t oldmask = CPU_MASK_ALL; |
1110 | cpumask_of_cpu_ptr_declare(newmask); | ||
1111 | int rc; | 1140 | int rc; |
1112 | 1141 | ||
1113 | if (!cpu_online(pol->cpu)) | 1142 | if (!cpu_online(pol->cpu)) |
@@ -1159,8 +1188,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1159 | 1188 | ||
1160 | /* only run on specific CPU from here on */ | 1189 | /* only run on specific CPU from here on */ |
1161 | oldmask = current->cpus_allowed; | 1190 | oldmask = current->cpus_allowed; |
1162 | cpumask_of_cpu_ptr_next(newmask, pol->cpu); | 1191 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); |
1163 | set_cpus_allowed_ptr(current, newmask); | ||
1164 | 1192 | ||
1165 | if (smp_processor_id() != pol->cpu) { | 1193 | if (smp_processor_id() != pol->cpu) { |
1166 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1194 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); |
@@ -1181,10 +1209,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1181 | /* run on any CPU again */ | 1209 | /* run on any CPU again */ |
1182 | set_cpus_allowed_ptr(current, &oldmask); | 1210 | set_cpus_allowed_ptr(current, &oldmask); |
1183 | 1211 | ||
1184 | if (cpu_family == CPU_HW_PSTATE) | 1212 | pol->cpus = data->starting_core_affinity; |
1185 | pol->cpus = *newmask; | ||
1186 | else | ||
1187 | pol->cpus = per_cpu(cpu_core_map, pol->cpu); | ||
1188 | data->available_cores = &(pol->cpus); | 1213 | data->available_cores = &(pol->cpus); |
1189 | 1214 | ||
1190 | /* Take a crude guess here. | 1215 | /* Take a crude guess here. |
@@ -1248,7 +1273,6 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1248 | { | 1273 | { |
1249 | struct powernow_k8_data *data; | 1274 | struct powernow_k8_data *data; |
1250 | cpumask_t oldmask = current->cpus_allowed; | 1275 | cpumask_t oldmask = current->cpus_allowed; |
1251 | cpumask_of_cpu_ptr(newmask, cpu); | ||
1252 | unsigned int khz = 0; | 1276 | unsigned int khz = 0; |
1253 | unsigned int first; | 1277 | unsigned int first; |
1254 | 1278 | ||
@@ -1258,7 +1282,7 @@ static unsigned int powernowk8_get (unsigned int cpu) | |||
1258 | if (!data) | 1282 | if (!data) |
1259 | return -EINVAL; | 1283 | return -EINVAL; |
1260 | 1284 | ||
1261 | set_cpus_allowed_ptr(current, newmask); | 1285 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
1262 | if (smp_processor_id() != cpu) { | 1286 | if (smp_processor_id() != cpu) { |
1263 | printk(KERN_ERR PFX | 1287 | printk(KERN_ERR PFX |
1264 | "limiting to CPU %d failed in powernowk8_get\n", cpu); | 1288 | "limiting to CPU %d failed in powernowk8_get\n", cpu); |
@@ -1308,6 +1332,7 @@ static int __cpuinit powernowk8_init(void) | |||
1308 | } | 1332 | } |
1309 | 1333 | ||
1310 | if (supported_cpus == num_online_cpus()) { | 1334 | if (supported_cpus == num_online_cpus()) { |
1335 | powernow_k8_cpu_preinit_acpi(); | ||
1311 | printk(KERN_INFO PFX "Found %d %s " | 1336 | printk(KERN_INFO PFX "Found %d %s " |
1312 | "processors (%d cpu cores) (" VERSION ")\n", | 1337 | "processors (%d cpu cores) (" VERSION ")\n", |
1313 | num_online_nodes(), | 1338 | num_online_nodes(), |
@@ -1324,6 +1349,10 @@ static void __exit powernowk8_exit(void) | |||
1324 | dprintk("exit\n"); | 1349 | dprintk("exit\n"); |
1325 | 1350 | ||
1326 | cpufreq_unregister_driver(&cpufreq_amd64_driver); | 1351 | cpufreq_unregister_driver(&cpufreq_amd64_driver); |
1352 | |||
1353 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | ||
1354 | free_percpu(acpi_perf_data); | ||
1355 | #endif | ||
1327 | } | 1356 | } |
1328 | 1357 | ||
1329 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); | 1358 | MODULE_AUTHOR("Paul Devriendt <paul.devriendt@amd.com> and Mark Langsdorf <mark.langsdorf@amd.com>"); |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfed4d96..a62612cd4be8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -33,12 +33,13 @@ struct powernow_k8_data { | |||
33 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI | 33 | #ifdef CONFIG_X86_POWERNOW_K8_ACPI |
34 | /* the acpi table needs to be kept. it's only available if ACPI was | 34 | /* the acpi table needs to be kept. it's only available if ACPI was |
35 | * used to determine valid frequency/vid/fid states */ | 35 | * used to determine valid frequency/vid/fid states */ |
36 | struct acpi_processor_performance acpi_data; | 36 | struct acpi_processor_performance *acpi_data; |
37 | #endif | 37 | #endif |
38 | /* we need to keep track of associated cores, but let cpufreq | 38 | /* we need to keep track of associated cores, but let cpufreq |
39 | * handle hotplug events - so just point at cpufreq pol->cpus | 39 | * handle hotplug events - so just point at cpufreq pol->cpus |
40 | * structure */ | 40 | * structure */ |
41 | cpumask_t *available_cores; | 41 | cpumask_t *available_cores; |
42 | cpumask_t starting_core_affinity; | ||
42 | }; | 43 | }; |
43 | 44 | ||
44 | 45 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index ca2ac13b7af2..15e13c01cc36 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -324,10 +324,9 @@ static unsigned int get_cur_freq(unsigned int cpu) | |||
324 | unsigned l, h; | 324 | unsigned l, h; |
325 | unsigned clock_freq; | 325 | unsigned clock_freq; |
326 | cpumask_t saved_mask; | 326 | cpumask_t saved_mask; |
327 | cpumask_of_cpu_ptr(new_mask, cpu); | ||
328 | 327 | ||
329 | saved_mask = current->cpus_allowed; | 328 | saved_mask = current->cpus_allowed; |
330 | set_cpus_allowed_ptr(current, new_mask); | 329 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
331 | if (smp_processor_id() != cpu) | 330 | if (smp_processor_id() != cpu) |
332 | return 0; | 331 | return 0; |
333 | 332 | ||
@@ -585,15 +584,12 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
585 | * Best effort undo.. | 584 | * Best effort undo.. |
586 | */ | 585 | */ |
587 | 586 | ||
588 | if (!cpus_empty(*covered_cpus)) { | 587 | if (!cpus_empty(*covered_cpus)) |
589 | cpumask_of_cpu_ptr_declare(new_mask); | ||
590 | |||
591 | for_each_cpu_mask_nr(j, *covered_cpus) { | 588 | for_each_cpu_mask_nr(j, *covered_cpus) { |
592 | cpumask_of_cpu_ptr_next(new_mask, j); | 589 | set_cpus_allowed_ptr(current, |
593 | set_cpus_allowed_ptr(current, new_mask); | 590 | &cpumask_of_cpu(j)); |
594 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 591 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
595 | } | 592 | } |
596 | } | ||
597 | 593 | ||
598 | tmp = freqs.new; | 594 | tmp = freqs.new; |
599 | freqs.new = freqs.old; | 595 | freqs.new = freqs.old; |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 2f3728dc24f6..191f7263c61d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -244,8 +244,7 @@ static unsigned int _speedstep_get(const cpumask_t *cpus) | |||
244 | 244 | ||
245 | static unsigned int speedstep_get(unsigned int cpu) | 245 | static unsigned int speedstep_get(unsigned int cpu) |
246 | { | 246 | { |
247 | cpumask_of_cpu_ptr(newmask, cpu); | 247 | return _speedstep_get(&cpumask_of_cpu(cpu)); |
248 | return _speedstep_get(newmask); | ||
249 | } | 248 | } |
250 | 249 | ||
251 | /** | 250 | /** |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 650d40f7912b..6b0a10b002f1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -516,7 +516,6 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
516 | unsigned long j; | 516 | unsigned long j; |
517 | int retval; | 517 | int retval; |
518 | cpumask_t oldmask; | 518 | cpumask_t oldmask; |
519 | cpumask_of_cpu_ptr(newmask, cpu); | ||
520 | 519 | ||
521 | if (num_cache_leaves == 0) | 520 | if (num_cache_leaves == 0) |
522 | return -ENOENT; | 521 | return -ENOENT; |
@@ -527,7 +526,7 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
527 | return -ENOMEM; | 526 | return -ENOMEM; |
528 | 527 | ||
529 | oldmask = current->cpus_allowed; | 528 | oldmask = current->cpus_allowed; |
530 | retval = set_cpus_allowed_ptr(current, newmask); | 529 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
531 | if (retval) | 530 | if (retval) |
532 | goto out; | 531 | goto out; |
533 | 532 | ||
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..eaff0bbb1444 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -99,3 +99,4 @@ int is_uv_system(void) | |||
99 | { | 99 | { |
100 | return uv_system_type != UV_NONE; | 100 | return uv_system_type != UV_NONE; |
101 | } | 101 | } |
102 | EXPORT_SYMBOL_GPL(is_uv_system); | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f67e93441caf..a7010c3a377a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP | |||
456 | 1: | 456 | 1: |
457 | #endif /* CONFIG_SMP */ | 457 | #endif /* CONFIG_SMP */ |
458 | jmp *(initial_code) | 458 | jmp *(initial_code) |
459 | .align 4 | ||
460 | ENTRY(initial_code) | ||
461 | .long i386_start_kernel | ||
462 | 459 | ||
463 | /* | 460 | /* |
464 | * We depend on ET to be correct. This checks for 287/387. | 461 | * We depend on ET to be correct. This checks for 287/387. |
@@ -601,6 +598,11 @@ ignore_int: | |||
601 | #endif | 598 | #endif |
602 | iret | 599 | iret |
603 | 600 | ||
601 | .section .cpuinit.data,"wa" | ||
602 | .align 4 | ||
603 | ENTRY(initial_code) | ||
604 | .long i386_start_kernel | ||
605 | |||
604 | .section .text | 606 | .section .text |
605 | /* | 607 | /* |
606 | * Real beginning of normal "text" segment | 608 | * Real beginning of normal "text" segment |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 3fee2aa50f3f..b68e21f06f4f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
62 | 62 | ||
63 | if (reload) { | 63 | if (reload) { |
64 | #ifdef CONFIG_SMP | 64 | #ifdef CONFIG_SMP |
65 | cpumask_of_cpu_ptr_declare(mask); | ||
66 | |||
67 | preempt_disable(); | 65 | preempt_disable(); |
68 | load_LDT(pc); | 66 | load_LDT(pc); |
69 | cpumask_of_cpu_ptr_next(mask, smp_processor_id()); | 67 | if (!cpus_equal(current->mm->cpu_vm_mask, |
70 | if (!cpus_equal(current->mm->cpu_vm_mask, *mask)) | 68 | cpumask_of_cpu(smp_processor_id()))) |
71 | smp_call_function(flush_ldt, current->mm, 1); | 69 | smp_call_function(flush_ldt, current->mm, 1); |
72 | preempt_enable(); | 70 | preempt_enable(); |
73 | #else | 71 | #else |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..9fe478d98406 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <asm/cpufeature.h> | 22 | #include <asm/cpufeature.h> |
23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
24 | #include <asm/system.h> | 24 | #include <asm/system.h> |
25 | #include <asm/cacheflush.h> | ||
25 | 26 | ||
26 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 27 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
27 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | 28 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
@@ -85,10 +86,12 @@ static void load_segments(void) | |||
85 | * reboot code buffer to allow us to avoid allocations | 86 | * reboot code buffer to allow us to avoid allocations |
86 | * later. | 87 | * later. |
87 | * | 88 | * |
88 | * Currently nothing. | 89 | * Make control page executable. |
89 | */ | 90 | */ |
90 | int machine_kexec_prepare(struct kimage *image) | 91 | int machine_kexec_prepare(struct kimage *image) |
91 | { | 92 | { |
93 | if (nx_enabled) | ||
94 | set_pages_x(image->control_code_page, 1); | ||
92 | return 0; | 95 | return 0; |
93 | } | 96 | } |
94 | 97 | ||
@@ -98,27 +101,48 @@ int machine_kexec_prepare(struct kimage *image) | |||
98 | */ | 101 | */ |
99 | void machine_kexec_cleanup(struct kimage *image) | 102 | void machine_kexec_cleanup(struct kimage *image) |
100 | { | 103 | { |
104 | if (nx_enabled) | ||
105 | set_pages_nx(image->control_code_page, 1); | ||
101 | } | 106 | } |
102 | 107 | ||
103 | /* | 108 | /* |
104 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 109 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
105 | * We are past the point of no return, committed to rebooting now. | 110 | * We are past the point of no return, committed to rebooting now. |
106 | */ | 111 | */ |
107 | NORET_TYPE void machine_kexec(struct kimage *image) | 112 | void machine_kexec(struct kimage *image) |
108 | { | 113 | { |
109 | unsigned long page_list[PAGES_NR]; | 114 | unsigned long page_list[PAGES_NR]; |
110 | void *control_page; | 115 | void *control_page; |
116 | asmlinkage unsigned long | ||
117 | (*relocate_kernel_ptr)(unsigned long indirection_page, | ||
118 | unsigned long control_page, | ||
119 | unsigned long start_address, | ||
120 | unsigned int has_pae, | ||
121 | unsigned int preserve_context); | ||
111 | 122 | ||
112 | tracer_disable(); | 123 | tracer_disable(); |
113 | 124 | ||
114 | /* Interrupts aren't acceptable while we reboot */ | 125 | /* Interrupts aren't acceptable while we reboot */ |
115 | local_irq_disable(); | 126 | local_irq_disable(); |
116 | 127 | ||
128 | if (image->preserve_context) { | ||
129 | #ifdef CONFIG_X86_IO_APIC | ||
130 | /* We need to put APICs in legacy mode so that we can | ||
131 | * get timer interrupts in second kernel. kexec/kdump | ||
132 | * paths already have calls to disable_IO_APIC() in | ||
133 | * one form or other. kexec jump path also need | ||
134 | * one. | ||
135 | */ | ||
136 | disable_IO_APIC(); | ||
137 | #endif | ||
138 | } | ||
139 | |||
117 | control_page = page_address(image->control_code_page); | 140 | control_page = page_address(image->control_code_page); |
118 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 141 | memcpy(control_page, relocate_kernel, PAGE_SIZE/2); |
119 | 142 | ||
143 | relocate_kernel_ptr = control_page; | ||
120 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 144 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
121 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 145 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
122 | page_list[PA_PGD] = __pa(kexec_pgd); | 146 | page_list[PA_PGD] = __pa(kexec_pgd); |
123 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 147 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
124 | #ifdef CONFIG_X86_PAE | 148 | #ifdef CONFIG_X86_PAE |
@@ -131,6 +155,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
131 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 155 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
132 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 156 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
133 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 157 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
158 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); | ||
134 | 159 | ||
135 | /* The segment registers are funny things, they have both a | 160 | /* The segment registers are funny things, they have both a |
136 | * visible and an invisible part. Whenever the visible part is | 161 | * visible and an invisible part. Whenever the visible part is |
@@ -149,8 +174,10 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
149 | set_idt(phys_to_virt(0),0); | 174 | set_idt(phys_to_virt(0),0); |
150 | 175 | ||
151 | /* now call it */ | 176 | /* now call it */ |
152 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 177 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
153 | image->start, cpu_has_pae); | 178 | (unsigned long)page_list, |
179 | image->start, cpu_has_pae, | ||
180 | image->preserve_context); | ||
154 | } | 181 | } |
155 | 182 | ||
156 | void arch_crash_save_vmcoreinfo(void) | 183 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
181 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
182 | * We are past the point of no return, committed to rebooting now. | 182 | * We are past the point of no return, committed to rebooting now. |
183 | */ | 183 | */ |
184 | NORET_TYPE void machine_kexec(struct kimage *image) | 184 | void machine_kexec(struct kimage *image) |
185 | { | 185 | { |
186 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
187 | void *control_page; | 187 | void *control_page; |
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 6994c751590e..652fa5c38ebe 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
@@ -388,7 +388,6 @@ static int do_microcode_update (void) | |||
388 | void *new_mc = NULL; | 388 | void *new_mc = NULL; |
389 | int cpu; | 389 | int cpu; |
390 | cpumask_t old; | 390 | cpumask_t old; |
391 | cpumask_of_cpu_ptr_declare(newmask); | ||
392 | 391 | ||
393 | old = current->cpus_allowed; | 392 | old = current->cpus_allowed; |
394 | 393 | ||
@@ -405,8 +404,7 @@ static int do_microcode_update (void) | |||
405 | 404 | ||
406 | if (!uci->valid) | 405 | if (!uci->valid) |
407 | continue; | 406 | continue; |
408 | cpumask_of_cpu_ptr_next(newmask, cpu); | 407 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
409 | set_cpus_allowed_ptr(current, newmask); | ||
410 | error = get_maching_microcode(new_mc, cpu); | 408 | error = get_maching_microcode(new_mc, cpu); |
411 | if (error < 0) | 409 | if (error < 0) |
412 | goto out; | 410 | goto out; |
@@ -576,7 +574,6 @@ static int apply_microcode_check_cpu(int cpu) | |||
576 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 574 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
577 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 575 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
578 | cpumask_t old; | 576 | cpumask_t old; |
579 | cpumask_of_cpu_ptr(newmask, cpu); | ||
580 | unsigned int val[2]; | 577 | unsigned int val[2]; |
581 | int err = 0; | 578 | int err = 0; |
582 | 579 | ||
@@ -585,7 +582,7 @@ static int apply_microcode_check_cpu(int cpu) | |||
585 | return 0; | 582 | return 0; |
586 | 583 | ||
587 | old = current->cpus_allowed; | 584 | old = current->cpus_allowed; |
588 | set_cpus_allowed_ptr(current, newmask); | 585 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
589 | 586 | ||
590 | /* Check if the microcode we have in memory matches the CPU */ | 587 | /* Check if the microcode we have in memory matches the CPU */ |
591 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || | 588 | if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 || |
@@ -623,12 +620,11 @@ static int apply_microcode_check_cpu(int cpu) | |||
623 | static void microcode_init_cpu(int cpu, int resume) | 620 | static void microcode_init_cpu(int cpu, int resume) |
624 | { | 621 | { |
625 | cpumask_t old; | 622 | cpumask_t old; |
626 | cpumask_of_cpu_ptr(newmask, cpu); | ||
627 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 623 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
628 | 624 | ||
629 | old = current->cpus_allowed; | 625 | old = current->cpus_allowed; |
630 | 626 | ||
631 | set_cpus_allowed_ptr(current, newmask); | 627 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
632 | mutex_lock(µcode_mutex); | 628 | mutex_lock(µcode_mutex); |
633 | collect_cpu_info(cpu); | 629 | collect_cpu_info(cpu); |
634 | if (uci->valid && system_state == SYSTEM_RUNNING && !resume) | 630 | if (uci->valid && system_state == SYSTEM_RUNNING && !resume) |
@@ -661,13 +657,10 @@ static ssize_t reload_store(struct sys_device *dev, | |||
661 | if (end == buf) | 657 | if (end == buf) |
662 | return -EINVAL; | 658 | return -EINVAL; |
663 | if (val == 1) { | 659 | if (val == 1) { |
664 | cpumask_t old; | 660 | cpumask_t old = current->cpus_allowed; |
665 | cpumask_of_cpu_ptr(newmask, cpu); | ||
666 | |||
667 | old = current->cpus_allowed; | ||
668 | 661 | ||
669 | get_online_cpus(); | 662 | get_online_cpus(); |
670 | set_cpus_allowed_ptr(current, newmask); | 663 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
671 | 664 | ||
672 | mutex_lock(µcode_mutex); | 665 | mutex_lock(µcode_mutex); |
673 | if (uci->valid) | 666 | if (uci->valid) |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 19e7fc7c2c4f..b67a4b1d4eae 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
38 | #include <linux/scatterlist.h> | 38 | #include <linux/scatterlist.h> |
39 | #include <linux/iommu-helper.h> | 39 | #include <linux/iommu-helper.h> |
40 | |||
40 | #include <asm/iommu.h> | 41 | #include <asm/iommu.h> |
41 | #include <asm/calgary.h> | 42 | #include <asm/calgary.h> |
42 | #include <asm/tce.h> | 43 | #include <asm/tce.h> |
@@ -413,22 +414,6 @@ static void calgary_unmap_sg(struct device *dev, | |||
413 | } | 414 | } |
414 | } | 415 | } |
415 | 416 | ||
416 | static int calgary_nontranslate_map_sg(struct device* dev, | ||
417 | struct scatterlist *sg, int nelems, int direction) | ||
418 | { | ||
419 | struct scatterlist *s; | ||
420 | int i; | ||
421 | |||
422 | for_each_sg(sg, s, nelems, i) { | ||
423 | struct page *p = sg_page(s); | ||
424 | |||
425 | BUG_ON(!p); | ||
426 | s->dma_address = virt_to_bus(sg_virt(s)); | ||
427 | s->dma_length = s->length; | ||
428 | } | ||
429 | return nelems; | ||
430 | } | ||
431 | |||
432 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | 417 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, |
433 | int nelems, int direction) | 418 | int nelems, int direction) |
434 | { | 419 | { |
@@ -439,9 +424,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
439 | unsigned long entry; | 424 | unsigned long entry; |
440 | int i; | 425 | int i; |
441 | 426 | ||
442 | if (!translation_enabled(tbl)) | ||
443 | return calgary_nontranslate_map_sg(dev, sg, nelems, direction); | ||
444 | |||
445 | for_each_sg(sg, s, nelems, i) { | 427 | for_each_sg(sg, s, nelems, i) { |
446 | BUG_ON(!sg_page(s)); | 428 | BUG_ON(!sg_page(s)); |
447 | 429 | ||
@@ -477,7 +459,6 @@ error: | |||
477 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | 459 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, |
478 | size_t size, int direction) | 460 | size_t size, int direction) |
479 | { | 461 | { |
480 | dma_addr_t dma_handle = bad_dma_address; | ||
481 | void *vaddr = phys_to_virt(paddr); | 462 | void *vaddr = phys_to_virt(paddr); |
482 | unsigned long uaddr; | 463 | unsigned long uaddr; |
483 | unsigned int npages; | 464 | unsigned int npages; |
@@ -486,12 +467,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | |||
486 | uaddr = (unsigned long)vaddr; | 467 | uaddr = (unsigned long)vaddr; |
487 | npages = num_dma_pages(uaddr, size); | 468 | npages = num_dma_pages(uaddr, size); |
488 | 469 | ||
489 | if (translation_enabled(tbl)) | 470 | return iommu_alloc(dev, tbl, vaddr, npages, direction); |
490 | dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); | ||
491 | else | ||
492 | dma_handle = virt_to_bus(vaddr); | ||
493 | |||
494 | return dma_handle; | ||
495 | } | 471 | } |
496 | 472 | ||
497 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | 473 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, |
@@ -500,9 +476,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | |||
500 | struct iommu_table *tbl = find_iommu_table(dev); | 476 | struct iommu_table *tbl = find_iommu_table(dev); |
501 | unsigned int npages; | 477 | unsigned int npages; |
502 | 478 | ||
503 | if (!translation_enabled(tbl)) | ||
504 | return; | ||
505 | |||
506 | npages = num_dma_pages(dma_handle, size); | 479 | npages = num_dma_pages(dma_handle, size); |
507 | iommu_free(tbl, dma_handle, npages); | 480 | iommu_free(tbl, dma_handle, npages); |
508 | } | 481 | } |
@@ -525,18 +498,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
525 | goto error; | 498 | goto error; |
526 | memset(ret, 0, size); | 499 | memset(ret, 0, size); |
527 | 500 | ||
528 | if (translation_enabled(tbl)) { | 501 | /* set up tces to cover the allocated range */ |
529 | /* set up tces to cover the allocated range */ | 502 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
530 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 503 | if (mapping == bad_dma_address) |
531 | if (mapping == bad_dma_address) | 504 | goto free; |
532 | goto free; | 505 | *dma_handle = mapping; |
533 | |||
534 | *dma_handle = mapping; | ||
535 | } else /* non translated slot */ | ||
536 | *dma_handle = virt_to_bus(ret); | ||
537 | |||
538 | return ret; | 506 | return ret; |
539 | |||
540 | free: | 507 | free: |
541 | free_pages((unsigned long)ret, get_order(size)); | 508 | free_pages((unsigned long)ret, get_order(size)); |
542 | ret = NULL; | 509 | ret = NULL; |
@@ -544,7 +511,7 @@ error: | |||
544 | return ret; | 511 | return ret; |
545 | } | 512 | } |
546 | 513 | ||
547 | static const struct dma_mapping_ops calgary_dma_ops = { | 514 | static struct dma_mapping_ops calgary_dma_ops = { |
548 | .alloc_coherent = calgary_alloc_coherent, | 515 | .alloc_coherent = calgary_alloc_coherent, |
549 | .map_single = calgary_map_single, | 516 | .map_single = calgary_map_single, |
550 | .unmap_single = calgary_unmap_single, | 517 | .unmap_single = calgary_unmap_single, |
@@ -1241,6 +1208,16 @@ static int __init calgary_init(void) | |||
1241 | goto error; | 1208 | goto error; |
1242 | } while (1); | 1209 | } while (1); |
1243 | 1210 | ||
1211 | dev = NULL; | ||
1212 | for_each_pci_dev(dev) { | ||
1213 | struct iommu_table *tbl; | ||
1214 | |||
1215 | tbl = find_iommu_table(&dev->dev); | ||
1216 | |||
1217 | if (translation_enabled(tbl)) | ||
1218 | dev->dev.archdata.dma_ops = &calgary_dma_ops; | ||
1219 | } | ||
1220 | |||
1244 | return ret; | 1221 | return ret; |
1245 | 1222 | ||
1246 | error: | 1223 | error: |
@@ -1262,6 +1239,7 @@ error: | |||
1262 | calgary_disable_translation(dev); | 1239 | calgary_disable_translation(dev); |
1263 | calgary_free_bus(dev); | 1240 | calgary_free_bus(dev); |
1264 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ | 1241 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ |
1242 | dev->dev.archdata.dma_ops = NULL; | ||
1265 | } while (1); | 1243 | } while (1); |
1266 | 1244 | ||
1267 | return ret; | 1245 | return ret; |
@@ -1503,6 +1481,10 @@ void __init detect_calgary(void) | |||
1503 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " | 1481 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " |
1504 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | 1482 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, |
1505 | debugging ? "enabled" : "disabled"); | 1483 | debugging ? "enabled" : "disabled"); |
1484 | |||
1485 | /* swiotlb for devices that aren't behind the Calgary. */ | ||
1486 | if (max_pfn > MAX_DMA32_PFN) | ||
1487 | swiotlb = 1; | ||
1506 | } | 1488 | } |
1507 | return; | 1489 | return; |
1508 | 1490 | ||
@@ -1519,7 +1501,7 @@ int __init calgary_iommu_init(void) | |||
1519 | { | 1501 | { |
1520 | int ret; | 1502 | int ret; |
1521 | 1503 | ||
1522 | if (no_iommu || swiotlb) | 1504 | if (no_iommu || (swiotlb && !calgary_detected)) |
1523 | return -ENODEV; | 1505 | return -ENODEV; |
1524 | 1506 | ||
1525 | if (!calgary_detected) | 1507 | if (!calgary_detected) |
@@ -1532,15 +1514,14 @@ int __init calgary_iommu_init(void) | |||
1532 | if (ret) { | 1514 | if (ret) { |
1533 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | 1515 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " |
1534 | "falling back to no_iommu\n", ret); | 1516 | "falling back to no_iommu\n", ret); |
1535 | if (max_pfn > MAX_DMA32_PFN) | ||
1536 | printk(KERN_ERR "WARNING more than 4GB of memory, " | ||
1537 | "32bit PCI may malfunction.\n"); | ||
1538 | return ret; | 1517 | return ret; |
1539 | } | 1518 | } |
1540 | 1519 | ||
1541 | force_iommu = 1; | 1520 | force_iommu = 1; |
1542 | bad_dma_address = 0x0; | 1521 | bad_dma_address = 0x0; |
1543 | dma_ops = &calgary_dma_ops; | 1522 | /* dma_ops is set to swiotlb or nommu */ |
1523 | if (!dma_ops) | ||
1524 | dma_ops = &nommu_dma_ops; | ||
1544 | 1525 | ||
1545 | return 0; | 1526 | return 0; |
1546 | } | 1527 | } |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 88ddd04cfa98..f704cb51ff82 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -11,7 +11,7 @@ | |||
11 | 11 | ||
12 | static int forbid_dac __read_mostly; | 12 | static int forbid_dac __read_mostly; |
13 | 13 | ||
14 | const struct dma_mapping_ops *dma_ops; | 14 | struct dma_mapping_ops *dma_ops; |
15 | EXPORT_SYMBOL(dma_ops); | 15 | EXPORT_SYMBOL(dma_ops); |
16 | 16 | ||
17 | static int iommu_sac_force __read_mostly; | 17 | static int iommu_sac_force __read_mostly; |
@@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) | |||
123 | 123 | ||
124 | pci_swiotlb_init(); | 124 | pci_swiotlb_init(); |
125 | } | 125 | } |
126 | |||
127 | unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | ||
128 | { | ||
129 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
130 | |||
131 | return size >> PAGE_SHIFT; | ||
132 | } | ||
133 | EXPORT_SYMBOL(iommu_num_pages); | ||
126 | #endif | 134 | #endif |
127 | 135 | ||
128 | /* | 136 | /* |
@@ -192,126 +200,10 @@ static __init int iommu_setup(char *p) | |||
192 | } | 200 | } |
193 | early_param("iommu", iommu_setup); | 201 | early_param("iommu", iommu_setup); |
194 | 202 | ||
195 | #ifdef CONFIG_X86_32 | ||
196 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
197 | dma_addr_t device_addr, size_t size, int flags) | ||
198 | { | ||
199 | void __iomem *mem_base = NULL; | ||
200 | int pages = size >> PAGE_SHIFT; | ||
201 | int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | ||
202 | |||
203 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
204 | goto out; | ||
205 | if (!size) | ||
206 | goto out; | ||
207 | if (dev->dma_mem) | ||
208 | goto out; | ||
209 | |||
210 | /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | ||
211 | |||
212 | mem_base = ioremap(bus_addr, size); | ||
213 | if (!mem_base) | ||
214 | goto out; | ||
215 | |||
216 | dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
217 | if (!dev->dma_mem) | ||
218 | goto out; | ||
219 | dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
220 | if (!dev->dma_mem->bitmap) | ||
221 | goto free1_out; | ||
222 | |||
223 | dev->dma_mem->virt_base = mem_base; | ||
224 | dev->dma_mem->device_base = device_addr; | ||
225 | dev->dma_mem->size = pages; | ||
226 | dev->dma_mem->flags = flags; | ||
227 | |||
228 | if (flags & DMA_MEMORY_MAP) | ||
229 | return DMA_MEMORY_MAP; | ||
230 | |||
231 | return DMA_MEMORY_IO; | ||
232 | |||
233 | free1_out: | ||
234 | kfree(dev->dma_mem); | ||
235 | out: | ||
236 | if (mem_base) | ||
237 | iounmap(mem_base); | ||
238 | return 0; | ||
239 | } | ||
240 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
241 | |||
242 | void dma_release_declared_memory(struct device *dev) | ||
243 | { | ||
244 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
245 | |||
246 | if (!mem) | ||
247 | return; | ||
248 | dev->dma_mem = NULL; | ||
249 | iounmap(mem->virt_base); | ||
250 | kfree(mem->bitmap); | ||
251 | kfree(mem); | ||
252 | } | ||
253 | EXPORT_SYMBOL(dma_release_declared_memory); | ||
254 | |||
255 | void *dma_mark_declared_memory_occupied(struct device *dev, | ||
256 | dma_addr_t device_addr, size_t size) | ||
257 | { | ||
258 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
259 | int pos, err; | ||
260 | int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); | ||
261 | |||
262 | pages >>= PAGE_SHIFT; | ||
263 | |||
264 | if (!mem) | ||
265 | return ERR_PTR(-EINVAL); | ||
266 | |||
267 | pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | ||
268 | err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); | ||
269 | if (err != 0) | ||
270 | return ERR_PTR(err); | ||
271 | return mem->virt_base + (pos << PAGE_SHIFT); | ||
272 | } | ||
273 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
274 | |||
275 | static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, | ||
276 | dma_addr_t *dma_handle, void **ret) | ||
277 | { | ||
278 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
279 | int order = get_order(size); | ||
280 | |||
281 | if (mem) { | ||
282 | int page = bitmap_find_free_region(mem->bitmap, mem->size, | ||
283 | order); | ||
284 | if (page >= 0) { | ||
285 | *dma_handle = mem->device_base + (page << PAGE_SHIFT); | ||
286 | *ret = mem->virt_base + (page << PAGE_SHIFT); | ||
287 | memset(*ret, 0, size); | ||
288 | } | ||
289 | if (mem->flags & DMA_MEMORY_EXCLUSIVE) | ||
290 | *ret = NULL; | ||
291 | } | ||
292 | return (mem != NULL); | ||
293 | } | ||
294 | |||
295 | static int dma_release_coherent(struct device *dev, int order, void *vaddr) | ||
296 | { | ||
297 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
298 | |||
299 | if (mem && vaddr >= mem->virt_base && vaddr < | ||
300 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
301 | int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
302 | |||
303 | bitmap_release_region(mem->bitmap, page, order); | ||
304 | return 1; | ||
305 | } | ||
306 | return 0; | ||
307 | } | ||
308 | #else | ||
309 | #define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) | ||
310 | #define dma_release_coherent(dev, order, vaddr) (0) | ||
311 | #endif /* CONFIG_X86_32 */ | ||
312 | |||
313 | int dma_supported(struct device *dev, u64 mask) | 203 | int dma_supported(struct device *dev, u64 mask) |
314 | { | 204 | { |
205 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
206 | |||
315 | #ifdef CONFIG_PCI | 207 | #ifdef CONFIG_PCI |
316 | if (mask > 0xffffffff && forbid_dac > 0) { | 208 | if (mask > 0xffffffff && forbid_dac > 0) { |
317 | dev_info(dev, "PCI: Disallowing DAC for device\n"); | 209 | dev_info(dev, "PCI: Disallowing DAC for device\n"); |
@@ -319,8 +211,8 @@ int dma_supported(struct device *dev, u64 mask) | |||
319 | } | 211 | } |
320 | #endif | 212 | #endif |
321 | 213 | ||
322 | if (dma_ops->dma_supported) | 214 | if (ops->dma_supported) |
323 | return dma_ops->dma_supported(dev, mask); | 215 | return ops->dma_supported(dev, mask); |
324 | 216 | ||
325 | /* Copied from i386. Doesn't make much sense, because it will | 217 | /* Copied from i386. Doesn't make much sense, because it will |
326 | only work for pci_alloc_coherent. | 218 | only work for pci_alloc_coherent. |
@@ -367,6 +259,7 @@ void * | |||
367 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | 259 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
368 | gfp_t gfp) | 260 | gfp_t gfp) |
369 | { | 261 | { |
262 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
370 | void *memory = NULL; | 263 | void *memory = NULL; |
371 | struct page *page; | 264 | struct page *page; |
372 | unsigned long dma_mask = 0; | 265 | unsigned long dma_mask = 0; |
@@ -376,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
376 | /* ignore region specifiers */ | 269 | /* ignore region specifiers */ |
377 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 270 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
378 | 271 | ||
379 | if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) | 272 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) |
380 | return memory; | 273 | return memory; |
381 | 274 | ||
382 | if (!dev) { | 275 | if (!dev) { |
@@ -435,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
435 | /* Let low level make its own zone decisions */ | 328 | /* Let low level make its own zone decisions */ |
436 | gfp &= ~(GFP_DMA32|GFP_DMA); | 329 | gfp &= ~(GFP_DMA32|GFP_DMA); |
437 | 330 | ||
438 | if (dma_ops->alloc_coherent) | 331 | if (ops->alloc_coherent) |
439 | return dma_ops->alloc_coherent(dev, size, | 332 | return ops->alloc_coherent(dev, size, |
440 | dma_handle, gfp); | 333 | dma_handle, gfp); |
441 | return NULL; | 334 | return NULL; |
442 | } | 335 | } |
@@ -448,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
448 | } | 341 | } |
449 | } | 342 | } |
450 | 343 | ||
451 | if (dma_ops->alloc_coherent) { | 344 | if (ops->alloc_coherent) { |
452 | free_pages((unsigned long)memory, get_order(size)); | 345 | free_pages((unsigned long)memory, get_order(size)); |
453 | gfp &= ~(GFP_DMA|GFP_DMA32); | 346 | gfp &= ~(GFP_DMA|GFP_DMA32); |
454 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | 347 | return ops->alloc_coherent(dev, size, dma_handle, gfp); |
455 | } | 348 | } |
456 | 349 | ||
457 | if (dma_ops->map_simple) { | 350 | if (ops->map_simple) { |
458 | *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), | 351 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), |
459 | size, | 352 | size, |
460 | PCI_DMA_BIDIRECTIONAL); | 353 | PCI_DMA_BIDIRECTIONAL); |
461 | if (*dma_handle != bad_dma_address) | 354 | if (*dma_handle != bad_dma_address) |
@@ -477,12 +370,14 @@ EXPORT_SYMBOL(dma_alloc_coherent); | |||
477 | void dma_free_coherent(struct device *dev, size_t size, | 370 | void dma_free_coherent(struct device *dev, size_t size, |
478 | void *vaddr, dma_addr_t bus) | 371 | void *vaddr, dma_addr_t bus) |
479 | { | 372 | { |
373 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
374 | |||
480 | int order = get_order(size); | 375 | int order = get_order(size); |
481 | WARN_ON(irqs_disabled()); /* for portability */ | 376 | WARN_ON(irqs_disabled()); /* for portability */ |
482 | if (dma_release_coherent(dev, order, vaddr)) | 377 | if (dma_release_from_coherent(dev, order, vaddr)) |
483 | return; | 378 | return; |
484 | if (dma_ops->unmap_single) | 379 | if (ops->unmap_single) |
485 | dma_ops->unmap_single(dev, bus, size, 0); | 380 | ops->unmap_single(dev, bus, size, 0); |
486 | free_pages((unsigned long)vaddr, order); | 381 | free_pages((unsigned long)vaddr, order); |
487 | } | 382 | } |
488 | EXPORT_SYMBOL(dma_free_coherent); | 383 | EXPORT_SYMBOL(dma_free_coherent); |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index df5f142657d2..49285f8fd4d5 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry; | |||
67 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) | 67 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) |
68 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) | 68 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) |
69 | 69 | ||
70 | #define to_pages(addr, size) \ | ||
71 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
72 | |||
73 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 70 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
74 | 71 | ||
75 | #ifdef CONFIG_AGP | 72 | #ifdef CONFIG_AGP |
@@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | |||
241 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 238 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
242 | size_t size, int dir) | 239 | size_t size, int dir) |
243 | { | 240 | { |
244 | unsigned long npages = to_pages(phys_mem, size); | 241 | unsigned long npages = iommu_num_pages(phys_mem, size); |
245 | unsigned long iommu_page = alloc_iommu(dev, npages); | 242 | unsigned long iommu_page = alloc_iommu(dev, npages); |
246 | int i; | 243 | int i; |
247 | 244 | ||
@@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
304 | return; | 301 | return; |
305 | 302 | ||
306 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | 303 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; |
307 | npages = to_pages(dma_addr, size); | 304 | npages = iommu_num_pages(dma_addr, size); |
308 | for (i = 0; i < npages; i++) { | 305 | for (i = 0; i < npages; i++) { |
309 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | 306 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; |
310 | CLEAR_LEAK(iommu_page + i); | 307 | CLEAR_LEAK(iommu_page + i); |
@@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
387 | } | 384 | } |
388 | 385 | ||
389 | addr = phys_addr; | 386 | addr = phys_addr; |
390 | pages = to_pages(s->offset, s->length); | 387 | pages = iommu_num_pages(s->offset, s->length); |
391 | while (pages--) { | 388 | while (pages--) { |
392 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | 389 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); |
393 | SET_LEAK(iommu_page); | 390 | SET_LEAK(iommu_page); |
@@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
470 | 467 | ||
471 | seg_size += s->length; | 468 | seg_size += s->length; |
472 | need = nextneed; | 469 | need = nextneed; |
473 | pages += to_pages(s->offset, s->length); | 470 | pages += iommu_num_pages(s->offset, s->length); |
474 | ps = s; | 471 | ps = s; |
475 | } | 472 | } |
476 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) | 473 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) |
@@ -692,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
692 | 689 | ||
693 | extern int agp_amd64_init(void); | 690 | extern int agp_amd64_init(void); |
694 | 691 | ||
695 | static const struct dma_mapping_ops gart_dma_ops = { | 692 | static struct dma_mapping_ops gart_dma_ops = { |
696 | .mapping_error = NULL, | ||
697 | .map_single = gart_map_single, | 693 | .map_single = gart_map_single, |
698 | .map_simple = gart_map_simple, | 694 | .map_simple = gart_map_simple, |
699 | .unmap_single = gart_unmap_single, | 695 | .unmap_single = gart_unmap_single, |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 792b9179eff3..3f91f71cdc3e 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
72 | return nents; | 72 | return nents; |
73 | } | 73 | } |
74 | 74 | ||
75 | /* Make sure we keep the same behaviour */ | 75 | struct dma_mapping_ops nommu_dma_ops = { |
76 | static int nommu_mapping_error(dma_addr_t dma_addr) | ||
77 | { | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | return 0; | ||
80 | #else | ||
81 | return (dma_addr == bad_dma_address); | ||
82 | #endif | ||
83 | } | ||
84 | |||
85 | |||
86 | const struct dma_mapping_ops nommu_dma_ops = { | ||
87 | .map_single = nommu_map_single, | 76 | .map_single = nommu_map_single, |
88 | .map_sg = nommu_map_sg, | 77 | .map_sg = nommu_map_sg, |
89 | .mapping_error = nommu_mapping_error, | ||
90 | .is_phys = 1, | 78 | .is_phys = 1, |
91 | }; | 79 | }; |
92 | 80 | ||
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 20df839b9c20..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | |||
18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); | 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); |
19 | } | 19 | } |
20 | 20 | ||
21 | const struct dma_mapping_ops swiotlb_dma_ops = { | 21 | struct dma_mapping_ops swiotlb_dma_ops = { |
22 | .mapping_error = swiotlb_dma_mapping_error, | 22 | .mapping_error = swiotlb_dma_mapping_error, |
23 | .alloc_coherent = swiotlb_alloc_coherent, | 23 | .alloc_coherent = swiotlb_alloc_coherent, |
24 | .free_coherent = swiotlb_free_coherent, | 24 | .free_coherent = swiotlb_free_coherent, |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 06a9f643817e..724adfc63cb9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -414,25 +414,20 @@ void native_machine_shutdown(void) | |||
414 | 414 | ||
415 | /* The boot cpu is always logical cpu 0 */ | 415 | /* The boot cpu is always logical cpu 0 */ |
416 | int reboot_cpu_id = 0; | 416 | int reboot_cpu_id = 0; |
417 | cpumask_of_cpu_ptr(newmask, reboot_cpu_id); | ||
418 | 417 | ||
419 | #ifdef CONFIG_X86_32 | 418 | #ifdef CONFIG_X86_32 |
420 | /* See if there has been given a command line override */ | 419 | /* See if there has been given a command line override */ |
421 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && | 420 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && |
422 | cpu_online(reboot_cpu)) { | 421 | cpu_online(reboot_cpu)) |
423 | reboot_cpu_id = reboot_cpu; | 422 | reboot_cpu_id = reboot_cpu; |
424 | cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); | ||
425 | } | ||
426 | #endif | 423 | #endif |
427 | 424 | ||
428 | /* Make certain the cpu I'm about to reboot on is online */ | 425 | /* Make certain the cpu I'm about to reboot on is online */ |
429 | if (!cpu_online(reboot_cpu_id)) { | 426 | if (!cpu_online(reboot_cpu_id)) |
430 | reboot_cpu_id = smp_processor_id(); | 427 | reboot_cpu_id = smp_processor_id(); |
431 | cpumask_of_cpu_ptr_next(newmask, reboot_cpu_id); | ||
432 | } | ||
433 | 428 | ||
434 | /* Make certain I only run on the appropriate processor */ | 429 | /* Make certain I only run on the appropriate processor */ |
435 | set_cpus_allowed_ptr(current, newmask); | 430 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); |
436 | 431 | ||
437 | /* O.K Now that I'm on the appropriate processor, | 432 | /* O.K Now that I'm on the appropriate processor, |
438 | * stop all of the others. | 433 | * stop all of the others. |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..703310a99023 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -20,11 +20,44 @@ | |||
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
22 | 22 | ||
23 | /* control_page + PAGE_SIZE/2 ~ control_page + PAGE_SIZE * 3/4 are | ||
24 | * used to save some data for jumping back | ||
25 | */ | ||
26 | #define DATA(offset) (PAGE_SIZE/2+(offset)) | ||
27 | |||
28 | /* Minimal CPU state */ | ||
29 | #define ESP DATA(0x0) | ||
30 | #define CR0 DATA(0x4) | ||
31 | #define CR3 DATA(0x8) | ||
32 | #define CR4 DATA(0xc) | ||
33 | |||
34 | /* other data */ | ||
35 | #define CP_VA_CONTROL_PAGE DATA(0x10) | ||
36 | #define CP_PA_PGD DATA(0x14) | ||
37 | #define CP_PA_SWAP_PAGE DATA(0x18) | ||
38 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | ||
39 | |||
23 | .text | 40 | .text |
24 | .align PAGE_SIZE | 41 | .align PAGE_SIZE |
25 | .globl relocate_kernel | 42 | .globl relocate_kernel |
26 | relocate_kernel: | 43 | relocate_kernel: |
27 | movl 8(%esp), %ebp /* list of pages */ | 44 | /* Save the CPU context, used for jumping back */ |
45 | |||
46 | pushl %ebx | ||
47 | pushl %esi | ||
48 | pushl %edi | ||
49 | pushl %ebp | ||
50 | pushf | ||
51 | |||
52 | movl 20+8(%esp), %ebp /* list of pages */ | ||
53 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
54 | movl %esp, ESP(%edi) | ||
55 | movl %cr0, %eax | ||
56 | movl %eax, CR0(%edi) | ||
57 | movl %cr3, %eax | ||
58 | movl %eax, CR3(%edi) | ||
59 | movl %cr4, %eax | ||
60 | movl %eax, CR4(%edi) | ||
28 | 61 | ||
29 | #ifdef CONFIG_X86_PAE | 62 | #ifdef CONFIG_X86_PAE |
30 | /* map the control page at its virtual address */ | 63 | /* map the control page at its virtual address */ |
@@ -138,15 +171,25 @@ relocate_kernel: | |||
138 | 171 | ||
139 | relocate_new_kernel: | 172 | relocate_new_kernel: |
140 | /* read the arguments and say goodbye to the stack */ | 173 | /* read the arguments and say goodbye to the stack */ |
141 | movl 4(%esp), %ebx /* page_list */ | 174 | movl 20+4(%esp), %ebx /* page_list */ |
142 | movl 8(%esp), %ebp /* list of pages */ | 175 | movl 20+8(%esp), %ebp /* list of pages */ |
143 | movl 12(%esp), %edx /* start address */ | 176 | movl 20+12(%esp), %edx /* start address */ |
144 | movl 16(%esp), %ecx /* cpu_has_pae */ | 177 | movl 20+16(%esp), %ecx /* cpu_has_pae */ |
178 | movl 20+20(%esp), %esi /* preserve_context */ | ||
145 | 179 | ||
146 | /* zero out flags, and disable interrupts */ | 180 | /* zero out flags, and disable interrupts */ |
147 | pushl $0 | 181 | pushl $0 |
148 | popfl | 182 | popfl |
149 | 183 | ||
184 | /* save some information for jumping back */ | ||
185 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
186 | movl %edi, CP_VA_CONTROL_PAGE(%edi) | ||
187 | movl PTR(PA_PGD)(%ebp), %eax | ||
188 | movl %eax, CP_PA_PGD(%edi) | ||
189 | movl PTR(PA_SWAP_PAGE)(%ebp), %eax | ||
190 | movl %eax, CP_PA_SWAP_PAGE(%edi) | ||
191 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | ||
192 | |||
150 | /* get physical address of control page now */ | 193 | /* get physical address of control page now */ |
151 | /* this is impossible after page table switch */ | 194 | /* this is impossible after page table switch */ |
152 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 195 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
@@ -197,8 +240,90 @@ identity_mapped: | |||
197 | xorl %eax, %eax | 240 | xorl %eax, %eax |
198 | movl %eax, %cr3 | 241 | movl %eax, %cr3 |
199 | 242 | ||
243 | movl CP_PA_SWAP_PAGE(%edi), %eax | ||
244 | pushl %eax | ||
245 | pushl %ebx | ||
246 | call swap_pages | ||
247 | addl $8, %esp | ||
248 | |||
249 | /* To be certain of avoiding problems with self-modifying code | ||
250 | * I need to execute a serializing instruction here. | ||
251 | * So I flush the TLB, it's handy, and not processor dependent. | ||
252 | */ | ||
253 | xorl %eax, %eax | ||
254 | movl %eax, %cr3 | ||
255 | |||
256 | /* set all of the registers to known values */ | ||
257 | /* leave %esp alone */ | ||
258 | |||
259 | testl %esi, %esi | ||
260 | jnz 1f | ||
261 | xorl %edi, %edi | ||
262 | xorl %eax, %eax | ||
263 | xorl %ebx, %ebx | ||
264 | xorl %ecx, %ecx | ||
265 | xorl %edx, %edx | ||
266 | xorl %esi, %esi | ||
267 | xorl %ebp, %ebp | ||
268 | ret | ||
269 | 1: | ||
270 | popl %edx | ||
271 | movl CP_PA_SWAP_PAGE(%edi), %esp | ||
272 | addl $PAGE_SIZE, %esp | ||
273 | 2: | ||
274 | call *%edx | ||
275 | |||
276 | /* get the re-entry point of the peer system */ | ||
277 | movl 0(%esp), %ebp | ||
278 | call 1f | ||
279 | 1: | ||
280 | popl %ebx | ||
281 | subl $(1b - relocate_kernel), %ebx | ||
282 | movl CP_VA_CONTROL_PAGE(%ebx), %edi | ||
283 | lea PAGE_SIZE(%ebx), %esp | ||
284 | movl CP_PA_SWAP_PAGE(%ebx), %eax | ||
285 | movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx | ||
286 | pushl %eax | ||
287 | pushl %edx | ||
288 | call swap_pages | ||
289 | addl $8, %esp | ||
290 | movl CP_PA_PGD(%ebx), %eax | ||
291 | movl %eax, %cr3 | ||
292 | movl %cr0, %eax | ||
293 | orl $(1<<31), %eax | ||
294 | movl %eax, %cr0 | ||
295 | lea PAGE_SIZE(%edi), %esp | ||
296 | movl %edi, %eax | ||
297 | addl $(virtual_mapped - relocate_kernel), %eax | ||
298 | pushl %eax | ||
299 | ret | ||
300 | |||
301 | virtual_mapped: | ||
302 | movl CR4(%edi), %eax | ||
303 | movl %eax, %cr4 | ||
304 | movl CR3(%edi), %eax | ||
305 | movl %eax, %cr3 | ||
306 | movl CR0(%edi), %eax | ||
307 | movl %eax, %cr0 | ||
308 | movl ESP(%edi), %esp | ||
309 | movl %ebp, %eax | ||
310 | |||
311 | popf | ||
312 | popl %ebp | ||
313 | popl %edi | ||
314 | popl %esi | ||
315 | popl %ebx | ||
316 | ret | ||
317 | |||
200 | /* Do the copies */ | 318 | /* Do the copies */ |
201 | movl %ebx, %ecx | 319 | swap_pages: |
320 | movl 8(%esp), %edx | ||
321 | movl 4(%esp), %ecx | ||
322 | pushl %ebp | ||
323 | pushl %ebx | ||
324 | pushl %edi | ||
325 | pushl %esi | ||
326 | movl %ecx, %ebx | ||
202 | jmp 1f | 327 | jmp 1f |
203 | 328 | ||
204 | 0: /* top, read another word from the indirection page */ | 329 | 0: /* top, read another word from the indirection page */ |
@@ -226,27 +351,28 @@ identity_mapped: | |||
226 | movl %ecx, %esi /* For every source page do a copy */ | 351 | movl %ecx, %esi /* For every source page do a copy */ |
227 | andl $0xfffff000, %esi | 352 | andl $0xfffff000, %esi |
228 | 353 | ||
354 | movl %edi, %eax | ||
355 | movl %esi, %ebp | ||
356 | |||
357 | movl %edx, %edi | ||
229 | movl $1024, %ecx | 358 | movl $1024, %ecx |
230 | rep ; movsl | 359 | rep ; movsl |
231 | jmp 0b | ||
232 | 360 | ||
233 | 3: | 361 | movl %ebp, %edi |
234 | 362 | movl %eax, %esi | |
235 | /* To be certain of avoiding problems with self-modifying code | 363 | movl $1024, %ecx |
236 | * I need to execute a serializing instruction here. | 364 | rep ; movsl |
237 | * So I flush the TLB, it's handy, and not processor dependent. | ||
238 | */ | ||
239 | xorl %eax, %eax | ||
240 | movl %eax, %cr3 | ||
241 | 365 | ||
242 | /* set all of the registers to known values */ | 366 | movl %eax, %edi |
243 | /* leave %esp alone */ | 367 | movl %edx, %esi |
368 | movl $1024, %ecx | ||
369 | rep ; movsl | ||
244 | 370 | ||
245 | xorl %eax, %eax | 371 | lea PAGE_SIZE(%ebp), %esi |
246 | xorl %ebx, %ebx | 372 | jmp 0b |
247 | xorl %ecx, %ecx | 373 | 3: |
248 | xorl %edx, %edx | 374 | popl %esi |
249 | xorl %esi, %esi | 375 | popl %edi |
250 | xorl %edi, %edi | 376 | popl %ebx |
251 | xorl %ebp, %ebp | 377 | popl %ebp |
252 | ret | 378 | ret |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b520dae02bf4..2d888586385d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -788,10 +788,6 @@ void __init setup_arch(char **cmdline_p) | |||
788 | 788 | ||
789 | initmem_init(0, max_pfn); | 789 | initmem_init(0, max_pfn); |
790 | 790 | ||
791 | #ifdef CONFIG_X86_64 | ||
792 | dma32_reserve_bootmem(); | ||
793 | #endif | ||
794 | |||
795 | #ifdef CONFIG_ACPI_SLEEP | 791 | #ifdef CONFIG_ACPI_SLEEP |
796 | /* | 792 | /* |
797 | * Reserve low memory region for sleep support. | 793 | * Reserve low memory region for sleep support. |
@@ -806,6 +802,15 @@ void __init setup_arch(char **cmdline_p) | |||
806 | #endif | 802 | #endif |
807 | reserve_crashkernel(); | 803 | reserve_crashkernel(); |
808 | 804 | ||
805 | #ifdef CONFIG_X86_64 | ||
806 | /* | ||
807 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
808 | * with the crashkernel command line, so do that after | ||
809 | * reserve_crashkernel() | ||
810 | */ | ||
811 | dma32_reserve_bootmem(); | ||
812 | #endif | ||
813 | |||
809 | reserve_ibft_region(); | 814 | reserve_ibft_region(); |
810 | 815 | ||
811 | #ifdef CONFIG_KVM_CLOCK | 816 | #ifdef CONFIG_KVM_CLOCK |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index f7745f94c006..76e305e064f9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void) | |||
80 | #endif | 80 | #endif |
81 | } | 81 | } |
82 | 82 | ||
83 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | ||
84 | cpumask_t *cpumask_of_cpu_map __read_mostly; | ||
85 | EXPORT_SYMBOL(cpumask_of_cpu_map); | ||
86 | |||
87 | /* requires nr_cpu_ids to be initialized */ | ||
88 | static void __init setup_cpumask_of_cpu(void) | ||
89 | { | ||
90 | int i; | ||
91 | |||
92 | /* alloc_bootmem zeroes memory */ | ||
93 | cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); | ||
94 | for (i = 0; i < nr_cpu_ids; i++) | ||
95 | cpu_set(i, cpumask_of_cpu_map[i]); | ||
96 | } | ||
97 | #else | ||
98 | static inline void setup_cpumask_of_cpu(void) { } | ||
99 | #endif | ||
100 | |||
101 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
102 | /* | 84 | /* |
103 | * Great future not-so-futuristic plan: make i386 and x86_64 do it | 85 | * Great future not-so-futuristic plan: make i386 and x86_64 do it |
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void) | |||
197 | 179 | ||
198 | /* Setup node to cpumask map */ | 180 | /* Setup node to cpumask map */ |
199 | setup_node_to_cpumask_map(); | 181 | setup_node_to_cpumask_map(); |
200 | |||
201 | /* Setup cpumask_of_cpu map */ | ||
202 | setup_cpumask_of_cpu(); | ||
203 | } | 182 | } |
204 | 183 | ||
205 | #endif | 184 | #endif |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3b..ce3251ce5504 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -21,6 +21,7 @@ config KVM | |||
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
23 | select PREEMPT_NOTIFIERS | 23 | select PREEMPT_NOTIFIERS |
24 | select MMU_NOTIFIER | ||
24 | select ANON_INODES | 25 | select ANON_INODES |
25 | ---help--- | 26 | ---help--- |
26 | Support hosting fully virtualized guest machines using hardware | 27 | Support hosting fully virtualized guest machines using hardware |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b0e4ddca6c18..0bfe2bd305eb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -653,6 +653,84 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
653 | account_shadowed(kvm, gfn); | 653 | account_shadowed(kvm, gfn); |
654 | } | 654 | } |
655 | 655 | ||
656 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
657 | { | ||
658 | u64 *spte; | ||
659 | int need_tlb_flush = 0; | ||
660 | |||
661 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | ||
662 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | ||
663 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | ||
664 | rmap_remove(kvm, spte); | ||
665 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | ||
666 | need_tlb_flush = 1; | ||
667 | } | ||
668 | return need_tlb_flush; | ||
669 | } | ||
670 | |||
671 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
672 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | ||
673 | { | ||
674 | int i; | ||
675 | int retval = 0; | ||
676 | |||
677 | /* | ||
678 | * If mmap_sem isn't taken, we can look the memslots with only | ||
679 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | ||
680 | */ | ||
681 | for (i = 0; i < kvm->nmemslots; i++) { | ||
682 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
683 | unsigned long start = memslot->userspace_addr; | ||
684 | unsigned long end; | ||
685 | |||
686 | /* mmu_lock protects userspace_addr */ | ||
687 | if (!start) | ||
688 | continue; | ||
689 | |||
690 | end = start + (memslot->npages << PAGE_SHIFT); | ||
691 | if (hva >= start && hva < end) { | ||
692 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | ||
693 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | ||
694 | retval |= handler(kvm, | ||
695 | &memslot->lpage_info[ | ||
696 | gfn_offset / | ||
697 | KVM_PAGES_PER_HPAGE].rmap_pde); | ||
698 | } | ||
699 | } | ||
700 | |||
701 | return retval; | ||
702 | } | ||
703 | |||
704 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
705 | { | ||
706 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | ||
707 | } | ||
708 | |||
709 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
710 | { | ||
711 | u64 *spte; | ||
712 | int young = 0; | ||
713 | |||
714 | spte = rmap_next(kvm, rmapp, NULL); | ||
715 | while (spte) { | ||
716 | int _young; | ||
717 | u64 _spte = *spte; | ||
718 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
719 | _young = _spte & PT_ACCESSED_MASK; | ||
720 | if (_young) { | ||
721 | young = 1; | ||
722 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | ||
723 | } | ||
724 | spte = rmap_next(kvm, rmapp, spte); | ||
725 | } | ||
726 | return young; | ||
727 | } | ||
728 | |||
729 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
730 | { | ||
731 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | ||
732 | } | ||
733 | |||
656 | #ifdef MMU_DEBUG | 734 | #ifdef MMU_DEBUG |
657 | static int is_empty_shadow_page(u64 *spt) | 735 | static int is_empty_shadow_page(u64 *spt) |
658 | { | 736 | { |
@@ -1203,6 +1281,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1203 | int r; | 1281 | int r; |
1204 | int largepage = 0; | 1282 | int largepage = 0; |
1205 | pfn_t pfn; | 1283 | pfn_t pfn; |
1284 | unsigned long mmu_seq; | ||
1206 | 1285 | ||
1207 | down_read(¤t->mm->mmap_sem); | 1286 | down_read(¤t->mm->mmap_sem); |
1208 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1287 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
@@ -1210,6 +1289,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1210 | largepage = 1; | 1289 | largepage = 1; |
1211 | } | 1290 | } |
1212 | 1291 | ||
1292 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1293 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1213 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1294 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1214 | up_read(¤t->mm->mmap_sem); | 1295 | up_read(¤t->mm->mmap_sem); |
1215 | 1296 | ||
@@ -1220,6 +1301,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1220 | } | 1301 | } |
1221 | 1302 | ||
1222 | spin_lock(&vcpu->kvm->mmu_lock); | 1303 | spin_lock(&vcpu->kvm->mmu_lock); |
1304 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
1305 | goto out_unlock; | ||
1223 | kvm_mmu_free_some_pages(vcpu); | 1306 | kvm_mmu_free_some_pages(vcpu); |
1224 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, | 1307 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, |
1225 | PT32E_ROOT_LEVEL); | 1308 | PT32E_ROOT_LEVEL); |
@@ -1227,6 +1310,11 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1227 | 1310 | ||
1228 | 1311 | ||
1229 | return r; | 1312 | return r; |
1313 | |||
1314 | out_unlock: | ||
1315 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1316 | kvm_release_pfn_clean(pfn); | ||
1317 | return 0; | ||
1230 | } | 1318 | } |
1231 | 1319 | ||
1232 | 1320 | ||
@@ -1345,6 +1433,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1345 | int r; | 1433 | int r; |
1346 | int largepage = 0; | 1434 | int largepage = 0; |
1347 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1435 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1436 | unsigned long mmu_seq; | ||
1348 | 1437 | ||
1349 | ASSERT(vcpu); | 1438 | ASSERT(vcpu); |
1350 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 1439 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
@@ -1358,6 +1447,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1358 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1447 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
1359 | largepage = 1; | 1448 | largepage = 1; |
1360 | } | 1449 | } |
1450 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1451 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1361 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1452 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1362 | up_read(¤t->mm->mmap_sem); | 1453 | up_read(¤t->mm->mmap_sem); |
1363 | if (is_error_pfn(pfn)) { | 1454 | if (is_error_pfn(pfn)) { |
@@ -1365,12 +1456,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1365 | return 1; | 1456 | return 1; |
1366 | } | 1457 | } |
1367 | spin_lock(&vcpu->kvm->mmu_lock); | 1458 | spin_lock(&vcpu->kvm->mmu_lock); |
1459 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
1460 | goto out_unlock; | ||
1368 | kvm_mmu_free_some_pages(vcpu); | 1461 | kvm_mmu_free_some_pages(vcpu); |
1369 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1462 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
1370 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); | 1463 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); |
1371 | spin_unlock(&vcpu->kvm->mmu_lock); | 1464 | spin_unlock(&vcpu->kvm->mmu_lock); |
1372 | 1465 | ||
1373 | return r; | 1466 | return r; |
1467 | |||
1468 | out_unlock: | ||
1469 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1470 | kvm_release_pfn_clean(pfn); | ||
1471 | return 0; | ||
1374 | } | 1472 | } |
1375 | 1473 | ||
1376 | static void nonpaging_free(struct kvm_vcpu *vcpu) | 1474 | static void nonpaging_free(struct kvm_vcpu *vcpu) |
@@ -1670,6 +1768,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1670 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1768 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
1671 | vcpu->arch.update_pte.largepage = 1; | 1769 | vcpu->arch.update_pte.largepage = 1; |
1672 | } | 1770 | } |
1771 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1772 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1673 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1773 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1674 | up_read(¤t->mm->mmap_sem); | 1774 | up_read(¤t->mm->mmap_sem); |
1675 | 1775 | ||
@@ -1814,6 +1914,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
1814 | spin_unlock(&vcpu->kvm->mmu_lock); | 1914 | spin_unlock(&vcpu->kvm->mmu_lock); |
1815 | return r; | 1915 | return r; |
1816 | } | 1916 | } |
1917 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | ||
1817 | 1918 | ||
1818 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 1919 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
1819 | { | 1920 | { |
@@ -1870,6 +1971,12 @@ void kvm_enable_tdp(void) | |||
1870 | } | 1971 | } |
1871 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); | 1972 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); |
1872 | 1973 | ||
1974 | void kvm_disable_tdp(void) | ||
1975 | { | ||
1976 | tdp_enabled = false; | ||
1977 | } | ||
1978 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | ||
1979 | |||
1873 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 1980 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
1874 | { | 1981 | { |
1875 | struct kvm_mmu_page *sp; | 1982 | struct kvm_mmu_page *sp; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 4d918220baeb..f72ac1fa35f0 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
263 | pfn = vcpu->arch.update_pte.pfn; | 263 | pfn = vcpu->arch.update_pte.pfn; |
264 | if (is_error_pfn(pfn)) | 264 | if (is_error_pfn(pfn)) |
265 | return; | 265 | return; |
266 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
267 | return; | ||
266 | kvm_get_pfn(pfn); | 268 | kvm_get_pfn(pfn); |
267 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
268 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), | 270 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), |
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
380 | int r; | 382 | int r; |
381 | pfn_t pfn; | 383 | pfn_t pfn; |
382 | int largepage = 0; | 384 | int largepage = 0; |
385 | unsigned long mmu_seq; | ||
383 | 386 | ||
384 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 387 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
385 | kvm_mmu_audit(vcpu, "pre page fault"); | 388 | kvm_mmu_audit(vcpu, "pre page fault"); |
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
413 | largepage = 1; | 416 | largepage = 1; |
414 | } | 417 | } |
415 | } | 418 | } |
419 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
420 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
416 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 421 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
417 | up_read(¤t->mm->mmap_sem); | 422 | up_read(¤t->mm->mmap_sem); |
418 | 423 | ||
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
424 | } | 429 | } |
425 | 430 | ||
426 | spin_lock(&vcpu->kvm->mmu_lock); | 431 | spin_lock(&vcpu->kvm->mmu_lock); |
432 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
433 | goto out_unlock; | ||
427 | kvm_mmu_free_some_pages(vcpu); | 434 | kvm_mmu_free_some_pages(vcpu); |
428 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 435 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
429 | largepage, &write_pt, pfn); | 436 | largepage, &write_pt, pfn); |
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
439 | spin_unlock(&vcpu->kvm->mmu_lock); | 446 | spin_unlock(&vcpu->kvm->mmu_lock); |
440 | 447 | ||
441 | return write_pt; | 448 | return write_pt; |
449 | |||
450 | out_unlock: | ||
451 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
452 | kvm_release_pfn_clean(pfn); | ||
453 | return 0; | ||
442 | } | 454 | } |
443 | 455 | ||
444 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 456 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b756e876dce3..e2ee264740c7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -453,7 +453,8 @@ static __init int svm_hardware_setup(void) | |||
453 | if (npt_enabled) { | 453 | if (npt_enabled) { |
454 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); | 454 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); |
455 | kvm_enable_tdp(); | 455 | kvm_enable_tdp(); |
456 | } | 456 | } else |
457 | kvm_disable_tdp(); | ||
457 | 458 | ||
458 | return 0; | 459 | return 0; |
459 | 460 | ||
@@ -1007,10 +1008,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1007 | struct kvm *kvm = svm->vcpu.kvm; | 1008 | struct kvm *kvm = svm->vcpu.kvm; |
1008 | u64 fault_address; | 1009 | u64 fault_address; |
1009 | u32 error_code; | 1010 | u32 error_code; |
1011 | bool event_injection = false; | ||
1010 | 1012 | ||
1011 | if (!irqchip_in_kernel(kvm) && | 1013 | if (!irqchip_in_kernel(kvm) && |
1012 | is_external_interrupt(exit_int_info)) | 1014 | is_external_interrupt(exit_int_info)) { |
1015 | event_injection = true; | ||
1013 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | 1016 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); |
1017 | } | ||
1014 | 1018 | ||
1015 | fault_address = svm->vmcb->control.exit_info_2; | 1019 | fault_address = svm->vmcb->control.exit_info_2; |
1016 | error_code = svm->vmcb->control.exit_info_1; | 1020 | error_code = svm->vmcb->control.exit_info_1; |
@@ -1024,6 +1028,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1024 | (u32)fault_address, (u32)(fault_address >> 32), | 1028 | (u32)fault_address, (u32)(fault_address >> 32), |
1025 | handler); | 1029 | handler); |
1026 | 1030 | ||
1031 | if (event_injection) | ||
1032 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | ||
1027 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1033 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1028 | } | 1034 | } |
1029 | 1035 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0cac63701719..2a69773e3b26 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
2300 | (u32)((u64)cr2 >> 32), handler); | 2300 | (u32)((u64)cr2 >> 32), handler); |
2301 | if (vect_info & VECTORING_INFO_VALID_MASK) | ||
2302 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | ||
2301 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2303 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
2302 | } | 2304 | } |
2303 | 2305 | ||
@@ -3116,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3116 | return ERR_PTR(-ENOMEM); | 3118 | return ERR_PTR(-ENOMEM); |
3117 | 3119 | ||
3118 | allocate_vpid(vmx); | 3120 | allocate_vpid(vmx); |
3119 | if (id == 0 && vm_need_ept()) { | ||
3120 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3121 | VMX_EPT_WRITABLE_MASK | | ||
3122 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3123 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3124 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3125 | VMX_EPT_EXECUTABLE_MASK); | ||
3126 | kvm_enable_tdp(); | ||
3127 | } | ||
3128 | 3121 | ||
3129 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 3122 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
3130 | if (err) | 3123 | if (err) |
@@ -3303,8 +3296,17 @@ static int __init vmx_init(void) | |||
3303 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3296 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); |
3304 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3297 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); |
3305 | 3298 | ||
3306 | if (cpu_has_vmx_ept()) | 3299 | if (vm_need_ept()) { |
3307 | bypass_guest_pf = 0; | 3300 | bypass_guest_pf = 0; |
3301 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3302 | VMX_EPT_WRITABLE_MASK | | ||
3303 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3304 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3305 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3306 | VMX_EPT_EXECUTABLE_MASK); | ||
3307 | kvm_enable_tdp(); | ||
3308 | } else | ||
3309 | kvm_disable_tdp(); | ||
3308 | 3310 | ||
3309 | if (bypass_guest_pf) | 3311 | if (bypass_guest_pf) |
3310 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 3312 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f1cdb011cff..0d682fc6aeb3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -883,6 +883,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
883 | case KVM_CAP_PIT: | 883 | case KVM_CAP_PIT: |
884 | case KVM_CAP_NOP_IO_DELAY: | 884 | case KVM_CAP_NOP_IO_DELAY: |
885 | case KVM_CAP_MP_STATE: | 885 | case KVM_CAP_MP_STATE: |
886 | case KVM_CAP_SYNC_MMU: | ||
886 | r = 1; | 887 | r = 1; |
887 | break; | 888 | break; |
888 | case KVM_CAP_COALESCED_MMIO: | 889 | case KVM_CAP_COALESCED_MMIO: |
@@ -1495,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1495 | goto out; | 1496 | goto out; |
1496 | 1497 | ||
1497 | down_write(&kvm->slots_lock); | 1498 | down_write(&kvm->slots_lock); |
1499 | spin_lock(&kvm->mmu_lock); | ||
1498 | 1500 | ||
1499 | p = &kvm->arch.aliases[alias->slot]; | 1501 | p = &kvm->arch.aliases[alias->slot]; |
1500 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 1502 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -1506,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1506 | break; | 1508 | break; |
1507 | kvm->arch.naliases = n; | 1509 | kvm->arch.naliases = n; |
1508 | 1510 | ||
1511 | spin_unlock(&kvm->mmu_lock); | ||
1509 | kvm_mmu_zap_all(kvm); | 1512 | kvm_mmu_zap_all(kvm); |
1510 | 1513 | ||
1511 | up_write(&kvm->slots_lock); | 1514 | up_write(&kvm->slots_lock); |
@@ -3184,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | |||
3184 | kvm_desct->base |= seg_desc->base2 << 24; | 3187 | kvm_desct->base |= seg_desc->base2 << 24; |
3185 | kvm_desct->limit = seg_desc->limit0; | 3188 | kvm_desct->limit = seg_desc->limit0; |
3186 | kvm_desct->limit |= seg_desc->limit << 16; | 3189 | kvm_desct->limit |= seg_desc->limit << 16; |
3190 | if (seg_desc->g) { | ||
3191 | kvm_desct->limit <<= 12; | ||
3192 | kvm_desct->limit |= 0xfff; | ||
3193 | } | ||
3187 | kvm_desct->selector = selector; | 3194 | kvm_desct->selector = selector; |
3188 | kvm_desct->type = seg_desc->type; | 3195 | kvm_desct->type = seg_desc->type; |
3189 | kvm_desct->present = seg_desc->p; | 3196 | kvm_desct->present = seg_desc->p; |
@@ -3223,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | |||
3223 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3230 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3224 | struct desc_struct *seg_desc) | 3231 | struct desc_struct *seg_desc) |
3225 | { | 3232 | { |
3233 | gpa_t gpa; | ||
3226 | struct descriptor_table dtable; | 3234 | struct descriptor_table dtable; |
3227 | u16 index = selector >> 3; | 3235 | u16 index = selector >> 3; |
3228 | 3236 | ||
@@ -3232,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3232 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 3240 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
3233 | return 1; | 3241 | return 1; |
3234 | } | 3242 | } |
3235 | return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3243 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
3244 | gpa += index * 8; | ||
3245 | return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
3236 | } | 3246 | } |
3237 | 3247 | ||
3238 | /* allowed just for 8 bytes segments */ | 3248 | /* allowed just for 8 bytes segments */ |
3239 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3249 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3240 | struct desc_struct *seg_desc) | 3250 | struct desc_struct *seg_desc) |
3241 | { | 3251 | { |
3252 | gpa_t gpa; | ||
3242 | struct descriptor_table dtable; | 3253 | struct descriptor_table dtable; |
3243 | u16 index = selector >> 3; | 3254 | u16 index = selector >> 3; |
3244 | 3255 | ||
@@ -3246,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3246 | 3257 | ||
3247 | if (dtable.limit < index * 8 + 7) | 3258 | if (dtable.limit < index * 8 + 7) |
3248 | return 1; | 3259 | return 1; |
3249 | return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3260 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
3261 | gpa += index * 8; | ||
3262 | return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
3250 | } | 3263 | } |
3251 | 3264 | ||
3252 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | 3265 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, |
@@ -3258,55 +3271,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | |||
3258 | base_addr |= (seg_desc->base1 << 16); | 3271 | base_addr |= (seg_desc->base1 << 16); |
3259 | base_addr |= (seg_desc->base2 << 24); | 3272 | base_addr |= (seg_desc->base2 << 24); |
3260 | 3273 | ||
3261 | return base_addr; | 3274 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); |
3262 | } | ||
3263 | |||
3264 | static int load_tss_segment32(struct kvm_vcpu *vcpu, | ||
3265 | struct desc_struct *seg_desc, | ||
3266 | struct tss_segment_32 *tss) | ||
3267 | { | ||
3268 | u32 base_addr; | ||
3269 | |||
3270 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3271 | |||
3272 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3273 | sizeof(struct tss_segment_32)); | ||
3274 | } | ||
3275 | |||
3276 | static int save_tss_segment32(struct kvm_vcpu *vcpu, | ||
3277 | struct desc_struct *seg_desc, | ||
3278 | struct tss_segment_32 *tss) | ||
3279 | { | ||
3280 | u32 base_addr; | ||
3281 | |||
3282 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3283 | |||
3284 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3285 | sizeof(struct tss_segment_32)); | ||
3286 | } | ||
3287 | |||
3288 | static int load_tss_segment16(struct kvm_vcpu *vcpu, | ||
3289 | struct desc_struct *seg_desc, | ||
3290 | struct tss_segment_16 *tss) | ||
3291 | { | ||
3292 | u32 base_addr; | ||
3293 | |||
3294 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3295 | |||
3296 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3297 | sizeof(struct tss_segment_16)); | ||
3298 | } | ||
3299 | |||
3300 | static int save_tss_segment16(struct kvm_vcpu *vcpu, | ||
3301 | struct desc_struct *seg_desc, | ||
3302 | struct tss_segment_16 *tss) | ||
3303 | { | ||
3304 | u32 base_addr; | ||
3305 | |||
3306 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3307 | |||
3308 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3309 | sizeof(struct tss_segment_16)); | ||
3310 | } | 3275 | } |
3311 | 3276 | ||
3312 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 3277 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
@@ -3466,20 +3431,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
3466 | } | 3431 | } |
3467 | 3432 | ||
3468 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | 3433 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, |
3469 | struct desc_struct *cseg_desc, | 3434 | u32 old_tss_base, |
3470 | struct desc_struct *nseg_desc) | 3435 | struct desc_struct *nseg_desc) |
3471 | { | 3436 | { |
3472 | struct tss_segment_16 tss_segment_16; | 3437 | struct tss_segment_16 tss_segment_16; |
3473 | int ret = 0; | 3438 | int ret = 0; |
3474 | 3439 | ||
3475 | if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) | 3440 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
3441 | sizeof tss_segment_16)) | ||
3476 | goto out; | 3442 | goto out; |
3477 | 3443 | ||
3478 | save_state_to_tss16(vcpu, &tss_segment_16); | 3444 | save_state_to_tss16(vcpu, &tss_segment_16); |
3479 | save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); | ||
3480 | 3445 | ||
3481 | if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) | 3446 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
3447 | sizeof tss_segment_16)) | ||
3448 | goto out; | ||
3449 | |||
3450 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
3451 | &tss_segment_16, sizeof tss_segment_16)) | ||
3482 | goto out; | 3452 | goto out; |
3453 | |||
3483 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | 3454 | if (load_state_from_tss16(vcpu, &tss_segment_16)) |
3484 | goto out; | 3455 | goto out; |
3485 | 3456 | ||
@@ -3489,20 +3460,26 @@ out: | |||
3489 | } | 3460 | } |
3490 | 3461 | ||
3491 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | 3462 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, |
3492 | struct desc_struct *cseg_desc, | 3463 | u32 old_tss_base, |
3493 | struct desc_struct *nseg_desc) | 3464 | struct desc_struct *nseg_desc) |
3494 | { | 3465 | { |
3495 | struct tss_segment_32 tss_segment_32; | 3466 | struct tss_segment_32 tss_segment_32; |
3496 | int ret = 0; | 3467 | int ret = 0; |
3497 | 3468 | ||
3498 | if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) | 3469 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
3470 | sizeof tss_segment_32)) | ||
3499 | goto out; | 3471 | goto out; |
3500 | 3472 | ||
3501 | save_state_to_tss32(vcpu, &tss_segment_32); | 3473 | save_state_to_tss32(vcpu, &tss_segment_32); |
3502 | save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); | ||
3503 | 3474 | ||
3504 | if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) | 3475 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
3476 | sizeof tss_segment_32)) | ||
3505 | goto out; | 3477 | goto out; |
3478 | |||
3479 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
3480 | &tss_segment_32, sizeof tss_segment_32)) | ||
3481 | goto out; | ||
3482 | |||
3506 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | 3483 | if (load_state_from_tss32(vcpu, &tss_segment_32)) |
3507 | goto out; | 3484 | goto out; |
3508 | 3485 | ||
@@ -3517,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3517 | struct desc_struct cseg_desc; | 3494 | struct desc_struct cseg_desc; |
3518 | struct desc_struct nseg_desc; | 3495 | struct desc_struct nseg_desc; |
3519 | int ret = 0; | 3496 | int ret = 0; |
3497 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
3498 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
3520 | 3499 | ||
3521 | kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3500 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); |
3522 | 3501 | ||
3502 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
3503 | * descriptors should generate a pagefault. | ||
3504 | */ | ||
3523 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | 3505 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) |
3524 | goto out; | 3506 | goto out; |
3525 | 3507 | ||
3526 | if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) | 3508 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) |
3527 | goto out; | 3509 | goto out; |
3528 | 3510 | ||
3529 | |||
3530 | if (reason != TASK_SWITCH_IRET) { | 3511 | if (reason != TASK_SWITCH_IRET) { |
3531 | int cpl; | 3512 | int cpl; |
3532 | 3513 | ||
@@ -3544,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3544 | 3525 | ||
3545 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3526 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
3546 | cseg_desc.type &= ~(1 << 1); //clear the B flag | 3527 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
3547 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3528 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); |
3548 | &cseg_desc); | ||
3549 | } | 3529 | } |
3550 | 3530 | ||
3551 | if (reason == TASK_SWITCH_IRET) { | 3531 | if (reason == TASK_SWITCH_IRET) { |
@@ -3557,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3557 | kvm_x86_ops->cache_regs(vcpu); | 3537 | kvm_x86_ops->cache_regs(vcpu); |
3558 | 3538 | ||
3559 | if (nseg_desc.type & 8) | 3539 | if (nseg_desc.type & 8) |
3560 | ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, | 3540 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, |
3561 | &nseg_desc); | 3541 | &nseg_desc); |
3562 | else | 3542 | else |
3563 | ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, | 3543 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, |
3564 | &nseg_desc); | 3544 | &nseg_desc); |
3565 | 3545 | ||
3566 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 3546 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
@@ -3995,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3995 | */ | 3975 | */ |
3996 | if (!user_alloc) { | 3976 | if (!user_alloc) { |
3997 | if (npages && !old.rmap) { | 3977 | if (npages && !old.rmap) { |
3978 | unsigned long userspace_addr; | ||
3979 | |||
3998 | down_write(¤t->mm->mmap_sem); | 3980 | down_write(¤t->mm->mmap_sem); |
3999 | memslot->userspace_addr = do_mmap(NULL, 0, | 3981 | userspace_addr = do_mmap(NULL, 0, |
4000 | npages * PAGE_SIZE, | 3982 | npages * PAGE_SIZE, |
4001 | PROT_READ | PROT_WRITE, | 3983 | PROT_READ | PROT_WRITE, |
4002 | MAP_SHARED | MAP_ANONYMOUS, | 3984 | MAP_SHARED | MAP_ANONYMOUS, |
4003 | 0); | 3985 | 0); |
4004 | up_write(¤t->mm->mmap_sem); | 3986 | up_write(¤t->mm->mmap_sem); |
4005 | 3987 | ||
4006 | if (IS_ERR((void *)memslot->userspace_addr)) | 3988 | if (IS_ERR((void *)userspace_addr)) |
4007 | return PTR_ERR((void *)memslot->userspace_addr); | 3989 | return PTR_ERR((void *)userspace_addr); |
3990 | |||
3991 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
3992 | spin_lock(&kvm->mmu_lock); | ||
3993 | memslot->userspace_addr = userspace_addr; | ||
3994 | spin_unlock(&kvm->mmu_lock); | ||
4008 | } else { | 3995 | } else { |
4009 | if (!old.user_alloc && old.rmap) { | 3996 | if (!old.user_alloc && old.rmap) { |
4010 | int ret; | 3997 | int ret; |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 0313a5eec412..d9249a882aa5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -1014,6 +1014,9 @@ __init void lguest_init(void) | |||
1014 | init_pg_tables_start = __pa(pg0); | 1014 | init_pg_tables_start = __pa(pg0); |
1015 | init_pg_tables_end = __pa(pg0); | 1015 | init_pg_tables_end = __pa(pg0); |
1016 | 1016 | ||
1017 | /* As described in head_32.S, we map the first 128M of memory. */ | ||
1018 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; | ||
1019 | |||
1017 | /* Load the %fs segment register (the per-cpu segment register) with | 1020 | /* Load the %fs segment register (the per-cpu segment register) with |
1018 | * the normal data segment to get through booting. */ | 1021 | * the normal data segment to get through booting. */ |
1019 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); | 1022 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dfdf428975c0..f118c110af32 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -52,7 +52,7 @@ | |||
52 | jnz 100b | 52 | jnz 100b |
53 | 102: | 53 | 102: |
54 | .section .fixup,"ax" | 54 | .section .fixup,"ax" |
55 | 103: addl %r8d,%edx /* ecx is zerorest also */ | 55 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
56 | jmp copy_user_handle_tail | 56 | jmp copy_user_handle_tail |
57 | .previous | 57 | .previous |
58 | 58 | ||
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 40e0e309d27e..cb0c112386fb 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
@@ -32,7 +32,7 @@ | |||
32 | jnz 100b | 32 | jnz 100b |
33 | 102: | 33 | 102: |
34 | .section .fixup,"ax" | 34 | .section .fixup,"ax" |
35 | 103: addl %r8d,%edx /* ecx is zerorest also */ | 35 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
36 | jmp copy_user_handle_tail | 36 | jmp copy_user_handle_tail |
37 | .previous | 37 | .previous |
38 | 38 | ||
@@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache) | |||
108 | jmp 60f | 108 | jmp 60f |
109 | 50: movl %ecx,%edx | 109 | 50: movl %ecx,%edx |
110 | 60: sfence | 110 | 60: sfence |
111 | movl %r8d,%ecx | ||
112 | jmp copy_user_handle_tail | 111 | jmp copy_user_handle_tail |
113 | .previous | 112 | .previous |
114 | 113 | ||
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 1fbb844c3d7a..2977ea37791f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -1,6 +1,7 @@ | |||
1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
2 | pat.o pgtable.o | 2 | pat.o pgtable.o |
3 | 3 | ||
4 | obj-$(CONFIG_HAVE_GET_USER_PAGES_FAST) += gup.o | ||
4 | obj-$(CONFIG_X86_32) += pgtable_32.o | 5 | obj-$(CONFIG_X86_32) += pgtable_32.o |
5 | 6 | ||
6 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 7 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 000000000000..007bb06c7504 --- /dev/null +++ b/arch/x86/mm/gup.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * Lockless get_user_pages_fast for x86 | ||
3 | * | ||
4 | * Copyright (C) 2008 Nick Piggin | ||
5 | * Copyright (C) 2008 Novell Inc. | ||
6 | */ | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/vmstat.h> | ||
10 | #include <linux/highmem.h> | ||
11 | |||
12 | #include <asm/pgtable.h> | ||
13 | |||
14 | static inline pte_t gup_get_pte(pte_t *ptep) | ||
15 | { | ||
16 | #ifndef CONFIG_X86_PAE | ||
17 | return *ptep; | ||
18 | #else | ||
19 | /* | ||
20 | * With get_user_pages_fast, we walk down the pagetables without taking | ||
21 | * any locks. For this we would like to load the pointers atoimcally, | ||
22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What | ||
23 | * we do have is the guarantee that a pte will only either go from not | ||
24 | * present to present, or present to not present or both -- it will not | ||
25 | * switch to a completely different present page without a TLB flush in | ||
26 | * between; something that we are blocking by holding interrupts off. | ||
27 | * | ||
28 | * Setting ptes from not present to present goes: | ||
29 | * ptep->pte_high = h; | ||
30 | * smp_wmb(); | ||
31 | * ptep->pte_low = l; | ||
32 | * | ||
33 | * And present to not present goes: | ||
34 | * ptep->pte_low = 0; | ||
35 | * smp_wmb(); | ||
36 | * ptep->pte_high = 0; | ||
37 | * | ||
38 | * We must ensure here that the load of pte_low sees l iff pte_high | ||
39 | * sees h. We load pte_high *after* loading pte_low, which ensures we | ||
40 | * don't see an older value of pte_high. *Then* we recheck pte_low, | ||
41 | * which ensures that we haven't picked up a changed pte high. We might | ||
42 | * have got rubbish values from pte_low and pte_high, but we are | ||
43 | * guaranteed that pte_low will not have the present bit set *unless* | ||
44 | * it is 'l'. And get_user_pages_fast only operates on present ptes, so | ||
45 | * we're safe. | ||
46 | * | ||
47 | * gup_get_pte should not be used or copied outside gup.c without being | ||
48 | * very careful -- it does not atomically load the pte or anything that | ||
49 | * is likely to be useful for you. | ||
50 | */ | ||
51 | pte_t pte; | ||
52 | |||
53 | retry: | ||
54 | pte.pte_low = ptep->pte_low; | ||
55 | smp_rmb(); | ||
56 | pte.pte_high = ptep->pte_high; | ||
57 | smp_rmb(); | ||
58 | if (unlikely(pte.pte_low != ptep->pte_low)) | ||
59 | goto retry; | ||
60 | |||
61 | return pte; | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * The performance critical leaf functions are made noinline otherwise gcc | ||
67 | * inlines everything into a single function which results in too much | ||
68 | * register pressure. | ||
69 | */ | ||
70 | static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | ||
71 | unsigned long end, int write, struct page **pages, int *nr) | ||
72 | { | ||
73 | unsigned long mask; | ||
74 | pte_t *ptep; | ||
75 | |||
76 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
77 | if (write) | ||
78 | mask |= _PAGE_RW; | ||
79 | |||
80 | ptep = pte_offset_map(&pmd, addr); | ||
81 | do { | ||
82 | pte_t pte = gup_get_pte(ptep); | ||
83 | struct page *page; | ||
84 | |||
85 | if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { | ||
86 | pte_unmap(ptep); | ||
87 | return 0; | ||
88 | } | ||
89 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
90 | page = pte_page(pte); | ||
91 | get_page(page); | ||
92 | pages[*nr] = page; | ||
93 | (*nr)++; | ||
94 | |||
95 | } while (ptep++, addr += PAGE_SIZE, addr != end); | ||
96 | pte_unmap(ptep - 1); | ||
97 | |||
98 | return 1; | ||
99 | } | ||
100 | |||
101 | static inline void get_head_page_multiple(struct page *page, int nr) | ||
102 | { | ||
103 | VM_BUG_ON(page != compound_head(page)); | ||
104 | VM_BUG_ON(page_count(page) == 0); | ||
105 | atomic_add(nr, &page->_count); | ||
106 | } | ||
107 | |||
108 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | ||
109 | unsigned long end, int write, struct page **pages, int *nr) | ||
110 | { | ||
111 | unsigned long mask; | ||
112 | pte_t pte = *(pte_t *)&pmd; | ||
113 | struct page *head, *page; | ||
114 | int refs; | ||
115 | |||
116 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
117 | if (write) | ||
118 | mask |= _PAGE_RW; | ||
119 | if ((pte_val(pte) & mask) != mask) | ||
120 | return 0; | ||
121 | /* hugepages are never "special" */ | ||
122 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
123 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
124 | |||
125 | refs = 0; | ||
126 | head = pte_page(pte); | ||
127 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | ||
128 | do { | ||
129 | VM_BUG_ON(compound_head(page) != head); | ||
130 | pages[*nr] = page; | ||
131 | (*nr)++; | ||
132 | page++; | ||
133 | refs++; | ||
134 | } while (addr += PAGE_SIZE, addr != end); | ||
135 | get_head_page_multiple(head, refs); | ||
136 | |||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | ||
141 | int write, struct page **pages, int *nr) | ||
142 | { | ||
143 | unsigned long next; | ||
144 | pmd_t *pmdp; | ||
145 | |||
146 | pmdp = pmd_offset(&pud, addr); | ||
147 | do { | ||
148 | pmd_t pmd = *pmdp; | ||
149 | |||
150 | next = pmd_addr_end(addr, end); | ||
151 | if (pmd_none(pmd)) | ||
152 | return 0; | ||
153 | if (unlikely(pmd_large(pmd))) { | ||
154 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) | ||
155 | return 0; | ||
156 | } else { | ||
157 | if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | ||
158 | return 0; | ||
159 | } | ||
160 | } while (pmdp++, addr = next, addr != end); | ||
161 | |||
162 | return 1; | ||
163 | } | ||
164 | |||
165 | static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | ||
166 | unsigned long end, int write, struct page **pages, int *nr) | ||
167 | { | ||
168 | unsigned long mask; | ||
169 | pte_t pte = *(pte_t *)&pud; | ||
170 | struct page *head, *page; | ||
171 | int refs; | ||
172 | |||
173 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
174 | if (write) | ||
175 | mask |= _PAGE_RW; | ||
176 | if ((pte_val(pte) & mask) != mask) | ||
177 | return 0; | ||
178 | /* hugepages are never "special" */ | ||
179 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
180 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
181 | |||
182 | refs = 0; | ||
183 | head = pte_page(pte); | ||
184 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | ||
185 | do { | ||
186 | VM_BUG_ON(compound_head(page) != head); | ||
187 | pages[*nr] = page; | ||
188 | (*nr)++; | ||
189 | page++; | ||
190 | refs++; | ||
191 | } while (addr += PAGE_SIZE, addr != end); | ||
192 | get_head_page_multiple(head, refs); | ||
193 | |||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | ||
198 | int write, struct page **pages, int *nr) | ||
199 | { | ||
200 | unsigned long next; | ||
201 | pud_t *pudp; | ||
202 | |||
203 | pudp = pud_offset(&pgd, addr); | ||
204 | do { | ||
205 | pud_t pud = *pudp; | ||
206 | |||
207 | next = pud_addr_end(addr, end); | ||
208 | if (pud_none(pud)) | ||
209 | return 0; | ||
210 | if (unlikely(pud_large(pud))) { | ||
211 | if (!gup_huge_pud(pud, addr, next, write, pages, nr)) | ||
212 | return 0; | ||
213 | } else { | ||
214 | if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | ||
215 | return 0; | ||
216 | } | ||
217 | } while (pudp++, addr = next, addr != end); | ||
218 | |||
219 | return 1; | ||
220 | } | ||
221 | |||
222 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
223 | struct page **pages) | ||
224 | { | ||
225 | struct mm_struct *mm = current->mm; | ||
226 | unsigned long addr, len, end; | ||
227 | unsigned long next; | ||
228 | pgd_t *pgdp; | ||
229 | int nr = 0; | ||
230 | |||
231 | start &= PAGE_MASK; | ||
232 | addr = start; | ||
233 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
234 | end = start + len; | ||
235 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
236 | start, len))) | ||
237 | goto slow_irqon; | ||
238 | |||
239 | /* | ||
240 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
241 | * needs some instrumenting to determine the common sizes used by | ||
242 | * important workloads (eg. DB2), and whether limiting the batch size | ||
243 | * will decrease performance. | ||
244 | * | ||
245 | * It seems like we're in the clear for the moment. Direct-IO is | ||
246 | * the main guy that batches up lots of get_user_pages, and even | ||
247 | * they are limited to 64-at-a-time which is not so many. | ||
248 | */ | ||
249 | /* | ||
250 | * This doesn't prevent pagetable teardown, but does prevent | ||
251 | * the pagetables and pages from being freed on x86. | ||
252 | * | ||
253 | * So long as we atomically load page table pointers versus teardown | ||
254 | * (which we do on x86, with the above PAE exception), we can follow the | ||
255 | * address down to the the page and take a ref on it. | ||
256 | */ | ||
257 | local_irq_disable(); | ||
258 | pgdp = pgd_offset(mm, addr); | ||
259 | do { | ||
260 | pgd_t pgd = *pgdp; | ||
261 | |||
262 | next = pgd_addr_end(addr, end); | ||
263 | if (pgd_none(pgd)) | ||
264 | goto slow; | ||
265 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
266 | goto slow; | ||
267 | } while (pgdp++, addr = next, addr != end); | ||
268 | local_irq_enable(); | ||
269 | |||
270 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | ||
271 | return nr; | ||
272 | |||
273 | { | ||
274 | int ret; | ||
275 | |||
276 | slow: | ||
277 | local_irq_enable(); | ||
278 | slow_irqon: | ||
279 | /* Try to get the remaining pages with get_user_pages */ | ||
280 | start += nr << PAGE_SHIFT; | ||
281 | pages += nr; | ||
282 | |||
283 | down_read(&mm->mmap_sem); | ||
284 | ret = get_user_pages(current, mm, start, | ||
285 | (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); | ||
286 | up_read(&mm->mmap_sem); | ||
287 | |||
288 | /* Have to be a bit careful with return values */ | ||
289 | if (nr > 0) { | ||
290 | if (ret < 0) | ||
291 | ret = nr; | ||
292 | else | ||
293 | ret += nr; | ||
294 | } | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | } | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e4805771b5be..08a20e6a15c2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -86,43 +86,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
86 | * around without checking the pgd every time. | 86 | * around without checking the pgd every time. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | void show_mem(void) | ||
90 | { | ||
91 | long i, total = 0, reserved = 0; | ||
92 | long shared = 0, cached = 0; | ||
93 | struct page *page; | ||
94 | pg_data_t *pgdat; | ||
95 | |||
96 | printk(KERN_INFO "Mem-info:\n"); | ||
97 | show_free_areas(); | ||
98 | for_each_online_pgdat(pgdat) { | ||
99 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
100 | /* | ||
101 | * This loop can take a while with 256 GB and | ||
102 | * 4k pages so defer the NMI watchdog: | ||
103 | */ | ||
104 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
105 | touch_nmi_watchdog(); | ||
106 | |||
107 | if (!pfn_valid(pgdat->node_start_pfn + i)) | ||
108 | continue; | ||
109 | |||
110 | page = pfn_to_page(pgdat->node_start_pfn + i); | ||
111 | total++; | ||
112 | if (PageReserved(page)) | ||
113 | reserved++; | ||
114 | else if (PageSwapCache(page)) | ||
115 | cached++; | ||
116 | else if (page_count(page)) | ||
117 | shared += page_count(page) - 1; | ||
118 | } | ||
119 | } | ||
120 | printk(KERN_INFO "%lu pages of RAM\n", total); | ||
121 | printk(KERN_INFO "%lu reserved pages\n", reserved); | ||
122 | printk(KERN_INFO "%lu pages shared\n", shared); | ||
123 | printk(KERN_INFO "%lu pages swap cached\n", cached); | ||
124 | } | ||
125 | |||
126 | int after_bootmem; | 89 | int after_bootmem; |
127 | 90 | ||
128 | static __init void *spp_getpage(void) | 91 | static __init void *spp_getpage(void) |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index b4becbf8c570..cab0abbd1ebe 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -20,53 +20,6 @@ | |||
20 | #include <asm/tlb.h> | 20 | #include <asm/tlb.h> |
21 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
22 | 22 | ||
23 | void show_mem(void) | ||
24 | { | ||
25 | int total = 0, reserved = 0; | ||
26 | int shared = 0, cached = 0; | ||
27 | int highmem = 0; | ||
28 | struct page *page; | ||
29 | pg_data_t *pgdat; | ||
30 | unsigned long i; | ||
31 | unsigned long flags; | ||
32 | |||
33 | printk(KERN_INFO "Mem-info:\n"); | ||
34 | show_free_areas(); | ||
35 | for_each_online_pgdat(pgdat) { | ||
36 | pgdat_resize_lock(pgdat, &flags); | ||
37 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
38 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
39 | touch_nmi_watchdog(); | ||
40 | page = pgdat_page_nr(pgdat, i); | ||
41 | total++; | ||
42 | if (PageHighMem(page)) | ||
43 | highmem++; | ||
44 | if (PageReserved(page)) | ||
45 | reserved++; | ||
46 | else if (PageSwapCache(page)) | ||
47 | cached++; | ||
48 | else if (page_count(page)) | ||
49 | shared += page_count(page) - 1; | ||
50 | } | ||
51 | pgdat_resize_unlock(pgdat, &flags); | ||
52 | } | ||
53 | printk(KERN_INFO "%d pages of RAM\n", total); | ||
54 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); | ||
55 | printk(KERN_INFO "%d reserved pages\n", reserved); | ||
56 | printk(KERN_INFO "%d pages shared\n", shared); | ||
57 | printk(KERN_INFO "%d pages swap cached\n", cached); | ||
58 | |||
59 | printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); | ||
60 | printk(KERN_INFO "%lu pages writeback\n", | ||
61 | global_page_state(NR_WRITEBACK)); | ||
62 | printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); | ||
63 | printk(KERN_INFO "%lu pages slab\n", | ||
64 | global_page_state(NR_SLAB_RECLAIMABLE) + | ||
65 | global_page_state(NR_SLAB_UNRECLAIMABLE)); | ||
66 | printk(KERN_INFO "%lu pages pagetables\n", | ||
67 | global_page_state(NR_PAGETABLE)); | ||
68 | } | ||
69 | |||
70 | /* | 23 | /* |
71 | * Associate a virtual page frame with a given physical page frame | 24 | * Associate a virtual page frame with a given physical page frame |
72 | * and protection flags for that frame. | 25 | * and protection flags for that frame. |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index ff3a6a336342..4bdaa590375d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
23 | pci_read_config_byte(d, reg++, &busno); | 23 | pci_read_config_byte(d, reg++, &busno); |
24 | pci_read_config_byte(d, reg++, &suba); | 24 | pci_read_config_byte(d, reg++, &suba); |
25 | pci_read_config_byte(d, reg++, &subb); | 25 | pci_read_config_byte(d, reg++, &subb); |
26 | DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); | 26 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, |
27 | suba, subb); | ||
27 | if (busno) | 28 | if (busno) |
28 | pci_scan_bus_with_sysdata(busno); /* Bus A */ | 29 | pci_scan_bus_with_sysdata(busno); /* Bus A */ |
29 | if (suba < subb) | 30 | if (suba < subb) |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a09505806b82..5807d1bc73f7 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -128,10 +128,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
128 | pr = pci_find_parent_resource(dev, r); | 128 | pr = pci_find_parent_resource(dev, r); |
129 | if (!r->start || !pr || | 129 | if (!r->start || !pr || |
130 | request_resource(pr, r) < 0) { | 130 | request_resource(pr, r) < 0) { |
131 | printk(KERN_ERR "PCI: Cannot allocate " | 131 | dev_err(&dev->dev, "BAR %d: can't " |
132 | "resource region %d " | 132 | "allocate resource\n", idx); |
133 | "of bridge %s\n", | ||
134 | idx, pci_name(dev)); | ||
135 | /* | 133 | /* |
136 | * Something is wrong with the region. | 134 | * Something is wrong with the region. |
137 | * Invalidate the resource to prevent | 135 | * Invalidate the resource to prevent |
@@ -166,15 +164,15 @@ static void __init pcibios_allocate_resources(int pass) | |||
166 | else | 164 | else |
167 | disabled = !(command & PCI_COMMAND_MEMORY); | 165 | disabled = !(command & PCI_COMMAND_MEMORY); |
168 | if (pass == disabled) { | 166 | if (pass == disabled) { |
169 | DBG("PCI: Resource %08lx-%08lx " | 167 | dev_dbg(&dev->dev, "resource %#08llx-%#08llx " |
170 | "(f=%lx, d=%d, p=%d)\n", | 168 | "(f=%lx, d=%d, p=%d)\n", |
171 | r->start, r->end, r->flags, disabled, pass); | 169 | (unsigned long long) r->start, |
170 | (unsigned long long) r->end, | ||
171 | r->flags, disabled, pass); | ||
172 | pr = pci_find_parent_resource(dev, r); | 172 | pr = pci_find_parent_resource(dev, r); |
173 | if (!pr || request_resource(pr, r) < 0) { | 173 | if (!pr || request_resource(pr, r) < 0) { |
174 | printk(KERN_ERR "PCI: Cannot allocate " | 174 | dev_err(&dev->dev, "BAR %d: can't " |
175 | "resource region %d " | 175 | "allocate resource\n", idx); |
176 | "of device %s\n", | ||
177 | idx, pci_name(dev)); | ||
178 | /* We'll assign a new address later */ | 176 | /* We'll assign a new address later */ |
179 | r->end -= r->start; | 177 | r->end -= r->start; |
180 | r->start = 0; | 178 | r->start = 0; |
@@ -187,8 +185,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
187 | /* Turn the ROM off, leave the resource region, | 185 | /* Turn the ROM off, leave the resource region, |
188 | * but keep it unregistered. */ | 186 | * but keep it unregistered. */ |
189 | u32 reg; | 187 | u32 reg; |
190 | DBG("PCI: Switching off ROM of %s\n", | 188 | dev_dbg(&dev->dev, "disabling ROM\n"); |
191 | pci_name(dev)); | ||
192 | r->flags &= ~IORESOURCE_ROM_ENABLE; | 189 | r->flags &= ~IORESOURCE_ROM_ENABLE; |
193 | pci_read_config_dword(dev, | 190 | pci_read_config_dword(dev, |
194 | dev->rom_base_reg, ®); | 191 | dev->rom_base_reg, ®); |
@@ -257,8 +254,7 @@ void pcibios_set_master(struct pci_dev *dev) | |||
257 | lat = pcibios_max_latency; | 254 | lat = pcibios_max_latency; |
258 | else | 255 | else |
259 | return; | 256 | return; |
260 | printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", | 257 | dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); |
261 | pci_name(dev), lat); | ||
262 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); | 258 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); |
263 | } | 259 | } |
264 | 260 | ||
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 6a06a2eb0597..fec0123b33a9 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |||
436 | { | 436 | { |
437 | WARN_ON_ONCE(pirq >= 9); | 437 | WARN_ON_ONCE(pirq >= 9); |
438 | if (pirq > 8) { | 438 | if (pirq > 8) { |
439 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | 439 | dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); |
440 | return 0; | 440 | return 0; |
441 | } | 441 | } |
442 | return read_config_nybble(router, 0x74, pirq-1); | 442 | return read_config_nybble(router, 0x74, pirq-1); |
@@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
446 | { | 446 | { |
447 | WARN_ON_ONCE(pirq >= 9); | 447 | WARN_ON_ONCE(pirq >= 9); |
448 | if (pirq > 8) { | 448 | if (pirq > 8) { |
449 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | 449 | dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); |
450 | return 0; | 450 | return 0; |
451 | } | 451 | } |
452 | write_config_nybble(router, 0x74, pirq-1, irq); | 452 | write_config_nybble(router, 0x74, pirq-1, irq); |
@@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq | |||
492 | irq = 0; | 492 | irq = 0; |
493 | if (pirq <= 4) | 493 | if (pirq <= 4) |
494 | irq = read_config_nybble(router, 0x56, pirq - 1); | 494 | irq = read_config_nybble(router, 0x56, pirq - 1); |
495 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", | 495 | dev_info(&dev->dev, |
496 | dev->vendor, dev->device, pirq, irq); | 496 | "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", |
497 | dev->vendor, dev->device, pirq, irq); | ||
497 | return irq; | 498 | return irq; |
498 | } | 499 | } |
499 | 500 | ||
500 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | 501 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) |
501 | { | 502 | { |
502 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", | 503 | dev_info(&dev->dev, |
503 | dev->vendor, dev->device, pirq, irq); | 504 | "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", |
505 | dev->vendor, dev->device, pirq, irq); | ||
504 | if (pirq <= 4) | 506 | if (pirq <= 4) |
505 | write_config_nybble(router, 0x56, pirq - 1, irq); | 507 | write_config_nybble(router, 0x56, pirq - 1, irq); |
506 | return 1; | 508 | return 1; |
@@ -730,7 +732,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, | |||
730 | switch (device) { | 732 | switch (device) { |
731 | case PCI_DEVICE_ID_AL_M1533: | 733 | case PCI_DEVICE_ID_AL_M1533: |
732 | case PCI_DEVICE_ID_AL_M1563: | 734 | case PCI_DEVICE_ID_AL_M1563: |
733 | printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); | ||
734 | r->name = "ALI"; | 735 | r->name = "ALI"; |
735 | r->get = pirq_ali_get; | 736 | r->get = pirq_ali_get; |
736 | r->set = pirq_ali_set; | 737 | r->set = pirq_ali_set; |
@@ -840,11 +841,9 @@ static void __init pirq_find_router(struct irq_router *r) | |||
840 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) | 841 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) |
841 | break; | 842 | break; |
842 | } | 843 | } |
843 | printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", | 844 | dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", |
844 | pirq_router.name, | 845 | pirq_router.name, |
845 | pirq_router_dev->vendor, | 846 | pirq_router_dev->vendor, pirq_router_dev->device); |
846 | pirq_router_dev->device, | ||
847 | pci_name(pirq_router_dev)); | ||
848 | 847 | ||
849 | /* The device remains referenced for the kernel lifetime */ | 848 | /* The device remains referenced for the kernel lifetime */ |
850 | } | 849 | } |
@@ -877,7 +876,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
877 | /* Find IRQ pin */ | 876 | /* Find IRQ pin */ |
878 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 877 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
879 | if (!pin) { | 878 | if (!pin) { |
880 | DBG(KERN_DEBUG " -> no interrupt pin\n"); | 879 | dev_dbg(&dev->dev, "no interrupt pin\n"); |
881 | return 0; | 880 | return 0; |
882 | } | 881 | } |
883 | pin = pin - 1; | 882 | pin = pin - 1; |
@@ -887,20 +886,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
887 | if (!pirq_table) | 886 | if (!pirq_table) |
888 | return 0; | 887 | return 0; |
889 | 888 | ||
890 | DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); | ||
891 | info = pirq_get_info(dev); | 889 | info = pirq_get_info(dev); |
892 | if (!info) { | 890 | if (!info) { |
893 | DBG(" -> not found in routing table\n" KERN_DEBUG); | 891 | dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", |
892 | 'A' + pin); | ||
894 | return 0; | 893 | return 0; |
895 | } | 894 | } |
896 | pirq = info->irq[pin].link; | 895 | pirq = info->irq[pin].link; |
897 | mask = info->irq[pin].bitmap; | 896 | mask = info->irq[pin].bitmap; |
898 | if (!pirq) { | 897 | if (!pirq) { |
899 | DBG(" -> not routed\n" KERN_DEBUG); | 898 | dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); |
900 | return 0; | 899 | return 0; |
901 | } | 900 | } |
902 | DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, | 901 | dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", |
903 | pirq_table->exclusive_irqs); | 902 | 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); |
904 | mask &= pcibios_irq_mask; | 903 | mask &= pcibios_irq_mask; |
905 | 904 | ||
906 | /* Work around broken HP Pavilion Notebooks which assign USB to | 905 | /* Work around broken HP Pavilion Notebooks which assign USB to |
@@ -930,10 +929,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
930 | if (pci_probe & PCI_USE_PIRQ_MASK) | 929 | if (pci_probe & PCI_USE_PIRQ_MASK) |
931 | newirq = 0; | 930 | newirq = 0; |
932 | else | 931 | else |
933 | printk("\n" KERN_WARNING | 932 | dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " |
934 | "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" | 933 | "%#x; try pci=usepirqmask\n", newirq, mask); |
935 | KERN_DEBUG, newirq, | ||
936 | pci_name(dev)); | ||
937 | } | 934 | } |
938 | if (!newirq && assign) { | 935 | if (!newirq && assign) { |
939 | for (i = 0; i < 16; i++) { | 936 | for (i = 0; i < 16; i++) { |
@@ -944,39 +941,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
944 | newirq = i; | 941 | newirq = i; |
945 | } | 942 | } |
946 | } | 943 | } |
947 | DBG(" -> newirq=%d", newirq); | 944 | dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); |
948 | 945 | ||
949 | /* Check if it is hardcoded */ | 946 | /* Check if it is hardcoded */ |
950 | if ((pirq & 0xf0) == 0xf0) { | 947 | if ((pirq & 0xf0) == 0xf0) { |
951 | irq = pirq & 0xf; | 948 | irq = pirq & 0xf; |
952 | DBG(" -> hardcoded IRQ %d\n", irq); | 949 | msg = "hardcoded"; |
953 | msg = "Hardcoded"; | ||
954 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ | 950 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ |
955 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { | 951 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { |
956 | DBG(" -> got IRQ %d\n", irq); | 952 | msg = "found"; |
957 | msg = "Found"; | ||
958 | eisa_set_level_irq(irq); | 953 | eisa_set_level_irq(irq); |
959 | } else if (newirq && r->set && | 954 | } else if (newirq && r->set && |
960 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | 955 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { |
961 | DBG(" -> assigning IRQ %d", newirq); | ||
962 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { | 956 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { |
963 | eisa_set_level_irq(newirq); | 957 | eisa_set_level_irq(newirq); |
964 | DBG(" ... OK\n"); | 958 | msg = "assigned"; |
965 | msg = "Assigned"; | ||
966 | irq = newirq; | 959 | irq = newirq; |
967 | } | 960 | } |
968 | } | 961 | } |
969 | 962 | ||
970 | if (!irq) { | 963 | if (!irq) { |
971 | DBG(" ... failed\n"); | ||
972 | if (newirq && mask == (1 << newirq)) { | 964 | if (newirq && mask == (1 << newirq)) { |
973 | msg = "Guessed"; | 965 | msg = "guessed"; |
974 | irq = newirq; | 966 | irq = newirq; |
975 | } else | 967 | } else { |
968 | dev_dbg(&dev->dev, "can't route interrupt\n"); | ||
976 | return 0; | 969 | return 0; |
970 | } | ||
977 | } | 971 | } |
978 | printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, | 972 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); |
979 | pci_name(dev)); | ||
980 | 973 | ||
981 | /* Update IRQ for all devices with the same pirq value */ | 974 | /* Update IRQ for all devices with the same pirq value */ |
982 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | 975 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { |
@@ -996,17 +989,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
996 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ | 989 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ |
997 | ((1 << dev2->irq) & mask))) { | 990 | ((1 << dev2->irq) & mask))) { |
998 | #ifndef CONFIG_PCI_MSI | 991 | #ifndef CONFIG_PCI_MSI |
999 | printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", | 992 | dev_info(&dev2->dev, "IRQ routing conflict: " |
1000 | pci_name(dev2), dev2->irq, irq); | 993 | "have IRQ %d, want IRQ %d\n", |
994 | dev2->irq, irq); | ||
1001 | #endif | 995 | #endif |
1002 | continue; | 996 | continue; |
1003 | } | 997 | } |
1004 | dev2->irq = irq; | 998 | dev2->irq = irq; |
1005 | pirq_penalty[irq]++; | 999 | pirq_penalty[irq]++; |
1006 | if (dev != dev2) | 1000 | if (dev != dev2) |
1007 | printk(KERN_INFO | 1001 | dev_info(&dev->dev, "sharing IRQ %d with %s\n", |
1008 | "PCI: Sharing IRQ %d with %s\n", | 1002 | irq, pci_name(dev2)); |
1009 | irq, pci_name(dev2)); | ||
1010 | } | 1003 | } |
1011 | } | 1004 | } |
1012 | return 1; | 1005 | return 1; |
@@ -1025,8 +1018,7 @@ static void __init pcibios_fixup_irqs(void) | |||
1025 | * already in use. | 1018 | * already in use. |
1026 | */ | 1019 | */ |
1027 | if (dev->irq >= 16) { | 1020 | if (dev->irq >= 16) { |
1028 | DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", | 1021 | dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); |
1029 | pci_name(dev), dev->irq); | ||
1030 | dev->irq = 0; | 1022 | dev->irq = 0; |
1031 | } | 1023 | } |
1032 | /* | 1024 | /* |
@@ -1070,12 +1062,12 @@ static void __init pcibios_fixup_irqs(void) | |||
1070 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1062 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1071 | PCI_SLOT(bridge->devfn), pin); | 1063 | PCI_SLOT(bridge->devfn), pin); |
1072 | if (irq >= 0) | 1064 | if (irq >= 0) |
1073 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | 1065 | dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", |
1074 | pci_name(bridge), 'A' + pin, irq); | 1066 | pci_name(bridge), |
1067 | 'A' + pin, irq); | ||
1075 | } | 1068 | } |
1076 | if (irq >= 0) { | 1069 | if (irq >= 0) { |
1077 | printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1070 | dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); |
1078 | pci_name(dev), 'A' + pin, irq); | ||
1079 | dev->irq = irq; | 1071 | dev->irq = irq; |
1080 | } | 1072 | } |
1081 | } | 1073 | } |
@@ -1231,25 +1223,24 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1231 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1223 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1232 | PCI_SLOT(bridge->devfn), pin); | 1224 | PCI_SLOT(bridge->devfn), pin); |
1233 | if (irq >= 0) | 1225 | if (irq >= 0) |
1234 | printk(KERN_WARNING | 1226 | dev_warn(&dev->dev, "using bridge %s " |
1235 | "PCI: using PPB %s[%c] to get irq %d\n", | 1227 | "INT %c to get IRQ %d\n", |
1236 | pci_name(bridge), | 1228 | pci_name(bridge), 'A' + pin, |
1237 | 'A' + pin, irq); | 1229 | irq); |
1238 | dev = bridge; | 1230 | dev = bridge; |
1239 | } | 1231 | } |
1240 | dev = temp_dev; | 1232 | dev = temp_dev; |
1241 | if (irq >= 0) { | 1233 | if (irq >= 0) { |
1242 | printk(KERN_INFO | 1234 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " |
1243 | "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1235 | "INT %c -> IRQ %d\n", 'A' + pin, irq); |
1244 | pci_name(dev), 'A' + pin, irq); | ||
1245 | dev->irq = irq; | 1236 | dev->irq = irq; |
1246 | return 0; | 1237 | return 0; |
1247 | } else | 1238 | } else |
1248 | msg = " Probably buggy MP table."; | 1239 | msg = "; probably buggy MP table"; |
1249 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) | 1240 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) |
1250 | msg = ""; | 1241 | msg = ""; |
1251 | else | 1242 | else |
1252 | msg = " Please try using pci=biosirq."; | 1243 | msg = "; please try using pci=biosirq"; |
1253 | 1244 | ||
1254 | /* | 1245 | /* |
1255 | * With IDE legacy devices the IRQ lookup failure is not | 1246 | * With IDE legacy devices the IRQ lookup failure is not |
@@ -1259,9 +1250,8 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1259 | !(dev->class & 0x5)) | 1250 | !(dev->class & 0x5)) |
1260 | return 0; | 1251 | return 0; |
1261 | 1252 | ||
1262 | printk(KERN_WARNING | 1253 | dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", |
1263 | "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", | 1254 | 'A' + pin, msg); |
1264 | 'A' + pin, pci_name(dev), msg); | ||
1265 | } | 1255 | } |
1266 | return 0; | 1256 | return 0; |
1267 | } | 1257 | } |
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index f4b16dc11dad..1177845d3186 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
131 | u8 busno, suba, subb; | 131 | u8 busno, suba, subb; |
132 | int quad = BUS2QUAD(d->bus->number); | 132 | int quad = BUS2QUAD(d->bus->number); |
133 | 133 | ||
134 | printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); | 134 | dev_info(&d->dev, "searching for i450NX host bridges\n"); |
135 | reg = 0xd0; | 135 | reg = 0xd0; |
136 | for(pxb=0; pxb<2; pxb++) { | 136 | for(pxb=0; pxb<2; pxb++) { |
137 | pci_read_config_byte(d, reg++, &busno); | 137 | pci_read_config_byte(d, reg++, &busno); |
138 | pci_read_config_byte(d, reg++, &suba); | 138 | pci_read_config_byte(d, reg++, &suba); |
139 | pci_read_config_byte(d, reg++, &subb); | 139 | pci_read_config_byte(d, reg++, &subb); |
140 | DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); | 140 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", |
141 | pxb, busno, suba, subb); | ||
141 | if (busno) { | 142 | if (busno) { |
142 | /* Bus A */ | 143 | /* Bus A */ |
143 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); | 144 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); |