diff options
137 files changed, 4113 insertions, 1103 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index d25acd51e181..22b19962a1a2 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -104,6 +104,9 @@ loader, and have no meaning to the kernel directly. | |||
104 | Do not modify the syntax of boot loader parameters without extreme | 104 | Do not modify the syntax of boot loader parameters without extreme |
105 | need or coordination with <Documentation/i386/boot.txt>. | 105 | need or coordination with <Documentation/i386/boot.txt>. |
106 | 106 | ||
107 | There are also arch-specific kernel-parameters not documented here. | ||
108 | See for example <Documentation/x86_64/boot-options.txt>. | ||
109 | |||
107 | Note that ALL kernel parameters listed below are CASE SENSITIVE, and that | 110 | Note that ALL kernel parameters listed below are CASE SENSITIVE, and that |
108 | a trailing = on the name of any parameter states that that parameter will | 111 | a trailing = on the name of any parameter states that that parameter will |
109 | be entered as an environment variable, whereas its absence indicates that | 112 | be entered as an environment variable, whereas its absence indicates that |
@@ -361,6 +364,11 @@ and is between 256 and 4096 characters. It is defined in the file | |||
361 | clocksource is not available, it defaults to PIT. | 364 | clocksource is not available, it defaults to PIT. |
362 | Format: { pit | tsc | cyclone | pmtmr } | 365 | Format: { pit | tsc | cyclone | pmtmr } |
363 | 366 | ||
367 | code_bytes [IA32] How many bytes of object code to print in an | ||
368 | oops report. | ||
369 | Range: 0 - 8192 | ||
370 | Default: 64 | ||
371 | |||
364 | disable_8254_timer | 372 | disable_8254_timer |
365 | enable_8254_timer | 373 | enable_8254_timer |
366 | [IA32/X86_64] Disable/Enable interrupt 0 timer routing | 374 | [IA32/X86_64] Disable/Enable interrupt 0 timer routing |
diff --git a/Documentation/x86_64/boot-options.txt b/Documentation/x86_64/boot-options.txt index 5c86ed6f0448..625a21db0c2a 100644 --- a/Documentation/x86_64/boot-options.txt +++ b/Documentation/x86_64/boot-options.txt | |||
@@ -180,40 +180,81 @@ PCI | |||
180 | pci=lastbus=NUMBER Scan upto NUMBER busses, no matter what the mptable says. | 180 | pci=lastbus=NUMBER Scan upto NUMBER busses, no matter what the mptable says. |
181 | pci=noacpi Don't use ACPI to set up PCI interrupt routing. | 181 | pci=noacpi Don't use ACPI to set up PCI interrupt routing. |
182 | 182 | ||
183 | IOMMU | 183 | IOMMU (input/output memory management unit) |
184 | 184 | ||
185 | iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | 185 | Currently four x86-64 PCI-DMA mapping implementations exist: |
186 | [,forcesac][,fullflush][,nomerge][,noaperture][,calgary] | 186 | |
187 | size set size of iommu (in bytes) | 187 | 1. <arch/x86_64/kernel/pci-nommu.c>: use no hardware/software IOMMU at all |
188 | noagp don't initialize the AGP driver and use full aperture. | 188 | (e.g. because you have < 3 GB memory). |
189 | off don't use the IOMMU | 189 | Kernel boot message: "PCI-DMA: Disabling IOMMU" |
190 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | 190 | |
191 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | 191 | 2. <arch/x86_64/kernel/pci-gart.c>: AMD GART based hardware IOMMU. |
192 | noforce don't force IOMMU usage. Default. | 192 | Kernel boot message: "PCI-DMA: using GART IOMMU" |
193 | force Force IOMMU. | 193 | |
194 | merge Do SG merging. Implies force (experimental) | 194 | 3. <arch/x86_64/kernel/pci-swiotlb.c> : Software IOMMU implementation. Used |
195 | nomerge Don't do SG merging. | 195 | e.g. if there is no hardware IOMMU in the system and it is needed because |
196 | forcesac For SAC mode for masks <40bits (experimental) | 196 | you have >3GB memory or told the kernel to use it (iommu=soft). |
197 | fullflush Flush IOMMU on each allocation (default) | 197 | Kernel boot message: "PCI-DMA: Using software bounce buffering |
198 | nofullflush Don't use IOMMU fullflush | 198 | for IO (SWIOTLB)" |
199 | allowed overwrite iommu off workarounds for specific chipsets. | 199 | |
200 | soft Use software bounce buffering (default for Intel machines) | 200 | 4. <arch/x86_64/pci-calgary.c> : IBM Calgary hardware IOMMU. Used in IBM |
201 | noaperture Don't touch the aperture for AGP. | 201 | pSeries and xSeries servers. This hardware IOMMU supports DMA address |
202 | allowdac Allow DMA >4GB | 202 | mapping with memory protection, etc. |
203 | When off all DMA over >4GB is forced through an IOMMU or bounce | 203 | Kernel boot message: "PCI-DMA: Using Calgary IOMMU" |
204 | buffering. | 204 | |
205 | nodac Forbid DMA >4GB | 205 | iommu=[<size>][,noagp][,off][,force][,noforce][,leak[=<nr_of_leak_pages>]] |
206 | panic Always panic when IOMMU overflows | 206 | [,memaper[=<order>]][,merge][,forcesac][,fullflush][,nomerge] |
207 | calgary Use the Calgary IOMMU if it is available | 207 | [,noaperture][,calgary] |
208 | 208 | ||
209 | swiotlb=pages[,force] | 209 | General iommu options: |
210 | 210 | off Don't initialize and use any kind of IOMMU. | |
211 | pages Prereserve that many 128K pages for the software IO bounce buffering. | 211 | noforce Don't force hardware IOMMU usage when it is not needed. |
212 | force Force all IO through the software TLB. | 212 | (default). |
213 | 213 | force Force the use of the hardware IOMMU even when it is | |
214 | calgary=[64k,128k,256k,512k,1M,2M,4M,8M] | 214 | not actually needed (e.g. because < 3 GB memory). |
215 | calgary=[translate_empty_slots] | 215 | soft Use software bounce buffering (SWIOTLB) (default for |
216 | calgary=[disable=<PCI bus number>] | 216 | Intel machines). This can be used to prevent the usage |
217 | of an available hardware IOMMU. | ||
218 | |||
219 | iommu options only relevant to the AMD GART hardware IOMMU: | ||
220 | <size> Set the size of the remapping area in bytes. | ||
221 | allowed Overwrite iommu off workarounds for specific chipsets. | ||
222 | fullflush Flush IOMMU on each allocation (default). | ||
223 | nofullflush Don't use IOMMU fullflush. | ||
224 | leak Turn on simple iommu leak tracing (only when | ||
225 | CONFIG_IOMMU_LEAK is on). Default number of leak pages | ||
226 | is 20. | ||
227 | memaper[=<order>] Allocate an own aperture over RAM with size 32MB<<order. | ||
228 | (default: order=1, i.e. 64MB) | ||
229 | merge Do scatter-gather (SG) merging. Implies "force" | ||
230 | (experimental). | ||
231 | nomerge Don't do scatter-gather (SG) merging. | ||
232 | noaperture Ask the IOMMU not to touch the aperture for AGP. | ||
233 | forcesac Force single-address cycle (SAC) mode for masks <40bits | ||
234 | (experimental). | ||
235 | noagp Don't initialize the AGP driver and use full aperture. | ||
236 | allowdac Allow double-address cycle (DAC) mode, i.e. DMA >4GB. | ||
237 | DAC is used with 32-bit PCI to push a 64-bit address in | ||
238 | two cycles. When off all DMA over >4GB is forced through | ||
239 | an IOMMU or software bounce buffering. | ||
240 | nodac Forbid DAC mode, i.e. DMA >4GB. | ||
241 | panic Always panic when IOMMU overflows. | ||
242 | calgary Use the Calgary IOMMU if it is available | ||
243 | |||
244 | iommu options only relevant to the software bounce buffering (SWIOTLB) IOMMU | ||
245 | implementation: | ||
246 | swiotlb=<pages>[,force] | ||
247 | <pages> Prereserve that many 128K pages for the software IO | ||
248 | bounce buffering. | ||
249 | force Force all IO through the software TLB. | ||
250 | |||
251 | Settings for the IBM Calgary hardware IOMMU currently found in IBM | ||
252 | pSeries and xSeries machines: | ||
253 | |||
254 | calgary=[64k,128k,256k,512k,1M,2M,4M,8M] | ||
255 | calgary=[translate_empty_slots] | ||
256 | calgary=[disable=<PCI bus number>] | ||
257 | panic Always panic when IOMMU overflows | ||
217 | 258 | ||
218 | 64k,...,8M - Set the size of each PCI slot's translation table | 259 | 64k,...,8M - Set the size of each PCI slot's translation table |
219 | when using the Calgary IOMMU. This is the size of the translation | 260 | when using the Calgary IOMMU. This is the size of the translation |
@@ -234,14 +275,14 @@ IOMMU | |||
234 | 275 | ||
235 | Debugging | 276 | Debugging |
236 | 277 | ||
237 | oops=panic Always panic on oopses. Default is to just kill the process, | 278 | oops=panic Always panic on oopses. Default is to just kill the process, |
238 | but there is a small probability of deadlocking the machine. | 279 | but there is a small probability of deadlocking the machine. |
239 | This will also cause panics on machine check exceptions. | 280 | This will also cause panics on machine check exceptions. |
240 | Useful together with panic=30 to trigger a reboot. | 281 | Useful together with panic=30 to trigger a reboot. |
241 | 282 | ||
242 | kstack=N Print that many words from the kernel stack in oops dumps. | 283 | kstack=N Print N words from the kernel stack in oops dumps. |
243 | 284 | ||
244 | pagefaulttrace Dump all page faults. Only useful for extreme debugging | 285 | pagefaulttrace Dump all page faults. Only useful for extreme debugging |
245 | and will create a lot of output. | 286 | and will create a lot of output. |
246 | 287 | ||
247 | call_trace=[old|both|newfallback|new] | 288 | call_trace=[old|both|newfallback|new] |
@@ -251,15 +292,8 @@ Debugging | |||
251 | newfallback: use new unwinder but fall back to old if it gets | 292 | newfallback: use new unwinder but fall back to old if it gets |
252 | stuck (default) | 293 | stuck (default) |
253 | 294 | ||
254 | call_trace=[old|both|newfallback|new] | 295 | Miscellaneous |
255 | old: use old inexact backtracer | ||
256 | new: use new exact dwarf2 unwinder | ||
257 | both: print entries from both | ||
258 | newfallback: use new unwinder but fall back to old if it gets | ||
259 | stuck (default) | ||
260 | |||
261 | Misc | ||
262 | 296 | ||
263 | noreplacement Don't replace instructions with more appropriate ones | 297 | noreplacement Don't replace instructions with more appropriate ones |
264 | for the CPU. This may be useful on asymmetric MP systems | 298 | for the CPU. This may be useful on asymmetric MP systems |
265 | where some CPU have less capabilities than the others. | 299 | where some CPUs have less capabilities than others. |
diff --git a/Documentation/x86_64/cpu-hotplug-spec b/Documentation/x86_64/cpu-hotplug-spec index 5c0fa345e556..3c23e0587db3 100644 --- a/Documentation/x86_64/cpu-hotplug-spec +++ b/Documentation/x86_64/cpu-hotplug-spec | |||
@@ -2,7 +2,7 @@ Firmware support for CPU hotplug under Linux/x86-64 | |||
2 | --------------------------------------------------- | 2 | --------------------------------------------------- |
3 | 3 | ||
4 | Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to | 4 | Linux/x86-64 supports CPU hotplug now. For various reasons Linux wants to |
5 | know in advance boot time the maximum number of CPUs that could be plugged | 5 | know in advance of boot time the maximum number of CPUs that could be plugged |
6 | into the system. ACPI 3.0 currently has no official way to supply | 6 | into the system. ACPI 3.0 currently has no official way to supply |
7 | this information from the firmware to the operating system. | 7 | this information from the firmware to the operating system. |
8 | 8 | ||
diff --git a/Documentation/x86_64/kernel-stacks b/Documentation/x86_64/kernel-stacks index bddfddd466ab..5ad65d51fb95 100644 --- a/Documentation/x86_64/kernel-stacks +++ b/Documentation/x86_64/kernel-stacks | |||
@@ -9,9 +9,9 @@ zombie. While the thread is in user space the kernel stack is empty | |||
9 | except for the thread_info structure at the bottom. | 9 | except for the thread_info structure at the bottom. |
10 | 10 | ||
11 | In addition to the per thread stacks, there are specialized stacks | 11 | In addition to the per thread stacks, there are specialized stacks |
12 | associated with each cpu. These stacks are only used while the kernel | 12 | associated with each CPU. These stacks are only used while the kernel |
13 | is in control on that cpu, when a cpu returns to user space the | 13 | is in control on that CPU; when a CPU returns to user space the |
14 | specialized stacks contain no useful data. The main cpu stacks is | 14 | specialized stacks contain no useful data. The main CPU stacks are: |
15 | 15 | ||
16 | * Interrupt stack. IRQSTACKSIZE | 16 | * Interrupt stack. IRQSTACKSIZE |
17 | 17 | ||
@@ -32,17 +32,17 @@ x86_64 also has a feature which is not available on i386, the ability | |||
32 | to automatically switch to a new stack for designated events such as | 32 | to automatically switch to a new stack for designated events such as |
33 | double fault or NMI, which makes it easier to handle these unusual | 33 | double fault or NMI, which makes it easier to handle these unusual |
34 | events on x86_64. This feature is called the Interrupt Stack Table | 34 | events on x86_64. This feature is called the Interrupt Stack Table |
35 | (IST). There can be up to 7 IST entries per cpu. The IST code is an | 35 | (IST). There can be up to 7 IST entries per CPU. The IST code is an |
36 | index into the Task State Segment (TSS), the IST entries in the TSS | 36 | index into the Task State Segment (TSS). The IST entries in the TSS |
37 | point to dedicated stacks, each stack can be a different size. | 37 | point to dedicated stacks; each stack can be a different size. |
38 | 38 | ||
39 | An IST is selected by an non-zero value in the IST field of an | 39 | An IST is selected by a non-zero value in the IST field of an |
40 | interrupt-gate descriptor. When an interrupt occurs and the hardware | 40 | interrupt-gate descriptor. When an interrupt occurs and the hardware |
41 | loads such a descriptor, the hardware automatically sets the new stack | 41 | loads such a descriptor, the hardware automatically sets the new stack |
42 | pointer based on the IST value, then invokes the interrupt handler. If | 42 | pointer based on the IST value, then invokes the interrupt handler. If |
43 | software wants to allow nested IST interrupts then the handler must | 43 | software wants to allow nested IST interrupts then the handler must |
44 | adjust the IST values on entry to and exit from the interrupt handler. | 44 | adjust the IST values on entry to and exit from the interrupt handler. |
45 | (this is occasionally done, e.g. for debug exceptions) | 45 | (This is occasionally done, e.g. for debug exceptions.) |
46 | 46 | ||
47 | Events with different IST codes (i.e. with different stacks) can be | 47 | Events with different IST codes (i.e. with different stacks) can be |
48 | nested. For example, a debug interrupt can safely be interrupted by an | 48 | nested. For example, a debug interrupt can safely be interrupted by an |
@@ -58,17 +58,17 @@ The currently assigned IST stacks are :- | |||
58 | 58 | ||
59 | Used for interrupt 12 - Stack Fault Exception (#SS). | 59 | Used for interrupt 12 - Stack Fault Exception (#SS). |
60 | 60 | ||
61 | This allows to recover from invalid stack segments. Rarely | 61 | This allows the CPU to recover from invalid stack segments. Rarely |
62 | happens. | 62 | happens. |
63 | 63 | ||
64 | * DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). | 64 | * DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE). |
65 | 65 | ||
66 | Used for interrupt 8 - Double Fault Exception (#DF). | 66 | Used for interrupt 8 - Double Fault Exception (#DF). |
67 | 67 | ||
68 | Invoked when handling a exception causes another exception. Happens | 68 | Invoked when handling one exception causes another exception. Happens |
69 | when the kernel is very confused (e.g. kernel stack pointer corrupt) | 69 | when the kernel is very confused (e.g. kernel stack pointer corrupt). |
70 | Using a separate stack allows to recover from it well enough in many | 70 | Using a separate stack allows the kernel to recover from it well enough |
71 | cases to still output an oops. | 71 | in many cases to still output an oops. |
72 | 72 | ||
73 | * NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). | 73 | * NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE). |
74 | 74 | ||
diff --git a/Documentation/x86_64/machinecheck b/Documentation/x86_64/machinecheck new file mode 100644 index 000000000000..068a6d9904b9 --- /dev/null +++ b/Documentation/x86_64/machinecheck | |||
@@ -0,0 +1,70 @@ | |||
1 | |||
2 | Configurable sysfs parameters for the x86-64 machine check code. | ||
3 | |||
4 | Machine checks report internal hardware error conditions detected | ||
5 | by the CPU. Uncorrected errors typically cause a machine check | ||
6 | (often with panic), corrected ones cause a machine check log entry. | ||
7 | |||
8 | Machine checks are organized in banks (normally associated with | ||
9 | a hardware subsystem) and subevents in a bank. The exact meaning | ||
10 | of the banks and subevents is CPU specific. | ||
11 | |||
12 | mcelog knows how to decode them. | ||
13 | |||
14 | When you see the "Machine check errors logged" message in the system | ||
15 | log then mcelog should run to collect and decode machine check entries | ||
16 | from /dev/mcelog. Normally mcelog should be run regularly from a cronjob. | ||
17 | |||
18 | Each CPU has a directory in /sys/devices/system/machinecheck/machinecheckN | ||
19 | (N = CPU number) | ||
20 | |||
21 | The directory contains some configurable entries: | ||
22 | |||
23 | Entries: | ||
24 | |||
25 | bankNctl | ||
26 | (N bank number) | ||
27 | 64bit Hex bitmask enabling/disabling specific subevents for bank N | ||
28 | When a bit in the bitmask is zero then the respective | ||
29 | subevent will not be reported. | ||
30 | By default all events are enabled. | ||
31 | Note that the BIOS maintains another mask to disable specific events | ||
32 | per bank. This is not visible here. | ||
33 | |||
34 | The following entries appear for each CPU, but they are truly shared | ||
35 | between all CPUs. | ||
36 | |||
37 | check_interval | ||
38 | How often to poll for corrected machine check errors, in seconds | ||
39 | (Note output is hexadecimal). Default 5 minutes. | ||
40 | |||
41 | tolerant | ||
42 | Tolerance level. When a machine check exception occurs for a non | ||
43 | corrected machine check the kernel can take different actions. | ||
44 | Since machine check exceptions can happen any time it is sometimes | ||
45 | risky for the kernel to kill a process because it defies | ||
46 | normal kernel locking rules. The tolerance level configures | ||
47 | how hard the kernel tries to recover even at some risk of deadlock. | ||
48 | |||
49 | 0: always panic, | ||
50 | 1: panic if deadlock possible, | ||
51 | 2: try to avoid panic, | ||
52 | 3: never panic or exit (for testing only) | ||
53 | |||
54 | Default: 1 | ||
55 | |||
56 | Note this only makes a difference if the CPU allows recovery | ||
57 | from a machine check exception. Current x86 CPUs generally do not. | ||
58 | |||
59 | trigger | ||
60 | Program to run when a machine check event is detected. | ||
61 | This is an alternative to running mcelog regularly from cron | ||
62 | and allows events to be detected faster. | ||
63 | |||
64 | TBD document entries for AMD threshold interrupt configuration | ||
65 | |||
66 | For more details about the x86 machine check architecture | ||
67 | see the Intel and AMD architecture manuals from their developer websites. | ||
68 | |||
69 | For more details about the architecture see | ||
70 | http://one.firstfloor.org/~andi/mce.pdf | ||
diff --git a/Documentation/x86_64/mm.txt b/Documentation/x86_64/mm.txt index 133561b9cb0c..f42798ed1c54 100644 --- a/Documentation/x86_64/mm.txt +++ b/Documentation/x86_64/mm.txt | |||
@@ -3,26 +3,26 @@ | |||
3 | 3 | ||
4 | Virtual memory map with 4 level page tables: | 4 | Virtual memory map with 4 level page tables: |
5 | 5 | ||
6 | 0000000000000000 - 00007fffffffffff (=47bits) user space, different per mm | 6 | 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm |
7 | hole caused by [48:63] sign extension | 7 | hole caused by [48:63] sign extension |
8 | ffff800000000000 - ffff80ffffffffff (=40bits) guard hole | 8 | ffff800000000000 - ffff80ffffffffff (=40 bits) guard hole |
9 | ffff810000000000 - ffffc0ffffffffff (=46bits) direct mapping of all phys. memory | 9 | ffff810000000000 - ffffc0ffffffffff (=46 bits) direct mapping of all phys. memory |
10 | ffffc10000000000 - ffffc1ffffffffff (=40bits) hole | 10 | ffffc10000000000 - ffffc1ffffffffff (=40 bits) hole |
11 | ffffc20000000000 - ffffe1ffffffffff (=45bits) vmalloc/ioremap space | 11 | ffffc20000000000 - ffffe1ffffffffff (=45 bits) vmalloc/ioremap space |
12 | ... unused hole ... | 12 | ... unused hole ... |
13 | ffffffff80000000 - ffffffff82800000 (=40MB) kernel text mapping, from phys 0 | 13 | ffffffff80000000 - ffffffff82800000 (=40 MB) kernel text mapping, from phys 0 |
14 | ... unused hole ... | 14 | ... unused hole ... |
15 | ffffffff88000000 - fffffffffff00000 (=1919MB) module mapping space | 15 | ffffffff88000000 - fffffffffff00000 (=1919 MB) module mapping space |
16 | 16 | ||
17 | The direct mapping covers all memory in the system upto the highest | 17 | The direct mapping covers all memory in the system up to the highest |
18 | memory address (this means in some cases it can also include PCI memory | 18 | memory address (this means in some cases it can also include PCI memory |
19 | holes) | 19 | holes). |
20 | 20 | ||
21 | vmalloc space is lazily synchronized into the different PML4 pages of | 21 | vmalloc space is lazily synchronized into the different PML4 pages of |
22 | the processes using the page fault handler, with init_level4_pgt as | 22 | the processes using the page fault handler, with init_level4_pgt as |
23 | reference. | 23 | reference. |
24 | 24 | ||
25 | Current X86-64 implementations only support 40 bit of address space, | 25 | Current X86-64 implementations only support 40 bits of address space, |
26 | but we support upto 46bits. This expands into MBZ space in the page tables. | 26 | but we support up to 46 bits. This expands into MBZ space in the page tables. |
27 | 27 | ||
28 | -Andi Kleen, Jul 2004 | 28 | -Andi Kleen, Jul 2004 |
diff --git a/MAINTAINERS b/MAINTAINERS index 9e6c9ff0f543..b0fd71b3f66f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS | |||
@@ -3779,6 +3779,7 @@ P: Andi Kleen | |||
3779 | M: ak@suse.de | 3779 | M: ak@suse.de |
3780 | L: discuss@x86-64.org | 3780 | L: discuss@x86-64.org |
3781 | W: http://www.x86-64.org | 3781 | W: http://www.x86-64.org |
3782 | T: quilt ftp://ftp.firstfloor.org/pub/ak/x86_64/quilt-current | ||
3782 | S: Maintained | 3783 | S: Maintained |
3783 | 3784 | ||
3784 | YAM DRIVER FOR AX.25 | 3785 | YAM DRIVER FOR AX.25 |
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 63d5e841caf5..595fb771366e 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -203,6 +203,15 @@ config PARAVIRT | |||
203 | However, when run without a hypervisor the kernel is | 203 | However, when run without a hypervisor the kernel is |
204 | theoretically slower. If in doubt, say N. | 204 | theoretically slower. If in doubt, say N. |
205 | 205 | ||
206 | config VMI | ||
207 | bool "VMI Paravirt-ops support" | ||
208 | depends on PARAVIRT | ||
209 | default y | ||
210 | help | ||
211 | VMI provides a paravirtualized interface to multiple hypervisors | ||
212 | including VMware ESX server and Xen by connecting to a ROM module | ||
213 | provided by the hypervisor. | ||
214 | |||
206 | config ACPI_SRAT | 215 | config ACPI_SRAT |
207 | bool | 216 | bool |
208 | default y | 217 | default y |
@@ -1263,3 +1272,12 @@ config X86_TRAMPOLINE | |||
1263 | config KTIME_SCALAR | 1272 | config KTIME_SCALAR |
1264 | bool | 1273 | bool |
1265 | default y | 1274 | default y |
1275 | |||
1276 | config NO_IDLE_HZ | ||
1277 | bool | ||
1278 | depends on PARAVIRT | ||
1279 | default y | ||
1280 | help | ||
1281 | Switches the regular HZ timer off when the system is going idle. | ||
1282 | This helps a hypervisor detect that the Linux system is idle, | ||
1283 | reducing the overhead of idle systems. | ||
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index 2aecfba4ac4f..b99c0e2a4e63 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu | |||
@@ -226,11 +226,6 @@ config X86_CMPXCHG | |||
226 | depends on !M386 | 226 | depends on !M386 |
227 | default y | 227 | default y |
228 | 228 | ||
229 | config X86_XADD | ||
230 | bool | ||
231 | depends on !M386 | ||
232 | default y | ||
233 | |||
234 | config X86_L1_CACHE_SHIFT | 229 | config X86_L1_CACHE_SHIFT |
235 | int | 230 | int |
236 | default "7" if MPENTIUM4 || X86_GENERIC | 231 | default "7" if MPENTIUM4 || X86_GENERIC |
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index f68cc6f215f8..458bc1611933 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug | |||
@@ -87,7 +87,7 @@ config DOUBLEFAULT | |||
87 | 87 | ||
88 | config DEBUG_PARAVIRT | 88 | config DEBUG_PARAVIRT |
89 | bool "Enable some paravirtualization debugging" | 89 | bool "Enable some paravirtualization debugging" |
90 | default y | 90 | default n |
91 | depends on PARAVIRT && DEBUG_KERNEL | 91 | depends on PARAVIRT && DEBUG_KERNEL |
92 | help | 92 | help |
93 | Currently deliberately clobbers regs which are allowed to be | 93 | Currently deliberately clobbers regs which are allowed to be |
diff --git a/arch/i386/defconfig b/arch/i386/defconfig index bb0c376b62b3..5ae1e0bc8fd7 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.20-rc3 | 3 | # Linux kernel version: 2.6.20-git8 |
4 | # Fri Jan 5 11:54:46 2007 | 4 | # Tue Feb 13 11:25:18 2007 |
5 | # | 5 | # |
6 | CONFIG_X86_32=y | 6 | CONFIG_X86_32=y |
7 | CONFIG_GENERIC_TIME=y | 7 | CONFIG_GENERIC_TIME=y |
@@ -10,6 +10,7 @@ CONFIG_STACKTRACE_SUPPORT=y | |||
10 | CONFIG_SEMAPHORE_SLEEPERS=y | 10 | CONFIG_SEMAPHORE_SLEEPERS=y |
11 | CONFIG_X86=y | 11 | CONFIG_X86=y |
12 | CONFIG_MMU=y | 12 | CONFIG_MMU=y |
13 | CONFIG_ZONE_DMA=y | ||
13 | CONFIG_GENERIC_ISA_DMA=y | 14 | CONFIG_GENERIC_ISA_DMA=y |
14 | CONFIG_GENERIC_IOMAP=y | 15 | CONFIG_GENERIC_IOMAP=y |
15 | CONFIG_GENERIC_BUG=y | 16 | CONFIG_GENERIC_BUG=y |
@@ -139,7 +140,6 @@ CONFIG_MPENTIUMIII=y | |||
139 | # CONFIG_MVIAC3_2 is not set | 140 | # CONFIG_MVIAC3_2 is not set |
140 | CONFIG_X86_GENERIC=y | 141 | CONFIG_X86_GENERIC=y |
141 | CONFIG_X86_CMPXCHG=y | 142 | CONFIG_X86_CMPXCHG=y |
142 | CONFIG_X86_XADD=y | ||
143 | CONFIG_X86_L1_CACHE_SHIFT=7 | 143 | CONFIG_X86_L1_CACHE_SHIFT=7 |
144 | CONFIG_RWSEM_XCHGADD_ALGORITHM=y | 144 | CONFIG_RWSEM_XCHGADD_ALGORITHM=y |
145 | # CONFIG_ARCH_HAS_ILOG2_U32 is not set | 145 | # CONFIG_ARCH_HAS_ILOG2_U32 is not set |
@@ -198,6 +198,7 @@ CONFIG_FLAT_NODE_MEM_MAP=y | |||
198 | # CONFIG_SPARSEMEM_STATIC is not set | 198 | # CONFIG_SPARSEMEM_STATIC is not set |
199 | CONFIG_SPLIT_PTLOCK_CPUS=4 | 199 | CONFIG_SPLIT_PTLOCK_CPUS=4 |
200 | CONFIG_RESOURCES_64BIT=y | 200 | CONFIG_RESOURCES_64BIT=y |
201 | CONFIG_ZONE_DMA_FLAG=1 | ||
201 | # CONFIG_HIGHPTE is not set | 202 | # CONFIG_HIGHPTE is not set |
202 | # CONFIG_MATH_EMULATION is not set | 203 | # CONFIG_MATH_EMULATION is not set |
203 | CONFIG_MTRR=y | 204 | CONFIG_MTRR=y |
@@ -211,6 +212,7 @@ CONFIG_HZ_250=y | |||
211 | CONFIG_HZ=250 | 212 | CONFIG_HZ=250 |
212 | # CONFIG_KEXEC is not set | 213 | # CONFIG_KEXEC is not set |
213 | # CONFIG_CRASH_DUMP is not set | 214 | # CONFIG_CRASH_DUMP is not set |
215 | CONFIG_PHYSICAL_START=0x100000 | ||
214 | # CONFIG_RELOCATABLE is not set | 216 | # CONFIG_RELOCATABLE is not set |
215 | CONFIG_PHYSICAL_ALIGN=0x100000 | 217 | CONFIG_PHYSICAL_ALIGN=0x100000 |
216 | # CONFIG_HOTPLUG_CPU is not set | 218 | # CONFIG_HOTPLUG_CPU is not set |
@@ -229,13 +231,14 @@ CONFIG_PM_SYSFS_DEPRECATED=y | |||
229 | # ACPI (Advanced Configuration and Power Interface) Support | 231 | # ACPI (Advanced Configuration and Power Interface) Support |
230 | # | 232 | # |
231 | CONFIG_ACPI=y | 233 | CONFIG_ACPI=y |
234 | CONFIG_ACPI_PROCFS=y | ||
232 | CONFIG_ACPI_AC=y | 235 | CONFIG_ACPI_AC=y |
233 | CONFIG_ACPI_BATTERY=y | 236 | CONFIG_ACPI_BATTERY=y |
234 | CONFIG_ACPI_BUTTON=y | 237 | CONFIG_ACPI_BUTTON=y |
235 | # CONFIG_ACPI_VIDEO is not set | ||
236 | # CONFIG_ACPI_HOTKEY is not set | 238 | # CONFIG_ACPI_HOTKEY is not set |
237 | CONFIG_ACPI_FAN=y | 239 | CONFIG_ACPI_FAN=y |
238 | # CONFIG_ACPI_DOCK is not set | 240 | # CONFIG_ACPI_DOCK is not set |
241 | # CONFIG_ACPI_BAY is not set | ||
239 | CONFIG_ACPI_PROCESSOR=y | 242 | CONFIG_ACPI_PROCESSOR=y |
240 | CONFIG_ACPI_THERMAL=y | 243 | CONFIG_ACPI_THERMAL=y |
241 | # CONFIG_ACPI_ASUS is not set | 244 | # CONFIG_ACPI_ASUS is not set |
@@ -306,7 +309,6 @@ CONFIG_PCI_DIRECT=y | |||
306 | CONFIG_PCI_MMCONFIG=y | 309 | CONFIG_PCI_MMCONFIG=y |
307 | # CONFIG_PCIEPORTBUS is not set | 310 | # CONFIG_PCIEPORTBUS is not set |
308 | CONFIG_PCI_MSI=y | 311 | CONFIG_PCI_MSI=y |
309 | # CONFIG_PCI_MULTITHREAD_PROBE is not set | ||
310 | # CONFIG_PCI_DEBUG is not set | 312 | # CONFIG_PCI_DEBUG is not set |
311 | # CONFIG_HT_IRQ is not set | 313 | # CONFIG_HT_IRQ is not set |
312 | CONFIG_ISA_DMA_API=y | 314 | CONFIG_ISA_DMA_API=y |
@@ -347,6 +349,7 @@ CONFIG_UNIX=y | |||
347 | CONFIG_XFRM=y | 349 | CONFIG_XFRM=y |
348 | # CONFIG_XFRM_USER is not set | 350 | # CONFIG_XFRM_USER is not set |
349 | # CONFIG_XFRM_SUB_POLICY is not set | 351 | # CONFIG_XFRM_SUB_POLICY is not set |
352 | # CONFIG_XFRM_MIGRATE is not set | ||
350 | # CONFIG_NET_KEY is not set | 353 | # CONFIG_NET_KEY is not set |
351 | CONFIG_INET=y | 354 | CONFIG_INET=y |
352 | CONFIG_IP_MULTICAST=y | 355 | CONFIG_IP_MULTICAST=y |
@@ -446,6 +449,7 @@ CONFIG_STANDALONE=y | |||
446 | CONFIG_PREVENT_FIRMWARE_BUILD=y | 449 | CONFIG_PREVENT_FIRMWARE_BUILD=y |
447 | CONFIG_FW_LOADER=y | 450 | CONFIG_FW_LOADER=y |
448 | # CONFIG_DEBUG_DRIVER is not set | 451 | # CONFIG_DEBUG_DRIVER is not set |
452 | # CONFIG_DEBUG_DEVRES is not set | ||
449 | # CONFIG_SYS_HYPERVISOR is not set | 453 | # CONFIG_SYS_HYPERVISOR is not set |
450 | 454 | ||
451 | # | 455 | # |
@@ -466,8 +470,7 @@ CONFIG_FW_LOADER=y | |||
466 | # | 470 | # |
467 | # Plug and Play support | 471 | # Plug and Play support |
468 | # | 472 | # |
469 | CONFIG_PNP=y | 473 | # CONFIG_PNP is not set |
470 | CONFIG_PNPACPI=y | ||
471 | 474 | ||
472 | # | 475 | # |
473 | # Block devices | 476 | # Block devices |
@@ -515,6 +518,7 @@ CONFIG_BLK_DEV_IDECD=y | |||
515 | # CONFIG_BLK_DEV_IDETAPE is not set | 518 | # CONFIG_BLK_DEV_IDETAPE is not set |
516 | # CONFIG_BLK_DEV_IDEFLOPPY is not set | 519 | # CONFIG_BLK_DEV_IDEFLOPPY is not set |
517 | # CONFIG_BLK_DEV_IDESCSI is not set | 520 | # CONFIG_BLK_DEV_IDESCSI is not set |
521 | CONFIG_BLK_DEV_IDEACPI=y | ||
518 | # CONFIG_IDE_TASK_IOCTL is not set | 522 | # CONFIG_IDE_TASK_IOCTL is not set |
519 | 523 | ||
520 | # | 524 | # |
@@ -547,6 +551,7 @@ CONFIG_BLK_DEV_AMD74XX=y | |||
547 | # CONFIG_BLK_DEV_JMICRON is not set | 551 | # CONFIG_BLK_DEV_JMICRON is not set |
548 | # CONFIG_BLK_DEV_SC1200 is not set | 552 | # CONFIG_BLK_DEV_SC1200 is not set |
549 | CONFIG_BLK_DEV_PIIX=y | 553 | CONFIG_BLK_DEV_PIIX=y |
554 | # CONFIG_BLK_DEV_IT8213 is not set | ||
550 | # CONFIG_BLK_DEV_IT821X is not set | 555 | # CONFIG_BLK_DEV_IT821X is not set |
551 | # CONFIG_BLK_DEV_NS87415 is not set | 556 | # CONFIG_BLK_DEV_NS87415 is not set |
552 | # CONFIG_BLK_DEV_PDC202XX_OLD is not set | 557 | # CONFIG_BLK_DEV_PDC202XX_OLD is not set |
@@ -557,6 +562,7 @@ CONFIG_BLK_DEV_PIIX=y | |||
557 | # CONFIG_BLK_DEV_SLC90E66 is not set | 562 | # CONFIG_BLK_DEV_SLC90E66 is not set |
558 | # CONFIG_BLK_DEV_TRM290 is not set | 563 | # CONFIG_BLK_DEV_TRM290 is not set |
559 | # CONFIG_BLK_DEV_VIA82CXXX is not set | 564 | # CONFIG_BLK_DEV_VIA82CXXX is not set |
565 | # CONFIG_BLK_DEV_TC86C001 is not set | ||
560 | # CONFIG_IDE_ARM is not set | 566 | # CONFIG_IDE_ARM is not set |
561 | CONFIG_BLK_DEV_IDEDMA=y | 567 | CONFIG_BLK_DEV_IDEDMA=y |
562 | # CONFIG_IDEDMA_IVB is not set | 568 | # CONFIG_IDEDMA_IVB is not set |
@@ -655,6 +661,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0 | |||
655 | # Serial ATA (prod) and Parallel ATA (experimental) drivers | 661 | # Serial ATA (prod) and Parallel ATA (experimental) drivers |
656 | # | 662 | # |
657 | CONFIG_ATA=y | 663 | CONFIG_ATA=y |
664 | # CONFIG_ATA_NONSTANDARD is not set | ||
658 | CONFIG_SATA_AHCI=y | 665 | CONFIG_SATA_AHCI=y |
659 | CONFIG_SATA_SVW=y | 666 | CONFIG_SATA_SVW=y |
660 | CONFIG_ATA_PIIX=y | 667 | CONFIG_ATA_PIIX=y |
@@ -670,6 +677,7 @@ CONFIG_SATA_SIL=y | |||
670 | # CONFIG_SATA_ULI is not set | 677 | # CONFIG_SATA_ULI is not set |
671 | CONFIG_SATA_VIA=y | 678 | CONFIG_SATA_VIA=y |
672 | # CONFIG_SATA_VITESSE is not set | 679 | # CONFIG_SATA_VITESSE is not set |
680 | # CONFIG_SATA_INIC162X is not set | ||
673 | CONFIG_SATA_INTEL_COMBINED=y | 681 | CONFIG_SATA_INTEL_COMBINED=y |
674 | # CONFIG_PATA_ALI is not set | 682 | # CONFIG_PATA_ALI is not set |
675 | # CONFIG_PATA_AMD is not set | 683 | # CONFIG_PATA_AMD is not set |
@@ -687,6 +695,7 @@ CONFIG_SATA_INTEL_COMBINED=y | |||
687 | # CONFIG_PATA_HPT3X2N is not set | 695 | # CONFIG_PATA_HPT3X2N is not set |
688 | # CONFIG_PATA_HPT3X3 is not set | 696 | # CONFIG_PATA_HPT3X3 is not set |
689 | # CONFIG_PATA_IT821X is not set | 697 | # CONFIG_PATA_IT821X is not set |
698 | # CONFIG_PATA_IT8213 is not set | ||
690 | # CONFIG_PATA_JMICRON is not set | 699 | # CONFIG_PATA_JMICRON is not set |
691 | # CONFIG_PATA_TRIFLEX is not set | 700 | # CONFIG_PATA_TRIFLEX is not set |
692 | # CONFIG_PATA_MARVELL is not set | 701 | # CONFIG_PATA_MARVELL is not set |
@@ -739,9 +748,7 @@ CONFIG_IEEE1394=y | |||
739 | # Subsystem Options | 748 | # Subsystem Options |
740 | # | 749 | # |
741 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set | 750 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set |
742 | # CONFIG_IEEE1394_OUI_DB is not set | ||
743 | # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set | 751 | # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set |
744 | # CONFIG_IEEE1394_EXPORT_FULL_API is not set | ||
745 | 752 | ||
746 | # | 753 | # |
747 | # Device Drivers | 754 | # Device Drivers |
@@ -767,6 +774,11 @@ CONFIG_IEEE1394_RAWIO=y | |||
767 | # CONFIG_I2O is not set | 774 | # CONFIG_I2O is not set |
768 | 775 | ||
769 | # | 776 | # |
777 | # Macintosh device drivers | ||
778 | # | ||
779 | # CONFIG_MAC_EMUMOUSEBTN is not set | ||
780 | |||
781 | # | ||
770 | # Network device support | 782 | # Network device support |
771 | # | 783 | # |
772 | CONFIG_NETDEVICES=y | 784 | CONFIG_NETDEVICES=y |
@@ -833,6 +845,7 @@ CONFIG_8139TOO=y | |||
833 | # CONFIG_SUNDANCE is not set | 845 | # CONFIG_SUNDANCE is not set |
834 | # CONFIG_TLAN is not set | 846 | # CONFIG_TLAN is not set |
835 | # CONFIG_VIA_RHINE is not set | 847 | # CONFIG_VIA_RHINE is not set |
848 | # CONFIG_SC92031 is not set | ||
836 | 849 | ||
837 | # | 850 | # |
838 | # Ethernet (1000 Mbit) | 851 | # Ethernet (1000 Mbit) |
@@ -855,11 +868,13 @@ CONFIG_SKY2=y | |||
855 | CONFIG_TIGON3=y | 868 | CONFIG_TIGON3=y |
856 | CONFIG_BNX2=y | 869 | CONFIG_BNX2=y |
857 | # CONFIG_QLA3XXX is not set | 870 | # CONFIG_QLA3XXX is not set |
871 | # CONFIG_ATL1 is not set | ||
858 | 872 | ||
859 | # | 873 | # |
860 | # Ethernet (10000 Mbit) | 874 | # Ethernet (10000 Mbit) |
861 | # | 875 | # |
862 | # CONFIG_CHELSIO_T1 is not set | 876 | # CONFIG_CHELSIO_T1 is not set |
877 | # CONFIG_CHELSIO_T3 is not set | ||
863 | # CONFIG_IXGB is not set | 878 | # CONFIG_IXGB is not set |
864 | # CONFIG_S2IO is not set | 879 | # CONFIG_S2IO is not set |
865 | # CONFIG_MYRI10GE is not set | 880 | # CONFIG_MYRI10GE is not set |
@@ -1090,6 +1105,7 @@ CONFIG_SOUND=y | |||
1090 | # Open Sound System | 1105 | # Open Sound System |
1091 | # | 1106 | # |
1092 | CONFIG_SOUND_PRIME=y | 1107 | CONFIG_SOUND_PRIME=y |
1108 | CONFIG_OBSOLETE_OSS=y | ||
1093 | # CONFIG_SOUND_BT878 is not set | 1109 | # CONFIG_SOUND_BT878 is not set |
1094 | # CONFIG_SOUND_ES1371 is not set | 1110 | # CONFIG_SOUND_ES1371 is not set |
1095 | CONFIG_SOUND_ICH=y | 1111 | CONFIG_SOUND_ICH=y |
@@ -1103,6 +1119,7 @@ CONFIG_SOUND_ICH=y | |||
1103 | # HID Devices | 1119 | # HID Devices |
1104 | # | 1120 | # |
1105 | CONFIG_HID=y | 1121 | CONFIG_HID=y |
1122 | # CONFIG_HID_DEBUG is not set | ||
1106 | 1123 | ||
1107 | # | 1124 | # |
1108 | # USB support | 1125 | # USB support |
@@ -1117,10 +1134,8 @@ CONFIG_USB=y | |||
1117 | # Miscellaneous USB options | 1134 | # Miscellaneous USB options |
1118 | # | 1135 | # |
1119 | CONFIG_USB_DEVICEFS=y | 1136 | CONFIG_USB_DEVICEFS=y |
1120 | # CONFIG_USB_BANDWIDTH is not set | ||
1121 | # CONFIG_USB_DYNAMIC_MINORS is not set | 1137 | # CONFIG_USB_DYNAMIC_MINORS is not set |
1122 | # CONFIG_USB_SUSPEND is not set | 1138 | # CONFIG_USB_SUSPEND is not set |
1123 | # CONFIG_USB_MULTITHREAD_PROBE is not set | ||
1124 | # CONFIG_USB_OTG is not set | 1139 | # CONFIG_USB_OTG is not set |
1125 | 1140 | ||
1126 | # | 1141 | # |
@@ -1130,9 +1145,11 @@ CONFIG_USB_EHCI_HCD=y | |||
1130 | # CONFIG_USB_EHCI_SPLIT_ISO is not set | 1145 | # CONFIG_USB_EHCI_SPLIT_ISO is not set |
1131 | # CONFIG_USB_EHCI_ROOT_HUB_TT is not set | 1146 | # CONFIG_USB_EHCI_ROOT_HUB_TT is not set |
1132 | # CONFIG_USB_EHCI_TT_NEWSCHED is not set | 1147 | # CONFIG_USB_EHCI_TT_NEWSCHED is not set |
1148 | # CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set | ||
1133 | # CONFIG_USB_ISP116X_HCD is not set | 1149 | # CONFIG_USB_ISP116X_HCD is not set |
1134 | CONFIG_USB_OHCI_HCD=y | 1150 | CONFIG_USB_OHCI_HCD=y |
1135 | # CONFIG_USB_OHCI_BIG_ENDIAN is not set | 1151 | # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set |
1152 | # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set | ||
1136 | CONFIG_USB_OHCI_LITTLE_ENDIAN=y | 1153 | CONFIG_USB_OHCI_LITTLE_ENDIAN=y |
1137 | CONFIG_USB_UHCI_HCD=y | 1154 | CONFIG_USB_UHCI_HCD=y |
1138 | # CONFIG_USB_SL811_HCD is not set | 1155 | # CONFIG_USB_SL811_HCD is not set |
@@ -1183,6 +1200,7 @@ CONFIG_USB_HID=y | |||
1183 | # CONFIG_USB_ATI_REMOTE2 is not set | 1200 | # CONFIG_USB_ATI_REMOTE2 is not set |
1184 | # CONFIG_USB_KEYSPAN_REMOTE is not set | 1201 | # CONFIG_USB_KEYSPAN_REMOTE is not set |
1185 | # CONFIG_USB_APPLETOUCH is not set | 1202 | # CONFIG_USB_APPLETOUCH is not set |
1203 | # CONFIG_USB_GTCO is not set | ||
1186 | 1204 | ||
1187 | # | 1205 | # |
1188 | # USB Imaging devices | 1206 | # USB Imaging devices |
@@ -1288,6 +1306,10 @@ CONFIG_USB_MON=y | |||
1288 | # | 1306 | # |
1289 | 1307 | ||
1290 | # | 1308 | # |
1309 | # Auxiliary Display support | ||
1310 | # | ||
1311 | |||
1312 | # | ||
1291 | # Virtualization | 1313 | # Virtualization |
1292 | # | 1314 | # |
1293 | # CONFIG_KVM is not set | 1315 | # CONFIG_KVM is not set |
@@ -1480,6 +1502,7 @@ CONFIG_UNUSED_SYMBOLS=y | |||
1480 | # CONFIG_DEBUG_FS is not set | 1502 | # CONFIG_DEBUG_FS is not set |
1481 | # CONFIG_HEADERS_CHECK is not set | 1503 | # CONFIG_HEADERS_CHECK is not set |
1482 | CONFIG_DEBUG_KERNEL=y | 1504 | CONFIG_DEBUG_KERNEL=y |
1505 | # CONFIG_DEBUG_SHIRQ is not set | ||
1483 | CONFIG_LOG_BUF_SHIFT=18 | 1506 | CONFIG_LOG_BUF_SHIFT=18 |
1484 | CONFIG_DETECT_SOFTLOCKUP=y | 1507 | CONFIG_DETECT_SOFTLOCKUP=y |
1485 | # CONFIG_SCHEDSTATS is not set | 1508 | # CONFIG_SCHEDSTATS is not set |
@@ -1488,7 +1511,6 @@ CONFIG_DETECT_SOFTLOCKUP=y | |||
1488 | # CONFIG_RT_MUTEX_TESTER is not set | 1511 | # CONFIG_RT_MUTEX_TESTER is not set |
1489 | # CONFIG_DEBUG_SPINLOCK is not set | 1512 | # CONFIG_DEBUG_SPINLOCK is not set |
1490 | # CONFIG_DEBUG_MUTEXES is not set | 1513 | # CONFIG_DEBUG_MUTEXES is not set |
1491 | # CONFIG_DEBUG_RWSEMS is not set | ||
1492 | # CONFIG_DEBUG_LOCK_ALLOC is not set | 1514 | # CONFIG_DEBUG_LOCK_ALLOC is not set |
1493 | # CONFIG_PROVE_LOCKING is not set | 1515 | # CONFIG_PROVE_LOCKING is not set |
1494 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set | 1516 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set |
@@ -1533,7 +1555,8 @@ CONFIG_CRC32=y | |||
1533 | # CONFIG_LIBCRC32C is not set | 1555 | # CONFIG_LIBCRC32C is not set |
1534 | CONFIG_ZLIB_INFLATE=y | 1556 | CONFIG_ZLIB_INFLATE=y |
1535 | CONFIG_PLIST=y | 1557 | CONFIG_PLIST=y |
1536 | CONFIG_IOMAP_COPY=y | 1558 | CONFIG_HAS_IOMEM=y |
1559 | CONFIG_HAS_IOPORT=y | ||
1537 | CONFIG_GENERIC_HARDIRQS=y | 1560 | CONFIG_GENERIC_HARDIRQS=y |
1538 | CONFIG_GENERIC_IRQ_PROBE=y | 1561 | CONFIG_GENERIC_IRQ_PROBE=y |
1539 | CONFIG_GENERIC_PENDING_IRQ=y | 1562 | CONFIG_GENERIC_PENDING_IRQ=y |
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 1e8988e558c5..cbe4e601885c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -40,8 +40,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
40 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 40 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
41 | obj-$(CONFIG_K8_NB) += k8.o | 41 | obj-$(CONFIG_K8_NB) += k8.o |
42 | 42 | ||
43 | # Make sure this is linked after any other paravirt_ops structs: see head.S | 43 | obj-$(CONFIG_VMI) += vmi.o vmitime.o |
44 | obj-$(CONFIG_PARAVIRT) += paravirt.o | 44 | obj-$(CONFIG_PARAVIRT) += paravirt.o |
45 | obj-y += pcspeaker.o | ||
45 | 46 | ||
46 | EXTRA_AFLAGS := -traditional | 47 | EXTRA_AFLAGS := -traditional |
47 | 48 | ||
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 776d9be26af9..f4159e0a7ae9 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/hpet.h> | 36 | #include <asm/hpet.h> |
37 | #include <asm/i8253.h> | 37 | #include <asm/i8253.h> |
38 | #include <asm/nmi.h> | 38 | #include <asm/nmi.h> |
39 | #include <asm/idle.h> | ||
39 | 40 | ||
40 | #include <mach_apic.h> | 41 | #include <mach_apic.h> |
41 | #include <mach_apicdef.h> | 42 | #include <mach_apicdef.h> |
@@ -1255,6 +1256,7 @@ fastcall void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
1255 | * Besides, if we don't timer interrupts ignore the global | 1256 | * Besides, if we don't timer interrupts ignore the global |
1256 | * interrupt lock, which is the WrongThing (tm) to do. | 1257 | * interrupt lock, which is the WrongThing (tm) to do. |
1257 | */ | 1258 | */ |
1259 | exit_idle(); | ||
1258 | irq_enter(); | 1260 | irq_enter(); |
1259 | smp_local_timer_interrupt(); | 1261 | smp_local_timer_interrupt(); |
1260 | irq_exit(); | 1262 | irq_exit(); |
@@ -1305,6 +1307,7 @@ fastcall void smp_spurious_interrupt(struct pt_regs *regs) | |||
1305 | { | 1307 | { |
1306 | unsigned long v; | 1308 | unsigned long v; |
1307 | 1309 | ||
1310 | exit_idle(); | ||
1308 | irq_enter(); | 1311 | irq_enter(); |
1309 | /* | 1312 | /* |
1310 | * Check if this really is a spurious interrupt and ACK it | 1313 | * Check if this really is a spurious interrupt and ACK it |
@@ -1329,6 +1332,7 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) | |||
1329 | { | 1332 | { |
1330 | unsigned long v, v1; | 1333 | unsigned long v, v1; |
1331 | 1334 | ||
1335 | exit_idle(); | ||
1332 | irq_enter(); | 1336 | irq_enter(); |
1333 | /* First tickle the hardware, only then report what went on. -- REW */ | 1337 | /* First tickle the hardware, only then report what went on. -- REW */ |
1334 | v = apic_read(APIC_ESR); | 1338 | v = apic_read(APIC_ESR); |
@@ -1395,7 +1399,7 @@ int __init APIC_init_uniprocessor (void) | |||
1395 | if (!skip_ioapic_setup && nr_ioapics) | 1399 | if (!skip_ioapic_setup && nr_ioapics) |
1396 | setup_IO_APIC(); | 1400 | setup_IO_APIC(); |
1397 | #endif | 1401 | #endif |
1398 | setup_boot_APIC_clock(); | 1402 | setup_boot_clock(); |
1399 | 1403 | ||
1400 | return 0; | 1404 | return 0; |
1401 | } | 1405 | } |
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index db99a8948dae..f9ba0af7ee1f 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -211,6 +211,7 @@ | |||
211 | #include <linux/slab.h> | 211 | #include <linux/slab.h> |
212 | #include <linux/stat.h> | 212 | #include <linux/stat.h> |
213 | #include <linux/proc_fs.h> | 213 | #include <linux/proc_fs.h> |
214 | #include <linux/seq_file.h> | ||
214 | #include <linux/miscdevice.h> | 215 | #include <linux/miscdevice.h> |
215 | #include <linux/apm_bios.h> | 216 | #include <linux/apm_bios.h> |
216 | #include <linux/init.h> | 217 | #include <linux/init.h> |
@@ -1636,9 +1637,8 @@ static int do_open(struct inode * inode, struct file * filp) | |||
1636 | return 0; | 1637 | return 0; |
1637 | } | 1638 | } |
1638 | 1639 | ||
1639 | static int apm_get_info(char *buf, char **start, off_t fpos, int length) | 1640 | static int proc_apm_show(struct seq_file *m, void *v) |
1640 | { | 1641 | { |
1641 | char * p; | ||
1642 | unsigned short bx; | 1642 | unsigned short bx; |
1643 | unsigned short cx; | 1643 | unsigned short cx; |
1644 | unsigned short dx; | 1644 | unsigned short dx; |
@@ -1650,8 +1650,6 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1650 | int time_units = -1; | 1650 | int time_units = -1; |
1651 | char *units = "?"; | 1651 | char *units = "?"; |
1652 | 1652 | ||
1653 | p = buf; | ||
1654 | |||
1655 | if ((num_online_cpus() == 1) && | 1653 | if ((num_online_cpus() == 1) && |
1656 | !(error = apm_get_power_status(&bx, &cx, &dx))) { | 1654 | !(error = apm_get_power_status(&bx, &cx, &dx))) { |
1657 | ac_line_status = (bx >> 8) & 0xff; | 1655 | ac_line_status = (bx >> 8) & 0xff; |
@@ -1705,7 +1703,7 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1705 | -1: Unknown | 1703 | -1: Unknown |
1706 | 8) min = minutes; sec = seconds */ | 1704 | 8) min = minutes; sec = seconds */ |
1707 | 1705 | ||
1708 | p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", | 1706 | seq_printf(m, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n", |
1709 | driver_version, | 1707 | driver_version, |
1710 | (apm_info.bios.version >> 8) & 0xff, | 1708 | (apm_info.bios.version >> 8) & 0xff, |
1711 | apm_info.bios.version & 0xff, | 1709 | apm_info.bios.version & 0xff, |
@@ -1716,10 +1714,22 @@ static int apm_get_info(char *buf, char **start, off_t fpos, int length) | |||
1716 | percentage, | 1714 | percentage, |
1717 | time_units, | 1715 | time_units, |
1718 | units); | 1716 | units); |
1717 | return 0; | ||
1718 | } | ||
1719 | 1719 | ||
1720 | return p - buf; | 1720 | static int proc_apm_open(struct inode *inode, struct file *file) |
1721 | { | ||
1722 | return single_open(file, proc_apm_show, NULL); | ||
1721 | } | 1723 | } |
1722 | 1724 | ||
1725 | static const struct file_operations apm_file_ops = { | ||
1726 | .owner = THIS_MODULE, | ||
1727 | .open = proc_apm_open, | ||
1728 | .read = seq_read, | ||
1729 | .llseek = seq_lseek, | ||
1730 | .release = single_release, | ||
1731 | }; | ||
1732 | |||
1723 | static int apm(void *unused) | 1733 | static int apm(void *unused) |
1724 | { | 1734 | { |
1725 | unsigned short bx; | 1735 | unsigned short bx; |
@@ -2341,9 +2351,9 @@ static int __init apm_init(void) | |||
2341 | set_base(gdt[APM_DS >> 3], | 2351 | set_base(gdt[APM_DS >> 3], |
2342 | __va((unsigned long)apm_info.bios.dseg << 4)); | 2352 | __va((unsigned long)apm_info.bios.dseg << 4)); |
2343 | 2353 | ||
2344 | apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info); | 2354 | apm_proc = create_proc_entry("apm", 0, NULL); |
2345 | if (apm_proc) | 2355 | if (apm_proc) |
2346 | apm_proc->owner = THIS_MODULE; | 2356 | apm_proc->proc_fops = &apm_file_ops; |
2347 | 2357 | ||
2348 | kapmd_task = kthread_create(apm, NULL, "kapmd"); | 2358 | kapmd_task = kthread_create(apm, NULL, "kapmd"); |
2349 | if (IS_ERR(kapmd_task)) { | 2359 | if (IS_ERR(kapmd_task)) { |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index 1b2f3cd33270..c37535163bfc 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -72,7 +72,7 @@ void foo(void) | |||
72 | OFFSET(PT_EAX, pt_regs, eax); | 72 | OFFSET(PT_EAX, pt_regs, eax); |
73 | OFFSET(PT_DS, pt_regs, xds); | 73 | OFFSET(PT_DS, pt_regs, xds); |
74 | OFFSET(PT_ES, pt_regs, xes); | 74 | OFFSET(PT_ES, pt_regs, xes); |
75 | OFFSET(PT_GS, pt_regs, xgs); | 75 | OFFSET(PT_FS, pt_regs, xfs); |
76 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); | 76 | OFFSET(PT_ORIG_EAX, pt_regs, orig_eax); |
77 | OFFSET(PT_EIP, pt_regs, eip); | 77 | OFFSET(PT_EIP, pt_regs, eip); |
78 | OFFSET(PT_CS, pt_regs, xcs); | 78 | OFFSET(PT_CS, pt_regs, xcs); |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 8a8bbdaaf38a..dcbbd0a8bfc2 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -605,7 +605,7 @@ void __init early_cpu_init(void) | |||
605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | 605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
606 | { | 606 | { |
607 | memset(regs, 0, sizeof(struct pt_regs)); | 607 | memset(regs, 0, sizeof(struct pt_regs)); |
608 | regs->xgs = __KERNEL_PDA; | 608 | regs->xfs = __KERNEL_PDA; |
609 | return regs; | 609 | return regs; |
610 | } | 610 | } |
611 | 611 | ||
@@ -662,12 +662,12 @@ struct i386_pda boot_pda = { | |||
662 | .pcurrent = &init_task, | 662 | .pcurrent = &init_task, |
663 | }; | 663 | }; |
664 | 664 | ||
665 | static inline void set_kernel_gs(void) | 665 | static inline void set_kernel_fs(void) |
666 | { | 666 | { |
667 | /* Set %gs for this CPU's PDA. Memory clobber is to create a | 667 | /* Set %fs for this CPU's PDA. Memory clobber is to create a |
668 | barrier with respect to any PDA operations, so the compiler | 668 | barrier with respect to any PDA operations, so the compiler |
669 | doesn't move any before here. */ | 669 | doesn't move any before here. */ |
670 | asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory"); | 670 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); |
671 | } | 671 | } |
672 | 672 | ||
673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for | 673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for |
@@ -718,7 +718,7 @@ void __cpuinit cpu_set_gdt(int cpu) | |||
718 | the boot CPU, this will transition from the boot gdt+pda to | 718 | the boot CPU, this will transition from the boot gdt+pda to |
719 | the real ones). */ | 719 | the real ones). */ |
720 | load_gdt(cpu_gdt_descr); | 720 | load_gdt(cpu_gdt_descr); |
721 | set_kernel_gs(); | 721 | set_kernel_fs(); |
722 | } | 722 | } |
723 | 723 | ||
724 | /* Common CPU init for both boot and secondary CPUs */ | 724 | /* Common CPU init for both boot and secondary CPUs */ |
@@ -764,8 +764,8 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
764 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); | 764 | __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); |
765 | #endif | 765 | #endif |
766 | 766 | ||
767 | /* Clear %fs. */ | 767 | /* Clear %gs. */ |
768 | asm volatile ("mov %0, %%fs" : : "r" (0)); | 768 | asm volatile ("mov %0, %%gs" : : "r" (0)); |
769 | 769 | ||
770 | /* Clear all 6 debug registers: */ | 770 | /* Clear all 6 debug registers: */ |
771 | set_debugreg(0, 0); | 771 | set_debugreg(0, 0); |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index c0c3b59de32c..de27bd07bc9c 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/io.h> | 6 | #include <asm/io.h> |
7 | #include <asm/processor.h> | 7 | #include <asm/processor.h> |
8 | #include <asm/timer.h> | 8 | #include <asm/timer.h> |
9 | #include <asm/pci-direct.h> | ||
9 | 10 | ||
10 | #include "cpu.h" | 11 | #include "cpu.h" |
11 | 12 | ||
@@ -161,19 +162,19 @@ static void __cpuinit set_cx86_inc(void) | |||
161 | static void __cpuinit geode_configure(void) | 162 | static void __cpuinit geode_configure(void) |
162 | { | 163 | { |
163 | unsigned long flags; | 164 | unsigned long flags; |
164 | u8 ccr3, ccr4; | 165 | u8 ccr3; |
165 | local_irq_save(flags); | 166 | local_irq_save(flags); |
166 | 167 | ||
167 | /* Suspend on halt power saving and enable #SUSP pin */ | 168 | /* Suspend on halt power saving and enable #SUSP pin */ |
168 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); | 169 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); |
169 | 170 | ||
170 | ccr3 = getCx86(CX86_CCR3); | 171 | ccr3 = getCx86(CX86_CCR3); |
171 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* Enable */ | 172 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
172 | |||
173 | ccr4 = getCx86(CX86_CCR4); | ||
174 | ccr4 |= 0x38; /* FPU fast, DTE cache, Mem bypass */ | ||
175 | 173 | ||
176 | setCx86(CX86_CCR3, ccr3); | 174 | |
175 | /* FPU fast, DTE cache, Mem bypass */ | ||
176 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); | ||
177 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
177 | 178 | ||
178 | set_cx86_memwb(); | 179 | set_cx86_memwb(); |
179 | set_cx86_reorder(); | 180 | set_cx86_reorder(); |
@@ -183,14 +184,6 @@ static void __cpuinit geode_configure(void) | |||
183 | } | 184 | } |
184 | 185 | ||
185 | 186 | ||
186 | #ifdef CONFIG_PCI | ||
187 | static struct pci_device_id __cpuinitdata cyrix_55x0[] = { | ||
188 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, | ||
189 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, | ||
190 | { }, | ||
191 | }; | ||
192 | #endif | ||
193 | |||
194 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | 187 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
195 | { | 188 | { |
196 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; | 189 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; |
@@ -258,6 +251,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
258 | 251 | ||
259 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ | 252 | case 4: /* MediaGX/GXm or Geode GXM/GXLV/GX1 */ |
260 | #ifdef CONFIG_PCI | 253 | #ifdef CONFIG_PCI |
254 | { | ||
255 | u32 vendor, device; | ||
261 | /* It isn't really a PCI quirk directly, but the cure is the | 256 | /* It isn't really a PCI quirk directly, but the cure is the |
262 | same. The MediaGX has deep magic SMM stuff that handles the | 257 | same. The MediaGX has deep magic SMM stuff that handles the |
263 | SB emulation. It thows away the fifo on disable_dma() which | 258 | SB emulation. It thows away the fifo on disable_dma() which |
@@ -273,22 +268,34 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
273 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); | 268 | printk(KERN_INFO "Working around Cyrix MediaGX virtual DMA bugs.\n"); |
274 | isa_dma_bridge_buggy = 2; | 269 | isa_dma_bridge_buggy = 2; |
275 | 270 | ||
271 | /* We do this before the PCI layer is running. However we | ||
272 | are safe here as we know the bridge must be a Cyrix | ||
273 | companion and must be present */ | ||
274 | vendor = read_pci_config_16(0, 0, 0x12, PCI_VENDOR_ID); | ||
275 | device = read_pci_config_16(0, 0, 0x12, PCI_DEVICE_ID); | ||
276 | 276 | ||
277 | /* | 277 | /* |
278 | * The 5510/5520 companion chips have a funky PIT. | 278 | * The 5510/5520 companion chips have a funky PIT. |
279 | */ | 279 | */ |
280 | if (pci_dev_present(cyrix_55x0)) | 280 | if (vendor == PCI_VENDOR_ID_CYRIX && |
281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | ||
281 | pit_latch_buggy = 1; | 282 | pit_latch_buggy = 1; |
283 | } | ||
282 | #endif | 284 | #endif |
283 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ | 285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ |
284 | 286 | ||
285 | /* GXm supports extended cpuid levels 'ala' AMD */ | 287 | /* GXm supports extended cpuid levels 'ala' AMD */ |
286 | if (c->cpuid_level == 2) { | 288 | if (c->cpuid_level == 2) { |
287 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ | 289 | /* Enable cxMMX extensions (GX1 Datasheet 54) */ |
288 | setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); | 290 | setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); |
289 | 291 | ||
290 | /* GXlv/GXm/GX1 */ | 292 | /* |
291 | if((dir1 >= 0x50 && dir1 <= 0x54) || dir1 >= 0x63) | 293 | * GXm : 0x30 ... 0x5f GXm datasheet 51 |
294 | * GXlv: 0x6x GXlv datasheet 54 | ||
295 | * ? : 0x7x | ||
296 | * GX1 : 0x8x GX1 datasheet 56 | ||
297 | */ | ||
298 | if((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <=dir1 && dir1 <= 0x8f)) | ||
292 | geode_configure(); | 299 | geode_configure(); |
293 | get_model_name(c); /* get CPU marketing name */ | 300 | get_model_name(c); /* get CPU marketing name */ |
294 | return; | 301 | return; |
@@ -415,15 +422,14 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) | |||
415 | 422 | ||
416 | if (dir0 == 5 || dir0 == 3) | 423 | if (dir0 == 5 || dir0 == 3) |
417 | { | 424 | { |
418 | unsigned char ccr3, ccr4; | 425 | unsigned char ccr3; |
419 | unsigned long flags; | 426 | unsigned long flags; |
420 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); | 427 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); |
421 | local_irq_save(flags); | 428 | local_irq_save(flags); |
422 | ccr3 = getCx86(CX86_CCR3); | 429 | ccr3 = getCx86(CX86_CCR3); |
423 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 430 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
424 | ccr4 = getCx86(CX86_CCR4); | 431 | setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ |
425 | setCx86(CX86_CCR4, ccr4 | 0x80); /* enable cpuid */ | 432 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
426 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
427 | local_irq_restore(flags); | 433 | local_irq_restore(flags); |
428 | } | 434 | } |
429 | } | 435 | } |
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index d555bec0db99..4f10c62d180c 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <asm/processor.h> | 13 | #include <asm/processor.h> |
14 | #include <asm/system.h> | 14 | #include <asm/system.h> |
15 | #include <asm/mce.h> | ||
15 | 16 | ||
16 | #include "mce.h" | 17 | #include "mce.h" |
17 | 18 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h index 84fd4cf7d0fb..81fb6e2d35f3 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.h +++ b/arch/i386/kernel/cpu/mcheck/mce.h | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <asm/mce.h> | ||
2 | 3 | ||
3 | void amd_mcheck_init(struct cpuinfo_x86 *c); | 4 | void amd_mcheck_init(struct cpuinfo_x86 *c); |
4 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | 5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); |
@@ -9,6 +10,5 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c); | |||
9 | /* Call the installed machine check handler for this CPU setup. */ | 10 | /* Call the installed machine check handler for this CPU setup. */ |
10 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); | 11 | extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); |
11 | 12 | ||
12 | extern int mce_disabled; | ||
13 | extern int nr_mce_banks; | 13 | extern int nr_mce_banks; |
14 | 14 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 504434a46011..8359c19d3a23 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | #include <asm/idle.h> | ||
15 | 16 | ||
16 | #include <asm/therm_throt.h> | 17 | #include <asm/therm_throt.h> |
17 | 18 | ||
@@ -59,6 +60,7 @@ static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_therm | |||
59 | 60 | ||
60 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) | 61 | fastcall void smp_thermal_interrupt(struct pt_regs *regs) |
61 | { | 62 | { |
63 | exit_idle(); | ||
62 | irq_enter(); | 64 | irq_enter(); |
63 | vendor_thermal_interrupt(regs); | 65 | vendor_thermal_interrupt(regs); |
64 | irq_exit(); | 66 | irq_exit(); |
diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index ee771f305f96..c7d8f1756745 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c | |||
@@ -211,6 +211,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
211 | default: | 211 | default: |
212 | return -ENOTTY; | 212 | return -ENOTTY; |
213 | case MTRRIOC_ADD_ENTRY: | 213 | case MTRRIOC_ADD_ENTRY: |
214 | #ifdef CONFIG_COMPAT | ||
215 | case MTRRIOC32_ADD_ENTRY: | ||
216 | #endif | ||
214 | if (!capable(CAP_SYS_ADMIN)) | 217 | if (!capable(CAP_SYS_ADMIN)) |
215 | return -EPERM; | 218 | return -EPERM; |
216 | err = | 219 | err = |
@@ -218,21 +221,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
218 | file, 0); | 221 | file, 0); |
219 | break; | 222 | break; |
220 | case MTRRIOC_SET_ENTRY: | 223 | case MTRRIOC_SET_ENTRY: |
224 | #ifdef CONFIG_COMPAT | ||
225 | case MTRRIOC32_SET_ENTRY: | ||
226 | #endif | ||
221 | if (!capable(CAP_SYS_ADMIN)) | 227 | if (!capable(CAP_SYS_ADMIN)) |
222 | return -EPERM; | 228 | return -EPERM; |
223 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); | 229 | err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); |
224 | break; | 230 | break; |
225 | case MTRRIOC_DEL_ENTRY: | 231 | case MTRRIOC_DEL_ENTRY: |
232 | #ifdef CONFIG_COMPAT | ||
233 | case MTRRIOC32_DEL_ENTRY: | ||
234 | #endif | ||
226 | if (!capable(CAP_SYS_ADMIN)) | 235 | if (!capable(CAP_SYS_ADMIN)) |
227 | return -EPERM; | 236 | return -EPERM; |
228 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); | 237 | err = mtrr_file_del(sentry.base, sentry.size, file, 0); |
229 | break; | 238 | break; |
230 | case MTRRIOC_KILL_ENTRY: | 239 | case MTRRIOC_KILL_ENTRY: |
240 | #ifdef CONFIG_COMPAT | ||
241 | case MTRRIOC32_KILL_ENTRY: | ||
242 | #endif | ||
231 | if (!capable(CAP_SYS_ADMIN)) | 243 | if (!capable(CAP_SYS_ADMIN)) |
232 | return -EPERM; | 244 | return -EPERM; |
233 | err = mtrr_del(-1, sentry.base, sentry.size); | 245 | err = mtrr_del(-1, sentry.base, sentry.size); |
234 | break; | 246 | break; |
235 | case MTRRIOC_GET_ENTRY: | 247 | case MTRRIOC_GET_ENTRY: |
248 | #ifdef CONFIG_COMPAT | ||
249 | case MTRRIOC32_GET_ENTRY: | ||
250 | #endif | ||
236 | if (gentry.regnum >= num_var_ranges) | 251 | if (gentry.regnum >= num_var_ranges) |
237 | return -EINVAL; | 252 | return -EINVAL; |
238 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); | 253 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); |
@@ -249,6 +264,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
249 | 264 | ||
250 | break; | 265 | break; |
251 | case MTRRIOC_ADD_PAGE_ENTRY: | 266 | case MTRRIOC_ADD_PAGE_ENTRY: |
267 | #ifdef CONFIG_COMPAT | ||
268 | case MTRRIOC32_ADD_PAGE_ENTRY: | ||
269 | #endif | ||
252 | if (!capable(CAP_SYS_ADMIN)) | 270 | if (!capable(CAP_SYS_ADMIN)) |
253 | return -EPERM; | 271 | return -EPERM; |
254 | err = | 272 | err = |
@@ -256,21 +274,33 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
256 | file, 1); | 274 | file, 1); |
257 | break; | 275 | break; |
258 | case MTRRIOC_SET_PAGE_ENTRY: | 276 | case MTRRIOC_SET_PAGE_ENTRY: |
277 | #ifdef CONFIG_COMPAT | ||
278 | case MTRRIOC32_SET_PAGE_ENTRY: | ||
279 | #endif | ||
259 | if (!capable(CAP_SYS_ADMIN)) | 280 | if (!capable(CAP_SYS_ADMIN)) |
260 | return -EPERM; | 281 | return -EPERM; |
261 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); | 282 | err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); |
262 | break; | 283 | break; |
263 | case MTRRIOC_DEL_PAGE_ENTRY: | 284 | case MTRRIOC_DEL_PAGE_ENTRY: |
285 | #ifdef CONFIG_COMPAT | ||
286 | case MTRRIOC32_DEL_PAGE_ENTRY: | ||
287 | #endif | ||
264 | if (!capable(CAP_SYS_ADMIN)) | 288 | if (!capable(CAP_SYS_ADMIN)) |
265 | return -EPERM; | 289 | return -EPERM; |
266 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); | 290 | err = mtrr_file_del(sentry.base, sentry.size, file, 1); |
267 | break; | 291 | break; |
268 | case MTRRIOC_KILL_PAGE_ENTRY: | 292 | case MTRRIOC_KILL_PAGE_ENTRY: |
293 | #ifdef CONFIG_COMPAT | ||
294 | case MTRRIOC32_KILL_PAGE_ENTRY: | ||
295 | #endif | ||
269 | if (!capable(CAP_SYS_ADMIN)) | 296 | if (!capable(CAP_SYS_ADMIN)) |
270 | return -EPERM; | 297 | return -EPERM; |
271 | err = mtrr_del_page(-1, sentry.base, sentry.size); | 298 | err = mtrr_del_page(-1, sentry.base, sentry.size); |
272 | break; | 299 | break; |
273 | case MTRRIOC_GET_PAGE_ENTRY: | 300 | case MTRRIOC_GET_PAGE_ENTRY: |
301 | #ifdef CONFIG_COMPAT | ||
302 | case MTRRIOC32_GET_PAGE_ENTRY: | ||
303 | #endif | ||
274 | if (gentry.regnum >= num_var_ranges) | 304 | if (gentry.regnum >= num_var_ranges) |
275 | return -EINVAL; | 305 | return -EINVAL; |
276 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); | 306 | mtrr_if->get(gentry.regnum, &gentry.base, &size, &type); |
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 16bb7ea87145..0acfb6a5a220 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c | |||
@@ -50,7 +50,7 @@ u32 num_var_ranges = 0; | |||
50 | unsigned int *usage_table; | 50 | unsigned int *usage_table; |
51 | static DEFINE_MUTEX(mtrr_mutex); | 51 | static DEFINE_MUTEX(mtrr_mutex); |
52 | 52 | ||
53 | u32 size_or_mask, size_and_mask; | 53 | u64 size_or_mask, size_and_mask; |
54 | 54 | ||
55 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; | 55 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; |
56 | 56 | ||
@@ -662,8 +662,8 @@ void __init mtrr_bp_init(void) | |||
662 | boot_cpu_data.x86_mask == 0x4)) | 662 | boot_cpu_data.x86_mask == 0x4)) |
663 | phys_addr = 36; | 663 | phys_addr = 36; |
664 | 664 | ||
665 | size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); | 665 | size_or_mask = ~((1ULL << (phys_addr - PAGE_SHIFT)) - 1); |
666 | size_and_mask = ~size_or_mask & 0xfff00000; | 666 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; |
667 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && | 667 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && |
668 | boot_cpu_data.x86 == 6) { | 668 | boot_cpu_data.x86 == 6) { |
669 | /* VIA C* family have Intel style MTRRs, but | 669 | /* VIA C* family have Intel style MTRRs, but |
diff --git a/arch/i386/kernel/cpu/mtrr/mtrr.h b/arch/i386/kernel/cpu/mtrr/mtrr.h index d61ea9db6cfe..289dfe6030e3 100644 --- a/arch/i386/kernel/cpu/mtrr/mtrr.h +++ b/arch/i386/kernel/cpu/mtrr/mtrr.h | |||
@@ -84,7 +84,7 @@ void get_mtrr_state(void); | |||
84 | 84 | ||
85 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 85 | extern void set_mtrr_ops(struct mtrr_ops * ops); |
86 | 86 | ||
87 | extern u32 size_or_mask, size_and_mask; | 87 | extern u64 size_or_mask, size_and_mask; |
88 | extern struct mtrr_ops * mtrr_if; | 88 | extern struct mtrr_ops * mtrr_if; |
89 | 89 | ||
90 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 90 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 6624d8583c42..47e3ebbfb28d 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -29,7 +29,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 29 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | 30 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, | 31 | NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL, |
32 | NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", | 32 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", |
33 | 33 | ||
34 | /* Transmeta-defined */ | 34 | /* Transmeta-defined */ |
35 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | 35 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
@@ -47,7 +47,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
47 | /* Intel-defined (#2) */ | 47 | /* Intel-defined (#2) */ |
48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | 48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | 49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, | 50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", |
51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
52 | 52 | ||
53 | /* VIA/Cyrix/Centaur-defined */ | 53 | /* VIA/Cyrix/Centaur-defined */ |
@@ -57,8 +57,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
57 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 57 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
58 | 58 | ||
59 | /* AMD-defined (#2) */ | 59 | /* AMD-defined (#2) */ |
60 | "lahf_lm", "cmp_legacy", "svm", NULL, "cr8legacy", NULL, NULL, NULL, | 60 | "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8legacy", "abm", |
61 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 61 | "sse4a", "misalignsse", |
62 | "3dnowprefetch", "osvw", "ibs", NULL, NULL, NULL, NULL, NULL, | ||
62 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
63 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 64 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
64 | }; | 65 | }; |
@@ -69,8 +70,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
69 | "ttp", /* thermal trip */ | 70 | "ttp", /* thermal trip */ |
70 | "tm", | 71 | "tm", |
71 | "stc", | 72 | "stc", |
73 | "100mhzsteps", | ||
74 | "hwpstate", | ||
72 | NULL, | 75 | NULL, |
73 | /* nothing */ /* constant_tsc - moved to flags */ | 76 | NULL, /* constant_tsc - moved to flags */ |
77 | /* nothing */ | ||
74 | }; | 78 | }; |
75 | struct cpuinfo_x86 *c = v; | 79 | struct cpuinfo_x86 *c = v; |
76 | int i, n = c - cpu_data; | 80 | int i, n = c - cpu_data; |
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 4056fb7d2cdf..5678d46863c6 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -9,7 +9,7 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
9 | { | 9 | { |
10 | unsigned int cap_mask, uk, max, dummy; | 10 | unsigned int cap_mask, uk, max, dummy; |
11 | unsigned int cms_rev1, cms_rev2; | 11 | unsigned int cms_rev1, cms_rev2; |
12 | unsigned int cpu_rev, cpu_freq, cpu_flags, new_cpu_rev; | 12 | unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; |
13 | char cpu_info[65]; | 13 | char cpu_info[65]; |
14 | 14 | ||
15 | get_model_name(c); /* Same as AMD/Cyrix */ | 15 | get_model_name(c); /* Same as AMD/Cyrix */ |
@@ -72,6 +72,9 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) | |||
72 | wrmsr(0x80860004, ~0, uk); | 72 | wrmsr(0x80860004, ~0, uk); |
73 | c->x86_capability[0] = cpuid_edx(0x00000001); | 73 | c->x86_capability[0] = cpuid_edx(0x00000001); |
74 | wrmsr(0x80860004, cap_mask, uk); | 74 | wrmsr(0x80860004, cap_mask, uk); |
75 | |||
76 | /* All Transmeta CPUs have a constant TSC */ | ||
77 | set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); | ||
75 | 78 | ||
76 | /* If we can run i686 user-space code, call us an i686 */ | 79 | /* If we can run i686 user-space code, call us an i686 */ |
77 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) | 80 | #define USER686 (X86_FEATURE_TSC|X86_FEATURE_CX8|X86_FEATURE_CMOV) |
diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 4da75fa3208d..eeae0d992337 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c | |||
@@ -48,7 +48,6 @@ static struct class *cpuid_class; | |||
48 | #ifdef CONFIG_SMP | 48 | #ifdef CONFIG_SMP |
49 | 49 | ||
50 | struct cpuid_command { | 50 | struct cpuid_command { |
51 | int cpu; | ||
52 | u32 reg; | 51 | u32 reg; |
53 | u32 *data; | 52 | u32 *data; |
54 | }; | 53 | }; |
@@ -57,8 +56,7 @@ static void cpuid_smp_cpuid(void *cmd_block) | |||
57 | { | 56 | { |
58 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; | 57 | struct cpuid_command *cmd = (struct cpuid_command *)cmd_block; |
59 | 58 | ||
60 | if (cmd->cpu == smp_processor_id()) | 59 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], |
61 | cpuid(cmd->reg, &cmd->data[0], &cmd->data[1], &cmd->data[2], | ||
62 | &cmd->data[3]); | 60 | &cmd->data[3]); |
63 | } | 61 | } |
64 | 62 | ||
@@ -70,11 +68,10 @@ static inline void do_cpuid(int cpu, u32 reg, u32 * data) | |||
70 | if (cpu == smp_processor_id()) { | 68 | if (cpu == smp_processor_id()) { |
71 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); | 69 | cpuid(reg, &data[0], &data[1], &data[2], &data[3]); |
72 | } else { | 70 | } else { |
73 | cmd.cpu = cpu; | ||
74 | cmd.reg = reg; | 71 | cmd.reg = reg; |
75 | cmd.data = data; | 72 | cmd.data = data; |
76 | 73 | ||
77 | smp_call_function(cpuid_smp_cpuid, &cmd, 1, 1); | 74 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); |
78 | } | 75 | } |
79 | preempt_enable(); | 76 | preempt_enable(); |
80 | } | 77 | } |
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index f391abcf7da9..70f39560846a 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/page.h> | 15 | #include <asm/page.h> |
16 | #include <asm/e820.h> | 16 | #include <asm/e820.h> |
17 | #include <asm/setup.h> | ||
17 | 18 | ||
18 | #ifdef CONFIG_EFI | 19 | #ifdef CONFIG_EFI |
19 | int efi_enabled = 0; | 20 | int efi_enabled = 0; |
@@ -156,21 +157,22 @@ static struct resource standard_io_resources[] = { { | |||
156 | .flags = IORESOURCE_BUSY | IORESOURCE_IO | 157 | .flags = IORESOURCE_BUSY | IORESOURCE_IO |
157 | } }; | 158 | } }; |
158 | 159 | ||
159 | static int romsignature(const unsigned char *x) | 160 | #define ROMSIGNATURE 0xaa55 |
161 | |||
162 | static int __init romsignature(const unsigned char *rom) | ||
160 | { | 163 | { |
161 | unsigned short sig; | 164 | unsigned short sig; |
162 | int ret = 0; | 165 | |
163 | if (probe_kernel_address((const unsigned short *)x, sig) == 0) | 166 | return probe_kernel_address((const unsigned short *)rom, sig) == 0 && |
164 | ret = (sig == 0xaa55); | 167 | sig == ROMSIGNATURE; |
165 | return ret; | ||
166 | } | 168 | } |
167 | 169 | ||
168 | static int __init romchecksum(unsigned char *rom, unsigned long length) | 170 | static int __init romchecksum(unsigned char *rom, unsigned long length) |
169 | { | 171 | { |
170 | unsigned char *p, sum = 0; | 172 | unsigned char sum; |
171 | 173 | ||
172 | for (p = rom; p < rom + length; p++) | 174 | for (sum = 0; length; length--) |
173 | sum += *p; | 175 | sum += *rom++; |
174 | return sum == 0; | 176 | return sum == 0; |
175 | } | 177 | } |
176 | 178 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 5e47683fc63a..18bddcb8e9e8 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -30,7 +30,7 @@ | |||
30 | * 18(%esp) - %eax | 30 | * 18(%esp) - %eax |
31 | * 1C(%esp) - %ds | 31 | * 1C(%esp) - %ds |
32 | * 20(%esp) - %es | 32 | * 20(%esp) - %es |
33 | * 24(%esp) - %gs | 33 | * 24(%esp) - %fs |
34 | * 28(%esp) - orig_eax | 34 | * 28(%esp) - orig_eax |
35 | * 2C(%esp) - %eip | 35 | * 2C(%esp) - %eip |
36 | * 30(%esp) - %cs | 36 | * 30(%esp) - %cs |
@@ -99,9 +99,9 @@ VM_MASK = 0x00020000 | |||
99 | 99 | ||
100 | #define SAVE_ALL \ | 100 | #define SAVE_ALL \ |
101 | cld; \ | 101 | cld; \ |
102 | pushl %gs; \ | 102 | pushl %fs; \ |
103 | CFI_ADJUST_CFA_OFFSET 4;\ | 103 | CFI_ADJUST_CFA_OFFSET 4;\ |
104 | /*CFI_REL_OFFSET gs, 0;*/\ | 104 | /*CFI_REL_OFFSET fs, 0;*/\ |
105 | pushl %es; \ | 105 | pushl %es; \ |
106 | CFI_ADJUST_CFA_OFFSET 4;\ | 106 | CFI_ADJUST_CFA_OFFSET 4;\ |
107 | /*CFI_REL_OFFSET es, 0;*/\ | 107 | /*CFI_REL_OFFSET es, 0;*/\ |
@@ -133,7 +133,7 @@ VM_MASK = 0x00020000 | |||
133 | movl %edx, %ds; \ | 133 | movl %edx, %ds; \ |
134 | movl %edx, %es; \ | 134 | movl %edx, %es; \ |
135 | movl $(__KERNEL_PDA), %edx; \ | 135 | movl $(__KERNEL_PDA), %edx; \ |
136 | movl %edx, %gs | 136 | movl %edx, %fs |
137 | 137 | ||
138 | #define RESTORE_INT_REGS \ | 138 | #define RESTORE_INT_REGS \ |
139 | popl %ebx; \ | 139 | popl %ebx; \ |
@@ -166,9 +166,9 @@ VM_MASK = 0x00020000 | |||
166 | 2: popl %es; \ | 166 | 2: popl %es; \ |
167 | CFI_ADJUST_CFA_OFFSET -4;\ | 167 | CFI_ADJUST_CFA_OFFSET -4;\ |
168 | /*CFI_RESTORE es;*/\ | 168 | /*CFI_RESTORE es;*/\ |
169 | 3: popl %gs; \ | 169 | 3: popl %fs; \ |
170 | CFI_ADJUST_CFA_OFFSET -4;\ | 170 | CFI_ADJUST_CFA_OFFSET -4;\ |
171 | /*CFI_RESTORE gs;*/\ | 171 | /*CFI_RESTORE fs;*/\ |
172 | .pushsection .fixup,"ax"; \ | 172 | .pushsection .fixup,"ax"; \ |
173 | 4: movl $0,(%esp); \ | 173 | 4: movl $0,(%esp); \ |
174 | jmp 1b; \ | 174 | jmp 1b; \ |
@@ -227,6 +227,7 @@ ENTRY(ret_from_fork) | |||
227 | CFI_ADJUST_CFA_OFFSET -4 | 227 | CFI_ADJUST_CFA_OFFSET -4 |
228 | jmp syscall_exit | 228 | jmp syscall_exit |
229 | CFI_ENDPROC | 229 | CFI_ENDPROC |
230 | END(ret_from_fork) | ||
230 | 231 | ||
231 | /* | 232 | /* |
232 | * Return to user mode is not as complex as all this looks, | 233 | * Return to user mode is not as complex as all this looks, |
@@ -258,6 +259,7 @@ ENTRY(resume_userspace) | |||
258 | # int/exception return? | 259 | # int/exception return? |
259 | jne work_pending | 260 | jne work_pending |
260 | jmp restore_all | 261 | jmp restore_all |
262 | END(ret_from_exception) | ||
261 | 263 | ||
262 | #ifdef CONFIG_PREEMPT | 264 | #ifdef CONFIG_PREEMPT |
263 | ENTRY(resume_kernel) | 265 | ENTRY(resume_kernel) |
@@ -272,6 +274,7 @@ need_resched: | |||
272 | jz restore_all | 274 | jz restore_all |
273 | call preempt_schedule_irq | 275 | call preempt_schedule_irq |
274 | jmp need_resched | 276 | jmp need_resched |
277 | END(resume_kernel) | ||
275 | #endif | 278 | #endif |
276 | CFI_ENDPROC | 279 | CFI_ENDPROC |
277 | 280 | ||
@@ -349,16 +352,17 @@ sysenter_past_esp: | |||
349 | movl PT_OLDESP(%esp), %ecx | 352 | movl PT_OLDESP(%esp), %ecx |
350 | xorl %ebp,%ebp | 353 | xorl %ebp,%ebp |
351 | TRACE_IRQS_ON | 354 | TRACE_IRQS_ON |
352 | 1: mov PT_GS(%esp), %gs | 355 | 1: mov PT_FS(%esp), %fs |
353 | ENABLE_INTERRUPTS_SYSEXIT | 356 | ENABLE_INTERRUPTS_SYSEXIT |
354 | CFI_ENDPROC | 357 | CFI_ENDPROC |
355 | .pushsection .fixup,"ax" | 358 | .pushsection .fixup,"ax" |
356 | 2: movl $0,PT_GS(%esp) | 359 | 2: movl $0,PT_FS(%esp) |
357 | jmp 1b | 360 | jmp 1b |
358 | .section __ex_table,"a" | 361 | .section __ex_table,"a" |
359 | .align 4 | 362 | .align 4 |
360 | .long 1b,2b | 363 | .long 1b,2b |
361 | .popsection | 364 | .popsection |
365 | ENDPROC(sysenter_entry) | ||
362 | 366 | ||
363 | # system call handler stub | 367 | # system call handler stub |
364 | ENTRY(system_call) | 368 | ENTRY(system_call) |
@@ -459,6 +463,7 @@ ldt_ss: | |||
459 | CFI_ADJUST_CFA_OFFSET -8 | 463 | CFI_ADJUST_CFA_OFFSET -8 |
460 | jmp restore_nocheck | 464 | jmp restore_nocheck |
461 | CFI_ENDPROC | 465 | CFI_ENDPROC |
466 | ENDPROC(system_call) | ||
462 | 467 | ||
463 | # perform work that needs to be done immediately before resumption | 468 | # perform work that needs to be done immediately before resumption |
464 | ALIGN | 469 | ALIGN |
@@ -504,6 +509,7 @@ work_notifysig_v86: | |||
504 | xorl %edx, %edx | 509 | xorl %edx, %edx |
505 | call do_notify_resume | 510 | call do_notify_resume |
506 | jmp resume_userspace_sig | 511 | jmp resume_userspace_sig |
512 | END(work_pending) | ||
507 | 513 | ||
508 | # perform syscall exit tracing | 514 | # perform syscall exit tracing |
509 | ALIGN | 515 | ALIGN |
@@ -519,6 +525,7 @@ syscall_trace_entry: | |||
519 | cmpl $(nr_syscalls), %eax | 525 | cmpl $(nr_syscalls), %eax |
520 | jnae syscall_call | 526 | jnae syscall_call |
521 | jmp syscall_exit | 527 | jmp syscall_exit |
528 | END(syscall_trace_entry) | ||
522 | 529 | ||
523 | # perform syscall exit tracing | 530 | # perform syscall exit tracing |
524 | ALIGN | 531 | ALIGN |
@@ -532,6 +539,7 @@ syscall_exit_work: | |||
532 | movl $1, %edx | 539 | movl $1, %edx |
533 | call do_syscall_trace | 540 | call do_syscall_trace |
534 | jmp resume_userspace | 541 | jmp resume_userspace |
542 | END(syscall_exit_work) | ||
535 | CFI_ENDPROC | 543 | CFI_ENDPROC |
536 | 544 | ||
537 | RING0_INT_FRAME # can't unwind into user space anyway | 545 | RING0_INT_FRAME # can't unwind into user space anyway |
@@ -542,15 +550,17 @@ syscall_fault: | |||
542 | GET_THREAD_INFO(%ebp) | 550 | GET_THREAD_INFO(%ebp) |
543 | movl $-EFAULT,PT_EAX(%esp) | 551 | movl $-EFAULT,PT_EAX(%esp) |
544 | jmp resume_userspace | 552 | jmp resume_userspace |
553 | END(syscall_fault) | ||
545 | 554 | ||
546 | syscall_badsys: | 555 | syscall_badsys: |
547 | movl $-ENOSYS,PT_EAX(%esp) | 556 | movl $-ENOSYS,PT_EAX(%esp) |
548 | jmp resume_userspace | 557 | jmp resume_userspace |
558 | END(syscall_badsys) | ||
549 | CFI_ENDPROC | 559 | CFI_ENDPROC |
550 | 560 | ||
551 | #define FIXUP_ESPFIX_STACK \ | 561 | #define FIXUP_ESPFIX_STACK \ |
552 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 562 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
553 | movl %gs:PDA_cpu, %ebx; \ | 563 | movl %fs:PDA_cpu, %ebx; \ |
554 | PER_CPU(cpu_gdt_descr, %ebx); \ | 564 | PER_CPU(cpu_gdt_descr, %ebx); \ |
555 | movl GDS_address(%ebx), %ebx; \ | 565 | movl GDS_address(%ebx), %ebx; \ |
556 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 566 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
@@ -581,9 +591,9 @@ syscall_badsys: | |||
581 | ENTRY(interrupt) | 591 | ENTRY(interrupt) |
582 | .text | 592 | .text |
583 | 593 | ||
584 | vector=0 | ||
585 | ENTRY(irq_entries_start) | 594 | ENTRY(irq_entries_start) |
586 | RING0_INT_FRAME | 595 | RING0_INT_FRAME |
596 | vector=0 | ||
587 | .rept NR_IRQS | 597 | .rept NR_IRQS |
588 | ALIGN | 598 | ALIGN |
589 | .if vector | 599 | .if vector |
@@ -592,11 +602,16 @@ ENTRY(irq_entries_start) | |||
592 | 1: pushl $~(vector) | 602 | 1: pushl $~(vector) |
593 | CFI_ADJUST_CFA_OFFSET 4 | 603 | CFI_ADJUST_CFA_OFFSET 4 |
594 | jmp common_interrupt | 604 | jmp common_interrupt |
595 | .data | 605 | .previous |
596 | .long 1b | 606 | .long 1b |
597 | .text | 607 | .text |
598 | vector=vector+1 | 608 | vector=vector+1 |
599 | .endr | 609 | .endr |
610 | END(irq_entries_start) | ||
611 | |||
612 | .previous | ||
613 | END(interrupt) | ||
614 | .previous | ||
600 | 615 | ||
601 | /* | 616 | /* |
602 | * the CPU automatically disables interrupts when executing an IRQ vector, | 617 | * the CPU automatically disables interrupts when executing an IRQ vector, |
@@ -609,6 +624,7 @@ common_interrupt: | |||
609 | movl %esp,%eax | 624 | movl %esp,%eax |
610 | call do_IRQ | 625 | call do_IRQ |
611 | jmp ret_from_intr | 626 | jmp ret_from_intr |
627 | ENDPROC(common_interrupt) | ||
612 | CFI_ENDPROC | 628 | CFI_ENDPROC |
613 | 629 | ||
614 | #define BUILD_INTERRUPT(name, nr) \ | 630 | #define BUILD_INTERRUPT(name, nr) \ |
@@ -621,18 +637,24 @@ ENTRY(name) \ | |||
621 | movl %esp,%eax; \ | 637 | movl %esp,%eax; \ |
622 | call smp_/**/name; \ | 638 | call smp_/**/name; \ |
623 | jmp ret_from_intr; \ | 639 | jmp ret_from_intr; \ |
624 | CFI_ENDPROC | 640 | CFI_ENDPROC; \ |
641 | ENDPROC(name) | ||
625 | 642 | ||
626 | /* The include is where all of the SMP etc. interrupts come from */ | 643 | /* The include is where all of the SMP etc. interrupts come from */ |
627 | #include "entry_arch.h" | 644 | #include "entry_arch.h" |
628 | 645 | ||
646 | /* This alternate entry is needed because we hijack the apic LVTT */ | ||
647 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
648 | BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
649 | #endif | ||
650 | |||
629 | KPROBE_ENTRY(page_fault) | 651 | KPROBE_ENTRY(page_fault) |
630 | RING0_EC_FRAME | 652 | RING0_EC_FRAME |
631 | pushl $do_page_fault | 653 | pushl $do_page_fault |
632 | CFI_ADJUST_CFA_OFFSET 4 | 654 | CFI_ADJUST_CFA_OFFSET 4 |
633 | ALIGN | 655 | ALIGN |
634 | error_code: | 656 | error_code: |
635 | /* the function address is in %gs's slot on the stack */ | 657 | /* the function address is in %fs's slot on the stack */ |
636 | pushl %es | 658 | pushl %es |
637 | CFI_ADJUST_CFA_OFFSET 4 | 659 | CFI_ADJUST_CFA_OFFSET 4 |
638 | /*CFI_REL_OFFSET es, 0*/ | 660 | /*CFI_REL_OFFSET es, 0*/ |
@@ -661,20 +683,20 @@ error_code: | |||
661 | CFI_ADJUST_CFA_OFFSET 4 | 683 | CFI_ADJUST_CFA_OFFSET 4 |
662 | CFI_REL_OFFSET ebx, 0 | 684 | CFI_REL_OFFSET ebx, 0 |
663 | cld | 685 | cld |
664 | pushl %gs | 686 | pushl %fs |
665 | CFI_ADJUST_CFA_OFFSET 4 | 687 | CFI_ADJUST_CFA_OFFSET 4 |
666 | /*CFI_REL_OFFSET gs, 0*/ | 688 | /*CFI_REL_OFFSET fs, 0*/ |
667 | movl $(__KERNEL_PDA), %ecx | 689 | movl $(__KERNEL_PDA), %ecx |
668 | movl %ecx, %gs | 690 | movl %ecx, %fs |
669 | UNWIND_ESPFIX_STACK | 691 | UNWIND_ESPFIX_STACK |
670 | popl %ecx | 692 | popl %ecx |
671 | CFI_ADJUST_CFA_OFFSET -4 | 693 | CFI_ADJUST_CFA_OFFSET -4 |
672 | /*CFI_REGISTER es, ecx*/ | 694 | /*CFI_REGISTER es, ecx*/ |
673 | movl PT_GS(%esp), %edi # get the function address | 695 | movl PT_FS(%esp), %edi # get the function address |
674 | movl PT_ORIG_EAX(%esp), %edx # get the error code | 696 | movl PT_ORIG_EAX(%esp), %edx # get the error code |
675 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | 697 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart |
676 | mov %ecx, PT_GS(%esp) | 698 | mov %ecx, PT_FS(%esp) |
677 | /*CFI_REL_OFFSET gs, ES*/ | 699 | /*CFI_REL_OFFSET fs, ES*/ |
678 | movl $(__USER_DS), %ecx | 700 | movl $(__USER_DS), %ecx |
679 | movl %ecx, %ds | 701 | movl %ecx, %ds |
680 | movl %ecx, %es | 702 | movl %ecx, %es |
@@ -692,6 +714,7 @@ ENTRY(coprocessor_error) | |||
692 | CFI_ADJUST_CFA_OFFSET 4 | 714 | CFI_ADJUST_CFA_OFFSET 4 |
693 | jmp error_code | 715 | jmp error_code |
694 | CFI_ENDPROC | 716 | CFI_ENDPROC |
717 | END(coprocessor_error) | ||
695 | 718 | ||
696 | ENTRY(simd_coprocessor_error) | 719 | ENTRY(simd_coprocessor_error) |
697 | RING0_INT_FRAME | 720 | RING0_INT_FRAME |
@@ -701,6 +724,7 @@ ENTRY(simd_coprocessor_error) | |||
701 | CFI_ADJUST_CFA_OFFSET 4 | 724 | CFI_ADJUST_CFA_OFFSET 4 |
702 | jmp error_code | 725 | jmp error_code |
703 | CFI_ENDPROC | 726 | CFI_ENDPROC |
727 | END(simd_coprocessor_error) | ||
704 | 728 | ||
705 | ENTRY(device_not_available) | 729 | ENTRY(device_not_available) |
706 | RING0_INT_FRAME | 730 | RING0_INT_FRAME |
@@ -721,6 +745,7 @@ device_not_available_emulate: | |||
721 | CFI_ADJUST_CFA_OFFSET -4 | 745 | CFI_ADJUST_CFA_OFFSET -4 |
722 | jmp ret_from_exception | 746 | jmp ret_from_exception |
723 | CFI_ENDPROC | 747 | CFI_ENDPROC |
748 | END(device_not_available) | ||
724 | 749 | ||
725 | /* | 750 | /* |
726 | * Debug traps and NMI can happen at the one SYSENTER instruction | 751 | * Debug traps and NMI can happen at the one SYSENTER instruction |
@@ -864,10 +889,12 @@ ENTRY(native_iret) | |||
864 | .align 4 | 889 | .align 4 |
865 | .long 1b,iret_exc | 890 | .long 1b,iret_exc |
866 | .previous | 891 | .previous |
892 | END(native_iret) | ||
867 | 893 | ||
868 | ENTRY(native_irq_enable_sysexit) | 894 | ENTRY(native_irq_enable_sysexit) |
869 | sti | 895 | sti |
870 | sysexit | 896 | sysexit |
897 | END(native_irq_enable_sysexit) | ||
871 | #endif | 898 | #endif |
872 | 899 | ||
873 | KPROBE_ENTRY(int3) | 900 | KPROBE_ENTRY(int3) |
@@ -890,6 +917,7 @@ ENTRY(overflow) | |||
890 | CFI_ADJUST_CFA_OFFSET 4 | 917 | CFI_ADJUST_CFA_OFFSET 4 |
891 | jmp error_code | 918 | jmp error_code |
892 | CFI_ENDPROC | 919 | CFI_ENDPROC |
920 | END(overflow) | ||
893 | 921 | ||
894 | ENTRY(bounds) | 922 | ENTRY(bounds) |
895 | RING0_INT_FRAME | 923 | RING0_INT_FRAME |
@@ -899,6 +927,7 @@ ENTRY(bounds) | |||
899 | CFI_ADJUST_CFA_OFFSET 4 | 927 | CFI_ADJUST_CFA_OFFSET 4 |
900 | jmp error_code | 928 | jmp error_code |
901 | CFI_ENDPROC | 929 | CFI_ENDPROC |
930 | END(bounds) | ||
902 | 931 | ||
903 | ENTRY(invalid_op) | 932 | ENTRY(invalid_op) |
904 | RING0_INT_FRAME | 933 | RING0_INT_FRAME |
@@ -908,6 +937,7 @@ ENTRY(invalid_op) | |||
908 | CFI_ADJUST_CFA_OFFSET 4 | 937 | CFI_ADJUST_CFA_OFFSET 4 |
909 | jmp error_code | 938 | jmp error_code |
910 | CFI_ENDPROC | 939 | CFI_ENDPROC |
940 | END(invalid_op) | ||
911 | 941 | ||
912 | ENTRY(coprocessor_segment_overrun) | 942 | ENTRY(coprocessor_segment_overrun) |
913 | RING0_INT_FRAME | 943 | RING0_INT_FRAME |
@@ -917,6 +947,7 @@ ENTRY(coprocessor_segment_overrun) | |||
917 | CFI_ADJUST_CFA_OFFSET 4 | 947 | CFI_ADJUST_CFA_OFFSET 4 |
918 | jmp error_code | 948 | jmp error_code |
919 | CFI_ENDPROC | 949 | CFI_ENDPROC |
950 | END(coprocessor_segment_overrun) | ||
920 | 951 | ||
921 | ENTRY(invalid_TSS) | 952 | ENTRY(invalid_TSS) |
922 | RING0_EC_FRAME | 953 | RING0_EC_FRAME |
@@ -924,6 +955,7 @@ ENTRY(invalid_TSS) | |||
924 | CFI_ADJUST_CFA_OFFSET 4 | 955 | CFI_ADJUST_CFA_OFFSET 4 |
925 | jmp error_code | 956 | jmp error_code |
926 | CFI_ENDPROC | 957 | CFI_ENDPROC |
958 | END(invalid_TSS) | ||
927 | 959 | ||
928 | ENTRY(segment_not_present) | 960 | ENTRY(segment_not_present) |
929 | RING0_EC_FRAME | 961 | RING0_EC_FRAME |
@@ -931,6 +963,7 @@ ENTRY(segment_not_present) | |||
931 | CFI_ADJUST_CFA_OFFSET 4 | 963 | CFI_ADJUST_CFA_OFFSET 4 |
932 | jmp error_code | 964 | jmp error_code |
933 | CFI_ENDPROC | 965 | CFI_ENDPROC |
966 | END(segment_not_present) | ||
934 | 967 | ||
935 | ENTRY(stack_segment) | 968 | ENTRY(stack_segment) |
936 | RING0_EC_FRAME | 969 | RING0_EC_FRAME |
@@ -938,6 +971,7 @@ ENTRY(stack_segment) | |||
938 | CFI_ADJUST_CFA_OFFSET 4 | 971 | CFI_ADJUST_CFA_OFFSET 4 |
939 | jmp error_code | 972 | jmp error_code |
940 | CFI_ENDPROC | 973 | CFI_ENDPROC |
974 | END(stack_segment) | ||
941 | 975 | ||
942 | KPROBE_ENTRY(general_protection) | 976 | KPROBE_ENTRY(general_protection) |
943 | RING0_EC_FRAME | 977 | RING0_EC_FRAME |
@@ -953,6 +987,7 @@ ENTRY(alignment_check) | |||
953 | CFI_ADJUST_CFA_OFFSET 4 | 987 | CFI_ADJUST_CFA_OFFSET 4 |
954 | jmp error_code | 988 | jmp error_code |
955 | CFI_ENDPROC | 989 | CFI_ENDPROC |
990 | END(alignment_check) | ||
956 | 991 | ||
957 | ENTRY(divide_error) | 992 | ENTRY(divide_error) |
958 | RING0_INT_FRAME | 993 | RING0_INT_FRAME |
@@ -962,6 +997,7 @@ ENTRY(divide_error) | |||
962 | CFI_ADJUST_CFA_OFFSET 4 | 997 | CFI_ADJUST_CFA_OFFSET 4 |
963 | jmp error_code | 998 | jmp error_code |
964 | CFI_ENDPROC | 999 | CFI_ENDPROC |
1000 | END(divide_error) | ||
965 | 1001 | ||
966 | #ifdef CONFIG_X86_MCE | 1002 | #ifdef CONFIG_X86_MCE |
967 | ENTRY(machine_check) | 1003 | ENTRY(machine_check) |
@@ -972,6 +1008,7 @@ ENTRY(machine_check) | |||
972 | CFI_ADJUST_CFA_OFFSET 4 | 1008 | CFI_ADJUST_CFA_OFFSET 4 |
973 | jmp error_code | 1009 | jmp error_code |
974 | CFI_ENDPROC | 1010 | CFI_ENDPROC |
1011 | END(machine_check) | ||
975 | #endif | 1012 | #endif |
976 | 1013 | ||
977 | ENTRY(spurious_interrupt_bug) | 1014 | ENTRY(spurious_interrupt_bug) |
@@ -982,6 +1019,7 @@ ENTRY(spurious_interrupt_bug) | |||
982 | CFI_ADJUST_CFA_OFFSET 4 | 1019 | CFI_ADJUST_CFA_OFFSET 4 |
983 | jmp error_code | 1020 | jmp error_code |
984 | CFI_ENDPROC | 1021 | CFI_ENDPROC |
1022 | END(spurious_interrupt_bug) | ||
985 | 1023 | ||
986 | ENTRY(kernel_thread_helper) | 1024 | ENTRY(kernel_thread_helper) |
987 | pushl $0 # fake return address for unwinder | 1025 | pushl $0 # fake return address for unwinder |
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index cb9abdfced9b..3fa7f9389afe 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -53,6 +53,7 @@ | |||
53 | * any particular GDT layout, because we load our own as soon as we | 53 | * any particular GDT layout, because we load our own as soon as we |
54 | * can. | 54 | * can. |
55 | */ | 55 | */ |
56 | .section .text.head,"ax",@progbits | ||
56 | ENTRY(startup_32) | 57 | ENTRY(startup_32) |
57 | 58 | ||
58 | #ifdef CONFIG_PARAVIRT | 59 | #ifdef CONFIG_PARAVIRT |
@@ -141,16 +142,25 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
141 | jb 10b | 142 | jb 10b |
142 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) | 143 | movl %edi,(init_pg_tables_end - __PAGE_OFFSET) |
143 | 144 | ||
144 | #ifdef CONFIG_SMP | ||
145 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ | 145 | xorl %ebx,%ebx /* This is the boot CPU (BSP) */ |
146 | jmp 3f | 146 | jmp 3f |
147 | |||
148 | /* | 147 | /* |
149 | * Non-boot CPU entry point; entered from trampoline.S | 148 | * Non-boot CPU entry point; entered from trampoline.S |
150 | * We can't lgdt here, because lgdt itself uses a data segment, but | 149 | * We can't lgdt here, because lgdt itself uses a data segment, but |
151 | * we know the trampoline has already loaded the boot_gdt_table GDT | 150 | * we know the trampoline has already loaded the boot_gdt_table GDT |
152 | * for us. | 151 | * for us. |
152 | * | ||
153 | * If cpu hotplug is not supported then this code can go in init section | ||
154 | * which will be freed later | ||
153 | */ | 155 | */ |
156 | |||
157 | #ifdef CONFIG_HOTPLUG_CPU | ||
158 | .section .text,"ax",@progbits | ||
159 | #else | ||
160 | .section .init.text,"ax",@progbits | ||
161 | #endif | ||
162 | |||
163 | #ifdef CONFIG_SMP | ||
154 | ENTRY(startup_32_smp) | 164 | ENTRY(startup_32_smp) |
155 | cld | 165 | cld |
156 | movl $(__BOOT_DS),%eax | 166 | movl $(__BOOT_DS),%eax |
@@ -208,8 +218,8 @@ ENTRY(startup_32_smp) | |||
208 | xorl %ebx,%ebx | 218 | xorl %ebx,%ebx |
209 | incl %ebx | 219 | incl %ebx |
210 | 220 | ||
211 | 3: | ||
212 | #endif /* CONFIG_SMP */ | 221 | #endif /* CONFIG_SMP */ |
222 | 3: | ||
213 | 223 | ||
214 | /* | 224 | /* |
215 | * Enable paging | 225 | * Enable paging |
@@ -309,7 +319,7 @@ is386: movl $2,%ecx # set MP | |||
309 | 319 | ||
310 | call check_x87 | 320 | call check_x87 |
311 | call setup_pda | 321 | call setup_pda |
312 | lgdt cpu_gdt_descr | 322 | lgdt early_gdt_descr |
313 | lidt idt_descr | 323 | lidt idt_descr |
314 | ljmp $(__KERNEL_CS),$1f | 324 | ljmp $(__KERNEL_CS),$1f |
315 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 325 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
@@ -319,12 +329,12 @@ is386: movl $2,%ecx # set MP | |||
319 | movl %eax,%ds | 329 | movl %eax,%ds |
320 | movl %eax,%es | 330 | movl %eax,%es |
321 | 331 | ||
322 | xorl %eax,%eax # Clear FS and LDT | 332 | xorl %eax,%eax # Clear GS and LDT |
323 | movl %eax,%fs | 333 | movl %eax,%gs |
324 | lldt %ax | 334 | lldt %ax |
325 | 335 | ||
326 | movl $(__KERNEL_PDA),%eax | 336 | movl $(__KERNEL_PDA),%eax |
327 | mov %eax,%gs | 337 | mov %eax,%fs |
328 | 338 | ||
329 | cld # gcc2 wants the direction flag cleared at all times | 339 | cld # gcc2 wants the direction flag cleared at all times |
330 | pushl $0 # fake return address for unwinder | 340 | pushl $0 # fake return address for unwinder |
@@ -360,12 +370,12 @@ check_x87: | |||
360 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be | 370 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be |
361 | * that CPU's GDT and PDA. | 371 | * that CPU's GDT and PDA. |
362 | */ | 372 | */ |
363 | setup_pda: | 373 | ENTRY(setup_pda) |
364 | /* get the PDA pointer */ | 374 | /* get the PDA pointer */ |
365 | movl start_pda, %eax | 375 | movl start_pda, %eax |
366 | 376 | ||
367 | /* slot the PDA address into the GDT */ | 377 | /* slot the PDA address into the GDT */ |
368 | mov cpu_gdt_descr+2, %ecx | 378 | mov early_gdt_descr+2, %ecx |
369 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | 379 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ |
370 | shr $16, %eax | 380 | shr $16, %eax |
371 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | 381 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ |
@@ -492,6 +502,7 @@ ignore_int: | |||
492 | #endif | 502 | #endif |
493 | iret | 503 | iret |
494 | 504 | ||
505 | .section .text | ||
495 | #ifdef CONFIG_PARAVIRT | 506 | #ifdef CONFIG_PARAVIRT |
496 | startup_paravirt: | 507 | startup_paravirt: |
497 | cld | 508 | cld |
@@ -502,10 +513,11 @@ startup_paravirt: | |||
502 | pushl %ecx | 513 | pushl %ecx |
503 | pushl %eax | 514 | pushl %eax |
504 | 515 | ||
505 | /* paravirt.o is last in link, and that probe fn never returns */ | ||
506 | pushl $__start_paravirtprobe | 516 | pushl $__start_paravirtprobe |
507 | 1: | 517 | 1: |
508 | movl 0(%esp), %eax | 518 | movl 0(%esp), %eax |
519 | cmpl $__stop_paravirtprobe, %eax | ||
520 | je unhandled_paravirt | ||
509 | pushl (%eax) | 521 | pushl (%eax) |
510 | movl 8(%esp), %eax | 522 | movl 8(%esp), %eax |
511 | call *(%esp) | 523 | call *(%esp) |
@@ -517,6 +529,10 @@ startup_paravirt: | |||
517 | 529 | ||
518 | addl $4, (%esp) | 530 | addl $4, (%esp) |
519 | jmp 1b | 531 | jmp 1b |
532 | |||
533 | unhandled_paravirt: | ||
534 | /* Nothing wanted us: we're screwed. */ | ||
535 | ud2 | ||
520 | #endif | 536 | #endif |
521 | 537 | ||
522 | /* | 538 | /* |
@@ -581,7 +597,7 @@ idt_descr: | |||
581 | 597 | ||
582 | # boot GDT descriptor (later on used by CPU#0): | 598 | # boot GDT descriptor (later on used by CPU#0): |
583 | .word 0 # 32 bit align gdt_desc.address | 599 | .word 0 # 32 bit align gdt_desc.address |
584 | ENTRY(cpu_gdt_descr) | 600 | ENTRY(early_gdt_descr) |
585 | .word GDT_ENTRIES*8-1 | 601 | .word GDT_ENTRIES*8-1 |
586 | .long cpu_gdt_table | 602 | .long cpu_gdt_table |
587 | 603 | ||
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index ba8d302a0b72..e30ccedad0b9 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -1920,7 +1920,7 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
1920 | static void __init setup_ioapic_ids_from_mpc(void) { } | 1920 | static void __init setup_ioapic_ids_from_mpc(void) { } |
1921 | #endif | 1921 | #endif |
1922 | 1922 | ||
1923 | static int no_timer_check __initdata; | 1923 | int no_timer_check __initdata; |
1924 | 1924 | ||
1925 | static int __init notimercheck(char *s) | 1925 | static int __init notimercheck(char *s) |
1926 | { | 1926 | { |
@@ -2310,7 +2310,7 @@ static inline void __init check_timer(void) | |||
2310 | 2310 | ||
2311 | disable_8259A_irq(0); | 2311 | disable_8259A_irq(0); |
2312 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, | 2312 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, |
2313 | "fasteio"); | 2313 | "fasteoi"); |
2314 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2314 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2315 | enable_8259A_irq(0); | 2315 | enable_8259A_irq(0); |
2316 | 2316 | ||
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 3201d421090a..5785d84103a6 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -19,6 +19,8 @@ | |||
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | 21 | ||
22 | #include <asm/idle.h> | ||
23 | |||
22 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | 24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
23 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 25 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
24 | 26 | ||
@@ -61,6 +63,7 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) | |||
61 | union irq_ctx *curctx, *irqctx; | 63 | union irq_ctx *curctx, *irqctx; |
62 | u32 *isp; | 64 | u32 *isp; |
63 | #endif | 65 | #endif |
66 | exit_idle(); | ||
64 | 67 | ||
65 | if (unlikely((unsigned)irq >= NR_IRQS)) { | 68 | if (unlikely((unsigned)irq >= NR_IRQS)) { |
66 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", | 69 | printk(KERN_EMERG "%s: cannot handle IRQ %d\n", |
diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index af1d53344993..b545bc746fce 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c | |||
@@ -363,7 +363,7 @@ no_kprobe: | |||
363 | " pushf\n" | 363 | " pushf\n" |
364 | /* skip cs, eip, orig_eax */ | 364 | /* skip cs, eip, orig_eax */ |
365 | " subl $12, %esp\n" | 365 | " subl $12, %esp\n" |
366 | " pushl %gs\n" | 366 | " pushl %fs\n" |
367 | " pushl %ds\n" | 367 | " pushl %ds\n" |
368 | " pushl %es\n" | 368 | " pushl %es\n" |
369 | " pushl %eax\n" | 369 | " pushl %eax\n" |
@@ -387,7 +387,7 @@ no_kprobe: | |||
387 | " popl %edi\n" | 387 | " popl %edi\n" |
388 | " popl %ebp\n" | 388 | " popl %ebp\n" |
389 | " popl %eax\n" | 389 | " popl %eax\n" |
390 | /* skip eip, orig_eax, es, ds, gs */ | 390 | /* skip eip, orig_eax, es, ds, fs */ |
391 | " addl $20, %esp\n" | 391 | " addl $20, %esp\n" |
392 | " popf\n" | 392 | " popf\n" |
393 | " ret\n"); | 393 | " ret\n"); |
@@ -408,7 +408,7 @@ fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) | |||
408 | spin_lock_irqsave(&kretprobe_lock, flags); | 408 | spin_lock_irqsave(&kretprobe_lock, flags); |
409 | head = kretprobe_inst_table_head(current); | 409 | head = kretprobe_inst_table_head(current); |
410 | /* fixup registers */ | 410 | /* fixup registers */ |
411 | regs->xcs = __KERNEL_CS; | 411 | regs->xcs = __KERNEL_CS | get_kernel_rpl(); |
412 | regs->eip = trampoline_address; | 412 | regs->eip = trampoline_address; |
413 | regs->orig_eax = 0xffffffff; | 413 | regs->orig_eax = 0xffffffff; |
414 | 414 | ||
diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 381252bae3d8..b8f16633a6ec 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c | |||
@@ -384,7 +384,7 @@ static int do_microcode_update (void) | |||
384 | { | 384 | { |
385 | long cursor = 0; | 385 | long cursor = 0; |
386 | int error = 0; | 386 | int error = 0; |
387 | void *new_mc; | 387 | void *new_mc = NULL; |
388 | int cpu; | 388 | int cpu; |
389 | cpumask_t old; | 389 | cpumask_t old; |
390 | 390 | ||
diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 4e14264f392a..bcaa6e9b6197 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c | |||
@@ -68,7 +68,6 @@ static inline int rdmsr_eio(u32 reg, u32 *eax, u32 *edx) | |||
68 | #ifdef CONFIG_SMP | 68 | #ifdef CONFIG_SMP |
69 | 69 | ||
70 | struct msr_command { | 70 | struct msr_command { |
71 | int cpu; | ||
72 | int err; | 71 | int err; |
73 | u32 reg; | 72 | u32 reg; |
74 | u32 data[2]; | 73 | u32 data[2]; |
@@ -78,16 +77,14 @@ static void msr_smp_wrmsr(void *cmd_block) | |||
78 | { | 77 | { |
79 | struct msr_command *cmd = (struct msr_command *)cmd_block; | 78 | struct msr_command *cmd = (struct msr_command *)cmd_block; |
80 | 79 | ||
81 | if (cmd->cpu == smp_processor_id()) | 80 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); |
82 | cmd->err = wrmsr_eio(cmd->reg, cmd->data[0], cmd->data[1]); | ||
83 | } | 81 | } |
84 | 82 | ||
85 | static void msr_smp_rdmsr(void *cmd_block) | 83 | static void msr_smp_rdmsr(void *cmd_block) |
86 | { | 84 | { |
87 | struct msr_command *cmd = (struct msr_command *)cmd_block; | 85 | struct msr_command *cmd = (struct msr_command *)cmd_block; |
88 | 86 | ||
89 | if (cmd->cpu == smp_processor_id()) | 87 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); |
90 | cmd->err = rdmsr_eio(cmd->reg, &cmd->data[0], &cmd->data[1]); | ||
91 | } | 88 | } |
92 | 89 | ||
93 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | 90 | static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) |
@@ -99,12 +96,11 @@ static inline int do_wrmsr(int cpu, u32 reg, u32 eax, u32 edx) | |||
99 | if (cpu == smp_processor_id()) { | 96 | if (cpu == smp_processor_id()) { |
100 | ret = wrmsr_eio(reg, eax, edx); | 97 | ret = wrmsr_eio(reg, eax, edx); |
101 | } else { | 98 | } else { |
102 | cmd.cpu = cpu; | ||
103 | cmd.reg = reg; | 99 | cmd.reg = reg; |
104 | cmd.data[0] = eax; | 100 | cmd.data[0] = eax; |
105 | cmd.data[1] = edx; | 101 | cmd.data[1] = edx; |
106 | 102 | ||
107 | smp_call_function(msr_smp_wrmsr, &cmd, 1, 1); | 103 | smp_call_function_single(cpu, msr_smp_wrmsr, &cmd, 1, 1); |
108 | ret = cmd.err; | 104 | ret = cmd.err; |
109 | } | 105 | } |
110 | preempt_enable(); | 106 | preempt_enable(); |
@@ -120,10 +116,9 @@ static inline int do_rdmsr(int cpu, u32 reg, u32 * eax, u32 * edx) | |||
120 | if (cpu == smp_processor_id()) { | 116 | if (cpu == smp_processor_id()) { |
121 | ret = rdmsr_eio(reg, eax, edx); | 117 | ret = rdmsr_eio(reg, eax, edx); |
122 | } else { | 118 | } else { |
123 | cmd.cpu = cpu; | ||
124 | cmd.reg = reg; | 119 | cmd.reg = reg; |
125 | 120 | ||
126 | smp_call_function(msr_smp_rdmsr, &cmd, 1, 1); | 121 | smp_call_function_single(cpu, msr_smp_rdmsr, &cmd, 1, 1); |
127 | 122 | ||
128 | *eax = cmd.data[0]; | 123 | *eax = cmd.data[0]; |
129 | *edx = cmd.data[1]; | 124 | *edx = cmd.data[1]; |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 1a6f8bb8881c..5d8a07c20281 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -185,7 +185,8 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
185 | { | 185 | { |
186 | switch (boot_cpu_data.x86_vendor) { | 186 | switch (boot_cpu_data.x86_vendor) { |
187 | case X86_VENDOR_AMD: | 187 | case X86_VENDOR_AMD: |
188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | 188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6) |
189 | || (boot_cpu_data.x86 == 16)); | ||
189 | case X86_VENDOR_INTEL: | 190 | case X86_VENDOR_INTEL: |
190 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 191 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
191 | return 1; | 192 | return 1; |
@@ -216,6 +217,28 @@ static __init void nmi_cpu_busy(void *data) | |||
216 | } | 217 | } |
217 | #endif | 218 | #endif |
218 | 219 | ||
220 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
221 | { | ||
222 | u64 counter_val; | ||
223 | unsigned int retval = hz; | ||
224 | |||
225 | /* | ||
226 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
227 | * are writable, with higher bits sign extending from bit 31. | ||
228 | * So, we can only program the counter with 31 bit values and | ||
229 | * 32nd bit should be 1, for 33.. to be 1. | ||
230 | * Find the appropriate nmi_hz | ||
231 | */ | ||
232 | counter_val = (u64)cpu_khz * 1000; | ||
233 | do_div(counter_val, retval); | ||
234 | if (counter_val > 0x7fffffffULL) { | ||
235 | u64 count = (u64)cpu_khz * 1000; | ||
236 | do_div(count, 0x7fffffffUL); | ||
237 | retval = count + 1; | ||
238 | } | ||
239 | return retval; | ||
240 | } | ||
241 | |||
219 | static int __init check_nmi_watchdog(void) | 242 | static int __init check_nmi_watchdog(void) |
220 | { | 243 | { |
221 | unsigned int *prev_nmi_count; | 244 | unsigned int *prev_nmi_count; |
@@ -281,18 +304,10 @@ static int __init check_nmi_watchdog(void) | |||
281 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 304 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
282 | 305 | ||
283 | nmi_hz = 1; | 306 | nmi_hz = 1; |
284 | /* | 307 | |
285 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | 308 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || |
286 | * are writable, with higher bits sign extending from bit 31. | 309 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
287 | * So, we can only program the counter with 31 bit values and | 310 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
288 | * 32nd bit should be 1, for 33.. to be 1. | ||
289 | * Find the appropriate nmi_hz | ||
290 | */ | ||
291 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
292 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
293 | u64 count = (u64)cpu_khz * 1000; | ||
294 | do_div(count, 0x7fffffffUL); | ||
295 | nmi_hz = count + 1; | ||
296 | } | 311 | } |
297 | } | 312 | } |
298 | 313 | ||
@@ -369,6 +384,34 @@ void enable_timer_nmi_watchdog(void) | |||
369 | } | 384 | } |
370 | } | 385 | } |
371 | 386 | ||
387 | static void __acpi_nmi_disable(void *__unused) | ||
388 | { | ||
389 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * Disable timer based NMIs on all CPUs: | ||
394 | */ | ||
395 | void acpi_nmi_disable(void) | ||
396 | { | ||
397 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
398 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
399 | } | ||
400 | |||
401 | static void __acpi_nmi_enable(void *__unused) | ||
402 | { | ||
403 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Enable timer based NMIs on all CPUs: | ||
408 | */ | ||
409 | void acpi_nmi_enable(void) | ||
410 | { | ||
411 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
412 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
413 | } | ||
414 | |||
372 | #ifdef CONFIG_PM | 415 | #ifdef CONFIG_PM |
373 | 416 | ||
374 | static int nmi_pm_active; /* nmi_active before suspend */ | 417 | static int nmi_pm_active; /* nmi_active before suspend */ |
@@ -442,6 +485,17 @@ static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) | |||
442 | wrmsrl(perfctr_msr, 0 - count); | 485 | wrmsrl(perfctr_msr, 0 - count); |
443 | } | 486 | } |
444 | 487 | ||
488 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
489 | const char *descr) | ||
490 | { | ||
491 | u64 count = (u64)cpu_khz * 1000; | ||
492 | |||
493 | do_div(count, nmi_hz); | ||
494 | if(descr) | ||
495 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
496 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
497 | } | ||
498 | |||
445 | /* Note that these events don't tick when the CPU idles. This means | 499 | /* Note that these events don't tick when the CPU idles. This means |
446 | the frequency varies with CPU load. */ | 500 | the frequency varies with CPU load. */ |
447 | 501 | ||
@@ -531,7 +585,8 @@ static int setup_p6_watchdog(void) | |||
531 | 585 | ||
532 | /* setup the timer */ | 586 | /* setup the timer */ |
533 | wrmsr(evntsel_msr, evntsel, 0); | 587 | wrmsr(evntsel_msr, evntsel, 0); |
534 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | 588 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
589 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0"); | ||
535 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 590 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
536 | evntsel |= P6_EVNTSEL0_ENABLE; | 591 | evntsel |= P6_EVNTSEL0_ENABLE; |
537 | wrmsr(evntsel_msr, evntsel, 0); | 592 | wrmsr(evntsel_msr, evntsel, 0); |
@@ -704,7 +759,8 @@ static int setup_intel_arch_watchdog(void) | |||
704 | 759 | ||
705 | /* setup the timer */ | 760 | /* setup the timer */ |
706 | wrmsr(evntsel_msr, evntsel, 0); | 761 | wrmsr(evntsel_msr, evntsel, 0); |
707 | write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | 762 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
763 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
708 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 764 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
709 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 765 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
710 | wrmsr(evntsel_msr, evntsel, 0); | 766 | wrmsr(evntsel_msr, evntsel, 0); |
@@ -762,7 +818,8 @@ void setup_apic_nmi_watchdog (void *unused) | |||
762 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 818 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
763 | switch (boot_cpu_data.x86_vendor) { | 819 | switch (boot_cpu_data.x86_vendor) { |
764 | case X86_VENDOR_AMD: | 820 | case X86_VENDOR_AMD: |
765 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 821 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && |
822 | boot_cpu_data.x86 != 16) | ||
766 | return; | 823 | return; |
767 | if (!setup_k7_watchdog()) | 824 | if (!setup_k7_watchdog()) |
768 | return; | 825 | return; |
@@ -956,6 +1013,8 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
956 | dummy &= ~P4_CCCR_OVF; | 1013 | dummy &= ~P4_CCCR_OVF; |
957 | wrmsrl(wd->cccr_msr, dummy); | 1014 | wrmsrl(wd->cccr_msr, dummy); |
958 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1015 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1016 | /* start the cycle over again */ | ||
1017 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
959 | } | 1018 | } |
960 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | 1019 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || |
961 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | 1020 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
@@ -964,9 +1023,12 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
964 | * other P6 variant. | 1023 | * other P6 variant. |
965 | * ArchPerfom/Core Duo also needs this */ | 1024 | * ArchPerfom/Core Duo also needs this */ |
966 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 1025 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
1026 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
1027 | write_watchdog_counter32(wd->perfctr_msr, NULL); | ||
1028 | } else { | ||
1029 | /* start the cycle over again */ | ||
1030 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
967 | } | 1031 | } |
968 | /* start the cycle over again */ | ||
969 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
970 | rc = 1; | 1032 | rc = 1; |
971 | } else if (nmi_watchdog == NMI_IO_APIC) { | 1033 | } else if (nmi_watchdog == NMI_IO_APIC) { |
972 | /* don't know how to accurately check for this. | 1034 | /* don't know how to accurately check for this. |
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index e55fd05da0f5..c156ecfa3872 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -92,7 +92,7 @@ static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) | |||
92 | return insn_len; | 92 | return insn_len; |
93 | } | 93 | } |
94 | 94 | ||
95 | static fastcall unsigned long native_get_debugreg(int regno) | 95 | static unsigned long native_get_debugreg(int regno) |
96 | { | 96 | { |
97 | unsigned long val = 0; /* Damn you, gcc! */ | 97 | unsigned long val = 0; /* Damn you, gcc! */ |
98 | 98 | ||
@@ -115,7 +115,7 @@ static fastcall unsigned long native_get_debugreg(int regno) | |||
115 | return val; | 115 | return val; |
116 | } | 116 | } |
117 | 117 | ||
118 | static fastcall void native_set_debugreg(int regno, unsigned long value) | 118 | static void native_set_debugreg(int regno, unsigned long value) |
119 | { | 119 | { |
120 | switch (regno) { | 120 | switch (regno) { |
121 | case 0: | 121 | case 0: |
@@ -146,55 +146,55 @@ void init_IRQ(void) | |||
146 | paravirt_ops.init_IRQ(); | 146 | paravirt_ops.init_IRQ(); |
147 | } | 147 | } |
148 | 148 | ||
149 | static fastcall void native_clts(void) | 149 | static void native_clts(void) |
150 | { | 150 | { |
151 | asm volatile ("clts"); | 151 | asm volatile ("clts"); |
152 | } | 152 | } |
153 | 153 | ||
154 | static fastcall unsigned long native_read_cr0(void) | 154 | static unsigned long native_read_cr0(void) |
155 | { | 155 | { |
156 | unsigned long val; | 156 | unsigned long val; |
157 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | 157 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); |
158 | return val; | 158 | return val; |
159 | } | 159 | } |
160 | 160 | ||
161 | static fastcall void native_write_cr0(unsigned long val) | 161 | static void native_write_cr0(unsigned long val) |
162 | { | 162 | { |
163 | asm volatile("movl %0,%%cr0": :"r" (val)); | 163 | asm volatile("movl %0,%%cr0": :"r" (val)); |
164 | } | 164 | } |
165 | 165 | ||
166 | static fastcall unsigned long native_read_cr2(void) | 166 | static unsigned long native_read_cr2(void) |
167 | { | 167 | { |
168 | unsigned long val; | 168 | unsigned long val; |
169 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); | 169 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); |
170 | return val; | 170 | return val; |
171 | } | 171 | } |
172 | 172 | ||
173 | static fastcall void native_write_cr2(unsigned long val) | 173 | static void native_write_cr2(unsigned long val) |
174 | { | 174 | { |
175 | asm volatile("movl %0,%%cr2": :"r" (val)); | 175 | asm volatile("movl %0,%%cr2": :"r" (val)); |
176 | } | 176 | } |
177 | 177 | ||
178 | static fastcall unsigned long native_read_cr3(void) | 178 | static unsigned long native_read_cr3(void) |
179 | { | 179 | { |
180 | unsigned long val; | 180 | unsigned long val; |
181 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | 181 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); |
182 | return val; | 182 | return val; |
183 | } | 183 | } |
184 | 184 | ||
185 | static fastcall void native_write_cr3(unsigned long val) | 185 | static void native_write_cr3(unsigned long val) |
186 | { | 186 | { |
187 | asm volatile("movl %0,%%cr3": :"r" (val)); | 187 | asm volatile("movl %0,%%cr3": :"r" (val)); |
188 | } | 188 | } |
189 | 189 | ||
190 | static fastcall unsigned long native_read_cr4(void) | 190 | static unsigned long native_read_cr4(void) |
191 | { | 191 | { |
192 | unsigned long val; | 192 | unsigned long val; |
193 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | 193 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); |
194 | return val; | 194 | return val; |
195 | } | 195 | } |
196 | 196 | ||
197 | static fastcall unsigned long native_read_cr4_safe(void) | 197 | static unsigned long native_read_cr4_safe(void) |
198 | { | 198 | { |
199 | unsigned long val; | 199 | unsigned long val; |
200 | /* This could fault if %cr4 does not exist */ | 200 | /* This could fault if %cr4 does not exist */ |
@@ -207,51 +207,51 @@ static fastcall unsigned long native_read_cr4_safe(void) | |||
207 | return val; | 207 | return val; |
208 | } | 208 | } |
209 | 209 | ||
210 | static fastcall void native_write_cr4(unsigned long val) | 210 | static void native_write_cr4(unsigned long val) |
211 | { | 211 | { |
212 | asm volatile("movl %0,%%cr4": :"r" (val)); | 212 | asm volatile("movl %0,%%cr4": :"r" (val)); |
213 | } | 213 | } |
214 | 214 | ||
215 | static fastcall unsigned long native_save_fl(void) | 215 | static unsigned long native_save_fl(void) |
216 | { | 216 | { |
217 | unsigned long f; | 217 | unsigned long f; |
218 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); | 218 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); |
219 | return f; | 219 | return f; |
220 | } | 220 | } |
221 | 221 | ||
222 | static fastcall void native_restore_fl(unsigned long f) | 222 | static void native_restore_fl(unsigned long f) |
223 | { | 223 | { |
224 | asm volatile("pushl %0 ; popfl": /* no output */ | 224 | asm volatile("pushl %0 ; popfl": /* no output */ |
225 | :"g" (f) | 225 | :"g" (f) |
226 | :"memory", "cc"); | 226 | :"memory", "cc"); |
227 | } | 227 | } |
228 | 228 | ||
229 | static fastcall void native_irq_disable(void) | 229 | static void native_irq_disable(void) |
230 | { | 230 | { |
231 | asm volatile("cli": : :"memory"); | 231 | asm volatile("cli": : :"memory"); |
232 | } | 232 | } |
233 | 233 | ||
234 | static fastcall void native_irq_enable(void) | 234 | static void native_irq_enable(void) |
235 | { | 235 | { |
236 | asm volatile("sti": : :"memory"); | 236 | asm volatile("sti": : :"memory"); |
237 | } | 237 | } |
238 | 238 | ||
239 | static fastcall void native_safe_halt(void) | 239 | static void native_safe_halt(void) |
240 | { | 240 | { |
241 | asm volatile("sti; hlt": : :"memory"); | 241 | asm volatile("sti; hlt": : :"memory"); |
242 | } | 242 | } |
243 | 243 | ||
244 | static fastcall void native_halt(void) | 244 | static void native_halt(void) |
245 | { | 245 | { |
246 | asm volatile("hlt": : :"memory"); | 246 | asm volatile("hlt": : :"memory"); |
247 | } | 247 | } |
248 | 248 | ||
249 | static fastcall void native_wbinvd(void) | 249 | static void native_wbinvd(void) |
250 | { | 250 | { |
251 | asm volatile("wbinvd": : :"memory"); | 251 | asm volatile("wbinvd": : :"memory"); |
252 | } | 252 | } |
253 | 253 | ||
254 | static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) | 254 | static unsigned long long native_read_msr(unsigned int msr, int *err) |
255 | { | 255 | { |
256 | unsigned long long val; | 256 | unsigned long long val; |
257 | 257 | ||
@@ -270,7 +270,7 @@ static fastcall unsigned long long native_read_msr(unsigned int msr, int *err) | |||
270 | return val; | 270 | return val; |
271 | } | 271 | } |
272 | 272 | ||
273 | static fastcall int native_write_msr(unsigned int msr, unsigned long long val) | 273 | static int native_write_msr(unsigned int msr, unsigned long long val) |
274 | { | 274 | { |
275 | int err; | 275 | int err; |
276 | asm volatile("2: wrmsr ; xorl %0,%0\n" | 276 | asm volatile("2: wrmsr ; xorl %0,%0\n" |
@@ -288,53 +288,53 @@ static fastcall int native_write_msr(unsigned int msr, unsigned long long val) | |||
288 | return err; | 288 | return err; |
289 | } | 289 | } |
290 | 290 | ||
291 | static fastcall unsigned long long native_read_tsc(void) | 291 | static unsigned long long native_read_tsc(void) |
292 | { | 292 | { |
293 | unsigned long long val; | 293 | unsigned long long val; |
294 | asm volatile("rdtsc" : "=A" (val)); | 294 | asm volatile("rdtsc" : "=A" (val)); |
295 | return val; | 295 | return val; |
296 | } | 296 | } |
297 | 297 | ||
298 | static fastcall unsigned long long native_read_pmc(void) | 298 | static unsigned long long native_read_pmc(void) |
299 | { | 299 | { |
300 | unsigned long long val; | 300 | unsigned long long val; |
301 | asm volatile("rdpmc" : "=A" (val)); | 301 | asm volatile("rdpmc" : "=A" (val)); |
302 | return val; | 302 | return val; |
303 | } | 303 | } |
304 | 304 | ||
305 | static fastcall void native_load_tr_desc(void) | 305 | static void native_load_tr_desc(void) |
306 | { | 306 | { |
307 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | 307 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); |
308 | } | 308 | } |
309 | 309 | ||
310 | static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr) | 310 | static void native_load_gdt(const struct Xgt_desc_struct *dtr) |
311 | { | 311 | { |
312 | asm volatile("lgdt %0"::"m" (*dtr)); | 312 | asm volatile("lgdt %0"::"m" (*dtr)); |
313 | } | 313 | } |
314 | 314 | ||
315 | static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr) | 315 | static void native_load_idt(const struct Xgt_desc_struct *dtr) |
316 | { | 316 | { |
317 | asm volatile("lidt %0"::"m" (*dtr)); | 317 | asm volatile("lidt %0"::"m" (*dtr)); |
318 | } | 318 | } |
319 | 319 | ||
320 | static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr) | 320 | static void native_store_gdt(struct Xgt_desc_struct *dtr) |
321 | { | 321 | { |
322 | asm ("sgdt %0":"=m" (*dtr)); | 322 | asm ("sgdt %0":"=m" (*dtr)); |
323 | } | 323 | } |
324 | 324 | ||
325 | static fastcall void native_store_idt(struct Xgt_desc_struct *dtr) | 325 | static void native_store_idt(struct Xgt_desc_struct *dtr) |
326 | { | 326 | { |
327 | asm ("sidt %0":"=m" (*dtr)); | 327 | asm ("sidt %0":"=m" (*dtr)); |
328 | } | 328 | } |
329 | 329 | ||
330 | static fastcall unsigned long native_store_tr(void) | 330 | static unsigned long native_store_tr(void) |
331 | { | 331 | { |
332 | unsigned long tr; | 332 | unsigned long tr; |
333 | asm ("str %0":"=r" (tr)); | 333 | asm ("str %0":"=r" (tr)); |
334 | return tr; | 334 | return tr; |
335 | } | 335 | } |
336 | 336 | ||
337 | static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu) | 337 | static void native_load_tls(struct thread_struct *t, unsigned int cpu) |
338 | { | 338 | { |
339 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] | 339 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] |
340 | C(0); C(1); C(2); | 340 | C(0); C(1); C(2); |
@@ -348,22 +348,22 @@ static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 | |||
348 | lp[1] = entry_high; | 348 | lp[1] = entry_high; |
349 | } | 349 | } |
350 | 350 | ||
351 | static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) | 351 | static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) |
352 | { | 352 | { |
353 | native_write_dt_entry(dt, entrynum, low, high); | 353 | native_write_dt_entry(dt, entrynum, low, high); |
354 | } | 354 | } |
355 | 355 | ||
356 | static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) | 356 | static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) |
357 | { | 357 | { |
358 | native_write_dt_entry(dt, entrynum, low, high); | 358 | native_write_dt_entry(dt, entrynum, low, high); |
359 | } | 359 | } |
360 | 360 | ||
361 | static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) | 361 | static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) |
362 | { | 362 | { |
363 | native_write_dt_entry(dt, entrynum, low, high); | 363 | native_write_dt_entry(dt, entrynum, low, high); |
364 | } | 364 | } |
365 | 365 | ||
366 | static fastcall void native_load_esp0(struct tss_struct *tss, | 366 | static void native_load_esp0(struct tss_struct *tss, |
367 | struct thread_struct *thread) | 367 | struct thread_struct *thread) |
368 | { | 368 | { |
369 | tss->esp0 = thread->esp0; | 369 | tss->esp0 = thread->esp0; |
@@ -375,12 +375,12 @@ static fastcall void native_load_esp0(struct tss_struct *tss, | |||
375 | } | 375 | } |
376 | } | 376 | } |
377 | 377 | ||
378 | static fastcall void native_io_delay(void) | 378 | static void native_io_delay(void) |
379 | { | 379 | { |
380 | asm volatile("outb %al,$0x80"); | 380 | asm volatile("outb %al,$0x80"); |
381 | } | 381 | } |
382 | 382 | ||
383 | static fastcall void native_flush_tlb(void) | 383 | static void native_flush_tlb(void) |
384 | { | 384 | { |
385 | __native_flush_tlb(); | 385 | __native_flush_tlb(); |
386 | } | 386 | } |
@@ -389,49 +389,49 @@ static fastcall void native_flush_tlb(void) | |||
389 | * Global pages have to be flushed a bit differently. Not a real | 389 | * Global pages have to be flushed a bit differently. Not a real |
390 | * performance problem because this does not happen often. | 390 | * performance problem because this does not happen often. |
391 | */ | 391 | */ |
392 | static fastcall void native_flush_tlb_global(void) | 392 | static void native_flush_tlb_global(void) |
393 | { | 393 | { |
394 | __native_flush_tlb_global(); | 394 | __native_flush_tlb_global(); |
395 | } | 395 | } |
396 | 396 | ||
397 | static fastcall void native_flush_tlb_single(u32 addr) | 397 | static void native_flush_tlb_single(u32 addr) |
398 | { | 398 | { |
399 | __native_flush_tlb_single(addr); | 399 | __native_flush_tlb_single(addr); |
400 | } | 400 | } |
401 | 401 | ||
402 | #ifndef CONFIG_X86_PAE | 402 | #ifndef CONFIG_X86_PAE |
403 | static fastcall void native_set_pte(pte_t *ptep, pte_t pteval) | 403 | static void native_set_pte(pte_t *ptep, pte_t pteval) |
404 | { | 404 | { |
405 | *ptep = pteval; | 405 | *ptep = pteval; |
406 | } | 406 | } |
407 | 407 | ||
408 | static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) | 408 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) |
409 | { | 409 | { |
410 | *ptep = pteval; | 410 | *ptep = pteval; |
411 | } | 411 | } |
412 | 412 | ||
413 | static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 413 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
414 | { | 414 | { |
415 | *pmdp = pmdval; | 415 | *pmdp = pmdval; |
416 | } | 416 | } |
417 | 417 | ||
418 | #else /* CONFIG_X86_PAE */ | 418 | #else /* CONFIG_X86_PAE */ |
419 | 419 | ||
420 | static fastcall void native_set_pte(pte_t *ptep, pte_t pte) | 420 | static void native_set_pte(pte_t *ptep, pte_t pte) |
421 | { | 421 | { |
422 | ptep->pte_high = pte.pte_high; | 422 | ptep->pte_high = pte.pte_high; |
423 | smp_wmb(); | 423 | smp_wmb(); |
424 | ptep->pte_low = pte.pte_low; | 424 | ptep->pte_low = pte.pte_low; |
425 | } | 425 | } |
426 | 426 | ||
427 | static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | 427 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) |
428 | { | 428 | { |
429 | ptep->pte_high = pte.pte_high; | 429 | ptep->pte_high = pte.pte_high; |
430 | smp_wmb(); | 430 | smp_wmb(); |
431 | ptep->pte_low = pte.pte_low; | 431 | ptep->pte_low = pte.pte_low; |
432 | } | 432 | } |
433 | 433 | ||
434 | static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 434 | static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
435 | { | 435 | { |
436 | ptep->pte_low = 0; | 436 | ptep->pte_low = 0; |
437 | smp_wmb(); | 437 | smp_wmb(); |
@@ -440,29 +440,29 @@ static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long | |||
440 | ptep->pte_low = pte.pte_low; | 440 | ptep->pte_low = pte.pte_low; |
441 | } | 441 | } |
442 | 442 | ||
443 | static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval) | 443 | static void native_set_pte_atomic(pte_t *ptep, pte_t pteval) |
444 | { | 444 | { |
445 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | 445 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); |
446 | } | 446 | } |
447 | 447 | ||
448 | static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 448 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
449 | { | 449 | { |
450 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); | 450 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); |
451 | } | 451 | } |
452 | 452 | ||
453 | static fastcall void native_set_pud(pud_t *pudp, pud_t pudval) | 453 | static void native_set_pud(pud_t *pudp, pud_t pudval) |
454 | { | 454 | { |
455 | *pudp = pudval; | 455 | *pudp = pudval; |
456 | } | 456 | } |
457 | 457 | ||
458 | static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 458 | static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
459 | { | 459 | { |
460 | ptep->pte_low = 0; | 460 | ptep->pte_low = 0; |
461 | smp_wmb(); | 461 | smp_wmb(); |
462 | ptep->pte_high = 0; | 462 | ptep->pte_high = 0; |
463 | } | 463 | } |
464 | 464 | ||
465 | static fastcall void native_pmd_clear(pmd_t *pmd) | 465 | static void native_pmd_clear(pmd_t *pmd) |
466 | { | 466 | { |
467 | u32 *tmp = (u32 *)pmd; | 467 | u32 *tmp = (u32 *)pmd; |
468 | *tmp = 0; | 468 | *tmp = 0; |
@@ -472,8 +472,8 @@ static fastcall void native_pmd_clear(pmd_t *pmd) | |||
472 | #endif /* CONFIG_X86_PAE */ | 472 | #endif /* CONFIG_X86_PAE */ |
473 | 473 | ||
474 | /* These are in entry.S */ | 474 | /* These are in entry.S */ |
475 | extern fastcall void native_iret(void); | 475 | extern void native_iret(void); |
476 | extern fastcall void native_irq_enable_sysexit(void); | 476 | extern void native_irq_enable_sysexit(void); |
477 | 477 | ||
478 | static int __init print_banner(void) | 478 | static int __init print_banner(void) |
479 | { | 479 | { |
@@ -482,9 +482,6 @@ static int __init print_banner(void) | |||
482 | } | 482 | } |
483 | core_initcall(print_banner); | 483 | core_initcall(print_banner); |
484 | 484 | ||
485 | /* We simply declare start_kernel to be the paravirt probe of last resort. */ | ||
486 | paravirt_probe(start_kernel); | ||
487 | |||
488 | struct paravirt_ops paravirt_ops = { | 485 | struct paravirt_ops paravirt_ops = { |
489 | .name = "bare hardware", | 486 | .name = "bare hardware", |
490 | .paravirt_enabled = 0, | 487 | .paravirt_enabled = 0, |
@@ -544,12 +541,21 @@ struct paravirt_ops paravirt_ops = { | |||
544 | .apic_write = native_apic_write, | 541 | .apic_write = native_apic_write, |
545 | .apic_write_atomic = native_apic_write_atomic, | 542 | .apic_write_atomic = native_apic_write_atomic, |
546 | .apic_read = native_apic_read, | 543 | .apic_read = native_apic_read, |
544 | .setup_boot_clock = setup_boot_APIC_clock, | ||
545 | .setup_secondary_clock = setup_secondary_APIC_clock, | ||
547 | #endif | 546 | #endif |
547 | .set_lazy_mode = (void *)native_nop, | ||
548 | 548 | ||
549 | .flush_tlb_user = native_flush_tlb, | 549 | .flush_tlb_user = native_flush_tlb, |
550 | .flush_tlb_kernel = native_flush_tlb_global, | 550 | .flush_tlb_kernel = native_flush_tlb_global, |
551 | .flush_tlb_single = native_flush_tlb_single, | 551 | .flush_tlb_single = native_flush_tlb_single, |
552 | 552 | ||
553 | .alloc_pt = (void *)native_nop, | ||
554 | .alloc_pd = (void *)native_nop, | ||
555 | .alloc_pd_clone = (void *)native_nop, | ||
556 | .release_pt = (void *)native_nop, | ||
557 | .release_pd = (void *)native_nop, | ||
558 | |||
553 | .set_pte = native_set_pte, | 559 | .set_pte = native_set_pte, |
554 | .set_pte_at = native_set_pte_at, | 560 | .set_pte_at = native_set_pte_at, |
555 | .set_pmd = native_set_pmd, | 561 | .set_pmd = native_set_pmd, |
@@ -565,6 +571,8 @@ struct paravirt_ops paravirt_ops = { | |||
565 | 571 | ||
566 | .irq_enable_sysexit = native_irq_enable_sysexit, | 572 | .irq_enable_sysexit = native_irq_enable_sysexit, |
567 | .iret = native_iret, | 573 | .iret = native_iret, |
574 | |||
575 | .startup_ipi_hook = (void *)native_nop, | ||
568 | }; | 576 | }; |
569 | 577 | ||
570 | /* | 578 | /* |
diff --git a/arch/i386/kernel/pcspeaker.c b/arch/i386/kernel/pcspeaker.c new file mode 100644 index 000000000000..bc1f2d3ea277 --- /dev/null +++ b/arch/i386/kernel/pcspeaker.c | |||
@@ -0,0 +1,20 @@ | |||
1 | #include <linux/platform_device.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/init.h> | ||
4 | |||
5 | static __init int add_pcspkr(void) | ||
6 | { | ||
7 | struct platform_device *pd; | ||
8 | int ret; | ||
9 | |||
10 | pd = platform_device_alloc("pcspkr", -1); | ||
11 | if (!pd) | ||
12 | return -ENOMEM; | ||
13 | |||
14 | ret = platform_device_add(pd); | ||
15 | if (ret) | ||
16 | platform_device_put(pd); | ||
17 | |||
18 | return ret; | ||
19 | } | ||
20 | device_initcall(add_pcspkr); | ||
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index c641056233a6..7845d480c293 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include <asm/i387.h> | 48 | #include <asm/i387.h> |
49 | #include <asm/desc.h> | 49 | #include <asm/desc.h> |
50 | #include <asm/vm86.h> | 50 | #include <asm/vm86.h> |
51 | #include <asm/idle.h> | ||
51 | #ifdef CONFIG_MATH_EMULATION | 52 | #ifdef CONFIG_MATH_EMULATION |
52 | #include <asm/math_emu.h> | 53 | #include <asm/math_emu.h> |
53 | #endif | 54 | #endif |
@@ -80,6 +81,42 @@ void (*pm_idle)(void); | |||
80 | EXPORT_SYMBOL(pm_idle); | 81 | EXPORT_SYMBOL(pm_idle); |
81 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | 82 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); |
82 | 83 | ||
84 | static ATOMIC_NOTIFIER_HEAD(idle_notifier); | ||
85 | |||
86 | void idle_notifier_register(struct notifier_block *n) | ||
87 | { | ||
88 | atomic_notifier_chain_register(&idle_notifier, n); | ||
89 | } | ||
90 | |||
91 | void idle_notifier_unregister(struct notifier_block *n) | ||
92 | { | ||
93 | atomic_notifier_chain_unregister(&idle_notifier, n); | ||
94 | } | ||
95 | |||
96 | static DEFINE_PER_CPU(volatile unsigned long, idle_state); | ||
97 | |||
98 | void enter_idle(void) | ||
99 | { | ||
100 | /* needs to be atomic w.r.t. interrupts, not against other CPUs */ | ||
101 | __set_bit(0, &__get_cpu_var(idle_state)); | ||
102 | atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); | ||
103 | } | ||
104 | |||
105 | static void __exit_idle(void) | ||
106 | { | ||
107 | /* needs to be atomic w.r.t. interrupts, not against other CPUs */ | ||
108 | if (__test_and_clear_bit(0, &__get_cpu_var(idle_state)) == 0) | ||
109 | return; | ||
110 | atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); | ||
111 | } | ||
112 | |||
113 | void exit_idle(void) | ||
114 | { | ||
115 | if (current->pid) | ||
116 | return; | ||
117 | __exit_idle(); | ||
118 | } | ||
119 | |||
83 | void disable_hlt(void) | 120 | void disable_hlt(void) |
84 | { | 121 | { |
85 | hlt_counter++; | 122 | hlt_counter++; |
@@ -130,6 +167,7 @@ EXPORT_SYMBOL(default_idle); | |||
130 | */ | 167 | */ |
131 | static void poll_idle (void) | 168 | static void poll_idle (void) |
132 | { | 169 | { |
170 | local_irq_enable(); | ||
133 | cpu_relax(); | 171 | cpu_relax(); |
134 | } | 172 | } |
135 | 173 | ||
@@ -189,7 +227,16 @@ void cpu_idle(void) | |||
189 | play_dead(); | 227 | play_dead(); |
190 | 228 | ||
191 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | 229 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; |
230 | |||
231 | /* | ||
232 | * Idle routines should keep interrupts disabled | ||
233 | * from here on, until they go to idle. | ||
234 | * Otherwise, idle callbacks can misfire. | ||
235 | */ | ||
236 | local_irq_disable(); | ||
237 | enter_idle(); | ||
192 | idle(); | 238 | idle(); |
239 | __exit_idle(); | ||
193 | } | 240 | } |
194 | preempt_enable_no_resched(); | 241 | preempt_enable_no_resched(); |
195 | schedule(); | 242 | schedule(); |
@@ -243,7 +290,11 @@ void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | |||
243 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 290 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
244 | smp_mb(); | 291 | smp_mb(); |
245 | if (!need_resched()) | 292 | if (!need_resched()) |
246 | __mwait(eax, ecx); | 293 | __sti_mwait(eax, ecx); |
294 | else | ||
295 | local_irq_enable(); | ||
296 | } else { | ||
297 | local_irq_enable(); | ||
247 | } | 298 | } |
248 | } | 299 | } |
249 | 300 | ||
@@ -308,8 +359,8 @@ void show_regs(struct pt_regs * regs) | |||
308 | regs->eax,regs->ebx,regs->ecx,regs->edx); | 359 | regs->eax,regs->ebx,regs->ecx,regs->edx); |
309 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", | 360 | printk("ESI: %08lx EDI: %08lx EBP: %08lx", |
310 | regs->esi, regs->edi, regs->ebp); | 361 | regs->esi, regs->edi, regs->ebp); |
311 | printk(" DS: %04x ES: %04x GS: %04x\n", | 362 | printk(" DS: %04x ES: %04x FS: %04x\n", |
312 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs); | 363 | 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); |
313 | 364 | ||
314 | cr0 = read_cr0(); | 365 | cr0 = read_cr0(); |
315 | cr2 = read_cr2(); | 366 | cr2 = read_cr2(); |
@@ -340,7 +391,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
340 | 391 | ||
341 | regs.xds = __USER_DS; | 392 | regs.xds = __USER_DS; |
342 | regs.xes = __USER_DS; | 393 | regs.xes = __USER_DS; |
343 | regs.xgs = __KERNEL_PDA; | 394 | regs.xfs = __KERNEL_PDA; |
344 | regs.orig_eax = -1; | 395 | regs.orig_eax = -1; |
345 | regs.eip = (unsigned long) kernel_thread_helper; | 396 | regs.eip = (unsigned long) kernel_thread_helper; |
346 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 397 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
@@ -425,7 +476,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, | |||
425 | 476 | ||
426 | p->thread.eip = (unsigned long) ret_from_fork; | 477 | p->thread.eip = (unsigned long) ret_from_fork; |
427 | 478 | ||
428 | savesegment(fs,p->thread.fs); | 479 | savesegment(gs,p->thread.gs); |
429 | 480 | ||
430 | tsk = current; | 481 | tsk = current; |
431 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { | 482 | if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { |
@@ -501,8 +552,8 @@ void dump_thread(struct pt_regs * regs, struct user * dump) | |||
501 | dump->regs.eax = regs->eax; | 552 | dump->regs.eax = regs->eax; |
502 | dump->regs.ds = regs->xds; | 553 | dump->regs.ds = regs->xds; |
503 | dump->regs.es = regs->xes; | 554 | dump->regs.es = regs->xes; |
504 | savesegment(fs,dump->regs.fs); | 555 | dump->regs.fs = regs->xfs; |
505 | dump->regs.gs = regs->xgs; | 556 | savesegment(gs,dump->regs.gs); |
506 | dump->regs.orig_eax = regs->orig_eax; | 557 | dump->regs.orig_eax = regs->orig_eax; |
507 | dump->regs.eip = regs->eip; | 558 | dump->regs.eip = regs->eip; |
508 | dump->regs.cs = regs->xcs; | 559 | dump->regs.cs = regs->xcs; |
@@ -653,7 +704,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
653 | load_esp0(tss, next); | 704 | load_esp0(tss, next); |
654 | 705 | ||
655 | /* | 706 | /* |
656 | * Save away %fs. No need to save %gs, as it was saved on the | 707 | * Save away %gs. No need to save %fs, as it was saved on the |
657 | * stack on entry. No need to save %es and %ds, as those are | 708 | * stack on entry. No need to save %es and %ds, as those are |
658 | * always kernel segments while inside the kernel. Doing this | 709 | * always kernel segments while inside the kernel. Doing this |
659 | * before setting the new TLS descriptors avoids the situation | 710 | * before setting the new TLS descriptors avoids the situation |
@@ -662,7 +713,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
662 | * used %fs or %gs (it does not today), or if the kernel is | 713 | * used %fs or %gs (it does not today), or if the kernel is |
663 | * running inside of a hypervisor layer. | 714 | * running inside of a hypervisor layer. |
664 | */ | 715 | */ |
665 | savesegment(fs, prev->fs); | 716 | savesegment(gs, prev->gs); |
666 | 717 | ||
667 | /* | 718 | /* |
668 | * Load the per-thread Thread-Local Storage descriptor. | 719 | * Load the per-thread Thread-Local Storage descriptor. |
@@ -670,14 +721,13 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
670 | load_TLS(next, cpu); | 721 | load_TLS(next, cpu); |
671 | 722 | ||
672 | /* | 723 | /* |
673 | * Restore %fs if needed. | 724 | * Restore IOPL if needed. In normal use, the flags restore |
674 | * | 725 | * in the switch assembly will handle this. But if the kernel |
675 | * Glibc normally makes %fs be zero. | 726 | * is running virtualized at a non-zero CPL, the popf will |
727 | * not restore flags, so it must be done in a separate step. | ||
676 | */ | 728 | */ |
677 | if (unlikely(prev->fs | next->fs)) | 729 | if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) |
678 | loadsegment(fs, next->fs); | 730 | set_iopl_mask(next->iopl); |
679 | |||
680 | write_pda(pcurrent, next_p); | ||
681 | 731 | ||
682 | /* | 732 | /* |
683 | * Now maybe handle debug registers and/or IO bitmaps | 733 | * Now maybe handle debug registers and/or IO bitmaps |
@@ -688,6 +738,15 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
688 | 738 | ||
689 | disable_tsc(prev_p, next_p); | 739 | disable_tsc(prev_p, next_p); |
690 | 740 | ||
741 | /* | ||
742 | * Leave lazy mode, flushing any hypercalls made here. | ||
743 | * This must be done before restoring TLS segments so | ||
744 | * the GDT and LDT are properly updated, and must be | ||
745 | * done before math_state_restore, so the TS bit is up | ||
746 | * to date. | ||
747 | */ | ||
748 | arch_leave_lazy_cpu_mode(); | ||
749 | |||
691 | /* If the task has used fpu the last 5 timeslices, just do a full | 750 | /* If the task has used fpu the last 5 timeslices, just do a full |
692 | * restore of the math state immediately to avoid the trap; the | 751 | * restore of the math state immediately to avoid the trap; the |
693 | * chances of needing FPU soon are obviously high now | 752 | * chances of needing FPU soon are obviously high now |
@@ -695,6 +754,14 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
695 | if (next_p->fpu_counter > 5) | 754 | if (next_p->fpu_counter > 5) |
696 | math_state_restore(); | 755 | math_state_restore(); |
697 | 756 | ||
757 | /* | ||
758 | * Restore %gs if needed (which is common) | ||
759 | */ | ||
760 | if (prev->gs | next->gs) | ||
761 | loadsegment(gs, next->gs); | ||
762 | |||
763 | write_pda(pcurrent, next_p); | ||
764 | |||
698 | return prev_p; | 765 | return prev_p; |
699 | } | 766 | } |
700 | 767 | ||
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index af8aabe85800..4a8f8a259723 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c | |||
@@ -89,14 +89,14 @@ static int putreg(struct task_struct *child, | |||
89 | unsigned long regno, unsigned long value) | 89 | unsigned long regno, unsigned long value) |
90 | { | 90 | { |
91 | switch (regno >> 2) { | 91 | switch (regno >> 2) { |
92 | case FS: | 92 | case GS: |
93 | if (value && (value & 3) != 3) | 93 | if (value && (value & 3) != 3) |
94 | return -EIO; | 94 | return -EIO; |
95 | child->thread.fs = value; | 95 | child->thread.gs = value; |
96 | return 0; | 96 | return 0; |
97 | case DS: | 97 | case DS: |
98 | case ES: | 98 | case ES: |
99 | case GS: | 99 | case FS: |
100 | if (value && (value & 3) != 3) | 100 | if (value && (value & 3) != 3) |
101 | return -EIO; | 101 | return -EIO; |
102 | value &= 0xffff; | 102 | value &= 0xffff; |
@@ -112,7 +112,7 @@ static int putreg(struct task_struct *child, | |||
112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; | 112 | value |= get_stack_long(child, EFL_OFFSET) & ~FLAG_MASK; |
113 | break; | 113 | break; |
114 | } | 114 | } |
115 | if (regno > ES*4) | 115 | if (regno > FS*4) |
116 | regno -= 1*4; | 116 | regno -= 1*4; |
117 | put_stack_long(child, regno, value); | 117 | put_stack_long(child, regno, value); |
118 | return 0; | 118 | return 0; |
@@ -124,18 +124,18 @@ static unsigned long getreg(struct task_struct *child, | |||
124 | unsigned long retval = ~0UL; | 124 | unsigned long retval = ~0UL; |
125 | 125 | ||
126 | switch (regno >> 2) { | 126 | switch (regno >> 2) { |
127 | case FS: | 127 | case GS: |
128 | retval = child->thread.fs; | 128 | retval = child->thread.gs; |
129 | break; | 129 | break; |
130 | case DS: | 130 | case DS: |
131 | case ES: | 131 | case ES: |
132 | case GS: | 132 | case FS: |
133 | case SS: | 133 | case SS: |
134 | case CS: | 134 | case CS: |
135 | retval = 0xffff; | 135 | retval = 0xffff; |
136 | /* fall through */ | 136 | /* fall through */ |
137 | default: | 137 | default: |
138 | if (regno > ES*4) | 138 | if (regno > FS*4) |
139 | regno -= 1*4; | 139 | regno -= 1*4; |
140 | retval &= get_stack_long(child, regno); | 140 | retval &= get_stack_long(child, regno); |
141 | } | 141 | } |
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 4694ac980cd2..122623dcc6e1 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/initrd.h> | 33 | #include <linux/initrd.h> |
34 | #include <linux/bootmem.h> | 34 | #include <linux/bootmem.h> |
35 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
36 | #include <linux/platform_device.h> | ||
37 | #include <linux/console.h> | 36 | #include <linux/console.h> |
38 | #include <linux/mca.h> | 37 | #include <linux/mca.h> |
39 | #include <linux/root_dev.h> | 38 | #include <linux/root_dev.h> |
@@ -60,6 +59,7 @@ | |||
60 | #include <asm/io_apic.h> | 59 | #include <asm/io_apic.h> |
61 | #include <asm/ist.h> | 60 | #include <asm/ist.h> |
62 | #include <asm/io.h> | 61 | #include <asm/io.h> |
62 | #include <asm/vmi.h> | ||
63 | #include <setup_arch.h> | 63 | #include <setup_arch.h> |
64 | #include <bios_ebda.h> | 64 | #include <bios_ebda.h> |
65 | 65 | ||
@@ -581,6 +581,14 @@ void __init setup_arch(char **cmdline_p) | |||
581 | 581 | ||
582 | max_low_pfn = setup_memory(); | 582 | max_low_pfn = setup_memory(); |
583 | 583 | ||
584 | #ifdef CONFIG_VMI | ||
585 | /* | ||
586 | * Must be after max_low_pfn is determined, and before kernel | ||
587 | * pagetables are setup. | ||
588 | */ | ||
589 | vmi_init(); | ||
590 | #endif | ||
591 | |||
584 | /* | 592 | /* |
585 | * NOTE: before this point _nobody_ is allowed to allocate | 593 | * NOTE: before this point _nobody_ is allowed to allocate |
586 | * any memory using the bootmem allocator. Although the | 594 | * any memory using the bootmem allocator. Although the |
@@ -651,28 +659,3 @@ void __init setup_arch(char **cmdline_p) | |||
651 | #endif | 659 | #endif |
652 | tsc_init(); | 660 | tsc_init(); |
653 | } | 661 | } |
654 | |||
655 | static __init int add_pcspkr(void) | ||
656 | { | ||
657 | struct platform_device *pd; | ||
658 | int ret; | ||
659 | |||
660 | pd = platform_device_alloc("pcspkr", -1); | ||
661 | if (!pd) | ||
662 | return -ENOMEM; | ||
663 | |||
664 | ret = platform_device_add(pd); | ||
665 | if (ret) | ||
666 | platform_device_put(pd); | ||
667 | |||
668 | return ret; | ||
669 | } | ||
670 | device_initcall(add_pcspkr); | ||
671 | |||
672 | /* | ||
673 | * Local Variables: | ||
674 | * mode:c | ||
675 | * c-file-style:"k&r" | ||
676 | * c-basic-offset:8 | ||
677 | * End: | ||
678 | */ | ||
diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 65d7620eaa09..4f99e870c986 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
22 | #include <linux/ptrace.h> | 22 | #include <linux/ptrace.h> |
23 | #include <linux/elf.h> | 23 | #include <linux/elf.h> |
24 | #include <linux/binfmts.h> | ||
24 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
25 | #include <asm/ucontext.h> | 26 | #include <asm/ucontext.h> |
26 | #include <asm/uaccess.h> | 27 | #include <asm/uaccess.h> |
@@ -128,8 +129,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax | |||
128 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ | 129 | X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ |
129 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) | 130 | X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) |
130 | 131 | ||
131 | COPY_SEG(gs); | 132 | GET_SEG(gs); |
132 | GET_SEG(fs); | 133 | COPY_SEG(fs); |
133 | COPY_SEG(es); | 134 | COPY_SEG(es); |
134 | COPY_SEG(ds); | 135 | COPY_SEG(ds); |
135 | COPY(edi); | 136 | COPY(edi); |
@@ -244,9 +245,9 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, | |||
244 | { | 245 | { |
245 | int tmp, err = 0; | 246 | int tmp, err = 0; |
246 | 247 | ||
247 | err |= __put_user(regs->xgs, (unsigned int __user *)&sc->gs); | 248 | err |= __put_user(regs->xfs, (unsigned int __user *)&sc->fs); |
248 | savesegment(fs, tmp); | 249 | savesegment(gs, tmp); |
249 | err |= __put_user(tmp, (unsigned int __user *)&sc->fs); | 250 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); |
250 | 251 | ||
251 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); | 252 | err |= __put_user(regs->xes, (unsigned int __user *)&sc->es); |
252 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); | 253 | err |= __put_user(regs->xds, (unsigned int __user *)&sc->ds); |
@@ -349,7 +350,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, | |||
349 | goto give_sigsegv; | 350 | goto give_sigsegv; |
350 | } | 351 | } |
351 | 352 | ||
352 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | 353 | if (current->binfmt->hasvdso) |
354 | restorer = (void *)VDSO_SYM(&__kernel_sigreturn); | ||
355 | else | ||
356 | restorer = (void *)&frame->retcode; | ||
353 | if (ka->sa.sa_flags & SA_RESTORER) | 357 | if (ka->sa.sa_flags & SA_RESTORER) |
354 | restorer = ka->sa.sa_restorer; | 358 | restorer = ka->sa.sa_restorer; |
355 | 359 | ||
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 5285aff8367f..9bd9637ae692 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -23,6 +23,7 @@ | |||
23 | 23 | ||
24 | #include <asm/mtrr.h> | 24 | #include <asm/mtrr.h> |
25 | #include <asm/tlbflush.h> | 25 | #include <asm/tlbflush.h> |
26 | #include <asm/idle.h> | ||
26 | #include <mach_apic.h> | 27 | #include <mach_apic.h> |
27 | 28 | ||
28 | /* | 29 | /* |
@@ -374,8 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
374 | /* | 375 | /* |
375 | * i'm not happy about this global shared spinlock in the | 376 | * i'm not happy about this global shared spinlock in the |
376 | * MM hot path, but we'll see how contended it is. | 377 | * MM hot path, but we'll see how contended it is. |
377 | * Temporarily this turns IRQs off, so that lockups are | 378 | * AK: x86-64 has a faster method that could be ported. |
378 | * detected by the NMI watchdog. | ||
379 | */ | 379 | */ |
380 | spin_lock(&tlbstate_lock); | 380 | spin_lock(&tlbstate_lock); |
381 | 381 | ||
@@ -400,7 +400,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
400 | 400 | ||
401 | while (!cpus_empty(flush_cpumask)) | 401 | while (!cpus_empty(flush_cpumask)) |
402 | /* nothing. lockup detection does not belong here */ | 402 | /* nothing. lockup detection does not belong here */ |
403 | mb(); | 403 | cpu_relax(); |
404 | 404 | ||
405 | flush_mm = NULL; | 405 | flush_mm = NULL; |
406 | flush_va = 0; | 406 | flush_va = 0; |
@@ -624,6 +624,7 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) | |||
624 | /* | 624 | /* |
625 | * At this point the info structure may be out of scope unless wait==1 | 625 | * At this point the info structure may be out of scope unless wait==1 |
626 | */ | 626 | */ |
627 | exit_idle(); | ||
627 | irq_enter(); | 628 | irq_enter(); |
628 | (*func)(info); | 629 | (*func)(info); |
629 | irq_exit(); | 630 | irq_exit(); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 8c6c8c52b95c..f46a4d095e6c 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -63,6 +63,7 @@ | |||
63 | #include <mach_apic.h> | 63 | #include <mach_apic.h> |
64 | #include <mach_wakecpu.h> | 64 | #include <mach_wakecpu.h> |
65 | #include <smpboot_hooks.h> | 65 | #include <smpboot_hooks.h> |
66 | #include <asm/vmi.h> | ||
66 | 67 | ||
67 | /* Set if we find a B stepping CPU */ | 68 | /* Set if we find a B stepping CPU */ |
68 | static int __devinitdata smp_b_stepping; | 69 | static int __devinitdata smp_b_stepping; |
@@ -545,12 +546,15 @@ static void __cpuinit start_secondary(void *unused) | |||
545 | * booting is too fragile that we want to limit the | 546 | * booting is too fragile that we want to limit the |
546 | * things done here to the most necessary things. | 547 | * things done here to the most necessary things. |
547 | */ | 548 | */ |
549 | #ifdef CONFIG_VMI | ||
550 | vmi_bringup(); | ||
551 | #endif | ||
548 | secondary_cpu_init(); | 552 | secondary_cpu_init(); |
549 | preempt_disable(); | 553 | preempt_disable(); |
550 | smp_callin(); | 554 | smp_callin(); |
551 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 555 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
552 | rep_nop(); | 556 | rep_nop(); |
553 | setup_secondary_APIC_clock(); | 557 | setup_secondary_clock(); |
554 | if (nmi_watchdog == NMI_IO_APIC) { | 558 | if (nmi_watchdog == NMI_IO_APIC) { |
555 | disable_8259A_irq(0); | 559 | disable_8259A_irq(0); |
556 | enable_NMI_through_LVT0(NULL); | 560 | enable_NMI_through_LVT0(NULL); |
@@ -619,7 +623,6 @@ extern struct { | |||
619 | unsigned short ss; | 623 | unsigned short ss; |
620 | } stack_start; | 624 | } stack_start; |
621 | extern struct i386_pda *start_pda; | 625 | extern struct i386_pda *start_pda; |
622 | extern struct Xgt_desc_struct cpu_gdt_descr; | ||
623 | 626 | ||
624 | #ifdef CONFIG_NUMA | 627 | #ifdef CONFIG_NUMA |
625 | 628 | ||
@@ -835,6 +838,13 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
835 | num_starts = 0; | 838 | num_starts = 0; |
836 | 839 | ||
837 | /* | 840 | /* |
841 | * Paravirt / VMI wants a startup IPI hook here to set up the | ||
842 | * target processor state. | ||
843 | */ | ||
844 | startup_ipi_hook(phys_apicid, (unsigned long) start_secondary, | ||
845 | (unsigned long) stack_start.esp); | ||
846 | |||
847 | /* | ||
838 | * Run STARTUP IPI loop. | 848 | * Run STARTUP IPI loop. |
839 | */ | 849 | */ |
840 | Dprintk("#startup loops: %d.\n", num_starts); | 850 | Dprintk("#startup loops: %d.\n", num_starts); |
@@ -1320,7 +1330,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1320 | 1330 | ||
1321 | smpboot_setup_io_apic(); | 1331 | smpboot_setup_io_apic(); |
1322 | 1332 | ||
1323 | setup_boot_APIC_clock(); | 1333 | setup_boot_clock(); |
1324 | 1334 | ||
1325 | /* | 1335 | /* |
1326 | * Synchronize the TSC with the AP | 1336 | * Synchronize the TSC with the AP |
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index bc882a2b1db6..13ca54a85a1c 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -78,7 +78,7 @@ int __init sysenter_setup(void) | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 78 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 79 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 80 | #ifdef CONFIG_COMPAT_VDSO |
81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY); | 81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); |
82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
83 | #endif | 83 | #endif |
84 | 84 | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index c505b16c0990..a4f67a6e6821 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -131,15 +131,13 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
131 | unsigned long pc = instruction_pointer(regs); | 131 | unsigned long pc = instruction_pointer(regs); |
132 | 132 | ||
133 | #ifdef CONFIG_SMP | 133 | #ifdef CONFIG_SMP |
134 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | 134 | if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->xcs) && |
135 | in_lock_functions(pc)) { | ||
135 | #ifdef CONFIG_FRAME_POINTER | 136 | #ifdef CONFIG_FRAME_POINTER |
136 | return *(unsigned long *)(regs->ebp + 4); | 137 | return *(unsigned long *)(regs->ebp + 4); |
137 | #else | 138 | #else |
138 | unsigned long *sp; | 139 | unsigned long *sp = (unsigned long *)&regs->esp; |
139 | if ((regs->xcs & 3) == 0) | 140 | |
140 | sp = (unsigned long *)&regs->esp; | ||
141 | else | ||
142 | sp = (unsigned long *)regs->esp; | ||
143 | /* Return address is either directly at stack pointer | 141 | /* Return address is either directly at stack pointer |
144 | or above a saved eflags. Eflags has bits 22-31 zero, | 142 | or above a saved eflags. Eflags has bits 22-31 zero, |
145 | kernel addresses don't. */ | 143 | kernel addresses don't. */ |
@@ -232,6 +230,7 @@ EXPORT_SYMBOL(get_cmos_time); | |||
232 | static void sync_cmos_clock(unsigned long dummy); | 230 | static void sync_cmos_clock(unsigned long dummy); |
233 | 231 | ||
234 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); | 232 | static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0); |
233 | int no_sync_cmos_clock; | ||
235 | 234 | ||
236 | static void sync_cmos_clock(unsigned long dummy) | 235 | static void sync_cmos_clock(unsigned long dummy) |
237 | { | 236 | { |
@@ -275,7 +274,8 @@ static void sync_cmos_clock(unsigned long dummy) | |||
275 | 274 | ||
276 | void notify_arch_cmos_timer(void) | 275 | void notify_arch_cmos_timer(void) |
277 | { | 276 | { |
278 | mod_timer(&sync_cmos_timer, jiffies + 1); | 277 | if (!no_sync_cmos_clock) |
278 | mod_timer(&sync_cmos_timer, jiffies + 1); | ||
279 | } | 279 | } |
280 | 280 | ||
281 | static long clock_cmos_diff; | 281 | static long clock_cmos_diff; |
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 0efad8aeb41a..af0d3f70a817 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -94,6 +94,7 @@ asmlinkage void spurious_interrupt_bug(void); | |||
94 | asmlinkage void machine_check(void); | 94 | asmlinkage void machine_check(void); |
95 | 95 | ||
96 | int kstack_depth_to_print = 24; | 96 | int kstack_depth_to_print = 24; |
97 | static unsigned int code_bytes = 64; | ||
97 | ATOMIC_NOTIFIER_HEAD(i386die_chain); | 98 | ATOMIC_NOTIFIER_HEAD(i386die_chain); |
98 | 99 | ||
99 | int register_die_notifier(struct notifier_block *nb) | 100 | int register_die_notifier(struct notifier_block *nb) |
@@ -291,10 +292,11 @@ void show_registers(struct pt_regs *regs) | |||
291 | int i; | 292 | int i; |
292 | int in_kernel = 1; | 293 | int in_kernel = 1; |
293 | unsigned long esp; | 294 | unsigned long esp; |
294 | unsigned short ss; | 295 | unsigned short ss, gs; |
295 | 296 | ||
296 | esp = (unsigned long) (&regs->esp); | 297 | esp = (unsigned long) (&regs->esp); |
297 | savesegment(ss, ss); | 298 | savesegment(ss, ss); |
299 | savesegment(gs, gs); | ||
298 | if (user_mode_vm(regs)) { | 300 | if (user_mode_vm(regs)) { |
299 | in_kernel = 0; | 301 | in_kernel = 0; |
300 | esp = regs->esp; | 302 | esp = regs->esp; |
@@ -313,8 +315,8 @@ void show_registers(struct pt_regs *regs) | |||
313 | regs->eax, regs->ebx, regs->ecx, regs->edx); | 315 | regs->eax, regs->ebx, regs->ecx, regs->edx); |
314 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | 316 | printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", |
315 | regs->esi, regs->edi, regs->ebp, esp); | 317 | regs->esi, regs->edi, regs->ebp, esp); |
316 | printk(KERN_EMERG "ds: %04x es: %04x ss: %04x\n", | 318 | printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", |
317 | regs->xds & 0xffff, regs->xes & 0xffff, ss); | 319 | regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); |
318 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", | 320 | printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", |
319 | TASK_COMM_LEN, current->comm, current->pid, | 321 | TASK_COMM_LEN, current->comm, current->pid, |
320 | current_thread_info(), current, current->thread_info); | 322 | current_thread_info(), current, current->thread_info); |
@@ -324,7 +326,8 @@ void show_registers(struct pt_regs *regs) | |||
324 | */ | 326 | */ |
325 | if (in_kernel) { | 327 | if (in_kernel) { |
326 | u8 *eip; | 328 | u8 *eip; |
327 | int code_bytes = 64; | 329 | unsigned int code_prologue = code_bytes * 43 / 64; |
330 | unsigned int code_len = code_bytes; | ||
328 | unsigned char c; | 331 | unsigned char c; |
329 | 332 | ||
330 | printk("\n" KERN_EMERG "Stack: "); | 333 | printk("\n" KERN_EMERG "Stack: "); |
@@ -332,14 +335,14 @@ void show_registers(struct pt_regs *regs) | |||
332 | 335 | ||
333 | printk(KERN_EMERG "Code: "); | 336 | printk(KERN_EMERG "Code: "); |
334 | 337 | ||
335 | eip = (u8 *)regs->eip - 43; | 338 | eip = (u8 *)regs->eip - code_prologue; |
336 | if (eip < (u8 *)PAGE_OFFSET || | 339 | if (eip < (u8 *)PAGE_OFFSET || |
337 | probe_kernel_address(eip, c)) { | 340 | probe_kernel_address(eip, c)) { |
338 | /* try starting at EIP */ | 341 | /* try starting at EIP */ |
339 | eip = (u8 *)regs->eip; | 342 | eip = (u8 *)regs->eip; |
340 | code_bytes = 32; | 343 | code_len = code_len - code_prologue + 1; |
341 | } | 344 | } |
342 | for (i = 0; i < code_bytes; i++, eip++) { | 345 | for (i = 0; i < code_len; i++, eip++) { |
343 | if (eip < (u8 *)PAGE_OFFSET || | 346 | if (eip < (u8 *)PAGE_OFFSET || |
344 | probe_kernel_address(eip, c)) { | 347 | probe_kernel_address(eip, c)) { |
345 | printk(" Bad EIP value."); | 348 | printk(" Bad EIP value."); |
@@ -1191,3 +1194,13 @@ static int __init kstack_setup(char *s) | |||
1191 | return 1; | 1194 | return 1; |
1192 | } | 1195 | } |
1193 | __setup("kstack=", kstack_setup); | 1196 | __setup("kstack=", kstack_setup); |
1197 | |||
1198 | static int __init code_bytes_setup(char *s) | ||
1199 | { | ||
1200 | code_bytes = simple_strtoul(s, NULL, 0); | ||
1201 | if (code_bytes > 8192) | ||
1202 | code_bytes = 8192; | ||
1203 | |||
1204 | return 1; | ||
1205 | } | ||
1206 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 2cfc7b09b925..46f752a8bbf3 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -23,6 +23,7 @@ | |||
23 | * an extra value to store the TSC freq | 23 | * an extra value to store the TSC freq |
24 | */ | 24 | */ |
25 | unsigned int tsc_khz; | 25 | unsigned int tsc_khz; |
26 | unsigned long long (*custom_sched_clock)(void); | ||
26 | 27 | ||
27 | int tsc_disable; | 28 | int tsc_disable; |
28 | 29 | ||
@@ -107,14 +108,14 @@ unsigned long long sched_clock(void) | |||
107 | { | 108 | { |
108 | unsigned long long this_offset; | 109 | unsigned long long this_offset; |
109 | 110 | ||
111 | if (unlikely(custom_sched_clock)) | ||
112 | return (*custom_sched_clock)(); | ||
113 | |||
110 | /* | 114 | /* |
111 | * in the NUMA case we dont use the TSC as they are not | 115 | * Fall back to jiffies if there's no TSC available: |
112 | * synchronized across all CPUs. | ||
113 | */ | 116 | */ |
114 | #ifndef CONFIG_NUMA | 117 | if (unlikely(tsc_disable)) |
115 | if (!cpu_khz || check_tsc_unstable()) | 118 | /* No locking but a rare wrong value is not a big deal: */ |
116 | #endif | ||
117 | /* no locking but a rare wrong value is not a big deal */ | ||
118 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 119 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
119 | 120 | ||
120 | /* read the Time Stamp Counter: */ | 121 | /* read the Time Stamp Counter: */ |
@@ -194,13 +195,13 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); | |||
194 | void __init tsc_init(void) | 195 | void __init tsc_init(void) |
195 | { | 196 | { |
196 | if (!cpu_has_tsc || tsc_disable) | 197 | if (!cpu_has_tsc || tsc_disable) |
197 | return; | 198 | goto out_no_tsc; |
198 | 199 | ||
199 | cpu_khz = calculate_cpu_khz(); | 200 | cpu_khz = calculate_cpu_khz(); |
200 | tsc_khz = cpu_khz; | 201 | tsc_khz = cpu_khz; |
201 | 202 | ||
202 | if (!cpu_khz) | 203 | if (!cpu_khz) |
203 | return; | 204 | goto out_no_tsc; |
204 | 205 | ||
205 | printk("Detected %lu.%03lu MHz processor.\n", | 206 | printk("Detected %lu.%03lu MHz processor.\n", |
206 | (unsigned long)cpu_khz / 1000, | 207 | (unsigned long)cpu_khz / 1000, |
@@ -208,6 +209,15 @@ void __init tsc_init(void) | |||
208 | 209 | ||
209 | set_cyc2ns_scale(cpu_khz); | 210 | set_cyc2ns_scale(cpu_khz); |
210 | use_tsc_delay(); | 211 | use_tsc_delay(); |
212 | return; | ||
213 | |||
214 | out_no_tsc: | ||
215 | /* | ||
216 | * Set the tsc_disable flag if there's no TSC support, this | ||
217 | * makes it a fast flag for the kernel to see whether it | ||
218 | * should be using the TSC. | ||
219 | */ | ||
220 | tsc_disable = 1; | ||
211 | } | 221 | } |
212 | 222 | ||
213 | #ifdef CONFIG_CPU_FREQ | 223 | #ifdef CONFIG_CPU_FREQ |
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index be2f96e67f78..d1b8f2b7aea6 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c | |||
@@ -96,12 +96,12 @@ static int copy_vm86_regs_to_user(struct vm86_regs __user *user, | |||
96 | { | 96 | { |
97 | int ret = 0; | 97 | int ret = 0; |
98 | 98 | ||
99 | /* kernel_vm86_regs is missing xfs, so copy everything up to | 99 | /* kernel_vm86_regs is missing xgs, so copy everything up to |
100 | (but not including) xgs, and then rest after xgs. */ | 100 | (but not including) orig_eax, and then rest including orig_eax. */ |
101 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.xgs)); | 101 | ret += copy_to_user(user, regs, offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
102 | ret += copy_to_user(&user->__null_gs, &regs->pt.xgs, | 102 | ret += copy_to_user(&user->orig_eax, &regs->pt.orig_eax, |
103 | sizeof(struct kernel_vm86_regs) - | 103 | sizeof(struct kernel_vm86_regs) - |
104 | offsetof(struct kernel_vm86_regs, pt.xgs)); | 104 | offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
105 | 105 | ||
106 | return ret; | 106 | return ret; |
107 | } | 107 | } |
@@ -113,12 +113,13 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, | |||
113 | { | 113 | { |
114 | int ret = 0; | 114 | int ret = 0; |
115 | 115 | ||
116 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.xgs)); | 116 | /* copy eax-xfs inclusive */ |
117 | ret += copy_from_user(&regs->pt.xgs, &user->__null_gs, | 117 | ret += copy_from_user(regs, user, offsetof(struct kernel_vm86_regs, pt.orig_eax)); |
118 | /* copy orig_eax-__gsh+extra */ | ||
119 | ret += copy_from_user(&regs->pt.orig_eax, &user->orig_eax, | |
118 | sizeof(struct kernel_vm86_regs) - | 120 | sizeof(struct kernel_vm86_regs) - |
119 | offsetof(struct kernel_vm86_regs, pt.xgs) + | 121 | offsetof(struct kernel_vm86_regs, pt.orig_eax) + |
120 | extra); | 122 | extra); |
121 | |||
122 | return ret; | 123 | return ret; |
123 | } | 124 | } |
124 | 125 | ||
@@ -157,8 +158,8 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) | |||
157 | 158 | ||
158 | ret = KVM86->regs32; | 159 | ret = KVM86->regs32; |
159 | 160 | ||
160 | loadsegment(fs, current->thread.saved_fs); | 161 | ret->xfs = current->thread.saved_fs; |
161 | ret->xgs = current->thread.saved_gs; | 162 | loadsegment(gs, current->thread.saved_gs); |
162 | 163 | ||
163 | return ret; | 164 | return ret; |
164 | } | 165 | } |
@@ -285,9 +286,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
285 | */ | 286 | */ |
286 | info->regs.pt.xds = 0; | 287 | info->regs.pt.xds = 0; |
287 | info->regs.pt.xes = 0; | 288 | info->regs.pt.xes = 0; |
288 | info->regs.pt.xgs = 0; | 289 | info->regs.pt.xfs = 0; |
289 | 290 | ||
290 | /* we are clearing fs later just before "jmp resume_userspace", | 291 | /* we are clearing gs later just before "jmp resume_userspace", |
291 | * because it is not saved/restored. | 292 | * because it is not saved/restored. |
292 | */ | 293 | */ |
293 | 294 | ||
@@ -321,8 +322,8 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
321 | */ | 322 | */ |
322 | info->regs32->eax = 0; | 323 | info->regs32->eax = 0; |
323 | tsk->thread.saved_esp0 = tsk->thread.esp0; | 324 | tsk->thread.saved_esp0 = tsk->thread.esp0; |
324 | savesegment(fs, tsk->thread.saved_fs); | 325 | tsk->thread.saved_fs = info->regs32->xfs; |
325 | tsk->thread.saved_gs = info->regs32->xgs; | 326 | savesegment(gs, tsk->thread.saved_gs); |
326 | 327 | ||
327 | tss = &per_cpu(init_tss, get_cpu()); | 328 | tss = &per_cpu(init_tss, get_cpu()); |
328 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; | 329 | tsk->thread.esp0 = (unsigned long) &info->VM86_TSS_ESP0; |
@@ -342,7 +343,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
342 | __asm__ __volatile__( | 343 | __asm__ __volatile__( |
343 | "movl %0,%%esp\n\t" | 344 | "movl %0,%%esp\n\t" |
344 | "movl %1,%%ebp\n\t" | 345 | "movl %1,%%ebp\n\t" |
345 | "mov %2, %%fs\n\t" | 346 | "mov %2, %%gs\n\t" |
346 | "jmp resume_userspace" | 347 | "jmp resume_userspace" |
347 | : /* no outputs */ | 348 | : /* no outputs */ |
348 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); | 349 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); |
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c new file mode 100644 index 000000000000..bb5a7abf949c --- /dev/null +++ b/arch/i386/kernel/vmi.c | |||
@@ -0,0 +1,949 @@ | |||
1 | /* | ||
2 | * VMI specific paravirt-ops implementation | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to zach@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/module.h> | ||
26 | #include <linux/license.h> | ||
27 | #include <linux/cpu.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/mm.h> | ||
30 | #include <asm/vmi.h> | ||
31 | #include <asm/io.h> | ||
32 | #include <asm/fixmap.h> | ||
33 | #include <asm/apicdef.h> | ||
34 | #include <asm/apic.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <asm/timer.h> | ||
37 | #include <asm/vmi_time.h> | ||
38 | |||
39 | /* Convenient for calling VMI functions indirectly in the ROM */ | ||
40 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | ||
41 | typedef u64 __attribute__((regparm(2))) (VROMLONGFUNC)(int); | ||
42 | |||
43 | #define call_vrom_func(rom,func) \ | ||
44 | (((VROMFUNC *)(rom->func))()) | ||
45 | |||
46 | #define call_vrom_long_func(rom,func,arg) \ | ||
47 | (((VROMLONGFUNC *)(rom->func)) (arg)) | ||
48 | |||
49 | static struct vrom_header *vmi_rom; | ||
50 | static int license_gplok; | ||
51 | static int disable_nodelay; | ||
52 | static int disable_pge; | ||
53 | static int disable_pse; | ||
54 | static int disable_sep; | ||
55 | static int disable_tsc; | ||
56 | static int disable_mtrr; | ||
57 | |||
58 | /* Cached VMI operations */ | ||
59 | struct { | ||
60 | void (*cpuid)(void /* non-c */); | ||
61 | void (*_set_ldt)(u32 selector); | ||
62 | void (*set_tr)(u32 selector); | ||
63 | void (*set_kernel_stack)(u32 selector, u32 esp0); | ||
64 | void (*allocate_page)(u32, u32, u32, u32, u32); | ||
65 | void (*release_page)(u32, u32); | ||
66 | void (*set_pte)(pte_t, pte_t *, unsigned); | ||
67 | void (*update_pte)(pte_t *, unsigned); | ||
68 | void (*set_linear_mapping)(int, u32, u32, u32); | ||
69 | void (*flush_tlb)(int); | ||
70 | void (*set_initial_ap_state)(int, int); | ||
71 | void (*halt)(void); | ||
72 | } vmi_ops; | ||
73 | |||
74 | /* XXX move this to alternative.h */ | ||
75 | extern struct paravirt_patch __start_parainstructions[], | ||
76 | __stop_parainstructions[]; | ||
77 | |||
78 | /* | ||
79 | * VMI patching routines. | ||
80 | */ | ||
81 | #define MNEM_CALL 0xe8 | ||
82 | #define MNEM_JMP 0xe9 | ||
83 | #define MNEM_RET 0xc3 | ||
84 | |||
85 | static char irq_save_disable_callout[] = { | ||
86 | MNEM_CALL, 0, 0, 0, 0, | ||
87 | MNEM_CALL, 0, 0, 0, 0, | ||
88 | MNEM_RET | ||
89 | }; | ||
90 | #define IRQ_PATCH_INT_MASK 0 | ||
91 | #define IRQ_PATCH_DISABLE 5 | ||
92 | |||
93 | static inline void patch_offset(unsigned char *eip, unsigned char *dest) | ||
94 | { | ||
95 | *(unsigned long *)(eip+1) = dest-eip-5; | ||
96 | } | ||
97 | |||
98 | static unsigned patch_internal(int call, unsigned len, void *insns) | ||
99 | { | ||
100 | u64 reloc; | ||
101 | struct vmi_relocation_info *const rel = (struct vmi_relocation_info *)&reloc; | ||
102 | reloc = call_vrom_long_func(vmi_rom, get_reloc, call); | ||
103 | switch(rel->type) { | ||
104 | case VMI_RELOCATION_CALL_REL: | ||
105 | BUG_ON(len < 5); | ||
106 | *(char *)insns = MNEM_CALL; | ||
107 | patch_offset(insns, rel->eip); | ||
108 | return 5; | ||
109 | |||
110 | case VMI_RELOCATION_JUMP_REL: | ||
111 | BUG_ON(len < 5); | ||
112 | *(char *)insns = MNEM_JMP; | ||
113 | patch_offset(insns, rel->eip); | ||
114 | return 5; | ||
115 | |||
116 | case VMI_RELOCATION_NOP: | ||
117 | /* obliterate the whole thing */ | ||
118 | return 0; | ||
119 | |||
120 | case VMI_RELOCATION_NONE: | ||
121 | /* leave native code in place */ | ||
122 | break; | ||
123 | |||
124 | default: | ||
125 | BUG(); | ||
126 | } | ||
127 | return len; | ||
128 | } | ||
129 | |||
130 | /* | ||
131 | * Apply patch if appropriate, return length of new instruction | ||
132 | * sequence. The callee does nop padding for us. | ||
133 | */ | ||
134 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) | ||
135 | { | ||
136 | switch (type) { | ||
137 | case PARAVIRT_IRQ_DISABLE: | ||
138 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); | ||
139 | case PARAVIRT_IRQ_ENABLE: | ||
140 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); | ||
141 | case PARAVIRT_RESTORE_FLAGS: | ||
142 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); | ||
143 | case PARAVIRT_SAVE_FLAGS: | ||
144 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
145 | case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE: | ||
146 | if (len >= 10) { | ||
147 | patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
148 | patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5); | ||
149 | return 10; | ||
150 | } else { | ||
151 | /* | ||
152 | * You bastards didn't leave enough room to | ||
153 | * patch save_flags_irq_disable inline. Patch | ||
154 | * to a helper | ||
155 | */ | ||
156 | BUG_ON(len < 5); | ||
157 | *(char *)insns = MNEM_CALL; | ||
158 | patch_offset(insns, irq_save_disable_callout); | ||
159 | return 5; | ||
160 | } | ||
161 | case PARAVIRT_INTERRUPT_RETURN: | ||
162 | return patch_internal(VMI_CALL_IRET, len, insns); | ||
163 | case PARAVIRT_STI_SYSEXIT: | ||
164 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); | ||
165 | default: | ||
166 | break; | ||
167 | } | ||
168 | return len; | ||
169 | } | ||
170 | |||
171 | /* CPUID has non-C semantics, and paravirt-ops API doesn't match hardware ISA */ | ||
172 | static void vmi_cpuid(unsigned int *eax, unsigned int *ebx, | ||
173 | unsigned int *ecx, unsigned int *edx) | ||
174 | { | ||
175 | int override = 0; | ||
176 | if (*eax == 1) | ||
177 | override = 1; | ||
178 | asm volatile ("call *%6" | ||
179 | : "=a" (*eax), | ||
180 | "=b" (*ebx), | ||
181 | "=c" (*ecx), | ||
182 | "=d" (*edx) | ||
183 | : "0" (*eax), "2" (*ecx), "r" (vmi_ops.cpuid)); | ||
184 | if (override) { | ||
185 | if (disable_pse) | ||
186 | *edx &= ~X86_FEATURE_PSE; | ||
187 | if (disable_pge) | ||
188 | *edx &= ~X86_FEATURE_PGE; | ||
189 | if (disable_sep) | ||
190 | *edx &= ~X86_FEATURE_SEP; | ||
191 | if (disable_tsc) | ||
192 | *edx &= ~X86_FEATURE_TSC; | ||
193 | if (disable_mtrr) | ||
194 | *edx &= ~X86_FEATURE_MTRR; | ||
195 | } | ||
196 | } | ||
197 | |||
198 | static inline void vmi_maybe_load_tls(struct desc_struct *gdt, int nr, struct desc_struct *new) | ||
199 | { | ||
200 | if (gdt[nr].a != new->a || gdt[nr].b != new->b) | ||
201 | write_gdt_entry(gdt, nr, new->a, new->b); | ||
202 | } | ||
203 | |||
204 | static void vmi_load_tls(struct thread_struct *t, unsigned int cpu) | ||
205 | { | ||
206 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
207 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0]); | ||
208 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1]); | ||
209 | vmi_maybe_load_tls(gdt, GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2]); | ||
210 | } | ||
211 | |||
212 | static void vmi_set_ldt(const void *addr, unsigned entries) | ||
213 | { | ||
214 | unsigned cpu = smp_processor_id(); | ||
215 | u32 low, high; | ||
216 | |||
217 | pack_descriptor(&low, &high, (unsigned long)addr, | ||
218 | entries * sizeof(struct desc_struct) - 1, | ||
219 | DESCTYPE_LDT, 0); | ||
220 | write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, low, high); | ||
221 | vmi_ops._set_ldt(entries ? GDT_ENTRY_LDT*sizeof(struct desc_struct) : 0); | ||
222 | } | ||
223 | |||
224 | static void vmi_set_tr(void) | ||
225 | { | ||
226 | vmi_ops.set_tr(GDT_ENTRY_TSS*sizeof(struct desc_struct)); | ||
227 | } | ||
228 | |||
229 | static void vmi_load_esp0(struct tss_struct *tss, | ||
230 | struct thread_struct *thread) | ||
231 | { | ||
232 | tss->esp0 = thread->esp0; | ||
233 | |||
234 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
235 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
236 | tss->ss1 = thread->sysenter_cs; | ||
237 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
238 | } | ||
239 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0); | ||
240 | } | ||
241 | |||
242 | static void vmi_flush_tlb_user(void) | ||
243 | { | ||
244 | vmi_ops.flush_tlb(VMI_FLUSH_TLB); | ||
245 | } | ||
246 | |||
247 | static void vmi_flush_tlb_kernel(void) | ||
248 | { | ||
249 | vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | ||
250 | } | ||
251 | |||
252 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | ||
253 | static void vmi_nop(void) | ||
254 | { | ||
255 | } | ||
256 | |||
257 | /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ | ||
258 | #ifdef CONFIG_NO_IDLE_HZ | ||
259 | static fastcall void vmi_safe_halt(void) | ||
260 | { | ||
261 | int idle = vmi_stop_hz_timer(); | ||
262 | vmi_ops.halt(); | ||
263 | if (idle) { | ||
264 | local_irq_disable(); | ||
265 | vmi_account_time_restart_hz_timer(); | ||
266 | local_irq_enable(); | ||
267 | } | ||
268 | } | ||
269 | #endif | ||
270 | |||
271 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
272 | |||
273 | #ifdef CONFIG_X86_PAE | ||
274 | #define MAX_BOOT_PTS (2048+4+1) | ||
275 | #else | ||
276 | #define MAX_BOOT_PTS (1024+1) | ||
277 | #endif | ||
278 | |||
279 | /* | ||
280 | * During boot, mem_map is not yet available in paging_init, so stash | ||
281 | * all the boot page allocations here. | ||
282 | */ | ||
283 | static struct { | ||
284 | u32 pfn; | ||
285 | int type; | ||
286 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
287 | static int num_boot_page_allocations; | ||
288 | static int boot_allocations_applied; | ||
289 | |||
290 | void vmi_apply_boot_page_allocations(void) | ||
291 | { | ||
292 | int i; | ||
293 | BUG_ON(!mem_map); | ||
294 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
295 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
296 | page->type = boot_page_allocations[i].type; | ||
297 | page->type = boot_page_allocations[i].type & | ||
298 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
299 | } | ||
300 | boot_allocations_applied = 1; | ||
301 | } | ||
302 | |||
303 | static void record_page_type(u32 pfn, int type) | ||
304 | { | ||
305 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
306 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
307 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
308 | num_boot_page_allocations++; | ||
309 | } | ||
310 | |||
311 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
312 | { | ||
313 | u32 *ptr; | ||
314 | int i; | ||
315 | int limit = PAGE_SIZE / sizeof(int); | ||
316 | |||
317 | if (page_address(page)) | ||
318 | ptr = (u32 *)page_address(page); | ||
319 | else | ||
320 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
321 | /* | ||
322 | * When cloning the root in non-PAE mode, only the userspace | ||
323 | * pdes need to be zeroed. | ||
324 | */ | ||
325 | if (type & VMI_PAGE_CLONE) | ||
326 | limit = USER_PTRS_PER_PGD; | ||
327 | for (i = 0; i < limit; i++) | ||
328 | BUG_ON(ptr[i]); | ||
329 | } | ||
330 | |||
331 | /* | ||
332 | * We stash the page type into struct page so we can verify the page | ||
333 | * types are used properly. | ||
334 | */ | ||
335 | static void vmi_set_page_type(u32 pfn, int type) | ||
336 | { | ||
337 | /* PAE can have multiple roots per page - don't track */ | ||
338 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
339 | return; | ||
340 | |||
341 | if (boot_allocations_applied) { | ||
342 | struct page *page = pfn_to_page(pfn); | ||
343 | if (type != VMI_PAGE_NORMAL) | ||
344 | BUG_ON(page->type); | ||
345 | else | ||
346 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
347 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
348 | if (type & VMI_PAGE_ZEROED) | ||
349 | check_zeroed_page(pfn, type, page); | ||
350 | } else { | ||
351 | record_page_type(pfn, type); | ||
352 | } | ||
353 | } | ||
354 | |||
355 | static void vmi_check_page_type(u32 pfn, int type) | ||
356 | { | ||
357 | /* PAE can have multiple roots per page - skip checks */ | ||
358 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
359 | return; | ||
360 | |||
361 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
362 | if (boot_allocations_applied) { | ||
363 | struct page *page = pfn_to_page(pfn); | ||
364 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
365 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
366 | BUG_ON((type & page->type) == 0); | ||
367 | } | ||
368 | } | ||
369 | #else | ||
370 | #define vmi_set_page_type(p,t) do { } while (0) | ||
371 | #define vmi_check_page_type(p,t) do { } while (0) | ||
372 | #endif | ||
373 | |||
374 | static void vmi_allocate_pt(u32 pfn) | ||
375 | { | ||
376 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
377 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | ||
378 | } | ||
379 | |||
380 | static void vmi_allocate_pd(u32 pfn) | ||
381 | { | ||
382 | /* | ||
383 | * This call comes in very early, before mem_map is setup. | ||
384 | * It is called only for swapper_pg_dir, which already has | ||
385 | * data on it. | ||
386 | */ | ||
387 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
388 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | ||
389 | } | ||
390 | |||
391 | static void vmi_allocate_pd_clone(u32 pfn, u32 clonepfn, u32 start, u32 count) | ||
392 | { | ||
393 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
394 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
395 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | ||
396 | } | ||
397 | |||
398 | static void vmi_release_pt(u32 pfn) | ||
399 | { | ||
400 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | ||
401 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
402 | } | ||
403 | |||
404 | static void vmi_release_pd(u32 pfn) | ||
405 | { | ||
406 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | ||
407 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
408 | } | ||
409 | |||
410 | /* | ||
411 | * Helper macros for MMU update flags. We can defer updates until a flush | ||
412 | * or page invalidation only if the update is to the current address space | ||
413 | * (otherwise, there is no flush). We must check against init_mm, since | ||
414 | * this could be a kernel update, which usually passes init_mm, although | ||
415 | * sometimes this check can be skipped if we know the particular function | ||
416 | * is only called on user mode PTEs. We could change the kernel to pass | ||
417 | * current->active_mm here, but in particular, I was unsure if changing | ||
418 | * mm/highmem.c to do this would still be correct on other architectures. | ||
419 | */ | ||
420 | #define is_current_as(mm, mustbeuser) ((mm) == current->active_mm || \ | ||
421 | (!mustbeuser && (mm) == &init_mm)) | ||
422 | #define vmi_flags_addr(mm, addr, level, user) \ | ||
423 | ((level) | (is_current_as(mm, user) ? \ | ||
424 | (VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
425 | #define vmi_flags_addr_defer(mm, addr, level, user) \ | ||
426 | ((level) | (is_current_as(mm, user) ? \ | ||
427 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | ||
428 | |||
429 | static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep) | ||
430 | { | ||
431 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
432 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
433 | } | ||
434 | |||
435 | static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) | ||
436 | { | ||
437 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
438 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | ||
439 | } | ||
440 | |||
441 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | ||
442 | { | ||
443 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | ||
444 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD); | ||
445 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | ||
446 | } | ||
447 | |||
448 | static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | ||
449 | { | ||
450 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
451 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
452 | } | ||
453 | |||
454 | static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
455 | { | ||
456 | #ifdef CONFIG_X86_PAE | ||
457 | const pte_t pte = { pmdval.pmd, pmdval.pmd >> 32 }; | ||
458 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
459 | #else | ||
460 | const pte_t pte = { pmdval.pud.pgd.pgd }; | ||
461 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
462 | #endif | ||
463 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | ||
464 | } | ||
465 | |||
466 | #ifdef CONFIG_X86_PAE | ||
467 | |||
468 | static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
469 | { | ||
470 | /* | ||
471 | * XXX This is called from set_pmd_pte, but at both PT | ||
472 | * and PD layers so the VMI_PAGE_PT flag is wrong. But | ||
473 | * it is only called for large page mapping changes, | ||
474 | * the Xen backend, doesn't support large pages, and the | ||
475 | * ESX backend doesn't depend on the flag. | ||
476 | */ | ||
477 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | ||
478 | vmi_ops.update_pte(ptep, VMI_PAGE_PT); | ||
479 | } | ||
480 | |||
481 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
482 | { | ||
483 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
484 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); | ||
485 | } | ||
486 | |||
487 | static void vmi_set_pud(pud_t *pudp, pud_t pudval) | ||
488 | { | ||
489 | /* Um, eww */ | ||
490 | const pte_t pte = { pudval.pgd.pgd, pudval.pgd.pgd >> 32 }; | ||
491 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
492 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | ||
493 | } | ||
494 | |||
495 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
496 | { | ||
497 | const pte_t pte = { 0 }; | ||
498 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
499 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | ||
500 | } | ||
501 | |||
502 | void vmi_pmd_clear(pmd_t *pmd) | ||
503 | { | ||
504 | const pte_t pte = { 0 }; | ||
505 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
506 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | ||
507 | } | ||
508 | #endif | ||
509 | |||
510 | #ifdef CONFIG_SMP | ||
511 | struct vmi_ap_state ap; | ||
512 | extern void setup_pda(void); | ||
513 | |||
514 | static void __init /* XXX cpu hotplug */ | ||
515 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | ||
516 | unsigned long start_esp) | ||
517 | { | ||
518 | /* Default everything to zero. This is fine for most GPRs. */ | ||
519 | memset(&ap, 0, sizeof(struct vmi_ap_state)); | ||
520 | |||
521 | ap.gdtr_limit = GDT_SIZE - 1; | ||
522 | ap.gdtr_base = (unsigned long) get_cpu_gdt_table(phys_apicid); | ||
523 | |||
524 | ap.idtr_limit = IDT_ENTRIES * 8 - 1; | ||
525 | ap.idtr_base = (unsigned long) idt_table; | ||
526 | |||
527 | ap.ldtr = 0; | ||
528 | |||
529 | ap.cs = __KERNEL_CS; | ||
530 | ap.eip = (unsigned long) start_eip; | ||
531 | ap.ss = __KERNEL_DS; | ||
532 | ap.esp = (unsigned long) start_esp; | ||
533 | |||
534 | ap.ds = __USER_DS; | ||
535 | ap.es = __USER_DS; | ||
536 | ap.fs = __KERNEL_PDA; | ||
537 | ap.gs = 0; | ||
538 | |||
539 | ap.eflags = 0; | ||
540 | |||
541 | setup_pda(); | ||
542 | |||
543 | #ifdef CONFIG_X86_PAE | ||
544 | /* efer should match BSP efer. */ | ||
545 | if (cpu_has_nx) { | ||
546 | unsigned l, h; | ||
547 | rdmsr(MSR_EFER, l, h); | ||
548 | ap.efer = (unsigned long long) h << 32 | l; | ||
549 | } | ||
550 | #endif | ||
551 | |||
552 | ap.cr3 = __pa(swapper_pg_dir); | ||
553 | /* Protected mode, paging, AM, WP, NE, MP. */ | ||
554 | ap.cr0 = 0x80050023; | ||
555 | ap.cr4 = mmu_cr4_features; | ||
556 | vmi_ops.set_initial_ap_state(__pa(&ap), phys_apicid); | ||
557 | } | ||
558 | #endif | ||
559 | |||
560 | static inline int __init check_vmi_rom(struct vrom_header *rom) | ||
561 | { | ||
562 | struct pci_header *pci; | ||
563 | struct pnp_header *pnp; | ||
564 | const char *manufacturer = "UNKNOWN"; | ||
565 | const char *product = "UNKNOWN"; | ||
566 | const char *license = "unspecified"; | ||
567 | |||
568 | if (rom->rom_signature != 0xaa55) | ||
569 | return 0; | ||
570 | if (rom->vrom_signature != VMI_SIGNATURE) | ||
571 | return 0; | ||
572 | if (rom->api_version_maj != VMI_API_REV_MAJOR || | ||
573 | rom->api_version_min+1 < VMI_API_REV_MINOR+1) { | ||
574 | printk(KERN_WARNING "VMI: Found mismatched rom version %d.%d\n", | ||
575 | rom->api_version_maj, | ||
576 | rom->api_version_min); | ||
577 | return 0; | ||
578 | } | ||
579 | |||
580 | /* | ||
581 | * Relying on the VMI_SIGNATURE field is not 100% safe, so check | ||
582 | * the PCI header and device type to make sure this is really a | ||
583 | * VMI device. | ||
584 | */ | ||
585 | if (!rom->pci_header_offs) { | ||
586 | printk(KERN_WARNING "VMI: ROM does not contain PCI header.\n"); | ||
587 | return 0; | ||
588 | } | ||
589 | |||
590 | pci = (struct pci_header *)((char *)rom+rom->pci_header_offs); | ||
591 | if (pci->vendorID != PCI_VENDOR_ID_VMWARE || | ||
592 | pci->deviceID != PCI_DEVICE_ID_VMWARE_VMI) { | ||
593 | /* Allow it to run anyway, but warn */ | ||
594 | printk(KERN_WARNING "VMI: ROM from unknown manufacturer\n"); | ||
595 | } | ||
596 | |||
597 | if (rom->pnp_header_offs) { | ||
598 | pnp = (struct pnp_header *)((char *)rom+rom->pnp_header_offs); | ||
599 | if (pnp->manufacturer_offset) | ||
600 | manufacturer = (const char *)rom+pnp->manufacturer_offset; | ||
601 | if (pnp->product_offset) | ||
602 | product = (const char *)rom+pnp->product_offset; | ||
603 | } | ||
604 | |||
605 | if (rom->license_offs) | ||
606 | license = (char *)rom+rom->license_offs; | ||
607 | |||
608 | printk(KERN_INFO "VMI: Found %s %s, API version %d.%d, ROM version %d.%d\n", | ||
609 | manufacturer, product, | ||
610 | rom->api_version_maj, rom->api_version_min, | ||
611 | pci->rom_version_maj, pci->rom_version_min); | ||
612 | |||
613 | license_gplok = license_is_gpl_compatible(license); | ||
614 | if (!license_gplok) { | ||
615 | printk(KERN_WARNING "VMI: ROM license '%s' taints kernel... " | ||
616 | "inlining disabled\n", | ||
617 | license); | ||
618 | add_taint(TAINT_PROPRIETARY_MODULE); | ||
619 | } | ||
620 | return 1; | ||
621 | } | ||
622 | |||
623 | /* | ||
624 | * Probe for the VMI option ROM | ||
625 | */ | ||
626 | static inline int __init probe_vmi_rom(void) | ||
627 | { | ||
628 | unsigned long base; | ||
629 | |||
630 | /* VMI ROM is in option ROM area, check signature */ | ||
631 | for (base = 0xC0000; base < 0xE0000; base += 2048) { | ||
632 | struct vrom_header *romstart; | ||
633 | romstart = (struct vrom_header *)isa_bus_to_virt(base); | ||
634 | if (check_vmi_rom(romstart)) { | ||
635 | vmi_rom = romstart; | ||
636 | return 1; | ||
637 | } | ||
638 | } | ||
639 | return 0; | ||
640 | } | ||
641 | |||
642 | /* | ||
643 | * VMI setup common to all processors | ||
644 | */ | ||
645 | void vmi_bringup(void) | ||
646 | { | ||
647 | /* We must establish the lowmem mapping for MMU ops to work */ | ||
648 | if (vmi_rom) | ||
649 | vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); | ||
650 | } | ||
651 | |||
652 | /* | ||
653 | * Return a pointer to the VMI function or a NOP stub | ||
654 | */ | ||
655 | static void *vmi_get_function(int vmicall) | ||
656 | { | ||
657 | u64 reloc; | ||
658 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
659 | reloc = call_vrom_long_func(vmi_rom, get_reloc, vmicall); | ||
660 | BUG_ON(rel->type == VMI_RELOCATION_JUMP_REL); | ||
661 | if (rel->type == VMI_RELOCATION_CALL_REL) | ||
662 | return (void *)rel->eip; | ||
663 | else | ||
664 | return (void *)vmi_nop; | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Helper macro for making the VMI paravirt-ops fill code readable. | ||
669 | * For unimplemented operations, fall back to default. | ||
670 | */ | ||
671 | #define para_fill(opname, vmicall) \ | ||
672 | do { \ | ||
673 | reloc = call_vrom_long_func(vmi_rom, get_reloc, \ | ||
674 | VMI_CALL_##vmicall); \ | ||
675 | if (rel->type != VMI_RELOCATION_NONE) { \ | ||
676 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); \ | ||
677 | paravirt_ops.opname = (void *)rel->eip; \ | ||
678 | } \ | ||
679 | } while (0) | ||
680 | |||
681 | /* | ||
682 | * Activate the VMI interface and switch into paravirtualized mode | ||
683 | */ | ||
684 | static inline int __init activate_vmi(void) | ||
685 | { | ||
686 | short kernel_cs; | ||
687 | u64 reloc; | ||
688 | const struct vmi_relocation_info *rel = (struct vmi_relocation_info *)&reloc; | ||
689 | |||
690 | if (call_vrom_func(vmi_rom, vmi_init) != 0) { | ||
691 | printk(KERN_ERR "VMI ROM failed to initialize!"); | ||
692 | return 0; | ||
693 | } | ||
694 | savesegment(cs, kernel_cs); | ||
695 | |||
696 | paravirt_ops.paravirt_enabled = 1; | ||
697 | paravirt_ops.kernel_rpl = kernel_cs & SEGMENT_RPL_MASK; | ||
698 | |||
699 | paravirt_ops.patch = vmi_patch; | ||
700 | paravirt_ops.name = "vmi"; | ||
701 | |||
702 | /* | ||
703 | * Many of these operations are ABI compatible with VMI. | ||
704 | * This means we can fill in the paravirt-ops with direct | ||
705 | * pointers into the VMI ROM. If the calling convention for | ||
706 | * these operations changes, this code needs to be updated. | ||
707 | * | ||
708 | * Exceptions | ||
709 | * CPUID paravirt-op uses pointers, not the native ISA | ||
710 | * halt has no VMI equivalent; all VMI halts are "safe" | ||
711 | * no MSR support yet - just trap and emulate. VMI uses the | ||
712 | * same ABI as the native ISA, but Linux wants exceptions | ||
713 | * from bogus MSR read / write handled | ||
714 | * rdpmc is not yet used in Linux | ||
715 | */ | ||
716 | |||
717 | /* CPUID is special, so very special */ | ||
718 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_CPUID); | ||
719 | if (rel->type != VMI_RELOCATION_NONE) { | ||
720 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
721 | vmi_ops.cpuid = (void *)rel->eip; | ||
722 | paravirt_ops.cpuid = vmi_cpuid; | ||
723 | } | ||
724 | |||
725 | para_fill(clts, CLTS); | ||
726 | para_fill(get_debugreg, GetDR); | ||
727 | para_fill(set_debugreg, SetDR); | ||
728 | para_fill(read_cr0, GetCR0); | ||
729 | para_fill(read_cr2, GetCR2); | ||
730 | para_fill(read_cr3, GetCR3); | ||
731 | para_fill(read_cr4, GetCR4); | ||
732 | para_fill(write_cr0, SetCR0); | ||
733 | para_fill(write_cr2, SetCR2); | ||
734 | para_fill(write_cr3, SetCR3); | ||
735 | para_fill(write_cr4, SetCR4); | ||
736 | para_fill(save_fl, GetInterruptMask); | ||
737 | para_fill(restore_fl, SetInterruptMask); | ||
738 | para_fill(irq_disable, DisableInterrupts); | ||
739 | para_fill(irq_enable, EnableInterrupts); | ||
740 | /* irq_save_disable !!! sheer pain */ | ||
741 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], | ||
742 | (char *)paravirt_ops.save_fl); | ||
743 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], | ||
744 | (char *)paravirt_ops.irq_disable); | ||
745 | #ifndef CONFIG_NO_IDLE_HZ | ||
746 | para_fill(safe_halt, Halt); | ||
747 | #else | ||
748 | vmi_ops.halt = vmi_get_function(VMI_CALL_Halt); | ||
749 | paravirt_ops.safe_halt = vmi_safe_halt; | ||
750 | #endif | ||
751 | para_fill(wbinvd, WBINVD); | ||
752 | /* paravirt_ops.read_msr = vmi_rdmsr */ | ||
753 | /* paravirt_ops.write_msr = vmi_wrmsr */ | ||
754 | para_fill(read_tsc, RDTSC); | ||
755 | /* paravirt_ops.rdpmc = vmi_rdpmc */ | ||
756 | |||
757 | /* TR interface doesn't pass TR value */ | ||
758 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetTR); | ||
759 | if (rel->type != VMI_RELOCATION_NONE) { | ||
760 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
761 | vmi_ops.set_tr = (void *)rel->eip; | ||
762 | paravirt_ops.load_tr_desc = vmi_set_tr; | ||
763 | } | ||
764 | |||
765 | /* LDT is special, too */ | ||
766 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_SetLDT); | ||
767 | if (rel->type != VMI_RELOCATION_NONE) { | ||
768 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
769 | vmi_ops._set_ldt = (void *)rel->eip; | ||
770 | paravirt_ops.set_ldt = vmi_set_ldt; | ||
771 | } | ||
772 | |||
773 | para_fill(load_gdt, SetGDT); | ||
774 | para_fill(load_idt, SetIDT); | ||
775 | para_fill(store_gdt, GetGDT); | ||
776 | para_fill(store_idt, GetIDT); | ||
777 | para_fill(store_tr, GetTR); | ||
778 | paravirt_ops.load_tls = vmi_load_tls; | ||
779 | para_fill(write_ldt_entry, WriteLDTEntry); | ||
780 | para_fill(write_gdt_entry, WriteGDTEntry); | ||
781 | para_fill(write_idt_entry, WriteIDTEntry); | ||
782 | reloc = call_vrom_long_func(vmi_rom, get_reloc, | ||
783 | VMI_CALL_UpdateKernelStack); | ||
784 | if (rel->type != VMI_RELOCATION_NONE) { | ||
785 | BUG_ON(rel->type != VMI_RELOCATION_CALL_REL); | ||
786 | vmi_ops.set_kernel_stack = (void *)rel->eip; | ||
787 | paravirt_ops.load_esp0 = vmi_load_esp0; | ||
788 | } | ||
789 | |||
790 | para_fill(set_iopl_mask, SetIOPLMask); | ||
791 | paravirt_ops.io_delay = (void *)vmi_nop; | ||
792 | if (!disable_nodelay) { | ||
793 | paravirt_ops.const_udelay = (void *)vmi_nop; | ||
794 | } | ||
795 | |||
796 | para_fill(set_lazy_mode, SetLazyMode); | ||
797 | |||
798 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_FlushTLB); | ||
799 | if (rel->type != VMI_RELOCATION_NONE) { | ||
800 | vmi_ops.flush_tlb = (void *)rel->eip; | ||
801 | paravirt_ops.flush_tlb_user = vmi_flush_tlb_user; | ||
802 | paravirt_ops.flush_tlb_kernel = vmi_flush_tlb_kernel; | ||
803 | } | ||
804 | para_fill(flush_tlb_single, InvalPage); | ||
805 | |||
806 | /* | ||
807 | * Until a standard flag format can be agreed on, we need to | ||
808 | * implement these as wrappers in Linux. Get the VMI ROM | ||
809 | * function pointers for the two backend calls. | ||
810 | */ | ||
811 | #ifdef CONFIG_X86_PAE | ||
812 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxELong); | ||
813 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxELong); | ||
814 | #else | ||
815 | vmi_ops.set_pte = vmi_get_function(VMI_CALL_SetPxE); | ||
816 | vmi_ops.update_pte = vmi_get_function(VMI_CALL_UpdatePxE); | ||
817 | #endif | ||
818 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
819 | vmi_ops.allocate_page = vmi_get_function(VMI_CALL_AllocatePage); | ||
820 | vmi_ops.release_page = vmi_get_function(VMI_CALL_ReleasePage); | ||
821 | |||
822 | paravirt_ops.alloc_pt = vmi_allocate_pt; | ||
823 | paravirt_ops.alloc_pd = vmi_allocate_pd; | ||
824 | paravirt_ops.alloc_pd_clone = vmi_allocate_pd_clone; | ||
825 | paravirt_ops.release_pt = vmi_release_pt; | ||
826 | paravirt_ops.release_pd = vmi_release_pd; | ||
827 | paravirt_ops.set_pte = vmi_set_pte; | ||
828 | paravirt_ops.set_pte_at = vmi_set_pte_at; | ||
829 | paravirt_ops.set_pmd = vmi_set_pmd; | ||
830 | paravirt_ops.pte_update = vmi_update_pte; | ||
831 | paravirt_ops.pte_update_defer = vmi_update_pte_defer; | ||
832 | #ifdef CONFIG_X86_PAE | ||
833 | paravirt_ops.set_pte_atomic = vmi_set_pte_atomic; | ||
834 | paravirt_ops.set_pte_present = vmi_set_pte_present; | ||
835 | paravirt_ops.set_pud = vmi_set_pud; | ||
836 | paravirt_ops.pte_clear = vmi_pte_clear; | ||
837 | paravirt_ops.pmd_clear = vmi_pmd_clear; | ||
838 | #endif | ||
839 | /* | ||
840 | * These MUST always be patched. Don't support indirect jumps | ||
841 | * through these operations, as the VMI interface may use either | ||
842 | * a jump or a call to get to these operations, depending on | ||
843 | * the backend. They are performance critical anyway, so requiring | ||
844 | * a patch is not a big problem. | ||
845 | */ | ||
846 | paravirt_ops.irq_enable_sysexit = (void *)0xfeedbab0; | ||
847 | paravirt_ops.iret = (void *)0xbadbab0; | ||
848 | |||
849 | #ifdef CONFIG_SMP | ||
850 | paravirt_ops.startup_ipi_hook = vmi_startup_ipi_hook; | ||
851 | vmi_ops.set_initial_ap_state = vmi_get_function(VMI_CALL_SetInitialAPState); | ||
852 | #endif | ||
853 | |||
854 | #ifdef CONFIG_X86_LOCAL_APIC | ||
855 | paravirt_ops.apic_read = vmi_get_function(VMI_CALL_APICRead); | ||
856 | paravirt_ops.apic_write = vmi_get_function(VMI_CALL_APICWrite); | ||
857 | paravirt_ops.apic_write_atomic = vmi_get_function(VMI_CALL_APICWrite); | ||
858 | #endif | ||
859 | |||
860 | /* | ||
861 | * Check for VMI timer functionality by probing for a cycle frequency method | ||
862 | */ | ||
863 | reloc = call_vrom_long_func(vmi_rom, get_reloc, VMI_CALL_GetCycleFrequency); | ||
864 | if (rel->type != VMI_RELOCATION_NONE) { | ||
865 | vmi_timer_ops.get_cycle_frequency = (void *)rel->eip; | ||
866 | vmi_timer_ops.get_cycle_counter = | ||
867 | vmi_get_function(VMI_CALL_GetCycleCounter); | ||
868 | vmi_timer_ops.get_wallclock = | ||
869 | vmi_get_function(VMI_CALL_GetWallclockTime); | ||
870 | vmi_timer_ops.wallclock_updated = | ||
871 | vmi_get_function(VMI_CALL_WallclockUpdated); | ||
872 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | ||
873 | vmi_timer_ops.cancel_alarm = | ||
874 | vmi_get_function(VMI_CALL_CancelAlarm); | ||
875 | paravirt_ops.time_init = vmi_time_init; | ||
876 | paravirt_ops.get_wallclock = vmi_get_wallclock; | ||
877 | paravirt_ops.set_wallclock = vmi_set_wallclock; | ||
878 | #ifdef CONFIG_X86_LOCAL_APIC | ||
879 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; | ||
880 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; | ||
881 | #endif | ||
882 | custom_sched_clock = vmi_sched_clock; | ||
883 | } | ||
884 | |||
885 | /* | ||
886 | * Alternative instruction rewriting doesn't happen soon enough | ||
887 | * to convert VMI_IRET to a call instead of a jump; so we have | ||
888 | * to do this before IRQs get reenabled. Fortunately, it is | ||
889 | * idempotent. | ||
890 | */ | ||
891 | apply_paravirt(__start_parainstructions, __stop_parainstructions); | ||
892 | |||
893 | vmi_bringup(); | ||
894 | |||
895 | return 1; | ||
896 | } | ||
897 | |||
898 | #undef para_fill | ||
899 | |||
900 | void __init vmi_init(void) | ||
901 | { | ||
902 | unsigned long flags; | ||
903 | |||
904 | if (!vmi_rom) | ||
905 | probe_vmi_rom(); | ||
906 | else | ||
907 | check_vmi_rom(vmi_rom); | ||
908 | |||
909 | /* In case probing for or validating the ROM failed, basil */ | ||
910 | if (!vmi_rom) | ||
911 | return; | ||
912 | |||
913 | reserve_top_address(-vmi_rom->virtual_top); | ||
914 | |||
915 | local_irq_save(flags); | ||
916 | activate_vmi(); | ||
917 | #ifdef CONFIG_SMP | ||
918 | no_timer_check = 1; | ||
919 | #endif | ||
920 | local_irq_restore(flags & X86_EFLAGS_IF); | ||
921 | } | ||
922 | |||
923 | static int __init parse_vmi(char *arg) | ||
924 | { | ||
925 | if (!arg) | ||
926 | return -EINVAL; | ||
927 | |||
928 | if (!strcmp(arg, "disable_nodelay")) | ||
929 | disable_nodelay = 1; | ||
930 | else if (!strcmp(arg, "disable_pge")) { | ||
931 | clear_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability); | ||
932 | disable_pge = 1; | ||
933 | } else if (!strcmp(arg, "disable_pse")) { | ||
934 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | ||
935 | disable_pse = 1; | ||
936 | } else if (!strcmp(arg, "disable_sep")) { | ||
937 | clear_bit(X86_FEATURE_SEP, boot_cpu_data.x86_capability); | ||
938 | disable_sep = 1; | ||
939 | } else if (!strcmp(arg, "disable_tsc")) { | ||
940 | clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability); | ||
941 | disable_tsc = 1; | ||
942 | } else if (!strcmp(arg, "disable_mtrr")) { | ||
943 | clear_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability); | ||
944 | disable_mtrr = 1; | ||
945 | } | ||
946 | return 0; | ||
947 | } | ||
948 | |||
949 | early_param("vmi", parse_vmi); | ||
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c new file mode 100644 index 000000000000..2e2d8dbcbd68 --- /dev/null +++ b/arch/i386/kernel/vmitime.c | |||
@@ -0,0 +1,499 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | ||
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | ||
28 | * See comments there for proper credits. | ||
29 | */ | ||
30 | |||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/jiffies.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/kernel_stat.h> | ||
37 | #include <linux/rcupdate.h> | ||
38 | #include <linux/clocksource.h> | ||
39 | |||
40 | #include <asm/timer.h> | ||
41 | #include <asm/io.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/div64.h> | ||
44 | #include <asm/timer.h> | ||
45 | #include <asm/desc.h> | ||
46 | |||
47 | #include <asm/vmi.h> | ||
48 | #include <asm/vmi_time.h> | ||
49 | |||
50 | #include <mach_timer.h> | ||
51 | #include <io_ports.h> | ||
52 | |||
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | ||
55 | #else | ||
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | ||
57 | #endif | ||
58 | |||
59 | /* Cached VMI operations */ | ||
60 | struct vmi_timer_ops vmi_timer_ops; | ||
61 | |||
62 | #ifdef CONFIG_NO_IDLE_HZ | ||
63 | |||
64 | /* /proc/sys/kernel/hz_timer state. */ | ||
65 | int sysctl_hz_timer; | ||
66 | |||
67 | /* Some stats */ | ||
68 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); | ||
69 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); | ||
70 | static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); | ||
71 | |||
72 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
73 | |||
74 | /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */ | ||
75 | static int alarm_hz = CONFIG_VMI_ALARM_HZ; | ||
76 | |||
77 | /* Cache of the value get_cycle_frequency / HZ. */ | ||
78 | static signed long long cycles_per_jiffy; | ||
79 | |||
80 | /* Cache of the value get_cycle_frequency / alarm_hz. */ | ||
81 | static signed long long cycles_per_alarm; | ||
82 | |||
83 | /* The number of cycles accounted for by the 'jiffies'/'xtime' count. | ||
84 | * Protected by xtime_lock. */ | ||
85 | static unsigned long long real_cycles_accounted_system; | ||
86 | |||
87 | /* The number of cycles accounted for by update_process_times(), per cpu. */ | ||
88 | static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); | ||
89 | |||
90 | /* The number of stolen cycles accounted, per cpu. */ | ||
91 | static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); | ||
92 | |||
93 | /* Clock source. */ | ||
94 | static cycle_t read_real_cycles(void) | ||
95 | { | ||
96 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
97 | } | ||
98 | |||
99 | static cycle_t read_available_cycles(void) | ||
100 | { | ||
101 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
102 | } | ||
103 | |||
104 | #if 0 | ||
105 | static cycle_t read_stolen_cycles(void) | ||
106 | { | ||
107 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); | ||
108 | } | ||
109 | #endif /* 0 */ | ||
110 | |||
111 | static struct clocksource clocksource_vmi = { | ||
112 | .name = "vmi-timer", | ||
113 | .rating = 450, | ||
114 | .read = read_real_cycles, | ||
115 | .mask = CLOCKSOURCE_MASK(64), | ||
116 | .mult = 0, /* to be set */ | ||
117 | .shift = 22, | ||
118 | .is_continuous = 1, | ||
119 | }; | ||
120 | |||
121 | |||
122 | /* Timer interrupt handler. */ | ||
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | ||
124 | |||
125 | static struct irqaction vmi_timer_irq = { | ||
126 | vmi_timer_interrupt, | ||
127 | SA_INTERRUPT, | ||
128 | CPU_MASK_NONE, | ||
129 | "VMI-alarm", | ||
130 | NULL, | ||
131 | NULL | ||
132 | }; | ||
133 | |||
134 | /* Alarm rate */ | ||
135 | static int __init vmi_timer_alarm_rate_setup(char* str) | ||
136 | { | ||
137 | int alarm_rate; | ||
138 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | ||
139 | alarm_hz = alarm_rate; | ||
140 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | ||
141 | } | ||
142 | return 1; | ||
143 | } | ||
144 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | ||
145 | |||
146 | |||
147 | /* Initialization */ | ||
148 | static void vmi_get_wallclock_ts(struct timespec *ts) | ||
149 | { | ||
150 | unsigned long long wallclock; | ||
151 | wallclock = vmi_timer_ops.get_wallclock(); // nsec units | ||
152 | ts->tv_nsec = do_div(wallclock, 1000000000); | ||
153 | ts->tv_sec = wallclock; | ||
154 | } | ||
155 | |||
156 | static void update_xtime_from_wallclock(void) | ||
157 | { | ||
158 | struct timespec ts; | ||
159 | vmi_get_wallclock_ts(&ts); | ||
160 | do_settimeofday(&ts); | ||
161 | } | ||
162 | |||
163 | unsigned long vmi_get_wallclock(void) | ||
164 | { | ||
165 | struct timespec ts; | ||
166 | vmi_get_wallclock_ts(&ts); | ||
167 | return ts.tv_sec; | ||
168 | } | ||
169 | |||
170 | int vmi_set_wallclock(unsigned long now) | ||
171 | { | ||
172 | return -1; | ||
173 | } | ||
174 | |||
175 | unsigned long long vmi_sched_clock(void) | ||
176 | { | ||
177 | return read_available_cycles(); | ||
178 | } | ||
179 | |||
180 | void __init vmi_time_init(void) | ||
181 | { | ||
182 | unsigned long long cycles_per_sec, cycles_per_msec; | ||
183 | unsigned long flags; | ||
184 | |||
185 | local_irq_save(flags); | ||
186 | setup_irq(0, &vmi_timer_irq); | ||
187 | #ifdef CONFIG_X86_LOCAL_APIC | ||
188 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); | ||
189 | #endif | ||
190 | |||
191 | no_sync_cmos_clock = 1; | ||
192 | |||
193 | vmi_get_wallclock_ts(&xtime); | ||
194 | set_normalized_timespec(&wall_to_monotonic, | ||
195 | -xtime.tv_sec, -xtime.tv_nsec); | ||
196 | |||
197 | real_cycles_accounted_system = read_real_cycles(); | ||
198 | update_xtime_from_wallclock(); | ||
199 | per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); | ||
200 | |||
201 | cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); | ||
202 | |||
203 | cycles_per_jiffy = cycles_per_sec; | ||
204 | (void)do_div(cycles_per_jiffy, HZ); | ||
205 | cycles_per_alarm = cycles_per_sec; | ||
206 | (void)do_div(cycles_per_alarm, alarm_hz); | ||
207 | cycles_per_msec = cycles_per_sec; | ||
208 | (void)do_div(cycles_per_msec, 1000); | ||
209 | cpu_khz = cycles_per_msec; | ||
210 | |||
211 | printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" | ||
212 | "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, | ||
213 | cycles_per_alarm); | ||
214 | |||
215 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
216 | clocksource_vmi.shift); | ||
217 | if (clocksource_register(&clocksource_vmi)) | ||
218 | printk(KERN_WARNING "Error registering VMITIME clocksource."); | ||
219 | |||
220 | /* Disable PIT. */ | ||
221 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
222 | |||
223 | /* Schedule the alarm. Do this in phase with process_times_cycles_accounted_cpu | ||
224 | * to reduce the latency of calling update_process_times. */ | ||
225 | vmi_timer_ops.set_alarm( | ||
226 | VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
227 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
228 | cycles_per_alarm); | ||
229 | |||
230 | local_irq_restore(flags); | ||
231 | } | ||
232 | |||
233 | #ifdef CONFIG_X86_LOCAL_APIC | ||
234 | |||
235 | void __init vmi_timer_setup_boot_alarm(void) | ||
236 | { | ||
237 | local_irq_disable(); | ||
238 | |||
239 | /* Route the interrupt to the correct vector. */ | ||
240 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
241 | |||
242 | /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ | ||
243 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
244 | vmi_timer_ops.set_alarm( | ||
245 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
246 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
247 | cycles_per_alarm); | ||
248 | local_irq_enable(); | ||
249 | } | ||
250 | |||
251 | /* Initialize the time accounting variables for an AP on an SMP system. | ||
252 | * Also, set the local alarm for the AP. */ | ||
253 | void __init vmi_timer_setup_secondary_alarm(void) | ||
254 | { | ||
255 | int cpu = smp_processor_id(); | ||
256 | |||
257 | /* Route the interrupt to the correct vector. */ | ||
258 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
259 | |||
260 | per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); | ||
261 | |||
262 | vmi_timer_ops.set_alarm( | ||
263 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
264 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
265 | cycles_per_alarm); | ||
266 | } | ||
267 | |||
268 | #endif | ||
269 | |||
270 | /* Update system wide (real) time accounting (e.g. jiffies, xtime). */ | ||
271 | static void vmi_account_real_cycles(unsigned long long cur_real_cycles) | ||
272 | { | ||
273 | long long cycles_not_accounted; | ||
274 | |||
275 | write_seqlock(&xtime_lock); | ||
276 | |||
277 | cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; | ||
278 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
279 | /* System wide jiffies and wallclock. */ | ||
280 | do_timer(1); | ||
281 | |||
282 | cycles_not_accounted -= cycles_per_jiffy; | ||
283 | real_cycles_accounted_system += cycles_per_jiffy; | ||
284 | } | ||
285 | |||
286 | if (vmi_timer_ops.wallclock_updated()) | ||
287 | update_xtime_from_wallclock(); | ||
288 | |||
289 | write_sequnlock(&xtime_lock); | ||
290 | } | ||
291 | |||
292 | /* Update per-cpu process times. */ | ||
293 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | ||
294 | unsigned long long cur_process_times_cycles) | ||
295 | { | ||
296 | long long cycles_not_accounted; | ||
297 | cycles_not_accounted = cur_process_times_cycles - | ||
298 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
299 | |||
300 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
301 | /* Account time to the current process. This includes | ||
302 | * calling into the scheduler to decrement the timeslice | ||
303 | * and possibly reschedule.*/ | ||
304 | update_process_times(user_mode(regs)); | ||
305 | /* XXX handle /proc/profile multiplier. */ | ||
306 | profile_tick(CPU_PROFILING); | ||
307 | |||
308 | cycles_not_accounted -= cycles_per_jiffy; | ||
309 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
310 | } | ||
311 | } | ||
312 | |||
313 | #ifdef CONFIG_NO_IDLE_HZ | ||
314 | /* Update per-cpu idle times. Used when a no-hz halt is ended. */ | ||
315 | static void vmi_account_no_hz_idle_cycles(int cpu, | ||
316 | unsigned long long cur_process_times_cycles) | ||
317 | { | ||
318 | long long cycles_not_accounted; | ||
319 | unsigned long no_idle_hz_jiffies = 0; | ||
320 | |||
321 | cycles_not_accounted = cur_process_times_cycles - | ||
322 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
323 | |||
324 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
325 | no_idle_hz_jiffies++; | ||
326 | cycles_not_accounted -= cycles_per_jiffy; | ||
327 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
328 | } | ||
329 | /* Account time to the idle process. */ | ||
330 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | ||
331 | } | ||
332 | #endif | ||
333 | |||
334 | /* Update per-cpu stolen time. */ | ||
335 | static void vmi_account_stolen_cycles(int cpu, | ||
336 | unsigned long long cur_real_cycles, | ||
337 | unsigned long long cur_avail_cycles) | ||
338 | { | ||
339 | long long stolen_cycles_not_accounted; | ||
340 | unsigned long stolen_jiffies = 0; | ||
341 | |||
342 | if (cur_real_cycles < cur_avail_cycles) | ||
343 | return; | ||
344 | |||
345 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | ||
346 | per_cpu(stolen_cycles_accounted_cpu, cpu); | ||
347 | |||
348 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | ||
349 | stolen_jiffies++; | ||
350 | stolen_cycles_not_accounted -= cycles_per_jiffy; | ||
351 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
352 | } | ||
353 | /* HACK: pass NULL to force time onto cpustat->steal. */ | ||
354 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | ||
355 | } | ||
356 | |||
357 | /* Body of either IRQ0 interrupt handler (UP no local-APIC) or | ||
358 | * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */ | ||
359 | static void vmi_local_timer_interrupt(int cpu) | ||
360 | { | ||
361 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
362 | |||
363 | cur_real_cycles = read_real_cycles(); | ||
364 | cur_process_times_cycles = read_available_cycles(); | ||
365 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
366 | vmi_account_real_cycles(cur_real_cycles); | ||
367 | /* Update per-cpu process times. */ | ||
368 | vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles); | ||
369 | /* Update time stolen from this cpu by the hypervisor. */ | ||
370 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
371 | } | ||
372 | |||
373 | #ifdef CONFIG_NO_IDLE_HZ | ||
374 | |||
375 | /* Must be called only from idle loop, with interrupts disabled. */ | ||
376 | int vmi_stop_hz_timer(void) | ||
377 | { | ||
378 | /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ | ||
379 | |||
380 | unsigned long seq, next; | ||
381 | unsigned long long real_cycles_expiry; | ||
382 | int cpu = smp_processor_id(); | ||
383 | int idle; | ||
384 | |||
385 | BUG_ON(!irqs_disabled()); | ||
386 | if (sysctl_hz_timer != 0) | ||
387 | return 0; | ||
388 | |||
389 | cpu_set(cpu, nohz_cpu_mask); | ||
390 | smp_mb(); | ||
391 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || | ||
392 | (next = next_timer_interrupt(), time_before_eq(next, jiffies))) { | ||
393 | cpu_clear(cpu, nohz_cpu_mask); | ||
394 | next = jiffies; | ||
395 | idle = 0; | ||
396 | } else | ||
397 | idle = 1; | ||
398 | |||
399 | /* Convert jiffies to the real cycle counter. */ | ||
400 | do { | ||
401 | seq = read_seqbegin(&xtime_lock); | ||
402 | real_cycles_expiry = real_cycles_accounted_system + | ||
403 | (long)(next - jiffies) * cycles_per_jiffy; | ||
404 | } while (read_seqretry(&xtime_lock, seq)); | ||
405 | |||
406 | /* This cpu is going idle. Disable the periodic alarm. */ | ||
407 | if (idle) { | ||
408 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
409 | per_cpu(idle_start_jiffies, cpu) = jiffies; | ||
410 | } | ||
411 | |||
412 | /* Set the real time alarm to expire at the next event. */ | ||
413 | vmi_timer_ops.set_alarm( | ||
414 | VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, | ||
415 | real_cycles_expiry, 0); | ||
416 | |||
417 | return idle; | ||
418 | } | ||
419 | |||
420 | static void vmi_reenable_hz_timer(int cpu) | ||
421 | { | ||
422 | /* For /proc/vmi/info idle_hz stat. */ | ||
423 | per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); | ||
424 | per_cpu(vmi_idle_no_hz_irqs, cpu)++; | ||
425 | |||
426 | /* Don't bother explicitly cancelling the one-shot alarm -- at | ||
427 | * worse we will receive a spurious timer interrupt. */ | ||
428 | vmi_timer_ops.set_alarm( | ||
429 | VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
430 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
431 | cycles_per_alarm); | ||
432 | /* Indicate this cpu is no longer nohz idle. */ | ||
433 | cpu_clear(cpu, nohz_cpu_mask); | ||
434 | } | ||
435 | |||
436 | /* Called from interrupt handlers when (local) HZ timer is disabled. */ | ||
437 | void vmi_account_time_restart_hz_timer(void) | ||
438 | { | ||
439 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
440 | int cpu = smp_processor_id(); | ||
441 | |||
442 | BUG_ON(!irqs_disabled()); | ||
443 | /* Account the time during which the HZ timer was disabled. */ | ||
444 | cur_real_cycles = read_real_cycles(); | ||
445 | cur_process_times_cycles = read_available_cycles(); | ||
446 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
447 | vmi_account_real_cycles(cur_real_cycles); | ||
448 | /* Update per-cpu idle times. */ | ||
449 | vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles); | ||
450 | /* Update time stolen from this cpu by the hypervisor. */ | ||
451 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
452 | /* Reenable the hz timer. */ | ||
453 | vmi_reenable_hz_timer(cpu); | ||
454 | } | ||
455 | |||
456 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
457 | |||
458 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | ||
459 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | ||
460 | * APIC setup and setup_boot_vmi_alarm() is called. */ | ||
461 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
462 | { | ||
463 | vmi_local_timer_interrupt(smp_processor_id()); | ||
464 | return IRQ_HANDLED; | ||
465 | } | ||
466 | |||
467 | #ifdef CONFIG_X86_LOCAL_APIC | ||
468 | |||
469 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | ||
470 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | ||
471 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */ | ||
472 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | ||
473 | { | ||
474 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
475 | int cpu = smp_processor_id(); | ||
476 | |||
477 | /* | ||
478 | * the NMI deadlock-detector uses this. | ||
479 | */ | ||
480 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | ||
481 | |||
482 | /* | ||
483 | * NOTE! We'd better ACK the irq immediately, | ||
484 | * because timer handling can be slow. | ||
485 | */ | ||
486 | ack_APIC_irq(); | ||
487 | |||
488 | /* | ||
489 | * update_process_times() expects us to have done irq_enter(). | ||
490 | * Besides, if we don't timer interrupts ignore the global | ||
491 | * interrupt lock, which is the WrongThing (tm) to do. | ||
492 | */ | ||
493 | irq_enter(); | ||
494 | vmi_local_timer_interrupt(cpu); | ||
495 | irq_exit(); | ||
496 | set_irq_regs(old_regs); | ||
497 | } | ||
498 | |||
499 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 5038a73d554e..ca51610955df 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -37,9 +37,14 @@ SECTIONS | |||
37 | { | 37 | { |
38 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | 38 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; |
39 | phys_startup_32 = startup_32 - LOAD_OFFSET; | 39 | phys_startup_32 = startup_32 - LOAD_OFFSET; |
40 | |||
41 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
42 | _text = .; /* Text and read-only data */ | ||
43 | *(.text.head) | ||
44 | } :text = 0x9090 | ||
45 | |||
40 | /* read-only */ | 46 | /* read-only */ |
41 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | 47 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
42 | _text = .; /* Text and read-only data */ | ||
43 | *(.text) | 48 | *(.text) |
44 | SCHED_TEXT | 49 | SCHED_TEXT |
45 | LOCK_TEXT | 50 | LOCK_TEXT |
diff --git a/arch/i386/math-emu/get_address.c b/arch/i386/math-emu/get_address.c index 9819b705efa4..2e2c51a8bd3a 100644 --- a/arch/i386/math-emu/get_address.c +++ b/arch/i386/math-emu/get_address.c | |||
@@ -56,15 +56,14 @@ static int reg_offset_vm86[] = { | |||
56 | #define VM86_REG_(x) (*(unsigned short *) \ | 56 | #define VM86_REG_(x) (*(unsigned short *) \ |
57 | (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) | 57 | (reg_offset_vm86[((unsigned)x)]+(u_char *) FPU_info)) |
58 | 58 | ||
59 | /* These are dummy, fs and gs are not saved on the stack. */ | 59 | /* This dummy, gs is not saved on the stack. */ |
60 | #define ___FS ___ds | ||
61 | #define ___GS ___ds | 60 | #define ___GS ___ds |
62 | 61 | ||
63 | static int reg_offset_pm[] = { | 62 | static int reg_offset_pm[] = { |
64 | offsetof(struct info,___cs), | 63 | offsetof(struct info,___cs), |
65 | offsetof(struct info,___ds), | 64 | offsetof(struct info,___ds), |
66 | offsetof(struct info,___es), | 65 | offsetof(struct info,___es), |
67 | offsetof(struct info,___FS), | 66 | offsetof(struct info,___fs), |
68 | offsetof(struct info,___GS), | 67 | offsetof(struct info,___GS), |
69 | offsetof(struct info,___ss), | 68 | offsetof(struct info,___ss), |
70 | offsetof(struct info,___ds) | 69 | offsetof(struct info,___ds) |
@@ -169,13 +168,10 @@ static long pm_address(u_char FPU_modrm, u_char segment, | |||
169 | 168 | ||
170 | switch ( segment ) | 169 | switch ( segment ) |
171 | { | 170 | { |
172 | /* fs and gs aren't used by the kernel, so they still have their | 171 | /* gs isn't used by the kernel, so it still has its |
173 | user-space values. */ | 172 | user-space value. */ |
174 | case PREFIX_FS_-1: | ||
175 | /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ | ||
176 | savesegment(fs, addr->selector); | ||
177 | break; | ||
178 | case PREFIX_GS_-1: | 173 | case PREFIX_GS_-1: |
174 | /* N.B. - movl %seg, mem is a 2 byte write regardless of prefix */ | ||
179 | savesegment(gs, addr->selector); | 175 | savesegment(gs, addr->selector); |
180 | break; | 176 | break; |
181 | default: | 177 | default: |
diff --git a/arch/i386/math-emu/status_w.h b/arch/i386/math-emu/status_w.h index 78d7b7689dd6..59e73302aa60 100644 --- a/arch/i386/math-emu/status_w.h +++ b/arch/i386/math-emu/status_w.h | |||
@@ -48,9 +48,11 @@ | |||
48 | 48 | ||
49 | #define status_word() \ | 49 | #define status_word() \ |
50 | ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) | 50 | ((partial_status & ~SW_Top & 0xffff) | ((top << SW_Top_Shift) & SW_Top)) |
51 | #define setcc(cc) ({ \ | 51 | static inline void setcc(int cc) |
52 | partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); \ | 52 | { |
53 | partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); }) | 53 | partial_status &= ~(SW_C0|SW_C1|SW_C2|SW_C3); |
54 | partial_status |= (cc) & (SW_C0|SW_C1|SW_C2|SW_C3); | ||
55 | } | ||
54 | 56 | ||
55 | #ifdef PECULIAR_486 | 57 | #ifdef PECULIAR_486 |
56 | /* Default, this conveys no information, but an 80486 does it. */ | 58 | /* Default, this conveys no information, but an 80486 does it. */ |
diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index e0c390d6ceb5..aa58720f6871 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c | |||
@@ -101,7 +101,6 @@ extern void find_max_pfn(void); | |||
101 | extern void add_one_highpage_init(struct page *, int, int); | 101 | extern void add_one_highpage_init(struct page *, int, int); |
102 | 102 | ||
103 | extern struct e820map e820; | 103 | extern struct e820map e820; |
104 | extern unsigned long init_pg_tables_end; | ||
105 | extern unsigned long highend_pfn, highstart_pfn; | 104 | extern unsigned long highend_pfn, highstart_pfn; |
106 | extern unsigned long max_low_pfn; | 105 | extern unsigned long max_low_pfn; |
107 | extern unsigned long totalram_pages; | 106 | extern unsigned long totalram_pages; |
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index cba9b3894a33..b8c4e259fc8b 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c | |||
@@ -46,17 +46,17 @@ int unregister_page_fault_notifier(struct notifier_block *nb) | |||
46 | } | 46 | } |
47 | EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); | 47 | EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); |
48 | 48 | ||
49 | static inline int notify_page_fault(enum die_val val, const char *str, | 49 | static inline int notify_page_fault(struct pt_regs *regs, long err) |
50 | struct pt_regs *regs, long err, int trap, int sig) | ||
51 | { | 50 | { |
52 | struct die_args args = { | 51 | struct die_args args = { |
53 | .regs = regs, | 52 | .regs = regs, |
54 | .str = str, | 53 | .str = "page fault", |
55 | .err = err, | 54 | .err = err, |
56 | .trapnr = trap, | 55 | .trapnr = 14, |
57 | .signr = sig | 56 | .signr = SIGSEGV |
58 | }; | 57 | }; |
59 | return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); | 58 | return atomic_notifier_call_chain(&notify_page_fault_chain, |
59 | DIE_PAGE_FAULT, &args); | ||
60 | } | 60 | } |
61 | 61 | ||
62 | /* | 62 | /* |
@@ -327,8 +327,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
327 | if (unlikely(address >= TASK_SIZE)) { | 327 | if (unlikely(address >= TASK_SIZE)) { |
328 | if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) | 328 | if (!(error_code & 0x0000000d) && vmalloc_fault(address) >= 0) |
329 | return; | 329 | return; |
330 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 330 | if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
331 | SIGSEGV) == NOTIFY_STOP) | ||
332 | return; | 331 | return; |
333 | /* | 332 | /* |
334 | * Don't take the mm semaphore here. If we fixup a prefetch | 333 | * Don't take the mm semaphore here. If we fixup a prefetch |
@@ -337,8 +336,7 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
337 | goto bad_area_nosemaphore; | 336 | goto bad_area_nosemaphore; |
338 | } | 337 | } |
339 | 338 | ||
340 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 339 | if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
341 | SIGSEGV) == NOTIFY_STOP) | ||
342 | return; | 340 | return; |
343 | 341 | ||
344 | /* It's safe to allow irq's after cr2 has been saved and the vmalloc | 342 | /* It's safe to allow irq's after cr2 has been saved and the vmalloc |
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index c5c5ea700cc7..ae436882af7a 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c | |||
@@ -62,6 +62,7 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
62 | 62 | ||
63 | #ifdef CONFIG_X86_PAE | 63 | #ifdef CONFIG_X86_PAE |
64 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 64 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
65 | paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); | ||
65 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 66 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
66 | pud = pud_offset(pgd, 0); | 67 | pud = pud_offset(pgd, 0); |
67 | if (pmd_table != pmd_offset(pud, 0)) | 68 | if (pmd_table != pmd_offset(pud, 0)) |
@@ -82,6 +83,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
82 | { | 83 | { |
83 | if (pmd_none(*pmd)) { | 84 | if (pmd_none(*pmd)) { |
84 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 85 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
86 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); | ||
85 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 87 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
86 | if (page_table != pte_offset_kernel(pmd, 0)) | 88 | if (page_table != pte_offset_kernel(pmd, 0)) |
87 | BUG(); | 89 | BUG(); |
@@ -345,6 +347,8 @@ static void __init pagetable_init (void) | |||
345 | /* Init entries of the first-level page table to the zero page */ | 347 | /* Init entries of the first-level page table to the zero page */ |
346 | for (i = 0; i < PTRS_PER_PGD; i++) | 348 | for (i = 0; i < PTRS_PER_PGD; i++) |
347 | set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); | 349 | set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); |
350 | #else | ||
351 | paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); | ||
348 | #endif | 352 | #endif |
349 | 353 | ||
350 | /* Enable PSE if available */ | 354 | /* Enable PSE if available */ |
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index e223b1d4981c..412ebbd8adb0 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c | |||
@@ -60,6 +60,7 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
60 | address = __pa(address); | 60 | address = __pa(address); |
61 | addr = address & LARGE_PAGE_MASK; | 61 | addr = address & LARGE_PAGE_MASK; |
62 | pbase = (pte_t *)page_address(base); | 62 | pbase = (pte_t *)page_address(base); |
63 | paravirt_alloc_pt(page_to_pfn(base)); | ||
63 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { | 64 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { |
64 | set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, | 65 | set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, |
65 | addr == address ? prot : ref_prot)); | 66 | addr == address ? prot : ref_prot)); |
@@ -172,6 +173,7 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
172 | if (!PageReserved(kpte_page)) { | 173 | if (!PageReserved(kpte_page)) { |
173 | if (cpu_has_pse && (page_private(kpte_page) == 0)) { | 174 | if (cpu_has_pse && (page_private(kpte_page) == 0)) { |
174 | ClearPagePrivate(kpte_page); | 175 | ClearPagePrivate(kpte_page); |
176 | paravirt_release_pt(page_to_pfn(kpte_page)); | ||
175 | list_add(&kpte_page->lru, &df_list); | 177 | list_add(&kpte_page->lru, &df_list); |
176 | revert_page(kpte_page, address); | 178 | revert_page(kpte_page, address); |
177 | } | 179 | } |
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index f349eaf450b0..fa0cfbd551e1 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c | |||
@@ -171,6 +171,8 @@ void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) | |||
171 | void reserve_top_address(unsigned long reserve) | 171 | void reserve_top_address(unsigned long reserve) |
172 | { | 172 | { |
173 | BUG_ON(fixmaps > 0); | 173 | BUG_ON(fixmaps > 0); |
174 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", | ||
175 | (int)-reserve); | ||
174 | #ifdef CONFIG_COMPAT_VDSO | 176 | #ifdef CONFIG_COMPAT_VDSO |
175 | BUG_ON(reserve != 0); | 177 | BUG_ON(reserve != 0); |
176 | #else | 178 | #else |
@@ -248,9 +250,15 @@ void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |||
248 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | 250 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
249 | swapper_pg_dir + USER_PTRS_PER_PGD, | 251 | swapper_pg_dir + USER_PTRS_PER_PGD, |
250 | KERNEL_PGD_PTRS); | 252 | KERNEL_PGD_PTRS); |
253 | |||
251 | if (PTRS_PER_PMD > 1) | 254 | if (PTRS_PER_PMD > 1) |
252 | return; | 255 | return; |
253 | 256 | ||
257 | /* must happen under lock */ | ||
258 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | ||
259 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | ||
260 | USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); | ||
261 | |||
254 | pgd_list_add(pgd); | 262 | pgd_list_add(pgd); |
255 | spin_unlock_irqrestore(&pgd_lock, flags); | 263 | spin_unlock_irqrestore(&pgd_lock, flags); |
256 | } | 264 | } |
@@ -260,6 +268,7 @@ void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | |||
260 | { | 268 | { |
261 | unsigned long flags; /* can be called from interrupt context */ | 269 | unsigned long flags; /* can be called from interrupt context */ |
262 | 270 | ||
271 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); | ||
263 | spin_lock_irqsave(&pgd_lock, flags); | 272 | spin_lock_irqsave(&pgd_lock, flags); |
264 | pgd_list_del(pgd); | 273 | pgd_list_del(pgd); |
265 | spin_unlock_irqrestore(&pgd_lock, flags); | 274 | spin_unlock_irqrestore(&pgd_lock, flags); |
@@ -277,13 +286,18 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
277 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | 286 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); |
278 | if (!pmd) | 287 | if (!pmd) |
279 | goto out_oom; | 288 | goto out_oom; |
289 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | ||
280 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | 290 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
281 | } | 291 | } |
282 | return pgd; | 292 | return pgd; |
283 | 293 | ||
284 | out_oom: | 294 | out_oom: |
285 | for (i--; i >= 0; i--) | 295 | for (i--; i >= 0; i--) { |
286 | kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); | 296 | pgd_t pgdent = pgd[i]; |
297 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
298 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
299 | kmem_cache_free(pmd_cache, pmd); | ||
300 | } | ||
287 | kmem_cache_free(pgd_cache, pgd); | 301 | kmem_cache_free(pgd_cache, pgd); |
288 | return NULL; | 302 | return NULL; |
289 | } | 303 | } |
@@ -294,8 +308,12 @@ void pgd_free(pgd_t *pgd) | |||
294 | 308 | ||
295 | /* in the PAE case user pgd entries are overwritten before usage */ | 309 | /* in the PAE case user pgd entries are overwritten before usage */ |
296 | if (PTRS_PER_PMD > 1) | 310 | if (PTRS_PER_PMD > 1) |
297 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) | 311 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { |
298 | kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); | 312 | pgd_t pgdent = pgd[i]; |
313 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | ||
314 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | ||
315 | kmem_cache_free(pmd_cache, pmd); | ||
316 | } | ||
299 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ | 317 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ |
300 | kmem_cache_free(pgd_cache, pgd); | 318 | kmem_cache_free(pgd_cache, pgd); |
301 | } | 319 | } |
diff --git a/arch/i386/oprofile/op_model_ppro.c b/arch/i386/oprofile/op_model_ppro.c index ca2447e05e15..c554f52cb808 100644 --- a/arch/i386/oprofile/op_model_ppro.c +++ b/arch/i386/oprofile/op_model_ppro.c | |||
@@ -24,7 +24,8 @@ | |||
24 | 24 | ||
25 | #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) | 25 | #define CTR_IS_RESERVED(msrs,c) (msrs->counters[(c)].addr ? 1 : 0) |
26 | #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) | 26 | #define CTR_READ(l,h,msrs,c) do {rdmsr(msrs->counters[(c)].addr, (l), (h));} while (0) |
27 | #define CTR_WRITE(l,msrs,c) do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), -1);} while (0) | 27 | #define CTR_32BIT_WRITE(l,msrs,c) \ |
28 | do {wrmsr(msrs->counters[(c)].addr, -(u32)(l), 0);} while (0) | ||
28 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) | 29 | #define CTR_OVERFLOWED(n) (!((n) & (1U<<31))) |
29 | 30 | ||
30 | #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) | 31 | #define CTRL_IS_RESERVED(msrs,c) (msrs->controls[(c)].addr ? 1 : 0) |
@@ -79,7 +80,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
79 | for (i = 0; i < NUM_COUNTERS; ++i) { | 80 | for (i = 0; i < NUM_COUNTERS; ++i) { |
80 | if (unlikely(!CTR_IS_RESERVED(msrs,i))) | 81 | if (unlikely(!CTR_IS_RESERVED(msrs,i))) |
81 | continue; | 82 | continue; |
82 | CTR_WRITE(1, msrs, i); | 83 | CTR_32BIT_WRITE(1, msrs, i); |
83 | } | 84 | } |
84 | 85 | ||
85 | /* enable active counters */ | 86 | /* enable active counters */ |
@@ -87,7 +88,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) | |||
87 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { | 88 | if ((counter_config[i].enabled) && (CTR_IS_RESERVED(msrs,i))) { |
88 | reset_value[i] = counter_config[i].count; | 89 | reset_value[i] = counter_config[i].count; |
89 | 90 | ||
90 | CTR_WRITE(counter_config[i].count, msrs, i); | 91 | CTR_32BIT_WRITE(counter_config[i].count, msrs, i); |
91 | 92 | ||
92 | CTRL_READ(low, high, msrs, i); | 93 | CTRL_READ(low, high, msrs, i); |
93 | CTRL_CLEAR(low); | 94 | CTRL_CLEAR(low); |
@@ -116,7 +117,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
116 | CTR_READ(low, high, msrs, i); | 117 | CTR_READ(low, high, msrs, i); |
117 | if (CTR_OVERFLOWED(low)) { | 118 | if (CTR_OVERFLOWED(low)) { |
118 | oprofile_add_sample(regs, i); | 119 | oprofile_add_sample(regs, i); |
119 | CTR_WRITE(reset_value[i], msrs, i); | 120 | CTR_32BIT_WRITE(reset_value[i], msrs, i); |
120 | } | 121 | } |
121 | } | 122 | } |
122 | 123 | ||
diff --git a/arch/i386/pci/Makefile b/arch/i386/pci/Makefile index 1594d2f55c8f..44650e03308b 100644 --- a/arch/i386/pci/Makefile +++ b/arch/i386/pci/Makefile | |||
@@ -1,7 +1,7 @@ | |||
1 | obj-y := i386.o init.o | 1 | obj-y := i386.o init.o |
2 | 2 | ||
3 | obj-$(CONFIG_PCI_BIOS) += pcbios.o | 3 | obj-$(CONFIG_PCI_BIOS) += pcbios.o |
4 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o | 4 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o mmconfig-shared.o |
5 | obj-$(CONFIG_PCI_DIRECT) += direct.o | 5 | obj-$(CONFIG_PCI_DIRECT) += direct.o |
6 | 6 | ||
7 | pci-y := fixup.o | 7 | pci-y := fixup.o |
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c new file mode 100644 index 000000000000..747d8c63b0c4 --- /dev/null +++ b/arch/i386/pci/mmconfig-shared.c | |||
@@ -0,0 +1,264 @@ | |||
1 | /* | ||
2 | * mmconfig-shared.c - Low-level direct PCI config space access via | ||
3 | * MMCONFIG - common code between i386 and x86-64. | ||
4 | * | ||
5 | * This code does: | ||
6 | * - known chipset handling | ||
7 | * - ACPI decoding and validation | ||
8 | * | ||
9 | * Per-architecture code takes care of the mappings and accesses | ||
10 | * themselves. | ||
11 | */ | ||
12 | |||
13 | #include <linux/pci.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/acpi.h> | ||
16 | #include <linux/bitmap.h> | ||
17 | #include <asm/e820.h> | ||
18 | |||
19 | #include "pci.h" | ||
20 | |||
21 | /* aperture is up to 256MB but BIOS may reserve less */ | ||
22 | #define MMCONFIG_APER_MIN (2 * 1024*1024) | ||
23 | #define MMCONFIG_APER_MAX (256 * 1024*1024) | ||
24 | |||
25 | DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); | ||
26 | |||
27 | /* K8 systems have some devices (typically in the builtin northbridge) | ||
28 | that are only accessible using type1 | ||
29 | Normally this can be expressed in the MCFG by not listing them | ||
30 | and assigning suitable _SEGs, but this isn't implemented in some BIOS. | ||
31 | Instead try to discover all devices on bus 0 that are unreachable using MM | ||
32 | and fallback for them. */ | ||
33 | static void __init unreachable_devices(void) | ||
34 | { | ||
35 | int i, bus; | ||
36 | /* Use the max bus number from ACPI here? */ | ||
37 | for (bus = 0; bus < PCI_MMCFG_MAX_CHECK_BUS; bus++) { | ||
38 | for (i = 0; i < 32; i++) { | ||
39 | unsigned int devfn = PCI_DEVFN(i, 0); | ||
40 | u32 val1, val2; | ||
41 | |||
42 | pci_conf1_read(0, bus, devfn, 0, 4, &val1); | ||
43 | if (val1 == 0xffffffff) | ||
44 | continue; | ||
45 | |||
46 | if (pci_mmcfg_arch_reachable(0, bus, devfn)) { | ||
47 | raw_pci_ops->read(0, bus, devfn, 0, 4, &val2); | ||
48 | if (val1 == val2) | ||
49 | continue; | ||
50 | } | ||
51 | set_bit(i + 32 * bus, pci_mmcfg_fallback_slots); | ||
52 | printk(KERN_NOTICE "PCI: No mmconfig possible on device" | ||
53 | " %02x:%02x\n", bus, i); | ||
54 | } | ||
55 | } | ||
56 | } | ||
57 | |||
58 | static const char __init *pci_mmcfg_e7520(void) | ||
59 | { | ||
60 | u32 win; | ||
61 | pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); | ||
62 | |||
63 | pci_mmcfg_config_num = 1; | ||
64 | pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); | ||
65 | if (!pci_mmcfg_config) | ||
66 | return NULL; | ||
67 | pci_mmcfg_config[0].address = (win & 0xf000) << 16; | ||
68 | pci_mmcfg_config[0].pci_segment = 0; | ||
69 | pci_mmcfg_config[0].start_bus_number = 0; | ||
70 | pci_mmcfg_config[0].end_bus_number = 255; | ||
71 | |||
72 | return "Intel Corporation E7520 Memory Controller Hub"; | ||
73 | } | ||
74 | |||
75 | static const char __init *pci_mmcfg_intel_945(void) | ||
76 | { | ||
77 | u32 pciexbar, mask = 0, len = 0; | ||
78 | |||
79 | pci_mmcfg_config_num = 1; | ||
80 | |||
81 | pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0x48, 4, &pciexbar); | ||
82 | |||
83 | /* Enable bit */ | ||
84 | if (!(pciexbar & 1)) | ||
85 | pci_mmcfg_config_num = 0; | ||
86 | |||
87 | /* Size bits */ | ||
88 | switch ((pciexbar >> 1) & 3) { | ||
89 | case 0: | ||
90 | mask = 0xf0000000U; | ||
91 | len = 0x10000000U; | ||
92 | break; | ||
93 | case 1: | ||
94 | mask = 0xf8000000U; | ||
95 | len = 0x08000000U; | ||
96 | break; | ||
97 | case 2: | ||
98 | mask = 0xfc000000U; | ||
99 | len = 0x04000000U; | ||
100 | break; | ||
101 | default: | ||
102 | pci_mmcfg_config_num = 0; | ||
103 | } | ||
104 | |||
105 | /* Errata #2, things break when not aligned on a 256Mb boundary */ | ||
106 | /* Can only happen in 64M/128M mode */ | ||
107 | |||
108 | if ((pciexbar & mask) & 0x0fffffffU) | ||
109 | pci_mmcfg_config_num = 0; | ||
110 | |||
111 | if (pci_mmcfg_config_num) { | ||
112 | pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); | ||
113 | if (!pci_mmcfg_config) | ||
114 | return NULL; | ||
115 | pci_mmcfg_config[0].address = pciexbar & mask; | ||
116 | pci_mmcfg_config[0].pci_segment = 0; | ||
117 | pci_mmcfg_config[0].start_bus_number = 0; | ||
118 | pci_mmcfg_config[0].end_bus_number = (len >> 20) - 1; | ||
119 | } | ||
120 | |||
121 | return "Intel Corporation 945G/GZ/P/PL Express Memory Controller Hub"; | ||
122 | } | ||
123 | |||
124 | struct pci_mmcfg_hostbridge_probe { | ||
125 | u32 vendor; | ||
126 | u32 device; | ||
127 | const char *(*probe)(void); | ||
128 | }; | ||
129 | |||
130 | static struct pci_mmcfg_hostbridge_probe pci_mmcfg_probes[] __initdata = { | ||
131 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, pci_mmcfg_e7520 }, | ||
132 | { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82945G_HB, pci_mmcfg_intel_945 }, | ||
133 | }; | ||
134 | |||
135 | static int __init pci_mmcfg_check_hostbridge(void) | ||
136 | { | ||
137 | u32 l; | ||
138 | u16 vendor, device; | ||
139 | int i; | ||
140 | const char *name; | ||
141 | |||
142 | pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0, 4, &l); | ||
143 | vendor = l & 0xffff; | ||
144 | device = (l >> 16) & 0xffff; | ||
145 | |||
146 | pci_mmcfg_config_num = 0; | ||
147 | pci_mmcfg_config = NULL; | ||
148 | name = NULL; | ||
149 | |||
150 | for (i = 0; !name && i < ARRAY_SIZE(pci_mmcfg_probes); i++) { | ||
151 | if (pci_mmcfg_probes[i].vendor == vendor && | ||
152 | pci_mmcfg_probes[i].device == device) | ||
153 | name = pci_mmcfg_probes[i].probe(); | ||
154 | } | ||
155 | |||
156 | if (name) { | ||
157 | printk(KERN_INFO "PCI: Found %s %s MMCONFIG support.\n", | ||
158 | name, pci_mmcfg_config_num ? "with" : "without"); | ||
159 | } | ||
160 | |||
161 | return name != NULL; | ||
162 | } | ||
163 | |||
164 | static void __init pci_mmcfg_insert_resources(void) | ||
165 | { | ||
166 | #define PCI_MMCFG_RESOURCE_NAME_LEN 19 | ||
167 | int i; | ||
168 | struct resource *res; | ||
169 | char *names; | ||
170 | unsigned num_buses; | ||
171 | |||
172 | res = kcalloc(PCI_MMCFG_RESOURCE_NAME_LEN + sizeof(*res), | ||
173 | pci_mmcfg_config_num, GFP_KERNEL); | ||
174 | if (!res) { | ||
175 | printk(KERN_ERR "PCI: Unable to allocate MMCONFIG resources\n"); | ||
176 | return; | ||
177 | } | ||
178 | |||
179 | names = (void *)&res[pci_mmcfg_config_num]; | ||
180 | for (i = 0; i < pci_mmcfg_config_num; i++, res++) { | ||
181 | struct acpi_mcfg_allocation *cfg = &pci_mmcfg_config[i]; | ||
182 | num_buses = cfg->end_bus_number - cfg->start_bus_number + 1; | ||
183 | res->name = names; | ||
184 | snprintf(names, PCI_MMCFG_RESOURCE_NAME_LEN, "PCI MMCONFIG %u", | ||
185 | cfg->pci_segment); | ||
186 | res->start = cfg->address; | ||
187 | res->end = res->start + (num_buses << 20) - 1; | ||
188 | res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
189 | insert_resource(&iomem_resource, res); | ||
190 | names += PCI_MMCFG_RESOURCE_NAME_LEN; | ||
191 | } | ||
192 | } | ||
193 | |||
194 | static void __init pci_mmcfg_reject_broken(int type) | ||
195 | { | ||
196 | typeof(pci_mmcfg_config[0]) *cfg; | ||
197 | |||
198 | if ((pci_mmcfg_config_num == 0) || | ||
199 | (pci_mmcfg_config == NULL) || | ||
200 | (pci_mmcfg_config[0].address == 0)) | ||
201 | return; | ||
202 | |||
203 | cfg = &pci_mmcfg_config[0]; | ||
204 | |||
205 | /* | ||
206 | * Handle more broken MCFG tables on Asus etc. | ||
207 | * They only contain a single entry for bus 0-0. | ||
208 | */ | ||
209 | if (pci_mmcfg_config_num == 1 && | ||
210 | cfg->pci_segment == 0 && | ||
211 | (cfg->start_bus_number | cfg->end_bus_number) == 0) { | ||
212 | printk(KERN_ERR "PCI: start and end of bus number is 0. " | ||
213 | "Rejected as broken MCFG.\n"); | ||
214 | goto reject; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Only do this check when type 1 works. If it doesn't work | ||
219 | * assume we run on a Mac and always use MCFG | ||
220 | */ | ||
221 | if (type == 1 && !e820_all_mapped(cfg->address, | ||
222 | cfg->address + MMCONFIG_APER_MIN, | ||
223 | E820_RESERVED)) { | ||
224 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" | ||
225 | " E820-reserved\n", cfg->address); | ||
226 | goto reject; | ||
227 | } | ||
228 | return; | ||
229 | |||
230 | reject: | ||
231 | printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); | ||
232 | kfree(pci_mmcfg_config); | ||
233 | pci_mmcfg_config = NULL; | ||
234 | pci_mmcfg_config_num = 0; | ||
235 | } | ||
236 | |||
237 | void __init pci_mmcfg_init(int type) | ||
238 | { | ||
239 | int known_bridge = 0; | ||
240 | |||
241 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
242 | return; | ||
243 | |||
244 | if (type == 1 && pci_mmcfg_check_hostbridge()) | ||
245 | known_bridge = 1; | ||
246 | |||
247 | if (!known_bridge) { | ||
248 | acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); | ||
249 | pci_mmcfg_reject_broken(type); | ||
250 | } | ||
251 | |||
252 | if ((pci_mmcfg_config_num == 0) || | ||
253 | (pci_mmcfg_config == NULL) || | ||
254 | (pci_mmcfg_config[0].address == 0)) | ||
255 | return; | ||
256 | |||
257 | if (pci_mmcfg_arch_init()) { | ||
258 | if (type == 1) | ||
259 | unreachable_devices(); | ||
260 | if (known_bridge) | ||
261 | pci_mmcfg_insert_resources(); | ||
262 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | ||
263 | } | ||
264 | } | ||
diff --git a/arch/i386/pci/mmconfig.c b/arch/i386/pci/mmconfig.c index 5700220dcf5f..bb1afd9e589d 100644 --- a/arch/i386/pci/mmconfig.c +++ b/arch/i386/pci/mmconfig.c | |||
@@ -15,55 +15,33 @@ | |||
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include "pci.h" | 16 | #include "pci.h" |
17 | 17 | ||
18 | /* aperture is up to 256MB but BIOS may reserve less */ | ||
19 | #define MMCONFIG_APER_MIN (2 * 1024*1024) | ||
20 | #define MMCONFIG_APER_MAX (256 * 1024*1024) | ||
21 | |||
22 | /* Assume systems with more busses have correct MCFG */ | 18 | /* Assume systems with more busses have correct MCFG */ |
23 | #define MAX_CHECK_BUS 16 | ||
24 | |||
25 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) | 19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) |
26 | 20 | ||
27 | /* The base address of the last MMCONFIG device accessed */ | 21 | /* The base address of the last MMCONFIG device accessed */ |
28 | static u32 mmcfg_last_accessed_device; | 22 | static u32 mmcfg_last_accessed_device; |
29 | static int mmcfg_last_accessed_cpu; | 23 | static int mmcfg_last_accessed_cpu; |
30 | 24 | ||
31 | static DECLARE_BITMAP(fallback_slots, MAX_CHECK_BUS*32); | ||
32 | |||
33 | /* | 25 | /* |
34 | * Functions for accessing PCI configuration space with MMCONFIG accesses | 26 | * Functions for accessing PCI configuration space with MMCONFIG accesses |
35 | */ | 27 | */ |
36 | static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) | 28 | static u32 get_base_addr(unsigned int seg, int bus, unsigned devfn) |
37 | { | 29 | { |
38 | int cfg_num = -1; | ||
39 | struct acpi_mcfg_allocation *cfg; | 30 | struct acpi_mcfg_allocation *cfg; |
31 | int cfg_num; | ||
40 | 32 | ||
41 | if (seg == 0 && bus < MAX_CHECK_BUS && | 33 | if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS && |
42 | test_bit(PCI_SLOT(devfn) + 32*bus, fallback_slots)) | 34 | test_bit(PCI_SLOT(devfn) + 32*bus, pci_mmcfg_fallback_slots)) |
43 | return 0; | 35 | return 0; |
44 | 36 | ||
45 | while (1) { | 37 | for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { |
46 | ++cfg_num; | ||
47 | if (cfg_num >= pci_mmcfg_config_num) { | ||
48 | break; | ||
49 | } | ||
50 | cfg = &pci_mmcfg_config[cfg_num]; | 38 | cfg = &pci_mmcfg_config[cfg_num]; |
51 | if (cfg->pci_segment != seg) | 39 | if (cfg->pci_segment == seg && |
52 | continue; | 40 | (cfg->start_bus_number <= bus) && |
53 | if ((cfg->start_bus_number <= bus) && | ||
54 | (cfg->end_bus_number >= bus)) | 41 | (cfg->end_bus_number >= bus)) |
55 | return cfg->address; | 42 | return cfg->address; |
56 | } | 43 | } |
57 | 44 | ||
58 | /* Handle more broken MCFG tables on Asus etc. | ||
59 | They only contain a single entry for bus 0-0. Assume | ||
60 | this applies to all busses. */ | ||
61 | cfg = &pci_mmcfg_config[0]; | ||
62 | if (pci_mmcfg_config_num == 1 && | ||
63 | cfg->pci_segment == 0 && | ||
64 | (cfg->start_bus_number | cfg->end_bus_number) == 0) | ||
65 | return cfg->address; | ||
66 | |||
67 | /* Fall back to type 0 */ | 45 | /* Fall back to type 0 */ |
68 | return 0; | 46 | return 0; |
69 | } | 47 | } |
@@ -158,67 +136,15 @@ static struct pci_raw_ops pci_mmcfg = { | |||
158 | .write = pci_mmcfg_write, | 136 | .write = pci_mmcfg_write, |
159 | }; | 137 | }; |
160 | 138 | ||
161 | /* K8 systems have some devices (typically in the builtin northbridge) | 139 | int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, |
162 | that are only accessible using type1 | 140 | unsigned int devfn) |
163 | Normally this can be expressed in the MCFG by not listing them | ||
164 | and assigning suitable _SEGs, but this isn't implemented in some BIOS. | ||
165 | Instead try to discover all devices on bus 0 that are unreachable using MM | ||
166 | and fallback for them. */ | ||
167 | static __init void unreachable_devices(void) | ||
168 | { | 141 | { |
169 | int i, k; | 142 | return get_base_addr(seg, bus, devfn) != 0; |
170 | unsigned long flags; | ||
171 | |||
172 | for (k = 0; k < MAX_CHECK_BUS; k++) { | ||
173 | for (i = 0; i < 32; i++) { | ||
174 | u32 val1; | ||
175 | u32 addr; | ||
176 | |||
177 | pci_conf1_read(0, k, PCI_DEVFN(i, 0), 0, 4, &val1); | ||
178 | if (val1 == 0xffffffff) | ||
179 | continue; | ||
180 | |||
181 | /* Locking probably not needed, but safer */ | ||
182 | spin_lock_irqsave(&pci_config_lock, flags); | ||
183 | addr = get_base_addr(0, k, PCI_DEVFN(i, 0)); | ||
184 | if (addr != 0) | ||
185 | pci_exp_set_dev_base(addr, k, PCI_DEVFN(i, 0)); | ||
186 | if (addr == 0 || | ||
187 | readl((u32 __iomem *)mmcfg_virt_addr) != val1) { | ||
188 | set_bit(i + 32*k, fallback_slots); | ||
189 | printk(KERN_NOTICE | ||
190 | "PCI: No mmconfig possible on %x:%x\n", k, i); | ||
191 | } | ||
192 | spin_unlock_irqrestore(&pci_config_lock, flags); | ||
193 | } | ||
194 | } | ||
195 | } | 143 | } |
196 | 144 | ||
197 | void __init pci_mmcfg_init(int type) | 145 | int __init pci_mmcfg_arch_init(void) |
198 | { | 146 | { |
199 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
200 | return; | ||
201 | |||
202 | acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); | ||
203 | if ((pci_mmcfg_config_num == 0) || | ||
204 | (pci_mmcfg_config == NULL) || | ||
205 | (pci_mmcfg_config[0].address == 0)) | ||
206 | return; | ||
207 | |||
208 | /* Only do this check when type 1 works. If it doesn't work | ||
209 | assume we run on a Mac and always use MCFG */ | ||
210 | if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].address, | ||
211 | pci_mmcfg_config[0].address + MMCONFIG_APER_MIN, | ||
212 | E820_RESERVED)) { | ||
213 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %lx is not E820-reserved\n", | ||
214 | (unsigned long)pci_mmcfg_config[0].address); | ||
215 | printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); | ||
216 | return; | ||
217 | } | ||
218 | |||
219 | printk(KERN_INFO "PCI: Using MMCONFIG\n"); | 147 | printk(KERN_INFO "PCI: Using MMCONFIG\n"); |
220 | raw_pci_ops = &pci_mmcfg; | 148 | raw_pci_ops = &pci_mmcfg; |
221 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | 149 | return 1; |
222 | |||
223 | unreachable_devices(); | ||
224 | } | 150 | } |
diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index a0a25180b61a..e58bae2076ad 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h | |||
@@ -94,3 +94,13 @@ extern void pci_pcbios_init(void); | |||
94 | extern void pci_mmcfg_init(int type); | 94 | extern void pci_mmcfg_init(int type); |
95 | extern void pcibios_sort(void); | 95 | extern void pcibios_sort(void); |
96 | 96 | ||
97 | /* pci-mmconfig.c */ | ||
98 | |||
99 | /* Verify the first 16 busses. We assume that systems with more busses | ||
100 | get MCFG right. */ | ||
101 | #define PCI_MMCFG_MAX_CHECK_BUS 16 | ||
102 | extern DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS); | ||
103 | |||
104 | extern int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, | ||
105 | unsigned int devfn); | ||
106 | extern int __init pci_mmcfg_arch_init(void); | ||
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 02dd39457bcf..7982cbc3bc94 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -152,18 +152,18 @@ config MPSC | |||
152 | Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs | 152 | Optimize for Intel Pentium 4 and older Nocona/Dempsey Xeon CPUs |
153 | with Intel Extended Memory 64 Technology(EM64T). For details see | 153 | with Intel Extended Memory 64 Technology(EM64T). For details see |
154 | <http://www.intel.com/technology/64bitextensions/>. | 154 | <http://www.intel.com/technology/64bitextensions/>. |
155 | Note the the latest Xeons (Xeon 51xx and 53xx) are not based on the | 155 | Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the |
156 | Netburst core and shouldn't use this option. You can distingush them | 156 | Netburst core and shouldn't use this option. You can distinguish them |
157 | using the cpu family field | 157 | using the cpu family field |
158 | in /proc/cpuinfo. Family 15 is a older Xeon, Family 6 a newer one | 158 | in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one |
159 | (this rule only applies to system that support EM64T) | 159 | (this rule only applies to systems that support EM64T) |
160 | 160 | ||
161 | config MCORE2 | 161 | config MCORE2 |
162 | bool "Intel Core2 / newer Xeon" | 162 | bool "Intel Core2 / newer Xeon" |
163 | help | 163 | help |
164 | Optimize for Intel Core2 and newer Xeons (51xx) | 164 | Optimize for Intel Core2 and newer Xeons (51xx) |
165 | You can distingush the newer Xeons from the older ones using | 165 | You can distinguish the newer Xeons from the older ones using |
166 | the cpu family field in /proc/cpuinfo. 15 is a older Xeon | 166 | the cpu family field in /proc/cpuinfo. 15 is an older Xeon |
167 | (use CONFIG_MPSC then), 6 is a newer one. This rule only | 167 | (use CONFIG_MPSC then), 6 is a newer one. This rule only |
168 | applies to CPUs that support EM64T. | 168 | applies to CPUs that support EM64T. |
169 | 169 | ||
@@ -458,8 +458,8 @@ config IOMMU | |||
458 | on systems with more than 3GB. This is usually needed for USB, | 458 | on systems with more than 3GB. This is usually needed for USB, |
459 | sound, many IDE/SATA chipsets and some other devices. | 459 | sound, many IDE/SATA chipsets and some other devices. |
460 | Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART | 460 | Provides a driver for the AMD Athlon64/Opteron/Turion/Sempron GART |
461 | based IOMMU and a software bounce buffer based IOMMU used on Intel | 461 | based hardware IOMMU and a software bounce buffer based IOMMU used |
462 | systems and as fallback. | 462 | on Intel systems and as fallback. |
463 | The code is only active when needed (enough memory and limited | 463 | The code is only active when needed (enough memory and limited |
464 | device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified | 464 | device) unless CONFIG_IOMMU_DEBUG or iommu=force is specified |
465 | too. | 465 | too. |
@@ -496,6 +496,12 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT | |||
496 | # need this always selected by IOMMU for the VIA workaround | 496 | # need this always selected by IOMMU for the VIA workaround |
497 | config SWIOTLB | 497 | config SWIOTLB |
498 | bool | 498 | bool |
499 | help | ||
500 | Support for software bounce buffers used on x86-64 systems | ||
501 | which don't have a hardware IOMMU (e.g. the current generation | ||
502 | of Intel's x86-64 CPUs). Using this PCI devices which can only | ||
503 | access 32-bits of memory can be used on systems with more than | ||
504 | 3 GB of memory. If unsure, say Y. | ||
499 | 505 | ||
500 | config X86_MCE | 506 | config X86_MCE |
501 | bool "Machine check support" if EMBEDDED | 507 | bool "Machine check support" if EMBEDDED |
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index 69584c295305..293a4a4c609e 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.20-rc3 | 3 | # Linux kernel version: 2.6.20-git8 |
4 | # Fri Jan 5 11:54:41 2007 | 4 | # Tue Feb 13 11:25:16 2007 |
5 | # | 5 | # |
6 | CONFIG_X86_64=y | 6 | CONFIG_X86_64=y |
7 | CONFIG_64BIT=y | 7 | CONFIG_64BIT=y |
@@ -11,6 +11,7 @@ CONFIG_LOCKDEP_SUPPORT=y | |||
11 | CONFIG_STACKTRACE_SUPPORT=y | 11 | CONFIG_STACKTRACE_SUPPORT=y |
12 | CONFIG_SEMAPHORE_SLEEPERS=y | 12 | CONFIG_SEMAPHORE_SLEEPERS=y |
13 | CONFIG_MMU=y | 13 | CONFIG_MMU=y |
14 | CONFIG_ZONE_DMA=y | ||
14 | CONFIG_RWSEM_GENERIC_SPINLOCK=y | 15 | CONFIG_RWSEM_GENERIC_SPINLOCK=y |
15 | CONFIG_GENERIC_HWEIGHT=y | 16 | CONFIG_GENERIC_HWEIGHT=y |
16 | CONFIG_GENERIC_CALIBRATE_DELAY=y | 17 | CONFIG_GENERIC_CALIBRATE_DELAY=y |
@@ -153,6 +154,7 @@ CONFIG_NEED_MULTIPLE_NODES=y | |||
153 | CONFIG_SPLIT_PTLOCK_CPUS=4 | 154 | CONFIG_SPLIT_PTLOCK_CPUS=4 |
154 | CONFIG_MIGRATION=y | 155 | CONFIG_MIGRATION=y |
155 | CONFIG_RESOURCES_64BIT=y | 156 | CONFIG_RESOURCES_64BIT=y |
157 | CONFIG_ZONE_DMA_FLAG=1 | ||
156 | CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y | 158 | CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y |
157 | CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y | 159 | CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y |
158 | CONFIG_NR_CPUS=32 | 160 | CONFIG_NR_CPUS=32 |
@@ -201,13 +203,14 @@ CONFIG_ACPI=y | |||
201 | CONFIG_ACPI_SLEEP=y | 203 | CONFIG_ACPI_SLEEP=y |
202 | CONFIG_ACPI_SLEEP_PROC_FS=y | 204 | CONFIG_ACPI_SLEEP_PROC_FS=y |
203 | CONFIG_ACPI_SLEEP_PROC_SLEEP=y | 205 | CONFIG_ACPI_SLEEP_PROC_SLEEP=y |
206 | CONFIG_ACPI_PROCFS=y | ||
204 | CONFIG_ACPI_AC=y | 207 | CONFIG_ACPI_AC=y |
205 | CONFIG_ACPI_BATTERY=y | 208 | CONFIG_ACPI_BATTERY=y |
206 | CONFIG_ACPI_BUTTON=y | 209 | CONFIG_ACPI_BUTTON=y |
207 | # CONFIG_ACPI_VIDEO is not set | ||
208 | # CONFIG_ACPI_HOTKEY is not set | 210 | # CONFIG_ACPI_HOTKEY is not set |
209 | CONFIG_ACPI_FAN=y | 211 | CONFIG_ACPI_FAN=y |
210 | # CONFIG_ACPI_DOCK is not set | 212 | # CONFIG_ACPI_DOCK is not set |
213 | # CONFIG_ACPI_BAY is not set | ||
211 | CONFIG_ACPI_PROCESSOR=y | 214 | CONFIG_ACPI_PROCESSOR=y |
212 | CONFIG_ACPI_HOTPLUG_CPU=y | 215 | CONFIG_ACPI_HOTPLUG_CPU=y |
213 | CONFIG_ACPI_THERMAL=y | 216 | CONFIG_ACPI_THERMAL=y |
@@ -263,7 +266,6 @@ CONFIG_PCI_MMCONFIG=y | |||
263 | CONFIG_PCIEPORTBUS=y | 266 | CONFIG_PCIEPORTBUS=y |
264 | CONFIG_PCIEAER=y | 267 | CONFIG_PCIEAER=y |
265 | CONFIG_PCI_MSI=y | 268 | CONFIG_PCI_MSI=y |
266 | # CONFIG_PCI_MULTITHREAD_PROBE is not set | ||
267 | # CONFIG_PCI_DEBUG is not set | 269 | # CONFIG_PCI_DEBUG is not set |
268 | # CONFIG_HT_IRQ is not set | 270 | # CONFIG_HT_IRQ is not set |
269 | 271 | ||
@@ -398,6 +400,7 @@ CONFIG_STANDALONE=y | |||
398 | CONFIG_PREVENT_FIRMWARE_BUILD=y | 400 | CONFIG_PREVENT_FIRMWARE_BUILD=y |
399 | CONFIG_FW_LOADER=y | 401 | CONFIG_FW_LOADER=y |
400 | # CONFIG_DEBUG_DRIVER is not set | 402 | # CONFIG_DEBUG_DRIVER is not set |
403 | # CONFIG_DEBUG_DEVRES is not set | ||
401 | # CONFIG_SYS_HYPERVISOR is not set | 404 | # CONFIG_SYS_HYPERVISOR is not set |
402 | 405 | ||
403 | # | 406 | # |
@@ -466,6 +469,7 @@ CONFIG_BLK_DEV_IDECD=y | |||
466 | # CONFIG_BLK_DEV_IDETAPE is not set | 469 | # CONFIG_BLK_DEV_IDETAPE is not set |
467 | # CONFIG_BLK_DEV_IDEFLOPPY is not set | 470 | # CONFIG_BLK_DEV_IDEFLOPPY is not set |
468 | # CONFIG_BLK_DEV_IDESCSI is not set | 471 | # CONFIG_BLK_DEV_IDESCSI is not set |
472 | CONFIG_BLK_DEV_IDEACPI=y | ||
469 | # CONFIG_IDE_TASK_IOCTL is not set | 473 | # CONFIG_IDE_TASK_IOCTL is not set |
470 | 474 | ||
471 | # | 475 | # |
@@ -497,6 +501,7 @@ CONFIG_BLK_DEV_ATIIXP=y | |||
497 | # CONFIG_BLK_DEV_JMICRON is not set | 501 | # CONFIG_BLK_DEV_JMICRON is not set |
498 | # CONFIG_BLK_DEV_SC1200 is not set | 502 | # CONFIG_BLK_DEV_SC1200 is not set |
499 | CONFIG_BLK_DEV_PIIX=y | 503 | CONFIG_BLK_DEV_PIIX=y |
504 | # CONFIG_BLK_DEV_IT8213 is not set | ||
500 | # CONFIG_BLK_DEV_IT821X is not set | 505 | # CONFIG_BLK_DEV_IT821X is not set |
501 | # CONFIG_BLK_DEV_NS87415 is not set | 506 | # CONFIG_BLK_DEV_NS87415 is not set |
502 | # CONFIG_BLK_DEV_PDC202XX_OLD is not set | 507 | # CONFIG_BLK_DEV_PDC202XX_OLD is not set |
@@ -507,6 +512,7 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y | |||
507 | # CONFIG_BLK_DEV_SLC90E66 is not set | 512 | # CONFIG_BLK_DEV_SLC90E66 is not set |
508 | # CONFIG_BLK_DEV_TRM290 is not set | 513 | # CONFIG_BLK_DEV_TRM290 is not set |
509 | # CONFIG_BLK_DEV_VIA82CXXX is not set | 514 | # CONFIG_BLK_DEV_VIA82CXXX is not set |
515 | # CONFIG_BLK_DEV_TC86C001 is not set | ||
510 | # CONFIG_IDE_ARM is not set | 516 | # CONFIG_IDE_ARM is not set |
511 | CONFIG_BLK_DEV_IDEDMA=y | 517 | CONFIG_BLK_DEV_IDEDMA=y |
512 | # CONFIG_IDEDMA_IVB is not set | 518 | # CONFIG_IDEDMA_IVB is not set |
@@ -599,6 +605,7 @@ CONFIG_MEGARAID_SAS=y | |||
599 | # Serial ATA (prod) and Parallel ATA (experimental) drivers | 605 | # Serial ATA (prod) and Parallel ATA (experimental) drivers |
600 | # | 606 | # |
601 | CONFIG_ATA=y | 607 | CONFIG_ATA=y |
608 | # CONFIG_ATA_NONSTANDARD is not set | ||
602 | CONFIG_SATA_AHCI=y | 609 | CONFIG_SATA_AHCI=y |
603 | CONFIG_SATA_SVW=y | 610 | CONFIG_SATA_SVW=y |
604 | CONFIG_ATA_PIIX=y | 611 | CONFIG_ATA_PIIX=y |
@@ -614,6 +621,7 @@ CONFIG_SATA_SIL=y | |||
614 | # CONFIG_SATA_ULI is not set | 621 | # CONFIG_SATA_ULI is not set |
615 | CONFIG_SATA_VIA=y | 622 | CONFIG_SATA_VIA=y |
616 | # CONFIG_SATA_VITESSE is not set | 623 | # CONFIG_SATA_VITESSE is not set |
624 | # CONFIG_SATA_INIC162X is not set | ||
617 | CONFIG_SATA_INTEL_COMBINED=y | 625 | CONFIG_SATA_INTEL_COMBINED=y |
618 | # CONFIG_PATA_ALI is not set | 626 | # CONFIG_PATA_ALI is not set |
619 | # CONFIG_PATA_AMD is not set | 627 | # CONFIG_PATA_AMD is not set |
@@ -630,6 +638,7 @@ CONFIG_SATA_INTEL_COMBINED=y | |||
630 | # CONFIG_PATA_HPT3X2N is not set | 638 | # CONFIG_PATA_HPT3X2N is not set |
631 | # CONFIG_PATA_HPT3X3 is not set | 639 | # CONFIG_PATA_HPT3X3 is not set |
632 | # CONFIG_PATA_IT821X is not set | 640 | # CONFIG_PATA_IT821X is not set |
641 | # CONFIG_PATA_IT8213 is not set | ||
633 | # CONFIG_PATA_JMICRON is not set | 642 | # CONFIG_PATA_JMICRON is not set |
634 | # CONFIG_PATA_TRIFLEX is not set | 643 | # CONFIG_PATA_TRIFLEX is not set |
635 | # CONFIG_PATA_MARVELL is not set | 644 | # CONFIG_PATA_MARVELL is not set |
@@ -682,9 +691,7 @@ CONFIG_IEEE1394=y | |||
682 | # Subsystem Options | 691 | # Subsystem Options |
683 | # | 692 | # |
684 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set | 693 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set |
685 | # CONFIG_IEEE1394_OUI_DB is not set | ||
686 | # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set | 694 | # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set |
687 | # CONFIG_IEEE1394_EXPORT_FULL_API is not set | ||
688 | 695 | ||
689 | # | 696 | # |
690 | # Device Drivers | 697 | # Device Drivers |
@@ -707,6 +714,11 @@ CONFIG_IEEE1394_RAWIO=y | |||
707 | # CONFIG_I2O is not set | 714 | # CONFIG_I2O is not set |
708 | 715 | ||
709 | # | 716 | # |
717 | # Macintosh device drivers | ||
718 | # | ||
719 | # CONFIG_MAC_EMUMOUSEBTN is not set | ||
720 | |||
721 | # | ||
710 | # Network device support | 722 | # Network device support |
711 | # | 723 | # |
712 | CONFIG_NETDEVICES=y | 724 | CONFIG_NETDEVICES=y |
@@ -774,6 +786,7 @@ CONFIG_8139TOO=y | |||
774 | # CONFIG_EPIC100 is not set | 786 | # CONFIG_EPIC100 is not set |
775 | # CONFIG_SUNDANCE is not set | 787 | # CONFIG_SUNDANCE is not set |
776 | # CONFIG_VIA_RHINE is not set | 788 | # CONFIG_VIA_RHINE is not set |
789 | # CONFIG_SC92031 is not set | ||
777 | 790 | ||
778 | # | 791 | # |
779 | # Ethernet (1000 Mbit) | 792 | # Ethernet (1000 Mbit) |
@@ -795,11 +808,13 @@ CONFIG_E1000=y | |||
795 | CONFIG_TIGON3=y | 808 | CONFIG_TIGON3=y |
796 | CONFIG_BNX2=y | 809 | CONFIG_BNX2=y |
797 | # CONFIG_QLA3XXX is not set | 810 | # CONFIG_QLA3XXX is not set |
811 | # CONFIG_ATL1 is not set | ||
798 | 812 | ||
799 | # | 813 | # |
800 | # Ethernet (10000 Mbit) | 814 | # Ethernet (10000 Mbit) |
801 | # | 815 | # |
802 | # CONFIG_CHELSIO_T1 is not set | 816 | # CONFIG_CHELSIO_T1 is not set |
817 | # CONFIG_CHELSIO_T3 is not set | ||
803 | # CONFIG_IXGB is not set | 818 | # CONFIG_IXGB is not set |
804 | CONFIG_S2IO=m | 819 | CONFIG_S2IO=m |
805 | # CONFIG_S2IO_NAPI is not set | 820 | # CONFIG_S2IO_NAPI is not set |
@@ -1115,6 +1130,7 @@ CONFIG_SOUND=y | |||
1115 | # Open Sound System | 1130 | # Open Sound System |
1116 | # | 1131 | # |
1117 | CONFIG_SOUND_PRIME=y | 1132 | CONFIG_SOUND_PRIME=y |
1133 | CONFIG_OBSOLETE_OSS=y | ||
1118 | # CONFIG_SOUND_BT878 is not set | 1134 | # CONFIG_SOUND_BT878 is not set |
1119 | # CONFIG_SOUND_ES1371 is not set | 1135 | # CONFIG_SOUND_ES1371 is not set |
1120 | CONFIG_SOUND_ICH=y | 1136 | CONFIG_SOUND_ICH=y |
@@ -1128,6 +1144,7 @@ CONFIG_SOUND_ICH=y | |||
1128 | # HID Devices | 1144 | # HID Devices |
1129 | # | 1145 | # |
1130 | CONFIG_HID=y | 1146 | CONFIG_HID=y |
1147 | # CONFIG_HID_DEBUG is not set | ||
1131 | 1148 | ||
1132 | # | 1149 | # |
1133 | # USB support | 1150 | # USB support |
@@ -1142,10 +1159,8 @@ CONFIG_USB=y | |||
1142 | # Miscellaneous USB options | 1159 | # Miscellaneous USB options |
1143 | # | 1160 | # |
1144 | CONFIG_USB_DEVICEFS=y | 1161 | CONFIG_USB_DEVICEFS=y |
1145 | # CONFIG_USB_BANDWIDTH is not set | ||
1146 | # CONFIG_USB_DYNAMIC_MINORS is not set | 1162 | # CONFIG_USB_DYNAMIC_MINORS is not set |
1147 | # CONFIG_USB_SUSPEND is not set | 1163 | # CONFIG_USB_SUSPEND is not set |
1148 | # CONFIG_USB_MULTITHREAD_PROBE is not set | ||
1149 | # CONFIG_USB_OTG is not set | 1164 | # CONFIG_USB_OTG is not set |
1150 | 1165 | ||
1151 | # | 1166 | # |
@@ -1155,9 +1170,11 @@ CONFIG_USB_EHCI_HCD=y | |||
1155 | # CONFIG_USB_EHCI_SPLIT_ISO is not set | 1170 | # CONFIG_USB_EHCI_SPLIT_ISO is not set |
1156 | # CONFIG_USB_EHCI_ROOT_HUB_TT is not set | 1171 | # CONFIG_USB_EHCI_ROOT_HUB_TT is not set |
1157 | # CONFIG_USB_EHCI_TT_NEWSCHED is not set | 1172 | # CONFIG_USB_EHCI_TT_NEWSCHED is not set |
1173 | # CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set | ||
1158 | # CONFIG_USB_ISP116X_HCD is not set | 1174 | # CONFIG_USB_ISP116X_HCD is not set |
1159 | CONFIG_USB_OHCI_HCD=y | 1175 | CONFIG_USB_OHCI_HCD=y |
1160 | # CONFIG_USB_OHCI_BIG_ENDIAN is not set | 1176 | # CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set |
1177 | # CONFIG_USB_OHCI_BIG_ENDIAN_MMIO is not set | ||
1161 | CONFIG_USB_OHCI_LITTLE_ENDIAN=y | 1178 | CONFIG_USB_OHCI_LITTLE_ENDIAN=y |
1162 | CONFIG_USB_UHCI_HCD=y | 1179 | CONFIG_USB_UHCI_HCD=y |
1163 | # CONFIG_USB_SL811_HCD is not set | 1180 | # CONFIG_USB_SL811_HCD is not set |
@@ -1208,6 +1225,7 @@ CONFIG_USB_HID=y | |||
1208 | # CONFIG_USB_ATI_REMOTE2 is not set | 1225 | # CONFIG_USB_ATI_REMOTE2 is not set |
1209 | # CONFIG_USB_KEYSPAN_REMOTE is not set | 1226 | # CONFIG_USB_KEYSPAN_REMOTE is not set |
1210 | # CONFIG_USB_APPLETOUCH is not set | 1227 | # CONFIG_USB_APPLETOUCH is not set |
1228 | # CONFIG_USB_GTCO is not set | ||
1211 | 1229 | ||
1212 | # | 1230 | # |
1213 | # USB Imaging devices | 1231 | # USB Imaging devices |
@@ -1313,6 +1331,10 @@ CONFIG_USB_MON=y | |||
1313 | # | 1331 | # |
1314 | 1332 | ||
1315 | # | 1333 | # |
1334 | # Auxiliary Display support | ||
1335 | # | ||
1336 | |||
1337 | # | ||
1316 | # Virtualization | 1338 | # Virtualization |
1317 | # | 1339 | # |
1318 | # CONFIG_KVM is not set | 1340 | # CONFIG_KVM is not set |
@@ -1512,6 +1534,7 @@ CONFIG_UNUSED_SYMBOLS=y | |||
1512 | CONFIG_DEBUG_FS=y | 1534 | CONFIG_DEBUG_FS=y |
1513 | # CONFIG_HEADERS_CHECK is not set | 1535 | # CONFIG_HEADERS_CHECK is not set |
1514 | CONFIG_DEBUG_KERNEL=y | 1536 | CONFIG_DEBUG_KERNEL=y |
1537 | # CONFIG_DEBUG_SHIRQ is not set | ||
1515 | CONFIG_LOG_BUF_SHIFT=18 | 1538 | CONFIG_LOG_BUF_SHIFT=18 |
1516 | CONFIG_DETECT_SOFTLOCKUP=y | 1539 | CONFIG_DETECT_SOFTLOCKUP=y |
1517 | # CONFIG_SCHEDSTATS is not set | 1540 | # CONFIG_SCHEDSTATS is not set |
@@ -1520,7 +1543,6 @@ CONFIG_DETECT_SOFTLOCKUP=y | |||
1520 | # CONFIG_RT_MUTEX_TESTER is not set | 1543 | # CONFIG_RT_MUTEX_TESTER is not set |
1521 | # CONFIG_DEBUG_SPINLOCK is not set | 1544 | # CONFIG_DEBUG_SPINLOCK is not set |
1522 | # CONFIG_DEBUG_MUTEXES is not set | 1545 | # CONFIG_DEBUG_MUTEXES is not set |
1523 | # CONFIG_DEBUG_RWSEMS is not set | ||
1524 | # CONFIG_DEBUG_LOCK_ALLOC is not set | 1546 | # CONFIG_DEBUG_LOCK_ALLOC is not set |
1525 | # CONFIG_PROVE_LOCKING is not set | 1547 | # CONFIG_PROVE_LOCKING is not set |
1526 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set | 1548 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set |
@@ -1560,4 +1582,5 @@ CONFIG_CRC32=y | |||
1560 | # CONFIG_LIBCRC32C is not set | 1582 | # CONFIG_LIBCRC32C is not set |
1561 | CONFIG_ZLIB_INFLATE=y | 1583 | CONFIG_ZLIB_INFLATE=y |
1562 | CONFIG_PLIST=y | 1584 | CONFIG_PLIST=y |
1563 | CONFIG_IOMAP_COPY=y | 1585 | CONFIG_HAS_IOMEM=y |
1586 | CONFIG_HAS_IOPORT=y | ||
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index ff499ef2a1ba..359eacc38509 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/stddef.h> | 21 | #include <linux/stddef.h> |
22 | #include <linux/personality.h> | 22 | #include <linux/personality.h> |
23 | #include <linux/compat.h> | 23 | #include <linux/compat.h> |
24 | #include <linux/binfmts.h> | ||
24 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | #include <asm/i387.h> | 27 | #include <asm/i387.h> |
@@ -449,7 +450,11 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
449 | 450 | ||
450 | /* Return stub is in 32bit vsyscall page */ | 451 | /* Return stub is in 32bit vsyscall page */ |
451 | { | 452 | { |
452 | void __user *restorer = VSYSCALL32_SIGRETURN; | 453 | void __user *restorer; |
454 | if (current->binfmt->hasvdso) | ||
455 | restorer = VSYSCALL32_SIGRETURN; | ||
456 | else | ||
457 | restorer = (void *)&frame->retcode; | ||
453 | if (ka->sa.sa_flags & SA_RESTORER) | 458 | if (ka->sa.sa_flags & SA_RESTORER) |
454 | restorer = ka->sa.sa_restorer; | 459 | restorer = ka->sa.sa_restorer; |
455 | err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); | 460 | err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); |
@@ -495,7 +500,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
495 | ptrace_notify(SIGTRAP); | 500 | ptrace_notify(SIGTRAP); |
496 | 501 | ||
497 | #if DEBUG_SIG | 502 | #if DEBUG_SIG |
498 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 503 | printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", |
499 | current->comm, current->pid, frame, regs->rip, frame->pretcode); | 504 | current->comm, current->pid, frame, regs->rip, frame->pretcode); |
500 | #endif | 505 | #endif |
501 | 506 | ||
@@ -601,7 +606,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
601 | ptrace_notify(SIGTRAP); | 606 | ptrace_notify(SIGTRAP); |
602 | 607 | ||
603 | #if DEBUG_SIG | 608 | #if DEBUG_SIG |
604 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 609 | printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", |
605 | current->comm, current->pid, frame, regs->rip, frame->pretcode); | 610 | current->comm, current->pid, frame, regs->rip, frame->pretcode); |
606 | #endif | 611 | #endif |
607 | 612 | ||
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 5f32cf4de5fb..eda7a0d4dc15 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S | |||
@@ -718,4 +718,5 @@ ia32_sys_call_table: | |||
718 | .quad compat_sys_vmsplice | 718 | .quad compat_sys_vmsplice |
719 | .quad compat_sys_move_pages | 719 | .quad compat_sys_move_pages |
720 | .quad sys_getcpu | 720 | .quad sys_getcpu |
721 | .quad sys_epoll_pwait | ||
721 | ia32_syscall_end: | 722 | ia32_syscall_end: |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 3c7cbff04d3d..ae399458024b 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -43,6 +43,7 @@ obj-$(CONFIG_PCI) += early-quirks.o | |||
43 | 43 | ||
44 | obj-y += topology.o | 44 | obj-y += topology.o |
45 | obj-y += intel_cacheinfo.o | 45 | obj-y += intel_cacheinfo.o |
46 | obj-y += pcspeaker.o | ||
46 | 47 | ||
47 | CFLAGS_vsyscall.o := $(PROFILING) -g0 | 48 | CFLAGS_vsyscall.o := $(PROFILING) -g0 |
48 | 49 | ||
@@ -56,3 +57,4 @@ quirks-y += ../../i386/kernel/quirks.o | |||
56 | i8237-y += ../../i386/kernel/i8237.o | 57 | i8237-y += ../../i386/kernel/i8237.o |
57 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o | 58 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o |
58 | alternative-y += ../../i386/kernel/alternative.o | 59 | alternative-y += ../../i386/kernel/alternative.o |
60 | pcspeaker-y += ../../i386/kernel/pcspeaker.o | ||
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index 5ebf62c7a3d2..23178ce6c783 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c | |||
@@ -58,7 +58,7 @@ unsigned long acpi_wakeup_address = 0; | |||
58 | unsigned long acpi_video_flags; | 58 | unsigned long acpi_video_flags; |
59 | extern char wakeup_start, wakeup_end; | 59 | extern char wakeup_start, wakeup_end; |
60 | 60 | ||
61 | extern unsigned long FASTCALL(acpi_copy_wakeup_routine(unsigned long)); | 61 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
62 | 62 | ||
63 | static pgd_t low_ptr; | 63 | static pgd_t low_ptr; |
64 | 64 | ||
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index 6fe191c58084..4651fd22b213 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -83,6 +83,13 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
83 | return 1; | 83 | return 1; |
84 | } | 84 | } |
85 | 85 | ||
86 | #ifdef CONFIG_NUMA | ||
87 | /* NUMA memory to node map */ | ||
88 | if (last >= nodemap_addr && addr < nodemap_addr + nodemap_size) { | ||
89 | *addrp = nodemap_addr + nodemap_size; | ||
90 | return 1; | ||
91 | } | ||
92 | #endif | ||
86 | /* XXX ramdisk image here? */ | 93 | /* XXX ramdisk image here? */ |
87 | return 0; | 94 | return 0; |
88 | } | 95 | } |
@@ -184,6 +191,37 @@ unsigned long __init e820_end_of_ram(void) | |||
184 | } | 191 | } |
185 | 192 | ||
186 | /* | 193 | /* |
194 | * Find the hole size in the range. | ||
195 | */ | ||
196 | unsigned long __init e820_hole_size(unsigned long start, unsigned long end) | ||
197 | { | ||
198 | unsigned long ram = 0; | ||
199 | int i; | ||
200 | |||
201 | for (i = 0; i < e820.nr_map; i++) { | ||
202 | struct e820entry *ei = &e820.map[i]; | ||
203 | unsigned long last, addr; | ||
204 | |||
205 | if (ei->type != E820_RAM || | ||
206 | ei->addr+ei->size <= start || | ||
207 | ei->addr >= end) | ||
208 | continue; | ||
209 | |||
210 | addr = round_up(ei->addr, PAGE_SIZE); | ||
211 | if (addr < start) | ||
212 | addr = start; | ||
213 | |||
214 | last = round_down(ei->addr + ei->size, PAGE_SIZE); | ||
215 | if (last >= end) | ||
216 | last = end; | ||
217 | |||
218 | if (last > addr) | ||
219 | ram += last - addr; | ||
220 | } | ||
221 | return ((end - start) - ram); | ||
222 | } | ||
223 | |||
224 | /* | ||
187 | * Mark e820 reserved areas as busy for the resource manager. | 225 | * Mark e820 reserved areas as busy for the resource manager. |
188 | */ | 226 | */ |
189 | void __init e820_reserve_resources(void) | 227 | void __init e820_reserve_resources(void) |
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 1e6f80870679..598a4d0351fc 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
@@ -163,6 +163,20 @@ startup_64: | |||
163 | */ | 163 | */ |
164 | lgdt cpu_gdt_descr | 164 | lgdt cpu_gdt_descr |
165 | 165 | ||
166 | /* set up data segments. actually 0 would do too */ | ||
167 | movl $__KERNEL_DS,%eax | ||
168 | movl %eax,%ds | ||
169 | movl %eax,%ss | ||
170 | movl %eax,%es | ||
171 | |||
172 | /* | ||
173 | * We don't really need to load %fs or %gs, but load them anyway | ||
174 | * to kill any stale realmode selectors. This allows execution | ||
175 | * under VT hardware. | ||
176 | */ | ||
177 | movl %eax,%fs | ||
178 | movl %eax,%gs | ||
179 | |||
166 | /* | 180 | /* |
167 | * Setup up a dummy PDA. this is just for some early bootup code | 181 | * Setup up a dummy PDA. this is just for some early bootup code |
168 | * that does in_interrupt() | 182 | * that does in_interrupt() |
@@ -173,12 +187,6 @@ startup_64: | |||
173 | shrq $32,%rdx | 187 | shrq $32,%rdx |
174 | wrmsr | 188 | wrmsr |
175 | 189 | ||
176 | /* set up data segments. actually 0 would do too */ | ||
177 | movl $__KERNEL_DS,%eax | ||
178 | movl %eax,%ds | ||
179 | movl %eax,%ss | ||
180 | movl %eax,%es | ||
181 | |||
182 | /* esi is pointer to real mode structure with interesting info. | 190 | /* esi is pointer to real mode structure with interesting info. |
183 | pass it to C */ | 191 | pass it to C */ |
184 | movl %esi, %edi | 192 | movl %esi, %edi |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 6be6730acb5c..566e64d966c4 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -831,7 +831,7 @@ static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) | |||
831 | entry.delivery_mode = INT_DELIVERY_MODE; | 831 | entry.delivery_mode = INT_DELIVERY_MODE; |
832 | entry.dest_mode = INT_DEST_MODE; | 832 | entry.dest_mode = INT_DEST_MODE; |
833 | entry.mask = 0; /* enable IRQ */ | 833 | entry.mask = 0; /* enable IRQ */ |
834 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | 834 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); |
835 | 835 | ||
836 | entry.trigger = irq_trigger(idx); | 836 | entry.trigger = irq_trigger(idx); |
837 | entry.polarity = irq_polarity(idx); | 837 | entry.polarity = irq_polarity(idx); |
@@ -839,7 +839,7 @@ static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) | |||
839 | if (irq_trigger(idx)) { | 839 | if (irq_trigger(idx)) { |
840 | entry.trigger = 1; | 840 | entry.trigger = 1; |
841 | entry.mask = 1; | 841 | entry.mask = 1; |
842 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | 842 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); |
843 | } | 843 | } |
844 | 844 | ||
845 | if (!apic && !IO_APIC_IRQ(irq)) | 845 | if (!apic && !IO_APIC_IRQ(irq)) |
@@ -851,7 +851,7 @@ static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq) | |||
851 | if (vector < 0) | 851 | if (vector < 0) |
852 | return; | 852 | return; |
853 | 853 | ||
854 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); | 854 | entry.dest = cpu_mask_to_apicid(mask); |
855 | entry.vector = vector; | 855 | entry.vector = vector; |
856 | 856 | ||
857 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); | 857 | ioapic_register_intr(irq, vector, IOAPIC_AUTO); |
@@ -920,7 +920,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
920 | */ | 920 | */ |
921 | entry.dest_mode = INT_DEST_MODE; | 921 | entry.dest_mode = INT_DEST_MODE; |
922 | entry.mask = 0; /* unmask IRQ now */ | 922 | entry.mask = 0; /* unmask IRQ now */ |
923 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS); | 923 | entry.dest = cpu_mask_to_apicid(TARGET_CPUS); |
924 | entry.delivery_mode = INT_DELIVERY_MODE; | 924 | entry.delivery_mode = INT_DELIVERY_MODE; |
925 | entry.polarity = 0; | 925 | entry.polarity = 0; |
926 | entry.trigger = 0; | 926 | entry.trigger = 0; |
@@ -1020,18 +1020,17 @@ void __apicdebuginit print_IO_APIC(void) | |||
1020 | 1020 | ||
1021 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1021 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
1022 | 1022 | ||
1023 | printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol" | 1023 | printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" |
1024 | " Stat Dest Deli Vect: \n"); | 1024 | " Stat Dmod Deli Vect: \n"); |
1025 | 1025 | ||
1026 | for (i = 0; i <= reg_01.bits.entries; i++) { | 1026 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1027 | struct IO_APIC_route_entry entry; | 1027 | struct IO_APIC_route_entry entry; |
1028 | 1028 | ||
1029 | entry = ioapic_read_entry(apic, i); | 1029 | entry = ioapic_read_entry(apic, i); |
1030 | 1030 | ||
1031 | printk(KERN_DEBUG " %02x %03X %02X ", | 1031 | printk(KERN_DEBUG " %02x %03X ", |
1032 | i, | 1032 | i, |
1033 | entry.dest.logical.logical_dest, | 1033 | entry.dest |
1034 | entry.dest.physical.physical_dest | ||
1035 | ); | 1034 | ); |
1036 | 1035 | ||
1037 | printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", | 1036 | printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", |
@@ -1293,8 +1292,7 @@ void disable_IO_APIC(void) | |||
1293 | entry.dest_mode = 0; /* Physical */ | 1292 | entry.dest_mode = 0; /* Physical */ |
1294 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ | 1293 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ |
1295 | entry.vector = 0; | 1294 | entry.vector = 0; |
1296 | entry.dest.physical.physical_dest = | 1295 | entry.dest = GET_APIC_ID(apic_read(APIC_ID)); |
1297 | GET_APIC_ID(apic_read(APIC_ID)); | ||
1298 | 1296 | ||
1299 | /* | 1297 | /* |
1300 | * Add it to the IO-APIC irq-routing table: | 1298 | * Add it to the IO-APIC irq-routing table: |
@@ -1556,7 +1554,7 @@ static inline void unlock_ExtINT_logic(void) | |||
1556 | 1554 | ||
1557 | entry1.dest_mode = 0; /* physical delivery */ | 1555 | entry1.dest_mode = 0; /* physical delivery */ |
1558 | entry1.mask = 0; /* unmask IRQ now */ | 1556 | entry1.mask = 0; /* unmask IRQ now */ |
1559 | entry1.dest.physical.physical_dest = hard_smp_processor_id(); | 1557 | entry1.dest = hard_smp_processor_id(); |
1560 | entry1.delivery_mode = dest_ExtINT; | 1558 | entry1.delivery_mode = dest_ExtINT; |
1561 | entry1.polarity = entry0.polarity; | 1559 | entry1.polarity = entry0.polarity; |
1562 | entry1.trigger = 0; | 1560 | entry1.trigger = 0; |
@@ -2131,7 +2129,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int p | |||
2131 | 2129 | ||
2132 | entry.delivery_mode = INT_DELIVERY_MODE; | 2130 | entry.delivery_mode = INT_DELIVERY_MODE; |
2133 | entry.dest_mode = INT_DEST_MODE; | 2131 | entry.dest_mode = INT_DEST_MODE; |
2134 | entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask); | 2132 | entry.dest = cpu_mask_to_apicid(mask); |
2135 | entry.trigger = triggering; | 2133 | entry.trigger = triggering; |
2136 | entry.polarity = polarity; | 2134 | entry.polarity = polarity; |
2137 | entry.mask = 1; /* Disabled (masked) */ | 2135 | entry.mask = 1; /* Disabled (masked) */ |
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c index fe063d3cfe42..745b1f0f494e 100644 --- a/arch/x86_64/kernel/ioport.c +++ b/arch/x86_64/kernel/ioport.c | |||
@@ -114,6 +114,6 @@ asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs) | |||
114 | if (!capable(CAP_SYS_RAWIO)) | 114 | if (!capable(CAP_SYS_RAWIO)) |
115 | return -EPERM; | 115 | return -EPERM; |
116 | } | 116 | } |
117 | regs->eflags = (regs->eflags &~ 0x3000UL) | (level << 12); | 117 | regs->eflags = (regs->eflags &~ X86_EFLAGS_IOPL) | (level << 12); |
118 | return 0; | 118 | return 0; |
119 | } | 119 | } |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index 0c06af6c13bc..3bc30d2c13d3 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include <asm/io_apic.h> | 19 | #include <asm/io_apic.h> |
20 | #include <asm/idle.h> | 20 | #include <asm/idle.h> |
21 | #include <asm/smp.h> | ||
21 | 22 | ||
22 | atomic_t irq_err_count; | 23 | atomic_t irq_err_count; |
23 | 24 | ||
@@ -120,9 +121,14 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
120 | 121 | ||
121 | if (likely(irq < NR_IRQS)) | 122 | if (likely(irq < NR_IRQS)) |
122 | generic_handle_irq(irq); | 123 | generic_handle_irq(irq); |
123 | else if (printk_ratelimit()) | 124 | else { |
124 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", | 125 | if (!disable_apic) |
125 | __func__, smp_processor_id(), vector); | 126 | ack_APIC_irq(); |
127 | |||
128 | if (printk_ratelimit()) | ||
129 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", | ||
130 | __func__, smp_processor_id(), vector); | ||
131 | } | ||
126 | 132 | ||
127 | irq_exit(); | 133 | irq_exit(); |
128 | 134 | ||
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index bdb54a2c9f18..8011a8e1c7d4 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
20 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
21 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
22 | #include <linux/kmod.h> | ||
22 | #include <asm/processor.h> | 23 | #include <asm/processor.h> |
23 | #include <asm/msr.h> | 24 | #include <asm/msr.h> |
24 | #include <asm/mce.h> | 25 | #include <asm/mce.h> |
@@ -42,6 +43,10 @@ static unsigned long console_logged; | |||
42 | static int notify_user; | 43 | static int notify_user; |
43 | static int rip_msr; | 44 | static int rip_msr; |
44 | static int mce_bootlog = 1; | 45 | static int mce_bootlog = 1; |
46 | static atomic_t mce_events; | ||
47 | |||
48 | static char trigger[128]; | ||
49 | static char *trigger_argv[2] = { trigger, NULL }; | ||
45 | 50 | ||
46 | /* | 51 | /* |
47 | * Lockless MCE logging infrastructure. | 52 | * Lockless MCE logging infrastructure. |
@@ -57,6 +62,7 @@ struct mce_log mcelog = { | |||
57 | void mce_log(struct mce *mce) | 62 | void mce_log(struct mce *mce) |
58 | { | 63 | { |
59 | unsigned next, entry; | 64 | unsigned next, entry; |
65 | atomic_inc(&mce_events); | ||
60 | mce->finished = 0; | 66 | mce->finished = 0; |
61 | wmb(); | 67 | wmb(); |
62 | for (;;) { | 68 | for (;;) { |
@@ -161,6 +167,17 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | |||
161 | } | 167 | } |
162 | } | 168 | } |
163 | 169 | ||
170 | static void do_mce_trigger(void) | ||
171 | { | ||
172 | static atomic_t mce_logged; | ||
173 | int events = atomic_read(&mce_events); | ||
174 | if (events != atomic_read(&mce_logged) && trigger[0]) { | ||
175 | /* Small race window, but should be harmless. */ | ||
176 | atomic_set(&mce_logged, events); | ||
177 | call_usermodehelper(trigger, trigger_argv, NULL, -1); | ||
178 | } | ||
179 | } | ||
180 | |||
164 | /* | 181 | /* |
165 | * The actual machine check handler | 182 | * The actual machine check handler |
166 | */ | 183 | */ |
@@ -234,8 +251,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
234 | } | 251 | } |
235 | 252 | ||
236 | /* Never do anything final in the polling timer */ | 253 | /* Never do anything final in the polling timer */ |
237 | if (!regs) | 254 | if (!regs) { |
255 | /* Normal interrupt context here. Call trigger for any new | ||
256 | events. */ | ||
257 | do_mce_trigger(); | ||
238 | goto out; | 258 | goto out; |
259 | } | ||
239 | 260 | ||
240 | /* If we didn't find an uncorrectable error, pick | 261 | /* If we didn't find an uncorrectable error, pick |
241 | the last one (shouldn't happen, just being safe). */ | 262 | the last one (shouldn't happen, just being safe). */ |
@@ -606,17 +627,42 @@ DEFINE_PER_CPU(struct sys_device, device_mce); | |||
606 | } \ | 627 | } \ |
607 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | 628 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); |
608 | 629 | ||
630 | /* TBD should generate these dynamically based on number of available banks */ | ||
609 | ACCESSOR(bank0ctl,bank[0],mce_restart()) | 631 | ACCESSOR(bank0ctl,bank[0],mce_restart()) |
610 | ACCESSOR(bank1ctl,bank[1],mce_restart()) | 632 | ACCESSOR(bank1ctl,bank[1],mce_restart()) |
611 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 633 | ACCESSOR(bank2ctl,bank[2],mce_restart()) |
612 | ACCESSOR(bank3ctl,bank[3],mce_restart()) | 634 | ACCESSOR(bank3ctl,bank[3],mce_restart()) |
613 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 635 | ACCESSOR(bank4ctl,bank[4],mce_restart()) |
614 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 636 | ACCESSOR(bank5ctl,bank[5],mce_restart()) |
615 | static struct sysdev_attribute * bank_attributes[NR_BANKS] = { | 637 | |
616 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | 638 | static ssize_t show_trigger(struct sys_device *s, char *buf) |
617 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl}; | 639 | { |
640 | strcpy(buf, trigger); | ||
641 | strcat(buf, "\n"); | ||
642 | return strlen(trigger) + 1; | ||
643 | } | ||
644 | |||
645 | static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | ||
646 | { | ||
647 | char *p; | ||
648 | int len; | ||
649 | strncpy(trigger, buf, sizeof(trigger)); | ||
650 | trigger[sizeof(trigger)-1] = 0; | ||
651 | len = strlen(trigger); | ||
652 | p = strchr(trigger, '\n'); | ||
653 | if (*p) *p = 0; | ||
654 | return len; | ||
655 | } | ||
656 | |||
657 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
618 | ACCESSOR(tolerant,tolerant,) | 658 | ACCESSOR(tolerant,tolerant,) |
619 | ACCESSOR(check_interval,check_interval,mce_restart()) | 659 | ACCESSOR(check_interval,check_interval,mce_restart()) |
660 | static struct sysdev_attribute *mce_attributes[] = { | ||
661 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | ||
662 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | ||
663 | &attr_tolerant, &attr_check_interval, &attr_trigger, | ||
664 | NULL | ||
665 | }; | ||
620 | 666 | ||
621 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ | 667 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ |
622 | static __cpuinit int mce_create_device(unsigned int cpu) | 668 | static __cpuinit int mce_create_device(unsigned int cpu) |
@@ -632,11 +678,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
632 | err = sysdev_register(&per_cpu(device_mce,cpu)); | 678 | err = sysdev_register(&per_cpu(device_mce,cpu)); |
633 | 679 | ||
634 | if (!err) { | 680 | if (!err) { |
635 | for (i = 0; i < banks; i++) | 681 | for (i = 0; mce_attributes[i]; i++) |
636 | sysdev_create_file(&per_cpu(device_mce,cpu), | 682 | sysdev_create_file(&per_cpu(device_mce,cpu), |
637 | bank_attributes[i]); | 683 | mce_attributes[i]); |
638 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); | ||
639 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); | ||
640 | } | 684 | } |
641 | return err; | 685 | return err; |
642 | } | 686 | } |
@@ -645,11 +689,9 @@ static void mce_remove_device(unsigned int cpu) | |||
645 | { | 689 | { |
646 | int i; | 690 | int i; |
647 | 691 | ||
648 | for (i = 0; i < banks; i++) | 692 | for (i = 0; mce_attributes[i]; i++) |
649 | sysdev_remove_file(&per_cpu(device_mce,cpu), | 693 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
650 | bank_attributes[i]); | 694 | mce_attributes[i]); |
651 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); | ||
652 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); | ||
653 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 695 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
654 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); | 696 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); |
655 | } | 697 | } |
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c index 93c707257637..d0bd5d66e103 100644 --- a/arch/x86_64/kernel/mce_amd.c +++ b/arch/x86_64/kernel/mce_amd.c | |||
@@ -37,6 +37,8 @@ | |||
37 | #define THRESHOLD_MAX 0xFFF | 37 | #define THRESHOLD_MAX 0xFFF |
38 | #define INT_TYPE_APIC 0x00020000 | 38 | #define INT_TYPE_APIC 0x00020000 |
39 | #define MASK_VALID_HI 0x80000000 | 39 | #define MASK_VALID_HI 0x80000000 |
40 | #define MASK_CNTP_HI 0x40000000 | ||
41 | #define MASK_LOCKED_HI 0x20000000 | ||
40 | #define MASK_LVTOFF_HI 0x00F00000 | 42 | #define MASK_LVTOFF_HI 0x00F00000 |
41 | #define MASK_COUNT_EN_HI 0x00080000 | 43 | #define MASK_COUNT_EN_HI 0x00080000 |
42 | #define MASK_INT_TYPE_HI 0x00060000 | 44 | #define MASK_INT_TYPE_HI 0x00060000 |
@@ -122,14 +124,17 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
122 | for (block = 0; block < NR_BLOCKS; ++block) { | 124 | for (block = 0; block < NR_BLOCKS; ++block) { |
123 | if (block == 0) | 125 | if (block == 0) |
124 | address = MSR_IA32_MC0_MISC + bank * 4; | 126 | address = MSR_IA32_MC0_MISC + bank * 4; |
125 | else if (block == 1) | 127 | else if (block == 1) { |
126 | address = MCG_XBLK_ADDR | 128 | address = (low & MASK_BLKPTR_LO) >> 21; |
127 | + ((low & MASK_BLKPTR_LO) >> 21); | 129 | if (!address) |
130 | break; | ||
131 | address += MCG_XBLK_ADDR; | ||
132 | } | ||
128 | else | 133 | else |
129 | ++address; | 134 | ++address; |
130 | 135 | ||
131 | if (rdmsr_safe(address, &low, &high)) | 136 | if (rdmsr_safe(address, &low, &high)) |
132 | continue; | 137 | break; |
133 | 138 | ||
134 | if (!(high & MASK_VALID_HI)) { | 139 | if (!(high & MASK_VALID_HI)) { |
135 | if (block) | 140 | if (block) |
@@ -138,8 +143,8 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
138 | break; | 143 | break; |
139 | } | 144 | } |
140 | 145 | ||
141 | if (!(high & MASK_VALID_HI >> 1) || | 146 | if (!(high & MASK_CNTP_HI) || |
142 | (high & MASK_VALID_HI >> 2)) | 147 | (high & MASK_LOCKED_HI)) |
143 | continue; | 148 | continue; |
144 | 149 | ||
145 | if (!block) | 150 | if (!block) |
@@ -187,17 +192,22 @@ asmlinkage void mce_threshold_interrupt(void) | |||
187 | 192 | ||
188 | /* assume first bank caused it */ | 193 | /* assume first bank caused it */ |
189 | for (bank = 0; bank < NR_BANKS; ++bank) { | 194 | for (bank = 0; bank < NR_BANKS; ++bank) { |
195 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) | ||
196 | continue; | ||
190 | for (block = 0; block < NR_BLOCKS; ++block) { | 197 | for (block = 0; block < NR_BLOCKS; ++block) { |
191 | if (block == 0) | 198 | if (block == 0) |
192 | address = MSR_IA32_MC0_MISC + bank * 4; | 199 | address = MSR_IA32_MC0_MISC + bank * 4; |
193 | else if (block == 1) | 200 | else if (block == 1) { |
194 | address = MCG_XBLK_ADDR | 201 | address = (low & MASK_BLKPTR_LO) >> 21; |
195 | + ((low & MASK_BLKPTR_LO) >> 21); | 202 | if (!address) |
203 | break; | ||
204 | address += MCG_XBLK_ADDR; | ||
205 | } | ||
196 | else | 206 | else |
197 | ++address; | 207 | ++address; |
198 | 208 | ||
199 | if (rdmsr_safe(address, &low, &high)) | 209 | if (rdmsr_safe(address, &low, &high)) |
200 | continue; | 210 | break; |
201 | 211 | ||
202 | if (!(high & MASK_VALID_HI)) { | 212 | if (!(high & MASK_VALID_HI)) { |
203 | if (block) | 213 | if (block) |
@@ -206,10 +216,14 @@ asmlinkage void mce_threshold_interrupt(void) | |||
206 | break; | 216 | break; |
207 | } | 217 | } |
208 | 218 | ||
209 | if (!(high & MASK_VALID_HI >> 1) || | 219 | if (!(high & MASK_CNTP_HI) || |
210 | (high & MASK_VALID_HI >> 2)) | 220 | (high & MASK_LOCKED_HI)) |
211 | continue; | 221 | continue; |
212 | 222 | ||
223 | /* Log the machine check that caused the threshold | ||
224 | event. */ | ||
225 | do_machine_check(NULL, 0); | ||
226 | |||
213 | if (high & MASK_OVERFLOW_HI) { | 227 | if (high & MASK_OVERFLOW_HI) { |
214 | rdmsrl(address, m.misc); | 228 | rdmsrl(address, m.misc); |
215 | rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, | 229 | rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, |
@@ -385,7 +399,7 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
385 | return 0; | 399 | return 0; |
386 | 400 | ||
387 | if (rdmsr_safe(address, &low, &high)) | 401 | if (rdmsr_safe(address, &low, &high)) |
388 | goto recurse; | 402 | return 0; |
389 | 403 | ||
390 | if (!(high & MASK_VALID_HI)) { | 404 | if (!(high & MASK_VALID_HI)) { |
391 | if (block) | 405 | if (block) |
@@ -394,8 +408,8 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
394 | return 0; | 408 | return 0; |
395 | } | 409 | } |
396 | 410 | ||
397 | if (!(high & MASK_VALID_HI >> 1) || | 411 | if (!(high & MASK_CNTP_HI) || |
398 | (high & MASK_VALID_HI >> 2)) | 412 | (high & MASK_LOCKED_HI)) |
399 | goto recurse; | 413 | goto recurse; |
400 | 414 | ||
401 | b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL); | 415 | b = kzalloc(sizeof(struct threshold_block), GFP_KERNEL); |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 9cb42ecb7f89..486f4c61a948 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -172,7 +172,7 @@ static __cpuinit inline int nmi_known_cpu(void) | |||
172 | { | 172 | { |
173 | switch (boot_cpu_data.x86_vendor) { | 173 | switch (boot_cpu_data.x86_vendor) { |
174 | case X86_VENDOR_AMD: | 174 | case X86_VENDOR_AMD: |
175 | return boot_cpu_data.x86 == 15; | 175 | return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16; |
176 | case X86_VENDOR_INTEL: | 176 | case X86_VENDOR_INTEL: |
177 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 177 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
178 | return 1; | 178 | return 1; |
@@ -214,6 +214,23 @@ static __init void nmi_cpu_busy(void *data) | |||
214 | } | 214 | } |
215 | #endif | 215 | #endif |
216 | 216 | ||
217 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
218 | { | ||
219 | unsigned int retval = hz; | ||
220 | |||
221 | /* | ||
222 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
223 | * are writable, with higher bits sign extending from bit 31. | ||
224 | * So, we can only program the counter with 31 bit values and | ||
225 | * 32nd bit should be 1, for 33.. to be 1. | ||
226 | * Find the appropriate nmi_hz | ||
227 | */ | ||
228 | if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) { | ||
229 | retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1; | ||
230 | } | ||
231 | return retval; | ||
232 | } | ||
233 | |||
217 | int __init check_nmi_watchdog (void) | 234 | int __init check_nmi_watchdog (void) |
218 | { | 235 | { |
219 | int *counts; | 236 | int *counts; |
@@ -268,17 +285,8 @@ int __init check_nmi_watchdog (void) | |||
268 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 285 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
269 | 286 | ||
270 | nmi_hz = 1; | 287 | nmi_hz = 1; |
271 | /* | 288 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) |
272 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | 289 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
273 | * are writable, with higher bits sign extending from bit 31. | ||
274 | * So, we can only program the counter with 31 bit values and | ||
275 | * 32nd bit should be 1, for 33.. to be 1. | ||
276 | * Find the appropriate nmi_hz | ||
277 | */ | ||
278 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
279 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
280 | nmi_hz = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1; | ||
281 | } | ||
282 | } | 290 | } |
283 | 291 | ||
284 | kfree(counts); | 292 | kfree(counts); |
@@ -360,6 +368,33 @@ void enable_timer_nmi_watchdog(void) | |||
360 | } | 368 | } |
361 | } | 369 | } |
362 | 370 | ||
371 | static void __acpi_nmi_disable(void *__unused) | ||
372 | { | ||
373 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * Disable timer based NMIs on all CPUs: | ||
378 | */ | ||
379 | void acpi_nmi_disable(void) | ||
380 | { | ||
381 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
382 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
383 | } | ||
384 | |||
385 | static void __acpi_nmi_enable(void *__unused) | ||
386 | { | ||
387 | apic_write(APIC_LVT0, APIC_DM_NMI); | ||
388 | } | ||
389 | |||
390 | /* | ||
391 | * Enable timer based NMIs on all CPUs: | ||
392 | */ | ||
393 | void acpi_nmi_enable(void) | ||
394 | { | ||
395 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
396 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
397 | } | ||
363 | #ifdef CONFIG_PM | 398 | #ifdef CONFIG_PM |
364 | 399 | ||
365 | static int nmi_pm_active; /* nmi_active before suspend */ | 400 | static int nmi_pm_active; /* nmi_active before suspend */ |
@@ -634,7 +669,9 @@ static int setup_intel_arch_watchdog(void) | |||
634 | 669 | ||
635 | /* setup the timer */ | 670 | /* setup the timer */ |
636 | wrmsr(evntsel_msr, evntsel, 0); | 671 | wrmsr(evntsel_msr, evntsel, 0); |
637 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | 672 | |
673 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
674 | wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0); | ||
638 | 675 | ||
639 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 676 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
640 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 677 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
@@ -855,15 +892,23 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
855 | dummy &= ~P4_CCCR_OVF; | 892 | dummy &= ~P4_CCCR_OVF; |
856 | wrmsrl(wd->cccr_msr, dummy); | 893 | wrmsrl(wd->cccr_msr, dummy); |
857 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 894 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
895 | /* start the cycle over again */ | ||
896 | wrmsrl(wd->perfctr_msr, | ||
897 | -((u64)cpu_khz * 1000 / nmi_hz)); | ||
858 | } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | 898 | } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { |
859 | /* | 899 | /* |
860 | * ArchPerfom/Core Duo needs to re-unmask | 900 | * ArchPerfom/Core Duo needs to re-unmask |
861 | * the apic vector | 901 | * the apic vector |
862 | */ | 902 | */ |
863 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 903 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
904 | /* ARCH_PERFMON has 32 bit counter writes */ | ||
905 | wrmsr(wd->perfctr_msr, | ||
906 | (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0); | ||
907 | } else { | ||
908 | /* start the cycle over again */ | ||
909 | wrmsrl(wd->perfctr_msr, | ||
910 | -((u64)cpu_khz * 1000 / nmi_hz)); | ||
864 | } | 911 | } |
865 | /* start the cycle over again */ | ||
866 | wrmsrl(wd->perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
867 | rc = 1; | 912 | rc = 1; |
868 | } else if (nmi_watchdog == NMI_IO_APIC) { | 913 | } else if (nmi_watchdog == NMI_IO_APIC) { |
869 | /* don't know how to accurately check for this. | 914 | /* don't know how to accurately check for this. |
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 3d65b1d4c2b3..04480c3b68f5 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c | |||
@@ -138,6 +138,8 @@ static const unsigned long phb_debug_offsets[] = { | |||
138 | 138 | ||
139 | #define PHB_DEBUG_STUFF_OFFSET 0x0020 | 139 | #define PHB_DEBUG_STUFF_OFFSET 0x0020 |
140 | 140 | ||
141 | #define EMERGENCY_PAGES 32 /* = 128KB */ | ||
142 | |||
141 | unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; | 143 | unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED; |
142 | static int translate_empty_slots __read_mostly = 0; | 144 | static int translate_empty_slots __read_mostly = 0; |
143 | static int calgary_detected __read_mostly = 0; | 145 | static int calgary_detected __read_mostly = 0; |
@@ -296,6 +298,16 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
296 | { | 298 | { |
297 | unsigned long entry; | 299 | unsigned long entry; |
298 | unsigned long badbit; | 300 | unsigned long badbit; |
301 | unsigned long badend; | ||
302 | |||
303 | /* were we called with bad_dma_address? */ | ||
304 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | ||
305 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | ||
306 | printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " | ||
307 | "address 0x%Lx\n", dma_addr); | ||
308 | WARN_ON(1); | ||
309 | return; | ||
310 | } | ||
299 | 311 | ||
300 | entry = dma_addr >> PAGE_SHIFT; | 312 | entry = dma_addr >> PAGE_SHIFT; |
301 | 313 | ||
@@ -656,8 +668,8 @@ static void __init calgary_reserve_regions(struct pci_dev *dev) | |||
656 | u64 start; | 668 | u64 start; |
657 | struct iommu_table *tbl = dev->sysdata; | 669 | struct iommu_table *tbl = dev->sysdata; |
658 | 670 | ||
659 | /* reserve bad_dma_address in case it's a legal address */ | 671 | /* reserve EMERGENCY_PAGES from bad_dma_address and up */ |
660 | iommu_range_reserve(tbl, bad_dma_address, 1); | 672 | iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES); |
661 | 673 | ||
662 | /* avoid the BIOS/VGA first 640KB-1MB region */ | 674 | /* avoid the BIOS/VGA first 640KB-1MB region */ |
663 | start = (640 * 1024); | 675 | start = (640 * 1024); |
@@ -1176,6 +1188,7 @@ int __init calgary_iommu_init(void) | |||
1176 | } | 1188 | } |
1177 | 1189 | ||
1178 | force_iommu = 1; | 1190 | force_iommu = 1; |
1191 | bad_dma_address = 0x0; | ||
1179 | dma_ops = &calgary_dma_ops; | 1192 | dma_ops = &calgary_dma_ops; |
1180 | 1193 | ||
1181 | return 0; | 1194 | return 0; |
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index 683b7a5c1ab3..651ccfb06697 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c | |||
@@ -223,30 +223,10 @@ int dma_set_mask(struct device *dev, u64 mask) | |||
223 | } | 223 | } |
224 | EXPORT_SYMBOL(dma_set_mask); | 224 | EXPORT_SYMBOL(dma_set_mask); |
225 | 225 | ||
226 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | 226 | /* |
227 | [,forcesac][,fullflush][,nomerge][,biomerge] | 227 | * See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter |
228 | size set size of iommu (in bytes) | 228 | * documentation. |
229 | noagp don't initialize the AGP driver and use full aperture. | 229 | */ |
230 | off don't use the IOMMU | ||
231 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
232 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
233 | noforce don't force IOMMU usage. Default. | ||
234 | force Force IOMMU. | ||
235 | merge Do lazy merging. This may improve performance on some block devices. | ||
236 | Implies force (experimental) | ||
237 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
238 | but should be only done with very big IOMMUs. Implies merge,force. | ||
239 | nomerge Don't do SG merging. | ||
240 | forcesac For SAC mode for masks <40bits (experimental) | ||
241 | fullflush Flush IOMMU on each allocation (default) | ||
242 | nofullflush Don't use IOMMU fullflush | ||
243 | allowed overwrite iommu off workarounds for specific chipsets. | ||
244 | soft Use software bounce buffering (default for Intel machines) | ||
245 | noaperture Don't touch the aperture for AGP. | ||
246 | allowdac Allow DMA >4GB | ||
247 | nodac Forbid DMA >4GB | ||
248 | panic Force panic when IOMMU overflows | ||
249 | */ | ||
250 | __init int iommu_setup(char *p) | 230 | __init int iommu_setup(char *p) |
251 | { | 231 | { |
252 | iommu_merge = 1; | 232 | iommu_merge = 1; |
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index fc1960f1f243..030eb3753358 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -185,7 +185,7 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
185 | static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) | 185 | static inline int need_iommu(struct device *dev, unsigned long addr, size_t size) |
186 | { | 186 | { |
187 | u64 mask = *dev->dma_mask; | 187 | u64 mask = *dev->dma_mask; |
188 | int high = addr + size >= mask; | 188 | int high = addr + size > mask; |
189 | int mmu = high; | 189 | int mmu = high; |
190 | if (force_iommu) | 190 | if (force_iommu) |
191 | mmu = 1; | 191 | mmu = 1; |
@@ -195,7 +195,7 @@ static inline int need_iommu(struct device *dev, unsigned long addr, size_t size | |||
195 | static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 195 | static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
196 | { | 196 | { |
197 | u64 mask = *dev->dma_mask; | 197 | u64 mask = *dev->dma_mask; |
198 | int high = addr + size >= mask; | 198 | int high = addr + size > mask; |
199 | int mmu = high; | 199 | int mmu = high; |
200 | return mmu; | 200 | return mmu; |
201 | } | 201 | } |
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index addc14af0c56..4326a690a509 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -536,8 +536,12 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
536 | } | 536 | } |
537 | ret = 0; | 537 | ret = 0; |
538 | for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { | 538 | for (ui = 0; ui < sizeof(struct user_regs_struct); ui += sizeof(long)) { |
539 | ret |= __get_user(tmp, (unsigned long __user *) data); | 539 | ret = __get_user(tmp, (unsigned long __user *) data); |
540 | putreg(child, ui, tmp); | 540 | if (ret) |
541 | break; | ||
542 | ret = putreg(child, ui, tmp); | ||
543 | if (ret) | ||
544 | break; | ||
541 | data += sizeof(long); | 545 | data += sizeof(long); |
542 | } | 546 | } |
543 | break; | 547 | break; |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 60477244d1a3..3d98b696881d 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -138,128 +138,6 @@ struct resource code_resource = { | |||
138 | .flags = IORESOURCE_RAM, | 138 | .flags = IORESOURCE_RAM, |
139 | }; | 139 | }; |
140 | 140 | ||
141 | #define IORESOURCE_ROM (IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM) | ||
142 | |||
143 | static struct resource system_rom_resource = { | ||
144 | .name = "System ROM", | ||
145 | .start = 0xf0000, | ||
146 | .end = 0xfffff, | ||
147 | .flags = IORESOURCE_ROM, | ||
148 | }; | ||
149 | |||
150 | static struct resource extension_rom_resource = { | ||
151 | .name = "Extension ROM", | ||
152 | .start = 0xe0000, | ||
153 | .end = 0xeffff, | ||
154 | .flags = IORESOURCE_ROM, | ||
155 | }; | ||
156 | |||
157 | static struct resource adapter_rom_resources[] = { | ||
158 | { .name = "Adapter ROM", .start = 0xc8000, .end = 0, | ||
159 | .flags = IORESOURCE_ROM }, | ||
160 | { .name = "Adapter ROM", .start = 0, .end = 0, | ||
161 | .flags = IORESOURCE_ROM }, | ||
162 | { .name = "Adapter ROM", .start = 0, .end = 0, | ||
163 | .flags = IORESOURCE_ROM }, | ||
164 | { .name = "Adapter ROM", .start = 0, .end = 0, | ||
165 | .flags = IORESOURCE_ROM }, | ||
166 | { .name = "Adapter ROM", .start = 0, .end = 0, | ||
167 | .flags = IORESOURCE_ROM }, | ||
168 | { .name = "Adapter ROM", .start = 0, .end = 0, | ||
169 | .flags = IORESOURCE_ROM } | ||
170 | }; | ||
171 | |||
172 | static struct resource video_rom_resource = { | ||
173 | .name = "Video ROM", | ||
174 | .start = 0xc0000, | ||
175 | .end = 0xc7fff, | ||
176 | .flags = IORESOURCE_ROM, | ||
177 | }; | ||
178 | |||
179 | static struct resource video_ram_resource = { | ||
180 | .name = "Video RAM area", | ||
181 | .start = 0xa0000, | ||
182 | .end = 0xbffff, | ||
183 | .flags = IORESOURCE_RAM, | ||
184 | }; | ||
185 | |||
186 | #define romsignature(x) (*(unsigned short *)(x) == 0xaa55) | ||
187 | |||
188 | static int __init romchecksum(unsigned char *rom, unsigned long length) | ||
189 | { | ||
190 | unsigned char *p, sum = 0; | ||
191 | |||
192 | for (p = rom; p < rom + length; p++) | ||
193 | sum += *p; | ||
194 | return sum == 0; | ||
195 | } | ||
196 | |||
197 | static void __init probe_roms(void) | ||
198 | { | ||
199 | unsigned long start, length, upper; | ||
200 | unsigned char *rom; | ||
201 | int i; | ||
202 | |||
203 | /* video rom */ | ||
204 | upper = adapter_rom_resources[0].start; | ||
205 | for (start = video_rom_resource.start; start < upper; start += 2048) { | ||
206 | rom = isa_bus_to_virt(start); | ||
207 | if (!romsignature(rom)) | ||
208 | continue; | ||
209 | |||
210 | video_rom_resource.start = start; | ||
211 | |||
212 | /* 0 < length <= 0x7f * 512, historically */ | ||
213 | length = rom[2] * 512; | ||
214 | |||
215 | /* if checksum okay, trust length byte */ | ||
216 | if (length && romchecksum(rom, length)) | ||
217 | video_rom_resource.end = start + length - 1; | ||
218 | |||
219 | request_resource(&iomem_resource, &video_rom_resource); | ||
220 | break; | ||
221 | } | ||
222 | |||
223 | start = (video_rom_resource.end + 1 + 2047) & ~2047UL; | ||
224 | if (start < upper) | ||
225 | start = upper; | ||
226 | |||
227 | /* system rom */ | ||
228 | request_resource(&iomem_resource, &system_rom_resource); | ||
229 | upper = system_rom_resource.start; | ||
230 | |||
231 | /* check for extension rom (ignore length byte!) */ | ||
232 | rom = isa_bus_to_virt(extension_rom_resource.start); | ||
233 | if (romsignature(rom)) { | ||
234 | length = extension_rom_resource.end - extension_rom_resource.start + 1; | ||
235 | if (romchecksum(rom, length)) { | ||
236 | request_resource(&iomem_resource, &extension_rom_resource); | ||
237 | upper = extension_rom_resource.start; | ||
238 | } | ||
239 | } | ||
240 | |||
241 | /* check for adapter roms on 2k boundaries */ | ||
242 | for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; | ||
243 | start += 2048) { | ||
244 | rom = isa_bus_to_virt(start); | ||
245 | if (!romsignature(rom)) | ||
246 | continue; | ||
247 | |||
248 | /* 0 < length <= 0x7f * 512, historically */ | ||
249 | length = rom[2] * 512; | ||
250 | |||
251 | /* but accept any length that fits if checksum okay */ | ||
252 | if (!length || start + length > upper || !romchecksum(rom, length)) | ||
253 | continue; | ||
254 | |||
255 | adapter_rom_resources[i].start = start; | ||
256 | adapter_rom_resources[i].end = start + length - 1; | ||
257 | request_resource(&iomem_resource, &adapter_rom_resources[i]); | ||
258 | |||
259 | start = adapter_rom_resources[i++].end & ~2047UL; | ||
260 | } | ||
261 | } | ||
262 | |||
263 | #ifdef CONFIG_PROC_VMCORE | 141 | #ifdef CONFIG_PROC_VMCORE |
264 | /* elfcorehdr= specifies the location of elf core header | 142 | /* elfcorehdr= specifies the location of elf core header |
265 | * stored by the crashed kernel. This option will be passed | 143 | * stored by the crashed kernel. This option will be passed |
@@ -444,6 +322,11 @@ void __init setup_arch(char **cmdline_p) | |||
444 | /* reserve ebda region */ | 322 | /* reserve ebda region */ |
445 | if (ebda_addr) | 323 | if (ebda_addr) |
446 | reserve_bootmem_generic(ebda_addr, ebda_size); | 324 | reserve_bootmem_generic(ebda_addr, ebda_size); |
325 | #ifdef CONFIG_NUMA | ||
326 | /* reserve nodemap region */ | ||
327 | if (nodemap_addr) | ||
328 | reserve_bootmem_generic(nodemap_addr, nodemap_size); | ||
329 | #endif | ||
447 | 330 | ||
448 | #ifdef CONFIG_SMP | 331 | #ifdef CONFIG_SMP |
449 | /* | 332 | /* |
@@ -519,15 +402,11 @@ void __init setup_arch(char **cmdline_p) | |||
519 | init_apic_mappings(); | 402 | init_apic_mappings(); |
520 | 403 | ||
521 | /* | 404 | /* |
522 | * Request address space for all standard RAM and ROM resources | 405 | * We trust e820 completely. No explicit ROM probing in memory. |
523 | * and also for regions reported as reserved by the e820. | 406 | */ |
524 | */ | ||
525 | probe_roms(); | ||
526 | e820_reserve_resources(); | 407 | e820_reserve_resources(); |
527 | e820_mark_nosave_regions(); | 408 | e820_mark_nosave_regions(); |
528 | 409 | ||
529 | request_resource(&iomem_resource, &video_ram_resource); | ||
530 | |||
531 | { | 410 | { |
532 | unsigned i; | 411 | unsigned i; |
533 | /* request I/O space for devices used on all i[345]86 PCs */ | 412 | /* request I/O space for devices used on all i[345]86 PCs */ |
@@ -1063,7 +942,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1063 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 942 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1064 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | 943 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
1065 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, | 944 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, |
1066 | NULL, "fxsr_opt", NULL, "rdtscp", NULL, "lm", "3dnowext", "3dnow", | 945 | NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm", |
946 | "3dnowext", "3dnow", | ||
1067 | 947 | ||
1068 | /* Transmeta-defined */ | 948 | /* Transmeta-defined */ |
1069 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | 949 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
@@ -1081,7 +961,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1081 | /* Intel-defined (#2) */ | 961 | /* Intel-defined (#2) */ |
1082 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | 962 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
1083 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, | 963 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
1084 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, | 964 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", |
1085 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 965 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1086 | 966 | ||
1087 | /* VIA/Cyrix/Centaur-defined */ | 967 | /* VIA/Cyrix/Centaur-defined */ |
@@ -1091,8 +971,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1091 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 971 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1092 | 972 | ||
1093 | /* AMD-defined (#2) */ | 973 | /* AMD-defined (#2) */ |
1094 | "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL, | 974 | "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", |
1095 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 975 | "altmovcr8", "abm", "sse4a", |
976 | "misalignsse", "3dnowprefetch", | ||
977 | "osvw", "ibs", NULL, NULL, NULL, NULL, | ||
1096 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 978 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1097 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 979 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1098 | }; | 980 | }; |
@@ -1103,6 +985,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1103 | "ttp", /* thermal trip */ | 985 | "ttp", /* thermal trip */ |
1104 | "tm", | 986 | "tm", |
1105 | "stc", | 987 | "stc", |
988 | "100mhzsteps", | ||
989 | "hwpstate", | ||
990 | NULL, /* tsc invariant mapped to constant_tsc */ | ||
1106 | NULL, | 991 | NULL, |
1107 | /* nothing */ /* constant_tsc - moved to flags */ | 992 | /* nothing */ /* constant_tsc - moved to flags */ |
1108 | }; | 993 | }; |
@@ -1219,23 +1104,3 @@ struct seq_operations cpuinfo_op = { | |||
1219 | .stop = c_stop, | 1104 | .stop = c_stop, |
1220 | .show = show_cpuinfo, | 1105 | .show = show_cpuinfo, |
1221 | }; | 1106 | }; |
1222 | |||
1223 | #if defined(CONFIG_INPUT_PCSPKR) || defined(CONFIG_INPUT_PCSPKR_MODULE) | ||
1224 | #include <linux/platform_device.h> | ||
1225 | static __init int add_pcspkr(void) | ||
1226 | { | ||
1227 | struct platform_device *pd; | ||
1228 | int ret; | ||
1229 | |||
1230 | pd = platform_device_alloc("pcspkr", -1); | ||
1231 | if (!pd) | ||
1232 | return -ENOMEM; | ||
1233 | |||
1234 | ret = platform_device_add(pd); | ||
1235 | if (ret) | ||
1236 | platform_device_put(pd); | ||
1237 | |||
1238 | return ret; | ||
1239 | } | ||
1240 | device_initcall(add_pcspkr); | ||
1241 | #endif | ||
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 8c4b80fe71a1..6a70b55f719d 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -37,7 +37,6 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | |||
37 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); | 37 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); |
38 | 38 | ||
39 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | 39 | unsigned long __supported_pte_mask __read_mostly = ~0UL; |
40 | EXPORT_SYMBOL(__supported_pte_mask); | ||
41 | static int do_not_nx __cpuinitdata = 0; | 40 | static int do_not_nx __cpuinitdata = 0; |
42 | 41 | ||
43 | /* noexec=on|off | 42 | /* noexec=on|off |
diff --git a/arch/x86_64/kernel/stacktrace.c b/arch/x86_64/kernel/stacktrace.c index 6026b31d037e..65ac2c6b34a6 100644 --- a/arch/x86_64/kernel/stacktrace.c +++ b/arch/x86_64/kernel/stacktrace.c | |||
@@ -32,7 +32,7 @@ static void save_stack_address(void *data, unsigned long addr) | |||
32 | trace->skip--; | 32 | trace->skip--; |
33 | return; | 33 | return; |
34 | } | 34 | } |
35 | if (trace->nr_entries < trace->max_entries - 1) | 35 | if (trace->nr_entries < trace->max_entries) |
36 | trace->entries[trace->nr_entries++] = addr; | 36 | trace->entries[trace->nr_entries++] = addr; |
37 | } | 37 | } |
38 | 38 | ||
@@ -49,7 +49,8 @@ static struct stacktrace_ops save_stack_ops = { | |||
49 | void save_stack_trace(struct stack_trace *trace, struct task_struct *task) | 49 | void save_stack_trace(struct stack_trace *trace, struct task_struct *task) |
50 | { | 50 | { |
51 | dump_trace(task, NULL, NULL, &save_stack_ops, trace); | 51 | dump_trace(task, NULL, NULL, &save_stack_ops, trace); |
52 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 52 | if (trace->nr_entries < trace->max_entries) |
53 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
53 | } | 54 | } |
54 | EXPORT_SYMBOL(save_stack_trace); | 55 | EXPORT_SYMBOL(save_stack_trace); |
55 | 56 | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 335cc91c49b7..3cc6886f1fb7 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -657,6 +657,7 @@ core_initcall(cpufreq_tsc); | |||
657 | 657 | ||
658 | #define TICK_COUNT 100000000 | 658 | #define TICK_COUNT 100000000 |
659 | #define TICK_MIN 5000 | 659 | #define TICK_MIN 5000 |
660 | #define MAX_READ_RETRIES 5 | ||
660 | 661 | ||
661 | /* | 662 | /* |
662 | * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none | 663 | * Some platforms take periodic SMI interrupts with 5ms duration. Make sure none |
@@ -664,13 +665,17 @@ core_initcall(cpufreq_tsc); | |||
664 | */ | 665 | */ |
665 | static void __init read_hpet_tsc(int *hpet, int *tsc) | 666 | static void __init read_hpet_tsc(int *hpet, int *tsc) |
666 | { | 667 | { |
667 | int tsc1, tsc2, hpet1; | 668 | int tsc1, tsc2, hpet1, retries = 0; |
669 | static int msg; | ||
668 | 670 | ||
669 | do { | 671 | do { |
670 | tsc1 = get_cycles_sync(); | 672 | tsc1 = get_cycles_sync(); |
671 | hpet1 = hpet_readl(HPET_COUNTER); | 673 | hpet1 = hpet_readl(HPET_COUNTER); |
672 | tsc2 = get_cycles_sync(); | 674 | tsc2 = get_cycles_sync(); |
673 | } while (tsc2 - tsc1 > TICK_MIN); | 675 | } while (tsc2 - tsc1 > TICK_MIN && retries++ < MAX_READ_RETRIES); |
676 | if (retries >= MAX_READ_RETRIES && !msg++) | ||
677 | printk(KERN_WARNING | ||
678 | "hpet.c: exceeded max retries to read HPET & TSC\n"); | ||
674 | *hpet = hpet1; | 679 | *hpet = hpet1; |
675 | *tsc = tsc2; | 680 | *tsc = tsc2; |
676 | } | 681 | } |
@@ -1221,8 +1226,9 @@ static void hpet_rtc_timer_reinit(void) | |||
1221 | if (PIE_on) | 1226 | if (PIE_on) |
1222 | PIE_count += lost_ints; | 1227 | PIE_count += lost_ints; |
1223 | 1228 | ||
1224 | printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", | 1229 | if (printk_ratelimit()) |
1225 | hpet_rtc_int_freq); | 1230 | printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", |
1231 | hpet_rtc_int_freq); | ||
1226 | } | 1232 | } |
1227 | } | 1233 | } |
1228 | 1234 | ||
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index 6d77e4797a47..0dffae69f4ad 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(__put_user_4); | |||
26 | EXPORT_SYMBOL(__put_user_8); | 26 | EXPORT_SYMBOL(__put_user_8); |
27 | 27 | ||
28 | EXPORT_SYMBOL(copy_user_generic); | 28 | EXPORT_SYMBOL(copy_user_generic); |
29 | EXPORT_SYMBOL(__copy_user_nocache); | ||
29 | EXPORT_SYMBOL(copy_from_user); | 30 | EXPORT_SYMBOL(copy_from_user); |
30 | EXPORT_SYMBOL(copy_to_user); | 31 | EXPORT_SYMBOL(copy_to_user); |
31 | EXPORT_SYMBOL(__copy_from_user_inatomic); | 32 | EXPORT_SYMBOL(__copy_from_user_inatomic); |
@@ -34,8 +35,8 @@ EXPORT_SYMBOL(copy_page); | |||
34 | EXPORT_SYMBOL(clear_page); | 35 | EXPORT_SYMBOL(clear_page); |
35 | 36 | ||
36 | #ifdef CONFIG_SMP | 37 | #ifdef CONFIG_SMP |
37 | extern void FASTCALL( __write_lock_failed(rwlock_t *rw)); | 38 | extern void __write_lock_failed(rwlock_t *rw); |
38 | extern void FASTCALL( __read_lock_failed(rwlock_t *rw)); | 39 | extern void __read_lock_failed(rwlock_t *rw); |
39 | EXPORT_SYMBOL(__write_lock_failed); | 40 | EXPORT_SYMBOL(__write_lock_failed); |
40 | EXPORT_SYMBOL(__read_lock_failed); | 41 | EXPORT_SYMBOL(__read_lock_failed); |
41 | #endif | 42 | #endif |
diff --git a/arch/x86_64/lib/Makefile b/arch/x86_64/lib/Makefile index b78d4170fce2..8d5f835af481 100644 --- a/arch/x86_64/lib/Makefile +++ b/arch/x86_64/lib/Makefile | |||
@@ -9,4 +9,4 @@ obj-y := io.o iomap_copy.o | |||
9 | lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ | 9 | lib-y := csum-partial.o csum-copy.o csum-wrappers.o delay.o \ |
10 | usercopy.o getuser.o putuser.o \ | 10 | usercopy.o getuser.o putuser.o \ |
11 | thunk.o clear_page.o copy_page.o bitstr.o bitops.o | 11 | thunk.o clear_page.o copy_page.o bitstr.o bitops.o |
12 | lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o | 12 | lib-y += memcpy.o memmove.o memset.o copy_user.o rwlock.o copy_user_nocache.o |
diff --git a/arch/x86_64/lib/copy_user_nocache.S b/arch/x86_64/lib/copy_user_nocache.S new file mode 100644 index 000000000000..4620efb12f13 --- /dev/null +++ b/arch/x86_64/lib/copy_user_nocache.S | |||
@@ -0,0 +1,217 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | ||
3 | * | ||
4 | * Functions to copy from and to user space. | ||
5 | */ | ||
6 | |||
7 | #include <linux/linkage.h> | ||
8 | #include <asm/dwarf2.h> | ||
9 | |||
10 | #define FIX_ALIGNMENT 1 | ||
11 | |||
12 | #include <asm/current.h> | ||
13 | #include <asm/asm-offsets.h> | ||
14 | #include <asm/thread_info.h> | ||
15 | #include <asm/cpufeature.h> | ||
16 | |||
17 | /* | ||
18 | * copy_user_nocache - Uncached memory copy with exception handling | ||
19 | * This will force destination/source out of cache for more performance. | ||
20 | * | ||
21 | * Input: | ||
22 | * rdi destination | ||
23 | * rsi source | ||
24 | * rdx count | ||
25 | * rcx zero flag: when 1, zero the uncopied rest of the destination on exception | |
26 | * | ||
27 | * Output: | ||
28 | * eax uncopied bytes or 0 if successful. | ||
29 | */ | ||
30 | ENTRY(__copy_user_nocache) | ||
31 | CFI_STARTPROC | ||
32 | pushq %rbx | ||
33 | CFI_ADJUST_CFA_OFFSET 8 | ||
34 | CFI_REL_OFFSET rbx, 0 | ||
35 | pushq %rcx /* save zero flag */ | ||
36 | CFI_ADJUST_CFA_OFFSET 8 | ||
37 | CFI_REL_OFFSET rcx, 0 | ||
38 | |||
39 | xorl %eax,%eax /* zero for the exception handler */ | ||
40 | |||
41 | #ifdef FIX_ALIGNMENT | ||
42 | /* check for bad alignment of destination */ | ||
43 | movl %edi,%ecx | ||
44 | andl $7,%ecx | ||
45 | jnz .Lbad_alignment | ||
46 | .Lafter_bad_alignment: | ||
47 | #endif | ||
48 | |||
49 | movq %rdx,%rcx | ||
50 | |||
51 | movl $64,%ebx | ||
52 | shrq $6,%rdx | ||
53 | decq %rdx | ||
54 | js .Lhandle_tail | ||
55 | |||
56 | .p2align 4 | ||
57 | .Lloop: | ||
58 | .Ls1: movq (%rsi),%r11 | ||
59 | .Ls2: movq 1*8(%rsi),%r8 | ||
60 | .Ls3: movq 2*8(%rsi),%r9 | ||
61 | .Ls4: movq 3*8(%rsi),%r10 | ||
62 | .Ld1: movnti %r11,(%rdi) | ||
63 | .Ld2: movnti %r8,1*8(%rdi) | ||
64 | .Ld3: movnti %r9,2*8(%rdi) | ||
65 | .Ld4: movnti %r10,3*8(%rdi) | ||
66 | |||
67 | .Ls5: movq 4*8(%rsi),%r11 | ||
68 | .Ls6: movq 5*8(%rsi),%r8 | ||
69 | .Ls7: movq 6*8(%rsi),%r9 | ||
70 | .Ls8: movq 7*8(%rsi),%r10 | ||
71 | .Ld5: movnti %r11,4*8(%rdi) | ||
72 | .Ld6: movnti %r8,5*8(%rdi) | ||
73 | .Ld7: movnti %r9,6*8(%rdi) | ||
74 | .Ld8: movnti %r10,7*8(%rdi) | ||
75 | |||
76 | dec %rdx | ||
77 | |||
78 | leaq 64(%rsi),%rsi | ||
79 | leaq 64(%rdi),%rdi | ||
80 | |||
81 | jns .Lloop | ||
82 | |||
83 | .p2align 4 | ||
84 | .Lhandle_tail: | ||
85 | movl %ecx,%edx | ||
86 | andl $63,%ecx | ||
87 | shrl $3,%ecx | ||
88 | jz .Lhandle_7 | ||
89 | movl $8,%ebx | ||
90 | .p2align 4 | ||
91 | .Lloop_8: | ||
92 | .Ls9: movq (%rsi),%r8 | ||
93 | .Ld9: movnti %r8,(%rdi) | ||
94 | decl %ecx | ||
95 | leaq 8(%rdi),%rdi | ||
96 | leaq 8(%rsi),%rsi | ||
97 | jnz .Lloop_8 | ||
98 | |||
99 | .Lhandle_7: | ||
100 | movl %edx,%ecx | ||
101 | andl $7,%ecx | ||
102 | jz .Lende | ||
103 | .p2align 4 | ||
104 | .Lloop_1: | ||
105 | .Ls10: movb (%rsi),%bl | ||
106 | .Ld10: movb %bl,(%rdi) | ||
107 | incq %rdi | ||
108 | incq %rsi | ||
109 | decl %ecx | ||
110 | jnz .Lloop_1 | ||
111 | |||
112 | CFI_REMEMBER_STATE | ||
113 | .Lende: | ||
114 | popq %rcx | ||
115 | CFI_ADJUST_CFA_OFFSET -8 | ||
116 | CFI_RESTORE %rcx | ||
117 | popq %rbx | ||
118 | CFI_ADJUST_CFA_OFFSET -8 | ||
119 | CFI_RESTORE rbx | ||
120 | ret | ||
121 | CFI_RESTORE_STATE | ||
122 | |||
123 | #ifdef FIX_ALIGNMENT | ||
124 | /* align destination */ | ||
125 | .p2align 4 | ||
126 | .Lbad_alignment: | ||
127 | movl $8,%r9d | ||
128 | subl %ecx,%r9d | ||
129 | movl %r9d,%ecx | ||
130 | cmpq %r9,%rdx | ||
131 | jz .Lhandle_7 | ||
132 | js .Lhandle_7 | ||
133 | .Lalign_1: | ||
134 | .Ls11: movb (%rsi),%bl | ||
135 | .Ld11: movb %bl,(%rdi) | ||
136 | incq %rsi | ||
137 | incq %rdi | ||
138 | decl %ecx | ||
139 | jnz .Lalign_1 | ||
140 | subq %r9,%rdx | ||
141 | jmp .Lafter_bad_alignment | ||
142 | #endif | ||
143 | |||
144 | /* table sorted by exception address */ | ||
145 | .section __ex_table,"a" | ||
146 | .align 8 | ||
147 | .quad .Ls1,.Ls1e | ||
148 | .quad .Ls2,.Ls2e | ||
149 | .quad .Ls3,.Ls3e | ||
150 | .quad .Ls4,.Ls4e | ||
151 | .quad .Ld1,.Ls1e | ||
152 | .quad .Ld2,.Ls2e | ||
153 | .quad .Ld3,.Ls3e | ||
154 | .quad .Ld4,.Ls4e | ||
155 | .quad .Ls5,.Ls5e | ||
156 | .quad .Ls6,.Ls6e | ||
157 | .quad .Ls7,.Ls7e | ||
158 | .quad .Ls8,.Ls8e | ||
159 | .quad .Ld5,.Ls5e | ||
160 | .quad .Ld6,.Ls6e | ||
161 | .quad .Ld7,.Ls7e | ||
162 | .quad .Ld8,.Ls8e | ||
163 | .quad .Ls9,.Le_quad | ||
164 | .quad .Ld9,.Le_quad | ||
165 | .quad .Ls10,.Le_byte | ||
166 | .quad .Ld10,.Le_byte | ||
167 | #ifdef FIX_ALIGNMENT | ||
168 | .quad .Ls11,.Lzero_rest | ||
169 | .quad .Ld11,.Lzero_rest | ||
170 | #endif | ||
171 | .quad .Le5,.Le_zero | ||
172 | .previous | ||
173 | |||
174 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
175 | pessimistic side. this is gross. it would be better to fix the | ||
176 | interface. */ | ||
177 | /* eax: zero, ebx: 64 */ | ||
178 | .Ls1e: addl $8,%eax | ||
179 | .Ls2e: addl $8,%eax | ||
180 | .Ls3e: addl $8,%eax | ||
181 | .Ls4e: addl $8,%eax | ||
182 | .Ls5e: addl $8,%eax | ||
183 | .Ls6e: addl $8,%eax | ||
184 | .Ls7e: addl $8,%eax | ||
185 | .Ls8e: addl $8,%eax | ||
186 | addq %rbx,%rdi /* +64 */ | ||
187 | subq %rax,%rdi /* correct destination with computed offset */ | ||
188 | |||
189 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
190 | addq %rax,%rdx /* add offset to loopcnt */ | ||
191 | andl $63,%ecx /* remaining bytes */ | ||
192 | addq %rcx,%rdx /* add them */ | ||
193 | jmp .Lzero_rest | ||
194 | |||
195 | /* exception on quad word loop in tail handling */ | ||
196 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
197 | .Le_quad: | ||
198 | shll $3,%ecx | ||
199 | andl $7,%edx | ||
200 | addl %ecx,%edx | ||
201 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
202 | .Lzero_rest: | ||
203 | cmpl $0,(%rsp) /* zero flag set? */ | ||
204 | jz .Le_zero | ||
205 | movq %rdx,%rcx | ||
206 | .Le_byte: | ||
207 | xorl %eax,%eax | ||
208 | .Le5: rep | ||
209 | stosb | ||
210 | /* when there is another exception while zeroing the rest just return */ | ||
211 | .Le_zero: | ||
212 | movq %rdx,%rax | ||
213 | jmp .Lende | ||
214 | CFI_ENDPROC | ||
215 | ENDPROC(__copy_user_nocache) | ||
216 | |||
217 | |||
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 49e8cf2e06f8..6ada7231f3ab 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -56,17 +56,17 @@ int unregister_page_fault_notifier(struct notifier_block *nb) | |||
56 | } | 56 | } |
57 | EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); | 57 | EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); |
58 | 58 | ||
59 | static inline int notify_page_fault(enum die_val val, const char *str, | 59 | static inline int notify_page_fault(struct pt_regs *regs, long err) |
60 | struct pt_regs *regs, long err, int trap, int sig) | ||
61 | { | 60 | { |
62 | struct die_args args = { | 61 | struct die_args args = { |
63 | .regs = regs, | 62 | .regs = regs, |
64 | .str = str, | 63 | .str = "page fault", |
65 | .err = err, | 64 | .err = err, |
66 | .trapnr = trap, | 65 | .trapnr = 14, |
67 | .signr = sig | 66 | .signr = SIGSEGV |
68 | }; | 67 | }; |
69 | return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args); | 68 | return atomic_notifier_call_chain(&notify_page_fault_chain, |
69 | DIE_PAGE_FAULT, &args); | ||
70 | } | 70 | } |
71 | 71 | ||
72 | /* Sometimes the CPU reports invalid exceptions on prefetch. | 72 | /* Sometimes the CPU reports invalid exceptions on prefetch. |
@@ -355,8 +355,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
355 | if (vmalloc_fault(address) >= 0) | 355 | if (vmalloc_fault(address) >= 0) |
356 | return; | 356 | return; |
357 | } | 357 | } |
358 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 358 | if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
359 | SIGSEGV) == NOTIFY_STOP) | ||
360 | return; | 359 | return; |
361 | /* | 360 | /* |
362 | * Don't take the mm semaphore here. If we fixup a prefetch | 361 | * Don't take the mm semaphore here. If we fixup a prefetch |
@@ -365,8 +364,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
365 | goto bad_area_nosemaphore; | 364 | goto bad_area_nosemaphore; |
366 | } | 365 | } |
367 | 366 | ||
368 | if (notify_page_fault(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 367 | if (notify_page_fault(regs, error_code) == NOTIFY_STOP) |
369 | SIGSEGV) == NOTIFY_STOP) | ||
370 | return; | 368 | return; |
371 | 369 | ||
372 | if (likely(regs->eflags & X86_EFLAGS_IF)) | 370 | if (likely(regs->eflags & X86_EFLAGS_IF)) |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 2ee2e003606c..41b8fb069924 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -36,6 +36,8 @@ unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | |||
36 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | 36 | cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; |
37 | 37 | ||
38 | int numa_off __initdata; | 38 | int numa_off __initdata; |
39 | unsigned long __initdata nodemap_addr; | ||
40 | unsigned long __initdata nodemap_size; | ||
39 | 41 | ||
40 | 42 | ||
41 | /* | 43 | /* |
@@ -52,34 +54,88 @@ populate_memnodemap(const struct bootnode *nodes, int numnodes, int shift) | |||
52 | int res = -1; | 54 | int res = -1; |
53 | unsigned long addr, end; | 55 | unsigned long addr, end; |
54 | 56 | ||
55 | if (shift >= 64) | 57 | memset(memnodemap, 0xff, memnodemapsize); |
56 | return -1; | ||
57 | memset(memnodemap, 0xff, sizeof(memnodemap)); | ||
58 | for (i = 0; i < numnodes; i++) { | 58 | for (i = 0; i < numnodes; i++) { |
59 | addr = nodes[i].start; | 59 | addr = nodes[i].start; |
60 | end = nodes[i].end; | 60 | end = nodes[i].end; |
61 | if (addr >= end) | 61 | if (addr >= end) |
62 | continue; | 62 | continue; |
63 | if ((end >> shift) >= NODEMAPSIZE) | 63 | if ((end >> shift) >= memnodemapsize) |
64 | return 0; | 64 | return 0; |
65 | do { | 65 | do { |
66 | if (memnodemap[addr >> shift] != 0xff) | 66 | if (memnodemap[addr >> shift] != 0xff) |
67 | return -1; | 67 | return -1; |
68 | memnodemap[addr >> shift] = i; | 68 | memnodemap[addr >> shift] = i; |
69 | addr += (1UL << shift); | 69 | addr += (1UL << shift); |
70 | } while (addr < end); | 70 | } while (addr < end); |
71 | res = 1; | 71 | res = 1; |
72 | } | 72 | } |
73 | return res; | 73 | return res; |
74 | } | 74 | } |
75 | 75 | ||
76 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | 76 | static int __init allocate_cachealigned_memnodemap(void) |
77 | { | 77 | { |
78 | int shift = 20; | 78 | unsigned long pad, pad_addr; |
79 | |||
80 | memnodemap = memnode.embedded_map; | ||
81 | if (memnodemapsize <= 48) | ||
82 | return 0; | ||
83 | |||
84 | pad = L1_CACHE_BYTES - 1; | ||
85 | pad_addr = 0x8000; | ||
86 | nodemap_size = pad + memnodemapsize; | ||
87 | nodemap_addr = find_e820_area(pad_addr, end_pfn<<PAGE_SHIFT, | ||
88 | nodemap_size); | ||
89 | if (nodemap_addr == -1UL) { | ||
90 | printk(KERN_ERR | ||
91 | "NUMA: Unable to allocate Memory to Node hash map\n"); | ||
92 | nodemap_addr = nodemap_size = 0; | ||
93 | return -1; | ||
94 | } | ||
95 | pad_addr = (nodemap_addr + pad) & ~pad; | ||
96 | memnodemap = phys_to_virt(pad_addr); | ||
97 | |||
98 | printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n", | ||
99 | nodemap_addr, nodemap_addr + nodemap_size); | ||
100 | return 0; | ||
101 | } | ||
79 | 102 | ||
80 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | 103 | /* |
81 | shift++; | 104 | * The LSB of all start and end addresses in the node map is the value of the |
105 | * maximum possible shift. | ||
106 | */ | ||
107 | static int __init | ||
108 | extract_lsb_from_nodes (const struct bootnode *nodes, int numnodes) | ||
109 | { | ||
110 | int i, nodes_used = 0; | ||
111 | unsigned long start, end; | ||
112 | unsigned long bitfield = 0, memtop = 0; | ||
113 | |||
114 | for (i = 0; i < numnodes; i++) { | ||
115 | start = nodes[i].start; | ||
116 | end = nodes[i].end; | ||
117 | if (start >= end) | ||
118 | continue; | ||
119 | bitfield |= start; | ||
120 | nodes_used++; | ||
121 | if (end > memtop) | ||
122 | memtop = end; | ||
123 | } | ||
124 | if (nodes_used <= 1) | ||
125 | i = 63; | ||
126 | else | ||
127 | i = find_first_bit(&bitfield, sizeof(unsigned long)*8); | ||
128 | memnodemapsize = (memtop >> i)+1; | ||
129 | return i; | ||
130 | } | ||
131 | |||
132 | int __init compute_hash_shift(struct bootnode *nodes, int numnodes) | ||
133 | { | ||
134 | int shift; | ||
82 | 135 | ||
136 | shift = extract_lsb_from_nodes(nodes, numnodes); | ||
137 | if (allocate_cachealigned_memnodemap()) | ||
138 | return -1; | ||
83 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | 139 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", |
84 | shift); | 140 | shift); |
85 | 141 | ||
@@ -216,31 +272,113 @@ void __init numa_init_array(void) | |||
216 | } | 272 | } |
217 | 273 | ||
218 | #ifdef CONFIG_NUMA_EMU | 274 | #ifdef CONFIG_NUMA_EMU |
275 | /* Numa emulation */ | ||
219 | int numa_fake __initdata = 0; | 276 | int numa_fake __initdata = 0; |
220 | 277 | ||
221 | /* Numa emulation */ | 278 | /* |
279 | * This function is used to find out if the start and end correspond to | ||
280 | * different zones. | ||
281 | */ | ||
282 | int zone_cross_over(unsigned long start, unsigned long end) | ||
283 | { | ||
284 | if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) && | ||
285 | (end >= (MAX_DMA32_PFN << PAGE_SHIFT))) | ||
286 | return 1; | ||
287 | return 0; | ||
288 | } | ||
289 | |||
222 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | 290 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) |
223 | { | 291 | { |
224 | int i; | 292 | int i, big; |
225 | struct bootnode nodes[MAX_NUMNODES]; | 293 | struct bootnode nodes[MAX_NUMNODES]; |
226 | unsigned long sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake; | 294 | unsigned long sz, old_sz; |
295 | unsigned long hole_size; | ||
296 | unsigned long start, end; | ||
297 | unsigned long max_addr = (end_pfn << PAGE_SHIFT); | ||
298 | |||
299 | start = (start_pfn << PAGE_SHIFT); | ||
300 | hole_size = e820_hole_size(start, max_addr); | ||
301 | sz = (max_addr - start - hole_size) / numa_fake; | ||
227 | 302 | ||
228 | /* Kludge needed for the hash function */ | 303 | /* Kludge needed for the hash function */ |
229 | if (hweight64(sz) > 1) { | ||
230 | unsigned long x = 1; | ||
231 | while ((x << 1) < sz) | ||
232 | x <<= 1; | ||
233 | if (x < sz/2) | ||
234 | printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n"); | ||
235 | sz = x; | ||
236 | } | ||
237 | 304 | ||
305 | old_sz = sz; | ||
306 | /* | ||
307 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
308 | */ | ||
309 | sz &= FAKE_NODE_MIN_HASH_MASK; | ||
310 | |||
311 | /* | ||
312 | * We ensure that each node is at least 64MB big. Smaller than this | ||
313 | * size can cause VM hiccups. | ||
314 | */ | ||
315 | if (sz == 0) { | ||
316 | printk(KERN_INFO "Not enough memory for %d nodes. Reducing " | ||
317 | "the number of nodes\n", numa_fake); | ||
318 | numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE; | ||
319 | printk(KERN_INFO "Number of fake nodes will be = %d\n", | ||
320 | numa_fake); | ||
321 | sz = FAKE_NODE_MIN_SIZE; | ||
322 | } | ||
323 | /* | ||
324 | * Find out how many nodes can get an extra NODE_MIN_SIZE granule. | ||
325 | * This logic ensures the extra memory gets distributed among as many | ||
326 | * nodes as possible (as compared to one single node getting all that | ||
327 | * extra memory). | |
328 | */ | ||
329 | big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE; | ||
330 | printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: " | ||
331 | "%d\n", | ||
332 | (sz >> 20), (hole_size >> 20), big); | ||
238 | memset(&nodes,0,sizeof(nodes)); | 333 | memset(&nodes,0,sizeof(nodes)); |
334 | end = start; | ||
239 | for (i = 0; i < numa_fake; i++) { | 335 | for (i = 0; i < numa_fake; i++) { |
240 | nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz; | 336 | /* |
337 | * In case we are not able to allocate enough memory for all | ||
338 | * the nodes, we reduce the number of fake nodes. | ||
339 | */ | ||
340 | if (end >= max_addr) { | ||
341 | numa_fake = i - 1; | ||
342 | break; | ||
343 | } | ||
344 | start = nodes[i].start = end; | ||
345 | /* | ||
346 | * Final node can have all the remaining memory. | ||
347 | */ | ||
241 | if (i == numa_fake-1) | 348 | if (i == numa_fake-1) |
242 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; | 349 | sz = max_addr - start; |
243 | nodes[i].end = nodes[i].start + sz; | 350 | end = nodes[i].start + sz; |
351 | /* | ||
352 | * First "big" number of nodes get extra granule. | |
353 | */ | ||
354 | if (i < big) | ||
355 | end += FAKE_NODE_MIN_SIZE; | ||
356 | /* | ||
357 | * Iterate over the range to ensure that this node gets at | ||
358 | * least sz amount of RAM (excluding holes) | ||
359 | */ | ||
360 | while ((end - start - e820_hole_size(start, end)) < sz) { | ||
361 | end += FAKE_NODE_MIN_SIZE; | ||
362 | if (end >= max_addr) | ||
363 | break; | ||
364 | } | ||
365 | /* | ||
366 | * Look at the next node to make sure there is some real memory | ||
367 | * to map. Bad things happen when the only memory present | ||
368 | * in a zone on a fake node is IO hole. | ||
369 | */ | ||
370 | while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) { | ||
371 | if (zone_cross_over(start, end + sz)) { | ||
372 | end = (MAX_DMA32_PFN << PAGE_SHIFT); | ||
373 | break; | ||
374 | } | ||
375 | if (end >= max_addr) | ||
376 | break; | ||
377 | end += FAKE_NODE_MIN_SIZE; | ||
378 | } | ||
379 | if (end > max_addr) | ||
380 | end = max_addr; | ||
381 | nodes[i].end = end; | ||
244 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 382 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", |
245 | i, | 383 | i, |
246 | nodes[i].start, nodes[i].end, | 384 | nodes[i].start, nodes[i].end, |
@@ -290,6 +428,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
290 | end_pfn << PAGE_SHIFT); | 428 | end_pfn << PAGE_SHIFT); |
291 | /* setup dummy node covering all memory */ | 429 | /* setup dummy node covering all memory */ |
292 | memnode_shift = 63; | 430 | memnode_shift = 63; |
431 | memnodemap = memnode.embedded_map; | ||
293 | memnodemap[0] = 0; | 432 | memnodemap[0] = 0; |
294 | nodes_clear(node_online_map); | 433 | nodes_clear(node_online_map); |
295 | node_set_online(0); | 434 | node_set_online(0); |
@@ -321,20 +460,6 @@ unsigned long __init numa_free_all_bootmem(void) | |||
321 | return pages; | 460 | return pages; |
322 | } | 461 | } |
323 | 462 | ||
324 | #ifdef CONFIG_SPARSEMEM | ||
325 | static void __init arch_sparse_init(void) | ||
326 | { | ||
327 | int i; | ||
328 | |||
329 | for_each_online_node(i) | ||
330 | memory_present(i, node_start_pfn(i), node_end_pfn(i)); | ||
331 | |||
332 | sparse_init(); | ||
333 | } | ||
334 | #else | ||
335 | #define arch_sparse_init() do {} while (0) | ||
336 | #endif | ||
337 | |||
338 | void __init paging_init(void) | 463 | void __init paging_init(void) |
339 | { | 464 | { |
340 | int i; | 465 | int i; |
@@ -344,7 +469,8 @@ void __init paging_init(void) | |||
344 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 469 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
345 | max_zone_pfns[ZONE_NORMAL] = end_pfn; | 470 | max_zone_pfns[ZONE_NORMAL] = end_pfn; |
346 | 471 | ||
347 | arch_sparse_init(); | 472 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
473 | sparse_init(); | ||
348 | 474 | ||
349 | for_each_online_node(i) { | 475 | for_each_online_node(i) { |
350 | setup_node_zones(i); | 476 | setup_node_zones(i); |
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index ccb91dd996a9..65c5eaa59905 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -107,6 +107,7 @@ static void revert_page(unsigned long address, pgprot_t ref_prot) | |||
107 | pud_t *pud; | 107 | pud_t *pud; |
108 | pmd_t *pmd; | 108 | pmd_t *pmd; |
109 | pte_t large_pte; | 109 | pte_t large_pte; |
110 | unsigned long pfn; | ||
110 | 111 | ||
111 | pgd = pgd_offset_k(address); | 112 | pgd = pgd_offset_k(address); |
112 | BUG_ON(pgd_none(*pgd)); | 113 | BUG_ON(pgd_none(*pgd)); |
@@ -114,7 +115,8 @@ static void revert_page(unsigned long address, pgprot_t ref_prot) | |||
114 | BUG_ON(pud_none(*pud)); | 115 | BUG_ON(pud_none(*pud)); |
115 | pmd = pmd_offset(pud, address); | 116 | pmd = pmd_offset(pud, address); |
116 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); | 117 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); |
117 | large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot); | 118 | pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; |
119 | large_pte = pfn_pte(pfn, ref_prot); | ||
118 | large_pte = pte_mkhuge(large_pte); | 120 | large_pte = pte_mkhuge(large_pte); |
119 | set_pte((pte_t *)pmd, large_pte); | 121 | set_pte((pte_t *)pmd, large_pte); |
120 | } | 122 | } |
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index 149aba05a5b8..c9eddc8859c0 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile | |||
@@ -11,7 +11,7 @@ obj-y += fixup.o init.o | |||
11 | obj-$(CONFIG_ACPI) += acpi.o | 11 | obj-$(CONFIG_ACPI) += acpi.o |
12 | obj-y += legacy.o irq.o common.o early.o | 12 | obj-y += legacy.o irq.o common.o early.o |
13 | # mmconfig has a 64bit special | 13 | # mmconfig has a 64bit special |
14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o | 14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o mmconfig-shared.o |
15 | 15 | ||
16 | obj-$(CONFIG_NUMA) += k8-bus.o | 16 | obj-$(CONFIG_NUMA) += k8-bus.o |
17 | 17 | ||
@@ -24,3 +24,4 @@ fixup-y += ../../i386/pci/fixup.o | |||
24 | i386-y += ../../i386/pci/i386.o | 24 | i386-y += ../../i386/pci/i386.o |
25 | init-y += ../../i386/pci/init.o | 25 | init-y += ../../i386/pci/init.o |
26 | early-y += ../../i386/pci/early.o | 26 | early-y += ../../i386/pci/early.o |
27 | mmconfig-shared-y += ../../i386/pci/mmconfig-shared.o | ||
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index faabb6e87f12..65d82736987e 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c | |||
@@ -13,16 +13,6 @@ | |||
13 | 13 | ||
14 | #include "pci.h" | 14 | #include "pci.h" |
15 | 15 | ||
16 | /* aperture is up to 256MB but BIOS may reserve less */ | ||
17 | #define MMCONFIG_APER_MIN (2 * 1024*1024) | ||
18 | #define MMCONFIG_APER_MAX (256 * 1024*1024) | ||
19 | |||
20 | /* Verify the first 16 busses. We assume that systems with more busses | ||
21 | get MCFG right. */ | ||
22 | #define MAX_CHECK_BUS 16 | ||
23 | |||
24 | static DECLARE_BITMAP(fallback_slots, 32*MAX_CHECK_BUS); | ||
25 | |||
26 | /* Static virtual mapping of the MMCONFIG aperture */ | 16 | /* Static virtual mapping of the MMCONFIG aperture */ |
27 | struct mmcfg_virt { | 17 | struct mmcfg_virt { |
28 | struct acpi_mcfg_allocation *cfg; | 18 | struct acpi_mcfg_allocation *cfg; |
@@ -32,30 +22,17 @@ static struct mmcfg_virt *pci_mmcfg_virt; | |||
32 | 22 | ||
33 | static char __iomem *get_virt(unsigned int seg, unsigned bus) | 23 | static char __iomem *get_virt(unsigned int seg, unsigned bus) |
34 | { | 24 | { |
35 | int cfg_num = -1; | ||
36 | struct acpi_mcfg_allocation *cfg; | 25 | struct acpi_mcfg_allocation *cfg; |
26 | int cfg_num; | ||
37 | 27 | ||
38 | while (1) { | 28 | for (cfg_num = 0; cfg_num < pci_mmcfg_config_num; cfg_num++) { |
39 | ++cfg_num; | ||
40 | if (cfg_num >= pci_mmcfg_config_num) | ||
41 | break; | ||
42 | cfg = pci_mmcfg_virt[cfg_num].cfg; | 29 | cfg = pci_mmcfg_virt[cfg_num].cfg; |
43 | if (cfg->pci_segment != seg) | 30 | if (cfg->pci_segment == seg && |
44 | continue; | 31 | (cfg->start_bus_number <= bus) && |
45 | if ((cfg->start_bus_number <= bus) && | ||
46 | (cfg->end_bus_number >= bus)) | 32 | (cfg->end_bus_number >= bus)) |
47 | return pci_mmcfg_virt[cfg_num].virt; | 33 | return pci_mmcfg_virt[cfg_num].virt; |
48 | } | 34 | } |
49 | 35 | ||
50 | /* Handle more broken MCFG tables on Asus etc. | ||
51 | They only contain a single entry for bus 0-0. Assume | ||
52 | this applies to all busses. */ | ||
53 | cfg = &pci_mmcfg_config[0]; | ||
54 | if (pci_mmcfg_config_num == 1 && | ||
55 | cfg->pci_segment == 0 && | ||
56 | (cfg->start_bus_number | cfg->end_bus_number) == 0) | ||
57 | return pci_mmcfg_virt[0].virt; | ||
58 | |||
59 | /* Fall back to type 0 */ | 36 | /* Fall back to type 0 */ |
60 | return NULL; | 37 | return NULL; |
61 | } | 38 | } |
@@ -63,8 +40,8 @@ static char __iomem *get_virt(unsigned int seg, unsigned bus) | |||
63 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) | 40 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) |
64 | { | 41 | { |
65 | char __iomem *addr; | 42 | char __iomem *addr; |
66 | if (seg == 0 && bus < MAX_CHECK_BUS && | 43 | if (seg == 0 && bus < PCI_MMCFG_MAX_CHECK_BUS && |
67 | test_bit(32*bus + PCI_SLOT(devfn), fallback_slots)) | 44 | test_bit(32*bus + PCI_SLOT(devfn), pci_mmcfg_fallback_slots)) |
68 | return NULL; | 45 | return NULL; |
69 | addr = get_virt(seg, bus); | 46 | addr = get_virt(seg, bus); |
70 | if (!addr) | 47 | if (!addr) |
@@ -135,79 +112,46 @@ static struct pci_raw_ops pci_mmcfg = { | |||
135 | .write = pci_mmcfg_write, | 112 | .write = pci_mmcfg_write, |
136 | }; | 113 | }; |
137 | 114 | ||
138 | /* K8 systems have some devices (typically in the builtin northbridge) | 115 | static void __iomem * __init mcfg_ioremap(struct acpi_mcfg_allocation *cfg) |
139 | that are only accessible using type1 | ||
140 | Normally this can be expressed in the MCFG by not listing them | ||
141 | and assigning suitable _SEGs, but this isn't implemented in some BIOS. | ||
142 | Instead try to discover all devices on bus 0 that are unreachable using MM | ||
143 | and fallback for them. */ | ||
144 | static __init void unreachable_devices(void) | ||
145 | { | 116 | { |
146 | int i, k; | 117 | void __iomem *addr; |
147 | /* Use the max bus number from ACPI here? */ | 118 | u32 size; |
148 | for (k = 0; k < MAX_CHECK_BUS; k++) { | 119 | |
149 | for (i = 0; i < 32; i++) { | 120 | size = (cfg->end_bus_number + 1) << 20; |
150 | u32 val1; | 121 | addr = ioremap_nocache(cfg->address, size); |
151 | char __iomem *addr; | 122 | if (addr) { |
152 | 123 | printk(KERN_INFO "PCI: Using MMCONFIG at %Lx - %Lx\n", | |
153 | pci_conf1_read(0, k, PCI_DEVFN(i,0), 0, 4, &val1); | 124 | cfg->address, cfg->address + size - 1); |
154 | if (val1 == 0xffffffff) | ||
155 | continue; | ||
156 | addr = pci_dev_base(0, k, PCI_DEVFN(i, 0)); | ||
157 | if (addr == NULL|| readl(addr) != val1) { | ||
158 | set_bit(i + 32*k, fallback_slots); | ||
159 | printk(KERN_NOTICE "PCI: No mmconfig possible" | ||
160 | " on device %02x:%02x\n", k, i); | ||
161 | } | ||
162 | } | ||
163 | } | 125 | } |
126 | return addr; | ||
164 | } | 127 | } |
165 | 128 | ||
166 | void __init pci_mmcfg_init(int type) | 129 | int __init pci_mmcfg_arch_reachable(unsigned int seg, unsigned int bus, |
130 | unsigned int devfn) | ||
167 | { | 131 | { |
168 | int i; | 132 | return pci_dev_base(seg, bus, devfn) != NULL; |
169 | 133 | } | |
170 | if ((pci_probe & PCI_PROBE_MMCONF) == 0) | ||
171 | return; | ||
172 | |||
173 | acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); | ||
174 | if ((pci_mmcfg_config_num == 0) || | ||
175 | (pci_mmcfg_config == NULL) || | ||
176 | (pci_mmcfg_config[0].address == 0)) | ||
177 | return; | ||
178 | |||
179 | /* Only do this check when type 1 works. If it doesn't work | ||
180 | assume we run on a Mac and always use MCFG */ | ||
181 | if (type == 1 && !e820_all_mapped(pci_mmcfg_config[0].address, | ||
182 | pci_mmcfg_config[0].address + MMCONFIG_APER_MIN, | ||
183 | E820_RESERVED)) { | ||
184 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %lx is not E820-reserved\n", | ||
185 | (unsigned long)pci_mmcfg_config[0].address); | ||
186 | printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); | ||
187 | return; | ||
188 | } | ||
189 | 134 | ||
190 | pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * pci_mmcfg_config_num, GFP_KERNEL); | 135 | int __init pci_mmcfg_arch_init(void) |
136 | { | ||
137 | int i; | ||
138 | pci_mmcfg_virt = kmalloc(sizeof(*pci_mmcfg_virt) * | ||
139 | pci_mmcfg_config_num, GFP_KERNEL); | ||
191 | if (pci_mmcfg_virt == NULL) { | 140 | if (pci_mmcfg_virt == NULL) { |
192 | printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); | 141 | printk(KERN_ERR "PCI: Can not allocate memory for mmconfig structures\n"); |
193 | return; | 142 | return 0; |
194 | } | 143 | } |
144 | |||
195 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | 145 | for (i = 0; i < pci_mmcfg_config_num; ++i) { |
196 | pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; | 146 | pci_mmcfg_virt[i].cfg = &pci_mmcfg_config[i]; |
197 | pci_mmcfg_virt[i].virt = ioremap_nocache(pci_mmcfg_config[i].address, | 147 | pci_mmcfg_virt[i].virt = mcfg_ioremap(&pci_mmcfg_config[i]); |
198 | MMCONFIG_APER_MAX); | ||
199 | if (!pci_mmcfg_virt[i].virt) { | 148 | if (!pci_mmcfg_virt[i].virt) { |
200 | printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " | 149 | printk(KERN_ERR "PCI: Cannot map mmconfig aperture for " |
201 | "segment %d\n", | 150 | "segment %d\n", |
202 | pci_mmcfg_config[i].pci_segment); | 151 | pci_mmcfg_config[i].pci_segment); |
203 | return; | 152 | return 0; |
204 | } | 153 | } |
205 | printk(KERN_INFO "PCI: Using MMCONFIG at %lx\n", | ||
206 | (unsigned long)pci_mmcfg_config[i].address); | ||
207 | } | 154 | } |
208 | |||
209 | unreachable_devices(); | ||
210 | |||
211 | raw_pci_ops = &pci_mmcfg; | 155 | raw_pci_ops = &pci_mmcfg; |
212 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | 156 | return 1; |
213 | } | 157 | } |
diff --git a/drivers/acpi/namespace/nsinit.c b/drivers/acpi/namespace/nsinit.c index 326af8fc0ce7..33db2241044e 100644 --- a/drivers/acpi/namespace/nsinit.c +++ b/drivers/acpi/namespace/nsinit.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <acpi/acnamesp.h> | 45 | #include <acpi/acnamesp.h> |
46 | #include <acpi/acdispat.h> | 46 | #include <acpi/acdispat.h> |
47 | #include <acpi/acinterp.h> | 47 | #include <acpi/acinterp.h> |
48 | #include <linux/nmi.h> | ||
48 | 49 | ||
49 | #define _COMPONENT ACPI_NAMESPACE | 50 | #define _COMPONENT ACPI_NAMESPACE |
50 | ACPI_MODULE_NAME("nsinit") | 51 | ACPI_MODULE_NAME("nsinit") |
@@ -534,7 +535,15 @@ acpi_ns_init_one_device(acpi_handle obj_handle, | |||
534 | info->parameter_type = ACPI_PARAM_ARGS; | 535 | info->parameter_type = ACPI_PARAM_ARGS; |
535 | info->flags = ACPI_IGNORE_RETURN_VALUE; | 536 | info->flags = ACPI_IGNORE_RETURN_VALUE; |
536 | 537 | ||
538 | /* | ||
539 | * Some hardware relies on this being executed as atomically | ||
540 | * as possible (without an NMI being received in the middle of | ||
541 | * this) - so disable NMIs and initialize the device: | ||
542 | */ | ||
543 | acpi_nmi_disable(); | ||
537 | status = acpi_ns_evaluate(info); | 544 | status = acpi_ns_evaluate(info); |
545 | acpi_nmi_enable(); | ||
546 | |||
538 | if (ACPI_SUCCESS(status)) { | 547 | if (ACPI_SUCCESS(status)) { |
539 | walk_info->num_INI++; | 548 | walk_info->num_INI++; |
540 | 549 | ||
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c index 1e640b899175..fd4e91734388 100644 --- a/drivers/kvm/vmx.c +++ b/drivers/kvm/vmx.c | |||
@@ -1879,12 +1879,6 @@ again: | |||
1879 | 1879 | ||
1880 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 1880 | asm ("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
1881 | 1881 | ||
1882 | /* | ||
1883 | * Profile KVM exit RIPs: | ||
1884 | */ | ||
1885 | if (unlikely(prof_on == KVM_PROFILING)) | ||
1886 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); | ||
1887 | |||
1888 | kvm_run->exit_type = 0; | 1882 | kvm_run->exit_type = 0; |
1889 | if (fail) { | 1883 | if (fail) { |
1890 | kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; | 1884 | kvm_run->exit_type = KVM_EXIT_TYPE_FAIL_ENTRY; |
@@ -1907,6 +1901,12 @@ again: | |||
1907 | 1901 | ||
1908 | reload_tss(); | 1902 | reload_tss(); |
1909 | } | 1903 | } |
1904 | /* | ||
1905 | * Profile KVM exit RIPs: | ||
1906 | */ | ||
1907 | if (unlikely(prof_on == KVM_PROFILING)) | ||
1908 | profile_hit(KVM_PROFILING, (void *)vmcs_readl(GUEST_RIP)); | ||
1909 | |||
1910 | vcpu->launched = 1; | 1910 | vcpu->launched = 1; |
1911 | kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; | 1911 | kvm_run->exit_type = KVM_EXIT_TYPE_VM_EXIT; |
1912 | r = kvm_handle_exit(kvm_run, vcpu); | 1912 | r = kvm_handle_exit(kvm_run, vcpu); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 669dbe5b0317..51db1182b27e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -76,7 +76,8 @@ static struct linux_binfmt elf_format = { | |||
76 | .load_binary = load_elf_binary, | 76 | .load_binary = load_elf_binary, |
77 | .load_shlib = load_elf_library, | 77 | .load_shlib = load_elf_library, |
78 | .core_dump = elf_core_dump, | 78 | .core_dump = elf_core_dump, |
79 | .min_coredump = ELF_EXEC_PAGESIZE | 79 | .min_coredump = ELF_EXEC_PAGESIZE, |
80 | .hasvdso = 1 | ||
80 | }; | 81 | }; |
81 | 82 | ||
82 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) | 83 | #define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE) |
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 9d774d07d95b..00c23433b39f 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h | |||
@@ -183,6 +183,19 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres | |||
183 | #endif | 183 | #endif |
184 | 184 | ||
185 | /* | 185 | /* |
186 | * A facility to provide batching of the reload of page tables with the | ||
187 | * actual context switch code for paravirtualized guests. By convention, | ||
188 | * only one of the lazy modes (CPU, MMU) should be active at any given | ||
189 | * time, entry should never be nested, and entry and exits should always | ||
190 | * be paired. This is for sanity of maintaining and reasoning about the | ||
191 | * kernel code. | ||
192 | */ | ||
193 | #ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE | ||
194 | #define arch_enter_lazy_cpu_mode() do {} while (0) | ||
195 | #define arch_leave_lazy_cpu_mode() do {} while (0) | ||
196 | #endif | ||
197 | |||
198 | /* | ||
186 | * When walking page tables, get the address of the next boundary, | 199 | * When walking page tables, get the address of the next boundary, |
187 | * or the end address of the range if that comes earlier. Although no | 200 | * or the end address of the range if that comes earlier. Although no |
188 | * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout. | 201 | * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout. |
diff --git a/include/asm-i386/apic.h b/include/asm-i386/apic.h index 41a44319905f..3a61206fd108 100644 --- a/include/asm-i386/apic.h +++ b/include/asm-i386/apic.h | |||
@@ -43,6 +43,8 @@ extern void generic_apic_probe(void); | |||
43 | #define apic_write native_apic_write | 43 | #define apic_write native_apic_write |
44 | #define apic_write_atomic native_apic_write_atomic | 44 | #define apic_write_atomic native_apic_write_atomic |
45 | #define apic_read native_apic_read | 45 | #define apic_read native_apic_read |
46 | #define setup_boot_clock setup_boot_APIC_clock | ||
47 | #define setup_secondary_clock setup_secondary_APIC_clock | ||
46 | #endif | 48 | #endif |
47 | 49 | ||
48 | static __inline fastcall void native_apic_write(unsigned long reg, | 50 | static __inline fastcall void native_apic_write(unsigned long reg, |
diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index 38f1aebbbdb5..c90c7c499302 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h | |||
@@ -160,7 +160,7 @@ static void __init check_config(void) | |||
160 | * If we configured ourselves for a TSC, we'd better have one! | 160 | * If we configured ourselves for a TSC, we'd better have one! |
161 | */ | 161 | */ |
162 | #ifdef CONFIG_X86_TSC | 162 | #ifdef CONFIG_X86_TSC |
163 | if (!cpu_has_tsc) | 163 | if (!cpu_has_tsc && !tsc_disable) |
164 | panic("Kernel compiled for Pentium+, requires TSC feature!"); | 164 | panic("Kernel compiled for Pentium+, requires TSC feature!"); |
165 | #endif | 165 | #endif |
166 | 166 | ||
diff --git a/include/asm-i386/desc.h b/include/asm-i386/desc.h index f398cc456448..050831f34f71 100644 --- a/include/asm-i386/desc.h +++ b/include/asm-i386/desc.h | |||
@@ -22,7 +22,7 @@ struct Xgt_desc_struct { | |||
22 | 22 | ||
23 | extern struct Xgt_desc_struct idt_descr; | 23 | extern struct Xgt_desc_struct idt_descr; |
24 | DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | 24 | DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); |
25 | 25 | extern struct Xgt_desc_struct early_gdt_descr; | |
26 | 26 | ||
27 | static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) | 27 | static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu) |
28 | { | 28 | { |
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h index 369035dfe4b6..8d33c9bb7c1c 100644 --- a/include/asm-i386/elf.h +++ b/include/asm-i386/elf.h | |||
@@ -90,8 +90,8 @@ typedef struct user_fxsr_struct elf_fpxregset_t; | |||
90 | pr_reg[6] = regs->eax; \ | 90 | pr_reg[6] = regs->eax; \ |
91 | pr_reg[7] = regs->xds; \ | 91 | pr_reg[7] = regs->xds; \ |
92 | pr_reg[8] = regs->xes; \ | 92 | pr_reg[8] = regs->xes; \ |
93 | savesegment(fs,pr_reg[9]); \ | 93 | pr_reg[9] = regs->xfs; \ |
94 | pr_reg[10] = regs->xgs; \ | 94 | savesegment(gs,pr_reg[10]); \ |
95 | pr_reg[11] = regs->orig_eax; \ | 95 | pr_reg[11] = regs->orig_eax; \ |
96 | pr_reg[12] = regs->eip; \ | 96 | pr_reg[12] = regs->eip; \ |
97 | pr_reg[13] = regs->xcs; \ | 97 | pr_reg[13] = regs->xcs; \ |
diff --git a/include/asm-i386/idle.h b/include/asm-i386/idle.h new file mode 100644 index 000000000000..87ab93911199 --- /dev/null +++ b/include/asm-i386/idle.h | |||
@@ -0,0 +1,14 @@ | |||
1 | #ifndef _ASM_I386_IDLE_H | ||
2 | #define _ASM_I386_IDLE_H 1 | ||
3 | |||
4 | #define IDLE_START 1 | ||
5 | #define IDLE_END 2 | ||
6 | |||
7 | struct notifier_block; | ||
8 | void idle_notifier_register(struct notifier_block *n); | ||
9 | void idle_notifier_unregister(struct notifier_block *n); | ||
10 | |||
11 | void exit_idle(void); | ||
12 | void enter_idle(void); | ||
13 | |||
14 | #endif | ||
diff --git a/include/asm-i386/mce.h b/include/asm-i386/mce.h index 7cc1a973bf00..b0a02ee34ffd 100644 --- a/include/asm-i386/mce.h +++ b/include/asm-i386/mce.h | |||
@@ -3,3 +3,5 @@ extern void mcheck_init(struct cpuinfo_x86 *c); | |||
3 | #else | 3 | #else |
4 | #define mcheck_init(c) do {} while(0) | 4 | #define mcheck_init(c) do {} while(0) |
5 | #endif | 5 | #endif |
6 | |||
7 | extern int mce_disabled; | ||
diff --git a/include/asm-i386/mmu_context.h b/include/asm-i386/mmu_context.h index 68ff102d6f5e..e6aa30f8de5b 100644 --- a/include/asm-i386/mmu_context.h +++ b/include/asm-i386/mmu_context.h | |||
@@ -63,7 +63,7 @@ static inline void switch_mm(struct mm_struct *prev, | |||
63 | } | 63 | } |
64 | 64 | ||
65 | #define deactivate_mm(tsk, mm) \ | 65 | #define deactivate_mm(tsk, mm) \ |
66 | asm("movl %0,%%fs": :"r" (0)); | 66 | asm("movl %0,%%gs": :"r" (0)); |
67 | 67 | ||
68 | #define activate_mm(prev, next) \ | 68 | #define activate_mm(prev, next) \ |
69 | switch_mm((prev),(next),NULL) | 69 | switch_mm((prev),(next),NULL) |
diff --git a/include/asm-i386/paravirt.h b/include/asm-i386/paravirt.h index 9f06265065f4..6317e0a4d735 100644 --- a/include/asm-i386/paravirt.h +++ b/include/asm-i386/paravirt.h | |||
@@ -59,90 +59,102 @@ struct paravirt_ops | |||
59 | convention. This makes it easier to implement inline | 59 | convention. This makes it easier to implement inline |
60 | assembler replacements. */ | 60 | assembler replacements. */ |
61 | 61 | ||
62 | void (fastcall *cpuid)(unsigned int *eax, unsigned int *ebx, | 62 | void (*cpuid)(unsigned int *eax, unsigned int *ebx, |
63 | unsigned int *ecx, unsigned int *edx); | 63 | unsigned int *ecx, unsigned int *edx); |
64 | 64 | ||
65 | unsigned long (fastcall *get_debugreg)(int regno); | 65 | unsigned long (*get_debugreg)(int regno); |
66 | void (fastcall *set_debugreg)(int regno, unsigned long value); | 66 | void (*set_debugreg)(int regno, unsigned long value); |
67 | 67 | ||
68 | void (fastcall *clts)(void); | 68 | void (*clts)(void); |
69 | 69 | ||
70 | unsigned long (fastcall *read_cr0)(void); | 70 | unsigned long (*read_cr0)(void); |
71 | void (fastcall *write_cr0)(unsigned long); | 71 | void (*write_cr0)(unsigned long); |
72 | 72 | ||
73 | unsigned long (fastcall *read_cr2)(void); | 73 | unsigned long (*read_cr2)(void); |
74 | void (fastcall *write_cr2)(unsigned long); | 74 | void (*write_cr2)(unsigned long); |
75 | 75 | ||
76 | unsigned long (fastcall *read_cr3)(void); | 76 | unsigned long (*read_cr3)(void); |
77 | void (fastcall *write_cr3)(unsigned long); | 77 | void (*write_cr3)(unsigned long); |
78 | 78 | ||
79 | unsigned long (fastcall *read_cr4_safe)(void); | 79 | unsigned long (*read_cr4_safe)(void); |
80 | unsigned long (fastcall *read_cr4)(void); | 80 | unsigned long (*read_cr4)(void); |
81 | void (fastcall *write_cr4)(unsigned long); | 81 | void (*write_cr4)(unsigned long); |
82 | 82 | ||
83 | unsigned long (fastcall *save_fl)(void); | 83 | unsigned long (*save_fl)(void); |
84 | void (fastcall *restore_fl)(unsigned long); | 84 | void (*restore_fl)(unsigned long); |
85 | void (fastcall *irq_disable)(void); | 85 | void (*irq_disable)(void); |
86 | void (fastcall *irq_enable)(void); | 86 | void (*irq_enable)(void); |
87 | void (fastcall *safe_halt)(void); | 87 | void (*safe_halt)(void); |
88 | void (fastcall *halt)(void); | 88 | void (*halt)(void); |
89 | void (fastcall *wbinvd)(void); | 89 | void (*wbinvd)(void); |
90 | 90 | ||
91 | /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ | 91 | /* err = 0/-EFAULT. wrmsr returns 0/-EFAULT. */ |
92 | u64 (fastcall *read_msr)(unsigned int msr, int *err); | 92 | u64 (*read_msr)(unsigned int msr, int *err); |
93 | int (fastcall *write_msr)(unsigned int msr, u64 val); | 93 | int (*write_msr)(unsigned int msr, u64 val); |
94 | 94 | ||
95 | u64 (fastcall *read_tsc)(void); | 95 | u64 (*read_tsc)(void); |
96 | u64 (fastcall *read_pmc)(void); | 96 | u64 (*read_pmc)(void); |
97 | 97 | ||
98 | void (fastcall *load_tr_desc)(void); | 98 | void (*load_tr_desc)(void); |
99 | void (fastcall *load_gdt)(const struct Xgt_desc_struct *); | 99 | void (*load_gdt)(const struct Xgt_desc_struct *); |
100 | void (fastcall *load_idt)(const struct Xgt_desc_struct *); | 100 | void (*load_idt)(const struct Xgt_desc_struct *); |
101 | void (fastcall *store_gdt)(struct Xgt_desc_struct *); | 101 | void (*store_gdt)(struct Xgt_desc_struct *); |
102 | void (fastcall *store_idt)(struct Xgt_desc_struct *); | 102 | void (*store_idt)(struct Xgt_desc_struct *); |
103 | void (fastcall *set_ldt)(const void *desc, unsigned entries); | 103 | void (*set_ldt)(const void *desc, unsigned entries); |
104 | unsigned long (fastcall *store_tr)(void); | 104 | unsigned long (*store_tr)(void); |
105 | void (fastcall *load_tls)(struct thread_struct *t, unsigned int cpu); | 105 | void (*load_tls)(struct thread_struct *t, unsigned int cpu); |
106 | void (fastcall *write_ldt_entry)(void *dt, int entrynum, | 106 | void (*write_ldt_entry)(void *dt, int entrynum, |
107 | u32 low, u32 high); | 107 | u32 low, u32 high); |
108 | void (fastcall *write_gdt_entry)(void *dt, int entrynum, | 108 | void (*write_gdt_entry)(void *dt, int entrynum, |
109 | u32 low, u32 high); | 109 | u32 low, u32 high); |
110 | void (fastcall *write_idt_entry)(void *dt, int entrynum, | 110 | void (*write_idt_entry)(void *dt, int entrynum, |
111 | u32 low, u32 high); | 111 | u32 low, u32 high); |
112 | void (fastcall *load_esp0)(struct tss_struct *tss, | 112 | void (*load_esp0)(struct tss_struct *tss, |
113 | struct thread_struct *thread); | 113 | struct thread_struct *thread); |
114 | 114 | ||
115 | void (fastcall *set_iopl_mask)(unsigned mask); | 115 | void (*set_iopl_mask)(unsigned mask); |
116 | 116 | ||
117 | void (fastcall *io_delay)(void); | 117 | void (*io_delay)(void); |
118 | void (*const_udelay)(unsigned long loops); | 118 | void (*const_udelay)(unsigned long loops); |
119 | 119 | ||
120 | #ifdef CONFIG_X86_LOCAL_APIC | 120 | #ifdef CONFIG_X86_LOCAL_APIC |
121 | void (fastcall *apic_write)(unsigned long reg, unsigned long v); | 121 | void (*apic_write)(unsigned long reg, unsigned long v); |
122 | void (fastcall *apic_write_atomic)(unsigned long reg, unsigned long v); | 122 | void (*apic_write_atomic)(unsigned long reg, unsigned long v); |
123 | unsigned long (fastcall *apic_read)(unsigned long reg); | 123 | unsigned long (*apic_read)(unsigned long reg); |
124 | void (*setup_boot_clock)(void); | ||
125 | void (*setup_secondary_clock)(void); | ||
124 | #endif | 126 | #endif |
125 | 127 | ||
126 | void (fastcall *flush_tlb_user)(void); | 128 | void (*flush_tlb_user)(void); |
127 | void (fastcall *flush_tlb_kernel)(void); | 129 | void (*flush_tlb_kernel)(void); |
128 | void (fastcall *flush_tlb_single)(u32 addr); | 130 | void (*flush_tlb_single)(u32 addr); |
129 | 131 | ||
130 | void (fastcall *set_pte)(pte_t *ptep, pte_t pteval); | 132 | void (*alloc_pt)(u32 pfn); |
131 | void (fastcall *set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); | 133 | void (*alloc_pd)(u32 pfn); |
132 | void (fastcall *set_pmd)(pmd_t *pmdp, pmd_t pmdval); | 134 | void (*alloc_pd_clone)(u32 pfn, u32 clonepfn, u32 start, u32 count); |
133 | void (fastcall *pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); | 135 | void (*release_pt)(u32 pfn); |
134 | void (fastcall *pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); | 136 | void (*release_pd)(u32 pfn); |
137 | |||
138 | void (*set_pte)(pte_t *ptep, pte_t pteval); | ||
139 | void (*set_pte_at)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval); | ||
140 | void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); | ||
141 | void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep); | ||
142 | void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep); | ||
135 | #ifdef CONFIG_X86_PAE | 143 | #ifdef CONFIG_X86_PAE |
136 | void (fastcall *set_pte_atomic)(pte_t *ptep, pte_t pteval); | 144 | void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); |
137 | void (fastcall *set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); | 145 | void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte); |
138 | void (fastcall *set_pud)(pud_t *pudp, pud_t pudval); | 146 | void (*set_pud)(pud_t *pudp, pud_t pudval); |
139 | void (fastcall *pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 147 | void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
140 | void (fastcall *pmd_clear)(pmd_t *pmdp); | 148 | void (*pmd_clear)(pmd_t *pmdp); |
141 | #endif | 149 | #endif |
142 | 150 | ||
151 | void (*set_lazy_mode)(int mode); | ||
152 | |||
143 | /* These two are jmp to, not actually called. */ | 153 | /* These two are jmp to, not actually called. */ |
144 | void (fastcall *irq_enable_sysexit)(void); | 154 | void (*irq_enable_sysexit)(void); |
145 | void (fastcall *iret)(void); | 155 | void (*iret)(void); |
156 | |||
157 | void (*startup_ipi_hook)(int phys_apicid, unsigned long start_eip, unsigned long start_esp); | ||
146 | }; | 158 | }; |
147 | 159 | ||
148 | /* Mark a paravirt probe function. */ | 160 | /* Mark a paravirt probe function. */ |
@@ -313,13 +325,38 @@ static inline unsigned long apic_read(unsigned long reg) | |||
313 | { | 325 | { |
314 | return paravirt_ops.apic_read(reg); | 326 | return paravirt_ops.apic_read(reg); |
315 | } | 327 | } |
328 | |||
329 | static inline void setup_boot_clock(void) | ||
330 | { | ||
331 | paravirt_ops.setup_boot_clock(); | ||
332 | } | ||
333 | |||
334 | static inline void setup_secondary_clock(void) | ||
335 | { | ||
336 | paravirt_ops.setup_secondary_clock(); | ||
337 | } | ||
316 | #endif | 338 | #endif |
317 | 339 | ||
340 | #ifdef CONFIG_SMP | ||
341 | static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip, | ||
342 | unsigned long start_esp) | ||
343 | { | ||
344 | return paravirt_ops.startup_ipi_hook(phys_apicid, start_eip, start_esp); | ||
345 | } | ||
346 | #endif | ||
318 | 347 | ||
319 | #define __flush_tlb() paravirt_ops.flush_tlb_user() | 348 | #define __flush_tlb() paravirt_ops.flush_tlb_user() |
320 | #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() | 349 | #define __flush_tlb_global() paravirt_ops.flush_tlb_kernel() |
321 | #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) | 350 | #define __flush_tlb_single(addr) paravirt_ops.flush_tlb_single(addr) |
322 | 351 | ||
352 | #define paravirt_alloc_pt(pfn) paravirt_ops.alloc_pt(pfn) | ||
353 | #define paravirt_release_pt(pfn) paravirt_ops.release_pt(pfn) | ||
354 | |||
355 | #define paravirt_alloc_pd(pfn) paravirt_ops.alloc_pd(pfn) | ||
356 | #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) \ | ||
357 | paravirt_ops.alloc_pd_clone(pfn, clonepfn, start, count) | ||
358 | #define paravirt_release_pd(pfn) paravirt_ops.release_pd(pfn) | ||
359 | |||
323 | static inline void set_pte(pte_t *ptep, pte_t pteval) | 360 | static inline void set_pte(pte_t *ptep, pte_t pteval) |
324 | { | 361 | { |
325 | paravirt_ops.set_pte(ptep, pteval); | 362 | paravirt_ops.set_pte(ptep, pteval); |
@@ -372,6 +409,19 @@ static inline void pmd_clear(pmd_t *pmdp) | |||
372 | } | 409 | } |
373 | #endif | 410 | #endif |
374 | 411 | ||
412 | /* Lazy mode for batching updates / context switch */ | ||
413 | #define PARAVIRT_LAZY_NONE 0 | ||
414 | #define PARAVIRT_LAZY_MMU 1 | ||
415 | #define PARAVIRT_LAZY_CPU 2 | ||
416 | |||
417 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE | ||
418 | #define arch_enter_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_CPU) | ||
419 | #define arch_leave_lazy_cpu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) | ||
420 | |||
421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | ||
422 | #define arch_enter_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_MMU) | ||
423 | #define arch_leave_lazy_mmu_mode() paravirt_ops.set_lazy_mode(PARAVIRT_LAZY_NONE) | ||
424 | |||
375 | /* These all sit in the .parainstructions section to tell us what to patch. */ | 425 | /* These all sit in the .parainstructions section to tell us what to patch. */ |
376 | struct paravirt_patch { | 426 | struct paravirt_patch { |
377 | u8 *instr; /* original instructions */ | 427 | u8 *instr; /* original instructions */ |
diff --git a/include/asm-i386/pda.h b/include/asm-i386/pda.h index 2ba2736aa109..b12d59a318b7 100644 --- a/include/asm-i386/pda.h +++ b/include/asm-i386/pda.h | |||
@@ -39,19 +39,19 @@ extern struct i386_pda _proxy_pda; | |||
39 | if (0) { T__ tmp__; tmp__ = (val); } \ | 39 | if (0) { T__ tmp__; tmp__ = (val); } \ |
40 | switch (sizeof(_proxy_pda.field)) { \ | 40 | switch (sizeof(_proxy_pda.field)) { \ |
41 | case 1: \ | 41 | case 1: \ |
42 | asm(op "b %1,%%gs:%c2" \ | 42 | asm(op "b %1,%%fs:%c2" \ |
43 | : "+m" (_proxy_pda.field) \ | 43 | : "+m" (_proxy_pda.field) \ |
44 | :"ri" ((T__)val), \ | 44 | :"ri" ((T__)val), \ |
45 | "i"(pda_offset(field))); \ | 45 | "i"(pda_offset(field))); \ |
46 | break; \ | 46 | break; \ |
47 | case 2: \ | 47 | case 2: \ |
48 | asm(op "w %1,%%gs:%c2" \ | 48 | asm(op "w %1,%%fs:%c2" \ |
49 | : "+m" (_proxy_pda.field) \ | 49 | : "+m" (_proxy_pda.field) \ |
50 | :"ri" ((T__)val), \ | 50 | :"ri" ((T__)val), \ |
51 | "i"(pda_offset(field))); \ | 51 | "i"(pda_offset(field))); \ |
52 | break; \ | 52 | break; \ |
53 | case 4: \ | 53 | case 4: \ |
54 | asm(op "l %1,%%gs:%c2" \ | 54 | asm(op "l %1,%%fs:%c2" \ |
55 | : "+m" (_proxy_pda.field) \ | 55 | : "+m" (_proxy_pda.field) \ |
56 | :"ri" ((T__)val), \ | 56 | :"ri" ((T__)val), \ |
57 | "i"(pda_offset(field))); \ | 57 | "i"(pda_offset(field))); \ |
@@ -65,19 +65,19 @@ extern struct i386_pda _proxy_pda; | |||
65 | typeof(_proxy_pda.field) ret__; \ | 65 | typeof(_proxy_pda.field) ret__; \ |
66 | switch (sizeof(_proxy_pda.field)) { \ | 66 | switch (sizeof(_proxy_pda.field)) { \ |
67 | case 1: \ | 67 | case 1: \ |
68 | asm(op "b %%gs:%c1,%0" \ | 68 | asm(op "b %%fs:%c1,%0" \ |
69 | : "=r" (ret__) \ | 69 | : "=r" (ret__) \ |
70 | : "i" (pda_offset(field)), \ | 70 | : "i" (pda_offset(field)), \ |
71 | "m" (_proxy_pda.field)); \ | 71 | "m" (_proxy_pda.field)); \ |
72 | break; \ | 72 | break; \ |
73 | case 2: \ | 73 | case 2: \ |
74 | asm(op "w %%gs:%c1,%0" \ | 74 | asm(op "w %%fs:%c1,%0" \ |
75 | : "=r" (ret__) \ | 75 | : "=r" (ret__) \ |
76 | : "i" (pda_offset(field)), \ | 76 | : "i" (pda_offset(field)), \ |
77 | "m" (_proxy_pda.field)); \ | 77 | "m" (_proxy_pda.field)); \ |
78 | break; \ | 78 | break; \ |
79 | case 4: \ | 79 | case 4: \ |
80 | asm(op "l %%gs:%c1,%0" \ | 80 | asm(op "l %%fs:%c1,%0" \ |
81 | : "=r" (ret__) \ | 81 | : "=r" (ret__) \ |
82 | : "i" (pda_offset(field)), \ | 82 | : "i" (pda_offset(field)), \ |
83 | "m" (_proxy_pda.field)); \ | 83 | "m" (_proxy_pda.field)); \ |
diff --git a/include/asm-i386/pgalloc.h b/include/asm-i386/pgalloc.h index 4b1e61359f89..c8dc2d0141a7 100644 --- a/include/asm-i386/pgalloc.h +++ b/include/asm-i386/pgalloc.h | |||
@@ -5,13 +5,31 @@ | |||
5 | #include <linux/threads.h> | 5 | #include <linux/threads.h> |
6 | #include <linux/mm.h> /* for struct page */ | 6 | #include <linux/mm.h> /* for struct page */ |
7 | 7 | ||
8 | #define pmd_populate_kernel(mm, pmd, pte) \ | 8 | #ifdef CONFIG_PARAVIRT |
9 | set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) | 9 | #include <asm/paravirt.h> |
10 | #else | ||
11 | #define paravirt_alloc_pt(pfn) do { } while (0) | ||
12 | #define paravirt_alloc_pd(pfn) do { } while (0) | ||
13 | #define paravirt_alloc_pd(pfn) do { } while (0) | ||
14 | #define paravirt_alloc_pd_clone(pfn, clonepfn, start, count) do { } while (0) | ||
15 | #define paravirt_release_pt(pfn) do { } while (0) | ||
16 | #define paravirt_release_pd(pfn) do { } while (0) | ||
17 | #endif | ||
18 | |||
19 | #define pmd_populate_kernel(mm, pmd, pte) \ | ||
20 | do { \ | ||
21 | paravirt_alloc_pt(__pa(pte) >> PAGE_SHIFT); \ | ||
22 | set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))); \ | ||
23 | } while (0) | ||
10 | 24 | ||
11 | #define pmd_populate(mm, pmd, pte) \ | 25 | #define pmd_populate(mm, pmd, pte) \ |
26 | do { \ | ||
27 | paravirt_alloc_pt(page_to_pfn(pte)); \ | ||
12 | set_pmd(pmd, __pmd(_PAGE_TABLE + \ | 28 | set_pmd(pmd, __pmd(_PAGE_TABLE + \ |
13 | ((unsigned long long)page_to_pfn(pte) << \ | 29 | ((unsigned long long)page_to_pfn(pte) << \ |
14 | (unsigned long long) PAGE_SHIFT))) | 30 | (unsigned long long) PAGE_SHIFT))); \ |
31 | } while (0) | ||
32 | |||
15 | /* | 33 | /* |
16 | * Allocate and free page tables. | 34 | * Allocate and free page tables. |
17 | */ | 35 | */ |
@@ -32,7 +50,11 @@ static inline void pte_free(struct page *pte) | |||
32 | } | 50 | } |
33 | 51 | ||
34 | 52 | ||
35 | #define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) | 53 | #define __pte_free_tlb(tlb,pte) \ |
54 | do { \ | ||
55 | paravirt_release_pt(page_to_pfn(pte)); \ | ||
56 | tlb_remove_page((tlb),(pte)); \ | ||
57 | } while (0) | ||
36 | 58 | ||
37 | #ifdef CONFIG_X86_PAE | 59 | #ifdef CONFIG_X86_PAE |
38 | /* | 60 | /* |
diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 359f10b54f59..edfbe46a5e13 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h | |||
@@ -257,6 +257,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) | |||
257 | : :"a" (eax), "c" (ecx)); | 257 | : :"a" (eax), "c" (ecx)); |
258 | } | 258 | } |
259 | 259 | ||
260 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | ||
261 | { | ||
262 | /* "mwait %eax,%ecx;" */ | ||
263 | asm volatile( | ||
264 | "sti; .byte 0x0f,0x01,0xc9;" | ||
265 | : :"a" (eax), "c" (ecx)); | ||
266 | } | ||
267 | |||
260 | extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); | 268 | extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); |
261 | 269 | ||
262 | /* from system description table in BIOS. Mostly for MCA use, but | 270 | /* from system description table in BIOS. Mostly for MCA use, but |
@@ -424,7 +432,7 @@ struct thread_struct { | |||
424 | .vm86_info = NULL, \ | 432 | .vm86_info = NULL, \ |
425 | .sysenter_cs = __KERNEL_CS, \ | 433 | .sysenter_cs = __KERNEL_CS, \ |
426 | .io_bitmap_ptr = NULL, \ | 434 | .io_bitmap_ptr = NULL, \ |
427 | .gs = __KERNEL_PDA, \ | 435 | .fs = __KERNEL_PDA, \ |
428 | } | 436 | } |
429 | 437 | ||
430 | /* | 438 | /* |
@@ -442,8 +450,8 @@ struct thread_struct { | |||
442 | } | 450 | } |
443 | 451 | ||
444 | #define start_thread(regs, new_eip, new_esp) do { \ | 452 | #define start_thread(regs, new_eip, new_esp) do { \ |
445 | __asm__("movl %0,%%fs": :"r" (0)); \ | 453 | __asm__("movl %0,%%gs": :"r" (0)); \ |
446 | regs->xgs = 0; \ | 454 | regs->xfs = 0; \ |
447 | set_fs(USER_DS); \ | 455 | set_fs(USER_DS); \ |
448 | regs->xds = __USER_DS; \ | 456 | regs->xds = __USER_DS; \ |
449 | regs->xes = __USER_DS; \ | 457 | regs->xes = __USER_DS; \ |
diff --git a/include/asm-i386/ptrace.h b/include/asm-i386/ptrace.h index bdbc894339b4..6002597b9e12 100644 --- a/include/asm-i386/ptrace.h +++ b/include/asm-i386/ptrace.h | |||
@@ -16,8 +16,8 @@ struct pt_regs { | |||
16 | long eax; | 16 | long eax; |
17 | int xds; | 17 | int xds; |
18 | int xes; | 18 | int xes; |
19 | /* int xfs; */ | 19 | int xfs; |
20 | int xgs; | 20 | /* int xgs; */ |
21 | long orig_eax; | 21 | long orig_eax; |
22 | long eip; | 22 | long eip; |
23 | int xcs; | 23 | int xcs; |
@@ -49,6 +49,10 @@ static inline int user_mode_vm(struct pt_regs *regs) | |||
49 | { | 49 | { |
50 | return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; | 50 | return ((regs->xcs & SEGMENT_RPL_MASK) | (regs->eflags & VM_MASK)) >= USER_RPL; |
51 | } | 51 | } |
52 | static inline int v8086_mode(struct pt_regs *regs) | ||
53 | { | ||
54 | return (regs->eflags & VM_MASK); | ||
55 | } | ||
52 | 56 | ||
53 | #define instruction_pointer(regs) ((regs)->eip) | 57 | #define instruction_pointer(regs) ((regs)->eip) |
54 | #define regs_return_value(regs) ((regs)->eax) | 58 | #define regs_return_value(regs) ((regs)->eax) |
diff --git a/include/asm-i386/segment.h b/include/asm-i386/segment.h index 3c796af33776..065f10bfa487 100644 --- a/include/asm-i386/segment.h +++ b/include/asm-i386/segment.h | |||
@@ -83,14 +83,8 @@ | |||
83 | * The GDT has 32 entries | 83 | * The GDT has 32 entries |
84 | */ | 84 | */ |
85 | #define GDT_ENTRIES 32 | 85 | #define GDT_ENTRIES 32 |
86 | |||
87 | #define GDT_SIZE (GDT_ENTRIES * 8) | 86 | #define GDT_SIZE (GDT_ENTRIES * 8) |
88 | 87 | ||
89 | /* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ | ||
90 | #define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) | ||
91 | /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | ||
92 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) | ||
93 | |||
94 | /* Simple and small GDT entries for booting only */ | 88 | /* Simple and small GDT entries for booting only */ |
95 | 89 | ||
96 | #define GDT_ENTRY_BOOT_CS 2 | 90 | #define GDT_ENTRY_BOOT_CS 2 |
@@ -134,4 +128,17 @@ | |||
134 | #ifndef CONFIG_PARAVIRT | 128 | #ifndef CONFIG_PARAVIRT |
135 | #define get_kernel_rpl() 0 | 129 | #define get_kernel_rpl() 0 |
136 | #endif | 130 | #endif |
131 | /* | ||
132 | * Matching rules for certain types of segments. | ||
133 | */ | ||
134 | |||
135 | /* Matches only __KERNEL_CS, ignoring PnP / USER / APM segments */ | ||
136 | #define SEGMENT_IS_KERNEL_CODE(x) (((x) & 0xfc) == GDT_ENTRY_KERNEL_CS * 8) | ||
137 | |||
138 | /* Matches __KERNEL_CS and __USER_CS (they must be 2 entries apart) */ | ||
139 | #define SEGMENT_IS_FLAT_CODE(x) (((x) & 0xec) == GDT_ENTRY_KERNEL_CS * 8) | ||
140 | |||
141 | /* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */ | ||
142 | #define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8) | ||
143 | |||
137 | #endif | 144 | #endif |
diff --git a/include/asm-i386/setup.h b/include/asm-i386/setup.h index 76316275d6f9..0e8077cbfdac 100644 --- a/include/asm-i386/setup.h +++ b/include/asm-i386/setup.h | |||
@@ -77,6 +77,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map); | |||
77 | void __init add_memory_region(unsigned long long start, | 77 | void __init add_memory_region(unsigned long long start, |
78 | unsigned long long size, int type); | 78 | unsigned long long size, int type); |
79 | 79 | ||
80 | extern unsigned long init_pg_tables_end; | ||
81 | |||
80 | #endif /* __ASSEMBLY__ */ | 82 | #endif /* __ASSEMBLY__ */ |
81 | 83 | ||
82 | #endif /* __KERNEL__ */ | 84 | #endif /* __KERNEL__ */ |
diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 64fe624c02ca..6bf0033a301c 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h | |||
@@ -52,6 +52,11 @@ extern void cpu_exit_clear(void); | |||
52 | extern void cpu_uninit(void); | 52 | extern void cpu_uninit(void); |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | #ifndef CONFIG_PARAVIRT | ||
56 | #define startup_ipi_hook(phys_apicid, start_eip, start_esp) \ | ||
57 | do { } while (0) | ||
58 | #endif | ||
59 | |||
55 | /* | 60 | /* |
56 | * This function is needed by all SMP systems. It must _always_ be valid | 61 | * This function is needed by all SMP systems. It must _always_ be valid |
57 | * from the initial startup. We map APIC_BASE very early in page_setup(), | 62 | * from the initial startup. We map APIC_BASE very early in page_setup(), |
diff --git a/include/asm-i386/time.h b/include/asm-i386/time.h index ea8065af825a..571b4294dc2e 100644 --- a/include/asm-i386/time.h +++ b/include/asm-i386/time.h | |||
@@ -30,6 +30,7 @@ static inline int native_set_wallclock(unsigned long nowtime) | |||
30 | 30 | ||
31 | #ifdef CONFIG_PARAVIRT | 31 | #ifdef CONFIG_PARAVIRT |
32 | #include <asm/paravirt.h> | 32 | #include <asm/paravirt.h> |
33 | extern unsigned long long native_sched_clock(void); | ||
33 | #else /* !CONFIG_PARAVIRT */ | 34 | #else /* !CONFIG_PARAVIRT */ |
34 | 35 | ||
35 | #define get_wallclock() native_get_wallclock() | 36 | #define get_wallclock() native_get_wallclock() |
diff --git a/include/asm-i386/timer.h b/include/asm-i386/timer.h index d0ebd05f8516..4752c3a6a708 100644 --- a/include/asm-i386/timer.h +++ b/include/asm-i386/timer.h | |||
@@ -8,6 +8,9 @@ void setup_pit_timer(void); | |||
8 | /* Modifiers for buggy PIT handling */ | 8 | /* Modifiers for buggy PIT handling */ |
9 | extern int pit_latch_buggy; | 9 | extern int pit_latch_buggy; |
10 | extern int timer_ack; | 10 | extern int timer_ack; |
11 | extern int no_timer_check; | ||
12 | extern unsigned long long (*custom_sched_clock)(void); | ||
13 | extern int no_sync_cmos_clock; | ||
11 | extern int recalibrate_cpu_khz(void); | 14 | extern int recalibrate_cpu_khz(void); |
12 | 15 | ||
13 | #endif | 16 | #endif |
diff --git a/include/asm-i386/vmi.h b/include/asm-i386/vmi.h new file mode 100644 index 000000000000..43c89333037e --- /dev/null +++ b/include/asm-i386/vmi.h | |||
@@ -0,0 +1,262 @@ | |||
1 | /* | ||
2 | * VMI interface definition | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Maintained by: Zachary Amsden zach@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | #include <linux/types.h> | ||
25 | |||
26 | /* | ||
27 | *--------------------------------------------------------------------- | ||
28 | * | ||
29 | * VMI Option ROM API | ||
30 | * | ||
31 | *--------------------------------------------------------------------- | ||
32 | */ | ||
33 | #define VMI_SIGNATURE 0x696d5663 /* "cVmi" */ | ||
34 | |||
35 | #define PCI_VENDOR_ID_VMWARE 0x15AD | ||
36 | #define PCI_DEVICE_ID_VMWARE_VMI 0x0801 | ||
37 | |||
38 | /* | ||
39 | * We use two version numbers for compatibility, with the major | ||
40 | * number signifying interface breakages, and the minor number | ||
41 | * interface extensions. | ||
42 | */ | ||
43 | #define VMI_API_REV_MAJOR 3 | ||
44 | #define VMI_API_REV_MINOR 0 | ||
45 | |||
46 | #define VMI_CALL_CPUID 0 | ||
47 | #define VMI_CALL_WRMSR 1 | ||
48 | #define VMI_CALL_RDMSR 2 | ||
49 | #define VMI_CALL_SetGDT 3 | ||
50 | #define VMI_CALL_SetLDT 4 | ||
51 | #define VMI_CALL_SetIDT 5 | ||
52 | #define VMI_CALL_SetTR 6 | ||
53 | #define VMI_CALL_GetGDT 7 | ||
54 | #define VMI_CALL_GetLDT 8 | ||
55 | #define VMI_CALL_GetIDT 9 | ||
56 | #define VMI_CALL_GetTR 10 | ||
57 | #define VMI_CALL_WriteGDTEntry 11 | ||
58 | #define VMI_CALL_WriteLDTEntry 12 | ||
59 | #define VMI_CALL_WriteIDTEntry 13 | ||
60 | #define VMI_CALL_UpdateKernelStack 14 | ||
61 | #define VMI_CALL_SetCR0 15 | ||
62 | #define VMI_CALL_SetCR2 16 | ||
63 | #define VMI_CALL_SetCR3 17 | ||
64 | #define VMI_CALL_SetCR4 18 | ||
65 | #define VMI_CALL_GetCR0 19 | ||
66 | #define VMI_CALL_GetCR2 20 | ||
67 | #define VMI_CALL_GetCR3 21 | ||
68 | #define VMI_CALL_GetCR4 22 | ||
69 | #define VMI_CALL_WBINVD 23 | ||
70 | #define VMI_CALL_SetDR 24 | ||
71 | #define VMI_CALL_GetDR 25 | ||
72 | #define VMI_CALL_RDPMC 26 | ||
73 | #define VMI_CALL_RDTSC 27 | ||
74 | #define VMI_CALL_CLTS 28 | ||
75 | #define VMI_CALL_EnableInterrupts 29 | ||
76 | #define VMI_CALL_DisableInterrupts 30 | ||
77 | #define VMI_CALL_GetInterruptMask 31 | ||
78 | #define VMI_CALL_SetInterruptMask 32 | ||
79 | #define VMI_CALL_IRET 33 | ||
80 | #define VMI_CALL_SYSEXIT 34 | ||
81 | #define VMI_CALL_Halt 35 | ||
82 | #define VMI_CALL_Reboot 36 | ||
83 | #define VMI_CALL_Shutdown 37 | ||
84 | #define VMI_CALL_SetPxE 38 | ||
85 | #define VMI_CALL_SetPxELong 39 | ||
86 | #define VMI_CALL_UpdatePxE 40 | ||
87 | #define VMI_CALL_UpdatePxELong 41 | ||
88 | #define VMI_CALL_MachineToPhysical 42 | ||
89 | #define VMI_CALL_PhysicalToMachine 43 | ||
90 | #define VMI_CALL_AllocatePage 44 | ||
91 | #define VMI_CALL_ReleasePage 45 | ||
92 | #define VMI_CALL_InvalPage 46 | ||
93 | #define VMI_CALL_FlushTLB 47 | ||
94 | #define VMI_CALL_SetLinearMapping 48 | ||
95 | |||
96 | #define VMI_CALL_SetIOPLMask 61 | ||
97 | #define VMI_CALL_SetInitialAPState 62 | ||
98 | #define VMI_CALL_APICWrite 63 | ||
99 | #define VMI_CALL_APICRead 64 | ||
100 | #define VMI_CALL_SetLazyMode 73 | ||
101 | |||
102 | /* | ||
103 | *--------------------------------------------------------------------- | ||
104 | * | ||
105 | * MMU operation flags | ||
106 | * | ||
107 | *--------------------------------------------------------------------- | ||
108 | */ | ||
109 | |||
110 | /* Flags used by VMI_{Allocate|Release}Page call */ | ||
111 | #define VMI_PAGE_PAE 0x10 /* Allocate PAE shadow */ | ||
112 | #define VMI_PAGE_CLONE 0x20 /* Clone from another shadow */ | ||
113 | #define VMI_PAGE_ZEROED 0x40 /* Page is pre-zeroed */ | ||
114 | |||
115 | |||
116 | /* Flags shared by Allocate|Release Page and PTE updates */ | ||
117 | #define VMI_PAGE_PT 0x01 | ||
118 | #define VMI_PAGE_PD 0x02 | ||
119 | #define VMI_PAGE_PDP 0x04 | ||
120 | #define VMI_PAGE_PML4 0x08 | ||
121 | |||
122 | #define VMI_PAGE_NORMAL 0x00 /* for debugging */ | ||
123 | |||
124 | /* Flags used by PTE updates */ | ||
125 | #define VMI_PAGE_CURRENT_AS 0x10 /* implies VMI_PAGE_VA_MASK is valid */ | ||
126 | #define VMI_PAGE_DEFER 0x20 /* may queue update until TLB inval */ | ||
127 | #define VMI_PAGE_VA_MASK 0xfffff000 | ||
128 | |||
129 | #ifdef CONFIG_X86_PAE | ||
130 | #define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_PAE | VMI_PAGE_ZEROED) | ||
131 | #define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_PAE | VMI_PAGE_ZEROED) | ||
132 | #else | ||
133 | #define VMI_PAGE_L1 (VMI_PAGE_PT | VMI_PAGE_ZEROED) | ||
134 | #define VMI_PAGE_L2 (VMI_PAGE_PD | VMI_PAGE_ZEROED) | ||
135 | #endif | ||
136 | |||
137 | /* Flags used by VMI_FlushTLB call */ | ||
138 | #define VMI_FLUSH_TLB 0x01 | ||
139 | #define VMI_FLUSH_GLOBAL 0x02 | ||
140 | |||
141 | /* | ||
142 | *--------------------------------------------------------------------- | ||
143 | * | ||
144 | * VMI relocation definitions for ROM call get_reloc | ||
145 | * | ||
146 | *--------------------------------------------------------------------- | ||
147 | */ | ||
148 | |||
149 | /* VMI Relocation types */ | ||
150 | #define VMI_RELOCATION_NONE 0 | ||
151 | #define VMI_RELOCATION_CALL_REL 1 | ||
152 | #define VMI_RELOCATION_JUMP_REL 2 | ||
153 | #define VMI_RELOCATION_NOP 3 | ||
154 | |||
155 | #ifndef __ASSEMBLY__ | ||
156 | struct vmi_relocation_info { | ||
157 | unsigned char *eip; | ||
158 | unsigned char type; | ||
159 | unsigned char reserved[3]; | ||
160 | }; | ||
161 | #endif | ||
162 | |||
163 | |||
164 | /* | ||
165 | *--------------------------------------------------------------------- | ||
166 | * | ||
167 | * Generic ROM structures and definitions | ||
168 | * | ||
169 | *--------------------------------------------------------------------- | ||
170 | */ | ||
171 | |||
172 | #ifndef __ASSEMBLY__ | ||
173 | |||
174 | struct vrom_header { | ||
175 | u16 rom_signature; // option ROM signature | ||
176 | u8 rom_length; // ROM length in 512 byte chunks | ||
177 | u8 rom_entry[4]; // 16-bit code entry point | ||
178 | u8 rom_pad0; // 4-byte align pad | ||
179 | u32 vrom_signature; // VROM identification signature | ||
180 | u8 api_version_min;// Minor version of API | ||
181 | u8 api_version_maj;// Major version of API | ||
182 | u8 jump_slots; // Number of jump slots | ||
183 | u8 reserved1; // Reserved for expansion | ||
184 | u32 virtual_top; // Hypervisor virtual address start | ||
185 | u16 reserved2; // Reserved for expansion | ||
186 | u16 license_offs; // Offset to License string | ||
187 | u16 pci_header_offs;// Offset to PCI OPROM header | ||
188 | u16 pnp_header_offs;// Offset to PnP OPROM header | ||
189 | u32 rom_pad3; // PnP reserved / VMI reserved | |
190 | u8 reserved[96]; // Reserved for headers | ||
191 | char vmi_init[8]; // VMI_Init jump point | ||
192 | char get_reloc[8]; // VMI_GetRelocationInfo jump point | ||
193 | } __attribute__((packed)); | ||
194 | |||
195 | struct pnp_header { | ||
196 | char sig[4]; | ||
197 | char rev; | ||
198 | char size; | ||
199 | short next; | ||
200 | short res; | ||
201 | long devID; | ||
202 | unsigned short manufacturer_offset; | ||
203 | unsigned short product_offset; | ||
204 | } __attribute__((packed)); | ||
205 | |||
206 | struct pci_header { | ||
207 | char sig[4]; | ||
208 | short vendorID; | ||
209 | short deviceID; | ||
210 | short vpdData; | ||
211 | short size; | ||
212 | char rev; | ||
213 | char class; | ||
214 | char subclass; | ||
215 | char interface; | ||
216 | short chunks; | ||
217 | char rom_version_min; | ||
218 | char rom_version_maj; | ||
219 | char codetype; | ||
220 | char lastRom; | ||
221 | short reserved; | ||
222 | } __attribute__((packed)); | ||
223 | |||
224 | /* Function prototypes for bootstrapping */ | ||
225 | extern void vmi_init(void); | ||
226 | extern void vmi_bringup(void); | ||
227 | extern void vmi_apply_boot_page_allocations(void); | ||
228 | |||
229 | /* State needed to start an application processor in an SMP system. */ | ||
230 | struct vmi_ap_state { | ||
231 | u32 cr0; | ||
232 | u32 cr2; | ||
233 | u32 cr3; | ||
234 | u32 cr4; | ||
235 | |||
236 | u64 efer; | ||
237 | |||
238 | u32 eip; | ||
239 | u32 eflags; | ||
240 | u32 eax; | ||
241 | u32 ebx; | ||
242 | u32 ecx; | ||
243 | u32 edx; | ||
244 | u32 esp; | ||
245 | u32 ebp; | ||
246 | u32 esi; | ||
247 | u32 edi; | ||
248 | u16 cs; | ||
249 | u16 ss; | ||
250 | u16 ds; | ||
251 | u16 es; | ||
252 | u16 fs; | ||
253 | u16 gs; | ||
254 | u16 ldtr; | ||
255 | |||
256 | u16 gdtr_limit; | ||
257 | u32 gdtr_base; | ||
258 | u32 idtr_base; | ||
259 | u16 idtr_limit; | ||
260 | }; | ||
261 | |||
262 | #endif | ||
diff --git a/include/asm-i386/vmi_time.h b/include/asm-i386/vmi_time.h new file mode 100644 index 000000000000..c12931211007 --- /dev/null +++ b/include/asm-i386/vmi_time.h | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * VMI Time wrappers | ||
3 | * | ||
4 | * Copyright (C) 2006, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #ifndef __VMI_TIME_H | ||
26 | #define __VMI_TIME_H | ||
27 | |||
28 | /* | ||
29 | * Raw VMI call indices for timer functions | ||
30 | */ | ||
31 | #define VMI_CALL_GetCycleFrequency 66 | ||
32 | #define VMI_CALL_GetCycleCounter 67 | ||
33 | #define VMI_CALL_SetAlarm 68 | ||
34 | #define VMI_CALL_CancelAlarm 69 | ||
35 | #define VMI_CALL_GetWallclockTime 70 | ||
36 | #define VMI_CALL_WallclockUpdated 71 | ||
37 | |||
38 | /* Cached VMI timer operations */ | ||
39 | extern struct vmi_timer_ops { | ||
40 | u64 (*get_cycle_frequency)(void); | ||
41 | u64 (*get_cycle_counter)(int); | ||
42 | u64 (*get_wallclock)(void); | ||
43 | int (*wallclock_updated)(void); | ||
44 | void (*set_alarm)(u32 flags, u64 expiry, u64 period); | ||
45 | void (*cancel_alarm)(u32 flags); | ||
46 | } vmi_timer_ops; | ||
47 | |||
48 | /* Prototypes */ | ||
49 | extern void __init vmi_time_init(void); | ||
50 | extern unsigned long vmi_get_wallclock(void); | ||
51 | extern int vmi_set_wallclock(unsigned long now); | ||
52 | extern unsigned long long vmi_sched_clock(void); | ||
53 | |||
54 | #ifdef CONFIG_X86_LOCAL_APIC | ||
55 | extern void __init vmi_timer_setup_boot_alarm(void); | ||
56 | extern void __init vmi_timer_setup_secondary_alarm(void); | ||
57 | extern void apic_vmi_timer_interrupt(void); | ||
58 | #endif | ||
59 | |||
60 | #ifdef CONFIG_NO_IDLE_HZ | ||
61 | extern int vmi_stop_hz_timer(void); | ||
62 | extern void vmi_account_time_restart_hz_timer(void); | ||
63 | #endif | ||
64 | |||
65 | /* | ||
66 | * When run under a hypervisor, a vcpu is always in one of three states: | ||
67 | * running, halted, or ready. The vcpu is in the 'running' state if it | ||
68 | * is executing. When the vcpu executes the halt interface, the vcpu | ||
69 | * enters the 'halted' state and remains halted until there is some work | ||
70 | * pending for the vcpu (e.g. an alarm expires, host I/O completes on | ||
71 | * behalf of virtual I/O). At this point, the vcpu enters the 'ready' | ||
72 | * state (waiting for the hypervisor to reschedule it). Finally, at any | ||
73 | * time when the vcpu is not in the 'running' state nor the 'halted' | ||
74 | * state, it is in the 'ready' state. | ||
75 | * | ||
76 | * Real time advances while the vcpu is 'running', 'ready', or | |
77 | * 'halted'. Stolen time is the time in which the vcpu is in the | ||
78 | * 'ready' state. Available time is the remaining time -- the vcpu is | ||
79 | * either 'running' or 'halted'. | ||
80 | * | ||
81 | * All three views of time are accessible through the VMI cycle | ||
82 | * counters. | ||
83 | */ | ||
84 | |||
85 | /* The cycle counters. */ | ||
86 | #define VMI_CYCLES_REAL 0 | ||
87 | #define VMI_CYCLES_AVAILABLE 1 | ||
88 | #define VMI_CYCLES_STOLEN 2 | ||
89 | |||
90 | /* The alarm interface 'flags' bits */ | ||
91 | #define VMI_ALARM_COUNTERS 2 | ||
92 | |||
93 | #define VMI_ALARM_COUNTER_MASK 0x000000ff | ||
94 | |||
95 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
96 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
97 | |||
98 | #define VMI_ALARM_IS_ONESHOT 0x00000000 | ||
99 | #define VMI_ALARM_IS_PERIODIC 0x00000100 | ||
100 | |||
101 | #define CONFIG_VMI_ALARM_HZ 100 | ||
102 | |||
103 | #endif | ||
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h index 8da9609070f4..d4dbbe5f7bd9 100644 --- a/include/asm-x86_64/bitops.h +++ b/include/asm-x86_64/bitops.h | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | #include <asm/alternative.h> | 8 | #include <asm/alternative.h> |
9 | 9 | ||
10 | #if __GNUC__ < 4 || __GNUC_MINOR__ < 1 | 10 | #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 1) |
11 | /* Technically wrong, but this avoids compilation errors on some gcc | 11 | /* Technically wrong, but this avoids compilation errors on some gcc |
12 | versions. */ | 12 | versions. */ |
13 | #define ADDR "=m" (*(volatile long *) addr) | 13 | #define ADDR "=m" (*(volatile long *) addr) |
diff --git a/include/asm-x86_64/dma-mapping.h b/include/asm-x86_64/dma-mapping.h index 49dbab09ef2b..d2af227f06d0 100644 --- a/include/asm-x86_64/dma-mapping.h +++ b/include/asm-x86_64/dma-mapping.h | |||
@@ -66,6 +66,9 @@ static inline int dma_mapping_error(dma_addr_t dma_addr) | |||
66 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | 66 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) |
67 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | 67 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) |
68 | 68 | ||
69 | #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) | ||
70 | #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) | ||
71 | |||
69 | extern void *dma_alloc_coherent(struct device *dev, size_t size, | 72 | extern void *dma_alloc_coherent(struct device *dev, size_t size, |
70 | dma_addr_t *dma_handle, gfp_t gfp); | 73 | dma_addr_t *dma_handle, gfp_t gfp); |
71 | extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, | 74 | extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, |
diff --git a/include/asm-x86_64/e820.h b/include/asm-x86_64/e820.h index fa2086774105..6216fa3f2802 100644 --- a/include/asm-x86_64/e820.h +++ b/include/asm-x86_64/e820.h | |||
@@ -46,6 +46,7 @@ extern void e820_mark_nosave_regions(void); | |||
46 | extern void e820_print_map(char *who); | 46 | extern void e820_print_map(char *who); |
47 | extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); | 47 | extern int e820_any_mapped(unsigned long start, unsigned long end, unsigned type); |
48 | extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); | 48 | extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); |
49 | extern unsigned long e820_hole_size(unsigned long start, unsigned long end); | ||
49 | 50 | ||
50 | extern void e820_setup_gap(void); | 51 | extern void e820_setup_gap(void); |
51 | extern void e820_register_active_regions(int nid, | 52 | extern void e820_register_active_regions(int nid, |
@@ -56,6 +57,7 @@ extern void finish_e820_parsing(void); | |||
56 | extern struct e820map e820; | 57 | extern struct e820map e820; |
57 | 58 | ||
58 | extern unsigned ebda_addr, ebda_size; | 59 | extern unsigned ebda_addr, ebda_size; |
60 | extern unsigned long nodemap_addr, nodemap_size; | ||
59 | #endif/*!__ASSEMBLY__*/ | 61 | #endif/*!__ASSEMBLY__*/ |
60 | 62 | ||
61 | #endif/*__E820_HEADER*/ | 63 | #endif/*__E820_HEADER*/ |
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index 179cce755aa7..552df5f10a6d 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h | |||
@@ -91,7 +91,7 @@ extern void enable_8259A_irq(unsigned int irq); | |||
91 | extern int i8259A_irq_pending(unsigned int irq); | 91 | extern int i8259A_irq_pending(unsigned int irq); |
92 | extern void make_8259A_irq(unsigned int irq); | 92 | extern void make_8259A_irq(unsigned int irq); |
93 | extern void init_8259A(int aeoi); | 93 | extern void init_8259A(int aeoi); |
94 | extern void FASTCALL(send_IPI_self(int vector)); | 94 | extern void send_IPI_self(int vector); |
95 | extern void init_VISWS_APIC_irqs(void); | 95 | extern void init_VISWS_APIC_irqs(void); |
96 | extern void setup_IO_APIC(void); | 96 | extern void setup_IO_APIC(void); |
97 | extern void disable_IO_APIC(void); | 97 | extern void disable_IO_APIC(void); |
diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h index f5d84bb7c948..de2cd9a2303a 100644 --- a/include/asm-x86_64/io.h +++ b/include/asm-x86_64/io.h | |||
@@ -100,7 +100,7 @@ __OUTS(l) | |||
100 | 100 | ||
101 | #define IO_SPACE_LIMIT 0xffff | 101 | #define IO_SPACE_LIMIT 0xffff |
102 | 102 | ||
103 | #if defined(__KERNEL__) && __x86_64__ | 103 | #if defined(__KERNEL__) && defined(__x86_64__) |
104 | 104 | ||
105 | #include <linux/vmalloc.h> | 105 | #include <linux/vmalloc.h> |
106 | 106 | ||
diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h index 561ecbfd4cb5..f4fb238c89f1 100644 --- a/include/asm-x86_64/io_apic.h +++ b/include/asm-x86_64/io_apic.h | |||
@@ -85,18 +85,8 @@ struct IO_APIC_route_entry { | |||
85 | mask : 1, /* 0: enabled, 1: disabled */ | 85 | mask : 1, /* 0: enabled, 1: disabled */ |
86 | __reserved_2 : 15; | 86 | __reserved_2 : 15; |
87 | 87 | ||
88 | union { struct { __u32 | 88 | __u32 __reserved_3 : 24, |
89 | __reserved_1 : 24, | 89 | dest : 8; |
90 | physical_dest : 4, | ||
91 | __reserved_2 : 4; | ||
92 | } physical; | ||
93 | |||
94 | struct { __u32 | ||
95 | __reserved_1 : 24, | ||
96 | logical_dest : 8; | ||
97 | } logical; | ||
98 | } dest; | ||
99 | |||
100 | } __attribute__ ((packed)); | 90 | } __attribute__ ((packed)); |
101 | 91 | ||
102 | /* | 92 | /* |
diff --git a/include/asm-x86_64/mce.h b/include/asm-x86_64/mce.h index 5a11146d6d9c..177e92b4019b 100644 --- a/include/asm-x86_64/mce.h +++ b/include/asm-x86_64/mce.h | |||
@@ -103,6 +103,8 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status); | |||
103 | 103 | ||
104 | extern atomic_t mce_entry; | 104 | extern atomic_t mce_entry; |
105 | 105 | ||
106 | extern void do_machine_check(struct pt_regs *, long); | ||
107 | |||
106 | #endif | 108 | #endif |
107 | 109 | ||
108 | #endif | 110 | #endif |
diff --git a/include/asm-x86_64/mmzone.h b/include/asm-x86_64/mmzone.h index c38ebdf6f426..fb558fb1d211 100644 --- a/include/asm-x86_64/mmzone.h +++ b/include/asm-x86_64/mmzone.h | |||
@@ -11,24 +11,25 @@ | |||
11 | 11 | ||
12 | #include <asm/smp.h> | 12 | #include <asm/smp.h> |
13 | 13 | ||
14 | /* Should really switch to dynamic allocation at some point */ | ||
15 | #define NODEMAPSIZE 0x4fff | ||
16 | |||
17 | /* Simple perfect hash to map physical addresses to node numbers */ | 14 | /* Simple perfect hash to map physical addresses to node numbers */ |
18 | struct memnode { | 15 | struct memnode { |
19 | int shift; | 16 | int shift; |
20 | u8 map[NODEMAPSIZE]; | 17 | unsigned int mapsize; |
21 | } ____cacheline_aligned; | 18 | u8 *map; |
19 | u8 embedded_map[64-16]; | ||
20 | } ____cacheline_aligned; /* total size = 64 bytes */ | ||
22 | extern struct memnode memnode; | 21 | extern struct memnode memnode; |
23 | #define memnode_shift memnode.shift | 22 | #define memnode_shift memnode.shift |
24 | #define memnodemap memnode.map | 23 | #define memnodemap memnode.map |
24 | #define memnodemapsize memnode.mapsize | ||
25 | 25 | ||
26 | extern struct pglist_data *node_data[]; | 26 | extern struct pglist_data *node_data[]; |
27 | 27 | ||
28 | static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | 28 | static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) |
29 | { | 29 | { |
30 | unsigned nid; | 30 | unsigned nid; |
31 | VIRTUAL_BUG_ON((addr >> memnode_shift) >= NODEMAPSIZE); | 31 | VIRTUAL_BUG_ON(!memnodemap); |
32 | VIRTUAL_BUG_ON((addr >> memnode_shift) >= memnodemapsize); | ||
32 | nid = memnodemap[addr >> memnode_shift]; | 33 | nid = memnodemap[addr >> memnode_shift]; |
33 | VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); | 34 | VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]); |
34 | return nid; | 35 | return nid; |
@@ -46,5 +47,10 @@ static inline __attribute__((pure)) int phys_to_nid(unsigned long addr) | |||
46 | extern int pfn_valid(unsigned long pfn); | 47 | extern int pfn_valid(unsigned long pfn); |
47 | #endif | 48 | #endif |
48 | 49 | ||
50 | #ifdef CONFIG_NUMA_EMU | ||
51 | #define FAKE_NODE_MIN_SIZE (64*1024*1024) | ||
52 | #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1ul)) | ||
53 | #endif | ||
54 | |||
49 | #endif | 55 | #endif |
50 | #endif | 56 | #endif |
diff --git a/include/asm-x86_64/mutex.h b/include/asm-x86_64/mutex.h index 16396b1de3e4..6c2949a3c677 100644 --- a/include/asm-x86_64/mutex.h +++ b/include/asm-x86_64/mutex.h | |||
@@ -21,7 +21,7 @@ do { \ | |||
21 | unsigned long dummy; \ | 21 | unsigned long dummy; \ |
22 | \ | 22 | \ |
23 | typecheck(atomic_t *, v); \ | 23 | typecheck(atomic_t *, v); \ |
24 | typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ | 24 | typecheck_fn(void (*)(atomic_t *), fail_fn); \ |
25 | \ | 25 | \ |
26 | __asm__ __volatile__( \ | 26 | __asm__ __volatile__( \ |
27 | LOCK_PREFIX " decl (%%rdi) \n" \ | 27 | LOCK_PREFIX " decl (%%rdi) \n" \ |
@@ -47,7 +47,7 @@ do { \ | |||
47 | */ | 47 | */ |
48 | static inline int | 48 | static inline int |
49 | __mutex_fastpath_lock_retval(atomic_t *count, | 49 | __mutex_fastpath_lock_retval(atomic_t *count, |
50 | int fastcall (*fail_fn)(atomic_t *)) | 50 | int (*fail_fn)(atomic_t *)) |
51 | { | 51 | { |
52 | if (unlikely(atomic_dec_return(count) < 0)) | 52 | if (unlikely(atomic_dec_return(count) < 0)) |
53 | return fail_fn(count); | 53 | return fail_fn(count); |
@@ -67,7 +67,7 @@ do { \ | |||
67 | unsigned long dummy; \ | 67 | unsigned long dummy; \ |
68 | \ | 68 | \ |
69 | typecheck(atomic_t *, v); \ | 69 | typecheck(atomic_t *, v); \ |
70 | typecheck_fn(fastcall void (*)(atomic_t *), fail_fn); \ | 70 | typecheck_fn(void (*)(atomic_t *), fail_fn); \ |
71 | \ | 71 | \ |
72 | __asm__ __volatile__( \ | 72 | __asm__ __volatile__( \ |
73 | LOCK_PREFIX " incl (%%rdi) \n" \ | 73 | LOCK_PREFIX " incl (%%rdi) \n" \ |
diff --git a/include/asm-x86_64/pgalloc.h b/include/asm-x86_64/pgalloc.h index 43d4c333a8b1..4e28b6060a5e 100644 --- a/include/asm-x86_64/pgalloc.h +++ b/include/asm-x86_64/pgalloc.h | |||
@@ -18,11 +18,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *p | |||
18 | set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); | 18 | set_pmd(pmd, __pmd(_PAGE_TABLE | (page_to_pfn(pte) << PAGE_SHIFT))); |
19 | } | 19 | } |
20 | 20 | ||
21 | static inline pmd_t *get_pmd(void) | ||
22 | { | ||
23 | return (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
24 | } | ||
25 | |||
26 | static inline void pmd_free(pmd_t *pmd) | 21 | static inline void pmd_free(pmd_t *pmd) |
27 | { | 22 | { |
28 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); | 23 | BUG_ON((unsigned long)pmd & (PAGE_SIZE-1)); |
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 59901c690a0d..730bd6028416 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h | |||
@@ -359,15 +359,6 @@ static inline int pmd_large(pmd_t pte) { | |||
359 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) | 359 | #define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) |
360 | #define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) | 360 | #define mk_pte_huge(entry) (pte_val(entry) |= _PAGE_PRESENT | _PAGE_PSE) |
361 | 361 | ||
362 | /* physical address -> PTE */ | ||
363 | static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) | ||
364 | { | ||
365 | pte_t pte; | ||
366 | pte_val(pte) = physpage | pgprot_val(pgprot); | ||
367 | pte_val(pte) &= __supported_pte_mask; | ||
368 | return pte; | ||
369 | } | ||
370 | |||
371 | /* Change flags of a PTE */ | 362 | /* Change flags of a PTE */ |
372 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) | 363 | static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) |
373 | { | 364 | { |
diff --git a/include/asm-x86_64/uaccess.h b/include/asm-x86_64/uaccess.h index 8079e29c14fd..1981f70fcad1 100644 --- a/include/asm-x86_64/uaccess.h +++ b/include/asm-x86_64/uaccess.h | |||
@@ -367,4 +367,18 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) | |||
367 | return copy_user_generic((__force void *)dst, src, size); | 367 | return copy_user_generic((__force void *)dst, src, size); |
368 | } | 368 | } |
369 | 369 | ||
370 | #define ARCH_HAS_NOCACHE_UACCESS 1 | ||
371 | extern long __copy_user_nocache(void *dst, const void __user *src, unsigned size, int zerorest); | ||
372 | |||
373 | static inline int __copy_from_user_nocache(void *dst, const void __user *src, unsigned size) | ||
374 | { | ||
375 | might_sleep(); | ||
376 | return __copy_user_nocache(dst, (__force void *)src, size, 1); | ||
377 | } | ||
378 | |||
379 | static inline int __copy_from_user_inatomic_nocache(void *dst, const void __user *src, unsigned size) | ||
380 | { | ||
381 | return __copy_user_nocache(dst, (__force void *)src, size, 0); | ||
382 | } | ||
383 | |||
370 | #endif /* __X86_64_UACCESS_H */ | 384 | #endif /* __X86_64_UACCESS_H */ |
diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index 05cb8dd200de..0c7847165eae 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h | |||
@@ -56,11 +56,6 @@ extern struct vxtime_data vxtime; | |||
56 | extern int vgetcpu_mode; | 56 | extern int vgetcpu_mode; |
57 | extern struct timezone sys_tz; | 57 | extern struct timezone sys_tz; |
58 | extern int sysctl_vsyscall; | 58 | extern int sysctl_vsyscall; |
59 | extern seqlock_t xtime_lock; | ||
60 | |||
61 | extern int sysctl_vsyscall; | ||
62 | |||
63 | #define ARCH_HAVE_XTIME_LOCK 1 | ||
64 | 59 | ||
65 | #endif /* __KERNEL__ */ | 60 | #endif /* __KERNEL__ */ |
66 | 61 | ||
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index c1e82c514443..2d956cd566ae 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h | |||
@@ -59,6 +59,7 @@ struct linux_binfmt { | |||
59 | int (*load_shlib)(struct file *); | 59 | int (*load_shlib)(struct file *); |
60 | int (*core_dump)(long signr, struct pt_regs * regs, struct file * file); | 60 | int (*core_dump)(long signr, struct pt_regs * regs, struct file * file); |
61 | unsigned long min_coredump; /* minimal dump size */ | 61 | unsigned long min_coredump; /* minimal dump size */ |
62 | int hasvdso; | ||
62 | }; | 63 | }; |
63 | 64 | ||
64 | extern int register_binfmt(struct linux_binfmt *); | 65 | extern int register_binfmt(struct linux_binfmt *); |
diff --git a/include/linux/nmi.h b/include/linux/nmi.h index acb4ed130247..29af2d5df097 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h | |||
@@ -17,8 +17,15 @@ | |||
17 | #ifdef ARCH_HAS_NMI_WATCHDOG | 17 | #ifdef ARCH_HAS_NMI_WATCHDOG |
18 | #include <asm/nmi.h> | 18 | #include <asm/nmi.h> |
19 | extern void touch_nmi_watchdog(void); | 19 | extern void touch_nmi_watchdog(void); |
20 | extern void acpi_nmi_disable(void); | ||
21 | extern void acpi_nmi_enable(void); | ||
20 | #else | 22 | #else |
21 | # define touch_nmi_watchdog() touch_softlockup_watchdog() | 23 | static inline void touch_nmi_watchdog(void) |
24 | { | ||
25 | touch_softlockup_watchdog(); | ||
26 | } | ||
27 | static inline void acpi_nmi_disable(void) { } | ||
28 | static inline void acpi_nmi_enable(void) { } | ||
22 | #endif | 29 | #endif |
23 | 30 | ||
24 | #ifndef trigger_all_cpu_backtrace | 31 | #ifndef trigger_all_cpu_backtrace |
diff --git a/include/linux/time.h b/include/linux/time.h index 55cee172d723..eceb1a59b078 100644 --- a/include/linux/time.h +++ b/include/linux/time.h | |||
@@ -90,7 +90,7 @@ static inline struct timespec timespec_sub(struct timespec lhs, | |||
90 | 90 | ||
91 | extern struct timespec xtime; | 91 | extern struct timespec xtime; |
92 | extern struct timespec wall_to_monotonic; | 92 | extern struct timespec wall_to_monotonic; |
93 | extern seqlock_t xtime_lock; | 93 | extern seqlock_t xtime_lock __attribute__((weak)); |
94 | 94 | ||
95 | void timekeeping_init(void); | 95 | void timekeeping_init(void); |
96 | 96 | ||
diff --git a/init/main.c b/init/main.c index 649ab5443d43..2421e1544127 100644 --- a/init/main.c +++ b/init/main.c | |||
@@ -726,7 +726,49 @@ static void run_init_process(char *init_filename) | |||
726 | kernel_execve(init_filename, argv_init, envp_init); | 726 | kernel_execve(init_filename, argv_init, envp_init); |
727 | } | 727 | } |
728 | 728 | ||
729 | static int init(void * unused) | 729 | /* This is a non __init function. Force it to be noinline otherwise gcc |
730 | * makes it inline to init() and it becomes part of init.text section | ||
731 | */ | ||
732 | static int noinline init_post(void) | ||
733 | { | ||
734 | free_initmem(); | ||
735 | unlock_kernel(); | ||
736 | mark_rodata_ro(); | ||
737 | system_state = SYSTEM_RUNNING; | ||
738 | numa_default_policy(); | ||
739 | |||
740 | if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) | ||
741 | printk(KERN_WARNING "Warning: unable to open an initial console.\n"); | ||
742 | |||
743 | (void) sys_dup(0); | ||
744 | (void) sys_dup(0); | ||
745 | |||
746 | if (ramdisk_execute_command) { | ||
747 | run_init_process(ramdisk_execute_command); | ||
748 | printk(KERN_WARNING "Failed to execute %s\n", | ||
749 | ramdisk_execute_command); | ||
750 | } | ||
751 | |||
752 | /* | ||
753 | * We try each of these until one succeeds. | ||
754 | * | ||
755 | * The Bourne shell can be used instead of init if we are | ||
756 | * trying to recover a really broken machine. | ||
757 | */ | ||
758 | if (execute_command) { | ||
759 | run_init_process(execute_command); | ||
760 | printk(KERN_WARNING "Failed to execute %s. Attempting " | ||
761 | "defaults...\n", execute_command); | ||
762 | } | ||
763 | run_init_process("/sbin/init"); | ||
764 | run_init_process("/etc/init"); | ||
765 | run_init_process("/bin/init"); | ||
766 | run_init_process("/bin/sh"); | ||
767 | |||
768 | panic("No init found. Try passing init= option to kernel."); | ||
769 | } | ||
770 | |||
771 | static int __init init(void * unused) | ||
730 | { | 772 | { |
731 | lock_kernel(); | 773 | lock_kernel(); |
732 | /* | 774 | /* |
@@ -774,39 +816,6 @@ static int init(void * unused) | |||
774 | * we're essentially up and running. Get rid of the | 816 | * we're essentially up and running. Get rid of the |
775 | * initmem segments and start the user-mode stuff.. | 817 | * initmem segments and start the user-mode stuff.. |
776 | */ | 818 | */ |
777 | free_initmem(); | 819 | init_post(); |
778 | unlock_kernel(); | 820 | return 0; |
779 | mark_rodata_ro(); | ||
780 | system_state = SYSTEM_RUNNING; | ||
781 | numa_default_policy(); | ||
782 | |||
783 | if (sys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0) | ||
784 | printk(KERN_WARNING "Warning: unable to open an initial console.\n"); | ||
785 | |||
786 | (void) sys_dup(0); | ||
787 | (void) sys_dup(0); | ||
788 | |||
789 | if (ramdisk_execute_command) { | ||
790 | run_init_process(ramdisk_execute_command); | ||
791 | printk(KERN_WARNING "Failed to execute %s\n", | ||
792 | ramdisk_execute_command); | ||
793 | } | ||
794 | |||
795 | /* | ||
796 | * We try each of these until one succeeds. | ||
797 | * | ||
798 | * The Bourne shell can be used instead of init if we are | ||
799 | * trying to recover a really broken machine. | ||
800 | */ | ||
801 | if (execute_command) { | ||
802 | run_init_process(execute_command); | ||
803 | printk(KERN_WARNING "Failed to execute %s. Attempting " | ||
804 | "defaults...\n", execute_command); | ||
805 | } | ||
806 | run_init_process("/sbin/init"); | ||
807 | run_init_process("/etc/init"); | ||
808 | run_init_process("/bin/init"); | ||
809 | run_init_process("/bin/sh"); | ||
810 | |||
811 | panic("No init found. Try passing init= option to kernel."); | ||
812 | } | 821 | } |
diff --git a/kernel/kmod.c b/kernel/kmod.c index 3a7379aa31ca..796276141e51 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c | |||
@@ -217,7 +217,10 @@ static int wait_for_helper(void *data) | |||
217 | sub_info->retval = ret; | 217 | sub_info->retval = ret; |
218 | } | 218 | } |
219 | 219 | ||
220 | complete(sub_info->complete); | 220 | if (sub_info->wait < 0) |
221 | kfree(sub_info); | ||
222 | else | ||
223 | complete(sub_info->complete); | ||
221 | return 0; | 224 | return 0; |
222 | } | 225 | } |
223 | 226 | ||
@@ -239,6 +242,9 @@ static void __call_usermodehelper(struct work_struct *work) | |||
239 | pid = kernel_thread(____call_usermodehelper, sub_info, | 242 | pid = kernel_thread(____call_usermodehelper, sub_info, |
240 | CLONE_VFORK | SIGCHLD); | 243 | CLONE_VFORK | SIGCHLD); |
241 | 244 | ||
245 | if (wait < 0) | ||
246 | return; | ||
247 | |||
242 | if (pid < 0) { | 248 | if (pid < 0) { |
243 | sub_info->retval = pid; | 249 | sub_info->retval = pid; |
244 | complete(sub_info->complete); | 250 | complete(sub_info->complete); |
@@ -253,6 +259,9 @@ static void __call_usermodehelper(struct work_struct *work) | |||
253 | * @envp: null-terminated environment list | 259 | * @envp: null-terminated environment list |
254 | * @session_keyring: session keyring for process (NULL for an empty keyring) | 260 | * @session_keyring: session keyring for process (NULL for an empty keyring) |
255 | * @wait: wait for the application to finish and return status. | 261 | * @wait: wait for the application to finish and return status. |
262 | * when -1 don't wait at all, but you get no useful error back when | ||
263 | * the program couldn't be exec'ed. This makes it safe to call | ||
264 | * from interrupt context. | ||
256 | * | 265 | * |
257 | * Runs a user-space application. The application is started | 266 | * Runs a user-space application. The application is started |
258 | * asynchronously if wait is not set, and runs as a child of keventd. | 267 | * asynchronously if wait is not set, and runs as a child of keventd. |
@@ -265,17 +274,8 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, | |||
265 | struct key *session_keyring, int wait) | 274 | struct key *session_keyring, int wait) |
266 | { | 275 | { |
267 | DECLARE_COMPLETION_ONSTACK(done); | 276 | DECLARE_COMPLETION_ONSTACK(done); |
268 | struct subprocess_info sub_info = { | 277 | struct subprocess_info *sub_info; |
269 | .work = __WORK_INITIALIZER(sub_info.work, | 278 | int retval; |
270 | __call_usermodehelper), | ||
271 | .complete = &done, | ||
272 | .path = path, | ||
273 | .argv = argv, | ||
274 | .envp = envp, | ||
275 | .ring = session_keyring, | ||
276 | .wait = wait, | ||
277 | .retval = 0, | ||
278 | }; | ||
279 | 279 | ||
280 | if (!khelper_wq) | 280 | if (!khelper_wq) |
281 | return -EBUSY; | 281 | return -EBUSY; |
@@ -283,9 +283,25 @@ int call_usermodehelper_keys(char *path, char **argv, char **envp, | |||
283 | if (path[0] == '\0') | 283 | if (path[0] == '\0') |
284 | return 0; | 284 | return 0; |
285 | 285 | ||
286 | queue_work(khelper_wq, &sub_info.work); | 286 | sub_info = kzalloc(sizeof(struct subprocess_info), GFP_ATOMIC); |
287 | if (!sub_info) | ||
288 | return -ENOMEM; | ||
289 | |||
290 | INIT_WORK(&sub_info->work, __call_usermodehelper); | ||
291 | sub_info->complete = &done; | ||
292 | sub_info->path = path; | ||
293 | sub_info->argv = argv; | ||
294 | sub_info->envp = envp; | ||
295 | sub_info->ring = session_keyring; | ||
296 | sub_info->wait = wait; | ||
297 | |||
298 | queue_work(khelper_wq, &sub_info->work); | ||
299 | if (wait < 0) /* task has freed sub_info */ | ||
300 | return 0; | ||
287 | wait_for_completion(&done); | 301 | wait_for_completion(&done); |
288 | return sub_info.retval; | 302 | retval = sub_info->retval; |
303 | kfree(sub_info); | ||
304 | return retval; | ||
289 | } | 305 | } |
290 | EXPORT_SYMBOL(call_usermodehelper_keys); | 306 | EXPORT_SYMBOL(call_usermodehelper_keys); |
291 | 307 | ||
diff --git a/kernel/sched.c b/kernel/sched.c index 08f86178aa34..0dc757246d89 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -1853,6 +1853,13 @@ context_switch(struct rq *rq, struct task_struct *prev, | |||
1853 | struct mm_struct *mm = next->mm; | 1853 | struct mm_struct *mm = next->mm; |
1854 | struct mm_struct *oldmm = prev->active_mm; | 1854 | struct mm_struct *oldmm = prev->active_mm; |
1855 | 1855 | ||
1856 | /* | ||
1857 | * For paravirt, this is coupled with an exit in switch_to to | ||
1858 | * combine the page table reload and the switch backend into | ||
1859 | * one hypercall. | ||
1860 | */ | ||
1861 | arch_enter_lazy_cpu_mode(); | ||
1862 | |||
1856 | if (!mm) { | 1863 | if (!mm) { |
1857 | next->active_mm = oldmm; | 1864 | next->active_mm = oldmm; |
1858 | atomic_inc(&oldmm->mm_count); | 1865 | atomic_inc(&oldmm->mm_count); |
diff --git a/kernel/timer.c b/kernel/timer.c index 8533c3796082..4902181e10e6 100644 --- a/kernel/timer.c +++ b/kernel/timer.c | |||
@@ -1162,11 +1162,9 @@ static inline void calc_load(unsigned long ticks) | |||
1162 | * This read-write spinlock protects us from races in SMP while | 1162 | * This read-write spinlock protects us from races in SMP while |
1163 | * playing with xtime and avenrun. | 1163 | * playing with xtime and avenrun. |
1164 | */ | 1164 | */ |
1165 | #ifndef ARCH_HAVE_XTIME_LOCK | 1165 | __attribute__((weak)) __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); |
1166 | __cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); | ||
1167 | 1166 | ||
1168 | EXPORT_SYMBOL(xtime_lock); | 1167 | EXPORT_SYMBOL(xtime_lock); |
1169 | #endif | ||
1170 | 1168 | ||
1171 | /* | 1169 | /* |
1172 | * This function runs timers and the timer-tq in bottom half context. | 1170 | * This function runs timers and the timer-tq in bottom half context. |
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 2aa47623f5f8..569e68410d7a 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c | |||
@@ -641,12 +641,20 @@ static int secref_whitelist(const char *modname, const char *tosec, | |||
641 | if (f1 && f2) | 641 | if (f1 && f2) |
642 | return 1; | 642 | return 1; |
643 | 643 | ||
644 | /* Whitelist all references from .pci_fixup section if vmlinux */ | 644 | /* Whitelist all references from .pci_fixup section if vmlinux |
645 | * Whitelist all references from .text.head to .init.data if vmlinux | ||
646 | * Whitelist all references from .text.head to .init.text if vmlinux | ||
647 | */ | ||
645 | if (is_vmlinux(modname)) { | 648 | if (is_vmlinux(modname)) { |
646 | if ((strcmp(fromsec, ".pci_fixup") == 0) && | 649 | if ((strcmp(fromsec, ".pci_fixup") == 0) && |
647 | (strcmp(tosec, ".init.text") == 0)) | 650 | (strcmp(tosec, ".init.text") == 0)) |
648 | return 1; | 651 | return 1; |
649 | 652 | ||
653 | if ((strcmp(fromsec, ".text.head") == 0) && | ||
654 | ((strcmp(tosec, ".init.data") == 0) || | ||
655 | (strcmp(tosec, ".init.text") == 0))) | ||
656 | return 1; | ||
657 | |||
650 | /* Check for pattern 3 */ | 658 | /* Check for pattern 3 */ |
651 | for (s = pat3refsym; *s; s++) | 659 | for (s = pat3refsym; *s; s++) |
652 | if (strcmp(refsymname, *s) == 0) | 660 | if (strcmp(refsymname, *s) == 0) |