aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2008-09-04 11:09:09 -0400
committerH. Peter Anvin <hpa@zytor.com>2008-09-04 11:09:09 -0400
commit0ccd8c39bc664bf5e9fcc26caad50cc17ff866d1 (patch)
tree29cec0edf3acf18c6978b750a8d6560f445df6df /arch/x86
parent1625324d22409e32e3f8eb86018cad72e1c09d61 (diff)
parentec0c15afb41fd9ad45b53468b60db50170e22346 (diff)
Merge branch 'linus' into x86/core
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/kernel/tsc.c235
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/pci/i386.c87
4 files changed, 198 insertions, 156 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0a80d6a5e9f1..21ef9dd36187 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -577,35 +577,29 @@ config SWIOTLB
577 577
578config IOMMU_HELPER 578config IOMMU_HELPER
579 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) 579 def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
580
580config MAXSMP 581config MAXSMP
581 bool "Configure Maximum number of SMP Processors and NUMA Nodes" 582 bool "Configure Maximum number of SMP Processors and NUMA Nodes"
582 depends on X86_64 && SMP 583 depends on X86_64 && SMP && BROKEN
583 default n 584 default n
584 help 585 help
585 Configure maximum number of CPUS and NUMA Nodes for this architecture. 586 Configure maximum number of CPUS and NUMA Nodes for this architecture.
586 If unsure, say N. 587 If unsure, say N.
587 588
588if MAXSMP
589config NR_CPUS
590 int
591 default "4096"
592endif
593
594if !MAXSMP
595config NR_CPUS 589config NR_CPUS
596 int "Maximum number of CPUs (2-4096)" 590 int "Maximum number of CPUs (2-512)" if !MAXSMP
597 range 2 4096 591 range 2 512
598 depends on SMP 592 depends on SMP
593 default "4096" if MAXSMP
599 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 594 default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000
600 default "8" 595 default "8"
601 help 596 help
602 This allows you to specify the maximum number of CPUs which this 597 This allows you to specify the maximum number of CPUs which this
603 kernel will support. The maximum supported value is 4096 and the 598 kernel will support. The maximum supported value is 512 and the
604 minimum value which makes sense is 2. 599 minimum value which makes sense is 2.
605 600
606 This is purely to save memory - each supported CPU adds 601 This is purely to save memory - each supported CPU adds
607 approximately eight kilobytes to the kernel image. 602 approximately eight kilobytes to the kernel image.
608endif
609 603
610config SCHED_SMT 604config SCHED_SMT
611 bool "SMT (Hyperthreading) scheduler support" 605 bool "SMT (Hyperthreading) scheduler support"
@@ -996,17 +990,10 @@ config NUMA_EMU
996 into virtual nodes when booted with "numa=fake=N", where N is the 990 into virtual nodes when booted with "numa=fake=N", where N is the
997 number of nodes. This is only useful for debugging. 991 number of nodes. This is only useful for debugging.
998 992
999if MAXSMP
1000
1001config NODES_SHIFT 993config NODES_SHIFT
1002 int 994 int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP
1003 default "9"
1004endif
1005
1006if !MAXSMP
1007config NODES_SHIFT
1008 int "Maximum NUMA Nodes (as a power of 2)"
1009 range 1 9 if X86_64 995 range 1 9 if X86_64
996 default "9" if MAXSMP
1010 default "6" if X86_64 997 default "6" if X86_64
1011 default "4" if X86_NUMAQ 998 default "4" if X86_NUMAQ
1012 default "3" 999 default "3"
@@ -1014,7 +1001,6 @@ config NODES_SHIFT
1014 help 1001 help
1015 Specify the maximum number of NUMA Nodes available on the target 1002 Specify the maximum number of NUMA Nodes available on the target
1016 system. Increases memory reserved to accomodate various tables. 1003 system. Increases memory reserved to accomodate various tables.
1017endif
1018 1004
1019config HAVE_ARCH_BOOTMEM_NODE 1005config HAVE_ARCH_BOOTMEM_NODE
1020 def_bool y 1006 def_bool y
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 8e786b0d665a..346cae5ac423 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -122,80 +122,217 @@ static u64 tsc_read_refs(u64 *pm, u64 *hpet)
122 return ULLONG_MAX; 122 return ULLONG_MAX;
123} 123}
124 124
125/** 125/*
126 * native_calibrate_tsc - calibrate the tsc on boot 126 * Try to calibrate the TSC against the Programmable
127 * Interrupt Timer and return the frequency of the TSC
128 * in kHz.
129 *
130 * Return ULONG_MAX on failure to calibrate.
127 */ 131 */
128unsigned long native_calibrate_tsc(void) 132static unsigned long pit_calibrate_tsc(void)
129{ 133{
130 unsigned long flags; 134 u64 tsc, t1, t2, delta;
131 u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; 135 unsigned long tscmin, tscmax;
132 int hpet = is_hpet_enabled(); 136 int pitcnt;
133 unsigned int tsc_khz_val = 0;
134
135 local_irq_save(flags);
136
137 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
138 137
138 /* Set the Gate high, disable speaker */
139 outb((inb(0x61) & ~0x02) | 0x01, 0x61); 139 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
140 140
141 /*
142 * Setup CTC channel 2* for mode 0, (interrupt on terminal
143 * count mode), binary count. Set the latch register to 50ms
144 * (LSB then MSB) to begin countdown.
145 */
141 outb(0xb0, 0x43); 146 outb(0xb0, 0x43);
142 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); 147 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
143 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); 148 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
144 tr1 = get_cycles();
145 while ((inb(0x61) & 0x20) == 0);
146 tr2 = get_cycles();
147 149
148 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); 150 tsc = t1 = t2 = get_cycles();
149 151
150 local_irq_restore(flags); 152 pitcnt = 0;
153 tscmax = 0;
154 tscmin = ULONG_MAX;
155 while ((inb(0x61) & 0x20) == 0) {
156 t2 = get_cycles();
157 delta = t2 - tsc;
158 tsc = t2;
159 if ((unsigned long) delta < tscmin)
160 tscmin = (unsigned int) delta;
161 if ((unsigned long) delta > tscmax)
162 tscmax = (unsigned int) delta;
163 pitcnt++;
164 }
151 165
152 /* 166 /*
153 * Preset the result with the raw and inaccurate PIT 167 * Sanity checks:
154 * calibration value 168 *
169 * If we were not able to read the PIT more than 5000
170 * times, then we have been hit by a massive SMI
171 *
172 * If the maximum is 10 times larger than the minimum,
173 * then we got hit by an SMI as well.
155 */ 174 */
156 delta = (tr2 - tr1); 175 if (pitcnt < 5000 || tscmax > 10 * tscmin)
176 return ULONG_MAX;
177
178 /* Calculate the PIT value */
179 delta = t2 - t1;
157 do_div(delta, 50); 180 do_div(delta, 50);
158 tsc_khz_val = delta; 181 return delta;
182}
183
184
185/**
186 * native_calibrate_tsc - calibrate the tsc on boot
187 */
188unsigned long native_calibrate_tsc(void)
189{
190 u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2;
191 unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX;
192 unsigned long flags;
193 int hpet = is_hpet_enabled(), i;
159 194
160 /* hpet or pmtimer available ? */ 195 /*
196 * Run 5 calibration loops to get the lowest frequency value
197 * (the best estimate). We use two different calibration modes
198 * here:
199 *
200 * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and
201 * load a timeout of 50ms. We read the time right after we
202 * started the timer and wait until the PIT count down reaches
203 * zero. In each wait loop iteration we read the TSC and check
204 * the delta to the previous read. We keep track of the min
205 * and max values of that delta. The delta is mostly defined
206 * by the IO time of the PIT access, so we can detect when a
207 * SMI/SMM disturbance happened between the two reads. If the
208 * maximum time is significantly larger than the minimum time,
209 * then we discard the result and have another try.
210 *
211 * 2) Reference counter. If available we use the HPET or the
212 * PMTIMER as a reference to check the sanity of that value.
213 * We use separate TSC readouts and check inside of the
214 * reference read for a SMI/SMM disturbance. We discard
215 * disturbed values here as well. We do that around the PIT
216 * calibration delay loop as we have to wait for a certain
217 * amount of time anyway.
218 */
219 for (i = 0; i < 5; i++) {
220 unsigned long tsc_pit_khz;
221
222 /*
223 * Read the start value and the reference count of
224 * hpet/pmtimer when available. Then do the PIT
225 * calibration, which will take at least 50ms, and
226 * read the end value.
227 */
228 local_irq_save(flags);
229 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
230 tsc_pit_khz = pit_calibrate_tsc();
231 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
232 local_irq_restore(flags);
233
234 /* Pick the lowest PIT TSC calibration so far */
235 tsc_pit_min = min(tsc_pit_min, tsc_pit_khz);
236
237 /* hpet or pmtimer available ? */
238 if (!hpet && !pm1 && !pm2)
239 continue;
240
241 /* Check, whether the sampling was disturbed by an SMI */
242 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)
243 continue;
244
245 tsc2 = (tsc2 - tsc1) * 1000000LL;
246
247 if (hpet) {
248 if (hpet2 < hpet1)
249 hpet2 += 0x100000000ULL;
250 hpet2 -= hpet1;
251 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
252 do_div(tsc1, 1000000);
253 } else {
254 if (pm2 < pm1)
255 pm2 += (u64)ACPI_PM_OVRRUN;
256 pm2 -= pm1;
257 tsc1 = pm2 * 1000000000LL;
258 do_div(tsc1, PMTMR_TICKS_PER_SEC);
259 }
260
261 do_div(tsc2, tsc1);
262 tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2);
263 }
264
265 /*
266 * Now check the results.
267 */
268 if (tsc_pit_min == ULONG_MAX) {
269 /* PIT gave no useful value */
270 printk(KERN_WARNING "TSC: PIT calibration failed due to "
271 "SMI disturbance.\n");
272
273 /* We don't have an alternative source, disable TSC */
274 if (!hpet && !pm1 && !pm2) {
275 printk("TSC: No reference (HPET/PMTIMER) available\n");
276 return 0;
277 }
278
279 /* The alternative source failed as well, disable TSC */
280 if (tsc_ref_min == ULONG_MAX) {
281 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration "
282 "failed due to SMI disturbance.\n");
283 return 0;
284 }
285
286 /* Use the alternative source */
287 printk(KERN_INFO "TSC: using %s reference calibration\n",
288 hpet ? "HPET" : "PMTIMER");
289
290 return tsc_ref_min;
291 }
292
293 /* We don't have an alternative source, use the PIT calibration value */
161 if (!hpet && !pm1 && !pm2) { 294 if (!hpet && !pm1 && !pm2) {
162 printk(KERN_INFO "TSC calibrated against PIT\n"); 295 printk(KERN_INFO "TSC: Using PIT calibration value\n");
163 goto out; 296 return tsc_pit_min;
164 } 297 }
165 298
166 /* Check, whether the sampling was disturbed by an SMI */ 299 /* The alternative source failed, use the PIT calibration value */
167 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { 300 if (tsc_ref_min == ULONG_MAX) {
168 printk(KERN_WARNING "TSC calibration disturbed by SMI, " 301 printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due "
169 "using PIT calibration result\n"); 302 "to SMI disturbance. Using PIT calibration\n");
170 goto out; 303 return tsc_pit_min;
171 } 304 }
172 305
173 tsc2 = (tsc2 - tsc1) * 1000000LL; 306 /* Check the reference deviation */
174 307 delta = ((u64) tsc_pit_min) * 100;
175 if (hpet) { 308 do_div(delta, tsc_ref_min);
176 printk(KERN_INFO "TSC calibrated against HPET\n"); 309
177 if (hpet2 < hpet1) 310 /*
178 hpet2 += 0x100000000ULL; 311 * If both calibration results are inside a 5% window, then we
179 hpet2 -= hpet1; 312 * use the lower frequency of those as it is probably the
180 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); 313 * closest estimate.
181 do_div(tsc1, 1000000); 314 */
182 } else { 315 if (delta >= 95 && delta <= 105) {
183 printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); 316 printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n",
184 if (pm2 < pm1) 317 hpet ? "HPET" : "PMTIMER");
185 pm2 += (u64)ACPI_PM_OVRRUN; 318 printk(KERN_INFO "TSC: using %s calibration value\n",
186 pm2 -= pm1; 319 tsc_pit_min <= tsc_ref_min ? "PIT" :
187 tsc1 = pm2 * 1000000000LL; 320 hpet ? "HPET" : "PMTIMER");
188 do_div(tsc1, PMTMR_TICKS_PER_SEC); 321 return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min;
189 } 322 }
190 323
191 do_div(tsc2, tsc1); 324 printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n",
192 tsc_khz_val = tsc2; 325 hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min);
193 326
194out: 327 /*
195 return tsc_khz_val; 328 * The calibration values differ too much. In doubt, we use
329 * the PIT value as we know that there are PMTIMERs around
330 * running at double speed.
331 */
332 printk(KERN_INFO "TSC: Using PIT calibration value\n");
333 return tsc_pit_min;
196} 334}
197 335
198
199#ifdef CONFIG_X86_32 336#ifdef CONFIG_X86_32
200/* Only called from the Powernow K7 cpu freq driver */ 337/* Only called from the Powernow K7 cpu freq driver */
201int recalibrate_cpu_khz(void) 338int recalibrate_cpu_khz(void)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index f72ac1fa35f0..4a814bff21f2 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -345,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
345 shadow_addr = __pa(shadow_page->spt); 345 shadow_addr = __pa(shadow_page->spt);
346 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK 346 shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK
347 | PT_WRITABLE_MASK | PT_USER_MASK; 347 | PT_WRITABLE_MASK | PT_USER_MASK;
348 *shadow_ent = shadow_pte; 348 set_shadow_pte(shadow_ent, shadow_pte);
349 } 349 }
350 350
351 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, 351 mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access,
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index d765da913842..8791fc55e715 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -31,11 +31,8 @@
31#include <linux/ioport.h> 31#include <linux/ioport.h>
32#include <linux/errno.h> 32#include <linux/errno.h>
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34#include <linux/acpi.h>
35 34
36#include <asm/pat.h> 35#include <asm/pat.h>
37#include <asm/hpet.h>
38#include <asm/io_apic.h>
39 36
40#include "pci.h" 37#include "pci.h"
41 38
@@ -80,77 +77,6 @@ pcibios_align_resource(void *data, struct resource *res,
80} 77}
81EXPORT_SYMBOL(pcibios_align_resource); 78EXPORT_SYMBOL(pcibios_align_resource);
82 79
83static int check_res_with_valid(struct pci_dev *dev, struct resource *res)
84{
85 unsigned long base;
86 unsigned long size;
87 int i;
88
89 base = res->start;
90 size = (res->start == 0 && res->end == res->start) ? 0 :
91 (res->end - res->start + 1);
92
93 if (!base || !size)
94 return 0;
95
96#ifdef CONFIG_HPET_TIMER
97 /* for hpet */
98 if (base == hpet_address && (res->flags & IORESOURCE_MEM)) {
99 dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n",
100 base, base + size - 1);
101 return 1;
102 }
103#endif
104
105#ifdef CONFIG_X86_IO_APIC
106 for (i = 0; i < nr_ioapics; i++) {
107 unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr;
108
109 if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) {
110 dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n",
111 base, base + size - 1);
112 return 1;
113 }
114 }
115#endif
116
117#ifdef CONFIG_PCI_MMCONFIG
118 for (i = 0; i < pci_mmcfg_config_num; i++) {
119 unsigned long addr;
120
121 addr = pci_mmcfg_config[i].address;
122 if (base == addr && (res->flags & IORESOURCE_MEM)) {
123 dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n",
124 base, base + size - 1);
125 return 1;
126 }
127 }
128#endif
129
130 return 0;
131}
132
133static int check_platform(struct pci_dev *dev, struct resource *res)
134{
135 struct resource *root = NULL;
136
137 /*
138 * forcibly insert it into the
139 * resource tree
140 */
141 if (res->flags & IORESOURCE_MEM)
142 root = &iomem_resource;
143 else if (res->flags & IORESOURCE_IO)
144 root = &ioport_resource;
145
146 if (root && check_res_with_valid(dev, res)) {
147 insert_resource(root, res);
148
149 return 1;
150 }
151
152 return 0;
153}
154/* 80/*
155 * Handle resources of PCI devices. If the world were perfect, we could 81 * Handle resources of PCI devices. If the world were perfect, we could
156 * just allocate all the resource regions and do nothing more. It isn't. 82 * just allocate all the resource regions and do nothing more. It isn't.
@@ -202,10 +128,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list)
202 pr = pci_find_parent_resource(dev, r); 128 pr = pci_find_parent_resource(dev, r);
203 if (!r->start || !pr || 129 if (!r->start || !pr ||
204 request_resource(pr, r) < 0) { 130 request_resource(pr, r) < 0) {
205 if (check_platform(dev, r)) 131 dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
206 continue;
207 dev_err(&dev->dev, "BAR %d: can't "
208 "allocate resource\n", idx);
209 /* 132 /*
210 * Something is wrong with the region. 133 * Something is wrong with the region.
211 * Invalidate the resource to prevent 134 * Invalidate the resource to prevent
@@ -240,17 +163,13 @@ static void __init pcibios_allocate_resources(int pass)
240 else 163 else
241 disabled = !(command & PCI_COMMAND_MEMORY); 164 disabled = !(command & PCI_COMMAND_MEMORY);
242 if (pass == disabled) { 165 if (pass == disabled) {
243 dev_dbg(&dev->dev, "resource %#08llx-%#08llx " 166 dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n",
244 "(f=%lx, d=%d, p=%d)\n",
245 (unsigned long long) r->start, 167 (unsigned long long) r->start,
246 (unsigned long long) r->end, 168 (unsigned long long) r->end,
247 r->flags, disabled, pass); 169 r->flags, disabled, pass);
248 pr = pci_find_parent_resource(dev, r); 170 pr = pci_find_parent_resource(dev, r);
249 if (!pr || request_resource(pr, r) < 0) { 171 if (!pr || request_resource(pr, r) < 0) {
250 if (check_platform(dev, r)) 172 dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx);
251 continue;
252 dev_err(&dev->dev, "BAR %d: can't "
253 "allocate resource\n", idx);
254 /* We'll assign a new address later */ 173 /* We'll assign a new address later */
255 r->end -= r->start; 174 r->end -= r->start;
256 r->start = 0; 175 r->start = 0;