From 9a0b5817ad97bb718ab85322759d19a238712b47 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 23 Mar 2006 02:59:32 -0800 Subject: [PATCH] x86: SMP alternatives Implement SMP alternatives, i.e. switching at runtime between different code versions for UP and SMP. The code can patch both SMP->UP and UP->SMP. The UP->SMP case is useful for CPU hotplug. With CONFIG_CPU_HOTPLUG enabled the code switches to UP at boot time and when the number of CPUs goes down to 1, and switches to SMP when the number of CPUs goes up to 2. Without CONFIG_CPU_HOTPLUG or on non-SMP-capable systems the code is patched once at boot time (if needed) and the tables are released afterwards. The changes in detail: * The current alternatives bits are moved to a separate file, the SMP alternatives code is added there. * The patch adds some new elf sections to the kernel: .smp_altinstructions like .altinstructions, also contains a list of alt_instr structs. .smp_altinstr_replacement like .altinstr_replacement, but also has some space to save original instruction before replaving it. .smp_locks list of pointers to lock prefixes which can be nop'ed out on UP. The first two are used to replace more complex instruction sequences such as spinlocks and semaphores. It would be possible to deal with the lock prefixes with that as well, but by handling them as special case the table sizes become much smaller. * The sections are page-aligned and padded up to page size, so they can be free if they are not needed. * Splitted the code to release init pages to a separate function and use it to release the elf sections if they are unused. Signed-off-by: Gerd Hoffmann Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 95 ------------------------------------------------ 1 file changed, 95 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ab62a9f4701e..5f58f8cb9836 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1377,101 +1377,6 @@ static void __init register_memory(void) pci_mem_start, gapstart, gapsize); } -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - unsigned char **noptable = intel_nops; - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - BUG_ON(a->replacementlen > a->instrlen); - memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } - } -} - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA -- cgit v1.2.2 From 99b7de33477882b86d54ce8ecbf90147f9d106d7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 23 Mar 2006 02:59:41 -0800 Subject: [PATCH] x86: early printk handling fixes The history is that -mm kernels do not work for me for a few months already. The things started from crashing somewhere after starting init, and for the last month - no boot at all, just "Uncompressing... OK, booting kernel", and silence. Early console didn't work too. With the latest releases this degraded into an infinite stream of the "Unknown interrupt or fault" messages. So today my patience ran out and I started to think how can I collect at least some info for the bug-report. Attached is the patch that allows to gather some valueable debug info on the problem by making an early console more useable. I can't properly test the patch, as the kernel still doesn't boot, so I'll explain it in details in a hope someone else can justify the intrusive changes. arch_hooks.h: added prototypes for setup_early_printk() and early_printk(). setup.c: killed wrong setup_early_printk() prototype. Moved setup_early_printk() a bit earlier, as it was not "early enough" to cover the bug I was fighting with. early_printk.c: made it to start printing from the bottom of the screen, otherwise the messages interfere with the ones of the boot-loader, so you can't read them. Signed-off-by: Stas Sergeev Cc: Andi Kleen Cc: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 5f58f8cb9836..2d8782960f41 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1459,6 +1459,16 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(cmdline_p); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + setup_early_printk(strchr(s, '=') + 1); + printk("early console enabled\n"); + } + } +#endif + max_low_pfn = setup_memory(); /* @@ -1483,19 +1493,6 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - extern void setup_early_printk(char *); - - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } - } -#endif - - dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH -- cgit v1.2.2 From b408cbc704352eccee301e1103b23203ba1c3a0e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 22 Feb 2006 15:50:30 -0800 Subject: [PATCH] PCI: resource address mismatch On Tue, 21 Feb 2006, Ivan Kokshaysky wrote: > There are two bogus entries in the BIOS memory map table which are > conflicting with a prefetchable memory range of the AGP bridge: > > BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved) > BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) > > 0000:00:02.0 PCI bridge: Silicon Integrated Systems [SiS] Virtual PCI-to-PCI bridge (AGP) (prog-if 00 [Normal decode]) > Flags: bus master, fast devsel, latency 0 > Bus: primary=00, secondary=01, subordinate=01, sec-latency=0 > I/O behind bridge: 0000c000-0000cfff > Memory behind bridge: e7e00000-e7efffff > Prefetchable memory behind bridge: fec00000-ffcfffff > ^^^^^^^^^^^^^^^^^ Yes. However, it's pretty clear that the e820 entries are there for a reason. Probably they are a hack by the BIOS maintainers to keep Windows from stomping/moving that region, exactly because they want to keep the bridge where it is (or, it's actually for the BIOS itself - the BIOS tables are a horrid mess, and BIOS engineers are pretty hacky people: they'll add random entries to make their own broken algorithms do the "right thing"). > Starting from 2.6.13, kernel tries to resolve that sort of conflicts, > so that prefetch window of the bridge and the framebuffer memory behind > it get moved to 0x10000000. I think we could (and probably should) solve this another way: consider the ACPI "reserved regions" from the e820 map exactly the same way that we do other ACPI hints - they should restrict _new_ allocations, but not impact stuff we figure out on our own. Basically, right now we assign _unassigned_ resources at "fs_initcall" time. If we were to add in the e820 "reserved region" stuff before that (but after we've done PCI discovery), we'd probably do the right thing. Right now we do the e820 reserved regions very early indeed: we call "register_memory()" from setup_arch(). We could move at least part of it (the part that registers the resources) down a bit. Here's a test-patch. I'm not saying we should absolutely do this, but it might be interesting to try... Cc: "Antonino A. Daplas" Cc: Ivan Kokshaysky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/setup.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2d8782960f41..d313a11acafa 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1288,7 +1288,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat struct resource *res; if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -1316,13 +1316,15 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources + * + * This is called just before pcibios_assign_resources(), which is also + * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). */ -static void __init register_memory(void) +static int __init request_standard_resources(void) { - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; + int i; + printk("Setting up standard PCI resources\n"); if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else @@ -1334,6 +1336,16 @@ static void __init register_memory(void) /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +fs_initcall(request_standard_resources); + +static void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; /* * Search for the bigest gap in the low 32 bits of the e820 -- cgit v1.2.2 From 10dbe196a8da6b3196881269c6639c0ec11c36cb Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sun, 26 Mar 2006 01:37:02 -0800 Subject: [PATCH] i386: export: memory more than 4G through /proc/iomem Currently /proc/iomem exports physical memory also apart from io device memory. But on i386, it truncates any memory more than 4GB. This leads to problems for kexec/kdump. Kexec reads /proc/iomem to determine the system memory layout and prepares a memory map based on that and passes it to the kernel being kexeced. Given the fact that memory more than 4GB has been truncated, new kernel never gets to see and use that memory. Kdump also reads /proc/iomem to determine the physical memory layout of the system and encodes this informaiton in ELF headers. After a crash new kernel parses these ELF headers being used by previous kernel and vmcore is prepared accordingly. As memory more than 4GB has been truncated, kdump never sees that memory and never prepares ELF headers for it. Hence vmcore is truncated and limited to 4GB even if there is more physical memory in the system. This patch exports memory more than 4GB through /proc/iomem on i386. Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index d313a11acafa..cec3c925ef99 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1286,8 +1286,6 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat probe_roms(); for (i = 0; i < e820.nr_map; i++) { struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; -- cgit v1.2.2 From 23dd842c0033dbb05248c42929c3c526c55386de Mon Sep 17 00:00:00 2001 From: "Tolentino, Matthew E" Date: Sun, 26 Mar 2006 01:37:09 -0800 Subject: [PATCH] EFI fixes Here's a patch that fixes EFI boot for x86 on 2.6.16-rc5-mm3. The off-by-one is admittedly my fault, but the other two fix up the rest. Cc: Bjorn Helgaas Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: "Brown, Len" Cc: Andi Kleen Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index cec3c925ef99..6917daa159ab 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1058,10 +1058,10 @@ static int __init free_available_memory(unsigned long start, unsigned long end, void *arg) { /* check max_low_pfn */ - if (start >= ((max_low_pfn + 1) << PAGE_SHIFT)) + if (start >= (max_low_pfn << PAGE_SHIFT)) return 0; - if (end >= ((max_low_pfn + 1) << PAGE_SHIFT)) - end = (max_low_pfn + 1) << PAGE_SHIFT; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; if (start < end) free_bootmem(start, end - start); -- cgit v1.2.2 From 22a9835c350782a5c3257343713932af3ac92ee0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 27 Mar 2006 01:16:04 -0800 Subject: [PATCH] unify PFN_* macros Just about every architecture defines some macros to do operations on pfns. They're all virtually identical. This patch consolidates all of them. One minor glitch is that at least i386 uses them in a very skeletal header file. To keep away from #include dependency hell, I stuck the new definitions in a new, isolated header. Of all of the implementations, sh64 is the only one that varied by a bit. It used some masks to ensure that any sign-extension got ripped away before the arithmetic is done. This has been posted to that sh64 maintainers and the development list. Compiles on x86, x86_64, ia64 and ppc64. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386/kernel/setup.c') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6917daa159ab..8c08660b4e5d 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include