From 2fd4ef85e0db9ed75c98e13953257a967ea55e03 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Wed, 14 Sep 2005 06:13:02 +0100 Subject: [PATCH] error path in setup_arg_pages() misses vm_unacct_memory() Pavel Emelianov and Kirill Korotaev observe that fs and arch users of security_vm_enough_memory tend to forget to vm_unacct_memory when a failure occurs further down (typically in setup_arg_pages variants). These are all users of insert_vm_struct, and that reservation will only be unaccounted on exit if the vma is marked VM_ACCOUNT: which in some cases it is (hidden inside VM_STACK_FLAGS) and in some cases it isn't. So x86_64 32-bit and ppc64 vDSO ELFs have been leaking memory into Committed_AS each time they're run. But don't add VM_ACCOUNT to them, it's inappropriate to reserve against the very unlikely case that gdb be used to COW a vDSO page - we ought to do something about that in do_wp_page, but there are yet other inconsistencies to be resolved. The safe and economical way to fix this is to let insert_vm_struct do the security_vm_enough_memory check when it finds VM_ACCOUNT is set. And the MIPS irix_brk has been calling security_vm_enough_memory before calling do_brk which repeats it, doubly accounting and so also leaking. Remove that, and all the fs and arch calls to security_vm_enough_memory: give it a less misleading name later on. 
Signed-off-by: Hugh Dickins Signed-Off-By: Kirill Korotaev Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/vdso.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/ppc64') diff --git a/arch/ppc64/kernel/vdso.c b/arch/ppc64/kernel/vdso.c index 4777676365fe..efa985f05aca 100644 --- a/arch/ppc64/kernel/vdso.c +++ b/arch/ppc64/kernel/vdso.c @@ -224,10 +224,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); if (vma == NULL) return -ENOMEM; - if (security_vm_enough_memory(vdso_pages)) { - kmem_cache_free(vm_area_cachep, vma); - return -ENOMEM; - } + memset(vma, 0, sizeof(*vma)); /* @@ -237,8 +234,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) */ vdso_base = get_unmapped_area(NULL, vdso_base, vdso_pages << PAGE_SHIFT, 0, 0); - if (vdso_base & ~PAGE_MASK) + if (vdso_base & ~PAGE_MASK) { + kmem_cache_free(vm_area_cachep, vma); return (int)vdso_base; + } current->thread.vdso_base = vdso_base; @@ -266,7 +265,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack) vma->vm_ops = &vdso_vmops; down_write(&mm->mmap_sem); - insert_vm_struct(mm, vma); + if (insert_vm_struct(mm, vma)) { + up_write(&mm->mmap_sem); + kmem_cache_free(vm_area_cachep, vma); + return -ENOMEM; + } mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; up_write(&mm->mmap_sem); -- cgit v1.2.2 From 4db2ce0199f04b6e99999f22e28ef9a0ae5f0d2f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 14 Sep 2005 21:47:01 -0700 Subject: [LIB]: Consolidate _atomic_dec_and_lock() Several implementations were essentially a common piece of C code using the cmpxchg() macro. Put the implementation in one spot that everyone can share, and convert sparc64 over to using this.
Alpha is the lone arch-specific implementation, which codes up a special fast path for the common case in order to avoid GP reloading which a pure C version would require. Signed-off-by: David S. Miller --- arch/ppc64/Kconfig | 4 ---- arch/ppc64/lib/Makefile | 2 +- arch/ppc64/lib/dec_and_lock.c | 47 ------------------------------------------- 3 files changed, 1 insertion(+), 52 deletions(-) delete mode 100644 arch/ppc64/lib/dec_and_lock.c (limited to 'arch/ppc64') diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index deca68ad644a..c658650af429 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -28,10 +28,6 @@ config GENERIC_ISA_DMA bool default y -config HAVE_DEC_LOCK - bool - default y - config EARLY_PRINTK bool default y diff --git a/arch/ppc64/lib/Makefile b/arch/ppc64/lib/Makefile index 76fbfa9f706f..0b6e967de948 100644 --- a/arch/ppc64/lib/Makefile +++ b/arch/ppc64/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for ppc64-specific library files.. # -lib-y := checksum.o dec_and_lock.o string.o strcase.o +lib-y := checksum.o string.o strcase.o lib-y += copypage.o memcpy.o copyuser.o usercopy.o # Lock primitives are defined as no-ops in include/linux/spinlock.h diff --git a/arch/ppc64/lib/dec_and_lock.c b/arch/ppc64/lib/dec_and_lock.c deleted file mode 100644 index 7b9d4da5cf92..000000000000 --- a/arch/ppc64/lib/dec_and_lock.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * ppc64 version of atomic_dec_and_lock() using cmpxchg - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include - -/* - * This is an implementation of the notion of "decrement a - * reference count, and return locked if it decremented to zero". 
- * - * This implementation can be used on any architecture that - * has a cmpxchg, and where atomic->value is an int holding - * the value of the atomic (i.e. the high bits aren't used - * for a lock or anything like that). - */ -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) -{ - int counter; - int newcount; - - for (;;) { - counter = atomic_read(atomic); - newcount = counter - 1; - if (!newcount) - break; /* do it the slow way */ - - newcount = cmpxchg(&atomic->counter, counter, newcount); - if (newcount == counter) - return 0; - } - - spin_lock(lock); - if (atomic_dec_and_test(atomic)) - return 1; - spin_unlock(lock); - return 0; -} - -EXPORT_SYMBOL(_atomic_dec_and_lock); -- cgit v1.2.2 From be201f7f4cd26b1bb994062803e0d87877e749ce Mon Sep 17 00:00:00 2001 From: Jimi Xenidis Date: Thu, 15 Sep 2005 08:42:19 -0400 Subject: [PATCH] ppc64: Fix recent regression As noted by Olof Johansson : "A recent patch changed the way the LPAR bit is checked during early boot. This resulted in a polarity change in a conditional branch without changing the branch, causing at least some legacy machines to not boot." This fixes it. Signed-off-by: Jimi Xenidis Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/head.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/ppc64') diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 58c314738c99..72c61041151a 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1649,7 +1649,7 @@ _GLOBAL(__secondary_start) ld r3,0(r3) lwz r3,PLATFORM(r3) /* r3 = platform flags */ andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - bne 98f + beq 98f /* branch if result is 0 */ mfspr r3,PVR srwi r3,r3,16 cmpwi r3,0x37 /* SStar */ @@ -1813,7 +1813,7 @@ _STATIC(start_here_multiplatform) ld r3,0(r3) lwz r3,PLATFORM(r3) /* r3 = platform flags */ andi. 
r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ - bne 98f + beq 98f /* branch if result is 0 */ mfspr r3,PVR srwi r3,r3,16 cmpwi r3,0x37 /* SStar */ @@ -1834,7 +1834,7 @@ _STATIC(start_here_multiplatform) lwz r3,PLATFORM(r3) /* r3 = platform flags */ /* Test if bit 0 is set (LPAR bit) */ andi. r3,r3,PLATFORM_LPAR - bne 98f + bne 98f /* branch if result is !0 */ LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ sub r6,r6,r26 ld r6,0(r6) /* get the value of _SDR1 */ -- cgit v1.2.2 From 8a70ce8a4b1c9d39e0a180173728a8d102274631 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 16 Sep 2005 19:27:58 -0700 Subject: [PATCH] ppc64: build fix I forgot to include siginfo.h when I added data breakpoint support. We must include it in a roundabout way in mainline. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/mm/fault.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/ppc64') diff --git a/arch/ppc64/mm/fault.c b/arch/ppc64/mm/fault.c index 7fbc68bbb739..be3f25cf3e9f 100644 --- a/arch/ppc64/mm/fault.c +++ b/arch/ppc64/mm/fault.c @@ -38,6 +38,7 @@ #include #include #include +#include /* * Check whether the instruction at regs->nip is a store using -- cgit v1.2.2 From d79e743e9fcf03f521300a970eb1ab69641910ba Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Wed, 21 Sep 2005 14:14:22 +1000 Subject: [PATCH] ppc64: Fix PCI flags when using OF device tree My code to set up the PCI tree from the Open Firmware device tree was setting IORESOURCE_* flags on the resources for the devices, but not the PCI_BASE_ADDRESS_* flags. This meant that some drivers misbehaved, and /proc/pci showed the wrong types for the resources. This fixes it.
Signed-off-by: Paul Mackerras Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/pci.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/ppc64') diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 861138ad092c..ff4be1da69d5 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -246,11 +246,14 @@ static unsigned int pci_parse_of_flags(u32 addr0) unsigned int flags = 0; if (addr0 & 0x02000000) { - flags |= IORESOURCE_MEM; + flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; + flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; + flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; if (addr0 & 0x40000000) - flags |= IORESOURCE_PREFETCH; + flags |= IORESOURCE_PREFETCH + | PCI_BASE_ADDRESS_MEM_PREFETCH; } else if (addr0 & 0x01000000) - flags |= IORESOURCE_IO; + flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; return flags; } -- cgit v1.2.2 From 3c2822ccb1f8cc96fc006aa82e68e1944290014a Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 21 Sep 2005 09:55:31 -0700 Subject: [PATCH] PPC64: Fix boot for some pre-POWER4 systems Some RS64 systems (such as F80) have non-python host bridges with EADS. However, they have two EADS with 4 buses each under them, so the old logic that assumed no more than 7 buses per PHB failed miserably. Big thanks to Olaf Hering for helping me test this, he's got one of the few machines that broke from the previous logic. Also, be a bit smarter at detecting the need for a PHB-level IOMMU table by checking for the presence of an ISA bus. Only PHBs with ISA bridges should need the PHB-level table.
Signed-off-by: Olof Johansson Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/pSeries_iommu.c | 169 ++++++++++++++++++++------------------ 1 file changed, 89 insertions(+), 80 deletions(-) (limited to 'arch/ppc64') diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index f0fd7fbd6531..8c6313e7e145 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -265,8 +265,10 @@ static void iommu_table_setparms(struct pci_controller *phb, tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; /* Test if we are going over 2GB of DMA space */ - if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31)) + if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } phb->dma_window_base_cur += phb->dma_window_size; @@ -310,92 +312,84 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, static void iommu_bus_setup_pSeries(struct pci_bus *bus) { - struct device_node *dn, *pdn; - struct pci_dn *pci; + struct device_node *dn; struct iommu_table *tbl; + struct device_node *isa_dn, *isa_dn_orig; + struct device_node *tmp; + struct pci_dn *pci; + int children; DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); - /* For each (root) bus, we carve up the available DMA space in 256MB - * pieces. Since each piece is used by one (sub) bus/device, that would - * give a maximum of 7 devices per PHB. In most cases, this is plenty. - * - * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS - * bridges below the PHB to allocate the sectioned tables to, so instead - * we allocate a 1GB table at the PHB level. 
+ dn = pci_bus_to_OF_node(bus); + pci = PCI_DN(dn); + + if (bus->self) { + /* This is not a root bus, any setup will be done for the + * device-side of the bridge in iommu_dev_setup_pSeries(). + */ + return; + } + + /* Check if the ISA bus on the system is under + * this PHB. */ + isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); - dn = pci_bus_to_OF_node(bus); - pci = dn->data; - - if (!bus->self) { - /* Root bus */ - if (is_python(dn)) { - unsigned int *iohole; - - DBG("Python root bus %s\n", bus->name); - - iohole = (unsigned int *)get_property(dn, "io-hole", 0); - - if (iohole) { - /* On first bus we need to leave room for the - * ISA address space. Just skip the first 256MB - * alltogether. This leaves 768MB for the window. - */ - DBG("PHB has io-hole, reserving 256MB\n"); - pci->phb->dma_window_size = 3 << 28; - pci->phb->dma_window_base_cur = 1 << 28; - } else { - /* 1GB window by default */ - pci->phb->dma_window_size = 1 << 30; - pci->phb->dma_window_base_cur = 0; - } - - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl); - } else { - /* Do a 128MB table at root. This is used for the IDE - * controller on some SMP-mode POWER4 machines. It - * doesn't hurt to allocate it on other machines - * -- it'll just be unused since new tables are - * allocated on the EADS level. - * - * Allocate at offset 128MB to avoid having to deal - * with ISA holes; 128MB table for IDE is plenty. 
- */ - pci->phb->dma_window_size = 1 << 27; - pci->phb->dma_window_base_cur = 1 << 27; - - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl); - - /* All child buses have 256MB tables */ - pci->phb->dma_window_size = 1 << 28; - } - } else { - pdn = pci_bus_to_OF_node(bus->parent); + while (isa_dn && isa_dn != dn) + isa_dn = isa_dn->parent; + + if (isa_dn_orig) + of_node_put(isa_dn_orig); - if (!bus->parent->self && !is_python(pdn)) { - struct iommu_table *tbl; - /* First child and not python means this is the EADS - * level. Allocate new table for this slot with 256MB - * window. - */ + /* Count number of direct PCI children of the PHB. + * All PCI device nodes have class-code property, so it's + * an easy way to find them. + */ + for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) + if (get_property(tmp, "class-code", NULL)) + children++; - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + DBG("Children: %d\n", children); - iommu_table_setparms(pci->phb, dn, tbl); + /* Calculate amount of DMA window per slot. Each window must be + * a power of two (due to pci_alloc_consistent requirements). + * + * Keep 256MB aside for PHBs with ISA. + */ - pci->iommu_table = iommu_init_table(tbl); - } else { - /* Lower than first child or under python, use parent table */ - pci->iommu_table = PCI_DN(pdn)->iommu_table; - } + if (!isa_dn) { + /* No ISA/IDE - just set window size and return */ + pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ + + while (pci->phb->dma_window_size * children > 0x80000000ul) + pci->phb->dma_window_size >>= 1; + DBG("No ISA/IDE, window size is %x\n", pci->phb->dma_window_size); + pci->phb->dma_window_base_cur = 0; + + return; } + + /* If we have ISA, then we probably have an IDE + * controller too. Allocate a 128MB table but + * skip the first 128MB to avoid stepping on ISA + * space. 
+ */ + pci->phb->dma_window_size = 0x8000000ul; + pci->phb->dma_window_base_cur = 0x8000000ul; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(pci->phb, dn, tbl); + pci->iommu_table = iommu_init_table(tbl); + + /* Divide the rest (1.75GB) among the children */ + pci->phb->dma_window_size = 0x80000000ul; + while (pci->phb->dma_window_size * children > 0x70000000ul) + pci->phb->dma_window_size >>= 1; + + DBG("ISA/IDE, window size is %x\n", pci->phb->dma_window_size); + } @@ -446,14 +440,29 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) static void iommu_dev_setup_pSeries(struct pci_dev *dev) { struct device_node *dn, *mydn; + struct iommu_table *tbl; DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name); - /* Now copy the iommu_table ptr from the bus device down to the - * pci device_node. This means get_iommu_table() won't need to search - * up the device tree to find it. - */ + mydn = dn = pci_device_to_OF_node(dev); + /* If we're the direct child of a root bus, then we need to allocate + * an iommu table ourselves. The bus setup code should have setup + * the window sizes already. + */ + if (!dev->bus->self) { + DBG(" --> first child, no bridge. Allocating iommu table.\n"); + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl); + PCI_DN(mydn)->iommu_table = iommu_init_table(tbl); + + return; + } + + /* If this device is further down the bus tree, search upwards until + * an already allocated iommu table is found and use that. + */ + while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL) dn = dn->parent; -- cgit v1.2.2 From 1bc2a3bb86688ffca691ba2ad30a2d5dcba774cf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 21 Sep 2005 09:55:33 -0700 Subject: [PATCH] ppc64: Fix issue with non zero boot cpu The new version of the flattened device tree passes the boot cpuid in the header instead of via a linux,boot-cpu property. 
We need to update the in-kernel OF parsing code to do this, otherwise machines with a non-zero boot cpuid fail to come up. Signed-off-by: Anton Blanchard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/kernel/prom_init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/ppc64') diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index 9979919cdf92..f252670874a4 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -1711,6 +1711,7 @@ static void __init flatten_device_tree(void) unsigned long offset = reloc_offset(); unsigned long mem_start, mem_end, room; struct boot_param_header *hdr; + struct prom_t *_prom = PTRRELOC(&prom); char *namep; u64 *rsvmap; @@ -1765,6 +1766,7 @@ static void __init flatten_device_tree(void) RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); /* Finish header */ + hdr->boot_cpuid_phys = _prom->cpu; hdr->magic = OF_DT_HEADER; hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); @@ -1854,7 +1856,6 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); - prom_setprop(cpu_pkg, "linux,boot-cpu", NULL, 0); prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); _prom->cpu = getprop_rval; -- cgit v1.2.2 From 40da47e1159b89f2df5c718d2d5e269ced72b541 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 21 Sep 2005 09:55:34 -0700 Subject: [PATCH] ppc64: Build zImage.vmode for G5 zImage.vmode was recently added. It's a version of zImage in which the ELF note section used by open firmware indicates that it requires a virtual mode instance of OF instead of real mode. This allows it to work with Apple OF, and thus is directly bootable (or netbootable) from OF command line.
(Unfortunately, pSeries OF sort-of requires real mode and Apple OF sort-of requires virtual mode, and both tend to be unhappy if no notes section specifies the mode at all). However, we forgot to add zImage.vmode to the default G5 build. This fixes it. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ppc64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/ppc64') diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index 17d2c1eac3b8..521c2a5a2862 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -107,7 +107,7 @@ install: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ defaultimage-$(CONFIG_PPC_PSERIES) := zImage -defaultimage-$(CONFIG_PPC_PMAC) := vmlinux +defaultimage-$(CONFIG_PPC_PMAC) := zImage.vmode defaultimage-$(CONFIG_PPC_MAPLE) := zImage defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux KBUILD_IMAGE := $(defaultimage-y) -- cgit v1.2.2