From 1c0fe6e3bda0464728c23c8d84aa47567e8b716c Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 6 Jan 2009 14:38:59 -0800 Subject: mm: invoke oom-killer from page fault Rather than have the pagefault handler kill a process directly if it gets a VM_FAULT_OOM, have it call into the OOM killer. With increasingly sophisticated oom behaviour (cpusets, memory cgroups, oom killing throttling, oom priority adjustment or selective disabling, panic on oom, etc), it's silly to unconditionally kill the faulting process at page fault time. Create a hook for pagefault oom path to call into instead. Only converted x86 and uml so far. [akpm@linux-foundation.org: make __out_of_memory() static] [akpm@linux-foundation.org: fix comment] Signed-off-by: Nick Piggin Cc: Jeff Dike Acked-by: Ingo Molnar Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/fault.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 57ec8c86a877..9e268b6b204e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -667,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(in_atomic() || !mm)) goto bad_area_nosemaphore; -again: /* * When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the @@ -859,25 +858,14 @@ no_context: oops_end(flags, regs, sig); #endif -/* - * We ran out of memory, or some other thing happened to us that made - * us unable to handle the page fault gracefully. - */ out_of_memory: + /* + * We ran out of memory, call the OOM killer, and return the userspace + * (which will retry the fault, or kill us if we got oom-killed). + */ up_read(&mm->mmap_sem); - if (is_global_init(tsk)) { - yield(); - /* - * Re-lookup the vma - in theory the vma tree might - * have changed: - */ - goto again; - } - - printk("VM: killing process %s\n", tsk->comm); - if (error_code & PF_USER) - do_group_exit(SIGKILL); - goto no_context; + pagefault_out_of_memory(); + return; do_sigbus: up_read(&mm->mmap_sem); -- cgit v1.2.2 From c04fc586c1a480ba198f03ae7b6cbd7b57380b91 Mon Sep 17 00:00:00 2001 From: Gary Hade Date: Tue, 6 Jan 2009 14:39:14 -0800 Subject: mm: show node to memory section relationship with symlinks in sysfs Show node to memory section relationship with symlinks in sysfs Add /sys/devices/system/node/nodeX/memoryY symlinks for all the memory sections located on nodeX. For example: /sys/devices/system/node/node1/memory135 -> ../../memory/memory135 indicates that memory section 135 resides on node1. Also revises documentation to cover this change as well as updating Documentation/ABI/testing/sysfs-devices-memory to include descriptions of memory hotremove files 'phys_device', 'phys_index', and 'state' that were previously not described there. In addition to it always being a good policy to provide users with the maximum possible amount of physical location information for resources that can be hot-added and/or hot-removed, the following are some (but likely not all) of the user benefits provided by this change. Immediate: - Provides information needed to determine the specific node on which a defective DIMM is located. This will reduce system downtime when the node or defective DIMM is swapped out. - Prevents unintended onlining of a memory section that was previously offlined due to a defective DIMM. This could happen during node hot-add when the user or node hot-add assist script onlines _all_ offlined sections due to user or script inability to identify the specific memory sections located on the hot-added node. The consequences of reintroducing the defective memory could be ugly. - Provides information needed to vary the amount and distribution of memory on specific nodes for testing or debugging purposes. Future: - Will provide information needed to identify the memory sections that need to be offlined prior to physical removal of a specific node. Symlink creation during boot was tested on 2-node x86_64, 2-node ppc64, and 2-node ia64 systems. Symlink creation during physical memory hot-add tested on a 2-node x86_64 system. Signed-off-by: Gary Hade Signed-off-by: Badari Pulavarty Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_32.c | 2 +- arch/x86/mm/init_64.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index f99a6c6c432e..544d724caeee 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1079,7 +1079,7 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; - return __add_pages(zone, start_pfn, nr_pages); + return __add_pages(nid, zone, start_pfn, nr_pages); } #endif diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9f7a0d24d42a..54c437e96541 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size) if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn; - ret = __add_pages(zone, start_pfn, nr_pages); + ret = __add_pages(nid, zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); return ret; -- cgit v1.2.2 From e8de1481fd7126ee9e93d6889da6f00c05e1e019 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Wed, 22 Oct 2008 19:55:31 -0700 Subject: resource: allow MMIO exclusivity for device drivers Device drivers that use pci_request_regions() (and similar APIs) have a reasonable expectation that they are the only ones accessing their device. As part of the e1000e hunt, we were afraid that some userland (X or some bootsplash stuff) was mapping the MMIO region that the driver thought it had exclusively via /dev/mem or via various sysfs resource mappings. This patch adds the option for device drivers to cause their reserved regions to the "banned from /dev/mem use" list, so now both kernel memory and device-exclusive MMIO regions are banned. NOTE: This is only active when CONFIG_STRICT_DEVMEM is set. In addition to the config option, a kernel parameter iomem=relaxed is provided for the cases where developers want to diagnose, in the field, drivers issues from userspace. Reviewed-by: Matthew Wilcox Signed-off-by: Arjan van de Ven Signed-off-by: Jesse Barnes --- arch/x86/mm/init_32.c | 2 ++ arch/x86/mm/init_64.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 544d724caeee..88f1b10de3be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -328,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 54c437e96541..23f68e77ad1f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr) { if (pagenr <= 256) return 1; + if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) + return 0; if (!page_is_ram(pagenr)) return 1; return 0; -- cgit v1.2.2 From 9b4778f680aa79d838ae2be6ab958938f744ce5f Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 7 Jan 2009 14:42:41 -0800 Subject: trivial: replace last usages of __FUNCTION__ in kernel __FUNCTION__ is gcc-specific, use __func__ Signed-off-by: Harvey Harrison Acked-by: Mauro Carvalho Chehab Signed-off-by: Linus Torvalds --- arch/x86/mm/numa_32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 8518c678d83f..d1f7439d173c 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -239,7 +239,7 @@ void resume_map_numa_kva(pgd_t *pgd_base) start_pfn = node_remap_start_pfn[node]; size = node_remap_size[node]; - printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node); + printk(KERN_DEBUG "%s: node %d\n", __func__, node); for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); @@ -251,7 +251,7 @@ void resume_map_numa_kva(pgd_t *pgd_base) PAGE_KERNEL_LARGE_EXEC)); printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", - __FUNCTION__, vaddr, start_pfn + pfn); + __func__, vaddr, start_pfn + pfn); } } } -- cgit v1.2.2