Diffstat (limited to 'kernel')
 kernel/cpu.c                   |   2
 kernel/cpuset.c                | 130
 kernel/events/uprobes.c        | 213
 kernel/exit.c                  |   2
 kernel/fork.c                  |  46
 kernel/irq/handle.c            |   7
 kernel/irq/manage.c            |  17
 kernel/kexec.c                 |   2
 kernel/kmod.c                  |  37
 kernel/panic.c                 |   8
 kernel/power/suspend.c         |   3
 kernel/printk.c                | 191
 kernel/resource.c              |  24
 kernel/sched/core.c            |  94
 kernel/sched/fair.c            | 113
 kernel/sched/sched.h           |  23
 kernel/softirq.c               |   9
 kernel/sys.c                   |  57
 kernel/sysctl.c                |  51
 kernel/sysctl_binary.c         |   2
 kernel/taskstats.c             |   5
 kernel/trace/trace.c           |   7
 kernel/trace/trace_functions.c |  36
 kernel/watchdog.c              |  21
 24 files changed, 742 insertions(+), 358 deletions(-)
diff --git a/kernel/cpu.c b/kernel/cpu.c
index a4eb5227a19e..14d32588cccd 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -416,7 +416,7 @@ int __cpuinit cpu_up(unsigned int cpu)
 
     if (pgdat->node_zonelists->_zonerefs->zone == NULL) {
         mutex_lock(&zonelists_mutex);
-        build_all_zonelists(NULL);
+        build_all_zonelists(NULL, NULL);
         mutex_unlock(&zonelists_mutex);
     }
 #endif
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 8c8bd652dd12..f33c7153b6d7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -147,6 +147,12 @@ typedef enum {
     CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
 
+/* the type of hotplug event */
+enum hotplug_event {
+    CPUSET_CPU_OFFLINE,
+    CPUSET_MEM_OFFLINE,
+};
+
 /* convenient tests for these bits */
 static inline int is_cpu_exclusive(const struct cpuset *cs)
 {
@@ -1990,8 +1996,36 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
 }
 
 /*
- * Walk the specified cpuset subtree and look for empty cpusets.
- * The tasks of such cpuset must be moved to a parent cpuset.
+ * Helper function to traverse cpusets.
+ * It can be used to walk the cpuset tree from top to bottom, completing
+ * one layer before dropping down to the next (thus always processing a
+ * node before any of its children).
+ */
+static struct cpuset *cpuset_next(struct list_head *queue)
+{
+    struct cpuset *cp;
+    struct cpuset *child;    /* scans child cpusets of cp */
+    struct cgroup *cont;
+
+    if (list_empty(queue))
+        return NULL;
+
+    cp = list_first_entry(queue, struct cpuset, stack_list);
+    list_del(queue->next);
+    list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+        child = cgroup_cs(cont);
+        list_add_tail(&child->stack_list, queue);
+    }
+
+    return cp;
+}
+
+
+/*
+ * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory
+ * online/offline) and update the cpusets accordingly.
+ * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such
+ * cpuset must be moved to a parent cpuset.
  *
  * Called with cgroup_mutex held. We take callback_mutex to modify
  * cpus_allowed and mems_allowed.
@@ -2000,50 +2034,61 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
  * before dropping down to the next. It always processes a node before
  * any of its children.
  *
- * For now, since we lack memory hot unplug, we'll never see a cpuset
- * that has tasks along with an empty 'mems'. But if we did see such
- * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
+ * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY
+ * if all present pages from a node are offlined.
  */
-static void scan_for_empty_cpusets(struct cpuset *root)
+static void
+scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 {
     LIST_HEAD(queue);
     struct cpuset *cp;        /* scans cpusets being updated */
-    struct cpuset *child;    /* scans child cpusets of cp */
-    struct cgroup *cont;
     static nodemask_t oldmems;    /* protected by cgroup_mutex */
 
     list_add_tail((struct list_head *)&root->stack_list, &queue);
 
-    while (!list_empty(&queue)) {
-        cp = list_first_entry(&queue, struct cpuset, stack_list);
-        list_del(queue.next);
-        list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
-            child = cgroup_cs(cont);
-            list_add_tail(&child->stack_list, &queue);
+    switch (event) {
+    case CPUSET_CPU_OFFLINE:
+        while ((cp = cpuset_next(&queue)) != NULL) {
+
+            /* Continue past cpusets with all cpus online */
+            if (cpumask_subset(cp->cpus_allowed, cpu_active_mask))
+                continue;
+
+            /* Remove offline cpus from this cpuset. */
+            mutex_lock(&callback_mutex);
+            cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+                        cpu_active_mask);
+            mutex_unlock(&callback_mutex);
+
+            /* Move tasks from the empty cpuset to a parent */
+            if (cpumask_empty(cp->cpus_allowed))
+                remove_tasks_in_empty_cpuset(cp);
+            else
+                update_tasks_cpumask(cp, NULL);
         }
+        break;
 
-        /* Continue past cpusets with all cpus, mems online */
-        if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
-            nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
-            continue;
-
-        oldmems = cp->mems_allowed;
-
-        /* Remove offline cpus and mems from this cpuset. */
-        mutex_lock(&callback_mutex);
-        cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-                    cpu_active_mask);
-        nodes_and(cp->mems_allowed, cp->mems_allowed,
-                  node_states[N_HIGH_MEMORY]);
-        mutex_unlock(&callback_mutex);
-
-        /* Move tasks from the empty cpuset to a parent */
-        if (cpumask_empty(cp->cpus_allowed) ||
-            nodes_empty(cp->mems_allowed))
-            remove_tasks_in_empty_cpuset(cp);
-        else {
-            update_tasks_cpumask(cp, NULL);
-            update_tasks_nodemask(cp, &oldmems, NULL);
+    case CPUSET_MEM_OFFLINE:
+        while ((cp = cpuset_next(&queue)) != NULL) {
+
+            /* Continue past cpusets with all mems online */
+            if (nodes_subset(cp->mems_allowed,
+                     node_states[N_HIGH_MEMORY]))
+                continue;
+
+            oldmems = cp->mems_allowed;
+
+            /* Remove offline mems from this cpuset. */
+            mutex_lock(&callback_mutex);
+            nodes_and(cp->mems_allowed, cp->mems_allowed,
+                  node_states[N_HIGH_MEMORY]);
+            mutex_unlock(&callback_mutex);
+
+            /* Move tasks from the empty cpuset to a parent */
+            if (nodes_empty(cp->mems_allowed))
+                remove_tasks_in_empty_cpuset(cp);
+            else
+                update_tasks_nodemask(cp, &oldmems, NULL);
         }
     }
 }
@@ -2054,13 +2099,19 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * (of no affect) on systems that are actively using CPU hotplug
  * but making no active use of cpusets.
  *
+ * The only exception to this is suspend/resume, where we don't
+ * modify cpusets at all.
+ *
  * This routine ensures that top_cpuset.cpus_allowed tracks
  * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus(). Needs to call cgroup_lock()
  * before calling generate_sched_domains().
+ *
+ * @cpu_online: Indicates whether this is a CPU online event (true) or
+ * a CPU offline event (false).
  */
-void cpuset_update_active_cpus(void)
+void cpuset_update_active_cpus(bool cpu_online)
 {
     struct sched_domain_attr *attr;
     cpumask_var_t *doms;
@@ -2070,7 +2121,10 @@ void cpuset_update_active_cpus(void)
     mutex_lock(&callback_mutex);
     cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
     mutex_unlock(&callback_mutex);
-    scan_for_empty_cpusets(&top_cpuset);
+
+    if (!cpu_online)
+        scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE);
+
     ndoms = generate_sched_domains(&doms, &attr);
     cgroup_unlock();
 
@@ -2082,7 +2136,7 @@ void cpuset_update_active_cpus(void)
 /*
  * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
  * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
- * See also the previous routine cpuset_track_online_cpus().
+ * See cpuset_update_active_cpus() for CPU hotplug handling.
  */
 static int cpuset_track_online_nodes(struct notifier_block *self,
                 unsigned long action, void *arg)
@@ -2101,9 +2155,9 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
     case MEM_OFFLINE:
         /*
          * needn't update top_cpuset.mems_allowed explicitly because
-         * scan_for_empty_cpusets() will update it.
+         * scan_cpusets_upon_hotplug() will update it.
          */
-        scan_for_empty_cpusets(&top_cpuset);
+        scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);
         break;
     default:
         break;
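The cpuset changes above replace the old open-coded scan with a queue-driven, top-down walk: cpuset_next() pops the queue head and enqueues its children, so every node is processed before any of its descendants. A minimal stand-alone sketch of that traversal pattern, using made-up node and queue types rather than the kernel's cpuset/list_head structures:

/*
 * Illustration only (not kernel code): the same top-down, queue-driven
 * walk that cpuset_next() implements, with an array-backed FIFO.
 */
#include <stdio.h>

struct node {
    const char *name;
    struct node *child[4];    /* up to 4 children, NULL-terminated */
};

/* Pop the queue head, enqueue its children, return the popped node. */
static struct node *next_node(struct node **queue, int *head, int *tail)
{
    struct node *n;
    int i;

    if (*head == *tail)
        return NULL;          /* queue empty: walk finished */

    n = queue[(*head)++];
    for (i = 0; i < 4 && n->child[i]; i++)
        queue[(*tail)++] = n->child[i];

    return n;
}

int main(void)
{
    struct node c1 = { "cpuset-A", { 0 } };
    struct node c2 = { "cpuset-B", { 0 } };
    struct node root = { "top_cpuset", { &c1, &c2 } };
    struct node *queue[16], *n;
    int head = 0, tail = 0;

    queue[tail++] = &root;
    while ((n = next_node(queue, &head, &tail)) != NULL)
        printf("visiting %s\n", n->name);    /* parent before children */

    return 0;
}

Visiting a parent before its children is the property the hotplug scan relies on when it pushes tasks out of an emptied cpuset into an ancestor.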
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index f93532748bca..c08a22d02f72 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -32,6 +32,7 @@
 #include <linux/swap.h>        /* try_to_free_swap */
 #include <linux/ptrace.h>      /* user_enable_single_step */
 #include <linux/kdebug.h>      /* notifier mechanism */
+#include "../../mm/internal.h" /* munlock_vma_page */
 
 #include <linux/uprobes.h>
 
@@ -112,14 +113,14 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
     return false;
 }
 
-static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
+static unsigned long offset_to_vaddr(struct vm_area_struct *vma, loff_t offset)
 {
-    loff_t vaddr;
-
-    vaddr = vma->vm_start + offset;
-    vaddr -= vma->vm_pgoff << PAGE_SHIFT;
+    return vma->vm_start + offset - ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+}
 
-    return vaddr;
+static loff_t vaddr_to_offset(struct vm_area_struct *vma, unsigned long vaddr)
+{
+    return ((loff_t)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
 }
 
 /**
@@ -127,25 +128,27 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
  * based on replace_page in mm/ksm.c
  *
  * @vma: vma that holds the pte pointing to page
+ * @addr: address the old @page is mapped at
  * @page: the cowed page we are replacing by kpage
  * @kpage: the modified page we replace page by
  *
  * Returns 0 on success, -EFAULT on failure.
  */
-static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
+                struct page *page, struct page *kpage)
 {
     struct mm_struct *mm = vma->vm_mm;
-    unsigned long addr;
     spinlock_t *ptl;
     pte_t *ptep;
+    int err;
 
-    addr = page_address_in_vma(page, vma);
-    if (addr == -EFAULT)
-        return -EFAULT;
+    /* For try_to_free_swap() and munlock_vma_page() below */
+    lock_page(page);
 
+    err = -EAGAIN;
     ptep = page_check_address(page, mm, addr, &ptl, 0);
     if (!ptep)
-        return -EAGAIN;
+        goto unlock;
 
     get_page(kpage);
     page_add_new_anon_rmap(kpage, vma, addr);
@@ -162,10 +165,16 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
     page_remove_rmap(page);
     if (!page_mapped(page))
         try_to_free_swap(page);
-    put_page(page);
     pte_unmap_unlock(ptep, ptl);
 
-    return 0;
+    if (vma->vm_flags & VM_LOCKED)
+        munlock_vma_page(page);
+    put_page(page);
+
+    err = 0;
+ unlock:
+    unlock_page(page);
+    return err;
 }
 
 /**
@@ -206,45 +215,23 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
             unsigned long vaddr, uprobe_opcode_t opcode)
 {
     struct page *old_page, *new_page;
-    struct address_space *mapping;
     void *vaddr_old, *vaddr_new;
     struct vm_area_struct *vma;
-    struct uprobe *uprobe;
     int ret;
+
 retry:
     /* Read the page with vaddr into memory */
     ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
     if (ret <= 0)
         return ret;
 
-    ret = -EINVAL;
-
-    /*
-     * We are interested in text pages only. Our pages of interest
-     * should be mapped for read and execute only. We desist from
-     * adding probes in write mapped pages since the breakpoints
-     * might end up in the file copy.
-     */
-    if (!valid_vma(vma, is_swbp_insn(&opcode)))
-        goto put_out;
-
-    uprobe = container_of(auprobe, struct uprobe, arch);
-    mapping = uprobe->inode->i_mapping;
-    if (mapping != vma->vm_file->f_mapping)
-        goto put_out;
-
     ret = -ENOMEM;
     new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
     if (!new_page)
-        goto put_out;
+        goto put_old;
 
     __SetPageUptodate(new_page);
 
-    /*
-     * lock page will serialize against do_wp_page()'s
-     * PageAnon() handling
-     */
-    lock_page(old_page);
     /* copy the page now that we've got it stable */
     vaddr_old = kmap_atomic(old_page);
     vaddr_new = kmap_atomic(new_page);
@@ -257,17 +244,13 @@ retry:
 
     ret = anon_vma_prepare(vma);
     if (ret)
-        goto unlock_out;
+        goto put_new;
 
-    lock_page(new_page);
-    ret = __replace_page(vma, old_page, new_page);
-    unlock_page(new_page);
+    ret = __replace_page(vma, vaddr, old_page, new_page);
 
-unlock_out:
-    unlock_page(old_page);
+put_new:
     page_cache_release(new_page);
-
-put_out:
+put_old:
     put_page(old_page);
 
     if (unlikely(ret == -EAGAIN))
@@ -791,7 +774,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
         curr = info;
 
         info->mm = vma->vm_mm;
-        info->vaddr = vma_address(vma, offset);
+        info->vaddr = offset_to_vaddr(vma, offset);
     }
     mutex_unlock(&mapping->i_mmap_mutex);
 
@@ -839,12 +822,13 @@ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
             goto free;
 
         down_write(&mm->mmap_sem);
-        vma = find_vma(mm, (unsigned long)info->vaddr);
-        if (!vma || !valid_vma(vma, is_register))
+        vma = find_vma(mm, info->vaddr);
+        if (!vma || !valid_vma(vma, is_register) ||
+            vma->vm_file->f_mapping->host != uprobe->inode)
             goto unlock;
 
-        if (vma->vm_file->f_mapping->host != uprobe->inode ||
-            vma_address(vma, uprobe->offset) != info->vaddr)
+        if (vma->vm_start > info->vaddr ||
+            vaddr_to_offset(vma, info->vaddr) != uprobe->offset)
             goto unlock;
 
         if (is_register) {
@@ -960,59 +944,66 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
     put_uprobe(uprobe);
 }
 
-/*
- * Of all the nodes that correspond to the given inode, return the node
- * with the least offset.
- */
-static struct rb_node *find_least_offset_node(struct inode *inode)
+static struct rb_node *
+find_node_in_range(struct inode *inode, loff_t min, loff_t max)
 {
-    struct uprobe u = { .inode = inode, .offset = 0};
     struct rb_node *n = uprobes_tree.rb_node;
-    struct rb_node *close_node = NULL;
-    struct uprobe *uprobe;
-    int match;
 
     while (n) {
-        uprobe = rb_entry(n, struct uprobe, rb_node);
-        match = match_uprobe(&u, uprobe);
-
-        if (uprobe->inode == inode)
-            close_node = n;
-
-        if (!match)
-            return close_node;
+        struct uprobe *u = rb_entry(n, struct uprobe, rb_node);
 
-        if (match < 0)
+        if (inode < u->inode) {
             n = n->rb_left;
-        else
+        } else if (inode > u->inode) {
             n = n->rb_right;
+        } else {
+            if (max < u->offset)
+                n = n->rb_left;
+            else if (min > u->offset)
+                n = n->rb_right;
+            else
+                break;
+        }
     }
 
-    return close_node;
+    return n;
 }
 
 /*
- * For a given inode, build a list of probes that need to be inserted.
+ * For a given range in vma, build a list of probes that need to be inserted.
  */
-static void build_probe_list(struct inode *inode, struct list_head *head)
+static void build_probe_list(struct inode *inode,
+                struct vm_area_struct *vma,
+                unsigned long start, unsigned long end,
+                struct list_head *head)
 {
-    struct uprobe *uprobe;
+    loff_t min, max;
     unsigned long flags;
-    struct rb_node *n;
-
-    spin_lock_irqsave(&uprobes_treelock, flags);
-
-    n = find_least_offset_node(inode);
+    struct rb_node *n, *t;
+    struct uprobe *u;
 
-    for (; n; n = rb_next(n)) {
-        uprobe = rb_entry(n, struct uprobe, rb_node);
-        if (uprobe->inode != inode)
-            break;
+    INIT_LIST_HEAD(head);
+    min = vaddr_to_offset(vma, start);
+    max = min + (end - start) - 1;
 
-        list_add(&uprobe->pending_list, head);
-        atomic_inc(&uprobe->ref);
+    spin_lock_irqsave(&uprobes_treelock, flags);
+    n = find_node_in_range(inode, min, max);
+    if (n) {
+        for (t = n; t; t = rb_prev(t)) {
+            u = rb_entry(t, struct uprobe, rb_node);
+            if (u->inode != inode || u->offset < min)
+                break;
+            list_add(&u->pending_list, head);
+            atomic_inc(&u->ref);
+        }
+        for (t = n; (t = rb_next(t)); ) {
+            u = rb_entry(t, struct uprobe, rb_node);
+            if (u->inode != inode || u->offset > max)
+                break;
+            list_add(&u->pending_list, head);
+            atomic_inc(&u->ref);
+        }
     }
-
     spin_unlock_irqrestore(&uprobes_treelock, flags);
 }
 
@@ -1031,7 +1022,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
 int uprobe_mmap(struct vm_area_struct *vma)
 {
     struct list_head tmp_list;
-    struct uprobe *uprobe;
+    struct uprobe *uprobe, *u;
     struct inode *inode;
     int ret, count;
 
@@ -1042,21 +1033,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
     if (!inode)
         return 0;
 
-    INIT_LIST_HEAD(&tmp_list);
     mutex_lock(uprobes_mmap_hash(inode));
-    build_probe_list(inode, &tmp_list);
+    build_probe_list(inode, vma, vma->vm_start, vma->vm_end, &tmp_list);
 
     ret = 0;
     count = 0;
 
-    list_for_each_entry(uprobe, &tmp_list, pending_list) {
+    list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
         if (!ret) {
-            loff_t vaddr = vma_address(vma, uprobe->offset);
-
-            if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
-                put_uprobe(uprobe);
-                continue;
-            }
+            unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
 
             ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
             /*
@@ -1097,12 +1082,15 @@ int uprobe_mmap(struct vm_area_struct *vma)
 void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
 {
     struct list_head tmp_list;
-    struct uprobe *uprobe;
+    struct uprobe *uprobe, *u;
     struct inode *inode;
 
     if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
         return;
 
+    if (!atomic_read(&vma->vm_mm->mm_users)) /* called by mmput() ? */
+        return;
+
     if (!atomic_read(&vma->vm_mm->uprobes_state.count))
         return;
 
@@ -1110,21 +1098,17 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
     if (!inode)
         return;
 
-    INIT_LIST_HEAD(&tmp_list);
     mutex_lock(uprobes_mmap_hash(inode));
-    build_probe_list(inode, &tmp_list);
+    build_probe_list(inode, vma, start, end, &tmp_list);
 
-    list_for_each_entry(uprobe, &tmp_list, pending_list) {
-        loff_t vaddr = vma_address(vma, uprobe->offset);
-
-        if (vaddr >= start && vaddr < end) {
-            /*
-             * An unregister could have removed the probe before
-             * unmap. So check before we decrement the count.
-             */
-            if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
-                atomic_dec(&vma->vm_mm->uprobes_state.count);
-        }
+    list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+        unsigned long vaddr = offset_to_vaddr(vma, uprobe->offset);
+        /*
+         * An unregister could have removed the probe before
+         * unmap. So check before we decrement the count.
+         */
+        if (is_swbp_at_addr(vma->vm_mm, vaddr) == 1)
+            atomic_dec(&vma->vm_mm->uprobes_state.count);
         put_uprobe(uprobe);
     }
     mutex_unlock(uprobes_mmap_hash(inode));
@@ -1463,12 +1447,9 @@ static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
     vma = find_vma(mm, bp_vaddr);
     if (vma && vma->vm_start <= bp_vaddr) {
         if (valid_vma(vma, false)) {
-            struct inode *inode;
-            loff_t offset;
+            struct inode *inode = vma->vm_file->f_mapping->host;
+            loff_t offset = vaddr_to_offset(vma, bp_vaddr);
 
-            inode = vma->vm_file->f_mapping->host;
-            offset = bp_vaddr - vma->vm_start;
-            offset += (vma->vm_pgoff << PAGE_SHIFT);
             uprobe = find_uprobe(inode, offset);
         }
 
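The uprobes rework above funnels all address arithmetic through the new offset_to_vaddr()/vaddr_to_offset() helpers. A small user-space sketch of the same arithmetic, where PAGE_SHIFT and the two vma fields are simplified stand-ins rather than the kernel definitions:

/*
 * Illustration only: file offset <-> user virtual address for a mapping
 * that starts at vm_start and maps the file from page offset vm_pgoff.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12

struct vma {
    unsigned long vm_start;    /* first mapped virtual address */
    unsigned long vm_pgoff;    /* file offset of vm_start, in pages */
};

static unsigned long offset_to_vaddr(const struct vma *vma, long long offset)
{
    return vma->vm_start + offset - ((long long)vma->vm_pgoff << PAGE_SHIFT);
}

static long long vaddr_to_offset(const struct vma *vma, unsigned long vaddr)
{
    return ((long long)vma->vm_pgoff << PAGE_SHIFT) + (vaddr - vma->vm_start);
}

int main(void)
{
    struct vma text = { .vm_start = 0x400000, .vm_pgoff = 1 };
    long long probe_offset = 0x1234;    /* made-up offset of the probed insn */
    unsigned long vaddr = offset_to_vaddr(&text, probe_offset);

    /* The two helpers are inverses of each other. */
    assert(vaddr_to_offset(&text, vaddr) == probe_offset);
    printf("file offset 0x%llx maps to vaddr 0x%lx\n", probe_offset, vaddr);
    return 0;
}

Keeping both directions in one pair of helpers is what lets build_probe_list() translate a vma range [start, end) into a file-offset range [min, max] before searching the rb-tree.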
diff --git a/kernel/exit.c b/kernel/exit.c
index d17f6c4ddfa9..f65345f9e5bb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -483,7 +483,7 @@ static void close_files(struct files_struct * files)
     rcu_read_unlock();
     for (;;) {
         unsigned long set;
-        i = j * __NFDBITS;
+        i = j * BITS_PER_LONG;
         if (i >= fdt->max_fds)
             break;
         set = fdt->open_fds[j++];
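The close_files() loop above walks the open-fd bitmap one unsigned long at a time, which is why the word stride is now BITS_PER_LONG. A hedged user-space sketch of the same walk, with a made-up two-word bitmap standing in for fdt->open_fds:

/* Illustration only: iterate the set bits of an fd bitmap word by word. */
#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

int main(void)
{
    /* Two words of "open fd" bits: fds 0, 2 and BITS_PER_LONG+1 are open. */
    unsigned long open_fds[2] = { (1UL << 0) | (1UL << 2), 1UL << 1 };
    unsigned int max_fds = 2 * BITS_PER_LONG;
    unsigned int i, j = 0;

    for (;;) {
        unsigned long set;

        i = j * BITS_PER_LONG;    /* first fd covered by word j */
        if (i >= max_fds)
            break;
        set = open_fds[j++];
        while (set) {
            if (set & 1)
                printf("fd %u is open\n", i);
            i++;
            set >>= 1;
        }
    }
    return 0;
}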
diff --git a/kernel/fork.c b/kernel/fork.c
index ff1cad3b7bdc..3bd2280d79f6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -114,6 +114,10 @@ int nr_processes(void)
     return total;
 }
 
+void __weak arch_release_task_struct(struct task_struct *tsk)
+{
+}
+
 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 static struct kmem_cache *task_struct_cachep;
 
@@ -122,17 +126,17 @@ static inline struct task_struct *alloc_task_struct_node(int node)
     return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
 }
 
-void __weak arch_release_task_struct(struct task_struct *tsk) { }
-
 static inline void free_task_struct(struct task_struct *tsk)
 {
-    arch_release_task_struct(tsk);
     kmem_cache_free(task_struct_cachep, tsk);
 }
 #endif
 
+void __weak arch_release_thread_info(struct thread_info *ti)
+{
+}
+
 #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
-void __weak arch_release_thread_info(struct thread_info *ti) { }
 
 /*
  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
@@ -150,7 +154,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static inline void free_thread_info(struct thread_info *ti)
 {
-    arch_release_thread_info(ti);
     free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
 # else
@@ -164,7 +167,6 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
 
 static void free_thread_info(struct thread_info *ti)
 {
-    arch_release_thread_info(ti);
     kmem_cache_free(thread_info_cache, ti);
 }
 
@@ -205,10 +207,12 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 void free_task(struct task_struct *tsk)
 {
     account_kernel_stack(tsk->stack, -1);
+    arch_release_thread_info(tsk->stack);
     free_thread_info(tsk->stack);
     rt_mutex_debug_task_free(tsk);
     ftrace_graph_exit_task(tsk);
     put_seccomp_filter(tsk);
+    arch_release_task_struct(tsk);
     free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -298,23 +302,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
         return NULL;
 
     ti = alloc_thread_info_node(tsk, node);
-    if (!ti) {
-        free_task_struct(tsk);
-        return NULL;
-    }
+    if (!ti)
+        goto free_tsk;
 
     err = arch_dup_task_struct(tsk, orig);
+    if (err)
+        goto free_ti;
 
-    /*
-     * We defer looking at err, because we will need this setup
-     * for the clean up path to work correctly.
-     */
     tsk->stack = ti;
-    setup_thread_stack(tsk, orig);
-
-    if (err)
-        goto out;
 
+    setup_thread_stack(tsk, orig);
     clear_user_return_notifier(tsk);
     clear_tsk_need_resched(tsk);
     stackend = end_of_stack(tsk);
@@ -338,8 +335,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
     return tsk;
 
-out:
+free_ti:
     free_thread_info(ti);
+free_tsk:
     free_task_struct(tsk);
     return NULL;
 }
@@ -383,16 +381,14 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
         struct file *file;
 
         if (mpnt->vm_flags & VM_DONTCOPY) {
-            long pages = vma_pages(mpnt);
-            mm->total_vm -= pages;
             vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
-                            -pages);
+                            -vma_pages(mpnt));
             continue;
         }
         charge = 0;
         if (mpnt->vm_flags & VM_ACCOUNT) {
-            unsigned long len;
-            len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
+            unsigned long len = vma_pages(mpnt);
+
             if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
                 goto fail_nomem;
             charge = len;
@@ -1310,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
     p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
     p->memcg_batch.do_batch = 0;
     p->memcg_batch.memcg = NULL;
 #endif
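dup_task_struct() above now unwinds allocation failures through free_ti/free_tsk labels, releasing resources in the reverse order they were acquired. A minimal sketch of that labelled-cleanup pattern, with plain malloc/free standing in for the kernel allocators:

/* Illustration only: goto-based unwind in acquisition order. */
#include <stdlib.h>

struct task { void *stack; };

static struct task *dup_task_sketch(void)
{
    struct task *tsk;
    void *ti;

    tsk = malloc(sizeof(*tsk));
    if (!tsk)
        return NULL;

    ti = malloc(8192);           /* stand-in for the thread stack */
    if (!ti)
        goto free_tsk;

    if (0 /* pretend a later setup step failed */)
        goto free_ti;

    tsk->stack = ti;
    return tsk;

free_ti:                         /* frees everything allocated after tsk */
    free(ti);
free_tsk:                        /* frees the first allocation */
    free(tsk);
    return NULL;
}

int main(void)
{
    struct task *t = dup_task_sketch();
    if (t) {
        free(t->stack);
        free(t);
    }
    return 0;
}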
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index bdb180325551..131ca176b497 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -133,7 +133,7 @@ irqreturn_t
 handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
 {
     irqreturn_t retval = IRQ_NONE;
-    unsigned int random = 0, irq = desc->irq_data.irq;
+    unsigned int flags = 0, irq = desc->irq_data.irq;
 
     do {
         irqreturn_t res;
@@ -161,7 +161,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
 
             /* Fall through to add to randomness */
         case IRQ_HANDLED:
-            random |= action->flags;
+            flags |= action->flags;
             break;
 
         default:
@@ -172,8 +172,7 @@ handle_irq_event_percpu(struct irq_desc *desc, struct irqaction *action)
         action = action->next;
     } while (action);
 
-    if (random & IRQF_SAMPLE_RANDOM)
-        add_interrupt_randomness(irq);
+    add_interrupt_randomness(irq, flags);
 
     if (!noirqdebug)
         note_interrupt(irq, desc, retval);
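After the change above, the handler loop just ORs together every action's flags and hands them to a single unconditional add_interrupt_randomness() call; the entropy decision moves into that function instead of being keyed on IRQF_SAMPLE_RANDOM here. A toy sketch of that accumulation loop (the flag values and stub function are invented for illustration, not the kernel API):

/* Illustration only: collect per-handler flags, pass them on once. */
#include <stdio.h>

struct action {
    unsigned int flags;
    struct action *next;
};

static void add_interrupt_randomness(int irq, unsigned int flags)
{
    printf("irq %d: accumulated flags 0x%x\n", irq, flags);
}

int main(void)
{
    struct action a2 = { 0x80, NULL };    /* made-up flag values */
    struct action a1 = { 0x04, &a2 };
    struct action *action = &a1;
    unsigned int flags = 0;

    do {
        flags |= action->flags;           /* collect flags from each handler */
        action = action->next;
    } while (action);

    add_interrupt_randomness(10, flags);
    return 0;
}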
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 814c9ef6bba1..0a8e8f059627 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -893,22 +893,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
         return -ENOSYS;
     if (!try_module_get(desc->owner))
         return -ENODEV;
-    /*
-     * Some drivers like serial.c use request_irq() heavily,
-     * so we have to be careful not to interfere with a
-     * running system.
-     */
-    if (new->flags & IRQF_SAMPLE_RANDOM) {
-        /*
-         * This function might sleep, we want to call it first,
-         * outside of the atomic block.
-         * Yes, this might clear the entropy pool if the wrong
-         * driver is attempted to be loaded, without actually
-         * installing a new handler, but is this really a problem,
-         * only the sysadmin is able to do this.
-         */
-        rand_initialize_irq(irq);
-    }
 
     /*
      * Check whether the interrupt nests into another interrupt
@@ -1354,7 +1338,6 @@ EXPORT_SYMBOL(free_irq);
  *  Flags:
  *
  *  IRQF_SHARED          Interrupt is shared
- *  IRQF_SAMPLE_RANDOM   The interrupt can be used for entropy
  *  IRQF_TRIGGER_*       Specify active edge(s) or level
  *
  */
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4e2e472f6aeb..0668d58d6413 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1424,7 +1424,7 @@ static void update_vmcoreinfo_note(void)
 
 void crash_save_vmcoreinfo(void)
 {
-    vmcoreinfo_append_str("CRASHTIME=%ld", get_seconds());
+    vmcoreinfo_append_str("CRASHTIME=%ld\n", get_seconds());
     update_vmcoreinfo_note();
 }
 
diff --git a/kernel/kmod.c b/kernel/kmod.c
index ff2c7cb86d77..6f99aead66c6 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -45,6 +45,13 @@ extern int max_threads;
 
 static struct workqueue_struct *khelper_wq;
 
+/*
+ * kmod_thread_locker is used for deadlock avoidance.  There is no explicit
+ * locking to protect this global - it is private to the singleton khelper
+ * thread and should only ever be modified by that thread.
+ */
+static const struct task_struct *kmod_thread_locker;
+
 #define CAP_BSET    (void *)1
 #define CAP_PI      (void *)2
 
@@ -221,6 +228,13 @@ fail:
     return 0;
 }
 
+static int call_helper(void *data)
+{
+    /* Worker thread started blocking khelper thread. */
+    kmod_thread_locker = current;
+    return ____call_usermodehelper(data);
+}
+
 static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
     if (info->cleanup)
@@ -295,9 +309,12 @@ static void __call_usermodehelper(struct work_struct *work)
     if (wait == UMH_WAIT_PROC)
         pid = kernel_thread(wait_for_helper, sub_info,
                     CLONE_FS | CLONE_FILES | SIGCHLD);
-    else
-        pid = kernel_thread(____call_usermodehelper, sub_info,
+    else {
+        pid = kernel_thread(call_helper, sub_info,
                     CLONE_VFORK | SIGCHLD);
+        /* Worker thread stopped blocking khelper thread. */
+        kmod_thread_locker = NULL;
+    }
 
     switch (wait) {
     case UMH_NO_WAIT:
@@ -548,6 +565,16 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
         retval = -EBUSY;
         goto out;
     }
+    /*
+     * Worker thread must not wait for khelper thread at below
+     * wait_for_completion() if the thread was created with CLONE_VFORK
+     * flag, for khelper thread is already waiting for the thread at
+     * wait_for_completion() in do_fork().
+     */
+    if (wait != UMH_NO_WAIT && current == kmod_thread_locker) {
+        retval = -EBUSY;
+        goto out;
+    }
 
     sub_info->complete = &done;
     sub_info->wait = wait;
@@ -577,6 +604,12 @@ unlock:
     return retval;
 }
 
+/*
+ * call_usermodehelper_fns() will not run the caller-provided cleanup function
+ * if a memory allocation failure is experienced.  So the caller might need to
+ * check the call_usermodehelper_fns() return value:  if it is -ENOMEM, perform
+ * the necessaary cleanup within the caller.
+ */
 int call_usermodehelper_fns(
     char *path, char **argv, char **envp, int wait,
     int (*init)(struct subprocess_info *info, struct cred *new),
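The kmod.c change above records which worker the singleton khelper thread is currently vfork-blocked on, so a usermode-helper request issued from that very worker fails fast with -EBUSY instead of deadlocking (the worker would otherwise wait on khelper, which is itself waiting on the worker). A simplified user-space sketch of the shape of that check; the names and values here are stand-ins, not kernel API:

/* Illustration only: refuse a wait that would target our own blocker. */
#include <stdio.h>

#define EBUSY 16

/* Set while the singleton helper thread is vfork-blocked on a worker. */
static const void *helper_locker;

static int submit_and_wait(const void *caller)
{
    /*
     * If the caller is the worker the helper is currently blocked on,
     * waiting here would deadlock: the helper cannot service new
     * requests until this worker finishes.  Refuse instead.
     */
    if (caller == helper_locker)
        return -EBUSY;

    /* ... queue the request and wait for completion ... */
    return 0;
}

int main(void)
{
    int worker_id;                        /* stands in for a task_struct */

    helper_locker = &worker_id;           /* helper blocked on this worker */
    printf("nested wait -> %d\n", submit_and_wait(&worker_id));   /* -16 */
    printf("other caller -> %d\n", submit_and_wait(NULL));        /* 0 */
    return 0;
}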
diff --git a/kernel/panic.c b/kernel/panic.c
index d2a5f4ecc6dd..e1b2822fff97 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,6 +75,14 @@ void panic(const char *fmt, ...)
     int state = 0;
 
     /*
+     * Disable local interrupts. This will prevent panic_smp_self_stop
+     * from deadlocking the first cpu that invokes the panic, since
+     * there is nothing to prevent an interrupt handler (that runs
+     * after the panic_lock is acquired) from invoking panic again.
+     */
+    local_irq_disable();
+
+    /*
      * It's possible to come here directly from a panic-assertion and
      * not have preempt disabled. Some functions called from here want
      * preempt to be disabled. No point enabling it later though...
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index c8b7446b27df..1da39ea248fd 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -178,6 +178,9 @@ static int suspend_enter(suspend_state_t state, bool *wakeup)
     arch_suspend_enable_irqs();
     BUG_ON(irqs_disabled());
 
+    /* Kick the lockup detector */
+    lockup_detector_bootcpu_resume();
+
  Enable_cpus:
     enable_nonboot_cpus();
 
diff --git a/kernel/printk.c b/kernel/printk.c
index ac4bc9e79465..6a76ab9d4476 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -216,6 +216,7 @@ struct log {
  */
 static DEFINE_RAW_SPINLOCK(logbuf_lock);
 
+#ifdef CONFIG_PRINTK
 /* the next printk record to read by syslog(READ) or /proc/kmsg */
 static u64 syslog_seq;
 static u32 syslog_idx;
@@ -228,14 +229,19 @@ static u32 log_first_idx;
 
 /* index and sequence number of the next record to store in the buffer */
 static u64 log_next_seq;
-#ifdef CONFIG_PRINTK
 static u32 log_next_idx;
 
+/* the next printk record to write to the console */
+static u64 console_seq;
+static u32 console_idx;
+static enum log_flags console_prev;
+
 /* the next printk record to read after the last 'clear' command */
 static u64 clear_seq;
 static u32 clear_idx;
 
-#define LOG_LINE_MAX 1024
+#define PREFIX_MAX        32
+#define LOG_LINE_MAX      1024 - PREFIX_MAX
 
 /* record buffer */
 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
@@ -360,6 +366,7 @@ static void log_store(int facility, int level,
 struct devkmsg_user {
     u64 seq;
     u32 idx;
+    enum log_flags prev;
     struct mutex lock;
     char buf[8192];
 };
@@ -382,8 +389,10 @@ static ssize_t devkmsg_writev(struct kiocb *iocb, const struct iovec *iv,
 
     line = buf;
     for (i = 0; i < count; i++) {
-        if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len))
+        if (copy_from_user(line, iv[i].iov_base, iv[i].iov_len)) {
+            ret = -EFAULT;
             goto out;
+        }
         line += iv[i].iov_len;
     }
 
@@ -425,6 +434,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
     struct log *msg;
     u64 ts_usec;
     size_t i;
+    char cont = '-';
     size_t len;
     ssize_t ret;
 
@@ -462,8 +472,25 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
     msg = log_from_idx(user->idx);
     ts_usec = msg->ts_nsec;
     do_div(ts_usec, 1000);
-    len = sprintf(user->buf, "%u,%llu,%llu;",
-              (msg->facility << 3) | msg->level, user->seq, ts_usec);
+
+    /*
+     * If we couldn't merge continuation line fragments during the print,
+     * export the stored flags to allow an optional external merge of the
+     * records. Merging the records isn't always neccessarily correct, like
+     * when we hit a race during printing. In most cases though, it produces
+     * better readable output. 'c' in the record flags mark the first
+     * fragment of a line, '+' the following.
+     */
+    if (msg->flags & LOG_CONT && !(user->prev & LOG_CONT))
+        cont = 'c';
+    else if ((msg->flags & LOG_CONT) ||
+         ((user->prev & LOG_CONT) && !(msg->flags & LOG_PREFIX)))
+        cont = '+';
+
+    len = sprintf(user->buf, "%u,%llu,%llu,%c;",
+              (msg->facility << 3) | msg->level,
+              user->seq, ts_usec, cont);
+    user->prev = msg->flags;
 
     /* escape non-printable characters */
     for (i = 0; i < msg->text_len; i++) {
@@ -646,6 +673,15 @@ void log_buf_kexec_setup(void)
     VMCOREINFO_SYMBOL(log_buf_len);
     VMCOREINFO_SYMBOL(log_first_idx);
     VMCOREINFO_SYMBOL(log_next_idx);
+    /*
+     * Export struct log size and field offsets. User space tools can
+     * parse it and detect any changes to structure down the line.
+     */
+    VMCOREINFO_STRUCT_SIZE(log);
+    VMCOREINFO_OFFSET(log, ts_nsec);
+    VMCOREINFO_OFFSET(log, len);
+    VMCOREINFO_OFFSET(log, text_len);
+    VMCOREINFO_OFFSET(log, dict_len);
 }
 #endif
 
@@ -876,7 +912,7 @@ static size_t msg_print_text(const struct log *msg, enum log_flags prev,
 
         if (buf) {
             if (print_prefix(msg, syslog, NULL) +
-                text_len + 1>= size - len)
+                text_len + 1 >= size - len)
                 break;
 
             if (prefix)
@@ -907,7 +943,7 @@ static int syslog_print(char __user *buf, int size)
     struct log *msg;
     int len = 0;
 
-    text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
+    text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
     if (!text)
         return -ENOMEM;
 
@@ -930,7 +966,8 @@ static int syslog_print(char __user *buf, int size)
 
         skip = syslog_partial;
         msg = log_from_idx(syslog_idx);
-        n = msg_print_text(msg, syslog_prev, true, text, LOG_LINE_MAX);
+        n = msg_print_text(msg, syslog_prev, true, text,
+                   LOG_LINE_MAX + PREFIX_MAX);
         if (n - syslog_partial <= size) {
             /* message fits into buffer, move forward */
             syslog_idx = log_next(syslog_idx);
@@ -969,7 +1006,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
     char *text;
     int len = 0;
 
-    text = kmalloc(LOG_LINE_MAX, GFP_KERNEL);
+    text = kmalloc(LOG_LINE_MAX + PREFIX_MAX, GFP_KERNEL);
     if (!text)
         return -ENOMEM;
 
@@ -1022,7 +1059,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
             struct log *msg = log_from_idx(idx);
             int textlen;
 
-            textlen = msg_print_text(msg, prev, true, text, LOG_LINE_MAX);
+            textlen = msg_print_text(msg, prev, true, text,
+                         LOG_LINE_MAX + PREFIX_MAX);
             if (textlen < 0) {
                 len = textlen;
                 break;
@@ -1349,20 +1387,36 @@ static struct cont {
     u64 ts_nsec;            /* time of first print */
     u8 level;               /* log level of first message */
     u8 facility;            /* log level of first message */
+    enum log_flags flags;   /* prefix, newline flags */
     bool flushed:1;         /* buffer sealed and committed */
 } cont;
 
-static void cont_flush(void)
+static void cont_flush(enum log_flags flags)
 {
     if (cont.flushed)
         return;
     if (cont.len == 0)
         return;
 
-    log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
-          NULL, 0, cont.buf, cont.len);
-
-    cont.flushed = true;
+    if (cont.cons) {
+        /*
+         * If a fragment of this line was directly flushed to the
+         * console; wait for the console to pick up the rest of the
+         * line. LOG_NOCONS suppresses a duplicated output.
+         */
+        log_store(cont.facility, cont.level, flags | LOG_NOCONS,
+              cont.ts_nsec, NULL, 0, cont.buf, cont.len);
+        cont.flags = flags;
+        cont.flushed = true;
+    } else {
+        /*
+         * If no fragment of this line ever reached the console,
+         * just submit it to the store and free the buffer.
+         */
+        log_store(cont.facility, cont.level, flags, 0,
+              NULL, 0, cont.buf, cont.len);
+        cont.len = 0;
+    }
 }
 
 static bool cont_add(int facility, int level, const char *text, size_t len)
@@ -1371,7 +1425,8 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
         return false;
 
     if (cont.len + len > sizeof(cont.buf)) {
-        cont_flush();
+        /* the line gets too long, split it up in separate records */
+        cont_flush(LOG_CONT);
         return false;
     }
 
@@ -1380,12 +1435,17 @@ static bool cont_add(int facility, int level, const char *text, size_t len)
         cont.level = level;
         cont.owner = current;
         cont.ts_nsec = local_clock();
+        cont.flags = 0;
         cont.cons = 0;
         cont.flushed = false;
     }
 
     memcpy(cont.buf + cont.len, text, len);
     cont.len += len;
+
+    if (cont.len > (sizeof(cont.buf) * 80) / 100)
+        cont_flush(LOG_CONT);
+
     return true;
 }
 
@@ -1394,7 +1454,7 @@ static size_t cont_print_text(char *text, size_t size)
     size_t textlen = 0;
     size_t len;
 
-    if (cont.cons == 0) {
+    if (cont.cons == 0 && (console_prev & LOG_NEWLINE)) {
         textlen += print_time(cont.ts_nsec, text);
         size -= textlen;
     }
@@ -1409,7 +1469,8 @@ static size_t cont_print_text(char *text, size_t size)
     }
 
     if (cont.flushed) {
-        text[textlen++] = '\n';
+        if (cont.flags & LOG_NEWLINE)
+            text[textlen++] = '\n';
         /* got everything, release buffer */
         cont.len = 0;
     }
@@ -1481,17 +1542,23 @@ asmlinkage int vprintk_emit(int facility, int level,
         lflags |= LOG_NEWLINE;
     }
 
-    /* strip syslog prefix and extract log level or control flags */
-    if (text[0] == '<' && text[1] && text[2] == '>') {
-        switch (text[1]) {
+    /* strip kernel syslog prefix and extract log level or control flags */
+    if (facility == 0) {
+        int kern_level = printk_get_level(text);
| 1487 | case '0' ... '7': | 1548 | |
| 1488 | if (level == -1) | 1549 | if (kern_level) { |
| 1489 | level = text[1] - '0'; | 1550 | const char *end_of_header = printk_skip_level(text); |
| 1490 | case 'd': /* KERN_DEFAULT */ | 1551 | switch (kern_level) { |
| 1491 | lflags |= LOG_PREFIX; | 1552 | case '0' ... '7': |
| 1492 | case 'c': /* KERN_CONT */ | 1553 | if (level == -1) |
| 1493 | text += 3; | 1554 | level = kern_level - '0'; |
| 1494 | text_len -= 3; | 1555 | case 'd': /* KERN_DEFAULT */ |
| 1556 | lflags |= LOG_PREFIX; | ||
| 1557 | case 'c': /* KERN_CONT */ | ||
| 1558 | break; | ||
| 1559 | } | ||
| 1560 | text_len -= end_of_header - text; | ||
| 1561 | text = (char *)end_of_header; | ||
| 1495 | } | 1562 | } |
| 1496 | } | 1563 | } |
| 1497 | 1564 | ||
| @@ -1507,7 +1574,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
| 1507 | * or another task also prints continuation lines. | 1574 | * or another task also prints continuation lines. |
| 1508 | */ | 1575 | */ |
| 1509 | if (cont.len && (lflags & LOG_PREFIX || cont.owner != current)) | 1576 | if (cont.len && (lflags & LOG_PREFIX || cont.owner != current)) |
| 1510 | cont_flush(); | 1577 | cont_flush(LOG_NEWLINE); |
| 1511 | 1578 | ||
| 1512 | /* buffer line if possible, otherwise store it right away */ | 1579 | /* buffer line if possible, otherwise store it right away */ |
| 1513 | if (!cont_add(facility, level, text, text_len)) | 1580 | if (!cont_add(facility, level, text, text_len)) |
| @@ -1525,7 +1592,7 @@ asmlinkage int vprintk_emit(int facility, int level, | |||
| 1525 | if (cont.len && cont.owner == current) { | 1592 | if (cont.len && cont.owner == current) { |
| 1526 | if (!(lflags & LOG_PREFIX)) | 1593 | if (!(lflags & LOG_PREFIX)) |
| 1527 | stored = cont_add(facility, level, text, text_len); | 1594 | stored = cont_add(facility, level, text, text_len); |
| 1528 | cont_flush(); | 1595 | cont_flush(LOG_NEWLINE); |
| 1529 | } | 1596 | } |
| 1530 | 1597 | ||
| 1531 | if (!stored) | 1598 | if (!stored) |
| @@ -1616,9 +1683,20 @@ asmlinkage int printk(const char *fmt, ...) | |||
| 1616 | } | 1683 | } |
| 1617 | EXPORT_SYMBOL(printk); | 1684 | EXPORT_SYMBOL(printk); |
| 1618 | 1685 | ||
| 1619 | #else | 1686 | #else /* CONFIG_PRINTK */ |
| 1620 | 1687 | ||
| 1688 | #define LOG_LINE_MAX 0 | ||
| 1689 | #define PREFIX_MAX 0 | ||
| 1621 | #define LOG_LINE_MAX 0 | 1690 | #define LOG_LINE_MAX 0 |
| 1691 | static u64 syslog_seq; | ||
| 1692 | static u32 syslog_idx; | ||
| 1693 | static u64 console_seq; | ||
| 1694 | static u32 console_idx; | ||
| 1695 | static enum log_flags syslog_prev; | ||
| 1696 | static u64 log_first_seq; | ||
| 1697 | static u32 log_first_idx; | ||
| 1698 | static u64 log_next_seq; | ||
| 1699 | static enum log_flags console_prev; | ||
| 1622 | static struct cont { | 1700 | static struct cont { |
| 1623 | size_t len; | 1701 | size_t len; |
| 1624 | size_t cons; | 1702 | size_t cons; |
| @@ -1902,10 +1980,34 @@ void wake_up_klogd(void) | |||
| 1902 | this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); | 1980 | this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); |
| 1903 | } | 1981 | } |
| 1904 | 1982 | ||
| 1905 | /* the next printk record to write to the console */ | 1983 | static void console_cont_flush(char *text, size_t size) |
| 1906 | static u64 console_seq; | 1984 | { |
| 1907 | static u32 console_idx; | 1985 | unsigned long flags; |
| 1908 | static enum log_flags console_prev; | 1986 | size_t len; |
| 1987 | |||
| 1988 | raw_spin_lock_irqsave(&logbuf_lock, flags); | ||
| 1989 | |||
| 1990 | if (!cont.len) | ||
| 1991 | goto out; | ||
| 1992 | |||
| 1993 | /* | ||
| 1994 | * We still queue earlier records, likely because the console was | ||
| 1995 | * busy. The earlier ones need to be printed before this one; we | ||
| 1996 | * did not flush any fragment so far, so just let it queue up. | ||
| 1997 | */ | ||
| 1998 | if (console_seq < log_next_seq && !cont.cons) | ||
| 1999 | goto out; | ||
| 2000 | |||
| 2001 | len = cont_print_text(text, size); | ||
| 2002 | raw_spin_unlock(&logbuf_lock); | ||
| 2003 | stop_critical_timings(); | ||
| 2004 | call_console_drivers(cont.level, text, len); | ||
| 2005 | start_critical_timings(); | ||
| 2006 | local_irq_restore(flags); | ||
| 2007 | return; | ||
| 2008 | out: | ||
| 2009 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 2010 | } | ||
| 1909 | 2011 | ||
| 1910 | /** | 2012 | /** |
| 1911 | * console_unlock - unlock the console system | 2013 | * console_unlock - unlock the console system |
| @@ -1923,7 +2025,7 @@ static enum log_flags console_prev; | |||
| 1923 | */ | 2025 | */ |
| 1924 | void console_unlock(void) | 2026 | void console_unlock(void) |
| 1925 | { | 2027 | { |
| 1926 | static char text[LOG_LINE_MAX]; | 2028 | static char text[LOG_LINE_MAX + PREFIX_MAX]; |
| 1927 | static u64 seen_seq; | 2029 | static u64 seen_seq; |
| 1928 | unsigned long flags; | 2030 | unsigned long flags; |
| 1929 | bool wake_klogd = false; | 2031 | bool wake_klogd = false; |
| @@ -1937,19 +2039,7 @@ void console_unlock(void) | |||
| 1937 | console_may_schedule = 0; | 2039 | console_may_schedule = 0; |
| 1938 | 2040 | ||
| 1939 | /* flush buffered message fragment immediately to console */ | 2041 | /* flush buffered message fragment immediately to console */ |
| 1940 | raw_spin_lock_irqsave(&logbuf_lock, flags); | 2042 | console_cont_flush(text, sizeof(text)); |
| 1941 | if (cont.len && (cont.cons < cont.len || cont.flushed)) { | ||
| 1942 | size_t len; | ||
| 1943 | |||
| 1944 | len = cont_print_text(text, sizeof(text)); | ||
| 1945 | raw_spin_unlock(&logbuf_lock); | ||
| 1946 | stop_critical_timings(); | ||
| 1947 | call_console_drivers(cont.level, text, len); | ||
| 1948 | start_critical_timings(); | ||
| 1949 | local_irq_restore(flags); | ||
| 1950 | } else | ||
| 1951 | raw_spin_unlock_irqrestore(&logbuf_lock, flags); | ||
| 1952 | |||
| 1953 | again: | 2043 | again: |
| 1954 | for (;;) { | 2044 | for (;;) { |
| 1955 | struct log *msg; | 2045 | struct log *msg; |
| @@ -1986,6 +2076,7 @@ skip: | |||
| 1986 | * will properly dump everything later. | 2076 | * will properly dump everything later. |
| 1987 | */ | 2077 | */ |
| 1988 | msg->flags &= ~LOG_NOCONS; | 2078 | msg->flags &= ~LOG_NOCONS; |
| 2079 | console_prev = msg->flags; | ||
| 1989 | goto skip; | 2080 | goto skip; |
| 1990 | } | 2081 | } |
| 1991 | 2082 | ||
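The vprintk_emit() hunk above replaces the open-coded '<N>' prefix check with printk_get_level()/printk_skip_level(). A simplified userspace sketch of that header parsing, written against the 3-byte "<.>" form this series uses; it is illustrative only, not the kernel's include/linux/printk.h implementation:

#include <stdio.h>

/*
 * Simplified re-implementation of the KERN_<LEVEL> header helpers used in
 * the vprintk_emit() hunk above; handles only the 3-byte "<.>" form.
 */
static int printk_get_level(const char *buffer)
{
	if (buffer[0] == '<' && buffer[1] && buffer[2] == '>') {
		switch (buffer[1]) {
		case '0' ... '7':	/* loglevel */
		case 'd':		/* KERN_DEFAULT */
		case 'c':		/* KERN_CONT */
			return buffer[1];
		}
	}
	return 0;
}

static const char *printk_skip_level(const char *buffer)
{
	return printk_get_level(buffer) ? buffer + 3 : buffer;
}

int main(void)
{
	const char *msg = "<4>eth0: link is down";
	int kern_level = printk_get_level(msg);

	if (kern_level >= '0' && kern_level <= '7')
		printf("level %c: \"%s\"\n", kern_level, printk_skip_level(msg));
	return 0;
}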
diff --git a/kernel/resource.c b/kernel/resource.c index dc8b47764443..34d45886ee84 100644 --- a/kernel/resource.c +++ b/kernel/resource.c | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | * Arbitrary resource management. | 7 | * Arbitrary resource management. |
| 8 | */ | 8 | */ |
| 9 | 9 | ||
| 10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
| 11 | |||
| 10 | #include <linux/export.h> | 12 | #include <linux/export.h> |
| 11 | #include <linux/errno.h> | 13 | #include <linux/errno.h> |
| 12 | #include <linux/ioport.h> | 14 | #include <linux/ioport.h> |
| @@ -791,8 +793,28 @@ void __init reserve_region_with_split(struct resource *root, | |||
| 791 | resource_size_t start, resource_size_t end, | 793 | resource_size_t start, resource_size_t end, |
| 792 | const char *name) | 794 | const char *name) |
| 793 | { | 795 | { |
| 796 | int abort = 0; | ||
| 797 | |||
| 794 | write_lock(&resource_lock); | 798 | write_lock(&resource_lock); |
| 795 | __reserve_region_with_split(root, start, end, name); | 799 | if (root->start > start || root->end < end) { |
| 800 | pr_err("requested range [0x%llx-0x%llx] not in root %pr\n", | ||
| 801 | (unsigned long long)start, (unsigned long long)end, | ||
| 802 | root); | ||
| 803 | if (start > root->end || end < root->start) | ||
| 804 | abort = 1; | ||
| 805 | else { | ||
| 806 | if (end > root->end) | ||
| 807 | end = root->end; | ||
| 808 | if (start < root->start) | ||
| 809 | start = root->start; | ||
| 810 | pr_err("fixing request to [0x%llx-0x%llx]\n", | ||
| 811 | (unsigned long long)start, | ||
| 812 | (unsigned long long)end); | ||
| 813 | } | ||
| 814 | dump_stack(); | ||
| 815 | } | ||
| 816 | if (!abort) | ||
| 817 | __reserve_region_with_split(root, start, end, name); | ||
| 796 | write_unlock(&resource_lock); | 818 | write_unlock(&resource_lock); |
| 797 | } | 819 | } |
| 798 | 820 | ||
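The reserve_region_with_split() change above clamps a request that only partially overlaps the root resource and aborts one that misses it entirely. A minimal standalone sketch of just that clamp-or-abort decision, using plain integers instead of struct resource (clamp_to_root() is an invented name):

#include <stdio.h>

/*
 * Clamp-or-abort logic mirroring the reserve_region_with_split() hunk
 * above, on plain integer ranges.
 */
static int clamp_to_root(unsigned long long root_start,
			 unsigned long long root_end,
			 unsigned long long *start, unsigned long long *end)
{
	if (root_start <= *start && *end <= root_end)
		return 0;			/* fully inside, nothing to do */

	if (*start > root_end || *end < root_start)
		return -1;			/* no overlap at all: abort */

	/* partial overlap: trim the request to the root boundaries */
	if (*end > root_end)
		*end = root_end;
	if (*start < root_start)
		*start = root_start;
	return 0;
}

int main(void)
{
	unsigned long long start = 0x90000000ULL, end = 0x110000000ULL;

	if (clamp_to_root(0x80000000ULL, 0xffffffffULL, &start, &end) == 0)
		printf("request now [0x%llx-0x%llx]\n", start, end);
	else
		printf("request rejected\n");
	return 0;
}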
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 468bdd44c1ba..d325c4b2dcbb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c | |||
| @@ -1096,7 +1096,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) | |||
| 1096 | * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. | 1096 | * a task's CPU. ->pi_lock for waking tasks, rq->lock for runnable tasks. |
| 1097 | * | 1097 | * |
| 1098 | * sched_move_task() holds both and thus holding either pins the cgroup, | 1098 | * sched_move_task() holds both and thus holding either pins the cgroup, |
| 1099 | * see set_task_rq(). | 1099 | * see task_group(). |
| 1100 | * | 1100 | * |
| 1101 | * Furthermore, all task_rq users should acquire both locks, see | 1101 | * Furthermore, all task_rq users should acquire both locks, see |
| 1102 | * task_rq_lock(). | 1102 | * task_rq_lock(). |
| @@ -1910,12 +1910,12 @@ static inline void | |||
| 1910 | prepare_task_switch(struct rq *rq, struct task_struct *prev, | 1910 | prepare_task_switch(struct rq *rq, struct task_struct *prev, |
| 1911 | struct task_struct *next) | 1911 | struct task_struct *next) |
| 1912 | { | 1912 | { |
| 1913 | trace_sched_switch(prev, next); | ||
| 1913 | sched_info_switch(prev, next); | 1914 | sched_info_switch(prev, next); |
| 1914 | perf_event_task_sched_out(prev, next); | 1915 | perf_event_task_sched_out(prev, next); |
| 1915 | fire_sched_out_preempt_notifiers(prev, next); | 1916 | fire_sched_out_preempt_notifiers(prev, next); |
| 1916 | prepare_lock_switch(rq, next); | 1917 | prepare_lock_switch(rq, next); |
| 1917 | prepare_arch_switch(next); | 1918 | prepare_arch_switch(next); |
| 1918 | trace_sched_switch(prev, next); | ||
| 1919 | } | 1919 | } |
| 1920 | 1920 | ||
| 1921 | /** | 1921 | /** |
| @@ -6024,6 +6024,11 @@ static void destroy_sched_domains(struct sched_domain *sd, int cpu) | |||
| 6024 | * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this | 6024 | * SD_SHARE_PKG_RESOURCE set (Last Level Cache Domain) for this |
| 6025 | * allows us to avoid some pointer chasing select_idle_sibling(). | 6025 | * allows us to avoid some pointer chasing select_idle_sibling(). |
| 6026 | * | 6026 | * |
| 6027 | * Iterate domains and sched_groups downward, assigning CPUs to be | ||
| 6028 | * select_idle_sibling() hw buddy. Cross-wiring hw makes bouncing | ||
| 6029 | * due to random perturbation self canceling, ie sw buddies pull | ||
| 6030 | * their counterpart to their CPU's hw counterpart. | ||
| 6031 | * | ||
| 6027 | * Also keep a unique ID per domain (we use the first cpu number in | 6032 | * Also keep a unique ID per domain (we use the first cpu number in |
| 6028 | * the cpumask of the domain), this allows us to quickly tell if | 6033 | * the cpumask of the domain), this allows us to quickly tell if |
| 6029 | * two cpus are in the same cache domain, see cpus_share_cache(). | 6034 | * two cpus are in the same cache domain, see cpus_share_cache(). |
| @@ -6037,8 +6042,40 @@ static void update_top_cache_domain(int cpu) | |||
| 6037 | int id = cpu; | 6042 | int id = cpu; |
| 6038 | 6043 | ||
| 6039 | sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); | 6044 | sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES); |
| 6040 | if (sd) | 6045 | if (sd) { |
| 6046 | struct sched_domain *tmp = sd; | ||
| 6047 | struct sched_group *sg, *prev; | ||
| 6048 | bool right; | ||
| 6049 | |||
| 6050 | /* | ||
| 6051 | * Traverse to first CPU in group, and count hops | ||
| 6052 | * to cpu from there, switching direction on each | ||
| 6053 | * hop, never ever pointing the last CPU rightward. | ||
| 6054 | */ | ||
| 6055 | do { | ||
| 6056 | id = cpumask_first(sched_domain_span(tmp)); | ||
| 6057 | prev = sg = tmp->groups; | ||
| 6058 | right = 1; | ||
| 6059 | |||
| 6060 | while (cpumask_first(sched_group_cpus(sg)) != id) | ||
| 6061 | sg = sg->next; | ||
| 6062 | |||
| 6063 | while (!cpumask_test_cpu(cpu, sched_group_cpus(sg))) { | ||
| 6064 | prev = sg; | ||
| 6065 | sg = sg->next; | ||
| 6066 | right = !right; | ||
| 6067 | } | ||
| 6068 | |||
| 6069 | /* A CPU went down, never point back to domain start. */ | ||
| 6070 | if (right && cpumask_first(sched_group_cpus(sg->next)) == id) | ||
| 6071 | right = false; | ||
| 6072 | |||
| 6073 | sg = right ? sg->next : prev; | ||
| 6074 | tmp->idle_buddy = cpumask_first(sched_group_cpus(sg)); | ||
| 6075 | } while ((tmp = tmp->child)); | ||
| 6076 | |||
| 6041 | id = cpumask_first(sched_domain_span(sd)); | 6077 | id = cpumask_first(sched_domain_span(sd)); |
| 6078 | } | ||
| 6042 | 6079 | ||
| 6043 | rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); | 6080 | rcu_assign_pointer(per_cpu(sd_llc, cpu), sd); |
| 6044 | per_cpu(sd_llc_id, cpu) = id; | 6081 | per_cpu(sd_llc_id, cpu) = id; |
| @@ -7097,34 +7134,66 @@ match2: | |||
| 7097 | mutex_unlock(&sched_domains_mutex); | 7134 | mutex_unlock(&sched_domains_mutex); |
| 7098 | } | 7135 | } |
| 7099 | 7136 | ||
| 7137 | static int num_cpus_frozen; /* used to mark begin/end of suspend/resume */ | ||
| 7138 | |||
| 7100 | /* | 7139 | /* |
| 7101 | * Update cpusets according to cpu_active mask. If cpusets are | 7140 | * Update cpusets according to cpu_active mask. If cpusets are |
| 7102 | * disabled, cpuset_update_active_cpus() becomes a simple wrapper | 7141 | * disabled, cpuset_update_active_cpus() becomes a simple wrapper |
| 7103 | * around partition_sched_domains(). | 7142 | * around partition_sched_domains(). |
| 7143 | * | ||
| 7144 | * If we come here as part of a suspend/resume, don't touch cpusets because we | ||
| 7145 | * want to restore it back to its original state upon resume anyway. | ||
| 7104 | */ | 7146 | */ |
| 7105 | static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, | 7147 | static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action, |
| 7106 | void *hcpu) | 7148 | void *hcpu) |
| 7107 | { | 7149 | { |
| 7108 | switch (action & ~CPU_TASKS_FROZEN) { | 7150 | switch (action) { |
| 7151 | case CPU_ONLINE_FROZEN: | ||
| 7152 | case CPU_DOWN_FAILED_FROZEN: | ||
| 7153 | |||
| 7154 | /* | ||
| 7155 | * num_cpus_frozen tracks how many CPUs are involved in suspend | ||
| 7156 | * resume sequence. As long as this is not the last online | ||
| 7157 | * operation in the resume sequence, just build a single sched | ||
| 7158 | * domain, ignoring cpusets. | ||
| 7159 | */ | ||
| 7160 | num_cpus_frozen--; | ||
| 7161 | if (likely(num_cpus_frozen)) { | ||
| 7162 | partition_sched_domains(1, NULL, NULL); | ||
| 7163 | break; | ||
| 7164 | } | ||
| 7165 | |||
| 7166 | /* | ||
| 7167 | * This is the last CPU online operation. So fall through and | ||
| 7168 | * restore the original sched domains by considering the | ||
| 7169 | * cpuset configurations. | ||
| 7170 | */ | ||
| 7171 | |||
| 7109 | case CPU_ONLINE: | 7172 | case CPU_ONLINE: |
| 7110 | case CPU_DOWN_FAILED: | 7173 | case CPU_DOWN_FAILED: |
| 7111 | cpuset_update_active_cpus(); | 7174 | cpuset_update_active_cpus(true); |
| 7112 | return NOTIFY_OK; | 7175 | break; |
| 7113 | default: | 7176 | default: |
| 7114 | return NOTIFY_DONE; | 7177 | return NOTIFY_DONE; |
| 7115 | } | 7178 | } |
| 7179 | return NOTIFY_OK; | ||
| 7116 | } | 7180 | } |
| 7117 | 7181 | ||
| 7118 | static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, | 7182 | static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long action, |
| 7119 | void *hcpu) | 7183 | void *hcpu) |
| 7120 | { | 7184 | { |
| 7121 | switch (action & ~CPU_TASKS_FROZEN) { | 7185 | switch (action) { |
| 7122 | case CPU_DOWN_PREPARE: | 7186 | case CPU_DOWN_PREPARE: |
| 7123 | cpuset_update_active_cpus(); | 7187 | cpuset_update_active_cpus(false); |
| 7124 | return NOTIFY_OK; | 7188 | break; |
| 7189 | case CPU_DOWN_PREPARE_FROZEN: | ||
| 7190 | num_cpus_frozen++; | ||
| 7191 | partition_sched_domains(1, NULL, NULL); | ||
| 7192 | break; | ||
| 7125 | default: | 7193 | default: |
| 7126 | return NOTIFY_DONE; | 7194 | return NOTIFY_DONE; |
| 7127 | } | 7195 | } |
| 7196 | return NOTIFY_OK; | ||
| 7128 | } | 7197 | } |
| 7129 | 7198 | ||
| 7130 | void __init sched_init_smp(void) | 7199 | void __init sched_init_smp(void) |
| @@ -7589,6 +7658,7 @@ void sched_destroy_group(struct task_group *tg) | |||
| 7589 | */ | 7658 | */ |
| 7590 | void sched_move_task(struct task_struct *tsk) | 7659 | void sched_move_task(struct task_struct *tsk) |
| 7591 | { | 7660 | { |
| 7661 | struct task_group *tg; | ||
| 7592 | int on_rq, running; | 7662 | int on_rq, running; |
| 7593 | unsigned long flags; | 7663 | unsigned long flags; |
| 7594 | struct rq *rq; | 7664 | struct rq *rq; |
| @@ -7603,6 +7673,12 @@ void sched_move_task(struct task_struct *tsk) | |||
| 7603 | if (unlikely(running)) | 7673 | if (unlikely(running)) |
| 7604 | tsk->sched_class->put_prev_task(rq, tsk); | 7674 | tsk->sched_class->put_prev_task(rq, tsk); |
| 7605 | 7675 | ||
| 7676 | tg = container_of(task_subsys_state_check(tsk, cpu_cgroup_subsys_id, | ||
| 7677 | lockdep_is_held(&tsk->sighand->siglock)), | ||
| 7678 | struct task_group, css); | ||
| 7679 | tg = autogroup_task_group(tsk, tg); | ||
| 7680 | tsk->sched_task_group = tg; | ||
| 7681 | |||
| 7606 | #ifdef CONFIG_FAIR_GROUP_SCHED | 7682 | #ifdef CONFIG_FAIR_GROUP_SCHED |
| 7607 | if (tsk->sched_class->task_move_group) | 7683 | if (tsk->sched_class->task_move_group) |
| 7608 | tsk->sched_class->task_move_group(tsk, on_rq); | 7684 | tsk->sched_class->task_move_group(tsk, on_rq); |
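The cpuset notifier changes above use num_cpus_frozen so that sched domains stay flat while CPUs are frozen and thawed, and are rebuilt from the cpuset configuration only when the last frozen CPU comes back online. A toy model of that counting, with printf() standing in for partition_sched_domains() and cpuset_update_active_cpus():

#include <stdio.h>

/*
 * Toy model of the suspend/resume handling in the cpuset notifiers above:
 * keep a single flat sched domain while any CPU is still frozen, and only
 * rebuild from the cpusets on the last CPU_ONLINE_FROZEN event.
 */
static int num_cpus_frozen;

static void cpu_down_prepare_frozen(int cpu)
{
	num_cpus_frozen++;
	printf("cpu%d frozen for suspend: flat domain (%d frozen)\n",
	       cpu, num_cpus_frozen);
}

static void cpu_online_frozen(int cpu)
{
	num_cpus_frozen--;
	if (num_cpus_frozen)
		printf("cpu%d back, %d still frozen: keep flat domain\n",
		       cpu, num_cpus_frozen);
	else
		printf("cpu%d back, resume complete: rebuild from cpusets\n",
		       cpu);
}

int main(void)
{
	int cpu;

	for (cpu = 1; cpu < 4; cpu++)	/* the boot CPU stays online */
		cpu_down_prepare_frozen(cpu);
	for (cpu = 3; cpu >= 1; cpu--)
		cpu_online_frozen(cpu);
	return 0;
}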
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c099cc6eebe3..22321db64952 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
| @@ -2637,8 +2637,6 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
| 2637 | int cpu = smp_processor_id(); | 2637 | int cpu = smp_processor_id(); |
| 2638 | int prev_cpu = task_cpu(p); | 2638 | int prev_cpu = task_cpu(p); |
| 2639 | struct sched_domain *sd; | 2639 | struct sched_domain *sd; |
| 2640 | struct sched_group *sg; | ||
| 2641 | int i; | ||
| 2642 | 2640 | ||
| 2643 | /* | 2641 | /* |
| 2644 | * If the task is going to be woken-up on this cpu and if it is | 2642 | * If the task is going to be woken-up on this cpu and if it is |
| @@ -2655,29 +2653,17 @@ static int select_idle_sibling(struct task_struct *p, int target) | |||
| 2655 | return prev_cpu; | 2653 | return prev_cpu; |
| 2656 | 2654 | ||
| 2657 | /* | 2655 | /* |
| 2658 | * Otherwise, iterate the domains and find an eligible idle cpu. | 2656 | * Otherwise, check assigned siblings to find an eligible idle cpu. |
| 2659 | */ | 2657 | */ |
| 2660 | sd = rcu_dereference(per_cpu(sd_llc, target)); | 2658 | sd = rcu_dereference(per_cpu(sd_llc, target)); |
| 2661 | for_each_lower_domain(sd) { | ||
| 2662 | sg = sd->groups; | ||
| 2663 | do { | ||
| 2664 | if (!cpumask_intersects(sched_group_cpus(sg), | ||
| 2665 | tsk_cpus_allowed(p))) | ||
| 2666 | goto next; | ||
| 2667 | |||
| 2668 | for_each_cpu(i, sched_group_cpus(sg)) { | ||
| 2669 | if (!idle_cpu(i)) | ||
| 2670 | goto next; | ||
| 2671 | } | ||
| 2672 | 2659 | ||
| 2673 | target = cpumask_first_and(sched_group_cpus(sg), | 2660 | for_each_lower_domain(sd) { |
| 2674 | tsk_cpus_allowed(p)); | 2661 | if (!cpumask_test_cpu(sd->idle_buddy, tsk_cpus_allowed(p))) |
| 2675 | goto done; | 2662 | continue; |
| 2676 | next: | 2663 | if (idle_cpu(sd->idle_buddy)) |
| 2677 | sg = sg->next; | 2664 | return sd->idle_buddy; |
| 2678 | } while (sg != sd->groups); | ||
| 2679 | } | 2665 | } |
| 2680 | done: | 2666 | |
| 2681 | return target; | 2667 | return target; |
| 2682 | } | 2668 | } |
| 2683 | 2669 | ||
| @@ -3068,16 +3054,19 @@ static unsigned long __read_mostly max_load_balance_interval = HZ/10; | |||
| 3068 | 3054 | ||
| 3069 | #define LBF_ALL_PINNED 0x01 | 3055 | #define LBF_ALL_PINNED 0x01 |
| 3070 | #define LBF_NEED_BREAK 0x02 | 3056 | #define LBF_NEED_BREAK 0x02 |
| 3057 | #define LBF_SOME_PINNED 0x04 | ||
| 3071 | 3058 | ||
| 3072 | struct lb_env { | 3059 | struct lb_env { |
| 3073 | struct sched_domain *sd; | 3060 | struct sched_domain *sd; |
| 3074 | 3061 | ||
| 3075 | int src_cpu; | ||
| 3076 | struct rq *src_rq; | 3062 | struct rq *src_rq; |
| 3063 | int src_cpu; | ||
| 3077 | 3064 | ||
| 3078 | int dst_cpu; | 3065 | int dst_cpu; |
| 3079 | struct rq *dst_rq; | 3066 | struct rq *dst_rq; |
| 3080 | 3067 | ||
| 3068 | struct cpumask *dst_grpmask; | ||
| 3069 | int new_dst_cpu; | ||
| 3081 | enum cpu_idle_type idle; | 3070 | enum cpu_idle_type idle; |
| 3082 | long imbalance; | 3071 | long imbalance; |
| 3083 | unsigned int flags; | 3072 | unsigned int flags; |
| @@ -3145,9 +3134,31 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env) | |||
| 3145 | * 3) are cache-hot on their current CPU. | 3134 | * 3) are cache-hot on their current CPU. |
| 3146 | */ | 3135 | */ |
| 3147 | if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { | 3136 | if (!cpumask_test_cpu(env->dst_cpu, tsk_cpus_allowed(p))) { |
| 3137 | int new_dst_cpu; | ||
| 3138 | |||
| 3148 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); | 3139 | schedstat_inc(p, se.statistics.nr_failed_migrations_affine); |
| 3140 | |||
| 3141 | /* | ||
| 3142 | * Remember if this task can be migrated to any other cpu in | ||
| 3143 | * our sched_group. We may want to revisit it if we couldn't | ||
| 3144 | * meet load balance goals by pulling other tasks on src_cpu. | ||
| 3145 | * | ||
| 3146 | * Also avoid computing new_dst_cpu if we have already computed | ||
| 3147 | * one in current iteration. | ||
| 3148 | */ | ||
| 3149 | if (!env->dst_grpmask || (env->flags & LBF_SOME_PINNED)) | ||
| 3150 | return 0; | ||
| 3151 | |||
| 3152 | new_dst_cpu = cpumask_first_and(env->dst_grpmask, | ||
| 3153 | tsk_cpus_allowed(p)); | ||
| 3154 | if (new_dst_cpu < nr_cpu_ids) { | ||
| 3155 | env->flags |= LBF_SOME_PINNED; | ||
| 3156 | env->new_dst_cpu = new_dst_cpu; | ||
| 3157 | } | ||
| 3149 | return 0; | 3158 | return 0; |
| 3150 | } | 3159 | } |
| 3160 | |||
| 3161 | /* Record that we found at least one task that could run on dst_cpu */ | ||
| 3151 | env->flags &= ~LBF_ALL_PINNED; | 3162 | env->flags &= ~LBF_ALL_PINNED; |
| 3152 | 3163 | ||
| 3153 | if (task_running(env->src_rq, p)) { | 3164 | if (task_running(env->src_rq, p)) { |
| @@ -4227,7 +4238,8 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 4227 | struct sched_domain *sd, enum cpu_idle_type idle, | 4238 | struct sched_domain *sd, enum cpu_idle_type idle, |
| 4228 | int *balance) | 4239 | int *balance) |
| 4229 | { | 4240 | { |
| 4230 | int ld_moved, active_balance = 0; | 4241 | int ld_moved, cur_ld_moved, active_balance = 0; |
| 4242 | int lb_iterations, max_lb_iterations; | ||
| 4231 | struct sched_group *group; | 4243 | struct sched_group *group; |
| 4232 | struct rq *busiest; | 4244 | struct rq *busiest; |
| 4233 | unsigned long flags; | 4245 | unsigned long flags; |
| @@ -4237,11 +4249,13 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
| 4237 | .sd = sd, | 4249 | .sd = sd, |
| 4238 | .dst_cpu = this_cpu, | 4250 | .dst_cpu = this_cpu, |
| 4239 | .dst_rq = this_rq, | 4251 | .dst_rq = this_rq, |
| 4252 | .dst_grpmask = sched_group_cpus(sd->groups), | ||
| 4240 | .idle = idle, | 4253 | .idle = idle, |
| 4241 | .loop_break = sched_nr_migrate_break, | 4254 | .loop_break = sched_nr_migrate_break, |
| 4242 | }; | 4255 | }; |
| 4243 | 4256 | ||
| 4244 | cpumask_copy(cpus, cpu_active_mask); | 4257 | cpumask_copy(cpus, cpu_active_mask); |
| 4258 | max_lb_iterations = cpumask_weight(env.dst_grpmask); | ||
| 4245 | 4259 | ||
| 4246 | schedstat_inc(sd, lb_count[idle]); | 4260 | schedstat_inc(sd, lb_count[idle]); |
| 4247 | 4261 | ||
| @@ -4267,6 +4281,7 @@ redo: | |||
| 4267 | schedstat_add(sd, lb_imbalance[idle], env.imbalance); | 4281 | schedstat_add(sd, lb_imbalance[idle], env.imbalance); |
| 4268 | 4282 | ||
| 4269 | ld_moved = 0; | 4283 | ld_moved = 0; |
| 4284 | lb_iterations = 1; | ||
| 4270 | if (busiest->nr_running > 1) { | 4285 | if (busiest->nr_running > 1) { |
| 4271 | /* | 4286 | /* |
| 4272 | * Attempt to move tasks. If find_busiest_group has found | 4287 | * Attempt to move tasks. If find_busiest_group has found |
| @@ -4284,7 +4299,13 @@ more_balance: | |||
| 4284 | double_rq_lock(this_rq, busiest); | 4299 | double_rq_lock(this_rq, busiest); |
| 4285 | if (!env.loop) | 4300 | if (!env.loop) |
| 4286 | update_h_load(env.src_cpu); | 4301 | update_h_load(env.src_cpu); |
| 4287 | ld_moved += move_tasks(&env); | 4302 | |
| 4303 | /* | ||
| 4304 | * cur_ld_moved - load moved in current iteration | ||
| 4305 | * ld_moved - cumulative load moved across iterations | ||
| 4306 | */ | ||
| 4307 | cur_ld_moved = move_tasks(&env); | ||
| 4308 | ld_moved += cur_ld_moved; | ||
| 4288 | double_rq_unlock(this_rq, busiest); | 4309 | double_rq_unlock(this_rq, busiest); |
| 4289 | local_irq_restore(flags); | 4310 | local_irq_restore(flags); |
| 4290 | 4311 | ||
| @@ -4296,14 +4317,52 @@ more_balance: | |||
| 4296 | /* | 4317 | /* |
| 4297 | * some other cpu did the load balance for us. | 4318 | * some other cpu did the load balance for us. |
| 4298 | */ | 4319 | */ |
| 4299 | if (ld_moved && this_cpu != smp_processor_id()) | 4320 | if (cur_ld_moved && env.dst_cpu != smp_processor_id()) |
| 4300 | resched_cpu(this_cpu); | 4321 | resched_cpu(env.dst_cpu); |
| 4322 | |||
| 4323 | /* | ||
| 4324 | * Revisit (affine) tasks on src_cpu that couldn't be moved to | ||
| 4325 | * us and move them to an alternate dst_cpu in our sched_group | ||
| 4326 | * where they can run. The upper limit on how many times we | ||
| 4327 | * iterate on same src_cpu is dependent on number of cpus in our | ||
| 4328 | * sched_group. | ||
| 4329 | * | ||
| 4330 | * This changes load balance semantics a bit on who can move | ||
| 4331 | * load to a given_cpu. In addition to the given_cpu itself | ||
| 4332 | * (or a ilb_cpu acting on its behalf where given_cpu is | ||
| 4333 | * (or an ilb_cpu acting on its behalf where given_cpu is | ||
| 4334 | * load to given_cpu. In rare situations, this may cause | ||
| 4335 | * conflicts (balance_cpu and given_cpu/ilb_cpu deciding | ||
| 4336 | * _independently_ and at _same_ time to move some load to | ||
| 4337 | * given_cpu) causing excess load to be moved to given_cpu. | ||
| 4338 | * This however should not happen so much in practice and | ||
| 4339 | * moreover subsequent load balance cycles should correct the | ||
| 4340 | * excess load moved. | ||
| 4341 | */ | ||
| 4342 | if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0 && | ||
| 4343 | lb_iterations++ < max_lb_iterations) { | ||
| 4344 | |||
| 4345 | this_rq = cpu_rq(env.new_dst_cpu); | ||
| 4346 | env.dst_rq = this_rq; | ||
| 4347 | env.dst_cpu = env.new_dst_cpu; | ||
| 4348 | env.flags &= ~LBF_SOME_PINNED; | ||
| 4349 | env.loop = 0; | ||
| 4350 | env.loop_break = sched_nr_migrate_break; | ||
| 4351 | /* | ||
| 4352 | * Go back to "more_balance" rather than "redo" since we | ||
| 4353 | * need to continue with same src_cpu. | ||
| 4354 | */ | ||
| 4355 | goto more_balance; | ||
| 4356 | } | ||
| 4301 | 4357 | ||
| 4302 | /* All tasks on this runqueue were pinned by CPU affinity */ | 4358 | /* All tasks on this runqueue were pinned by CPU affinity */ |
| 4303 | if (unlikely(env.flags & LBF_ALL_PINNED)) { | 4359 | if (unlikely(env.flags & LBF_ALL_PINNED)) { |
| 4304 | cpumask_clear_cpu(cpu_of(busiest), cpus); | 4360 | cpumask_clear_cpu(cpu_of(busiest), cpus); |
| 4305 | if (!cpumask_empty(cpus)) | 4361 | if (!cpumask_empty(cpus)) { |
| 4362 | env.loop = 0; | ||
| 4363 | env.loop_break = sched_nr_migrate_break; | ||
| 4306 | goto redo; | 4364 | goto redo; |
| 4365 | } | ||
| 4307 | goto out_balanced; | 4366 | goto out_balanced; |
| 4308 | } | 4367 | } |
| 4309 | } | 4368 | } |
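With the idle_buddy field set up by the update_top_cache_domain() change earlier in this series, select_idle_sibling() no longer scans whole groups at wakeup; it checks one precomputed buddy per domain level. A standalone sketch of that lookup, where toy_domain, cpu_allowed() and cpu_is_idle() are stand-ins rather than kernel types or helpers:

#include <stdio.h>
#include <stdbool.h>

/*
 * Skeleton of the new select_idle_sibling() fast path: each domain level
 * carries a precomputed idle_buddy CPU, so the wakeup path checks one
 * candidate per level instead of walking every sched_group.
 */
struct toy_domain {
	int idle_buddy;
	struct toy_domain *child;
};

static bool cpu_allowed(int cpu, unsigned long allowed_mask)
{
	return allowed_mask & (1UL << cpu);
}

static bool cpu_is_idle(int cpu)
{
	return cpu == 3;		/* pretend only CPU 3 is idle */
}

static int pick_idle_sibling(struct toy_domain *sd, unsigned long allowed,
			     int target)
{
	for (; sd; sd = sd->child) {
		if (!cpu_allowed(sd->idle_buddy, allowed))
			continue;
		if (cpu_is_idle(sd->idle_buddy))
			return sd->idle_buddy;
	}
	return target;			/* fall back to the original target */
}

int main(void)
{
	struct toy_domain mc = { .idle_buddy = 3, .child = NULL };
	struct toy_domain llc = { .idle_buddy = 2, .child = &mc };

	printf("woken task placed on cpu%d\n", pick_idle_sibling(&llc, 0xf, 0));
	return 0;
}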
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 55844f24435a..c35a1a7dd4d6 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h | |||
| @@ -538,22 +538,19 @@ extern int group_balance_cpu(struct sched_group *sg); | |||
| 538 | /* | 538 | /* |
| 539 | * Return the group to which this task belongs. | 539 | * Return the group to which this task belongs. |
| 540 | * | 540 | * |
| 541 | * We use task_subsys_state_check() and extend the RCU verification with | 541 | * We cannot use task_subsys_state() and friends because the cgroup |
| 542 | * pi->lock and rq->lock because cpu_cgroup_attach() holds those locks for each | 542 | * subsystem changes that value before the cgroup_subsys::attach() method |
| 543 | * task it moves into the cgroup. Therefore by holding either of those locks, | 543 | * is called, therefore we cannot pin it and might observe the wrong value. |
| 544 | * we pin the task to the current cgroup. | 544 | * |
| 545 | * The same is true for autogroup's p->signal->autogroup->tg, the autogroup | ||
| 546 | * core changes this before calling sched_move_task(). | ||
| 547 | * | ||
| 548 | * Instead we use a 'copy' which is updated from sched_move_task() while | ||
| 549 | * holding both task_struct::pi_lock and rq::lock. | ||
| 545 | */ | 550 | */ |
| 546 | static inline struct task_group *task_group(struct task_struct *p) | 551 | static inline struct task_group *task_group(struct task_struct *p) |
| 547 | { | 552 | { |
| 548 | struct task_group *tg; | 553 | return p->sched_task_group; |
| 549 | struct cgroup_subsys_state *css; | ||
| 550 | |||
| 551 | css = task_subsys_state_check(p, cpu_cgroup_subsys_id, | ||
| 552 | lockdep_is_held(&p->pi_lock) || | ||
| 553 | lockdep_is_held(&task_rq(p)->lock)); | ||
| 554 | tg = container_of(css, struct task_group, css); | ||
| 555 | |||
| 556 | return autogroup_task_group(p, tg); | ||
| 557 | } | 554 | } |
| 558 | 555 | ||
| 559 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ | 556 | /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */ |
diff --git a/kernel/softirq.c b/kernel/softirq.c index 671f9594e368..b73e681df09e 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c | |||
| @@ -210,6 +210,14 @@ asmlinkage void __do_softirq(void) | |||
| 210 | __u32 pending; | 210 | __u32 pending; |
| 211 | int max_restart = MAX_SOFTIRQ_RESTART; | 211 | int max_restart = MAX_SOFTIRQ_RESTART; |
| 212 | int cpu; | 212 | int cpu; |
| 213 | unsigned long old_flags = current->flags; | ||
| 214 | |||
| 215 | /* | ||
| 216 | * Mask out PF_MEMALLOC, as the current task context is borrowed for the | ||
| 217 | * softirq. A softirq handler such as network RX might set PF_MEMALLOC | ||
| 218 | * again if the socket is related to swap. | ||
| 219 | */ | ||
| 220 | current->flags &= ~PF_MEMALLOC; | ||
| 213 | 221 | ||
| 214 | pending = local_softirq_pending(); | 222 | pending = local_softirq_pending(); |
| 215 | account_system_vtime(current); | 223 | account_system_vtime(current); |
| @@ -265,6 +273,7 @@ restart: | |||
| 265 | 273 | ||
| 266 | account_system_vtime(current); | 274 | account_system_vtime(current); |
| 267 | __local_bh_enable(SOFTIRQ_OFFSET); | 275 | __local_bh_enable(SOFTIRQ_OFFSET); |
| 276 | tsk_restore_flags(current, old_flags, PF_MEMALLOC); | ||
| 268 | } | 277 | } |
| 269 | 278 | ||
| 270 | #ifndef __ARCH_HAS_DO_SOFTIRQ | 279 | #ifndef __ARCH_HAS_DO_SOFTIRQ |
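The __do_softirq() hunk above saves the task flags, clears PF_MEMALLOC for the duration of the handlers, and puts the original bit back with tsk_restore_flags(). The same save/mask/restore pattern on a plain flags word; the flag value and restore_flags() helper are for illustration only:

#include <stdio.h>

#define PF_MEMALLOC 0x00000800	/* flag bit, value chosen for illustration */

/*
 * restore_flags() does what the kernel's tsk_restore_flags() does for a
 * task_struct: restore the masked bit to whatever the interrupted
 * context originally had.
 */
static void restore_flags(unsigned long *cur, unsigned long orig,
			  unsigned long mask)
{
	*cur &= ~mask;			/* drop whatever the softirq set */
	*cur |= orig & mask;		/* restore the caller's original bit */
}

int main(void)
{
	unsigned long flags = PF_MEMALLOC;	/* borrowed context had it set */
	unsigned long old_flags = flags;

	flags &= ~PF_MEMALLOC;			/* softirq must not inherit it */
	/* ... run handlers; network RX over a swap socket may set it again ... */
	restore_flags(&flags, old_flags, PF_MEMALLOC);

	printf("PF_MEMALLOC after restore: %s\n",
	       (flags & PF_MEMALLOC) ? "set" : "clear");
	return 0;
}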
diff --git a/kernel/sys.c b/kernel/sys.c index 2d39a84cd857..241507f23eca 100644 --- a/kernel/sys.c +++ b/kernel/sys.c | |||
| @@ -2015,7 +2015,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2015 | break; | 2015 | break; |
| 2016 | } | 2016 | } |
| 2017 | me->pdeath_signal = arg2; | 2017 | me->pdeath_signal = arg2; |
| 2018 | error = 0; | ||
| 2019 | break; | 2018 | break; |
| 2020 | case PR_GET_PDEATHSIG: | 2019 | case PR_GET_PDEATHSIG: |
| 2021 | error = put_user(me->pdeath_signal, (int __user *)arg2); | 2020 | error = put_user(me->pdeath_signal, (int __user *)arg2); |
| @@ -2029,7 +2028,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2029 | break; | 2028 | break; |
| 2030 | } | 2029 | } |
| 2031 | set_dumpable(me->mm, arg2); | 2030 | set_dumpable(me->mm, arg2); |
| 2032 | error = 0; | ||
| 2033 | break; | 2031 | break; |
| 2034 | 2032 | ||
| 2035 | case PR_SET_UNALIGN: | 2033 | case PR_SET_UNALIGN: |
| @@ -2056,10 +2054,7 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2056 | case PR_SET_TIMING: | 2054 | case PR_SET_TIMING: |
| 2057 | if (arg2 != PR_TIMING_STATISTICAL) | 2055 | if (arg2 != PR_TIMING_STATISTICAL) |
| 2058 | error = -EINVAL; | 2056 | error = -EINVAL; |
| 2059 | else | ||
| 2060 | error = 0; | ||
| 2061 | break; | 2057 | break; |
| 2062 | |||
| 2063 | case PR_SET_NAME: | 2058 | case PR_SET_NAME: |
| 2064 | comm[sizeof(me->comm)-1] = 0; | 2059 | comm[sizeof(me->comm)-1] = 0; |
| 2065 | if (strncpy_from_user(comm, (char __user *)arg2, | 2060 | if (strncpy_from_user(comm, (char __user *)arg2, |
| @@ -2067,20 +2062,19 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2067 | return -EFAULT; | 2062 | return -EFAULT; |
| 2068 | set_task_comm(me, comm); | 2063 | set_task_comm(me, comm); |
| 2069 | proc_comm_connector(me); | 2064 | proc_comm_connector(me); |
| 2070 | return 0; | 2065 | break; |
| 2071 | case PR_GET_NAME: | 2066 | case PR_GET_NAME: |
| 2072 | get_task_comm(comm, me); | 2067 | get_task_comm(comm, me); |
| 2073 | if (copy_to_user((char __user *)arg2, comm, | 2068 | if (copy_to_user((char __user *)arg2, comm, |
| 2074 | sizeof(comm))) | 2069 | sizeof(comm))) |
| 2075 | return -EFAULT; | 2070 | return -EFAULT; |
| 2076 | return 0; | 2071 | break; |
| 2077 | case PR_GET_ENDIAN: | 2072 | case PR_GET_ENDIAN: |
| 2078 | error = GET_ENDIAN(me, arg2); | 2073 | error = GET_ENDIAN(me, arg2); |
| 2079 | break; | 2074 | break; |
| 2080 | case PR_SET_ENDIAN: | 2075 | case PR_SET_ENDIAN: |
| 2081 | error = SET_ENDIAN(me, arg2); | 2076 | error = SET_ENDIAN(me, arg2); |
| 2082 | break; | 2077 | break; |
| 2083 | |||
| 2084 | case PR_GET_SECCOMP: | 2078 | case PR_GET_SECCOMP: |
| 2085 | error = prctl_get_seccomp(); | 2079 | error = prctl_get_seccomp(); |
| 2086 | break; | 2080 | break; |
| @@ -2108,7 +2102,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2108 | current->default_timer_slack_ns; | 2102 | current->default_timer_slack_ns; |
| 2109 | else | 2103 | else |
| 2110 | current->timer_slack_ns = arg2; | 2104 | current->timer_slack_ns = arg2; |
| 2111 | error = 0; | ||
| 2112 | break; | 2105 | break; |
| 2113 | case PR_MCE_KILL: | 2106 | case PR_MCE_KILL: |
| 2114 | if (arg4 | arg5) | 2107 | if (arg4 | arg5) |
| @@ -2134,7 +2127,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2134 | default: | 2127 | default: |
| 2135 | return -EINVAL; | 2128 | return -EINVAL; |
| 2136 | } | 2129 | } |
| 2137 | error = 0; | ||
| 2138 | break; | 2130 | break; |
| 2139 | case PR_MCE_KILL_GET: | 2131 | case PR_MCE_KILL_GET: |
| 2140 | if (arg2 | arg3 | arg4 | arg5) | 2132 | if (arg2 | arg3 | arg4 | arg5) |
| @@ -2153,7 +2145,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, | |||
| 2153 | break; | 2145 | break; |
| 2154 | case PR_SET_CHILD_SUBREAPER: | 2146 | case PR_SET_CHILD_SUBREAPER: |
| 2155 | me->signal->is_child_subreaper = !!arg2; | 2147 | me->signal->is_child_subreaper = !!arg2; |
| 2156 | error = 0; | ||
| 2157 | break; | 2148 | break; |
| 2158 | case PR_GET_CHILD_SUBREAPER: | 2149 | case PR_GET_CHILD_SUBREAPER: |
| 2159 | error = put_user(me->signal->is_child_subreaper, | 2150 | error = put_user(me->signal->is_child_subreaper, |
| @@ -2195,46 +2186,52 @@ static void argv_cleanup(struct subprocess_info *info) | |||
| 2195 | argv_free(info->argv); | 2186 | argv_free(info->argv); |
| 2196 | } | 2187 | } |
| 2197 | 2188 | ||
| 2198 | /** | 2189 | static int __orderly_poweroff(void) |
| 2199 | * orderly_poweroff - Trigger an orderly system poweroff | ||
| 2200 | * @force: force poweroff if command execution fails | ||
| 2201 | * | ||
| 2202 | * This may be called from any context to trigger a system shutdown. | ||
| 2203 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
| 2204 | */ | ||
| 2205 | int orderly_poweroff(bool force) | ||
| 2206 | { | 2190 | { |
| 2207 | int argc; | 2191 | int argc; |
| 2208 | char **argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); | 2192 | char **argv; |
| 2209 | static char *envp[] = { | 2193 | static char *envp[] = { |
| 2210 | "HOME=/", | 2194 | "HOME=/", |
| 2211 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", | 2195 | "PATH=/sbin:/bin:/usr/sbin:/usr/bin", |
| 2212 | NULL | 2196 | NULL |
| 2213 | }; | 2197 | }; |
| 2214 | int ret = -ENOMEM; | 2198 | int ret; |
| 2215 | 2199 | ||
| 2200 | argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc); | ||
| 2216 | if (argv == NULL) { | 2201 | if (argv == NULL) { |
| 2217 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", | 2202 | printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n", |
| 2218 | __func__, poweroff_cmd); | 2203 | __func__, poweroff_cmd); |
| 2219 | goto out; | 2204 | return -ENOMEM; |
| 2220 | } | 2205 | } |
| 2221 | 2206 | ||
| 2222 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, | 2207 | ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_NO_WAIT, |
| 2223 | NULL, argv_cleanup, NULL); | 2208 | NULL, argv_cleanup, NULL); |
| 2224 | out: | ||
| 2225 | if (likely(!ret)) | ||
| 2226 | return 0; | ||
| 2227 | |||
| 2228 | if (ret == -ENOMEM) | 2209 | if (ret == -ENOMEM) |
| 2229 | argv_free(argv); | 2210 | argv_free(argv); |
| 2230 | 2211 | ||
| 2231 | if (force) { | 2212 | return ret; |
| 2213 | } | ||
| 2214 | |||
| 2215 | /** | ||
| 2216 | * orderly_poweroff - Trigger an orderly system poweroff | ||
| 2217 | * @force: force poweroff if command execution fails | ||
| 2218 | * | ||
| 2219 | * This may be called from any context to trigger a system shutdown. | ||
| 2220 | * If the orderly shutdown fails, it will force an immediate shutdown. | ||
| 2221 | */ | ||
| 2222 | int orderly_poweroff(bool force) | ||
| 2223 | { | ||
| 2224 | int ret = __orderly_poweroff(); | ||
| 2225 | |||
| 2226 | if (ret && force) { | ||
| 2232 | printk(KERN_WARNING "Failed to start orderly shutdown: " | 2227 | printk(KERN_WARNING "Failed to start orderly shutdown: " |
| 2233 | "forcing the issue\n"); | 2228 | "forcing the issue\n"); |
| 2234 | 2229 | ||
| 2235 | /* I guess this should try to kick off some daemon to | 2230 | /* |
| 2236 | sync and poweroff asap. Or not even bother syncing | 2231 | * I guess this should try to kick off some daemon to sync and |
| 2237 | if we're doing an emergency shutdown? */ | 2232 | * poweroff asap. Or not even bother syncing if we're doing an |
| 2233 | * emergency shutdown? | ||
| 2234 | */ | ||
| 2238 | emergency_sync(); | 2235 | emergency_sync(); |
| 2239 | kernel_power_off(); | 2236 | kernel_power_off(); |
| 2240 | } | 2237 | } |
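The sys.c refactor above splits the usermode-helper invocation into __orderly_poweroff() so the force fallback only looks at its return value. A sketch of that shape, with run_poweroff_cmd() standing in for the call_usermodehelper_fns() path and the emergency sync and power-off reduced to a comment:

#include <stdio.h>

/*
 * Shape of the orderly_poweroff() split above: the command-spawning step
 * is its own function, and the force path acts only on the returned error.
 */
static int run_poweroff_cmd(void)
{
	return -1;		/* pretend spawning the poweroff command failed */
}

static int do_orderly_poweroff(int force)
{
	int ret = run_poweroff_cmd();

	if (ret && force) {
		printf("Failed to start orderly shutdown: forcing the issue\n");
		/* emergency_sync(); kernel_power_off(); in the real code */
	}
	return ret;
}

int main(void)
{
	do_orderly_poweroff(1);
	return 0;
}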
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4ab11879aeb4..6502d35a25ba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/security.h> | 30 | #include <linux/security.h> |
| 31 | #include <linux/ctype.h> | 31 | #include <linux/ctype.h> |
| 32 | #include <linux/kmemcheck.h> | 32 | #include <linux/kmemcheck.h> |
| 33 | #include <linux/kmemleak.h> | ||
| 33 | #include <linux/fs.h> | 34 | #include <linux/fs.h> |
| 34 | #include <linux/init.h> | 35 | #include <linux/init.h> |
| 35 | #include <linux/kernel.h> | 36 | #include <linux/kernel.h> |
| @@ -174,6 +175,11 @@ static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, | |||
| 174 | void __user *buffer, size_t *lenp, loff_t *ppos); | 175 | void __user *buffer, size_t *lenp, loff_t *ppos); |
| 175 | #endif | 176 | #endif |
| 176 | 177 | ||
| 178 | static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, | ||
| 179 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
| 180 | static int proc_dostring_coredump(struct ctl_table *table, int write, | ||
| 181 | void __user *buffer, size_t *lenp, loff_t *ppos); | ||
| 182 | |||
| 177 | #ifdef CONFIG_MAGIC_SYSRQ | 183 | #ifdef CONFIG_MAGIC_SYSRQ |
| 178 | /* Note: sysrq code uses it's own private copy */ | 184 | /* Note: sysrq code uses it's own private copy */ |
| 179 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; | 185 | static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; |
| @@ -410,7 +416,7 @@ static struct ctl_table kern_table[] = { | |||
| 410 | .data = core_pattern, | 416 | .data = core_pattern, |
| 411 | .maxlen = CORENAME_MAX_SIZE, | 417 | .maxlen = CORENAME_MAX_SIZE, |
| 412 | .mode = 0644, | 418 | .mode = 0644, |
| 413 | .proc_handler = proc_dostring, | 419 | .proc_handler = proc_dostring_coredump, |
| 414 | }, | 420 | }, |
| 415 | { | 421 | { |
| 416 | .procname = "core_pipe_limit", | 422 | .procname = "core_pipe_limit", |
| @@ -1095,11 +1101,9 @@ static struct ctl_table vm_table[] = { | |||
| 1095 | .extra1 = &zero, | 1101 | .extra1 = &zero, |
| 1096 | }, | 1102 | }, |
| 1097 | { | 1103 | { |
| 1098 | .procname = "nr_pdflush_threads", | 1104 | .procname = "nr_pdflush_threads", |
| 1099 | .data = &nr_pdflush_threads, | 1105 | .mode = 0444 /* read-only */, |
| 1100 | .maxlen = sizeof nr_pdflush_threads, | 1106 | .proc_handler = pdflush_proc_obsolete, |
| 1101 | .mode = 0444 /* read-only*/, | ||
| 1102 | .proc_handler = proc_dointvec, | ||
| 1103 | }, | 1107 | }, |
| 1104 | { | 1108 | { |
| 1105 | .procname = "swappiness", | 1109 | .procname = "swappiness", |
| @@ -1498,7 +1502,7 @@ static struct ctl_table fs_table[] = { | |||
| 1498 | .data = &suid_dumpable, | 1502 | .data = &suid_dumpable, |
| 1499 | .maxlen = sizeof(int), | 1503 | .maxlen = sizeof(int), |
| 1500 | .mode = 0644, | 1504 | .mode = 0644, |
| 1501 | .proc_handler = proc_dointvec_minmax, | 1505 | .proc_handler = proc_dointvec_minmax_coredump, |
| 1502 | .extra1 = &zero, | 1506 | .extra1 = &zero, |
| 1503 | .extra2 = &two, | 1507 | .extra2 = &two, |
| 1504 | }, | 1508 | }, |
| @@ -1551,7 +1555,10 @@ static struct ctl_table dev_table[] = { | |||
| 1551 | 1555 | ||
| 1552 | int __init sysctl_init(void) | 1556 | int __init sysctl_init(void) |
| 1553 | { | 1557 | { |
| 1554 | register_sysctl_table(sysctl_base_table); | 1558 | struct ctl_table_header *hdr; |
| 1559 | |||
| 1560 | hdr = register_sysctl_table(sysctl_base_table); | ||
| 1561 | kmemleak_not_leak(hdr); | ||
| 1555 | return 0; | 1562 | return 0; |
| 1556 | } | 1563 | } |
| 1557 | 1564 | ||
| @@ -2009,6 +2016,34 @@ int proc_dointvec_minmax(struct ctl_table *table, int write, | |||
| 2009 | do_proc_dointvec_minmax_conv, ¶m); | 2016 | do_proc_dointvec_minmax_conv, ¶m); |
| 2010 | } | 2017 | } |
| 2011 | 2018 | ||
| 2019 | static void validate_coredump_safety(void) | ||
| 2020 | { | ||
| 2021 | if (suid_dumpable == SUID_DUMPABLE_SAFE && | ||
| 2022 | core_pattern[0] != '/' && core_pattern[0] != '|') { | ||
| 2023 | printk(KERN_WARNING "Unsafe core_pattern used with "\ | ||
| 2024 | "suid_dumpable=2. Pipe handler or fully qualified "\ | ||
| 2025 | "core dump path required.\n"); | ||
| 2026 | } | ||
| 2027 | } | ||
| 2028 | |||
| 2029 | static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, | ||
| 2030 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 2031 | { | ||
| 2032 | int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | ||
| 2033 | if (!error) | ||
| 2034 | validate_coredump_safety(); | ||
| 2035 | return error; | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | static int proc_dostring_coredump(struct ctl_table *table, int write, | ||
| 2039 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
| 2040 | { | ||
| 2041 | int error = proc_dostring(table, write, buffer, lenp, ppos); | ||
| 2042 | if (!error) | ||
| 2043 | validate_coredump_safety(); | ||
| 2044 | return error; | ||
| 2045 | } | ||
| 2046 | |||
| 2012 | static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, | 2047 | static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, |
| 2013 | void __user *buffer, | 2048 | void __user *buffer, |
| 2014 | size_t *lenp, loff_t *ppos, | 2049 | size_t *lenp, loff_t *ppos, |
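The new validate_coredump_safety() above warns when suid_dumpable=2 is combined with a relative core_pattern. The check itself, lifted into standalone form; treat the SUID_DUMPABLE_SAFE constant here as illustrative:

#include <stdio.h>

#define SUID_DUMPABLE_SAFE 2	/* illustrative stand-in for the kernel constant */

/*
 * Standalone form of the validate_coredump_safety() check added above:
 * with suid_dumpable at the "safe" value, only a pipe handler or an
 * absolute core dump path avoids the warning.
 */
static void validate_coredump_safety(int suid_dumpable, const char *core_pattern)
{
	if (suid_dumpable == SUID_DUMPABLE_SAFE &&
	    core_pattern[0] != '/' && core_pattern[0] != '|')
		printf("Unsafe core_pattern used with suid_dumpable=2. "
		       "Pipe handler or fully qualified core dump path required.\n");
}

int main(void)
{
	validate_coredump_safety(2, "core");			/* warns */
	validate_coredump_safety(2, "/var/crash/core.%p");	/* silent */
	validate_coredump_safety(2, "|/usr/bin/corehandler");	/* silent */
	return 0;
}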
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index a650694883a1..65bdcf198d4e 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c | |||
| @@ -147,7 +147,7 @@ static const struct bin_table bin_vm_table[] = { | |||
| 147 | { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" }, | 147 | { CTL_INT, VM_DIRTY_RATIO, "dirty_ratio" }, |
| 148 | /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */ | 148 | /* VM_DIRTY_WB_CS "dirty_writeback_centisecs" no longer used */ |
| 149 | /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */ | 149 | /* VM_DIRTY_EXPIRE_CS "dirty_expire_centisecs" no longer used */ |
| 150 | { CTL_INT, VM_NR_PDFLUSH_THREADS, "nr_pdflush_threads" }, | 150 | /* VM_NR_PDFLUSH_THREADS "nr_pdflush_threads" no longer used */ |
| 151 | { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" }, | 151 | { CTL_INT, VM_OVERCOMMIT_RATIO, "overcommit_ratio" }, |
| 152 | /* VM_PAGEBUF unused */ | 152 | /* VM_PAGEBUF unused */ |
| 153 | /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */ | 153 | /* VM_HUGETLB_PAGES "nr_hugepages" no longer used */ |
diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e66046456f4f..d0a32796550f 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c | |||
| @@ -436,6 +436,11 @@ static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
| 436 | 436 | ||
| 437 | na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, | 437 | na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, |
| 438 | sizeof(struct cgroupstats)); | 438 | sizeof(struct cgroupstats)); |
| 439 | if (na == NULL) { | ||
| 440 | rc = -EMSGSIZE; | ||
| 441 | goto err; | ||
| 442 | } | ||
| 443 | |||
| 439 | stats = nla_data(na); | 444 | stats = nla_data(na); |
| 440 | memset(stats, 0, sizeof(*stats)); | 445 | memset(stats, 0, sizeof(*stats)); |
| 441 | 446 | ||
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index a120f98c4112..5c38c81496ce 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c | |||
| @@ -3187,10 +3187,10 @@ static int tracing_set_tracer(const char *buf) | |||
| 3187 | } | 3187 | } |
| 3188 | destroy_trace_option_files(topts); | 3188 | destroy_trace_option_files(topts); |
| 3189 | 3189 | ||
| 3190 | current_trace = t; | 3190 | current_trace = &nop_trace; |
| 3191 | 3191 | ||
| 3192 | topts = create_trace_option_files(current_trace); | 3192 | topts = create_trace_option_files(t); |
| 3193 | if (current_trace->use_max_tr) { | 3193 | if (t->use_max_tr) { |
| 3194 | int cpu; | 3194 | int cpu; |
| 3195 | /* we need to make per cpu buffer sizes equivalent */ | 3195 | /* we need to make per cpu buffer sizes equivalent */ |
| 3196 | for_each_tracing_cpu(cpu) { | 3196 | for_each_tracing_cpu(cpu) { |
| @@ -3210,6 +3210,7 @@ static int tracing_set_tracer(const char *buf) | |||
| 3210 | goto out; | 3210 | goto out; |
| 3211 | } | 3211 | } |
| 3212 | 3212 | ||
| 3213 | current_trace = t; | ||
| 3213 | trace_branch_enable(tr); | 3214 | trace_branch_enable(tr); |
| 3214 | out: | 3215 | out: |
| 3215 | mutex_unlock(&trace_types_lock); | 3216 | mutex_unlock(&trace_types_lock); |
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index c7b0c6a7db09..a426f410c060 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include <linux/debugfs.h> | 13 | #include <linux/debugfs.h> |
| 14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
| 15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
| 16 | #include <linux/pstore.h> | ||
| 16 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 17 | 18 | ||
| 18 | #include "trace.h" | 19 | #include "trace.h" |
| @@ -74,6 +75,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) | |||
| 74 | preempt_enable_notrace(); | 75 | preempt_enable_notrace(); |
| 75 | } | 76 | } |
| 76 | 77 | ||
| 78 | /* Our two options */ | ||
| 79 | enum { | ||
| 80 | TRACE_FUNC_OPT_STACK = 0x1, | ||
| 81 | TRACE_FUNC_OPT_PSTORE = 0x2, | ||
| 82 | }; | ||
| 83 | |||
| 84 | static struct tracer_flags func_flags; | ||
| 85 | |||
| 77 | static void | 86 | static void |
| 78 | function_trace_call(unsigned long ip, unsigned long parent_ip) | 87 | function_trace_call(unsigned long ip, unsigned long parent_ip) |
| 79 | { | 88 | { |
| @@ -97,6 +106,12 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) | |||
| 97 | disabled = atomic_inc_return(&data->disabled); | 106 | disabled = atomic_inc_return(&data->disabled); |
| 98 | 107 | ||
| 99 | if (likely(disabled == 1)) { | 108 | if (likely(disabled == 1)) { |
| 109 | /* | ||
| 110 | * So far tracing doesn't support multiple buffers, so | ||
| 111 | * we make an explicit call for now. | ||
| 112 | */ | ||
| 113 | if (unlikely(func_flags.val & TRACE_FUNC_OPT_PSTORE)) | ||
| 114 | pstore_ftrace_call(ip, parent_ip); | ||
| 100 | pc = preempt_count(); | 115 | pc = preempt_count(); |
| 101 | trace_function(tr, ip, parent_ip, flags, pc); | 116 | trace_function(tr, ip, parent_ip, flags, pc); |
| 102 | } | 117 | } |
| @@ -158,15 +173,13 @@ static struct ftrace_ops trace_stack_ops __read_mostly = | |||
| 158 | .flags = FTRACE_OPS_FL_GLOBAL, | 173 | .flags = FTRACE_OPS_FL_GLOBAL, |
| 159 | }; | 174 | }; |
| 160 | 175 | ||
| 161 | /* Our two options */ | ||
| 162 | enum { | ||
| 163 | TRACE_FUNC_OPT_STACK = 0x1, | ||
| 164 | }; | ||
| 165 | |||
| 166 | static struct tracer_opt func_opts[] = { | 176 | static struct tracer_opt func_opts[] = { |
| 167 | #ifdef CONFIG_STACKTRACE | 177 | #ifdef CONFIG_STACKTRACE |
| 168 | { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, | 178 | { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, |
| 169 | #endif | 179 | #endif |
| 180 | #ifdef CONFIG_PSTORE_FTRACE | ||
| 181 | { TRACER_OPT(func_pstore, TRACE_FUNC_OPT_PSTORE) }, | ||
| 182 | #endif | ||
| 170 | { } /* Always set a last empty entry */ | 183 | { } /* Always set a last empty entry */ |
| 171 | }; | 184 | }; |
| 172 | 185 | ||
| @@ -204,10 +217,11 @@ static void tracing_stop_function_trace(void) | |||
| 204 | 217 | ||
| 205 | static int func_set_flag(u32 old_flags, u32 bit, int set) | 218 | static int func_set_flag(u32 old_flags, u32 bit, int set) |
| 206 | { | 219 | { |
| 207 | if (bit == TRACE_FUNC_OPT_STACK) { | 220 | switch (bit) { |
| 221 | case TRACE_FUNC_OPT_STACK: | ||
| 208 | /* do nothing if already set */ | 222 | /* do nothing if already set */ |
| 209 | if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) | 223 | if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) |
| 210 | return 0; | 224 | break; |
| 211 | 225 | ||
| 212 | if (set) { | 226 | if (set) { |
| 213 | unregister_ftrace_function(&trace_ops); | 227 | unregister_ftrace_function(&trace_ops); |
| @@ -217,10 +231,14 @@ static int func_set_flag(u32 old_flags, u32 bit, int set) | |||
| 217 | register_ftrace_function(&trace_ops); | 231 | register_ftrace_function(&trace_ops); |
| 218 | } | 232 | } |
| 219 | 233 | ||
| 220 | return 0; | 234 | break; |
| 235 | case TRACE_FUNC_OPT_PSTORE: | ||
| 236 | break; | ||
| 237 | default: | ||
| 238 | return -EINVAL; | ||
| 221 | } | 239 | } |
| 222 | 240 | ||
| 223 | return -EINVAL; | 241 | return 0; |
| 224 | } | 242 | } |
| 225 | 243 | ||
| 226 | static struct tracer function_trace __read_mostly = | 244 | static struct tracer function_trace __read_mostly = |
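The func_set_flag() rework above turns the single if into a switch so the new TRACE_FUNC_OPT_PSTORE bit is accepted without side effects while unknown bits still fail. A minimal sketch of that control flow, with the ftrace (un)registration reduced to a comment; the flag values match the enum shown in the hunk:

#include <stdio.h>

/*
 * Control-flow sketch of the reworked func_set_flag(): one switch arm per
 * option bit, unknown bits rejected, everything else ending in a shared
 * "return 0".
 */
enum {
	TRACE_FUNC_OPT_STACK  = 0x1,
	TRACE_FUNC_OPT_PSTORE = 0x2,
};

static unsigned int func_flags_val;	/* current option bits */

static int func_set_flag(unsigned int bit, int set)
{
	switch (bit) {
	case TRACE_FUNC_OPT_STACK:
		/* do nothing if already in the requested state */
		if (!!set == !!(func_flags_val & TRACE_FUNC_OPT_STACK))
			break;
		/* the real code swaps trace_ops and trace_stack_ops here */
		break;
	case TRACE_FUNC_OPT_PSTORE:
		/* nothing to do: the flag value alone gates pstore_ftrace_call() */
		break;
	default:
		return -1;		/* -EINVAL in the kernel */
	}
	return 0;
}

int main(void)
{
	printf("stack: %d, pstore: %d, bogus: %d\n",
	       func_set_flag(TRACE_FUNC_OPT_STACK, 1),
	       func_set_flag(TRACE_FUNC_OPT_PSTORE, 1),
	       func_set_flag(0x80, 1));
	return 0;
}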
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 4b1dfba70f7c..69add8a9da68 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
| @@ -575,7 +575,7 @@ out: | |||
| 575 | /* | 575 | /* |
| 576 | * Create/destroy watchdog threads as CPUs come and go: | 576 | * Create/destroy watchdog threads as CPUs come and go: |
| 577 | */ | 577 | */ |
| 578 | static int __cpuinit | 578 | static int |
| 579 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | 579 | cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) |
| 580 | { | 580 | { |
| 581 | int hotcpu = (unsigned long)hcpu; | 581 | int hotcpu = (unsigned long)hcpu; |
| @@ -610,10 +610,27 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
| 610 | return NOTIFY_OK; | 610 | return NOTIFY_OK; |
| 611 | } | 611 | } |
| 612 | 612 | ||
| 613 | static struct notifier_block __cpuinitdata cpu_nfb = { | 613 | static struct notifier_block cpu_nfb = { |
| 614 | .notifier_call = cpu_callback | 614 | .notifier_call = cpu_callback |
| 615 | }; | 615 | }; |
| 616 | 616 | ||
| 617 | #ifdef CONFIG_SUSPEND | ||
| 618 | /* | ||
| 619 | * On exit from suspend we force an offline->online transition on the boot CPU | ||
| 620 | * so that the PMU state that was lost while in suspended state gets set up | ||
| 621 | * properly for the boot CPU. This information is required for restarting the | ||
| 622 | * NMI watchdog. | ||
| 623 | */ | ||
| 624 | void lockup_detector_bootcpu_resume(void) | ||
| 625 | { | ||
| 626 | void *cpu = (void *)(long)smp_processor_id(); | ||
| 627 | |||
| 628 | cpu_callback(&cpu_nfb, CPU_DEAD_FROZEN, cpu); | ||
| 629 | cpu_callback(&cpu_nfb, CPU_UP_PREPARE_FROZEN, cpu); | ||
| 630 | cpu_callback(&cpu_nfb, CPU_ONLINE_FROZEN, cpu); | ||
| 631 | } | ||
| 632 | #endif | ||
| 633 | |||
| 617 | void __init lockup_detector_init(void) | 634 | void __init lockup_detector_init(void) |
| 618 | { | 635 | { |
| 619 | void *cpu = (void *)(long)smp_processor_id(); | 636 | void *cpu = (void *)(long)smp_processor_id(); |
