41 files changed, 703 insertions, 189 deletions
diff --git a/CREDITS b/CREDITS
@@ -9,6 +9,10 @@
 Linus
 ----------
 
+N: Matt Mackall
+E: mpm@selenic.com
+D: SLOB slab allocator
+
 N: Matti Aarnio
 E: mea@nic.funet.fi
 D: Alpha systems hacking, IPv6 and other network related stuff
diff --git a/Documentation/accounting/getdelays.c b/Documentation/accounting/getdelays.c
index c6a06b71594d..f40578026a04 100644
--- a/Documentation/accounting/getdelays.c
+++ b/Documentation/accounting/getdelays.c
@@ -314,6 +314,7 @@ int main(int argc, char *argv[])
                         break;
                 case 'm':
                         strncpy(cpumask, optarg, sizeof(cpumask));
+                        cpumask[sizeof(cpumask) - 1] = '\0';
                         maskset = 1;
                         printf("cpumask %s maskset %d\n", cpumask, maskset);
                         break;
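
For reference, the C pitfall this one-liner addresses: strncpy() does not NUL-terminate the destination when the source is as long as, or longer than, the buffer, so a later printf("%s") can read past the end. A minimal standalone sketch of the fixed idiom (the hard-coded optarg value is illustrative only):

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char cpumask[8];
        const char *optarg = "0-3,8-11,16-23";  /* stands in for getopt()'s optarg */

        /* strncpy() leaves no terminator when strlen(optarg) >= sizeof(cpumask) */
        strncpy(cpumask, optarg, sizeof(cpumask));
        cpumask[sizeof(cpumask) - 1] = '\0';    /* the fix: force termination */

        printf("cpumask %s\n", cpumask);        /* safe: prints the truncated "0-3,8-1" */
        return 0;
    }
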
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 884904975d0b..c1b9aa8c5a52 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -3130,6 +3130,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                         [KNL] Should the soft-lockup detector generate panics.
                         Format: <integer>
 
+        softlockup_all_cpu_backtrace=
+                        [KNL] Should the soft-lockup detector generate
+                        backtraces on all cpus.
+                        Format: <integer>
+
         sonypi.*=       [HW] Sony Programmable I/O Control Device driver
                         See Documentation/laptops/sonypi.txt
 
diff --git a/Documentation/memory-hotplug.txt b/Documentation/memory-hotplug.txt
index f304edb8fbe7..45134dc23854 100644
--- a/Documentation/memory-hotplug.txt
+++ b/Documentation/memory-hotplug.txt
@@ -209,15 +209,12 @@ If memory device is found, memory hotplug code will be called.
 
 4.2 Notify memory hot-add event by hand
 ------------
-On powerpc, the firmware does not notify a memory hotplug event to the kernel.
-Therefore, "probe" interface is supported to notify the event to the kernel.
-This interface depends on CONFIG_ARCH_MEMORY_PROBE.
-
-CONFIG_ARCH_MEMORY_PROBE is supported on powerpc only. On x86, this config
-option is disabled by default since ACPI notifies a memory hotplug event to
-the kernel, which performs its hotplug operation as the result. Please
-enable this option if you need the "probe" interface for testing purposes
-on x86.
+On some architectures, the firmware may not notify the kernel of a memory
+hotplug event. Therefore, the memory "probe" interface is supported to
+explicitly notify the kernel. This interface depends on
+CONFIG_ARCH_MEMORY_PROBE and can be configured on powerpc, sh, and x86
+if hotplug is supported, although for x86 this should be handled by ACPI
+notification.
 
 Probe interface is located at
 /sys/devices/system/memory/probe
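
For illustration, a minimal sketch of driving the probe interface from user space. The sysfs path comes from the text above; the physical address is a placeholder and must be the actual start address of the memory section being hot-added (suitably aligned), and the write needs root plus a kernel built with CONFIG_ARCH_MEMORY_PROBE:

    #include <stdio.h>
    #include <stdlib.h>

    int main(void)
    {
        /* Placeholder: must be the physical start of the section being added. */
        const char *start_addr = "0x100000000";
        FILE *f = fopen("/sys/devices/system/memory/probe", "w");

        if (!f) {               /* needs root and CONFIG_ARCH_MEMORY_PROBE */
            perror("open probe");
            return EXIT_FAILURE;
        }
        if (fprintf(f, "%s\n", start_addr) < 0)
            perror("write probe");
        return fclose(f) ? EXIT_FAILURE : EXIT_SUCCESS;
    }
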
diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt
index 708bb7f1b7e0..c14374e71775 100644
--- a/Documentation/sysctl/kernel.txt
+++ b/Documentation/sysctl/kernel.txt
@@ -75,6 +75,7 @@ show up in /proc/sys/kernel:
 - shmall
 - shmmax [ sysv ipc ]
 - shmmni
+- softlockup_all_cpu_backtrace
 - stop-a [ SPARC only ]
 - sysrq ==> Documentation/sysrq.txt
 - sysctl_writes_strict
@@ -783,6 +784,22 @@ via the /proc/sys interface:
 
 ==============================================================
 
+softlockup_all_cpu_backtrace:
+
+This value controls the soft lockup detector thread's behavior
+when a soft lockup condition is detected as to whether or not
+to gather further debug information. If enabled, each cpu will
+be issued an NMI and instructed to capture stack trace.
+
+This feature is only applicable for architectures which support
+NMI.
+
+0: do nothing. This is the default behavior.
+
+1: on detection capture more debug information.
+
+==============================================================
+
 tainted:
 
 Non-zero if the kernel has been tainted. Numeric values, which
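
For illustration, toggling the new knob from user space. This is only a sketch: it assumes a kernel that already carries this patch, and it must run as root:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "/proc/sys/kernel/softlockup_all_cpu_backtrace";
        char buf[8];
        ssize_t n;
        int fd = open(path, O_RDWR);

        if (fd < 0) {
            perror("open");             /* kernel too old, or not root */
            return 1;
        }
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("current: %s", buf); /* "0\n" by default */
        }
        if (pwrite(fd, "1\n", 2, 0) != 2)   /* enable all-CPU backtraces */
            perror("write");
        close(fd);
        return 0;
    }
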
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index bd4b34c03738..4415aa915681 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -702,7 +702,8 @@ The batch value of each per cpu pagelist is also updated as a result. It is
 set to pcp->high/4. The upper limit of batch is (PAGE_SHIFT * 8)
 
 The initial value is zero. Kernel does not use this value at boot time to set
-the high water marks for each per cpu page list.
+the high water marks for each per cpu page list. If the user writes '0' to this
+sysctl, it will revert to this default behavior.
 
 ==============================================================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 3f2e171047b9..3cc94fff780f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8196,13 +8196,15 @@ S: Maintained
 F: drivers/usb/misc/sisusbvga/
 
 SLAB ALLOCATOR
-M: Christoph Lameter <cl@linux-foundation.org>
+M: Christoph Lameter <cl@linux.com>
 M: Pekka Enberg <penberg@kernel.org>
-M: Matt Mackall <mpm@selenic.com>
+M: David Rientjes <rientjes@google.com>
+M: Joonsoo Kim <iamjoonsoo.kim@lge.com>
+M: Andrew Morton <akpm@linux-foundation.org>
 L: linux-mm@kvack.org
 S: Maintained
 F: include/linux/sl?b*.h
-F: mm/sl?b.c
+F: mm/sl?b*
 
 SLEEPABLE READ-COPY UPDATE (SRCU)
 M: Lai Jiangshan <laijs@cn.fujitsu.com>
diff --git a/arch/ia64/include/uapi/asm/fcntl.h b/arch/ia64/include/uapi/asm/fcntl.h
index 1dd275dc8f65..7b485876cad4 100644
--- a/arch/ia64/include/uapi/asm/fcntl.h
+++ b/arch/ia64/include/uapi/asm/fcntl.h
@@ -8,6 +8,7 @@
 #define force_o_largefile()        \
         (personality(current->personality) != PER_LINUX32)
 
+#include <linux/personality.h>
 #include <asm-generic/fcntl.h>
 
 #endif /* _ASM_IA64_FCNTL_H */
diff --git a/arch/sparc/include/asm/irq_64.h b/arch/sparc/include/asm/irq_64.h
index 375cffcf7dbd..91d219381306 100644
--- a/arch/sparc/include/asm/irq_64.h
+++ b/arch/sparc/include/asm/irq_64.h
@@ -89,7 +89,7 @@ static inline unsigned long get_softint(void)
         return retval;
 }
 
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 
 extern void *hardirq_stack[NR_CPUS];
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index b2988f25e230..027e09986194 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -239,7 +239,7 @@ static void __global_reg_poll(struct global_reg_snapshot *gp)
         }
 }
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
         struct thread_info *tp = current_thread_info();
         struct pt_regs *regs = get_irq_regs();
@@ -251,16 +251,22 @@ void arch_trigger_all_cpu_backtrace(void)
 
         spin_lock_irqsave(&global_cpu_snapshot_lock, flags);
 
-        memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
-
         this_cpu = raw_smp_processor_id();
 
-        __global_reg_self(tp, regs, this_cpu);
+        memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
+
+        if (include_self)
+                __global_reg_self(tp, regs, this_cpu);
 
         smp_fetch_global_regs();
 
         for_each_online_cpu(cpu) {
-                struct global_reg_snapshot *gp = &global_cpu_snapshot[cpu].reg;
+                struct global_reg_snapshot *gp;
+
+                if (!include_self && cpu == this_cpu)
+                        continue;
+
+                gp = &global_cpu_snapshot[cpu].reg;
 
                 __global_reg_poll(gp);
 
@@ -292,7 +298,7 @@ void arch_trigger_all_cpu_backtrace(void)
 
 static void sysrq_handle_globreg(int key)
 {
-        arch_trigger_all_cpu_backtrace();
+        arch_trigger_all_cpu_backtrace(true);
 }
 
 static struct sysrq_key_op sparc_globalreg_op = {
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index cb6cfcd034cf..a80cbb88ea91 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -43,7 +43,7 @@ extern int vector_used_by_percpu_irq(unsigned int vector);
 extern void init_ISA_irqs(void);
 
 #ifdef CONFIG_X86_LOCAL_APIC
-void arch_trigger_all_cpu_backtrace(void);
+void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
 #endif
 
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index c3fcb5de5083..6a1e71bde323 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -33,31 +33,41 @@ static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
 /* "in progress" flag of arch_trigger_all_cpu_backtrace */
 static unsigned long backtrace_flag;
 
-void arch_trigger_all_cpu_backtrace(void)
+void arch_trigger_all_cpu_backtrace(bool include_self)
 {
         int i;
+        int cpu = get_cpu();
 
-        if (test_and_set_bit(0, &backtrace_flag))
+        if (test_and_set_bit(0, &backtrace_flag)) {
                 /*
                  * If there is already a trigger_all_cpu_backtrace() in progress
                  * (backtrace_flag == 1), don't output double cpu dump infos.
                  */
+                put_cpu();
                 return;
+        }
 
         cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+        if (!include_self)
+                cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
 
-        printk(KERN_INFO "sending NMI to all CPUs:\n");
-        apic->send_IPI_all(NMI_VECTOR);
+        if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+                pr_info("sending NMI to %s CPUs:\n",
+                        (include_self ? "all" : "other"));
+                apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+        }
 
         /* Wait for up to 10 seconds for all CPUs to do the backtrace */
         for (i = 0; i < 10 * 1000; i++) {
                 if (cpumask_empty(to_cpumask(backtrace_mask)))
                         break;
                 mdelay(1);
+                touch_softlockup_watchdog();
         }
 
         clear_bit(0, &backtrace_flag);
         smp_mb__after_atomic();
+        put_cpu();
 }
 
 static int
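
The backtrace_flag handling above is a standard single-entry guard: test_and_set_bit() atomically claims the operation, and any concurrent caller that loses the race backs off instead of producing a second dump. A userspace analog of the same protocol, using C11 atomics (names are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>

    /* Single-entry guard, analogous to backtrace_flag: whoever sets the
     * flag first owns the dump; everyone else returns immediately. */
    static atomic_flag dump_in_progress = ATOMIC_FLAG_INIT;

    static void trigger_dump(void)
    {
        if (atomic_flag_test_and_set(&dump_in_progress))
            return;             /* a dump is already running: don't double-report */

        puts("dumping state..."); /* the section that must not run twice at once */

        atomic_flag_clear(&dump_in_progress); /* pairs with test_and_set above */
    }

    int main(void)
    {
        trigger_dump();
        trigger_dump();         /* runs again, since the flag was cleared */
        return 0;
    }
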
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
index 83969f8c5727..6467c919c509 100644
--- a/drivers/base/dma-contiguous.c
+++ b/drivers/base/dma-contiguous.c
@@ -176,14 +176,24 @@ static int __init cma_activate_area(struct cma *cma)
                 base_pfn = pfn;
                 for (j = pageblock_nr_pages; j; --j, pfn++) {
                         WARN_ON_ONCE(!pfn_valid(pfn));
+                        /*
+                         * alloc_contig_range requires the pfn range
+                         * specified to be in the same zone. Make this
+                         * simple by forcing the entire CMA resv range
+                         * to be in the same zone.
+                         */
                         if (page_zone(pfn_to_page(pfn)) != zone)
-                                return -EINVAL;
+                                goto err;
                 }
                 init_cma_reserved_pageblock(pfn_to_page(base_pfn));
         } while (--i);
 
         mutex_init(&cma->lock);
         return 0;
+
+err:
+        kfree(cma->bitmap);
+        return -EINVAL;
 }
 
 static struct cma cma_areas[MAX_CMA_AREAS];
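
The change above converts an early return into the common kernel goto-cleanup pattern, so the partially initialized bitmap is freed on every failure path. A userspace sketch of the same structure (the struct and field names are made up for illustration):

    #include <stdlib.h>

    /* Hypothetical two-buffer object, just to show the shape of the pattern. */
    struct area {
        unsigned char *bitmap;
        unsigned char *shadow;
    };

    static int area_init(struct area *a, size_t len)
    {
        a->bitmap = calloc(1, len);
        if (!a->bitmap)
            return -1;

        a->shadow = calloc(1, len);
        if (!a->shadow)
            goto err;       /* unwind exactly what has been set up so far */

        return 0;

    err:
        free(a->bitmap);
        a->bitmap = NULL;
        return -1;
    }

    int main(void)
    {
        struct area a;

        if (area_init(&a, 4096))
            return 1;
        free(a.shadow);
        free(a.bitmap);
        return 0;
    }
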
diff --git a/drivers/memstick/host/rtsx_pci_ms.c b/drivers/memstick/host/rtsx_pci_ms.c
index 2a635b6fdaf7..c880ba685754 100644
--- a/drivers/memstick/host/rtsx_pci_ms.c
+++ b/drivers/memstick/host/rtsx_pci_ms.c
@@ -601,6 +601,7 @@ static int rtsx_pci_ms_drv_remove(struct platform_device *pdev)
         pcr->slots[RTSX_MS_CARD].card_event = NULL;
         msh = host->msh;
         host->eject = true;
+        cancel_work_sync(&host->handle_req);
 
         mutex_lock(&host->host_mutex);
         if (host->req) {
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index a106b3f2b22a..fae17c640df3 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -331,6 +331,7 @@ struct dlm_lock_resource
         u16 state;
         char lvb[DLM_LVB_LEN];
         unsigned int inflight_locks;
+        unsigned int inflight_assert_workers;
         unsigned long refmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 };
 
@@ -910,6 +911,9 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
 void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
                                    struct dlm_lock_resource *res);
 
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+                struct dlm_lock_resource *res);
+
 void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3087a21d32f9..82abf0cc9a12 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -581,6 +581,7 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
         atomic_set(&res->asts_reserved, 0);
         res->migration_pending = 0;
         res->inflight_locks = 0;
+        res->inflight_assert_workers = 0;
 
         res->dlm = dlm;
 
@@ -683,6 +684,43 @@ void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
         wake_up(&res->wq);
 }
 
+void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+                struct dlm_lock_resource *res)
+{
+        assert_spin_locked(&res->spinlock);
+        res->inflight_assert_workers++;
+        mlog(0, "%s:%.*s: inflight assert worker++: now %u\n",
+                        dlm->name, res->lockname.len, res->lockname.name,
+                        res->inflight_assert_workers);
+}
+
+static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
+                struct dlm_lock_resource *res)
+{
+        spin_lock(&res->spinlock);
+        __dlm_lockres_grab_inflight_worker(dlm, res);
+        spin_unlock(&res->spinlock);
+}
+
+static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+                struct dlm_lock_resource *res)
+{
+        assert_spin_locked(&res->spinlock);
+        BUG_ON(res->inflight_assert_workers == 0);
+        res->inflight_assert_workers--;
+        mlog(0, "%s:%.*s: inflight assert worker--: now %u\n",
+                        dlm->name, res->lockname.len, res->lockname.name,
+                        res->inflight_assert_workers);
+}
+
+static void dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
+                struct dlm_lock_resource *res)
+{
+        spin_lock(&res->spinlock);
+        __dlm_lockres_drop_inflight_worker(dlm, res);
+        spin_unlock(&res->spinlock);
+}
+
 /*
  * lookup a lock resource by name.
  * may already exist in the hashtable.
@@ -1603,7 +1641,8 @@ send_response:
                         mlog(ML_ERROR, "failed to dispatch assert master work\n");
                         response = DLM_MASTER_RESP_ERROR;
                         dlm_lockres_put(res);
-                }
+                } else
+                        dlm_lockres_grab_inflight_worker(dlm, res);
         } else {
                 if (res)
                         dlm_lockres_put(res);
@@ -2118,6 +2157,8 @@ static void dlm_assert_master_worker(struct dlm_work_item *item, void *data)
         dlm_lockres_release_ast(dlm, res);
 
 put:
+        dlm_lockres_drop_inflight_worker(dlm, res);
+
         dlm_lockres_put(res);
 
         mlog(0, "finished with dlm_assert_master_worker\n");
@@ -3088,11 +3129,15 @@ static int dlm_add_migration_mle(struct dlm_ctxt *dlm,
                         /* remove it so that only one mle will be found */
                         __dlm_unlink_mle(dlm, tmp);
                         __dlm_mle_detach_hb_events(dlm, tmp);
-                        ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
-                        mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
-                                        "telling master to get ref for cleared out mle "
-                                        "during migration\n", dlm->name, namelen, name,
-                                        master, new_master);
+                        if (tmp->type == DLM_MLE_MASTER) {
+                                ret = DLM_MIGRATE_RESPONSE_MASTERY_REF;
+                                mlog(0, "%s:%.*s: master=%u, newmaster=%u, "
+                                                "telling master to get ref "
+                                                "for cleared out mle during "
+                                                "migration\n", dlm->name,
+                                                namelen, name, master,
+                                                new_master);
+                        }
                 }
                 spin_unlock(&tmp->spinlock);
         }
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 5de019437ea5..45067faf5695 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1708,7 +1708,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
                         mlog_errno(-ENOMEM);
                         /* retry!? */
                         BUG();
-                }
+                } else
+                        __dlm_lockres_grab_inflight_worker(dlm, res);
         } else /* put.. incase we are not the master */
                 dlm_lockres_put(res);
         spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 9db869de829d..69aac6f088ad 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -259,12 +259,15 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm,
                  * refs on it. */
                 unused = __dlm_lockres_unused(lockres);
                 if (!unused ||
-                    (lockres->state & DLM_LOCK_RES_MIGRATING)) {
+                    (lockres->state & DLM_LOCK_RES_MIGRATING) ||
+                    (lockres->inflight_assert_workers != 0)) {
                         mlog(0, "%s: res %.*s is in use or being remastered, "
-                             "used %d, state %d\n", dlm->name,
-                             lockres->lockname.len, lockres->lockname.name,
-                             !unused, lockres->state);
-                        list_move_tail(&dlm->purge_list, &lockres->purge);
+                             "used %d, state %d, assert master workers %u\n",
+                             dlm->name, lockres->lockname.len,
+                             lockres->lockname.name,
+                             !unused, lockres->state,
+                             lockres->inflight_assert_workers);
+                        list_move_tail(&lockres->purge, &dlm->purge_list);
                         spin_unlock(&lockres->spinlock);
                         continue;
                 }
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 5698b52cf5c9..2e3c9dbab68c 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -191,7 +191,9 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
                                         DLM_UNLOCK_CLEAR_CONVERT_TYPE);
         } else if (status == DLM_RECOVERING ||
                    status == DLM_MIGRATING ||
-                   status == DLM_FORWARD) {
+                   status == DLM_FORWARD ||
+                   status == DLM_NOLOCKMGR
+                   ) {
                 /* must clear the actions because this unlock
                  * is about to be retried. cannot free or do
                  * any list manipulation. */
@@ -200,7 +202,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
                      res->lockname.name,
                      status==DLM_RECOVERING?"recovering":
                      (status==DLM_MIGRATING?"migrating":
-                     "forward"));
+                     (status == DLM_FORWARD ? "forward" :
+                     "nolockmanager")));
                 actions = 0;
         }
         if (flags & LKM_CANCEL)
@@ -364,7 +367,10 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
                  * updated state to the recovery master. this thread
                  * just needs to finish out the operation and call
                  * the unlockast. */
-                ret = DLM_NORMAL;
+                if (dlm_is_node_dead(dlm, owner))
+                        ret = DLM_NORMAL;
+                else
+                        ret = DLM_NOLOCKMGR;
         } else {
                 /* something bad. this will BUG in ocfs2 */
                 ret = dlm_err_to_dlm_status(tmpret);
@@ -638,7 +644,9 @@ retry:
 
         if (status == DLM_RECOVERING ||
             status == DLM_MIGRATING ||
-            status == DLM_FORWARD) {
+            status == DLM_FORWARD ||
+            status == DLM_NOLOCKMGR) {
+
                 /* We want to go away for a tiny bit to allow recovery
                  * / migration to complete on this resource. I don't
                  * know of any wait queue we could sleep on as this
@@ -650,7 +658,7 @@ retry:
                 msleep(50);
 
                 mlog(0, "retrying unlock due to pending recovery/"
-                     "migration/in-progress\n");
+                     "migration/in-progress/reconnect\n");
                 goto retry;
         }
 
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 2060fc398445..8add6f1030d7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -205,6 +205,21 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
         return inode;
 }
 
+static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
+                struct dentry *dentry, struct inode *inode)
+{
+        struct ocfs2_dentry_lock *dl = dentry->d_fsdata;
+
+        ocfs2_simple_drop_lockres(osb, &dl->dl_lockres);
+        ocfs2_lock_res_free(&dl->dl_lockres);
+        BUG_ON(dl->dl_count != 1);
+        spin_lock(&dentry_attach_lock);
+        dentry->d_fsdata = NULL;
+        spin_unlock(&dentry_attach_lock);
+        kfree(dl);
+        iput(inode);
+}
+
 static int ocfs2_mknod(struct inode *dir,
                        struct dentry *dentry,
                        umode_t mode,
@@ -231,6 +246,7 @@ static int ocfs2_mknod(struct inode *dir,
         sigset_t oldset;
         int did_block_signals = 0;
         struct posix_acl *default_acl = NULL, *acl = NULL;
+        struct ocfs2_dentry_lock *dl = NULL;
 
         trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
                           (unsigned long long)OCFS2_I(dir)->ip_blkno,
@@ -423,6 +439,8 @@ static int ocfs2_mknod(struct inode *dir,
                 goto leave;
         }
 
+        dl = dentry->d_fsdata;
+
         status = ocfs2_add_entry(handle, dentry, inode,
                                  OCFS2_I(inode)->ip_blkno, parent_fe_bh,
                                  &lookup);
@@ -469,6 +487,9 @@ leave:
          * ocfs2_delete_inode will mutex_lock again.
          */
         if ((status < 0) && inode) {
+                if (dl)
+                        ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
+
                 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
                 clear_nlink(inode);
                 iput(inode);
@@ -991,6 +1012,65 @@ leave:
         return status;
 }
 
+static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
+                u64 src_inode_no, u64 dest_inode_no)
+{
+        int ret = 0, i = 0;
+        u64 parent_inode_no = 0;
+        u64 child_inode_no = src_inode_no;
+        struct inode *child_inode;
+
+#define MAX_LOOKUP_TIMES 32
+        while (1) {
+                child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
+                if (IS_ERR(child_inode)) {
+                        ret = PTR_ERR(child_inode);
+                        break;
+                }
+
+                ret = ocfs2_inode_lock(child_inode, NULL, 0);
+                if (ret < 0) {
+                        iput(child_inode);
+                        if (ret != -ENOENT)
+                                mlog_errno(ret);
+                        break;
+                }
+
+                ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
+                                &parent_inode_no);
+                ocfs2_inode_unlock(child_inode, 0);
+                iput(child_inode);
+                if (ret < 0) {
+                        ret = -ENOENT;
+                        break;
+                }
+
+                if (parent_inode_no == dest_inode_no) {
+                        ret = 1;
+                        break;
+                }
+
+                if (parent_inode_no == osb->root_inode->i_ino) {
+                        ret = 0;
+                        break;
+                }
+
+                child_inode_no = parent_inode_no;
+
+                if (++i >= MAX_LOOKUP_TIMES) {
+                        mlog(ML_NOTICE, "max lookup times reached, filesystem "
+                                        "may have nested directories, "
+                                        "src inode: %llu, dest inode: %llu.\n",
+                                        (unsigned long long)src_inode_no,
+                                        (unsigned long long)dest_inode_no);
+                        ret = 0;
+                        break;
+                }
+        }
+
+        return ret;
+}
+
 /*
  * The only place this should be used is rename!
  * if they have the same id, then the 1st one is the only one locked.
@@ -1002,6 +1082,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
                              struct inode *inode2)
 {
         int status;
+        int inode1_is_ancestor, inode2_is_ancestor;
         struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
         struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
         struct buffer_head **tmpbh;
@@ -1015,9 +1096,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
         if (*bh2)
                 *bh2 = NULL;
 
-        /* we always want to lock the one with the lower lockid first. */
+        /* we always want to lock the one with the lower lockid first.
+         * and if they are nested, we lock ancestor first */
         if (oi1->ip_blkno != oi2->ip_blkno) {
-                if (oi1->ip_blkno < oi2->ip_blkno) {
+                inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
+                                oi1->ip_blkno);
+                if (inode1_is_ancestor < 0) {
+                        status = inode1_is_ancestor;
+                        goto bail;
+                }
+
+                inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
+                                oi2->ip_blkno);
+                if (inode2_is_ancestor < 0) {
+                        status = inode2_is_ancestor;
+                        goto bail;
+                }
+
+                if ((inode1_is_ancestor == 1) ||
+                                (oi1->ip_blkno < oi2->ip_blkno &&
+                                inode2_is_ancestor == 0)) {
                         /* switch id1 and id2 around */
                         tmpbh = bh2;
                         bh2 = bh1;
@@ -1098,6 +1196,7 @@ static int ocfs2_rename(struct inode *old_dir,
         struct ocfs2_dir_lookup_result old_entry_lookup = { NULL, };
         struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
         struct ocfs2_dir_lookup_result target_insert = { NULL, };
+        bool should_add_orphan = false;
 
         /* At some point it might be nice to break this function up a
          * bit. */
@@ -1134,6 +1233,21 @@ static int ocfs2_rename(struct inode *old_dir,
                         goto bail;
                 }
                 rename_lock = 1;
+
+                /* here we cannot guarantee the inodes haven't just been
+                 * changed, so check if they are nested again */
+                status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
+                                old_inode->i_ino);
+                if (status < 0) {
+                        mlog_errno(status);
+                        goto bail;
+                } else if (status == 1) {
+                        status = -EPERM;
+                        trace_ocfs2_rename_not_permitted(
+                                        (unsigned long long)old_inode->i_ino,
+                                        (unsigned long long)new_dir->i_ino);
+                        goto bail;
+                }
         }
 
         /* if old and new are the same, this'll just do one lock. */
@@ -1304,6 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
                                 mlog_errno(status);
                                 goto bail;
                         }
+                        should_add_orphan = true;
                 }
         } else {
                 BUG_ON(new_dentry->d_parent->d_inode != new_dir);
@@ -1348,17 +1463,6 @@ static int ocfs2_rename(struct inode *old_dir,
                         goto bail;
                 }
 
-                if (S_ISDIR(new_inode->i_mode) ||
-                    (ocfs2_read_links_count(newfe) == 1)) {
-                        status = ocfs2_orphan_add(osb, handle, new_inode,
-                                                  newfe_bh, orphan_name,
-                                                  &orphan_insert, orphan_dir);
-                        if (status < 0) {
-                                mlog_errno(status);
-                                goto bail;
-                        }
-                }
-
                 /* change the dirent to point to the correct inode */
                 status = ocfs2_update_entry(new_dir, handle, &target_lookup_res,
                                             old_inode);
@@ -1373,6 +1477,15 @@ static int ocfs2_rename(struct inode *old_dir,
                 else
                         ocfs2_add_links_count(newfe, -1);
                 ocfs2_journal_dirty(handle, newfe_bh);
+                if (should_add_orphan) {
+                        status = ocfs2_orphan_add(osb, handle, new_inode,
+                                        newfe_bh, orphan_name,
+                                        &orphan_insert, orphan_dir);
+                        if (status < 0) {
+                                mlog_errno(status);
+                                goto bail;
+                        }
+                }
         } else {
                 /* if the name was not found in new_dir, add it now */
                 status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1642,6 +1755,7 @@ static int ocfs2_symlink(struct inode *dir,
         struct ocfs2_dir_lookup_result lookup = { NULL, };
         sigset_t oldset;
         int did_block_signals = 0;
+        struct ocfs2_dentry_lock *dl = NULL;
 
         trace_ocfs2_symlink_begin(dir, dentry, symname,
                                   dentry->d_name.len, dentry->d_name.name);
@@ -1830,6 +1944,8 @@ static int ocfs2_symlink(struct inode *dir,
                 goto bail;
         }
 
+        dl = dentry->d_fsdata;
+
         status = ocfs2_add_entry(handle, dentry, inode,
                                  le64_to_cpu(fe->i_blkno), parent_fe_bh,
                                  &lookup);
@@ -1864,6 +1980,9 @@ bail:
         if (xattr_ac)
                 ocfs2_free_alloc_context(xattr_ac);
         if ((status < 0) && inode) {
+                if (dl)
+                        ocfs2_cleanup_add_entry_failure(osb, dentry, inode);
+
                 OCFS2_I(inode)->ip_flags |= OCFS2_INODE_SKIP_ORPHAN_DIR;
                 clear_nlink(inode);
                 iput(inode);
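
The ocfs2_double_lock() change above applies the classic deadlock-avoidance rule: when two locks must be held together, every caller acquires them in one canonical order (here by block number, now refined so an ancestor directory is always locked first). A minimal pthread sketch of the same idea, ordering by address:

    #include <pthread.h>
    #include <stdio.h>

    /* Canonical lock ordering: every caller that needs both mutexes takes
     * the lower-addressed one first, so two threads locking the same pair
     * can never each hold one and wait on the other. */
    static void double_lock(pthread_mutex_t *m1, pthread_mutex_t *m2)
    {
        if (m1 > m2) {              /* normalize to a single global order */
            pthread_mutex_t *tmp = m1;
            m1 = m2;
            m2 = tmp;
        }
        pthread_mutex_lock(m1);
        if (m2 != m1)               /* same lock passed twice: take it once */
            pthread_mutex_lock(m2);
    }

    static void double_unlock(pthread_mutex_t *m1, pthread_mutex_t *m2)
    {
        pthread_mutex_unlock(m1);
        if (m2 != m1)
            pthread_mutex_unlock(m2);
    }

    int main(void)
    {
        pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
        pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;

        double_lock(&a, &b);        /* same order however the args are passed */
        puts("both locks held");
        double_unlock(&a, &b);
        return 0;
    }
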
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 1b60c62aa9d6..6cb019b7c6a8 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -2292,6 +2292,8 @@ TRACE_EVENT(ocfs2_rename,
                   __entry->new_len, __get_str(new_name))
 );
 
+DEFINE_OCFS2_ULL_ULL_EVENT(ocfs2_rename_not_permitted);
+
 TRACE_EVENT(ocfs2_rename_target_exists,
         TP_PROTO(int new_len, const char *new_name),
         TP_ARGS(new_len, new_name),
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 714e53b9cc66..636aab69ead5 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4288,9 +4288,16 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
                 goto out;
         }
 
+        error = ocfs2_rw_lock(inode, 1);
+        if (error) {
+                mlog_errno(error);
+                goto out;
+        }
+
         error = ocfs2_inode_lock(inode, &old_bh, 1);
         if (error) {
                 mlog_errno(error);
+                ocfs2_rw_unlock(inode, 1);
                 goto out;
         }
 
@@ -4302,6 +4309,7 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
         up_write(&OCFS2_I(inode)->ip_xattr_sem);
 
         ocfs2_inode_unlock(inode, 1);
+        ocfs2_rw_unlock(inode, 1);
         brelse(old_bh);
 
         if (error) {
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c7a89cea5c5d..ddb662b32447 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1925,15 +1925,11 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
 
         ocfs2_shutdown_local_alloc(osb);
 
+        ocfs2_truncate_log_shutdown(osb);
+
         /* This will disable recovery and flush any recovery work. */
         ocfs2_recovery_exit(osb);
 
-        /*
-         * During dismount, when it recovers another node it will call
-         * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
-         */
-        ocfs2_truncate_log_shutdown(osb);
-
         ocfs2_journal_shutdown(osb);
 
         ocfs2_sync_blockdev(sb);
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 6a45fb583ff1..447775ee2c4b 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -32,15 +32,24 @@ static inline void touch_nmi_watchdog(void)
 #ifdef arch_trigger_all_cpu_backtrace
 static inline bool trigger_all_cpu_backtrace(void)
 {
-        arch_trigger_all_cpu_backtrace();
+        arch_trigger_all_cpu_backtrace(true);
 
         return true;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+        arch_trigger_all_cpu_backtrace(false);
+        return true;
+}
 #else
 static inline bool trigger_all_cpu_backtrace(void)
 {
         return false;
 }
+static inline bool trigger_allbutself_cpu_backtrace(void)
+{
+        return false;
+}
 #endif
 
 #ifdef CONFIG_LOCKUP_DETECTOR
@@ -48,6 +57,7 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 extern int watchdog_user_enabled;
 extern int watchdog_thresh;
+extern int sysctl_softlockup_all_cpu_backtrace;
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
                            void __user *, size_t *, loff_t *);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 3c545b48aeab..8304959ad336 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -360,6 +360,9 @@ static inline void ClearPageCompound(struct page *page)
         ClearPageHead(page);
 }
 #endif
+
+#define PG_head_mask ((1L << PG_head))
+
 #else
 /*
  * Reduce page flag use as much as possible by overlapping
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 6748688813d0..369f41a94124 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1617,6 +1617,7 @@ static int __init crash_save_vmcoreinfo_init(void)
 #ifdef CONFIG_MEMORY_FAILURE
         VMCOREINFO_NUMBER(PG_hwpoison);
 #endif
+        VMCOREINFO_NUMBER(PG_head_mask);
         VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
 
         arch_crash_save_vmcoreinfo();
diff --git a/kernel/smp.c b/kernel/smp.c
index 306f8180b0d5..80c33f8de14f 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -29,6 +29,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
 
 static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
 
+static void flush_smp_call_function_queue(bool warn_cpu_offline);
+
 static int
 hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
@@ -51,12 +53,27 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
 #ifdef CONFIG_HOTPLUG_CPU
         case CPU_UP_CANCELED:
         case CPU_UP_CANCELED_FROZEN:
+                /* Fall-through to the CPU_DEAD[_FROZEN] case. */
 
         case CPU_DEAD:
         case CPU_DEAD_FROZEN:
                 free_cpumask_var(cfd->cpumask);
                 free_percpu(cfd->csd);
                 break;
+
+        case CPU_DYING:
+        case CPU_DYING_FROZEN:
+                /*
+                 * The IPIs for the smp-call-function callbacks queued by other
+                 * CPUs might arrive late, either due to hardware latencies or
+                 * because this CPU disabled interrupts (inside stop-machine)
+                 * before the IPIs were sent. So flush out any pending callbacks
+                 * explicitly (without waiting for the IPIs to arrive), to
+                 * ensure that the outgoing CPU doesn't go offline with work
+                 * still pending.
+                 */
+                flush_smp_call_function_queue(false);
+                break;
 #endif
         };
 
@@ -177,23 +194,47 @@ static int generic_exec_single(int cpu, struct call_single_data *csd,
         return 0;
 }
 
-/*
- * Invoked by arch to handle an IPI for call function single. Must be
- * called from the arch with interrupts disabled.
+/**
+ * generic_smp_call_function_single_interrupt - Execute SMP IPI callbacks
+ *
+ * Invoked by arch to handle an IPI for call function single.
+ * Must be called with interrupts disabled.
  */
 void generic_smp_call_function_single_interrupt(void)
 {
+        flush_smp_call_function_queue(true);
+}
+
+/**
+ * flush_smp_call_function_queue - Flush pending smp-call-function callbacks
+ *
+ * @warn_cpu_offline: If set to 'true', warn if callbacks were queued on an
+ *                    offline CPU. Skip this check if set to 'false'.
+ *
+ * Flush any pending smp-call-function callbacks queued on this CPU. This is
+ * invoked by the generic IPI handler, as well as by a CPU about to go offline,
+ * to ensure that all pending IPI callbacks are run before it goes completely
+ * offline.
+ *
+ * Loop through the call_single_queue and run all the queued callbacks.
+ * Must be called with interrupts disabled.
+ */
+static void flush_smp_call_function_queue(bool warn_cpu_offline)
+{
+        struct llist_head *head;
         struct llist_node *entry;
         struct call_single_data *csd, *csd_next;
         static bool warned;
 
-        entry = llist_del_all(&__get_cpu_var(call_single_queue));
+        WARN_ON(!irqs_disabled());
+
+        head = &__get_cpu_var(call_single_queue);
+        entry = llist_del_all(head);
         entry = llist_reverse_order(entry);
 
-        /*
-         * Shouldn't receive this interrupt on a cpu that is not yet online.
-         */
-        if (unlikely(!cpu_online(smp_processor_id()) && !warned)) {
+        /* There shouldn't be any pending callbacks on an offline CPU. */
+        if (unlikely(warn_cpu_offline && !cpu_online(smp_processor_id()) &&
+                     !warned && !llist_empty(head))) {
                 warned = true;
                 WARN(1, "IPI on offline CPU %d\n", smp_processor_id());
 
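
flush_smp_call_function_queue() above splits the IPI handler so that the same drain logic can run one last time while a CPU is going down, guaranteeing no queued callback is lost. A rough userspace analog built on a Treiber-style lock-free list, where llist_del_all()'s take-everything step becomes an atomic_exchange (all names are illustrative):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Producers push with CAS; the consumer grabs the whole list at once.
     * Calling flush_queue() one final time before the consumer goes away
     * mirrors the CPU_DYING flush: nothing queued may outlive its executor. */
    struct work { struct work *next; int id; };

    static _Atomic(struct work *) queue;

    static void push_work(struct work *w)
    {
        w->next = atomic_load(&queue);
        while (!atomic_compare_exchange_weak(&queue, &w->next, w))
            ;   /* on failure, w->next is reloaded with the current head */
    }

    static void flush_queue(void)
    {
        struct work *w = atomic_exchange(&queue, NULL); /* take everything */

        while (w) {                 /* note: LIFO; the kernel reverses order */
            struct work *next = w->next;
            printf("running work %d\n", w->id);
            free(w);
            w = next;
        }
    }

    int main(void)
    {
        for (int i = 0; i < 3; i++) {
            struct work *w = malloc(sizeof(*w));
            w->id = i;
            push_work(w);
        }
        flush_queue();  /* ...and once more at teardown, as the patch does */
        return 0;
    }
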
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7de6555cfea0..75b22e22a72c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -136,7 +136,6 @@ static unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
 static int maxolduid = 65535;
 static int minolduid;
-static int min_percpu_pagelist_fract = 8;
 
 static int ngroups_max = NGROUPS_MAX;
 static const int cap_last_cap = CAP_LAST_CAP;
@@ -861,6 +860,17 @@ static struct ctl_table kern_table[] = {
                 .extra1 = &zero,
                 .extra2 = &one,
         },
+#ifdef CONFIG_SMP
+        {
+                .procname = "softlockup_all_cpu_backtrace",
+                .data = &sysctl_softlockup_all_cpu_backtrace,
+                .maxlen = sizeof(int),
+                .mode = 0644,
+                .proc_handler = proc_dointvec_minmax,
+                .extra1 = &zero,
+                .extra2 = &one,
+        },
+#endif /* CONFIG_SMP */
         {
                 .procname = "nmi_watchdog",
                 .data = &watchdog_user_enabled,
@@ -1317,7 +1327,7 @@ static struct ctl_table vm_table[] = {
                 .maxlen = sizeof(percpu_pagelist_fraction),
                 .mode = 0644,
                 .proc_handler = percpu_pagelist_fraction_sysctl_handler,
-                .extra1 = &min_percpu_pagelist_fract,
+                .extra1 = &zero,
         },
 #ifdef CONFIG_MMU
         {
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 516203e665fc..c3319bd1b040 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -31,6 +31,12 @@ | |||
31 | 31 | ||
32 | int watchdog_user_enabled = 1; | 32 | int watchdog_user_enabled = 1; |
33 | int __read_mostly watchdog_thresh = 10; | 33 | int __read_mostly watchdog_thresh = 10; |
34 | #ifdef CONFIG_SMP | ||
35 | int __read_mostly sysctl_softlockup_all_cpu_backtrace; | ||
36 | #else | ||
37 | #define sysctl_softlockup_all_cpu_backtrace 0 | ||
38 | #endif | ||
39 | |||
34 | static int __read_mostly watchdog_running; | 40 | static int __read_mostly watchdog_running; |
35 | static u64 __read_mostly sample_period; | 41 | static u64 __read_mostly sample_period; |
36 | 42 | ||
@@ -47,6 +53,7 @@ static DEFINE_PER_CPU(bool, watchdog_nmi_touch); | |||
47 | static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); | 53 | static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved); |
48 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); | 54 | static DEFINE_PER_CPU(struct perf_event *, watchdog_ev); |
49 | #endif | 55 | #endif |
56 | static unsigned long soft_lockup_nmi_warn; | ||
50 | 57 | ||
51 | /* boot commands */ | 58 | /* boot commands */ |
52 | /* | 59 | /* |
@@ -95,6 +102,15 @@ static int __init nosoftlockup_setup(char *str) | |||
95 | } | 102 | } |
96 | __setup("nosoftlockup", nosoftlockup_setup); | 103 | __setup("nosoftlockup", nosoftlockup_setup); |
97 | /* */ | 104 | /* */ |
105 | #ifdef CONFIG_SMP | ||
106 | static int __init softlockup_all_cpu_backtrace_setup(char *str) | ||
107 | { | ||
108 | sysctl_softlockup_all_cpu_backtrace = | ||
109 | !!simple_strtol(str, NULL, 0); | ||
110 | return 1; | ||
111 | } | ||
112 | __setup("softlockup_all_cpu_backtrace=", softlockup_all_cpu_backtrace_setup); | ||
113 | #endif | ||
98 | 114 | ||
99 | /* | 115 | /* |
100 | * Hard-lockup warnings should be triggered after just a few seconds. Soft- | 116 | * Hard-lockup warnings should be triggered after just a few seconds. Soft- |
@@ -271,6 +287,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
271 | unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); | 287 | unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts); |
272 | struct pt_regs *regs = get_irq_regs(); | 288 | struct pt_regs *regs = get_irq_regs(); |
273 | int duration; | 289 | int duration; |
290 | int softlockup_all_cpu_backtrace = sysctl_softlockup_all_cpu_backtrace; | ||
274 | 291 | ||
275 | /* kick the hardlockup detector */ | 292 | /* kick the hardlockup detector */ |
276 | watchdog_interrupt_count(); | 293 | watchdog_interrupt_count(); |
@@ -317,6 +334,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
317 | if (__this_cpu_read(soft_watchdog_warn) == true) | 334 | if (__this_cpu_read(soft_watchdog_warn) == true) |
318 | return HRTIMER_RESTART; | 335 | return HRTIMER_RESTART; |
319 | 336 | ||
337 | if (softlockup_all_cpu_backtrace) { | ||
338 | /* Prevent multiple soft-lockup reports if one cpu is already | ||
339 | * engaged in dumping cpu back traces | ||
340 | */ | ||
341 | if (test_and_set_bit(0, &soft_lockup_nmi_warn)) { | ||
342 | /* Someone else will report us. Let's give up */ | ||
343 | __this_cpu_write(soft_watchdog_warn, true); | ||
344 | return HRTIMER_RESTART; | ||
345 | } | ||
346 | } | ||
347 | |||
320 | printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", | 348 | printk(KERN_EMERG "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n", |
321 | smp_processor_id(), duration, | 349 | smp_processor_id(), duration, |
322 | current->comm, task_pid_nr(current)); | 350 | current->comm, task_pid_nr(current)); |
@@ -327,6 +355,17 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) | |||
327 | else | 355 | else |
328 | dump_stack(); | 356 | dump_stack(); |
329 | 357 | ||
358 | if (softlockup_all_cpu_backtrace) { | ||
359 | /* Avoid generating two back traces for current | ||
360 | * given that one is already made above | ||
361 | */ | ||
362 | trigger_allbutself_cpu_backtrace(); | ||
363 | |||
364 | clear_bit(0, &soft_lockup_nmi_warn); | ||
365 | /* Barrier to sync with other cpus */ | ||
366 | smp_mb__after_atomic(); | ||
367 | } | ||
368 | |||
330 | if (softlockup_panic) | 369 | if (softlockup_panic) |
331 | panic("softlockup: hung tasks"); | 370 | panic("softlockup: hung tasks"); |
332 | __this_cpu_write(soft_watchdog_warn, true); | 371 | __this_cpu_write(soft_watchdog_warn, true); |
@@ -527,10 +566,8 @@ static void update_timers_all_cpus(void) | |||
527 | int cpu; | 566 | int cpu; |
528 | 567 | ||
529 | get_online_cpus(); | 568 | get_online_cpus(); |
530 | preempt_disable(); | ||
531 | for_each_online_cpu(cpu) | 569 | for_each_online_cpu(cpu) |
532 | update_timers(cpu); | 570 | update_timers(cpu); |
533 | preempt_enable(); | ||
534 | put_online_cpus(); | 571 | put_online_cpus(); |
535 | } | 572 | } |
536 | 573 | ||
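
The single-reporter gate above hinges on an atomic test-and-set of bit 0 in soft_lockup_nmi_warn: the first CPU to trip the detector wins the right to dump everyone's backtraces, and later arrivals mark themselves warned and bail out. A minimal pthreads model of the same shape; the names and the atomic_fetch_or()/atomic_fetch_and() calls are illustrative stand-ins for test_and_set_bit()/clear_bit(), not kernel API:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong warn_gate;	/* plays the role of soft_lockup_nmi_warn */

static void *detector(void *arg)
{
	/* Only the first thread to set bit 0 gets to report; later
	 * arrivals see the bit already set and give up. */
	if (atomic_fetch_or(&warn_gate, 1UL) & 1UL)
		return NULL;
	printf("thread %ld: dumping backtraces for everyone\n", (long)arg);
	/* Clearing the bit with a seq_cst RMW is a full barrier,
	 * mirroring clear_bit() + smp_mb__after_atomic() above. */
	atomic_fetch_and(&warn_gate, ~1UL);
	return NULL;
}

int main(void)
{
	pthread_t t[4];

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, detector, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	return 0;	/* build with -pthread; exactly one thread prints */
}
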
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7cfcc1b8e101..7a638aa3545b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug | |||
@@ -930,7 +930,7 @@ config LOCKDEP | |||
930 | bool | 930 | bool |
931 | depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT | 931 | depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT |
932 | select STACKTRACE | 932 | select STACKTRACE |
933 | select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC | 933 | select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE |
934 | select KALLSYMS | 934 | select KALLSYMS |
935 | select KALLSYMS_ALL | 935 | select KALLSYMS_ALL |
936 | 936 | ||
@@ -1408,7 +1408,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER | |||
1408 | depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT | 1408 | depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT |
1409 | depends on !X86_64 | 1409 | depends on !X86_64 |
1410 | select STACKTRACE | 1410 | select STACKTRACE |
1411 | select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC | 1411 | select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE |
1412 | help | 1412 | help |
1413 | Provide stacktrace filter for fault-injection capabilities | 1413 | Provide stacktrace filter for fault-injection capabilities |
1414 | 1414 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e60837dc785c..33514d88fef9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -941,6 +941,37 @@ unlock: | |||
941 | spin_unlock(ptl); | 941 | spin_unlock(ptl); |
942 | } | 942 | } |
943 | 943 | ||
944 | /* | ||
945 | * Save CONFIG_DEBUG_PAGEALLOC from faulting falsely on tail pages | ||
946 | * during copy_user_huge_page()'s copy_page_rep(): in the case when | ||
947 | * the source page gets split and a tail freed before copy completes. | ||
948 | * Called under pmd_lock of checked pmd, so safe from splitting itself. | ||
949 | */ | ||
950 | static void get_user_huge_page(struct page *page) | ||
951 | { | ||
952 | if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) { | ||
953 | struct page *endpage = page + HPAGE_PMD_NR; | ||
954 | |||
955 | atomic_add(HPAGE_PMD_NR, &page->_count); | ||
956 | while (++page < endpage) | ||
957 | get_huge_page_tail(page); | ||
958 | } else { | ||
959 | get_page(page); | ||
960 | } | ||
961 | } | ||
962 | |||
963 | static void put_user_huge_page(struct page *page) | ||
964 | { | ||
965 | if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC)) { | ||
966 | struct page *endpage = page + HPAGE_PMD_NR; | ||
967 | |||
968 | while (page < endpage) | ||
969 | put_page(page++); | ||
970 | } else { | ||
971 | put_page(page); | ||
972 | } | ||
973 | } | ||
974 | |||
944 | static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | 975 | static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, |
945 | struct vm_area_struct *vma, | 976 | struct vm_area_struct *vma, |
946 | unsigned long address, | 977 | unsigned long address, |
@@ -1074,7 +1105,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1074 | ret |= VM_FAULT_WRITE; | 1105 | ret |= VM_FAULT_WRITE; |
1075 | goto out_unlock; | 1106 | goto out_unlock; |
1076 | } | 1107 | } |
1077 | get_page(page); | 1108 | get_user_huge_page(page); |
1078 | spin_unlock(ptl); | 1109 | spin_unlock(ptl); |
1079 | alloc: | 1110 | alloc: |
1080 | if (transparent_hugepage_enabled(vma) && | 1111 | if (transparent_hugepage_enabled(vma) && |
@@ -1095,7 +1126,7 @@ alloc: | |||
1095 | split_huge_page(page); | 1126 | split_huge_page(page); |
1096 | ret |= VM_FAULT_FALLBACK; | 1127 | ret |= VM_FAULT_FALLBACK; |
1097 | } | 1128 | } |
1098 | put_page(page); | 1129 | put_user_huge_page(page); |
1099 | } | 1130 | } |
1100 | count_vm_event(THP_FAULT_FALLBACK); | 1131 | count_vm_event(THP_FAULT_FALLBACK); |
1101 | goto out; | 1132 | goto out; |
@@ -1105,7 +1136,7 @@ alloc: | |||
1105 | put_page(new_page); | 1136 | put_page(new_page); |
1106 | if (page) { | 1137 | if (page) { |
1107 | split_huge_page(page); | 1138 | split_huge_page(page); |
1108 | put_page(page); | 1139 | put_user_huge_page(page); |
1109 | } else | 1140 | } else |
1110 | split_huge_page_pmd(vma, address, pmd); | 1141 | split_huge_page_pmd(vma, address, pmd); |
1111 | ret |= VM_FAULT_FALLBACK; | 1142 | ret |= VM_FAULT_FALLBACK; |
@@ -1127,7 +1158,7 @@ alloc: | |||
1127 | 1158 | ||
1128 | spin_lock(ptl); | 1159 | spin_lock(ptl); |
1129 | if (page) | 1160 | if (page) |
1130 | put_page(page); | 1161 | put_user_huge_page(page); |
1131 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { | 1162 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { |
1132 | spin_unlock(ptl); | 1163 | spin_unlock(ptl); |
1133 | mem_cgroup_uncharge_page(new_page); | 1164 | mem_cgroup_uncharge_page(new_page); |
@@ -2392,8 +2423,6 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
2392 | pmd = mm_find_pmd(mm, address); | 2423 | pmd = mm_find_pmd(mm, address); |
2393 | if (!pmd) | 2424 | if (!pmd) |
2394 | goto out; | 2425 | goto out; |
2395 | if (pmd_trans_huge(*pmd)) | ||
2396 | goto out; | ||
2397 | 2426 | ||
2398 | anon_vma_lock_write(vma->anon_vma); | 2427 | anon_vma_lock_write(vma->anon_vma); |
2399 | 2428 | ||
@@ -2492,8 +2521,6 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
2492 | pmd = mm_find_pmd(mm, address); | 2521 | pmd = mm_find_pmd(mm, address); |
2493 | if (!pmd) | 2522 | if (!pmd) |
2494 | goto out; | 2523 | goto out; |
2495 | if (pmd_trans_huge(*pmd)) | ||
2496 | goto out; | ||
2497 | 2524 | ||
2498 | memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); | 2525 | memset(khugepaged_node_load, 0, sizeof(khugepaged_node_load)); |
2499 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); | 2526 | pte = pte_offset_map_lock(mm, pmd, address, &ptl); |
@@ -2846,12 +2873,22 @@ void split_huge_page_pmd_mm(struct mm_struct *mm, unsigned long address, | |||
2846 | static void split_huge_page_address(struct mm_struct *mm, | 2873 | static void split_huge_page_address(struct mm_struct *mm, |
2847 | unsigned long address) | 2874 | unsigned long address) |
2848 | { | 2875 | { |
2876 | pgd_t *pgd; | ||
2877 | pud_t *pud; | ||
2849 | pmd_t *pmd; | 2878 | pmd_t *pmd; |
2850 | 2879 | ||
2851 | VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); | 2880 | VM_BUG_ON(!(address & ~HPAGE_PMD_MASK)); |
2852 | 2881 | ||
2853 | pmd = mm_find_pmd(mm, address); | 2882 | pgd = pgd_offset(mm, address); |
2854 | if (!pmd) | 2883 | if (!pgd_present(*pgd)) |
2884 | return; | ||
2885 | |||
2886 | pud = pud_offset(pgd, address); | ||
2887 | if (!pud_present(*pud)) | ||
2888 | return; | ||
2889 | |||
2890 | pmd = pmd_offset(pud, address); | ||
2891 | if (!pmd_present(*pmd)) | ||
2855 | return; | 2892 | return; |
2856 | /* | 2893 | /* |
2857 | * Caller holds the mmap_sem write mode, so a huge pmd cannot | 2894 | * Caller holds the mmap_sem write mode, so a huge pmd cannot |
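
get_user_huge_page()/put_user_huge_page() exist because, with CONFIG_DEBUG_PAGEALLOC, one reference on the compound head does not stop a concurrent split from freeing a tail page mid-copy; every subpage must be pinned individually. A simplified userspace model of the reference scheme follows. It is a symmetric toy under an assumed flat refcount per page; the real put_page() on tail pages carries extra compound-page logic:

#include <stdatomic.h>
#include <stdio.h>

#define HPAGE_NR 4		/* toy compound page: head + 3 tails */

struct page { atomic_int _count; };

static void get_user_huge_page(struct page *head, int debug_pagealloc)
{
	if (debug_pagealloc) {
		/* Pin each subpage: a concurrent split then cannot free
		 * any tail while the copy is still reading it. */
		struct page *p = head, *end = head + HPAGE_NR;

		atomic_fetch_add(&head->_count, HPAGE_NR);
		while (++p < end)
			atomic_fetch_add(&p->_count, 1);
	} else {
		/* Normal case: one reference on the head pins them all. */
		atomic_fetch_add(&head->_count, 1);
	}
}

static void put_user_huge_page(struct page *head, int debug_pagealloc)
{
	if (debug_pagealloc) {
		struct page *p = head, *end = head + HPAGE_NR;

		atomic_fetch_sub(&head->_count, HPAGE_NR);
		while (++p < end)
			atomic_fetch_sub(&p->_count, 1);
	} else {
		atomic_fetch_sub(&head->_count, 1);
	}
}

int main(void)
{
	struct page hp[HPAGE_NR] = { 0 };

	get_user_huge_page(hp, 1);
	printf("head=%d tail=%d\n", atomic_load(&hp[0]._count),
	       atomic_load(&hp[1]._count));	/* head=4 tail=1 */
	put_user_huge_page(hp, 1);
	return 0;
}
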
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 226910cb7c9b..2024bbd573d2 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2520,6 +2520,31 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma, | |||
2520 | update_mmu_cache(vma, address, ptep); | 2520 | update_mmu_cache(vma, address, ptep); |
2521 | } | 2521 | } |
2522 | 2522 | ||
2523 | static int is_hugetlb_entry_migration(pte_t pte) | ||
2524 | { | ||
2525 | swp_entry_t swp; | ||
2526 | |||
2527 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2528 | return 0; | ||
2529 | swp = pte_to_swp_entry(pte); | ||
2530 | if (non_swap_entry(swp) && is_migration_entry(swp)) | ||
2531 | return 1; | ||
2532 | else | ||
2533 | return 0; | ||
2534 | } | ||
2535 | |||
2536 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | ||
2537 | { | ||
2538 | swp_entry_t swp; | ||
2539 | |||
2540 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2541 | return 0; | ||
2542 | swp = pte_to_swp_entry(pte); | ||
2543 | if (non_swap_entry(swp) && is_hwpoison_entry(swp)) | ||
2544 | return 1; | ||
2545 | else | ||
2546 | return 0; | ||
2547 | } | ||
2523 | 2548 | ||
2524 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | 2549 | int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, |
2525 | struct vm_area_struct *vma) | 2550 | struct vm_area_struct *vma) |
@@ -2559,10 +2584,26 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
2559 | dst_ptl = huge_pte_lock(h, dst, dst_pte); | 2584 | dst_ptl = huge_pte_lock(h, dst, dst_pte); |
2560 | src_ptl = huge_pte_lockptr(h, src, src_pte); | 2585 | src_ptl = huge_pte_lockptr(h, src, src_pte); |
2561 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); | 2586 | spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); |
2562 | if (!huge_pte_none(huge_ptep_get(src_pte))) { | 2587 | entry = huge_ptep_get(src_pte); |
2588 | if (huge_pte_none(entry)) { /* skip none entry */ | ||
2589 | ; | ||
2590 | } else if (unlikely(is_hugetlb_entry_migration(entry) || | ||
2591 | is_hugetlb_entry_hwpoisoned(entry))) { | ||
2592 | swp_entry_t swp_entry = pte_to_swp_entry(entry); | ||
2593 | |||
2594 | if (is_write_migration_entry(swp_entry) && cow) { | ||
2595 | /* | ||
2596 | * COW mappings require pages in both | ||
2597 | * parent and child to be set to read. | ||
2598 | */ | ||
2599 | make_migration_entry_read(&swp_entry); | ||
2600 | entry = swp_entry_to_pte(swp_entry); | ||
2601 | set_huge_pte_at(src, addr, src_pte, entry); | ||
2602 | } | ||
2603 | set_huge_pte_at(dst, addr, dst_pte, entry); | ||
2604 | } else { | ||
2563 | if (cow) | 2605 | if (cow) |
2564 | huge_ptep_set_wrprotect(src, addr, src_pte); | 2606 | huge_ptep_set_wrprotect(src, addr, src_pte); |
2565 | entry = huge_ptep_get(src_pte); | ||
2566 | ptepage = pte_page(entry); | 2607 | ptepage = pte_page(entry); |
2567 | get_page(ptepage); | 2608 | get_page(ptepage); |
2568 | page_dup_rmap(ptepage); | 2609 | page_dup_rmap(ptepage); |
@@ -2578,32 +2619,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, | |||
2578 | return ret; | 2619 | return ret; |
2579 | } | 2620 | } |
2580 | 2621 | ||
2581 | static int is_hugetlb_entry_migration(pte_t pte) | ||
2582 | { | ||
2583 | swp_entry_t swp; | ||
2584 | |||
2585 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2586 | return 0; | ||
2587 | swp = pte_to_swp_entry(pte); | ||
2588 | if (non_swap_entry(swp) && is_migration_entry(swp)) | ||
2589 | return 1; | ||
2590 | else | ||
2591 | return 0; | ||
2592 | } | ||
2593 | |||
2594 | static int is_hugetlb_entry_hwpoisoned(pte_t pte) | ||
2595 | { | ||
2596 | swp_entry_t swp; | ||
2597 | |||
2598 | if (huge_pte_none(pte) || pte_present(pte)) | ||
2599 | return 0; | ||
2600 | swp = pte_to_swp_entry(pte); | ||
2601 | if (non_swap_entry(swp) && is_hwpoison_entry(swp)) | ||
2602 | return 1; | ||
2603 | else | ||
2604 | return 0; | ||
2605 | } | ||
2606 | |||
2607 | void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, | 2622 | void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, |
2608 | unsigned long start, unsigned long end, | 2623 | unsigned long start, unsigned long end, |
2609 | struct page *ref_page) | 2624 | struct page *ref_page) |
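
The reworked copy loop in copy_hugetlb_page_range() classifies each source entry three ways: empty, a migration/hwpoison swap entry to be copied directly (downgrading write-migration entries when COW applies), or a normal present page to be shared. A toy sketch of that decision tree under an invented encoding; negative values stand in for swap-style entries, and nothing here matches real pte bits:

#include <stdio.h>

/* Invented encoding: 0 = none, >0 = present pfn, <0 = swap-style entry. */
enum { SWP_MIG_READ = 1, SWP_MIG_WRITE = 2, SWP_HWPOISON = 3 };

static int is_migration_entry(long e)
{
	return e < 0 && (-e == SWP_MIG_READ || -e == SWP_MIG_WRITE);
}

static int is_hwpoison_entry(long e)
{
	return e < 0 && -e == SWP_HWPOISON;
}

static void copy_one(long *dst, long *src, int cow)
{
	long entry = *src;

	if (entry == 0) {
		/* skip none entry */
	} else if (is_migration_entry(entry) || is_hwpoison_entry(entry)) {
		if (-entry == SWP_MIG_WRITE && cow)
			/* COW: both parent and child must fault read-only */
			*src = entry = -SWP_MIG_READ;
		*dst = entry;
	} else {
		/* present page: write-protect for COW, then share
		 * (refcount and rmap bookkeeping omitted in this toy) */
		*dst = entry;
	}
}

int main(void)
{
	long src[3] = { 0, -SWP_MIG_WRITE, 42 }, dst[3] = { 0, 0, 0 };

	for (int i = 0; i < 3; i++)
		copy_one(&dst[i], &src[i], 1);
	printf("src[1]=%ld dst[1]=%ld dst[2]=%ld\n", src[1], dst[1], dst[2]);
	return 0;	/* prints: src[1]=-1 dst[1]=-1 dst[2]=42 */
}
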
diff --git a/mm/ksm.c b/mm/ksm.c --- a/mm/ksm.c +++ b/mm/ksm.c | |||
@@ -945,7 +945,6 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, | |||
945 | pmd = mm_find_pmd(mm, addr); | 945 | pmd = mm_find_pmd(mm, addr); |
946 | if (!pmd) | 946 | if (!pmd) |
947 | goto out; | 947 | goto out; |
948 | BUG_ON(pmd_trans_huge(*pmd)); | ||
949 | 948 | ||
950 | mmun_start = addr; | 949 | mmun_start = addr; |
951 | mmun_end = addr + PAGE_SIZE; | 950 | mmun_end = addr + PAGE_SIZE; |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 284974230459..eb58de19f815 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -656,19 +656,18 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma, | |||
656 | * @nodes and @flags,) it's isolated and queued to the pagelist which is | 656 | * @nodes and @flags,) it's isolated and queued to the pagelist which is |
657 | * passed via @private.) | 657 | * passed via @private.) |
658 | */ | 658 | */ |
659 | static struct vm_area_struct * | 659 | static int |
660 | queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | 660 | queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, |
661 | const nodemask_t *nodes, unsigned long flags, void *private) | 661 | const nodemask_t *nodes, unsigned long flags, void *private) |
662 | { | 662 | { |
663 | int err; | 663 | int err = 0; |
664 | struct vm_area_struct *first, *vma, *prev; | 664 | struct vm_area_struct *vma, *prev; |
665 | |||
666 | 665 | ||
667 | first = find_vma(mm, start); | 666 | vma = find_vma(mm, start); |
668 | if (!first) | 667 | if (!vma) |
669 | return ERR_PTR(-EFAULT); | 668 | return -EFAULT; |
670 | prev = NULL; | 669 | prev = NULL; |
671 | for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { | 670 | for (; vma && vma->vm_start < end; vma = vma->vm_next) { |
672 | unsigned long endvma = vma->vm_end; | 671 | unsigned long endvma = vma->vm_end; |
673 | 672 | ||
674 | if (endvma > end) | 673 | if (endvma > end) |
@@ -678,9 +677,9 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
678 | 677 | ||
679 | if (!(flags & MPOL_MF_DISCONTIG_OK)) { | 678 | if (!(flags & MPOL_MF_DISCONTIG_OK)) { |
680 | if (!vma->vm_next && vma->vm_end < end) | 679 | if (!vma->vm_next && vma->vm_end < end) |
681 | return ERR_PTR(-EFAULT); | 680 | return -EFAULT; |
682 | if (prev && prev->vm_end < vma->vm_start) | 681 | if (prev && prev->vm_end < vma->vm_start) |
683 | return ERR_PTR(-EFAULT); | 682 | return -EFAULT; |
684 | } | 683 | } |
685 | 684 | ||
686 | if (flags & MPOL_MF_LAZY) { | 685 | if (flags & MPOL_MF_LAZY) { |
@@ -694,15 +693,13 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end, | |||
694 | 693 | ||
695 | err = queue_pages_pgd_range(vma, start, endvma, nodes, | 694 | err = queue_pages_pgd_range(vma, start, endvma, nodes, |
696 | flags, private); | 695 | flags, private); |
697 | if (err) { | 696 | if (err) |
698 | first = ERR_PTR(err); | ||
699 | break; | 697 | break; |
700 | } | ||
701 | } | 698 | } |
702 | next: | 699 | next: |
703 | prev = vma; | 700 | prev = vma; |
704 | } | 701 | } |
705 | return first; | 702 | return err; |
706 | } | 703 | } |
707 | 704 | ||
708 | /* | 705 | /* |
@@ -1156,16 +1153,17 @@ out: | |||
1156 | 1153 | ||
1157 | /* | 1154 | /* |
1158 | * Allocate a new page for page migration based on vma policy. | 1155 | * Allocate a new page for page migration based on vma policy. |
1159 | * Start assuming that page is mapped by vma pointed to by @private. | 1156 | * Start by assuming the page is mapped by the same vma as contains @start. |
1160 | * Search forward from there, if not. N.B., this assumes that the | 1157 | * Search forward from there, if not. N.B., this assumes that the |
1161 | * list of pages handed to migrate_pages()--which is how we get here-- | 1158 | * list of pages handed to migrate_pages()--which is how we get here-- |
1162 | * is in virtual address order. | 1159 | * is in virtual address order. |
1163 | */ | 1160 | */ |
1164 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) | 1161 | static struct page *new_page(struct page *page, unsigned long start, int **x) |
1165 | { | 1162 | { |
1166 | struct vm_area_struct *vma = (struct vm_area_struct *)private; | 1163 | struct vm_area_struct *vma; |
1167 | unsigned long uninitialized_var(address); | 1164 | unsigned long uninitialized_var(address); |
1168 | 1165 | ||
1166 | vma = find_vma(current->mm, start); | ||
1169 | while (vma) { | 1167 | while (vma) { |
1170 | address = page_address_in_vma(page, vma); | 1168 | address = page_address_in_vma(page, vma); |
1171 | if (address != -EFAULT) | 1169 | if (address != -EFAULT) |
@@ -1195,7 +1193,7 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from, | |||
1195 | return -ENOSYS; | 1193 | return -ENOSYS; |
1196 | } | 1194 | } |
1197 | 1195 | ||
1198 | static struct page *new_vma_page(struct page *page, unsigned long private, int **x) | 1196 | static struct page *new_page(struct page *page, unsigned long start, int **x) |
1199 | { | 1197 | { |
1200 | return NULL; | 1198 | return NULL; |
1201 | } | 1199 | } |
@@ -1205,7 +1203,6 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1205 | unsigned short mode, unsigned short mode_flags, | 1203 | unsigned short mode, unsigned short mode_flags, |
1206 | nodemask_t *nmask, unsigned long flags) | 1204 | nodemask_t *nmask, unsigned long flags) |
1207 | { | 1205 | { |
1208 | struct vm_area_struct *vma; | ||
1209 | struct mm_struct *mm = current->mm; | 1206 | struct mm_struct *mm = current->mm; |
1210 | struct mempolicy *new; | 1207 | struct mempolicy *new; |
1211 | unsigned long end; | 1208 | unsigned long end; |
@@ -1271,11 +1268,9 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1271 | if (err) | 1268 | if (err) |
1272 | goto mpol_out; | 1269 | goto mpol_out; |
1273 | 1270 | ||
1274 | vma = queue_pages_range(mm, start, end, nmask, | 1271 | err = queue_pages_range(mm, start, end, nmask, |
1275 | flags | MPOL_MF_INVERT, &pagelist); | 1272 | flags | MPOL_MF_INVERT, &pagelist); |
1276 | 1273 | if (!err) | |
1277 | err = PTR_ERR(vma); /* maybe ... */ | ||
1278 | if (!IS_ERR(vma)) | ||
1279 | err = mbind_range(mm, start, end, new); | 1274 | err = mbind_range(mm, start, end, new); |
1280 | 1275 | ||
1281 | if (!err) { | 1276 | if (!err) { |
@@ -1283,9 +1278,8 @@ static long do_mbind(unsigned long start, unsigned long len, | |||
1283 | 1278 | ||
1284 | if (!list_empty(&pagelist)) { | 1279 | if (!list_empty(&pagelist)) { |
1285 | WARN_ON_ONCE(flags & MPOL_MF_LAZY); | 1280 | WARN_ON_ONCE(flags & MPOL_MF_LAZY); |
1286 | nr_failed = migrate_pages(&pagelist, new_vma_page, | 1281 | nr_failed = migrate_pages(&pagelist, new_page, NULL, |
1287 | NULL, (unsigned long)vma, | 1282 | start, MIGRATE_SYNC, MR_MEMPOLICY_MBIND); |
1288 | MIGRATE_SYNC, MR_MEMPOLICY_MBIND); | ||
1289 | if (nr_failed) | 1283 | if (nr_failed) |
1290 | putback_movable_pages(&pagelist); | 1284 | putback_movable_pages(&pagelist); |
1291 | } | 1285 | } |
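
queue_pages_range() used to return the first vma on success or an ERR_PTR on failure, which do_mbind() then decoded with PTR_ERR()/IS_ERR(); returning a plain int, with the start address handed to new_page() instead, drops that decoding dance. For reference, a userspace rendition of the ERR_PTR convention being retired, modeled on the kernel's err.h (the constants and casts are the usual userspace approximation, not the kernel header itself):

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095	/* error codes live in the top page of the address space */

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

/* Old shape: overload the returned pointer with an error code. */
static void *old_style(int fail)
{
	return fail ? ERR_PTR(-EFAULT) : (void *)"vma";
}

/* New shape: report status directly; nothing to decode at the call site. */
static int new_style(int fail)
{
	return fail ? -EFAULT : 0;
}

int main(void)
{
	void *v = old_style(1);

	if (IS_ERR(v))
		printf("old: err=%ld\n", PTR_ERR(v));	/* err=-14 */
	printf("new: err=%d\n", new_style(1));		/* err=-14 */
	return 0;
}
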
diff --git a/mm/migrate.c b/mm/migrate.c index 63f0cd559999..9e0beaa91845 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -120,8 +120,6 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, | |||
120 | pmd = mm_find_pmd(mm, addr); | 120 | pmd = mm_find_pmd(mm, addr); |
121 | if (!pmd) | 121 | if (!pmd) |
122 | goto out; | 122 | goto out; |
123 | if (pmd_trans_huge(*pmd)) | ||
124 | goto out; | ||
125 | 123 | ||
126 | ptep = pte_offset_map(pmd, addr); | 124 | ptep = pte_offset_map(pmd, addr); |
127 | 125 | ||
diff --git a/mm/nommu.c b/mm/nommu.c index b78e3a8f5ee7..4a852f6c5709 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -786,7 +786,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) | |||
786 | for (i = 0; i < VMACACHE_SIZE; i++) { | 786 | for (i = 0; i < VMACACHE_SIZE; i++) { |
787 | /* if the vma is cached, invalidate the entire cache */ | 787 | /* if the vma is cached, invalidate the entire cache */ |
788 | if (curr->vmacache[i] == vma) { | 788 | if (curr->vmacache[i] == vma) { |
789 | vmacache_invalidate(curr->mm); | 789 | vmacache_invalidate(mm); |
790 | break; | 790 | break; |
791 | } | 791 | } |
792 | } | 792 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 4f59fa29eda8..20d17f8266fe 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -69,6 +69,7 @@ | |||
69 | 69 | ||
70 | /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ | 70 | /* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */ |
71 | static DEFINE_MUTEX(pcp_batch_high_lock); | 71 | static DEFINE_MUTEX(pcp_batch_high_lock); |
72 | #define MIN_PERCPU_PAGELIST_FRACTION (8) | ||
72 | 73 | ||
73 | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID | 74 | #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID |
74 | DEFINE_PER_CPU(int, numa_node); | 75 | DEFINE_PER_CPU(int, numa_node); |
@@ -4145,7 +4146,7 @@ static void __meminit zone_init_free_lists(struct zone *zone) | |||
4145 | memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) | 4146 | memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY) |
4146 | #endif | 4147 | #endif |
4147 | 4148 | ||
4148 | static int __meminit zone_batchsize(struct zone *zone) | 4149 | static int zone_batchsize(struct zone *zone) |
4149 | { | 4150 | { |
4150 | #ifdef CONFIG_MMU | 4151 | #ifdef CONFIG_MMU |
4151 | int batch; | 4152 | int batch; |
@@ -4261,8 +4262,8 @@ static void pageset_set_high(struct per_cpu_pageset *p, | |||
4261 | pageset_update(&p->pcp, high, batch); | 4262 | pageset_update(&p->pcp, high, batch); |
4262 | } | 4263 | } |
4263 | 4264 | ||
4264 | static void __meminit pageset_set_high_and_batch(struct zone *zone, | 4265 | static void pageset_set_high_and_batch(struct zone *zone, |
4265 | struct per_cpu_pageset *pcp) | 4266 | struct per_cpu_pageset *pcp) |
4266 | { | 4267 | { |
4267 | if (percpu_pagelist_fraction) | 4268 | if (percpu_pagelist_fraction) |
4268 | pageset_set_high(pcp, | 4269 | pageset_set_high(pcp, |
@@ -5881,23 +5882,38 @@ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write, | |||
5881 | void __user *buffer, size_t *length, loff_t *ppos) | 5882 | void __user *buffer, size_t *length, loff_t *ppos) |
5882 | { | 5883 | { |
5883 | struct zone *zone; | 5884 | struct zone *zone; |
5884 | unsigned int cpu; | 5885 | int old_percpu_pagelist_fraction; |
5885 | int ret; | 5886 | int ret; |
5886 | 5887 | ||
5888 | mutex_lock(&pcp_batch_high_lock); | ||
5889 | old_percpu_pagelist_fraction = percpu_pagelist_fraction; | ||
5890 | |||
5887 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); | 5891 | ret = proc_dointvec_minmax(table, write, buffer, length, ppos); |
5888 | if (!write || (ret < 0)) | 5892 | if (!write || ret < 0) |
5889 | return ret; | 5893 | goto out; |
5894 | |||
5895 | /* Sanity checking to avoid pcp imbalance */ | ||
5896 | if (percpu_pagelist_fraction && | ||
5897 | percpu_pagelist_fraction < MIN_PERCPU_PAGELIST_FRACTION) { | ||
5898 | percpu_pagelist_fraction = old_percpu_pagelist_fraction; | ||
5899 | ret = -EINVAL; | ||
5900 | goto out; | ||
5901 | } | ||
5902 | |||
5903 | /* No change? */ | ||
5904 | if (percpu_pagelist_fraction == old_percpu_pagelist_fraction) | ||
5905 | goto out; | ||
5890 | 5906 | ||
5891 | mutex_lock(&pcp_batch_high_lock); | ||
5892 | for_each_populated_zone(zone) { | 5907 | for_each_populated_zone(zone) { |
5893 | unsigned long high; | 5908 | unsigned int cpu; |
5894 | high = zone->managed_pages / percpu_pagelist_fraction; | 5909 | |
5895 | for_each_possible_cpu(cpu) | 5910 | for_each_possible_cpu(cpu) |
5896 | pageset_set_high(per_cpu_ptr(zone->pageset, cpu), | 5911 | pageset_set_high_and_batch(zone, |
5897 | high); | 5912 | per_cpu_ptr(zone->pageset, cpu)); |
5898 | } | 5913 | } |
5914 | out: | ||
5899 | mutex_unlock(&pcp_batch_high_lock); | 5915 | mutex_unlock(&pcp_batch_high_lock); |
5900 | return 0; | 5916 | return ret; |
5901 | } | 5917 | } |
5902 | 5918 | ||
5903 | int hashdist = HASHDIST_DEFAULT; | 5919 | int hashdist = HASHDIST_DEFAULT; |
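
The rewritten sysctl handler follows a take-lock, snapshot, validate-or-roll-back shape: proc_dointvec_minmax() writes the new value in place, so out-of-range input must restore the saved value, and an unchanged value skips the per-cpu rebuild entirely. A compact userspace sketch of the same pattern; set_fraction() and MIN_FRACTION are illustrative names:

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define MIN_FRACTION 8		/* mirrors MIN_PERCPU_PAGELIST_FRACTION */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int fraction;		/* 0 means "disabled", like the sysctl */

static int set_fraction(int new_val)
{
	int ret = 0, old;

	pthread_mutex_lock(&lock);
	old = fraction;
	fraction = new_val;	/* proc_dointvec_minmax() writes in place */

	if (fraction && fraction < MIN_FRACTION) {
		fraction = old;	/* reject and roll back bogus input */
		ret = -EINVAL;
		goto out;
	}
	if (fraction == old)	/* no change: skip the per-cpu rebuild */
		goto out;

	/* ...recompute every zone's per-cpu high/batch here... */
out:
	pthread_mutex_unlock(&lock);
	return ret;
}

int main(void)
{
	printf("%d ", set_fraction(4));		/* -22: nonzero but below minimum */
	printf("%d ", set_fraction(8));		/* 0: applied */
	printf("%d\n", set_fraction(0));	/* 0: disabled again */
	return 0;
}

Taking the mutex around the write (rather than only around the update loop, as before) is what makes the rollback race-free against a concurrent writer.
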
diff --git a/mm/rmap.c b/mm/rmap.c --- a/mm/rmap.c +++ b/mm/rmap.c | |||
@@ -569,6 +569,7 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) | |||
569 | pgd_t *pgd; | 569 | pgd_t *pgd; |
570 | pud_t *pud; | 570 | pud_t *pud; |
571 | pmd_t *pmd = NULL; | 571 | pmd_t *pmd = NULL; |
572 | pmd_t pmde; | ||
572 | 573 | ||
573 | pgd = pgd_offset(mm, address); | 574 | pgd = pgd_offset(mm, address); |
574 | if (!pgd_present(*pgd)) | 575 | if (!pgd_present(*pgd)) |
@@ -579,7 +580,13 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) | |||
579 | goto out; | 580 | goto out; |
580 | 581 | ||
581 | pmd = pmd_offset(pud, address); | 582 | pmd = pmd_offset(pud, address); |
582 | if (!pmd_present(*pmd)) | 583 | /* |
584 | * Some THP functions use the sequence pmdp_clear_flush(), set_pmd_at() | ||
585 | * without holding anon_vma lock for write. So when looking for a | ||
586 | * genuine pmde (in which to find pte), test present and !THP together. | ||
587 | */ | ||
588 | pmde = ACCESS_ONCE(*pmd); | ||
589 | if (!pmd_present(pmde) || pmd_trans_huge(pmde)) | ||
583 | pmd = NULL; | 590 | pmd = NULL; |
584 | out: | 591 | out: |
585 | return pmd; | 592 | return pmd; |
@@ -615,9 +622,6 @@ pte_t *__page_check_address(struct page *page, struct mm_struct *mm, | |||
615 | if (!pmd) | 622 | if (!pmd) |
616 | return NULL; | 623 | return NULL; |
617 | 624 | ||
618 | if (pmd_trans_huge(*pmd)) | ||
619 | return NULL; | ||
620 | |||
621 | pte = pte_offset_map(pmd, address); | 625 | pte = pte_offset_map(pmd, address); |
622 | /* Make a quick check before getting the lock */ | 626 | /* Make a quick check before getting the lock */ |
623 | if (!sync && !pte_present(*pte)) { | 627 | if (!sync && !pte_present(*pte)) { |
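
The mm_find_pmd() change is about reading *pmd exactly once: testing pmd_present(*pmd) and pmd_trans_huge(*pmd) as two separate dereferences can observe two different values while a THP fault is flipping the pmd, letting each test pass against a value the other never saw. A minimal sketch of the snapshot idiom, with made-up flag bits standing in for real pmd state:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PMD_PRESENT	0x1
#define PMD_HUGE	0x2

static _Atomic uint64_t pmd_val;	/* flipped concurrently by a THP fault */

/* Snapshot once and test the snapshot, as mm_find_pmd() now does with
 * pmde = ACCESS_ONCE(*pmd); both conditions are then judged against
 * one and the same value. */
static int pmd_usable(void)
{
	uint64_t pmde = atomic_load(&pmd_val);

	return (pmde & PMD_PRESENT) && !(pmde & PMD_HUGE);
}

int main(void)
{
	atomic_store(&pmd_val, PMD_PRESENT | PMD_HUGE);
	printf("%d\n", pmd_usable());	/* 0: huge pmd, no pte level to map */
	return 0;
}

With the check centralized here, the callers in ksm, migrate, and rmap can drop their own pmd_trans_huge() tests and BUG_ON, as the hunks above do.
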
diff --git a/mm/shmem.c b/mm/shmem.c index f484c276e994..8f419cff9e34 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -80,11 +80,12 @@ static struct vfsmount *shm_mnt; | |||
80 | #define SHORT_SYMLINK_LEN 128 | 80 | #define SHORT_SYMLINK_LEN 128 |
81 | 81 | ||
82 | /* | 82 | /* |
83 | * shmem_fallocate and shmem_writepage communicate via inode->i_private | 83 | * shmem_fallocate communicates with shmem_fault or shmem_writepage via |
84 | * (with i_mutex making sure that it has only one user at a time): | 84 | * inode->i_private (with i_mutex making sure that it has only one user at |
85 | * we would prefer not to enlarge the shmem inode just for that. | 85 | * a time): we would prefer not to enlarge the shmem inode just for that. |
86 | */ | 86 | */ |
87 | struct shmem_falloc { | 87 | struct shmem_falloc { |
88 | int mode; /* FALLOC_FL mode currently operating */ | ||
88 | pgoff_t start; /* start of range currently being fallocated */ | 89 | pgoff_t start; /* start of range currently being fallocated */ |
89 | pgoff_t next; /* the next page offset to be fallocated */ | 90 | pgoff_t next; /* the next page offset to be fallocated */ |
90 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ | 91 | pgoff_t nr_falloced; /* how many new pages have been fallocated */ |
@@ -759,6 +760,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) | |||
759 | spin_lock(&inode->i_lock); | 760 | spin_lock(&inode->i_lock); |
760 | shmem_falloc = inode->i_private; | 761 | shmem_falloc = inode->i_private; |
761 | if (shmem_falloc && | 762 | if (shmem_falloc && |
763 | !shmem_falloc->mode && | ||
762 | index >= shmem_falloc->start && | 764 | index >= shmem_falloc->start && |
763 | index < shmem_falloc->next) | 765 | index < shmem_falloc->next) |
764 | shmem_falloc->nr_unswapped++; | 766 | shmem_falloc->nr_unswapped++; |
@@ -1233,6 +1235,44 @@ static int shmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1233 | int error; | 1235 | int error; |
1234 | int ret = VM_FAULT_LOCKED; | 1236 | int ret = VM_FAULT_LOCKED; |
1235 | 1237 | ||
1238 | /* | ||
1239 | * Trinity finds that probing a hole which tmpfs is punching can | ||
1240 | * prevent the hole-punch from ever completing: which in turn | ||
1241 | * locks writers out with its hold on i_mutex. So refrain from | ||
1242 | * faulting pages into the hole while it's being punched, and | ||
1243 | * wait on i_mutex to be released if vmf->flags permits. | ||
1244 | */ | ||
1245 | if (unlikely(inode->i_private)) { | ||
1246 | struct shmem_falloc *shmem_falloc; | ||
1247 | |||
1248 | spin_lock(&inode->i_lock); | ||
1249 | shmem_falloc = inode->i_private; | ||
1250 | if (!shmem_falloc || | ||
1251 | shmem_falloc->mode != FALLOC_FL_PUNCH_HOLE || | ||
1252 | vmf->pgoff < shmem_falloc->start || | ||
1253 | vmf->pgoff >= shmem_falloc->next) | ||
1254 | shmem_falloc = NULL; | ||
1255 | spin_unlock(&inode->i_lock); | ||
1256 | /* | ||
1257 | * i_lock has protected us from taking shmem_falloc seriously | ||
1258 | * once return from shmem_fallocate() went back up that stack. | ||
1259 | * i_lock does not serialize with i_mutex at all, but it does | ||
1260 | * not matter if sometimes we wait unnecessarily, or sometimes | ||
1261 | * miss out on waiting: we just need to make those cases rare. | ||
1262 | */ | ||
1263 | if (shmem_falloc) { | ||
1264 | if ((vmf->flags & FAULT_FLAG_ALLOW_RETRY) && | ||
1265 | !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { | ||
1266 | up_read(&vma->vm_mm->mmap_sem); | ||
1267 | mutex_lock(&inode->i_mutex); | ||
1268 | mutex_unlock(&inode->i_mutex); | ||
1269 | return VM_FAULT_RETRY; | ||
1270 | } | ||
1271 | /* cond_resched? Leave that to GUP or return to user */ | ||
1272 | return VM_FAULT_NOPAGE; | ||
1273 | } | ||
1274 | } | ||
1275 | |||
1236 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); | 1276 | error = shmem_getpage(inode, vmf->pgoff, &vmf->page, SGP_CACHE, &ret); |
1237 | if (error) | 1277 | if (error) |
1238 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); | 1278 | return ((error == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS); |
@@ -1724,20 +1764,31 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset, | |||
1724 | pgoff_t start, index, end; | 1764 | pgoff_t start, index, end; |
1725 | int error; | 1765 | int error; |
1726 | 1766 | ||
1767 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | ||
1768 | return -EOPNOTSUPP; | ||
1769 | |||
1727 | mutex_lock(&inode->i_mutex); | 1770 | mutex_lock(&inode->i_mutex); |
1728 | 1771 | ||
1772 | shmem_falloc.mode = mode & ~FALLOC_FL_KEEP_SIZE; | ||
1773 | |||
1729 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 1774 | if (mode & FALLOC_FL_PUNCH_HOLE) { |
1730 | struct address_space *mapping = file->f_mapping; | 1775 | struct address_space *mapping = file->f_mapping; |
1731 | loff_t unmap_start = round_up(offset, PAGE_SIZE); | 1776 | loff_t unmap_start = round_up(offset, PAGE_SIZE); |
1732 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; | 1777 | loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1; |
1733 | 1778 | ||
1779 | shmem_falloc.start = unmap_start >> PAGE_SHIFT; | ||
1780 | shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT; | ||
1781 | spin_lock(&inode->i_lock); | ||
1782 | inode->i_private = &shmem_falloc; | ||
1783 | spin_unlock(&inode->i_lock); | ||
1784 | |||
1734 | if ((u64)unmap_end > (u64)unmap_start) | 1785 | if ((u64)unmap_end > (u64)unmap_start) |
1735 | unmap_mapping_range(mapping, unmap_start, | 1786 | unmap_mapping_range(mapping, unmap_start, |
1736 | 1 + unmap_end - unmap_start, 0); | 1787 | 1 + unmap_end - unmap_start, 0); |
1737 | shmem_truncate_range(inode, offset, offset + len - 1); | 1788 | shmem_truncate_range(inode, offset, offset + len - 1); |
1738 | /* No need to unmap again: hole-punching leaves COWed pages */ | 1789 | /* No need to unmap again: hole-punching leaves COWed pages */ |
1739 | error = 0; | 1790 | error = 0; |
1740 | goto out; | 1791 | goto undone; |
1741 | } | 1792 | } |
1742 | 1793 | ||
1743 | /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ | 1794 | /* We need to check rlimit even when FALLOC_FL_KEEP_SIZE */ |
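
The shmem_fault() guard combines two locks of different weights: i_lock is used only to take a consistent peek at the in-flight shmem_falloc, while the actual waiting is done by dropping mmap_sem and briefly acquiring the i_mutex the hole-puncher holds. A pthreads sketch of that drop-and-wait shape; punch_in_progress and the lock names are illustrative, not the shmem code itself:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t mmap_sem = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t i_mutex = PTHREAD_MUTEX_INITIALIZER;
static _Atomic int punch_in_progress;	/* set by the "fallocate" side */

enum { FAULT_OK, FAULT_RETRY };

/* Entered with mmap_sem read-held, like shmem_fault(). */
static int fault(void)
{
	if (punch_in_progress) {
		/* Drop our read lock so the puncher is not blocked on us,
		 * then take-and-release the mutex it holds: by the time we
		 * acquire it, the punch is done, and the caller retries. */
		pthread_rwlock_unlock(&mmap_sem);
		pthread_mutex_lock(&i_mutex);
		pthread_mutex_unlock(&i_mutex);
		return FAULT_RETRY;
	}
	/* ...normal fault path... */
	pthread_rwlock_unlock(&mmap_sem);
	return FAULT_OK;
}

int main(void)
{
	pthread_rwlock_rdlock(&mmap_sem);
	printf("%d\n", fault());	/* 0: no punch in flight */
	return 0;
}

As the comment in the hunk notes, the check tolerates rare false waits and rare misses; it only has to make the livelock Trinity provoked improbable, not impossible.
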
diff --git a/mm/slab.c b/mm/slab.c --- a/mm/slab.c +++ b/mm/slab.c | |||
@@ -386,6 +386,39 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
386 | 386 | ||
387 | #endif | 387 | #endif |
388 | 388 | ||
389 | #define OBJECT_FREE (0) | ||
390 | #define OBJECT_ACTIVE (1) | ||
391 | |||
392 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
393 | |||
394 | static void set_obj_status(struct page *page, int idx, int val) | ||
395 | { | ||
396 | int freelist_size; | ||
397 | char *status; | ||
398 | struct kmem_cache *cachep = page->slab_cache; | ||
399 | |||
400 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
401 | status = (char *)page->freelist + freelist_size; | ||
402 | status[idx] = val; | ||
403 | } | ||
404 | |||
405 | static inline unsigned int get_obj_status(struct page *page, int idx) | ||
406 | { | ||
407 | int freelist_size; | ||
408 | char *status; | ||
409 | struct kmem_cache *cachep = page->slab_cache; | ||
410 | |||
411 | freelist_size = cachep->num * sizeof(freelist_idx_t); | ||
412 | status = (char *)page->freelist + freelist_size; | ||
413 | |||
414 | return status[idx]; | ||
415 | } | ||
416 | |||
417 | #else | ||
418 | static inline void set_obj_status(struct page *page, int idx, int val) {} | ||
419 | |||
420 | #endif | ||
421 | |||
389 | /* | 422 | /* |
390 | * Do not go above this order unless 0 objects fit into the slab or | 423 | * Do not go above this order unless 0 objects fit into the slab or |
391 | * overridden on the command line. | 424 | * overridden on the command line. |
@@ -576,12 +609,30 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
576 | return cachep->array[smp_processor_id()]; | 609 | return cachep->array[smp_processor_id()]; |
577 | } | 610 | } |
578 | 611 | ||
612 | static size_t calculate_freelist_size(int nr_objs, size_t align) | ||
613 | { | ||
614 | size_t freelist_size; | ||
615 | |||
616 | freelist_size = nr_objs * sizeof(freelist_idx_t); | ||
617 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
618 | freelist_size += nr_objs * sizeof(char); | ||
619 | |||
620 | if (align) | ||
621 | freelist_size = ALIGN(freelist_size, align); | ||
622 | |||
623 | return freelist_size; | ||
624 | } | ||
625 | |||
579 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, | 626 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, |
580 | size_t idx_size, size_t align) | 627 | size_t idx_size, size_t align) |
581 | { | 628 | { |
582 | int nr_objs; | 629 | int nr_objs; |
630 | size_t remained_size; | ||
583 | size_t freelist_size; | 631 | size_t freelist_size; |
632 | int extra_space = 0; | ||
584 | 633 | ||
634 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
635 | extra_space = sizeof(char); | ||
585 | /* | 636 | /* |
586 | * Ignore padding for the initial guess. The padding | 637 | * Ignore padding for the initial guess. The padding |
587 | * is at most @align-1 bytes, and @buffer_size is at | 638 | * is at most @align-1 bytes, and @buffer_size is at |
@@ -590,14 +641,15 @@ static int calculate_nr_objs(size_t slab_size, size_t buffer_size, | |||
590 | * into the memory allocation when taking the padding | 641 | * into the memory allocation when taking the padding |
591 | * into account. | 642 | * into account. |
592 | */ | 643 | */ |
593 | nr_objs = slab_size / (buffer_size + idx_size); | 644 | nr_objs = slab_size / (buffer_size + idx_size + extra_space); |
594 | 645 | ||
595 | /* | 646 | /* |
596 | * This calculated number will be either the right | 647 | * This calculated number will be either the right |
597 | * amount, or one greater than what we want. | 648 | * amount, or one greater than what we want. |
598 | */ | 649 | */ |
599 | freelist_size = slab_size - nr_objs * buffer_size; | 650 | remained_size = slab_size - nr_objs * buffer_size; |
600 | if (freelist_size < ALIGN(nr_objs * idx_size, align)) | 651 | freelist_size = calculate_freelist_size(nr_objs, align); |
652 | if (remained_size < freelist_size) | ||
601 | nr_objs--; | 653 | nr_objs--; |
602 | 654 | ||
603 | return nr_objs; | 655 | return nr_objs; |
@@ -635,7 +687,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
635 | } else { | 687 | } else { |
636 | nr_objs = calculate_nr_objs(slab_size, buffer_size, | 688 | nr_objs = calculate_nr_objs(slab_size, buffer_size, |
637 | sizeof(freelist_idx_t), align); | 689 | sizeof(freelist_idx_t), align); |
638 | mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align); | 690 | mgmt_size = calculate_freelist_size(nr_objs, align); |
639 | } | 691 | } |
640 | *num = nr_objs; | 692 | *num = nr_objs; |
641 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; | 693 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; |
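
calculate_freelist_size() centralizes the slab layout math: the array of object indexes, an optional status byte per object when CONFIG_DEBUG_SLAB_LEAK is on, then alignment padding. A standalone rendition under two assumptions: freelist_idx_t is fixed to unsigned char here, whereas the kernel picks the narrowest type that fits, and the ALIGN macro requires a power-of-two alignment:

#include <stddef.h>
#include <stdio.h>

typedef unsigned char freelist_idx_t;

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((size_t)(a) - 1))

static size_t calculate_freelist_size(int nr_objs, size_t align, int leak_debug)
{
	size_t size = nr_objs * sizeof(freelist_idx_t);

	if (leak_debug)			/* one status byte per object */
		size += nr_objs * sizeof(char);
	if (align)
		size = ALIGN(size, align);
	return size;
}

int main(void)
{
	printf("%zu %zu\n",
	       calculate_freelist_size(60, 8, 0),	/* 64 */
	       calculate_freelist_size(60, 8, 1));	/* 120 */
	return 0;
}

Routing every freelist-size computation through one helper is what lets the debug status bytes be added without the off-slab and on-slab paths drifting apart.
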
@@ -2041,13 +2093,16 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2041 | break; | 2093 | break; |
2042 | 2094 | ||
2043 | if (flags & CFLGS_OFF_SLAB) { | 2095 | if (flags & CFLGS_OFF_SLAB) { |
2096 | size_t freelist_size_per_obj = sizeof(freelist_idx_t); | ||
2044 | /* | 2097 | /* |
2045 | * Max number of objs-per-slab for caches which | 2098 | * Max number of objs-per-slab for caches which |
2046 | * use off-slab slabs. Needed to avoid a possible | 2099 | * use off-slab slabs. Needed to avoid a possible |
2047 | * looping condition in cache_grow(). | 2100 | * looping condition in cache_grow(). |
2048 | */ | 2101 | */ |
2102 | if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK)) | ||
2103 | freelist_size_per_obj += sizeof(char); | ||
2049 | offslab_limit = size; | 2104 | offslab_limit = size; |
2050 | offslab_limit /= sizeof(freelist_idx_t); | 2105 | offslab_limit /= freelist_size_per_obj; |
2051 | 2106 | ||
2052 | if (num > offslab_limit) | 2107 | if (num > offslab_limit) |
2053 | break; | 2108 | break; |
@@ -2294,8 +2349,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2294 | if (!cachep->num) | 2349 | if (!cachep->num) |
2295 | return -E2BIG; | 2350 | return -E2BIG; |
2296 | 2351 | ||
2297 | freelist_size = | 2352 | freelist_size = calculate_freelist_size(cachep->num, cachep->align); |
2298 | ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align); | ||
2299 | 2353 | ||
2300 | /* | 2354 | /* |
2301 | * If the slab has been placed off-slab, and we have enough space then | 2355 | * If the slab has been placed off-slab, and we have enough space then |
@@ -2308,7 +2362,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2308 | 2362 | ||
2309 | if (flags & CFLGS_OFF_SLAB) { | 2363 | if (flags & CFLGS_OFF_SLAB) { |
2310 | /* really off slab. No need for manual alignment */ | 2364 | /* really off slab. No need for manual alignment */ |
2311 | freelist_size = cachep->num * sizeof(freelist_idx_t); | 2365 | freelist_size = calculate_freelist_size(cachep->num, 0); |
2312 | 2366 | ||
2313 | #ifdef CONFIG_PAGE_POISONING | 2367 | #ifdef CONFIG_PAGE_POISONING |
2314 | /* If we're going to use the generic kernel_map_pages() | 2368 | /* If we're going to use the generic kernel_map_pages() |
@@ -2612,6 +2666,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2612 | if (cachep->ctor) | 2666 | if (cachep->ctor) |
2613 | cachep->ctor(objp); | 2667 | cachep->ctor(objp); |
2614 | #endif | 2668 | #endif |
2669 | set_obj_status(page, i, OBJECT_FREE); | ||
2615 | set_free_obj(page, i, i); | 2670 | set_free_obj(page, i, i); |
2616 | } | 2671 | } |
2617 | } | 2672 | } |
@@ -2820,6 +2875,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2820 | BUG_ON(objnr >= cachep->num); | 2875 | BUG_ON(objnr >= cachep->num); |
2821 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); | 2876 | BUG_ON(objp != index_to_obj(cachep, page, objnr)); |
2822 | 2877 | ||
2878 | set_obj_status(page, objnr, OBJECT_FREE); | ||
2823 | if (cachep->flags & SLAB_POISON) { | 2879 | if (cachep->flags & SLAB_POISON) { |
2824 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2880 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2825 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { | 2881 | if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
@@ -2953,6 +3009,8 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, | |||
2953 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | 3009 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
2954 | gfp_t flags, void *objp, unsigned long caller) | 3010 | gfp_t flags, void *objp, unsigned long caller) |
2955 | { | 3011 | { |
3012 | struct page *page; | ||
3013 | |||
2956 | if (!objp) | 3014 | if (!objp) |
2957 | return objp; | 3015 | return objp; |
2958 | if (cachep->flags & SLAB_POISON) { | 3016 | if (cachep->flags & SLAB_POISON) { |
@@ -2983,6 +3041,9 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, | |||
2983 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 3041 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
2984 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 3042 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
2985 | } | 3043 | } |
3044 | |||
3045 | page = virt_to_head_page(objp); | ||
3046 | set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE); | ||
2986 | objp += obj_offset(cachep); | 3047 | objp += obj_offset(cachep); |
2987 | if (cachep->ctor && cachep->flags & SLAB_POISON) | 3048 | if (cachep->ctor && cachep->flags & SLAB_POISON) |
2988 | cachep->ctor(objp); | 3049 | cachep->ctor(objp); |
@@ -4219,21 +4280,12 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, | |||
4219 | struct page *page) | 4280 | struct page *page) |
4220 | { | 4281 | { |
4221 | void *p; | 4282 | void *p; |
4222 | int i, j; | 4283 | int i; |
4223 | 4284 | ||
4224 | if (n[0] == n[1]) | 4285 | if (n[0] == n[1]) |
4225 | return; | 4286 | return; |
4226 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { | 4287 | for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) { |
4227 | bool active = true; | 4288 | if (get_obj_status(page, i) != OBJECT_ACTIVE) |
4228 | |||
4229 | for (j = page->active; j < c->num; j++) { | ||
4230 | /* Skip freed item */ | ||
4231 | if (get_free_obj(page, j) == i) { | ||
4232 | active = false; | ||
4233 | break; | ||
4234 | } | ||
4235 | } | ||
4236 | if (!active) | ||
4237 | continue; | 4289 | continue; |
4238 | 4290 | ||
4239 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | 4291 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) |
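
The handle_slab() rewrite is the payoff of those status bytes: instead of scanning the freelist tail for every object, roughly O(num^2) per slab, the leak scanner now reads one byte per object. A toy comparison of the two lookups; the freelist layout is simplified here, since in the kernel only entries from page->active onward are free:

#include <stdio.h>

#define NUM 6

/* Before: decide "active" by scanning the free slots for each object. */
static int active_by_scan(const unsigned char *freelist, int nfree, int idx)
{
	for (int j = 0; j < nfree; j++)
		if (freelist[j] == idx)
			return 0;	/* found among the free slots */
	return 1;
}

/* After: one status byte per object, maintained at alloc/free time. */
static int active_by_status(const unsigned char *status, int idx)
{
	return status[idx];		/* OBJECT_ACTIVE = 1, OBJECT_FREE = 0 */
}

int main(void)
{
	unsigned char freelist[NUM] = { 2, 5 };	/* objects 2 and 5 are free */
	unsigned char status[NUM]   = { 1, 1, 0, 1, 1, 0 };
	int nfree = 2;

	for (int i = 0; i < NUM; i++)
		printf("%d%d ", active_by_scan(freelist, nfree, i),
		       active_by_status(status, i));
	printf("\n");
	return 0;	/* prints matching pairs: 11 11 00 11 11 00 */
}
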
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 010b18ef4ea0..182be0f12407 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -3476,12 +3476,17 @@ sub process { | |||
3476 | } | 3476 | } |
3477 | } | 3477 | } |
3478 | 3478 | ||
3479 | # unnecessary return in a void function? (a single leading tab, then return;) | 3479 | # unnecessary return in a void function |
3480 | if ($sline =~ /^\+\treturn\s*;\s*$/ && | 3480 | # at end-of-function, with the previous line a single leading tab, then return; |
3481 | $prevline =~ /^\+/) { | 3481 | # and the line before that not a goto label target like "out:" |
3482 | if ($sline =~ /^[ \+]}\s*$/ && | ||
3483 | $prevline =~ /^\+\treturn\s*;\s*$/ && | ||
3484 | $linenr >= 3 && | ||
3485 | $lines[$linenr - 3] =~ /^[ +]/ && | ||
3486 | $lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) { | ||
3482 | WARN("RETURN_VOID", | 3487 | WARN("RETURN_VOID", |
3483 | "void function return statements are not generally useful\n" . $herecurr); | 3488 | "void function return statements are not generally useful\n" . $hereprev); |
3484 | } | 3489 | } |
3485 | 3490 | ||
3486 | # if statements using unnecessary parentheses - ie: if ((foo == bar)) | 3491 | # if statements using unnecessary parentheses - ie: if ((foo == bar)) |
3487 | if ($^V && $^V ge 5.10.0 && | 3492 | if ($^V && $^V ge 5.10.0 && |