72 files changed, 1046 insertions, 534 deletions
@@ -2808,8 +2808,7 @@ S: Ottawa, Ontario
 S: Canada K2P 0X8
 
 N: Mikael Pettersson
-E: mikpe@it.uu.se
-W: http://user.it.uu.se/~mikpe/linux/
+E: mikpelinux@gmail.com
 D: Miscellaneous fixes
 
 N: Reed H. Petty
diff --git a/MAINTAINERS b/MAINTAINERS
index e61c2e83fc2b..c53fe9559642 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1812,7 +1812,8 @@ S: Supported
 F: drivers/net/ethernet/broadcom/bnx2x/
 
 BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE
-M: Christian Daudt <csd@broadcom.com>
+M: Christian Daudt <bcm@fixthebug.org>
+L: bcm-kernel-feedback-list@broadcom.com
 T: git git://git.github.com/broadcom/bcm11351
 S: Maintained
 F: arch/arm/mach-bcm/
@@ -6595,7 +6596,7 @@ S: Obsolete
 F: drivers/net/wireless/prism54/
 
 PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER
-M: Mikael Pettersson <mikpe@it.uu.se>
+M: Mikael Pettersson <mikpelinux@gmail.com>
 L: linux-ide@vger.kernel.org
 S: Maintained
 F: drivers/ata/sata_promise.*
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6a15c968d214..15ca2255f438 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c
 src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
 src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c
 
-src-plat-y := of.c
+src-plat-y := of.c epapr.c
 src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
				treeboot-walnut.c cuboot-acadia.c \
				cuboot-kilauea.c simpleboot.c \
@@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
					prpmc2800.c
 src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
 src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
-src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c
+src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
 
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c
new file mode 100644
index 000000000000..c10191006673
--- /dev/null
+++ b/arch/powerpc/boot/epapr-wrapper.c
@@ -0,0 +1,9 @@
+extern void epapr_platform_init(unsigned long r3, unsigned long r4,
+				unsigned long r5, unsigned long r6,
+				unsigned long r7);
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	epapr_platform_init(r3, r4, r5, r6, r7);
+}
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
index 06c1961bd124..02e91aa2194a 100644
--- a/arch/powerpc/boot/epapr.c
+++ b/arch/powerpc/boot/epapr.c
@@ -48,8 +48,8 @@ static void platform_fixups(void)
	       fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
 }
 
-void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
		   unsigned long r6, unsigned long r7)
 {
	epapr_magic = r6;
	ima_size = r7;
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 61d9899aa0d0..62e2f43ec1df 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c
@@ -26,6 +26,9 @@
 
 static unsigned long claim_base;
 
+void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+			 unsigned long r6, unsigned long r7);
+
 static void *of_try_claim(unsigned long size)
 {
	unsigned long addr = 0;
@@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr)
	}
 }
 
-void platform_init(unsigned long a1, unsigned long a2, void *promptr)
+static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr)
 {
	platform_ops.image_hdr = of_image_hdr;
	platform_ops.malloc = of_try_claim;
@@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr)
		loader_info.initrd_size = a2;
	}
 }
+
+void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+		   unsigned long r6, unsigned long r7)
+{
+	/* Detect OF vs. ePAPR boot */
+	if (r5)
+		of_platform_init(r3, r4, (void *)r5);
+	else
+		epapr_platform_init(r3, r4, r5, r6, r7);
+}
+
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 6761c746048d..cd7af841ba05 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -148,18 +148,18 @@ make_space=y
 
 case "$platform" in
 pseries)
-    platformo=$object/of.o
+    platformo="$object/of.o $object/epapr.o"
     link_address='0x4000000'
     ;;
 maple)
-    platformo=$object/of.o
+    platformo="$object/of.o $object/epapr.o"
     link_address='0x400000'
     ;;
 pmac|chrp)
-    platformo=$object/of.o
+    platformo="$object/of.o $object/epapr.o"
     ;;
 coff)
-    platformo="$object/crt0.o $object/of.o"
+    platformo="$object/crt0.o $object/of.o $object/epapr.o"
     lds=$object/zImage.coff.lds
     link_address='0x500000'
     pie=
@@ -253,6 +253,7 @@ treeboot-iss4xx-mpic)
     platformo="$object/treeboot-iss4xx.o"
     ;;
 epapr)
+    platformo="$object/epapr.o $object/epapr-wrapper.o"
     link_address='0x20000000'
     pie=-pie
     ;;
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 0e40843a1c6e..41f13cec8a8f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
 
 extern void irq_ctx_init(void);
 extern void call_do_softirq(struct thread_info *tp);
-extern int call_handle_irq(int irq, void *p1,
-			   struct thread_info *tp, void *func);
+extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
 extern void do_IRQ(struct pt_regs *regs);
+extern void __do_irq(struct pt_regs *regs);
 
 int irq_choose_cpu(const struct cpumask *mask);
 
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e378cccfca55..ce4de5aed7b5 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -149,8 +149,6 @@ typedef struct {
 
 struct thread_struct {
	unsigned long ksp;		/* Kernel stack pointer */
-	unsigned long ksp_limit;	/* if ksp <= ksp_limit stack overflow */
-
 #ifdef CONFIG_PPC64
	unsigned long ksp_vsid;
 #endif
@@ -162,6 +160,7 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC32
	void *pgdir;			/* root of page-table tree */
+	unsigned long ksp_limit;	/* if ksp <= ksp_limit stack overflow */
 #endif
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
	/*
@@ -321,7 +320,6 @@ struct thread_struct {
 #else
 #define INIT_THREAD { \
	.ksp = INIT_SP, \
-	.ksp_limit = INIT_SP_LIMIT, \
	.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
	.fs = KERNEL_DS, \
	.fpr = {{0}}, \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d8958be5f31a..502c7a4e73f7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -80,10 +80,11 @@ int main(void)
	DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
 #else
	DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
+	DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
+	DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
 #endif /* CONFIG_PPC64 */
 
	DEFINE(KSP, offsetof(struct thread_struct, ksp));
-	DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
	DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
 #ifdef CONFIG_BOOKE
	DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c69440cef7af..57d286a78f86 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -441,50 +441,6 @@ void migrate_irqs(void)
 }
 #endif
 
-static inline void handle_one_irq(unsigned int irq)
-{
-	struct thread_info *curtp, *irqtp;
-	unsigned long saved_sp_limit;
-	struct irq_desc *desc;
-
-	desc = irq_to_desc(irq);
-	if (!desc)
-		return;
-
-	/* Switch to the irq stack to handle this */
-	curtp = current_thread_info();
-	irqtp = hardirq_ctx[smp_processor_id()];
-
-	if (curtp == irqtp) {
-		/* We're already on the irq stack, just handle it */
-		desc->handle_irq(irq, desc);
-		return;
-	}
-
-	saved_sp_limit = current->thread.ksp_limit;
-
-	irqtp->task = curtp->task;
-	irqtp->flags = 0;
-
-	/* Copy the softirq bits in preempt_count so that the
-	 * softirq checks work in the hardirq context. */
-	irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
-		(curtp->preempt_count & SOFTIRQ_MASK);
-
-	current->thread.ksp_limit = (unsigned long)irqtp +
-		_ALIGN_UP(sizeof(struct thread_info), 16);
-
-	call_handle_irq(irq, desc, irqtp, desc->handle_irq);
-	current->thread.ksp_limit = saved_sp_limit;
-	irqtp->task = NULL;
-
-	/* Set any flag that may have been set on the
-	 * alternate stack
-	 */
-	if (irqtp->flags)
-		set_bits(irqtp->flags, &curtp->flags);
-}
-
 static inline void check_stack_overflow(void)
 {
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -501,9 +457,9 @@ static inline void check_stack_overflow(void)
 #endif
 }
 
-void do_IRQ(struct pt_regs *regs)
+void __do_irq(struct pt_regs *regs)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct irq_desc *desc;
	unsigned int irq;
 
	irq_enter();
@@ -519,18 +475,56 @@ void do_IRQ(struct pt_regs *regs)
	 */
	irq = ppc_md.get_irq();
 
-	/* We can hard enable interrupts now */
+	/* We can hard enable interrupts now to allow perf interrupts */
	may_hard_irq_enable();
 
	/* And finally process it */
-	if (irq != NO_IRQ)
-		handle_one_irq(irq);
-	else
+	if (unlikely(irq == NO_IRQ))
		__get_cpu_var(irq_stat).spurious_irqs++;
+	else {
+		desc = irq_to_desc(irq);
+		if (likely(desc))
+			desc->handle_irq(irq, desc);
+	}
 
	trace_irq_exit(regs);
 
	irq_exit();
+}
+
+void do_IRQ(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	struct thread_info *curtp, *irqtp;
+
+	/* Switch to the irq stack to handle this */
+	curtp = current_thread_info();
+	irqtp = hardirq_ctx[raw_smp_processor_id()];
+
+	/* Already there ? */
+	if (unlikely(curtp == irqtp)) {
+		__do_irq(regs);
+		set_irq_regs(old_regs);
+		return;
+	}
+
+	/* Prepare the thread_info in the irq stack */
+	irqtp->task = curtp->task;
+	irqtp->flags = 0;
+
+	/* Copy the preempt_count so that the [soft]irq checks work. */
+	irqtp->preempt_count = curtp->preempt_count;
+
+	/* Switch stack and call */
+	call_do_irq(regs, irqtp);
+
+	/* Restore stack limit */
+	irqtp->task = NULL;
+
+	/* Copy back updates to the thread_info */
+	if (irqtp->flags)
+		set_bits(irqtp->flags, &curtp->flags);
+
	set_irq_regs(old_regs);
 }
 
@@ -592,28 +586,22 @@ void irq_ctx_init(void)
		memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
		tp = softirq_ctx[i];
		tp->cpu = i;
-		tp->preempt_count = 0;
 
		memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
		tp = hardirq_ctx[i];
		tp->cpu = i;
-		tp->preempt_count = HARDIRQ_OFFSET;
	}
 }
 
 static inline void do_softirq_onstack(void)
 {
	struct thread_info *curtp, *irqtp;
-	unsigned long saved_sp_limit = current->thread.ksp_limit;
 
	curtp = current_thread_info();
	irqtp = softirq_ctx[smp_processor_id()];
	irqtp->task = curtp->task;
	irqtp->flags = 0;
-	current->thread.ksp_limit = (unsigned long)irqtp +
-		_ALIGN_UP(sizeof(struct thread_info), 16);
	call_do_softirq(irqtp);
-	current->thread.ksp_limit = saved_sp_limit;
	irqtp->task = NULL;
 
	/* Set any flag that may have been set on the
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 777d999f563b..2b0ad9845363 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -36,26 +36,41 @@
 
	.text
 
+/*
+ * We store the saved ksp_limit in the unused part
+ * of the STACK_FRAME_OVERHEAD
+ */
 _GLOBAL(call_do_softirq)
	mflr	r0
	stw	r0,4(r1)
+	lwz	r10,THREAD+KSP_LIMIT(r2)
+	addi	r11,r3,THREAD_INFO_GAP
	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
	mr	r1,r3
+	stw	r10,8(r1)
+	stw	r11,THREAD+KSP_LIMIT(r2)
	bl	__do_softirq
+	lwz	r10,8(r1)
	lwz	r1,0(r1)
	lwz	r0,4(r1)
+	stw	r10,THREAD+KSP_LIMIT(r2)
	mtlr	r0
	blr
 
-_GLOBAL(call_handle_irq)
+_GLOBAL(call_do_irq)
	mflr	r0
	stw	r0,4(r1)
-	mtctr	r6
-	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
-	mr	r1,r5
-	bctrl
+	lwz	r10,THREAD+KSP_LIMIT(r2)
+	addi	r11,r3,THREAD_INFO_GAP
+	stwu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+	mr	r1,r4
+	stw	r10,8(r1)
+	stw	r11,THREAD+KSP_LIMIT(r2)
+	bl	__do_irq
+	lwz	r10,8(r1)
	lwz	r1,0(r1)
	lwz	r0,4(r1)
+	stw	r10,THREAD+KSP_LIMIT(r2)
	mtlr	r0
	blr
 
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 971d7e78aff2..e59caf874d05 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq)
	mtlr	r0
	blr
 
-_GLOBAL(call_handle_irq)
-	ld	r8,0(r6)
+_GLOBAL(call_do_irq)
	mflr	r0
	std	r0,16(r1)
-	mtctr	r8
-	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5)
-	mr	r1,r5
-	bctrl
+	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
+	mr	r1,r4
+	bl	.__do_irq
	ld	r1,0(r1)
	ld	r0,16(r1)
	mtlr	r0
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6f428da53e20..96d2fdf3aa9e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
	kregs = (struct pt_regs *) sp;
	sp -= STACK_FRAME_OVERHEAD;
	p->thread.ksp = sp;
+#ifdef CONFIG_PPC32
	p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
		_ALIGN_UP(sizeof(struct thread_info), 16);
-
+#endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
	p->thread.ptrace_bps[0] = NULL;
 #endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 12e656ffe60e..5fe2842e8bab 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt;
 
 static cell_t __initdata regbuf[1024];
 
+static bool rtas_has_query_cpu_stopped;
+
 
 /*
  * Error results ... some OF calls will return "-1" on error, some
@@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void)
	prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
		     &val, sizeof(val));
 
+	/* Check if it supports "query-cpu-stopped-state" */
+	if (prom_getprop(rtas_node, "query-cpu-stopped-state",
+			 &val, sizeof(val)) != PROM_ERROR)
+		rtas_has_query_cpu_stopped = true;
+
 #if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
	/* PowerVN takeover hack */
	prom_rtas_data = base;
@@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void)
		= (void *) LOW_ADDR(__secondary_hold_acknowledge);
	unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
 
+	/*
+	 * On pseries, if RTAS supports "query-cpu-stopped-state",
+	 * we skip this stage, the CPUs will be started by the
+	 * kernel using RTAS.
+	 */
+	if ((of_platform == PLATFORM_PSERIES ||
+	     of_platform == PLATFORM_PSERIES_LPAR) &&
+	    rtas_has_query_cpu_stopped) {
+		prom_printf("prom_hold_cpus: skipped\n");
+		return;
+	}
+
	prom_debug("prom_hold_cpus: start...\n");
	prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
	prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
@@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
	 * On non-powermacs, put all CPUs in spin-loops.
	 *
	 * PowerMacs use a different mechanism to spin CPUs
+	 *
+	 * (This must be done after instanciating RTAS)
	 */
	if (of_platform != PLATFORM_POWERMAC &&
	    of_platform != PLATFORM_OPAL)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a7ee978fb860..b1faa1593c90 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
		 */
		if ((ra == 1) && !(regs->msr & MSR_PR) \
			&& (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
+#ifdef CONFIG_PPC32
			/*
			 * Check if we will touch kernel sack overflow
			 */
@@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
				err = -EINVAL;
				break;
			}
-
+#endif /* CONFIG_PPC32 */
			/*
			 * Check if we already set since that means we'll
			 * lose the previous value.
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 1c1771a40250..24f58cb0a543 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -233,18 +233,24 @@ static void __init smp_init_pseries(void)
 
	alloc_bootmem_cpumask_var(&of_spin_mask);
 
-	/* Mark threads which are still spinning in hold loops. */
-	if (cpu_has_feature(CPU_FTR_SMT)) {
-		for_each_present_cpu(i) {
-			if (cpu_thread_in_core(i) == 0)
-				cpumask_set_cpu(i, of_spin_mask);
-		}
-	} else {
-		cpumask_copy(of_spin_mask, cpu_present_mask);
+	/*
+	 * Mark threads which are still spinning in hold loops
+	 *
+	 * We know prom_init will not have started them if RTAS supports
+	 * query-cpu-stopped-state.
+	 */
+	if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) {
+		if (cpu_has_feature(CPU_FTR_SMT)) {
+			for_each_present_cpu(i) {
+				if (cpu_thread_in_core(i) == 0)
+					cpumask_set_cpu(i, of_spin_mask);
+			}
+		} else
+			cpumask_copy(of_spin_mask, cpu_present_mask);
+
+		cpumask_clear_cpu(boot_cpuid, of_spin_mask);
	}
 
-	cpumask_clear_cpu(boot_cpuid, of_spin_mask);
-
	/* Non-lpar has additional take/give timebase */
	if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
		smp_ops->give_timebase = rtas_give_timebase;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8355c84b9729..a9c606bb4945 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1883,9 +1883,9 @@ static struct pmu pmu = {
 
 void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
 {
-	userpg->cap_usr_time = 0;
-	userpg->cap_usr_time_zero = 0;
-	userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc;
+	userpg->cap_user_time = 0;
+	userpg->cap_user_time_zero = 0;
+	userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
	userpg->pmc_width = x86_pmu.cntval_bits;
 
	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
	if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
		return;
 
-	userpg->cap_usr_time = 1;
+	userpg->cap_user_time = 1;
	userpg->time_mult = this_cpu_read(cyc2ns);
	userpg->time_shift = CYC2NS_SCALE_FACTOR;
	userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
 
	if (sched_clock_stable && !check_tsc_disabled()) {
-		userpg->cap_usr_time_zero = 1;
+		userpg->cap_user_time_zero = 1;
		userpg->time_zero = this_cpu_read(cyc2ns_offset);
	}
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9db76c31b3c3..f31a1655d1ff 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void)
		break;
 
	case 55: /* Atom 22nm "Silvermont" */
+	case 77: /* Avoton "Silvermont" */
		memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
			sizeof(hw_cache_event_ids));
		memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 8ed44589b0e4..4118f9f68315 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
	box->hrtimer.function = uncore_pmu_hrtimer;
 }
 
-struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu)
+static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
 {
	struct intel_uncore_box *box;
	int i, size;
 
	size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
 
-	box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
+	box = kzalloc_node(size, GFP_KERNEL, node);
	if (!box)
		return NULL;
 
@@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
	struct intel_uncore_box *fake_box;
	int ret = -EINVAL, n;
 
-	fake_box = uncore_alloc_box(pmu->type, smp_processor_id());
+	fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
	if (!fake_box)
		return -ENOMEM;
 
@@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
	}
 
	type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
-	box = uncore_alloc_box(type, 0);
+	box = uncore_alloc_box(type, NUMA_NO_NODE);
	if (!box)
		return -ENOMEM;
 
@@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
		if (pmu->func_id < 0)
			pmu->func_id = j;
 
-		box = uncore_alloc_box(type, cpu);
+		box = uncore_alloc_box(type, cpu_to_node(cpu));
		if (!box)
			return -ENOMEM;
 
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 563ed91e6faa..e643e744e4d8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -352,12 +352,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
	},
	{ /* Handle problems with rebooting on the Precision M6600. */
		.callback = set_pci_reboot,
-		.ident = "Dell OptiPlex 990",
+		.ident = "Dell Precision M6600",
		.matches = {
			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
			DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
		},
	},
+	{ /* Handle problems with rebooting on the Dell PowerEdge C6100. */
+		.callback = set_pci_reboot,
+		.ident = "Dell PowerEdge C6100",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+		},
+	},
+	{ /* Some C6100 machines were shipped with vendor being 'Dell'. */
+		.callback = set_pci_reboot,
+		.ident = "Dell PowerEdge C6100",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
+		},
+	},
	{ }
 };
 
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90f6ed127096..c7e22ab29a5a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void)
 
	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
		md = p;
-		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
-		    md->type != EFI_BOOT_SERVICES_CODE &&
-		    md->type != EFI_BOOT_SERVICES_DATA)
-			continue;
+		if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
+#ifdef CONFIG_X86_64
+			if (md->type != EFI_BOOT_SERVICES_CODE &&
+			    md->type != EFI_BOOT_SERVICES_DATA)
+#endif
+				continue;
+		}
 
		size = md->num_pages << EFI_PAGE_SHIFT;
		end = md->phys_addr + size;
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 958ba2a420c3..97f4acb54ad6 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -2,7 +2,7 @@
  * sata_promise.c - Promise SATA
  *
  * Maintained by: Tejun Heo <tj@kernel.org>
- *		  Mikael Pettersson <mikpe@it.uu.se>
+ *		  Mikael Pettersson
  * Please ALWAYS copy linux-ide@vger.kernel.org
  * on emails.
  *
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2d95ff5353b..edfa2515bc86 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
	int err;
	u32 cp;
 
+	memset(&arg64, 0, sizeof(arg64));
	err = 0;
	err |=
	    copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 639d26b90b91..2b9440384536 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1193,6 +1193,7 @@ out_passthru:
		ida_pci_info_struct pciinfo;
 
		if (!arg) return -EINVAL;
+		memset(&pciinfo, 0, sizeof(pciinfo));
		pciinfo.bus = host->pci_dev->bus->number;
		pciinfo.dev_fn = host->pci_dev->devfn;
		pciinfo.board_id = host->board_id;
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index b1f8fc69023f..60e84043aa34 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -707,8 +707,7 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode)
		reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2);
		break;
	case DRM_MODE_DPMS_OFF:
-		/* disable audio and video ports */
-		reg_write(encoder, REG_ENA_AP, 0x00);
+		/* disable video ports */
		reg_write(encoder, REG_ENA_VP_0, 0x00);
		reg_write(encoder, REG_ENA_VP_1, 0x00);
		reg_write(encoder, REG_ENA_VP_2, 0x00);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0b45f2..0f12382aa35d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
	 */
	atomic_t has_dirty;
 
-	struct ratelimit writeback_rate;
+	struct bch_ratelimit writeback_rate;
	struct delayed_work writeback_rate_update;
 
	/*
@@ -507,10 +507,9 @@ struct cached_dev {
	 */
	sector_t last_read;
 
-	/* Number of writeback bios in flight */
-	atomic_t in_flight;
+	/* Limit number of writeback bios in flight */
+	struct semaphore in_flight;
	struct closure_with_timer writeback;
-	struct closure_waitlist writeback_wait;
 
	struct keybuf writeback_keys;
 
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8010eed06a51..22d1ae72c282 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
 
 /* Mergesort */
 
+static void sort_key_next(struct btree_iter *iter,
+			  struct btree_iter_set *i)
+{
+	i->k = bkey_next(i->k);
+
+	if (i->k == i->end)
+		*i = iter->data[--iter->used];
+}
+
 static void btree_sort_fixup(struct btree_iter *iter)
 {
	while (iter->used > 1) {
		struct btree_iter_set *top = iter->data, *i = top + 1;
-		struct bkey *k;
 
		if (iter->used > 2 &&
		    btree_iter_cmp(i[0], i[1]))
			i++;
 
-		for (k = i->k;
-		     k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
-		     k = bkey_next(k))
-			if (top->k > i->k)
-				__bch_cut_front(top->k, k);
-			else if (KEY_SIZE(k))
-				bch_cut_back(&START_KEY(k), top->k);
-
-		if (top->k < i->k || k == i->k)
+		if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
			break;
 
-		heap_sift(iter, i - top, btree_iter_cmp);
+		if (!KEY_SIZE(i->k)) {
+			sort_key_next(iter, i);
+			heap_sift(iter, i - top, btree_iter_cmp);
+			continue;
+		}
+
+		if (top->k > i->k) {
+			if (bkey_cmp(top->k, i->k) >= 0)
+				sort_key_next(iter, i);
+			else
+				bch_cut_front(top->k, i->k);
+
+			heap_sift(iter, i - top, btree_iter_cmp);
+		} else {
+			/* can't happen because of comparison func */
+			BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
+			bch_cut_back(&START_KEY(i->k), top->k);
+		}
	}
 }
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index f9764e61978b..f42fc7ed9cd6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
 
	return;
 err:
-	bch_cache_set_error(b->c, "io error reading bucket %lu",
+	bch_cache_set_error(b->c, "io error reading bucket %zu",
			    PTR_BUCKET_NR(b->c, &b->key, 0));
 }
 
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
		return SHRINK_STOP;
 
	/* Return -1 if we can't do anything right now */
-	if (sc->gfp_mask & __GFP_WAIT)
+	if (sc->gfp_mask & __GFP_IO)
		mutex_lock(&c->bucket_lock);
	else if (!mutex_trylock(&c->bucket_lock))
		return -1;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ba95ab84b2be..8435f81e5d85 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
		bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
		pr_debug("%u journal buckets", ca->sb.njournal_buckets);
 
-		/* Read journal buckets ordered by golden ratio hash to quickly
+		/*
+		 * Read journal buckets ordered by golden ratio hash to quickly
		 * find a sequence of buckets with valid journal entries
		 */
		for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
				goto bsearch;
		}
 
-		/* If that fails, check all the buckets we haven't checked
+		/*
+		 * If that fails, check all the buckets we haven't checked
		 * already
		 */
		pr_debug("falling back to linear search");
 
-		for (l = 0; l < ca->sb.njournal_buckets; l++) {
-			if (test_bit(l, bitmap))
-				continue;
-
+		for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
+		     l < ca->sb.njournal_buckets;
+		     l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
			if (read_bucket(l))
				goto bsearch;
-		}
+
+		if (list_empty(list))
+			continue;
 bsearch:
		/* Binary search */
		m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
			r = m;
		}
 
-		/* Read buckets in reverse order until we stop finding more
+		/*
+		 * Read buckets in reverse order until we stop finding more
		 * journal entries
		 */
-		pr_debug("finishing up");
+		pr_debug("finishing up: m %u njournal_buckets %u",
+			 m, ca->sb.njournal_buckets);
		l = m;
 
		while (1) {
@@ -228,9 +233,10 @@ bsearch:
		}
	}
 
-	c->journal.seq = list_entry(list->prev,
-				    struct journal_replay,
-				    list)->j.seq;
+	if (!list_empty(list))
+		c->journal.seq = list_entry(list->prev,
+					    struct journal_replay,
+					    list)->j.seq;
 
	return 0;
 #undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
		return;
	}
 
-	switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) {
+	switch (atomic_read(&ja->discard_in_flight)) {
	case DISCARD_IN_FLIGHT:
		return;
 
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
		if (cl)
			BUG_ON(!closure_wait(&w->wait, cl));
 
+		closure_flush(&c->journal.io);
		__journal_try_write(c, true);
	}
 }
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 786a1a4f74d8..71eb233b9ace 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
	} else {
		bch_writeback_add(dc);
 
-		if (s->op.flush_journal) {
+		if (bio->bi_rw & REQ_FLUSH) {
			/* Also need to send a flush to the backing device */
-			s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO,
+			struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
							   dc->disk.bio_split);
 
-			bio->bi_size = 0;
-			bio->bi_vcnt = 0;
-			closure_bio_submit(bio, cl, s->d);
+			flush->bi_rw = WRITE_FLUSH;
+			flush->bi_bdev = bio->bi_bdev;
+			flush->bi_end_io = request_endio;
+			flush->bi_private = cl;
+
+			closure_bio_submit(flush, cl, s->d);
		} else {
			s->op.cache_bio = bio;
		}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fe6ab2fbe2e..924dcfdae111 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -223,8 +223,13 @@ STORE(__cached_dev)
	}
 
	if (attr == &sysfs_label) {
-		/* note: endlines are preserved */
-		memcpy(dc->sb.label, buf, SB_LABEL_SIZE);
+		if (size > SB_LABEL_SIZE)
+			return -EINVAL;
+		memcpy(dc->sb.label, buf, size);
+		if (size < SB_LABEL_SIZE)
+			dc->sb.label[size] = '\0';
+		if (size && dc->sb.label[size - 1] == '\n')
+			dc->sb.label[size - 1] = '\0';
		bch_write_bdev_super(dc, NULL);
		if (dc->disk.c) {
			memcpy(dc->disk.c->uuids[dc->disk.id].label,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb81159a22..420dad545c7d 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
	stats->last = now ?: 1;
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done)
+/**
+ * bch_next_delay() - increment @d by the amount of work done, and return how
+ * long to delay until the next time to do some work.
+ *
+ * @d - the struct bch_ratelimit to update
+ * @done - the amount of work done, in arbitrary units
+ *
+ * Returns the amount of time to delay by, in jiffies
+ */
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
 {
	uint64_t now = local_clock();
 
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73ad85f..ea345c6896f4 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
	(ewma) >> factor; \
 })
 
-struct ratelimit {
+struct bch_ratelimit {
+	/* Next time we want to do some work, in nanoseconds */
	uint64_t next;
+
+	/*
+	 * Rate at which we want to do work, in units per nanosecond
+	 * The units here correspond to the units passed to bch_next_delay()
+	 */
	unsigned rate;
 };
 
-static inline void ratelimit_reset(struct ratelimit *d)
+static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
 {
	d->next = local_clock();
 }
 
-unsigned bch_next_delay(struct ratelimit *d, uint64_t done);
+uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
 
 #define __DIV_SAFE(n, d, zero) \
 ({ \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 22cbff551628..ba3ee48320f2 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c | |||
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work) | |||
94 | 94 | ||
95 | static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) | 95 | static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) |
96 | { | 96 | { |
97 | uint64_t ret; | ||
98 | |||
97 | if (atomic_read(&dc->disk.detaching) || | 99 | if (atomic_read(&dc->disk.detaching) || |
98 | !dc->writeback_percent) | 100 | !dc->writeback_percent) |
99 | return 0; | 101 | return 0; |
100 | 102 | ||
101 | return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); | 103 | ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); |
104 | |||
105 | return min_t(uint64_t, ret, HZ); | ||
102 | } | 106 | } |
103 | 107 | ||
104 | /* Background writeback */ | 108 | /* Background writeback */ |
@@ -208,7 +212,7 @@ normal_refill: | |||
208 | 212 | ||
209 | up_write(&dc->writeback_lock); | 213 | up_write(&dc->writeback_lock); |
210 | 214 | ||
211 | ratelimit_reset(&dc->writeback_rate); | 215 | bch_ratelimit_reset(&dc->writeback_rate); |
212 | 216 | ||
213 | /* Punt to workqueue only so we don't recurse and blow the stack */ | 217 | /* Punt to workqueue only so we don't recurse and blow the stack */ |
214 | continue_at(cl, read_dirty, dirty_wq); | 218 | continue_at(cl, read_dirty, dirty_wq); |
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl) | |||
318 | } | 322 | } |
319 | 323 | ||
320 | bch_keybuf_del(&dc->writeback_keys, w); | 324 | bch_keybuf_del(&dc->writeback_keys, w); |
321 | atomic_dec_bug(&dc->in_flight); | 325 | up(&dc->in_flight); |
322 | |||
323 | closure_wake_up(&dc->writeback_wait); | ||
324 | 326 | ||
325 | closure_return_with_destructor(cl, dirty_io_destructor); | 327 | closure_return_with_destructor(cl, dirty_io_destructor); |
326 | } | 328 | } |
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl) | |||
349 | 351 | ||
350 | closure_bio_submit(&io->bio, cl, &io->dc->disk); | 352 | closure_bio_submit(&io->bio, cl, &io->dc->disk); |
351 | 353 | ||
352 | continue_at(cl, write_dirty_finish, dirty_wq); | 354 | continue_at(cl, write_dirty_finish, system_wq); |
353 | } | 355 | } |
354 | 356 | ||
355 | static void read_dirty_endio(struct bio *bio, int error) | 357 | static void read_dirty_endio(struct bio *bio, int error) |
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl) | |||
369 | 371 | ||
370 | closure_bio_submit(&io->bio, cl, &io->dc->disk); | 372 | closure_bio_submit(&io->bio, cl, &io->dc->disk); |
371 | 373 | ||
372 | continue_at(cl, write_dirty, dirty_wq); | 374 | continue_at(cl, write_dirty, system_wq); |
373 | } | 375 | } |
374 | 376 | ||
375 | static void read_dirty(struct closure *cl) | 377 | static void read_dirty(struct closure *cl) |
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl) | |||
394 | 396 | ||
395 | if (delay > 0 && | 397 | if (delay > 0 && |
396 | (KEY_START(&w->key) != dc->last_read || | 398 | (KEY_START(&w->key) != dc->last_read || |
397 | jiffies_to_msecs(delay) > 50)) { | 399 | jiffies_to_msecs(delay) > 50)) |
398 | w->private = NULL; | 400 | delay = schedule_timeout_uninterruptible(delay); |
399 | |||
400 | closure_delay(&dc->writeback, delay); | ||
401 | continue_at(cl, read_dirty, dirty_wq); | ||
402 | } | ||
403 | 401 | ||
404 | dc->last_read = KEY_OFFSET(&w->key); | 402 | dc->last_read = KEY_OFFSET(&w->key); |
405 | 403 | ||
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl) | |||
424 | 422 | ||
425 | trace_bcache_writeback(&w->key); | 423 | trace_bcache_writeback(&w->key); |
426 | 424 | ||
427 | closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); | 425 | down(&dc->in_flight); |
426 | closure_call(&io->cl, read_dirty_submit, NULL, cl); | ||
428 | 427 | ||
429 | delay = writeback_delay(dc, KEY_SIZE(&w->key)); | 428 | delay = writeback_delay(dc, KEY_SIZE(&w->key)); |
430 | |||
431 | atomic_inc(&dc->in_flight); | ||
432 | |||
433 | if (!closure_wait_event(&dc->writeback_wait, cl, | ||
434 | atomic_read(&dc->in_flight) < 64)) | ||
435 | continue_at(cl, read_dirty, dirty_wq); | ||
436 | } | 429 | } |
437 | 430 | ||
438 | if (0) { | 431 | if (0) { |
@@ -442,7 +435,11 @@ err: | |||
442 | bch_keybuf_del(&dc->writeback_keys, w); | 435 | bch_keybuf_del(&dc->writeback_keys, w); |
443 | } | 436 | } |
444 | 437 | ||
445 | refill_dirty(cl); | 438 | /* |
439 | * Wait for outstanding writeback IOs to finish (and keybuf slots to be | ||
440 | * freed) before refilling again | ||
441 | */ | ||
442 | continue_at(cl, refill_dirty, dirty_wq); | ||
446 | } | 443 | } |
447 | 444 | ||
448 | /* Init */ | 445 | /* Init */ |
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc) | |||
484 | 481 | ||
485 | void bch_cached_dev_writeback_init(struct cached_dev *dc) | 482 | void bch_cached_dev_writeback_init(struct cached_dev *dc) |
486 | { | 483 | { |
484 | sema_init(&dc->in_flight, 64); | ||
487 | closure_init_unlocked(&dc->writeback); | 485 | closure_init_unlocked(&dc->writeback); |
488 | init_rwsem(&dc->writeback_lock); | 486 | init_rwsem(&dc->writeback_lock); |
489 | 487 | ||
@@ -513,7 +511,7 @@ void bch_writeback_exit(void) | |||
513 | 511 | ||
514 | int __init bch_writeback_init(void) | 512 | int __init bch_writeback_init(void) |
515 | { | 513 | { |
516 | dirty_wq = create_singlethread_workqueue("bcache_writeback"); | 514 | dirty_wq = create_workqueue("bcache_writeback"); |
517 | if (!dirty_wq) | 515 | if (!dirty_wq) |
518 | return -ENOMEM; | 516 | return -ENOMEM; |
519 | 517 | ||
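The bcache writeback hunks above replace the hand-rolled throttle (an atomic in_flight counter plus closure_wait_event() against a limit of 64) with a counting semaphore: sema_init(&dc->in_flight, 64) and a down() before each writeback IO is issued; the matching up() runs when the IO completes (outside the hunks shown). A minimal userspace sketch of that throttling pattern — not bcache code, and MAX_IN_FLIGHT/do_io() are invented names:

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdio.h>
    #include <unistd.h>

    #define MAX_IN_FLIGHT 4            /* bcache uses 64 */
    #define TOTAL_IOS     16

    static sem_t in_flight;

    static void *do_io(void *arg)
    {
        usleep(10000);                 /* stand-in for the actual write */
        printf("io %ld done\n", (long)arg);
        sem_post(&in_flight);          /* like up() when writeback completes */
        return NULL;
    }

    int main(void)
    {
        pthread_t tid[TOTAL_IOS];
        long i;

        sem_init(&in_flight, 0, MAX_IN_FLIGHT);
        for (i = 0; i < TOTAL_IOS; i++) {
            sem_wait(&in_flight);      /* like down(&dc->in_flight) before submit */
            pthread_create(&tid[i], NULL, do_io, (void *)i);
        }
        for (i = 0; i < TOTAL_IOS; i++)
            pthread_join(tid[i], NULL);
        sem_destroy(&in_flight);
        return 0;
    }
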
diff --git a/fs/bio.c b/fs/bio.c | |||
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src) | |||
917 | src_p = kmap_atomic(src_bv->bv_page); | 917 | src_p = kmap_atomic(src_bv->bv_page); |
918 | dst_p = kmap_atomic(dst_bv->bv_page); | 918 | dst_p = kmap_atomic(dst_bv->bv_page); |
919 | 919 | ||
920 | memcpy(dst_p + dst_bv->bv_offset, | 920 | memcpy(dst_p + dst_offset, |
921 | src_p + src_bv->bv_offset, | 921 | src_p + src_offset, |
922 | bytes); | 922 | bytes); |
923 | 923 | ||
924 | kunmap_atomic(dst_p); | 924 | kunmap_atomic(dst_p); |
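The bio_copy_data() hunk is the functional part of the fix: the running src_offset/dst_offset, which advance independently inside each bio_vec as partial copies are made, must be added to the mapped pages rather than the vectors' bv_offset, otherwise data lands in the wrong place whenever the two bios split their segments differently. A stripped-down, hypothetical analogue of the same loop over plain buffers (copy_segs() and struct seg are invented for illustration):

    #include <stdio.h>
    #include <string.h>

    struct seg { char *buf; size_t len; };

    /* Copy across two segment lists whose boundaries do not line up. */
    static void copy_segs(struct seg *dst, int ndst, struct seg *src, int nsrc)
    {
        int di = 0, si = 0;
        size_t dst_off = 0, src_off = 0;   /* progress inside the current segment */

        while (di < ndst && si < nsrc) {
            size_t bytes = dst[di].len - dst_off;

            if (src[si].len - src_off < bytes)
                bytes = src[si].len - src_off;

            /* the fix: add the running offsets, not the segment start */
            memcpy(dst[di].buf + dst_off, src[si].buf + src_off, bytes);

            dst_off += bytes;
            src_off += bytes;
            if (dst_off == dst[di].len) { di++; dst_off = 0; }
            if (src_off == src[si].len) { si++; src_off = 0; }
        }
    }

    int main(void)
    {
        char a[] = "abcd", b[] = "efgh";
        char c[6] = "", d[3] = "";
        struct seg src[] = { { a, 4 }, { b, 4 } };
        struct seg dst[] = { { c, 6 }, { d, 2 } };

        copy_segs(dst, 2, src, 2);
        printf("%.6s%.2s\n", c, d);        /* prints abcdefgh */
        return 0;
    }
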
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 121da2dc3be8..d4e81e4a9b04 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1924 | { | 1924 | { |
1925 | int tmp, hangup_needed = 0; | 1925 | int tmp, hangup_needed = 0; |
1926 | struct ocfs2_super *osb = NULL; | 1926 | struct ocfs2_super *osb = NULL; |
1927 | char nodestr[8]; | 1927 | char nodestr[12]; |
1928 | 1928 | ||
1929 | trace_ocfs2_dismount_volume(sb); | 1929 | trace_ocfs2_dismount_volume(sb); |
1930 | 1930 | ||
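For the ocfs2 change, nodestr is filled with an unsigned decimal node/slot number, and a 32-bit value can need 10 digits plus the terminating NUL — more than the old 8-byte buffer allows, hence the bump to 12. A standalone check of the sizing (not ocfs2 code):

    #include <stdio.h>

    int main(void)
    {
        char small[8], big[12];
        unsigned int node = 4294967295u;   /* worst case for a 32-bit value */

        int want = snprintf(small, sizeof(small), "%u", node);
        snprintf(big, sizeof(big), "%u", node);

        /* 10 digits + NUL need 11 bytes, so the 8-byte buffer truncates */
        printf("wanted %d bytes: small=\"%s\" big=\"%s\"\n", want + 1, small, big);
        return 0;
    }
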
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 60e95872da29..ecc82b37c4cc 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie { | |||
53 | unsigned int generation; | 53 | unsigned int generation; |
54 | }; | 54 | }; |
55 | 55 | ||
56 | enum mem_cgroup_filter_t { | ||
57 | VISIT, /* visit current node */ | ||
58 | SKIP, /* skip the current node and continue traversal */ | ||
59 | SKIP_TREE, /* skip the whole subtree and continue traversal */ | ||
60 | }; | ||
61 | |||
62 | /* | ||
63 | * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to | ||
64 | * iterate through the hierarchy tree. Each tree element is checked by the | ||
65 | * predicate before it is returned by the iterator. If a filter returns | ||
66 | * SKIP or SKIP_TREE then the iterator code continues traversal (with the | ||
67 | * next node down the hierarchy or the next node that doesn't belong under the | ||
68 | * memcg's subtree). | ||
69 | */ | ||
70 | typedef enum mem_cgroup_filter_t | ||
71 | (*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root); | ||
72 | |||
73 | #ifdef CONFIG_MEMCG | 56 | #ifdef CONFIG_MEMCG |
74 | /* | 57 | /* |
75 | * All "charge" functions with gfp_mask should use GFP_KERNEL or | 58 | * All "charge" functions with gfp_mask should use GFP_KERNEL or |
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage, | |||
137 | extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, | 120 | extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, |
138 | struct page *oldpage, struct page *newpage, bool migration_ok); | 121 | struct page *oldpage, struct page *newpage, bool migration_ok); |
139 | 122 | ||
140 | struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, | 123 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *, |
141 | struct mem_cgroup *prev, | 124 | struct mem_cgroup *, |
142 | struct mem_cgroup_reclaim_cookie *reclaim, | 125 | struct mem_cgroup_reclaim_cookie *); |
143 | mem_cgroup_iter_filter cond); | ||
144 | |||
145 | static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | ||
146 | struct mem_cgroup *prev, | ||
147 | struct mem_cgroup_reclaim_cookie *reclaim) | ||
148 | { | ||
149 | return mem_cgroup_iter_cond(root, prev, reclaim, NULL); | ||
150 | } | ||
151 | |||
152 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); | 126 | void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); |
153 | 127 | ||
154 | /* | 128 | /* |
@@ -260,9 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, | |||
260 | mem_cgroup_update_page_stat(page, idx, -1); | 234 | mem_cgroup_update_page_stat(page, idx, -1); |
261 | } | 235 | } |
262 | 236 | ||
263 | enum mem_cgroup_filter_t | 237 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, |
264 | mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, | 238 | gfp_t gfp_mask, |
265 | struct mem_cgroup *root); | 239 | unsigned long *total_scanned); |
266 | 240 | ||
267 | void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); | 241 | void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); |
268 | static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, | 242 | static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, |
@@ -376,15 +350,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, | |||
376 | struct page *oldpage, struct page *newpage, bool migration_ok) | 350 | struct page *oldpage, struct page *newpage, bool migration_ok) |
377 | { | 351 | { |
378 | } | 352 | } |
379 | static inline struct mem_cgroup * | ||
380 | mem_cgroup_iter_cond(struct mem_cgroup *root, | ||
381 | struct mem_cgroup *prev, | ||
382 | struct mem_cgroup_reclaim_cookie *reclaim, | ||
383 | mem_cgroup_iter_filter cond) | ||
384 | { | ||
385 | /* first call must return non-NULL, second return NULL */ | ||
386 | return (struct mem_cgroup *)(unsigned long)!prev; | ||
387 | } | ||
388 | 353 | ||
389 | static inline struct mem_cgroup * | 354 | static inline struct mem_cgroup * |
390 | mem_cgroup_iter(struct mem_cgroup *root, | 355 | mem_cgroup_iter(struct mem_cgroup *root, |
@@ -471,11 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, | |||
471 | } | 436 | } |
472 | 437 | ||
473 | static inline | 438 | static inline |
474 | enum mem_cgroup_filter_t | 439 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, |
475 | mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, | 440 | gfp_t gfp_mask, |
476 | struct mem_cgroup *root) | 441 | unsigned long *total_scanned) |
477 | { | 442 | { |
478 | return VISIT; | 443 | return 0; |
479 | } | 444 | } |
480 | 445 | ||
481 | static inline void mem_cgroup_split_huge_fixup(struct page *head) | 446 | static inline void mem_cgroup_split_huge_fixup(struct page *head) |
diff --git a/include/linux/smp.h b/include/linux/smp.h index cfb7ca094b38..731f5237d5f4 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h | |||
@@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, | |||
155 | 155 | ||
156 | static inline void kick_all_cpus_sync(void) { } | 156 | static inline void kick_all_cpus_sync(void) { } |
157 | 157 | ||
158 | static inline void __smp_call_function_single(int cpuid, | ||
159 | struct call_single_data *data, int wait) | ||
160 | { | ||
161 | on_each_cpu(data->func, data->info, wait); | ||
162 | } | ||
163 | |||
158 | #endif /* !SMP */ | 164 | #endif /* !SMP */ |
159 | 165 | ||
160 | /* | 166 | /* |
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 40a1fb807396..009a655a5d35 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h | |||
@@ -380,10 +380,13 @@ struct perf_event_mmap_page { | |||
380 | union { | 380 | union { |
381 | __u64 capabilities; | 381 | __u64 capabilities; |
382 | struct { | 382 | struct { |
383 | __u64 cap_usr_time : 1, | 383 | __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ |
384 | cap_usr_rdpmc : 1, | 384 | cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ |
385 | cap_usr_time_zero : 1, | 385 | |
386 | cap_____res : 61; | 386 | cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ |
387 | cap_user_time : 1, /* The time_* fields are used */ | ||
388 | cap_user_time_zero : 1, /* The time_zero field is used */ | ||
389 | cap_____res : 59; | ||
387 | }; | 390 | }; |
388 | }; | 391 | }; |
389 | 392 | ||
@@ -442,12 +445,13 @@ struct perf_event_mmap_page { | |||
442 | * ((rem * time_mult) >> time_shift); | 445 | * ((rem * time_mult) >> time_shift); |
443 | */ | 446 | */ |
444 | __u64 time_zero; | 447 | __u64 time_zero; |
448 | __u32 size; /* Header size up to __reserved[] fields. */ | ||
445 | 449 | ||
446 | /* | 450 | /* |
447 | * Hole for extension of the self monitor capabilities | 451 | * Hole for extension of the self monitor capabilities |
448 | */ | 452 | */ |
449 | 453 | ||
450 | __u64 __reserved[119]; /* align to 1k */ | 454 | __u8 __reserved[118*8+4]; /* align to 1k. */ |
451 | 455 | ||
452 | /* | 456 | /* |
453 | * Control data for the mmap() data buffer. | 457 | * Control data for the mmap() data buffer. |
@@ -528,6 +532,7 @@ enum perf_event_type { | |||
528 | * u64 len; | 532 | * u64 len; |
529 | * u64 pgoff; | 533 | * u64 pgoff; |
530 | * char filename[]; | 534 | * char filename[]; |
535 | * struct sample_id sample_id; | ||
531 | * }; | 536 | * }; |
532 | */ | 537 | */ |
533 | PERF_RECORD_MMAP = 1, | 538 | PERF_RECORD_MMAP = 1, |
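Because bit 0 is now always zero and cap_bit0_is_deprecated is always one, userspace can tell the corrected layout apart from older kernels where cap_usr_time and cap_usr_rdpmc were accidentally unioned onto the same bit. A sketch of that check, assuming pc already points at the mmap()ed perf_event_mmap_page (the perf_event_open()/mmap() setup is omitted) and that the installed headers contain the new field names:

    #include <linux/perf_event.h>
    #include <stdio.h>

    /* pc points at the already-mmap()ed first page of a perf event fd. */
    static void report_caps(const struct perf_event_mmap_page *pc)
    {
        if (pc->cap_bit0_is_deprecated) {
            /* fixed layout: each capability has its own bit */
            printf("rdpmc=%d user_time=%d time_zero=%d\n",
                   (int)pc->cap_user_rdpmc,
                   (int)pc->cap_user_time,
                   (int)pc->cap_user_time_zero);
        } else {
            /* old kernel: bit 0 ambiguously meant both time and rdpmc */
            printf("legacy caps=%d\n", (int)pc->cap_bit0);
        }
    }
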
diff --git a/ipc/msg.c b/ipc/msg.c | |||
@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s) | |||
165 | ipc_rmid(&msg_ids(ns), &s->q_perm); | 165 | ipc_rmid(&msg_ids(ns), &s->q_perm); |
166 | } | 166 | } |
167 | 167 | ||
168 | static void msg_rcu_free(struct rcu_head *head) | ||
169 | { | ||
170 | struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); | ||
171 | struct msg_queue *msq = ipc_rcu_to_struct(p); | ||
172 | |||
173 | security_msg_queue_free(msq); | ||
174 | ipc_rcu_free(head); | ||
175 | } | ||
176 | |||
168 | /** | 177 | /** |
169 | * newque - Create a new msg queue | 178 | * newque - Create a new msg queue |
170 | * @ns: namespace | 179 | * @ns: namespace |
@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params) | |||
189 | msq->q_perm.security = NULL; | 198 | msq->q_perm.security = NULL; |
190 | retval = security_msg_queue_alloc(msq); | 199 | retval = security_msg_queue_alloc(msq); |
191 | if (retval) { | 200 | if (retval) { |
192 | ipc_rcu_putref(msq); | 201 | ipc_rcu_putref(msq, ipc_rcu_free); |
193 | return retval; | 202 | return retval; |
194 | } | 203 | } |
195 | 204 | ||
196 | /* ipc_addid() locks msq upon success. */ | 205 | /* ipc_addid() locks msq upon success. */ |
197 | id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); | 206 | id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); |
198 | if (id < 0) { | 207 | if (id < 0) { |
199 | security_msg_queue_free(msq); | 208 | ipc_rcu_putref(msq, msg_rcu_free); |
200 | ipc_rcu_putref(msq); | ||
201 | return id; | 209 | return id; |
202 | } | 210 | } |
203 | 211 | ||
@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
276 | free_msg(msg); | 284 | free_msg(msg); |
277 | } | 285 | } |
278 | atomic_sub(msq->q_cbytes, &ns->msg_bytes); | 286 | atomic_sub(msq->q_cbytes, &ns->msg_bytes); |
279 | security_msg_queue_free(msq); | 287 | ipc_rcu_putref(msq, msg_rcu_free); |
280 | ipc_rcu_putref(msq); | ||
281 | } | 288 | } |
282 | 289 | ||
283 | /* | 290 | /* |
@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext, | |||
717 | rcu_read_lock(); | 724 | rcu_read_lock(); |
718 | ipc_lock_object(&msq->q_perm); | 725 | ipc_lock_object(&msq->q_perm); |
719 | 726 | ||
720 | ipc_rcu_putref(msq); | 727 | ipc_rcu_putref(msq, ipc_rcu_free); |
721 | if (msq->q_perm.deleted) { | 728 | if (msq->q_perm.deleted) { |
722 | err = -EIDRM; | 729 | err = -EIDRM; |
723 | goto out_unlock0; | 730 | goto out_unlock0; |
diff --git a/ipc/sem.c b/ipc/sem.c | |||
@@ -243,6 +243,15 @@ static void merge_queues(struct sem_array *sma) | |||
243 | } | 243 | } |
244 | } | 244 | } |
245 | 245 | ||
246 | static void sem_rcu_free(struct rcu_head *head) | ||
247 | { | ||
248 | struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); | ||
249 | struct sem_array *sma = ipc_rcu_to_struct(p); | ||
250 | |||
251 | security_sem_free(sma); | ||
252 | ipc_rcu_free(head); | ||
253 | } | ||
254 | |||
246 | /* | 255 | /* |
247 | * If the request contains only one semaphore operation, and there are | 256 | * If the request contains only one semaphore operation, and there are |
248 | * no complex transactions pending, lock only the semaphore involved. | 257 | * no complex transactions pending, lock only the semaphore involved. |
@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns | |||
374 | static inline void sem_lock_and_putref(struct sem_array *sma) | 383 | static inline void sem_lock_and_putref(struct sem_array *sma) |
375 | { | 384 | { |
376 | sem_lock(sma, NULL, -1); | 385 | sem_lock(sma, NULL, -1); |
377 | ipc_rcu_putref(sma); | 386 | ipc_rcu_putref(sma, ipc_rcu_free); |
378 | } | ||
379 | |||
380 | static inline void sem_putref(struct sem_array *sma) | ||
381 | { | ||
382 | ipc_rcu_putref(sma); | ||
383 | } | 387 | } |
384 | 388 | ||
385 | static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) | 389 | static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) |
@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params) | |||
458 | sma->sem_perm.security = NULL; | 462 | sma->sem_perm.security = NULL; |
459 | retval = security_sem_alloc(sma); | 463 | retval = security_sem_alloc(sma); |
460 | if (retval) { | 464 | if (retval) { |
461 | ipc_rcu_putref(sma); | 465 | ipc_rcu_putref(sma, ipc_rcu_free); |
462 | return retval; | 466 | return retval; |
463 | } | 467 | } |
464 | 468 | ||
465 | id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); | 469 | id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); |
466 | if (id < 0) { | 470 | if (id < 0) { |
467 | security_sem_free(sma); | 471 | ipc_rcu_putref(sma, sem_rcu_free); |
468 | ipc_rcu_putref(sma); | ||
469 | return id; | 472 | return id; |
470 | } | 473 | } |
471 | ns->used_sems += nsems; | 474 | ns->used_sems += nsems; |
@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp) | |||
1047 | 1050 | ||
1048 | wake_up_sem_queue_do(&tasks); | 1051 | wake_up_sem_queue_do(&tasks); |
1049 | ns->used_sems -= sma->sem_nsems; | 1052 | ns->used_sems -= sma->sem_nsems; |
1050 | security_sem_free(sma); | 1053 | ipc_rcu_putref(sma, sem_rcu_free); |
1051 | ipc_rcu_putref(sma); | ||
1052 | } | 1054 | } |
1053 | 1055 | ||
1054 | static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) | 1056 | static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) |
@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, | |||
1292 | rcu_read_unlock(); | 1294 | rcu_read_unlock(); |
1293 | sem_io = ipc_alloc(sizeof(ushort)*nsems); | 1295 | sem_io = ipc_alloc(sizeof(ushort)*nsems); |
1294 | if(sem_io == NULL) { | 1296 | if(sem_io == NULL) { |
1295 | sem_putref(sma); | 1297 | ipc_rcu_putref(sma, ipc_rcu_free); |
1296 | return -ENOMEM; | 1298 | return -ENOMEM; |
1297 | } | 1299 | } |
1298 | 1300 | ||
@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum, | |||
1328 | if(nsems > SEMMSL_FAST) { | 1330 | if(nsems > SEMMSL_FAST) { |
1329 | sem_io = ipc_alloc(sizeof(ushort)*nsems); | 1331 | sem_io = ipc_alloc(sizeof(ushort)*nsems); |
1330 | if(sem_io == NULL) { | 1332 | if(sem_io == NULL) { |
1331 | sem_putref(sma); | 1333 | ipc_rcu_putref(sma, ipc_rcu_free); |
1332 | return -ENOMEM; | 1334 | return -ENOMEM; |
1333 | } | 1335 | } |
1334 | } | 1336 | } |
1335 | 1337 | ||
1336 | if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { | 1338 | if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { |
1337 | sem_putref(sma); | 1339 | ipc_rcu_putref(sma, ipc_rcu_free); |
1338 | err = -EFAULT; | 1340 | err = -EFAULT; |
1339 | goto out_free; | 1341 | goto out_free; |
1340 | } | 1342 | } |
1341 | 1343 | ||
1342 | for (i = 0; i < nsems; i++) { | 1344 | for (i = 0; i < nsems; i++) { |
1343 | if (sem_io[i] > SEMVMX) { | 1345 | if (sem_io[i] > SEMVMX) { |
1344 | sem_putref(sma); | 1346 | ipc_rcu_putref(sma, ipc_rcu_free); |
1345 | err = -ERANGE; | 1347 | err = -ERANGE; |
1346 | goto out_free; | 1348 | goto out_free; |
1347 | } | 1349 | } |
@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid) | |||
1629 | /* step 2: allocate new undo structure */ | 1631 | /* step 2: allocate new undo structure */ |
1630 | new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); | 1632 | new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); |
1631 | if (!new) { | 1633 | if (!new) { |
1632 | sem_putref(sma); | 1634 | ipc_rcu_putref(sma, ipc_rcu_free); |
1633 | return ERR_PTR(-ENOMEM); | 1635 | return ERR_PTR(-ENOMEM); |
1634 | } | 1636 | } |
1635 | 1637 | ||
diff --git a/ipc/shm.c b/ipc/shm.c | |||
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp) | |||
167 | ipc_lock_object(&ipcp->shm_perm); | 167 | ipc_lock_object(&ipcp->shm_perm); |
168 | } | 168 | } |
169 | 169 | ||
170 | static void shm_rcu_free(struct rcu_head *head) | ||
171 | { | ||
172 | struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); | ||
173 | struct shmid_kernel *shp = ipc_rcu_to_struct(p); | ||
174 | |||
175 | security_shm_free(shp); | ||
176 | ipc_rcu_free(head); | ||
177 | } | ||
178 | |||
170 | static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) | 179 | static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) |
171 | { | 180 | { |
172 | ipc_rmid(&shm_ids(ns), &s->shm_perm); | 181 | ipc_rmid(&shm_ids(ns), &s->shm_perm); |
@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) | |||
208 | user_shm_unlock(file_inode(shp->shm_file)->i_size, | 217 | user_shm_unlock(file_inode(shp->shm_file)->i_size, |
209 | shp->mlock_user); | 218 | shp->mlock_user); |
210 | fput (shp->shm_file); | 219 | fput (shp->shm_file); |
211 | security_shm_free(shp); | 220 | ipc_rcu_putref(shp, shm_rcu_free); |
212 | ipc_rcu_putref(shp); | ||
213 | } | 221 | } |
214 | 222 | ||
215 | /* | 223 | /* |
@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) | |||
497 | shp->shm_perm.security = NULL; | 505 | shp->shm_perm.security = NULL; |
498 | error = security_shm_alloc(shp); | 506 | error = security_shm_alloc(shp); |
499 | if (error) { | 507 | if (error) { |
500 | ipc_rcu_putref(shp); | 508 | ipc_rcu_putref(shp, ipc_rcu_free); |
501 | return error; | 509 | return error; |
502 | } | 510 | } |
503 | 511 | ||
@@ -566,8 +574,7 @@ no_id: | |||
566 | user_shm_unlock(size, shp->mlock_user); | 574 | user_shm_unlock(size, shp->mlock_user); |
567 | fput(file); | 575 | fput(file); |
568 | no_file: | 576 | no_file: |
569 | security_shm_free(shp); | 577 | ipc_rcu_putref(shp, shm_rcu_free); |
570 | ipc_rcu_putref(shp); | ||
571 | return error; | 578 | return error; |
572 | } | 579 | } |
573 | 580 | ||
diff --git a/ipc/util.c b/ipc/util.c index e829da9ed01f..fdb8ae740775 100644 --- a/ipc/util.c +++ b/ipc/util.c | |||
@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size) | |||
474 | kfree(ptr); | 474 | kfree(ptr); |
475 | } | 475 | } |
476 | 476 | ||
477 | struct ipc_rcu { | ||
478 | struct rcu_head rcu; | ||
479 | atomic_t refcount; | ||
480 | } ____cacheline_aligned_in_smp; | ||
481 | |||
482 | /** | 477 | /** |
483 | * ipc_rcu_alloc - allocate ipc and rcu space | 478 | * ipc_rcu_alloc - allocate ipc and rcu space |
484 | * @size: size desired | 479 | * @size: size desired |
@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr) | |||
505 | return atomic_inc_not_zero(&p->refcount); | 500 | return atomic_inc_not_zero(&p->refcount); |
506 | } | 501 | } |
507 | 502 | ||
508 | /** | 503 | void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)) |
509 | * ipc_schedule_free - free ipc + rcu space | ||
510 | * @head: RCU callback structure for queued work | ||
511 | */ | ||
512 | static void ipc_schedule_free(struct rcu_head *head) | ||
513 | { | ||
514 | vfree(container_of(head, struct ipc_rcu, rcu)); | ||
515 | } | ||
516 | |||
517 | void ipc_rcu_putref(void *ptr) | ||
518 | { | 504 | { |
519 | struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; | 505 | struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; |
520 | 506 | ||
521 | if (!atomic_dec_and_test(&p->refcount)) | 507 | if (!atomic_dec_and_test(&p->refcount)) |
522 | return; | 508 | return; |
523 | 509 | ||
524 | if (is_vmalloc_addr(ptr)) { | 510 | call_rcu(&p->rcu, func); |
525 | call_rcu(&p->rcu, ipc_schedule_free); | 511 | } |
526 | } else { | 512 | |
527 | kfree_rcu(p, rcu); | 513 | void ipc_rcu_free(struct rcu_head *head) |
528 | } | 514 | { |
515 | struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu); | ||
516 | |||
517 | if (is_vmalloc_addr(p)) | ||
518 | vfree(p); | ||
519 | else | ||
520 | kfree(p); | ||
529 | } | 521 | } |
530 | 522 | ||
531 | /** | 523 | /** |
diff --git a/ipc/util.h b/ipc/util.h index c5f3338ba1fa..f2f5036f2eed 100644 --- a/ipc/util.h +++ b/ipc/util.h | |||
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { } | |||
47 | static inline void shm_exit_ns(struct ipc_namespace *ns) { } | 47 | static inline void shm_exit_ns(struct ipc_namespace *ns) { } |
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | struct ipc_rcu { | ||
51 | struct rcu_head rcu; | ||
52 | atomic_t refcount; | ||
53 | } ____cacheline_aligned_in_smp; | ||
54 | |||
55 | #define ipc_rcu_to_struct(p) ((void *)(p+1)) | ||
56 | |||
50 | /* | 57 | /* |
51 | * Structure that holds the parameters needed by the ipc operations | 58 | * Structure that holds the parameters needed by the ipc operations |
52 | * (see after) | 59 | * (see after) |
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size); | |||
120 | */ | 127 | */ |
121 | void* ipc_rcu_alloc(int size); | 128 | void* ipc_rcu_alloc(int size); |
122 | int ipc_rcu_getref(void *ptr); | 129 | int ipc_rcu_getref(void *ptr); |
123 | void ipc_rcu_putref(void *ptr); | 130 | void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head)); |
131 | void ipc_rcu_free(struct rcu_head *head); | ||
124 | 132 | ||
125 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); | 133 | struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); |
126 | struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); | 134 | struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); |
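After the ipc changes, every ipc_rcu_putref() caller names the RCU callback that runs once the grace period ends: plain ipc_rcu_free() when only the allocation has to go, or a wrapper that also releases the security blob, as msg_rcu_free/sem_rcu_free/shm_rcu_free do above. Schematically, for a hypothetical object type (foo_queue, security_foo_free and foo_destroy are invented; this is a sketch, not kernel code):

    /* Hypothetical ipc object built around struct ipc_rcu; all names invented. */
    static void foo_rcu_free(struct rcu_head *head)
    {
        struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
        struct foo_queue *foo = ipc_rcu_to_struct(p);

        security_foo_free(foo);     /* release the LSM blob first */
        ipc_rcu_free(head);         /* then the kmalloc/vmalloc'd block */
    }

    static void foo_destroy(struct foo_queue *foo)
    {
        /*
         * Readers may still be walking the object under RCU, so the free is
         * deferred to the callback named here; a caller that bails out before
         * its security blob was allocated would pass ipc_rcu_free instead.
         */
        ipc_rcu_putref(foo, foo_rcu_free);
    }
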
diff --git a/kernel/audit.c b/kernel/audit.c index 91e53d04b6a9..7b0e23a740ce 100644 --- a/kernel/audit.c +++ b/kernel/audit.c | |||
@@ -1117,9 +1117,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask, | |||
1117 | 1117 | ||
1118 | sleep_time = timeout_start + audit_backlog_wait_time - | 1118 | sleep_time = timeout_start + audit_backlog_wait_time - |
1119 | jiffies; | 1119 | jiffies; |
1120 | if ((long)sleep_time > 0) | 1120 | if ((long)sleep_time > 0) { |
1121 | wait_for_auditd(sleep_time); | 1121 | wait_for_auditd(sleep_time); |
1122 | continue; | 1122 | continue; |
1123 | } | ||
1123 | } | 1124 | } |
1124 | if (audit_rate_check() && printk_ratelimit()) | 1125 | if (audit_rate_check() && printk_ratelimit()) |
1125 | printk(KERN_WARNING | 1126 | printk(KERN_WARNING |
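The audit_log_start() change only adds braces, but they matter: without them the continue ran unconditionally, so once the backlog wait time was exhausted the loop never fell through to the rate-limited warning and error handling below. The same shape of bug reduced to a runnable toy (values invented):

    #include <stdio.h>

    int main(void)
    {
        long sleep_time;

        for (sleep_time = 2; sleep_time > -2; sleep_time--) {
            /* with the braces, retrying only happens while time remains */
            if (sleep_time > 0) {
                printf("would wait %ld more\n", sleep_time);
                continue;
            }
            /* without the braces this line was unreachable */
            printf("backlog wait exhausted, raise the error path\n");
            break;
        }
        return 0;
    }
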
diff --git a/kernel/events/core.c b/kernel/events/core.c index dd236b66ca3a..cb4238e85b38 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c | |||
@@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event, | |||
3660 | *running = ctx_time - event->tstamp_running; | 3660 | *running = ctx_time - event->tstamp_running; |
3661 | } | 3661 | } |
3662 | 3662 | ||
3663 | static void perf_event_init_userpage(struct perf_event *event) | ||
3664 | { | ||
3665 | struct perf_event_mmap_page *userpg; | ||
3666 | struct ring_buffer *rb; | ||
3667 | |||
3668 | rcu_read_lock(); | ||
3669 | rb = rcu_dereference(event->rb); | ||
3670 | if (!rb) | ||
3671 | goto unlock; | ||
3672 | |||
3673 | userpg = rb->user_page; | ||
3674 | |||
3675 | /* Allow new userspace to detect that bit 0 is deprecated */ | ||
3676 | userpg->cap_bit0_is_deprecated = 1; | ||
3677 | userpg->size = offsetof(struct perf_event_mmap_page, __reserved); | ||
3678 | |||
3679 | unlock: | ||
3680 | rcu_read_unlock(); | ||
3681 | } | ||
3682 | |||
3663 | void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 3683 | void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
3664 | { | 3684 | { |
3665 | } | 3685 | } |
@@ -4044,6 +4064,7 @@ again: | |||
4044 | ring_buffer_attach(event, rb); | 4064 | ring_buffer_attach(event, rb); |
4045 | rcu_assign_pointer(event->rb, rb); | 4065 | rcu_assign_pointer(event->rb, rb); |
4046 | 4066 | ||
4067 | perf_event_init_userpage(event); | ||
4047 | perf_event_update_userpage(event); | 4068 | perf_event_update_userpage(event); |
4048 | 4069 | ||
4049 | unlock: | 4070 | unlock: |
diff --git a/kernel/reboot.c b/kernel/reboot.c index 269ed9384cc4..f813b3474646 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c | |||
@@ -32,7 +32,14 @@ EXPORT_SYMBOL(cad_pid); | |||
32 | #endif | 32 | #endif |
33 | enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; | 33 | enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; |
34 | 34 | ||
35 | int reboot_default; | 35 | /* |
36 | * This variable is used privately to keep track of whether or not | ||
37 | * reboot_type is still set to its default value (i.e., reboot= hasn't | ||
38 | * been set on the command line). This is needed so that we can | ||
39 | * suppress DMI scanning for reboot quirks. Without it, it's | ||
40 | * impossible to override a faulty reboot quirk without recompiling. | ||
41 | */ | ||
42 | int reboot_default = 1; | ||
36 | int reboot_cpu; | 43 | int reboot_cpu; |
37 | enum reboot_type reboot_type = BOOT_ACPI; | 44 | enum reboot_type reboot_type = BOOT_ACPI; |
38 | int reboot_force; | 45 | int reboot_force; |
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 11cd13667359..7c70201fbc61 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c | |||
@@ -4242,7 +4242,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq) | |||
4242 | } | 4242 | } |
4243 | 4243 | ||
4244 | if (!se) { | 4244 | if (!se) { |
4245 | cfs_rq->h_load = rq->avg.load_avg_contrib; | 4245 | cfs_rq->h_load = cfs_rq->runnable_load_avg; |
4246 | cfs_rq->last_h_load_update = now; | 4246 | cfs_rq->last_h_load_update = now; |
4247 | } | 4247 | } |
4248 | 4248 | ||
@@ -4823,8 +4823,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds) | |||
4823 | (busiest->load_per_task * SCHED_POWER_SCALE) / | 4823 | (busiest->load_per_task * SCHED_POWER_SCALE) / |
4824 | busiest->group_power; | 4824 | busiest->group_power; |
4825 | 4825 | ||
4826 | if (busiest->avg_load - local->avg_load + scaled_busy_load_per_task >= | 4826 | if (busiest->avg_load + scaled_busy_load_per_task >= |
4827 | (scaled_busy_load_per_task * imbn)) { | 4827 | local->avg_load + (scaled_busy_load_per_task * imbn)) { |
4828 | env->imbalance = busiest->load_per_task; | 4828 | env->imbalance = busiest->load_per_task; |
4829 | return; | 4829 | return; |
4830 | } | 4830 | } |
@@ -4896,7 +4896,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s | |||
4896 | * max load less than avg load(as we skip the groups at or below | 4896 | * max load less than avg load(as we skip the groups at or below |
4897 | * its cpu_power, while calculating max_load..) | 4897 | * its cpu_power, while calculating max_load..) |
4898 | */ | 4898 | */ |
4899 | if (busiest->avg_load < sds->avg_load) { | 4899 | if (busiest->avg_load <= sds->avg_load || |
4900 | local->avg_load >= sds->avg_load) { | ||
4900 | env->imbalance = 0; | 4901 | env->imbalance = 0; |
4901 | return fix_small_imbalance(env, sds); | 4902 | return fix_small_imbalance(env, sds); |
4902 | } | 4903 | } |
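The two imbalance hunks in fix_small_imbalance() and calculate_imbalance() both guard against unsigned load arithmetic going negative: the first rewrites the comparison so nothing is subtracted, and the second bails out to fix_small_imbalance() before a wrapped difference can be fed into the imbalance. When busiest->avg_load is in fact smaller than local->avg_load, the old subtraction wrapped to a huge value and the test passed spuriously. A two-line demonstration with made-up loads:

    #include <stdio.h>

    int main(void)
    {
        unsigned long busiest = 100, local = 300, scaled = 50, imbn = 2;

        /* old form: 100 - 300 wraps, so the test is always true */
        printf("old: %d\n", busiest - local + scaled >= scaled * imbn);
        /* new form: compare without subtracting */
        printf("new: %d\n", busiest + scaled >= local + scaled * imbn);
        return 0;
    }
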
diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 51c4f34d258e..4431610f049a 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c | |||
@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = { | |||
486 | .unpark = watchdog_enable, | 486 | .unpark = watchdog_enable, |
487 | }; | 487 | }; |
488 | 488 | ||
489 | static int watchdog_enable_all_cpus(void) | 489 | static void restart_watchdog_hrtimer(void *info) |
490 | { | ||
491 | struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer); | ||
492 | int ret; | ||
493 | |||
494 | /* | ||
495 | * No need to cancel and restart hrtimer if it is currently executing | ||
496 | * because it will reprogram itself with the new period now. | ||
497 | * We should never see it unqueued here because we are running per-cpu | ||
498 | * with interrupts disabled. | ||
499 | */ | ||
500 | ret = hrtimer_try_to_cancel(hrtimer); | ||
501 | if (ret == 1) | ||
502 | hrtimer_start(hrtimer, ns_to_ktime(sample_period), | ||
503 | HRTIMER_MODE_REL_PINNED); | ||
504 | } | ||
505 | |||
506 | static void update_timers(int cpu) | ||
507 | { | ||
508 | struct call_single_data data = {.func = restart_watchdog_hrtimer}; | ||
509 | /* | ||
510 | * Make sure that perf event counter will adapt to a new | ||
511 | * sampling period. Updating the sampling period directly would | ||
512 | * be much nicer but we do not have an API for that now so | ||
513 | * let's use a big hammer. | ||
514 | * Hrtimer will adopt the new period on the next tick but this | ||
515 | * might be late already so we have to restart the timer as well. | ||
516 | */ | ||
517 | watchdog_nmi_disable(cpu); | ||
518 | __smp_call_function_single(cpu, &data, 1); | ||
519 | watchdog_nmi_enable(cpu); | ||
520 | } | ||
521 | |||
522 | static void update_timers_all_cpus(void) | ||
523 | { | ||
524 | int cpu; | ||
525 | |||
526 | get_online_cpus(); | ||
527 | preempt_disable(); | ||
528 | for_each_online_cpu(cpu) | ||
529 | update_timers(cpu); | ||
530 | preempt_enable(); | ||
531 | put_online_cpus(); | ||
532 | } | ||
533 | |||
534 | static int watchdog_enable_all_cpus(bool sample_period_changed) | ||
490 | { | 535 | { |
491 | int err = 0; | 536 | int err = 0; |
492 | 537 | ||
@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void) | |||
496 | pr_err("Failed to create watchdog threads, disabled\n"); | 541 | pr_err("Failed to create watchdog threads, disabled\n"); |
497 | else | 542 | else |
498 | watchdog_running = 1; | 543 | watchdog_running = 1; |
544 | } else if (sample_period_changed) { | ||
545 | update_timers_all_cpus(); | ||
499 | } | 546 | } |
500 | 547 | ||
501 | return err; | 548 | return err; |
@@ -520,13 +567,15 @@ int proc_dowatchdog(struct ctl_table *table, int write, | |||
520 | void __user *buffer, size_t *lenp, loff_t *ppos) | 567 | void __user *buffer, size_t *lenp, loff_t *ppos) |
521 | { | 568 | { |
522 | int err, old_thresh, old_enabled; | 569 | int err, old_thresh, old_enabled; |
570 | static DEFINE_MUTEX(watchdog_proc_mutex); | ||
523 | 571 | ||
572 | mutex_lock(&watchdog_proc_mutex); | ||
524 | old_thresh = ACCESS_ONCE(watchdog_thresh); | 573 | old_thresh = ACCESS_ONCE(watchdog_thresh); |
525 | old_enabled = ACCESS_ONCE(watchdog_user_enabled); | 574 | old_enabled = ACCESS_ONCE(watchdog_user_enabled); |
526 | 575 | ||
527 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); | 576 | err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
528 | if (err || !write) | 577 | if (err || !write) |
529 | return err; | 578 | goto out; |
530 | 579 | ||
531 | set_sample_period(); | 580 | set_sample_period(); |
532 | /* | 581 | /* |
@@ -535,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write, | |||
535 | * watchdog_*_all_cpus() function takes care of this. | 584 | * watchdog_*_all_cpus() function takes care of this. |
536 | */ | 585 | */ |
537 | if (watchdog_user_enabled && watchdog_thresh) | 586 | if (watchdog_user_enabled && watchdog_thresh) |
538 | err = watchdog_enable_all_cpus(); | 587 | err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); |
539 | else | 588 | else |
540 | watchdog_disable_all_cpus(); | 589 | watchdog_disable_all_cpus(); |
541 | 590 | ||
@@ -544,7 +593,8 @@ int proc_dowatchdog(struct ctl_table *table, int write, | |||
544 | watchdog_thresh = old_thresh; | 593 | watchdog_thresh = old_thresh; |
545 | watchdog_user_enabled = old_enabled; | 594 | watchdog_user_enabled = old_enabled; |
546 | } | 595 | } |
547 | 596 | out: | |
597 | mutex_unlock(&watchdog_proc_mutex); | ||
548 | return err; | 598 | return err; |
549 | } | 599 | } |
550 | #endif /* CONFIG_SYSCTL */ | 600 | #endif /* CONFIG_SYSCTL */ |
@@ -554,5 +604,5 @@ void __init lockup_detector_init(void) | |||
554 | set_sample_period(); | 604 | set_sample_period(); |
555 | 605 | ||
556 | if (watchdog_user_enabled) | 606 | if (watchdog_user_enabled) |
557 | watchdog_enable_all_cpus(); | 607 | watchdog_enable_all_cpus(false); |
558 | } | 608 | } |
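proc_dowatchdog() now snapshots the old threshold and enable flag, applies the new values, and restores the snapshot if re-arming the watchdogs fails, with a static mutex serialising concurrent writers. A small userspace analogue of that save/apply/roll-back flow (update() and apply_settings() are invented stand-ins):

    #include <pthread.h>
    #include <stdio.h>

    static int thresh = 10, enabled = 1;
    static pthread_mutex_t proc_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-in for watchdog_enable_all_cpus()/watchdog_disable_all_cpus(). */
    static int apply_settings(void)
    {
        return thresh > 0 ? 0 : -1;    /* pretend nonsense values fail */
    }

    /* Mirrors the proc_dowatchdog() flow: snapshot, apply, roll back on error. */
    static int update(int new_thresh, int new_enabled)
    {
        int err, old_thresh, old_enabled;

        pthread_mutex_lock(&proc_mutex);
        old_thresh = thresh;
        old_enabled = enabled;
        thresh = new_thresh;
        enabled = new_enabled;

        err = apply_settings();
        if (err) {                     /* restore the previous values on failure */
            thresh = old_thresh;
            enabled = old_enabled;
        }
        pthread_mutex_unlock(&proc_mutex);
        return err;
    }

    int main(void)
    {
        printf("%d -> thresh=%d\n", update(20, 1), thresh);   /* 0 -> 20 */
        printf("%d -> thresh=%d\n", update(-5, 1), thresh);   /* -1 -> 20 */
        return 0;
    }
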
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d5ff3ce13029..1c52ddbc839b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/limits.h> | 39 | #include <linux/limits.h> |
40 | #include <linux/export.h> | 40 | #include <linux/export.h> |
41 | #include <linux/mutex.h> | 41 | #include <linux/mutex.h> |
42 | #include <linux/rbtree.h> | ||
42 | #include <linux/slab.h> | 43 | #include <linux/slab.h> |
43 | #include <linux/swap.h> | 44 | #include <linux/swap.h> |
44 | #include <linux/swapops.h> | 45 | #include <linux/swapops.h> |
@@ -160,6 +161,10 @@ struct mem_cgroup_per_zone { | |||
160 | 161 | ||
161 | struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; | 162 | struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; |
162 | 163 | ||
164 | struct rb_node tree_node; /* RB tree node */ | ||
165 | unsigned long long usage_in_excess;/* Set to the value by which */ | ||
166 | /* the soft limit is exceeded*/ | ||
167 | bool on_tree; | ||
163 | struct mem_cgroup *memcg; /* Back pointer, we cannot */ | 168 | struct mem_cgroup *memcg; /* Back pointer, we cannot */ |
164 | /* use container_of */ | 169 | /* use container_of */ |
165 | }; | 170 | }; |
@@ -168,6 +173,26 @@ struct mem_cgroup_per_node { | |||
168 | struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; | 173 | struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; |
169 | }; | 174 | }; |
170 | 175 | ||
176 | /* | ||
177 | * Cgroups above their limits are maintained in a RB-Tree, independent of | ||
178 | * their hierarchy representation | ||
179 | */ | ||
180 | |||
181 | struct mem_cgroup_tree_per_zone { | ||
182 | struct rb_root rb_root; | ||
183 | spinlock_t lock; | ||
184 | }; | ||
185 | |||
186 | struct mem_cgroup_tree_per_node { | ||
187 | struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES]; | ||
188 | }; | ||
189 | |||
190 | struct mem_cgroup_tree { | ||
191 | struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES]; | ||
192 | }; | ||
193 | |||
194 | static struct mem_cgroup_tree soft_limit_tree __read_mostly; | ||
195 | |||
171 | struct mem_cgroup_threshold { | 196 | struct mem_cgroup_threshold { |
172 | struct eventfd_ctx *eventfd; | 197 | struct eventfd_ctx *eventfd; |
173 | u64 threshold; | 198 | u64 threshold; |
@@ -303,22 +328,6 @@ struct mem_cgroup { | |||
303 | atomic_t numainfo_events; | 328 | atomic_t numainfo_events; |
304 | atomic_t numainfo_updating; | 329 | atomic_t numainfo_updating; |
305 | #endif | 330 | #endif |
306 | /* | ||
307 | * Protects soft_contributed transitions. | ||
308 | * See mem_cgroup_update_soft_limit | ||
309 | */ | ||
310 | spinlock_t soft_lock; | ||
311 | |||
312 | /* | ||
313 | * If true then this group has increased parents' children_in_excess | ||
314 | * when it got over the soft limit. | ||
315 | * When a group falls bellow the soft limit, parents' children_in_excess | ||
316 | * is decreased and soft_contributed changed to false. | ||
317 | */ | ||
318 | bool soft_contributed; | ||
319 | |||
320 | /* Number of children that are in soft limit excess */ | ||
321 | atomic_t children_in_excess; | ||
322 | 331 | ||
323 | struct mem_cgroup_per_node *nodeinfo[0]; | 332 | struct mem_cgroup_per_node *nodeinfo[0]; |
324 | /* WARNING: nodeinfo must be the last member here */ | 333 | /* WARNING: nodeinfo must be the last member here */ |
@@ -422,6 +431,7 @@ static bool move_file(void) | |||
422 | * limit reclaim to prevent infinite loops, if they ever occur. | 431 | * limit reclaim to prevent infinite loops, if they ever occur. |
423 | */ | 432 | */ |
424 | #define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 | 433 | #define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 |
434 | #define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2 | ||
425 | 435 | ||
426 | enum charge_type { | 436 | enum charge_type { |
427 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, | 437 | MEM_CGROUP_CHARGE_TYPE_CACHE = 0, |
@@ -648,6 +658,164 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page) | |||
648 | return mem_cgroup_zoneinfo(memcg, nid, zid); | 658 | return mem_cgroup_zoneinfo(memcg, nid, zid); |
649 | } | 659 | } |
650 | 660 | ||
661 | static struct mem_cgroup_tree_per_zone * | ||
662 | soft_limit_tree_node_zone(int nid, int zid) | ||
663 | { | ||
664 | return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; | ||
665 | } | ||
666 | |||
667 | static struct mem_cgroup_tree_per_zone * | ||
668 | soft_limit_tree_from_page(struct page *page) | ||
669 | { | ||
670 | int nid = page_to_nid(page); | ||
671 | int zid = page_zonenum(page); | ||
672 | |||
673 | return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid]; | ||
674 | } | ||
675 | |||
676 | static void | ||
677 | __mem_cgroup_insert_exceeded(struct mem_cgroup *memcg, | ||
678 | struct mem_cgroup_per_zone *mz, | ||
679 | struct mem_cgroup_tree_per_zone *mctz, | ||
680 | unsigned long long new_usage_in_excess) | ||
681 | { | ||
682 | struct rb_node **p = &mctz->rb_root.rb_node; | ||
683 | struct rb_node *parent = NULL; | ||
684 | struct mem_cgroup_per_zone *mz_node; | ||
685 | |||
686 | if (mz->on_tree) | ||
687 | return; | ||
688 | |||
689 | mz->usage_in_excess = new_usage_in_excess; | ||
690 | if (!mz->usage_in_excess) | ||
691 | return; | ||
692 | while (*p) { | ||
693 | parent = *p; | ||
694 | mz_node = rb_entry(parent, struct mem_cgroup_per_zone, | ||
695 | tree_node); | ||
696 | if (mz->usage_in_excess < mz_node->usage_in_excess) | ||
697 | p = &(*p)->rb_left; | ||
698 | /* | ||
699 | * We can't avoid mem cgroups that are over their soft | ||
700 | * limit by the same amount | ||
701 | */ | ||
702 | else if (mz->usage_in_excess >= mz_node->usage_in_excess) | ||
703 | p = &(*p)->rb_right; | ||
704 | } | ||
705 | rb_link_node(&mz->tree_node, parent, p); | ||
706 | rb_insert_color(&mz->tree_node, &mctz->rb_root); | ||
707 | mz->on_tree = true; | ||
708 | } | ||
709 | |||
710 | static void | ||
711 | __mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, | ||
712 | struct mem_cgroup_per_zone *mz, | ||
713 | struct mem_cgroup_tree_per_zone *mctz) | ||
714 | { | ||
715 | if (!mz->on_tree) | ||
716 | return; | ||
717 | rb_erase(&mz->tree_node, &mctz->rb_root); | ||
718 | mz->on_tree = false; | ||
719 | } | ||
720 | |||
721 | static void | ||
722 | mem_cgroup_remove_exceeded(struct mem_cgroup *memcg, | ||
723 | struct mem_cgroup_per_zone *mz, | ||
724 | struct mem_cgroup_tree_per_zone *mctz) | ||
725 | { | ||
726 | spin_lock(&mctz->lock); | ||
727 | __mem_cgroup_remove_exceeded(memcg, mz, mctz); | ||
728 | spin_unlock(&mctz->lock); | ||
729 | } | ||
730 | |||
731 | |||
732 | static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page) | ||
733 | { | ||
734 | unsigned long long excess; | ||
735 | struct mem_cgroup_per_zone *mz; | ||
736 | struct mem_cgroup_tree_per_zone *mctz; | ||
737 | int nid = page_to_nid(page); | ||
738 | int zid = page_zonenum(page); | ||
739 | mctz = soft_limit_tree_from_page(page); | ||
740 | |||
741 | /* | ||
742 | * Necessary to update all ancestors when hierarchy is used, | ||
743 | * because their event counter is not touched. | ||
744 | */ | ||
745 | for (; memcg; memcg = parent_mem_cgroup(memcg)) { | ||
746 | mz = mem_cgroup_zoneinfo(memcg, nid, zid); | ||
747 | excess = res_counter_soft_limit_excess(&memcg->res); | ||
748 | /* | ||
749 | * We have to update the tree if mz is on RB-tree or | ||
750 | * mem is over its softlimit. | ||
751 | */ | ||
752 | if (excess || mz->on_tree) { | ||
753 | spin_lock(&mctz->lock); | ||
754 | /* if on-tree, remove it */ | ||
755 | if (mz->on_tree) | ||
756 | __mem_cgroup_remove_exceeded(memcg, mz, mctz); | ||
757 | /* | ||
758 | * Insert again. mz->usage_in_excess will be updated. | ||
759 | * If excess is 0, no tree ops. | ||
760 | */ | ||
761 | __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess); | ||
762 | spin_unlock(&mctz->lock); | ||
763 | } | ||
764 | } | ||
765 | } | ||
766 | |||
767 | static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg) | ||
768 | { | ||
769 | int node, zone; | ||
770 | struct mem_cgroup_per_zone *mz; | ||
771 | struct mem_cgroup_tree_per_zone *mctz; | ||
772 | |||
773 | for_each_node(node) { | ||
774 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | ||
775 | mz = mem_cgroup_zoneinfo(memcg, node, zone); | ||
776 | mctz = soft_limit_tree_node_zone(node, zone); | ||
777 | mem_cgroup_remove_exceeded(memcg, mz, mctz); | ||
778 | } | ||
779 | } | ||
780 | } | ||
781 | |||
782 | static struct mem_cgroup_per_zone * | ||
783 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | ||
784 | { | ||
785 | struct rb_node *rightmost = NULL; | ||
786 | struct mem_cgroup_per_zone *mz; | ||
787 | |||
788 | retry: | ||
789 | mz = NULL; | ||
790 | rightmost = rb_last(&mctz->rb_root); | ||
791 | if (!rightmost) | ||
792 | goto done; /* Nothing to reclaim from */ | ||
793 | |||
794 | mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node); | ||
795 | /* | ||
796 | * Remove the node now but someone else can add it back, | ||
797 | * we will add it back at the end of reclaim to its correct | ||
798 | * position in the tree. | ||
799 | */ | ||
800 | __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); | ||
801 | if (!res_counter_soft_limit_excess(&mz->memcg->res) || | ||
802 | !css_tryget(&mz->memcg->css)) | ||
803 | goto retry; | ||
804 | done: | ||
805 | return mz; | ||
806 | } | ||
807 | |||
808 | static struct mem_cgroup_per_zone * | ||
809 | mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | ||
810 | { | ||
811 | struct mem_cgroup_per_zone *mz; | ||
812 | |||
813 | spin_lock(&mctz->lock); | ||
814 | mz = __mem_cgroup_largest_soft_limit_node(mctz); | ||
815 | spin_unlock(&mctz->lock); | ||
816 | return mz; | ||
817 | } | ||
818 | |||
651 | /* | 819 | /* |
652 | * Implementation Note: reading percpu statistics for memcg. | 820 | * Implementation Note: reading percpu statistics for memcg. |
653 | * | 821 | * |
@@ -822,48 +990,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg, | |||
822 | } | 990 | } |
823 | 991 | ||
824 | /* | 992 | /* |
825 | * Called from rate-limited memcg_check_events when enough | ||
826 | * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure | ||
827 | * that all the parents up the hierarchy will be notified that this group | ||
828 | * is in excess or that it is not in excess anymore. mmecg->soft_contributed | ||
829 | * makes the transition a single action whenever the state flips from one to | ||
830 | * the other. | ||
831 | */ | ||
832 | static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg) | ||
833 | { | ||
834 | unsigned long long excess = res_counter_soft_limit_excess(&memcg->res); | ||
835 | struct mem_cgroup *parent = memcg; | ||
836 | int delta = 0; | ||
837 | |||
838 | spin_lock(&memcg->soft_lock); | ||
839 | if (excess) { | ||
840 | if (!memcg->soft_contributed) { | ||
841 | delta = 1; | ||
842 | memcg->soft_contributed = true; | ||
843 | } | ||
844 | } else { | ||
845 | if (memcg->soft_contributed) { | ||
846 | delta = -1; | ||
847 | memcg->soft_contributed = false; | ||
848 | } | ||
849 | } | ||
850 | |||
851 | /* | ||
852 | * Necessary to update all ancestors when hierarchy is used | ||
853 | * because their event counter is not touched. | ||
854 | * We track children even outside the hierarchy for the root | ||
855 | * cgroup because tree walk starting at root should visit | ||
856 | * all cgroups and we want to prevent from pointless tree | ||
857 | * walk if no children is below the limit. | ||
858 | */ | ||
859 | while (delta && (parent = parent_mem_cgroup(parent))) | ||
860 | atomic_add(delta, &parent->children_in_excess); | ||
861 | if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy) | ||
862 | atomic_add(delta, &root_mem_cgroup->children_in_excess); | ||
863 | spin_unlock(&memcg->soft_lock); | ||
864 | } | ||
865 | |||
866 | /* | ||
867 | * Check events in order. | 993 | * Check events in order. |
868 | * | 994 | * |
869 | */ | 995 | */ |
@@ -886,7 +1012,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) | |||
886 | 1012 | ||
887 | mem_cgroup_threshold(memcg); | 1013 | mem_cgroup_threshold(memcg); |
888 | if (unlikely(do_softlimit)) | 1014 | if (unlikely(do_softlimit)) |
889 | mem_cgroup_update_soft_limit(memcg); | 1015 | mem_cgroup_update_tree(memcg, page); |
890 | #if MAX_NUMNODES > 1 | 1016 | #if MAX_NUMNODES > 1 |
891 | if (unlikely(do_numainfo)) | 1017 | if (unlikely(do_numainfo)) |
892 | atomic_inc(&memcg->numainfo_events); | 1018 | atomic_inc(&memcg->numainfo_events); |
@@ -929,15 +1055,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) | |||
929 | return memcg; | 1055 | return memcg; |
930 | } | 1056 | } |
931 | 1057 | ||
932 | static enum mem_cgroup_filter_t | ||
933 | mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root, | ||
934 | mem_cgroup_iter_filter cond) | ||
935 | { | ||
936 | if (!cond) | ||
937 | return VISIT; | ||
938 | return cond(memcg, root); | ||
939 | } | ||
940 | |||
941 | /* | 1058 | /* |
942 | * Returns a next (in a pre-order walk) alive memcg (with elevated css | 1059 | * Returns a next (in a pre-order walk) alive memcg (with elevated css |
943 | * ref. count) or NULL if the whole root's subtree has been visited. | 1060 | * ref. count) or NULL if the whole root's subtree has been visited. |
@@ -945,7 +1062,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root, | |||
945 | * helper function to be used by mem_cgroup_iter | 1062 | * helper function to be used by mem_cgroup_iter |
946 | */ | 1063 | */ |
947 | static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, | 1064 | static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, |
948 | struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond) | 1065 | struct mem_cgroup *last_visited) |
949 | { | 1066 | { |
950 | struct cgroup_subsys_state *prev_css, *next_css; | 1067 | struct cgroup_subsys_state *prev_css, *next_css; |
951 | 1068 | ||
@@ -963,31 +1080,11 @@ skip_node: | |||
963 | if (next_css) { | 1080 | if (next_css) { |
964 | struct mem_cgroup *mem = mem_cgroup_from_css(next_css); | 1081 | struct mem_cgroup *mem = mem_cgroup_from_css(next_css); |
965 | 1082 | ||
966 | switch (mem_cgroup_filter(mem, root, cond)) { | 1083 | if (css_tryget(&mem->css)) |
967 | case SKIP: | 1084 | return mem; |
1085 | else { | ||
968 | prev_css = next_css; | 1086 | prev_css = next_css; |
969 | goto skip_node; | 1087 | goto skip_node; |
970 | case SKIP_TREE: | ||
971 | if (mem == root) | ||
972 | return NULL; | ||
973 | /* | ||
974 | * css_rightmost_descendant is not an optimal way to | ||
975 | * skip through a subtree (especially for imbalanced | ||
976 | * trees leaning to right) but that's what we have right | ||
977 | * now. More effective solution would be traversing | ||
978 | * right-up for first non-NULL without calling | ||
979 | * css_next_descendant_pre afterwards. | ||
980 | */ | ||
981 | prev_css = css_rightmost_descendant(next_css); | ||
982 | goto skip_node; | ||
983 | case VISIT: | ||
984 | if (css_tryget(&mem->css)) | ||
985 | return mem; | ||
986 | else { | ||
987 | prev_css = next_css; | ||
988 | goto skip_node; | ||
989 | } | ||
990 | break; | ||
991 | } | 1088 | } |
992 | } | 1089 | } |
993 | 1090 | ||
@@ -1051,7 +1148,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, | |||
1051 | * @root: hierarchy root | 1148 | * @root: hierarchy root |
1052 | * @prev: previously returned memcg, NULL on first invocation | 1149 | * @prev: previously returned memcg, NULL on first invocation |
1053 | * @reclaim: cookie for shared reclaim walks, NULL for full walks | 1150 | * @reclaim: cookie for shared reclaim walks, NULL for full walks |
1054 | * @cond: filter for visited nodes, NULL for no filter | ||
1055 | * | 1151 | * |
1056 | * Returns references to children of the hierarchy below @root, or | 1152 | * Returns references to children of the hierarchy below @root, or |
1057 | * @root itself, or %NULL after a full round-trip. | 1153 | * @root itself, or %NULL after a full round-trip. |
@@ -1064,18 +1160,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, | |||
1064 | * divide up the memcgs in the hierarchy among all concurrent | 1160 | * divide up the memcgs in the hierarchy among all concurrent |
1065 | * reclaimers operating on the same zone and priority. | 1161 | * reclaimers operating on the same zone and priority. |
1066 | */ | 1162 | */ |
1067 | struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, | 1163 | struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, |
1068 | struct mem_cgroup *prev, | 1164 | struct mem_cgroup *prev, |
1069 | struct mem_cgroup_reclaim_cookie *reclaim, | 1165 | struct mem_cgroup_reclaim_cookie *reclaim) |
1070 | mem_cgroup_iter_filter cond) | ||
1071 | { | 1166 | { |
1072 | struct mem_cgroup *memcg = NULL; | 1167 | struct mem_cgroup *memcg = NULL; |
1073 | struct mem_cgroup *last_visited = NULL; | 1168 | struct mem_cgroup *last_visited = NULL; |
1074 | 1169 | ||
1075 | if (mem_cgroup_disabled()) { | 1170 | if (mem_cgroup_disabled()) |
1076 | /* first call must return non-NULL, second return NULL */ | 1171 | return NULL; |
1077 | return (struct mem_cgroup *)(unsigned long)!prev; | ||
1078 | } | ||
1079 | 1172 | ||
1080 | if (!root) | 1173 | if (!root) |
1081 | root = root_mem_cgroup; | 1174 | root = root_mem_cgroup; |
@@ -1086,9 +1179,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, | |||
1086 | if (!root->use_hierarchy && root != root_mem_cgroup) { | 1179 | if (!root->use_hierarchy && root != root_mem_cgroup) { |
1087 | if (prev) | 1180 | if (prev) |
1088 | goto out_css_put; | 1181 | goto out_css_put; |
1089 | if (mem_cgroup_filter(root, root, cond) == VISIT) | 1182 | return root; |
1090 | return root; | ||
1091 | return NULL; | ||
1092 | } | 1183 | } |
1093 | 1184 | ||
1094 | rcu_read_lock(); | 1185 | rcu_read_lock(); |
@@ -1111,7 +1202,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, | |||
1111 | last_visited = mem_cgroup_iter_load(iter, root, &seq); | 1202 | last_visited = mem_cgroup_iter_load(iter, root, &seq); |
1112 | } | 1203 | } |
1113 | 1204 | ||
1114 | memcg = __mem_cgroup_iter_next(root, last_visited, cond); | 1205 | memcg = __mem_cgroup_iter_next(root, last_visited); |
1115 | 1206 | ||
1116 | if (reclaim) { | 1207 | if (reclaim) { |
1117 | mem_cgroup_iter_update(iter, last_visited, memcg, seq); | 1208 | mem_cgroup_iter_update(iter, last_visited, memcg, seq); |
@@ -1122,11 +1213,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, | |||
1122 | reclaim->generation = iter->generation; | 1213 | reclaim->generation = iter->generation; |
1123 | } | 1214 | } |
1124 | 1215 | ||
1125 | /* | 1216 | if (prev && !memcg) |
1126 | * We have finished the whole tree walk or no group has been | ||
1127 | * visited because filter told us to skip the root node. | ||
1128 | */ | ||
1129 | if (!memcg && (prev || (cond && !last_visited))) | ||
1130 | goto out_unlock; | 1217 | goto out_unlock; |
1131 | } | 1218 | } |
1132 | out_unlock: | 1219 | out_unlock: |
@@ -1767,7 +1854,6 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg, | |||
1767 | return total; | 1854 | return total; |
1768 | } | 1855 | } |
1769 | 1856 | ||
1770 | #if MAX_NUMNODES > 1 | ||
1771 | /** | 1857 | /** |
1772 | * test_mem_cgroup_node_reclaimable | 1858 | * test_mem_cgroup_node_reclaimable |
1773 | * @memcg: the target memcg | 1859 | * @memcg: the target memcg |
@@ -1790,6 +1876,7 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg, | |||
1790 | return false; | 1876 | return false; |
1791 | 1877 | ||
1792 | } | 1878 | } |
1879 | #if MAX_NUMNODES > 1 | ||
1793 | 1880 | ||
1794 | /* | 1881 | /* |
1795 | * Always updating the nodemask is not very good - even if we have an empty | 1882 | * Always updating the nodemask is not very good - even if we have an empty |
@@ -1857,50 +1944,104 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) | |||
1857 | return node; | 1944 | return node; |
1858 | } | 1945 | } |
1859 | 1946 | ||
1947 | /* | ||
1948 | * Check all nodes for whether they contain reclaimable pages. | ||
1949 | * For quick scan, we make use of scan_nodes. This will allow us to skip | ||
1950 | * unused nodes. But scan_nodes is lazily updated and may not contain | ||
1951 | * enough new information. We need to double check. | ||
1952 | */ | ||
1953 | static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) | ||
1954 | { | ||
1955 | int nid; | ||
1956 | |||
1957 | /* | ||
1958 | * quick check...making use of scan_node. | ||
1959 | * We can skip unused nodes. | ||
1960 | */ | ||
1961 | if (!nodes_empty(memcg->scan_nodes)) { | ||
1962 | for (nid = first_node(memcg->scan_nodes); | ||
1963 | nid < MAX_NUMNODES; | ||
1964 | nid = next_node(nid, memcg->scan_nodes)) { | ||
1965 | |||
1966 | if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) | ||
1967 | return true; | ||
1968 | } | ||
1969 | } | ||
1970 | /* | ||
1971 | * Check rest of nodes. | ||
1972 | */ | ||
1973 | for_each_node_state(nid, N_MEMORY) { | ||
1974 | if (node_isset(nid, memcg->scan_nodes)) | ||
1975 | continue; | ||
1976 | if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap)) | ||
1977 | return true; | ||
1978 | } | ||
1979 | return false; | ||
1980 | } | ||
1981 | |||
1860 | #else | 1982 | #else |
1861 | int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) | 1983 | int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) |
1862 | { | 1984 | { |
1863 | return 0; | 1985 | return 0; |
1864 | } | 1986 | } |
1865 | 1987 | ||
1866 | #endif | 1988 | static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap) |
1867 | |||
1868 | /* | ||
1869 | * A group is eligible for the soft limit reclaim under the given root | ||
1870 | * hierarchy if | ||
1871 | * a) it is over its soft limit | ||
1872 | * b) any parent up the hierarchy is over its soft limit | ||
1873 | * | ||
1874 | * If the given group doesn't have any children over the limit then it | ||
1875 | * doesn't make any sense to iterate its subtree. | ||
1876 | */ | ||
1877 | enum mem_cgroup_filter_t | ||
1878 | mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, | ||
1879 | struct mem_cgroup *root) | ||
1880 | { | 1989 | { |
1881 | struct mem_cgroup *parent; | 1990 | return test_mem_cgroup_node_reclaimable(memcg, 0, noswap); |
1882 | 1991 | } | |
1883 | if (!memcg) | 1992 | #endif |
1884 | memcg = root_mem_cgroup; | ||
1885 | parent = memcg; | ||
1886 | |||
1887 | if (res_counter_soft_limit_excess(&memcg->res)) | ||
1888 | return VISIT; | ||
1889 | 1993 | ||
1890 | /* | 1994 | static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg, |
1891 | * If any parent up to the root in the hierarchy is over its soft limit | 1995 | struct zone *zone, |
1892 | * then we have to obey and reclaim from this group as well. | 1996 | gfp_t gfp_mask, |
1893 | */ | 1997 | unsigned long *total_scanned) |
1894 | while ((parent = parent_mem_cgroup(parent))) { | 1998 | { |
1895 | if (res_counter_soft_limit_excess(&parent->res)) | 1999 | struct mem_cgroup *victim = NULL; |
1896 | return VISIT; | 2000 | int total = 0; |
1897 | if (parent == root) | 2001 | int loop = 0; |
2002 | unsigned long excess; | ||
2003 | unsigned long nr_scanned; | ||
2004 | struct mem_cgroup_reclaim_cookie reclaim = { | ||
2005 | .zone = zone, | ||
2006 | .priority = 0, | ||
2007 | }; | ||
2008 | |||
2009 | excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT; | ||
2010 | |||
2011 | while (1) { | ||
2012 | victim = mem_cgroup_iter(root_memcg, victim, &reclaim); | ||
2013 | if (!victim) { | ||
2014 | loop++; | ||
2015 | if (loop >= 2) { | ||
2016 | /* | ||
2017 | * If we have not been able to reclaim | ||
2018 | * anything, it might be because there are | ||
2019 | * no reclaimable pages under this hierarchy | ||
2020 | */ | ||
2021 | if (!total) | ||
2022 | break; | ||
2023 | /* | ||
2024 | * We want to do more targeted reclaim. | ||
2025 | * excess >> 2 is not too excessive so as to | ||
2026 | * reclaim too much, nor too little that we keep | ||
2027 | * coming back to reclaim from this cgroup | ||
2028 | */ | ||
2029 | if (total >= (excess >> 2) || | ||
2030 | (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS)) | ||
2031 | break; | ||
2032 | } | ||
2033 | continue; | ||
2034 | } | ||
2035 | if (!mem_cgroup_reclaimable(victim, false)) | ||
2036 | continue; | ||
2037 | total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false, | ||
2038 | zone, &nr_scanned); | ||
2039 | *total_scanned += nr_scanned; | ||
2040 | if (!res_counter_soft_limit_excess(&root_memcg->res)) | ||
1898 | break; | 2041 | break; |
1899 | } | 2042 | } |
1900 | 2043 | mem_cgroup_iter_break(root_memcg, victim); | |
1901 | if (!atomic_read(&memcg->children_in_excess)) | 2044 | return total; |
1902 | return SKIP_TREE; | ||
1903 | return SKIP; | ||
1904 | } | 2045 | } |
1905 | 2046 | ||
1906 | static DEFINE_SPINLOCK(memcg_oom_lock); | 2047 | static DEFINE_SPINLOCK(memcg_oom_lock); |
@@ -2812,7 +2953,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg, | |||
2812 | unlock_page_cgroup(pc); | 2953 | unlock_page_cgroup(pc); |
2813 | 2954 | ||
2814 | /* | 2955 | /* |
2815 | * "charge_statistics" updated event counter. | 2956 | * "charge_statistics" updated event counter. Then, check it. |
2957 | * Insert ancestor (and ancestor's ancestors) into the softlimit RB-tree | ||
2958 | * if they exceed the softlimit. | ||
2816 | */ | 2959 | */ |
2817 | memcg_check_events(memcg, page); | 2960 | memcg_check_events(memcg, page); |
2818 | } | 2961 | } |
@@ -4647,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg, | |||
4647 | return ret; | 4790 | return ret; |
4648 | } | 4791 | } |
4649 | 4792 | ||
4793 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | ||
4794 | gfp_t gfp_mask, | ||
4795 | unsigned long *total_scanned) | ||
4796 | { | ||
4797 | unsigned long nr_reclaimed = 0; | ||
4798 | struct mem_cgroup_per_zone *mz, *next_mz = NULL; | ||
4799 | unsigned long reclaimed; | ||
4800 | int loop = 0; | ||
4801 | struct mem_cgroup_tree_per_zone *mctz; | ||
4802 | unsigned long long excess; | ||
4803 | unsigned long nr_scanned; | ||
4804 | |||
4805 | if (order > 0) | ||
4806 | return 0; | ||
4807 | |||
4808 | mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone)); | ||
4809 | /* | ||
4810 | * This loop can run a while, especially if mem_cgroups continuously | ||
4811 | * keep exceeding their soft limit and putting the system under | ||
4812 | * pressure. | ||
4813 | */ | ||
4814 | do { | ||
4815 | if (next_mz) | ||
4816 | mz = next_mz; | ||
4817 | else | ||
4818 | mz = mem_cgroup_largest_soft_limit_node(mctz); | ||
4819 | if (!mz) | ||
4820 | break; | ||
4821 | |||
4822 | nr_scanned = 0; | ||
4823 | reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone, | ||
4824 | gfp_mask, &nr_scanned); | ||
4825 | nr_reclaimed += reclaimed; | ||
4826 | *total_scanned += nr_scanned; | ||
4827 | spin_lock(&mctz->lock); | ||
4828 | |||
4829 | /* | ||
4830 | * If we failed to reclaim anything from this memory cgroup | ||
4831 | * it is time to move on to the next cgroup | ||
4832 | */ | ||
4833 | next_mz = NULL; | ||
4834 | if (!reclaimed) { | ||
4835 | do { | ||
4836 | /* | ||
4837 | * Loop until we find yet another one. | ||
4838 | * | ||
4839 | * By the time we get the soft_limit lock | ||
4840 | * again, someone might have added the | ||
4841 | * group back on the RB tree. Iterate to | ||
4842 | * make sure we get a different mem. | ||
4843 | * mem_cgroup_largest_soft_limit_node returns | ||
4844 | * NULL if no other cgroup is present on | ||
4845 | * the tree | ||
4846 | */ | ||
4847 | next_mz = | ||
4848 | __mem_cgroup_largest_soft_limit_node(mctz); | ||
4849 | if (next_mz == mz) | ||
4850 | css_put(&next_mz->memcg->css); | ||
4851 | else /* next_mz == NULL or other memcg */ | ||
4852 | break; | ||
4853 | } while (1); | ||
4854 | } | ||
4855 | __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz); | ||
4856 | excess = res_counter_soft_limit_excess(&mz->memcg->res); | ||
4857 | /* | ||
4858 | * One school of thought says that we should not add | ||
4859 | * back the node to the tree if reclaim returns 0. | ||
4860 | * But our reclaim could return 0 simply because, due | ||
4861 | * to priority, we are exposing a smaller subset of | ||
4862 | * memory to reclaim from. Consider this as a longer | ||
4863 | * term TODO. | ||
4864 | */ | ||
4865 | /* If excess == 0, no tree ops */ | ||
4866 | __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess); | ||
4867 | spin_unlock(&mctz->lock); | ||
4868 | css_put(&mz->memcg->css); | ||
4869 | loop++; | ||
4870 | /* | ||
4871 | * Could not reclaim anything and there are no more | ||
4872 | * mem cgroups to try or we seem to be looping without | ||
4873 | * reclaiming anything. | ||
4874 | */ | ||
4875 | if (!nr_reclaimed && | ||
4876 | (next_mz == NULL || | ||
4877 | loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS)) | ||
4878 | break; | ||
4879 | } while (!nr_reclaimed); | ||
4880 | if (next_mz) | ||
4881 | css_put(&next_mz->memcg->css); | ||
4882 | return nr_reclaimed; | ||
4883 | } | ||
4884 | |||
4650 | /** | 4885 | /** |
4651 | * mem_cgroup_force_empty_list - clears LRU of a group | 4886 | * mem_cgroup_force_empty_list - clears LRU of a group |
4652 | * @memcg: group to clear | 4887 | * @memcg: group to clear |
@@ -5911,6 +6146,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | |||
5911 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | 6146 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { |
5912 | mz = &pn->zoneinfo[zone]; | 6147 | mz = &pn->zoneinfo[zone]; |
5913 | lruvec_init(&mz->lruvec); | 6148 | lruvec_init(&mz->lruvec); |
6149 | mz->usage_in_excess = 0; | ||
6150 | mz->on_tree = false; | ||
5914 | mz->memcg = memcg; | 6151 | mz->memcg = memcg; |
5915 | } | 6152 | } |
5916 | memcg->nodeinfo[node] = pn; | 6153 | memcg->nodeinfo[node] = pn; |
@@ -5966,6 +6203,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg) | |||
5966 | int node; | 6203 | int node; |
5967 | size_t size = memcg_size(); | 6204 | size_t size = memcg_size(); |
5968 | 6205 | ||
6206 | mem_cgroup_remove_from_trees(memcg); | ||
5969 | free_css_id(&mem_cgroup_subsys, &memcg->css); | 6207 | free_css_id(&mem_cgroup_subsys, &memcg->css); |
5970 | 6208 | ||
5971 | for_each_node(node) | 6209 | for_each_node(node) |
@@ -6002,6 +6240,29 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) | |||
6002 | } | 6240 | } |
6003 | EXPORT_SYMBOL(parent_mem_cgroup); | 6241 | EXPORT_SYMBOL(parent_mem_cgroup); |
6004 | 6242 | ||
6243 | static void __init mem_cgroup_soft_limit_tree_init(void) | ||
6244 | { | ||
6245 | struct mem_cgroup_tree_per_node *rtpn; | ||
6246 | struct mem_cgroup_tree_per_zone *rtpz; | ||
6247 | int tmp, node, zone; | ||
6248 | |||
6249 | for_each_node(node) { | ||
6250 | tmp = node; | ||
6251 | if (!node_state(node, N_NORMAL_MEMORY)) | ||
6252 | tmp = -1; | ||
6253 | rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp); | ||
6254 | BUG_ON(!rtpn); | ||
6255 | |||
6256 | soft_limit_tree.rb_tree_per_node[node] = rtpn; | ||
6257 | |||
6258 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | ||
6259 | rtpz = &rtpn->rb_tree_per_zone[zone]; | ||
6260 | rtpz->rb_root = RB_ROOT; | ||
6261 | spin_lock_init(&rtpz->lock); | ||
6262 | } | ||
6263 | } | ||
6264 | } | ||
6265 | |||
6005 | static struct cgroup_subsys_state * __ref | 6266 | static struct cgroup_subsys_state * __ref |
6006 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | 6267 | mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) |
6007 | { | 6268 | { |
@@ -6031,7 +6292,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) | |||
6031 | mutex_init(&memcg->thresholds_lock); | 6292 | mutex_init(&memcg->thresholds_lock); |
6032 | spin_lock_init(&memcg->move_lock); | 6293 | spin_lock_init(&memcg->move_lock); |
6033 | vmpressure_init(&memcg->vmpressure); | 6294 | vmpressure_init(&memcg->vmpressure); |
6034 | spin_lock_init(&memcg->soft_lock); | ||
6035 | 6295 | ||
6036 | return &memcg->css; | 6296 | return &memcg->css; |
6037 | 6297 | ||
@@ -6109,13 +6369,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) | |||
6109 | 6369 | ||
6110 | mem_cgroup_invalidate_reclaim_iterators(memcg); | 6370 | mem_cgroup_invalidate_reclaim_iterators(memcg); |
6111 | mem_cgroup_reparent_charges(memcg); | 6371 | mem_cgroup_reparent_charges(memcg); |
6112 | if (memcg->soft_contributed) { | ||
6113 | while ((memcg = parent_mem_cgroup(memcg))) | ||
6114 | atomic_dec(&memcg->children_in_excess); | ||
6115 | |||
6116 | if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy) | ||
6117 | atomic_dec(&root_mem_cgroup->children_in_excess); | ||
6118 | } | ||
6119 | mem_cgroup_destroy_all_caches(memcg); | 6372 | mem_cgroup_destroy_all_caches(memcg); |
6120 | vmpressure_cleanup(&memcg->vmpressure); | 6373 | vmpressure_cleanup(&memcg->vmpressure); |
6121 | } | 6374 | } |
@@ -6790,6 +7043,7 @@ static int __init mem_cgroup_init(void) | |||
6790 | { | 7043 | { |
6791 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); | 7044 | hotcpu_notifier(memcg_cpu_hotplug_callback, 0); |
6792 | enable_swap_cgroup(); | 7045 | enable_swap_cgroup(); |
7046 | mem_cgroup_soft_limit_tree_init(); | ||
6793 | memcg_stock_init(); | 7047 | memcg_stock_init(); |
6794 | return 0; | 7048 | return 0; |
6795 | } | 7049 | } |
diff --git a/mm/mlock.c b/mm/mlock.c index d63802663242..67ba6da7d0e3 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -736,6 +736,7 @@ static int do_mlockall(int flags) | |||
736 | 736 | ||
737 | /* Ignore errors */ | 737 | /* Ignore errors */ |
738 | mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); | 738 | mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); |
739 | cond_resched(); | ||
739 | } | 740 | } |
740 | out: | 741 | out: |
741 | return 0; | 742 | return 0; |
diff --git a/mm/vmscan.c b/mm/vmscan.c index 8ed1b775bdc9..beb35778c69f 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -139,23 +139,11 @@ static bool global_reclaim(struct scan_control *sc) | |||
139 | { | 139 | { |
140 | return !sc->target_mem_cgroup; | 140 | return !sc->target_mem_cgroup; |
141 | } | 141 | } |
142 | |||
143 | static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) | ||
144 | { | ||
145 | struct mem_cgroup *root = sc->target_mem_cgroup; | ||
146 | return !mem_cgroup_disabled() && | ||
147 | mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE; | ||
148 | } | ||
149 | #else | 142 | #else |
150 | static bool global_reclaim(struct scan_control *sc) | 143 | static bool global_reclaim(struct scan_control *sc) |
151 | { | 144 | { |
152 | return true; | 145 | return true; |
153 | } | 146 | } |
154 | |||
155 | static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc) | ||
156 | { | ||
157 | return false; | ||
158 | } | ||
159 | #endif | 147 | #endif |
160 | 148 | ||
161 | unsigned long zone_reclaimable_pages(struct zone *zone) | 149 | unsigned long zone_reclaimable_pages(struct zone *zone) |
@@ -2176,11 +2164,9 @@ static inline bool should_continue_reclaim(struct zone *zone, | |||
2176 | } | 2164 | } |
2177 | } | 2165 | } |
2178 | 2166 | ||
2179 | static int | 2167 | static void shrink_zone(struct zone *zone, struct scan_control *sc) |
2180 | __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) | ||
2181 | { | 2168 | { |
2182 | unsigned long nr_reclaimed, nr_scanned; | 2169 | unsigned long nr_reclaimed, nr_scanned; |
2183 | int groups_scanned = 0; | ||
2184 | 2170 | ||
2185 | do { | 2171 | do { |
2186 | struct mem_cgroup *root = sc->target_mem_cgroup; | 2172 | struct mem_cgroup *root = sc->target_mem_cgroup; |
@@ -2188,17 +2174,15 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) | |||
2188 | .zone = zone, | 2174 | .zone = zone, |
2189 | .priority = sc->priority, | 2175 | .priority = sc->priority, |
2190 | }; | 2176 | }; |
2191 | struct mem_cgroup *memcg = NULL; | 2177 | struct mem_cgroup *memcg; |
2192 | mem_cgroup_iter_filter filter = (soft_reclaim) ? | ||
2193 | mem_cgroup_soft_reclaim_eligible : NULL; | ||
2194 | 2178 | ||
2195 | nr_reclaimed = sc->nr_reclaimed; | 2179 | nr_reclaimed = sc->nr_reclaimed; |
2196 | nr_scanned = sc->nr_scanned; | 2180 | nr_scanned = sc->nr_scanned; |
2197 | 2181 | ||
2198 | while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) { | 2182 | memcg = mem_cgroup_iter(root, NULL, &reclaim); |
2183 | do { | ||
2199 | struct lruvec *lruvec; | 2184 | struct lruvec *lruvec; |
2200 | 2185 | ||
2201 | groups_scanned++; | ||
2202 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); | 2186 | lruvec = mem_cgroup_zone_lruvec(zone, memcg); |
2203 | 2187 | ||
2204 | shrink_lruvec(lruvec, sc); | 2188 | shrink_lruvec(lruvec, sc); |
@@ -2218,7 +2202,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) | |||
2218 | mem_cgroup_iter_break(root, memcg); | 2202 | mem_cgroup_iter_break(root, memcg); |
2219 | break; | 2203 | break; |
2220 | } | 2204 | } |
2221 | } | 2205 | memcg = mem_cgroup_iter(root, memcg, &reclaim); |
2206 | } while (memcg); | ||
2222 | 2207 | ||
2223 | vmpressure(sc->gfp_mask, sc->target_mem_cgroup, | 2208 | vmpressure(sc->gfp_mask, sc->target_mem_cgroup, |
2224 | sc->nr_scanned - nr_scanned, | 2209 | sc->nr_scanned - nr_scanned, |
@@ -2226,37 +2211,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim) | |||
2226 | 2211 | ||
2227 | } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, | 2212 | } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, |
2228 | sc->nr_scanned - nr_scanned, sc)); | 2213 | sc->nr_scanned - nr_scanned, sc)); |
2229 | |||
2230 | return groups_scanned; | ||
2231 | } | ||
2232 | |||
2233 | |||
2234 | static void shrink_zone(struct zone *zone, struct scan_control *sc) | ||
2235 | { | ||
2236 | bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc); | ||
2237 | unsigned long nr_scanned = sc->nr_scanned; | ||
2238 | int scanned_groups; | ||
2239 | |||
2240 | scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim); | ||
2241 | /* | ||
2242 | * memcg iterator might race with other reclaimer or start from | ||
2243 | * a incomplete tree walk so the tree walk in __shrink_zone | ||
2244 | * might have missed groups that are above the soft limit. Try | ||
2245 | * another loop to catch up with others. Do it just once to | ||
2246 | * prevent from reclaim latencies when other reclaimers always | ||
2247 | * preempt this one. | ||
2248 | */ | ||
2249 | if (do_soft_reclaim && !scanned_groups) | ||
2250 | __shrink_zone(zone, sc, do_soft_reclaim); | ||
2251 | |||
2252 | /* | ||
2253 | * No group is over the soft limit or those that are do not have | ||
2254 | * pages in the zone we are reclaiming so we have to reclaim everybody | ||
2255 | */ | ||
2256 | if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) { | ||
2257 | __shrink_zone(zone, sc, false); | ||
2258 | return; | ||
2259 | } | ||
2260 | } | 2214 | } |
2261 | 2215 | ||
2262 | /* Returns true if compaction should go ahead for a high-order request */ | 2216 | /* Returns true if compaction should go ahead for a high-order request */ |
@@ -2320,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) | |||
2320 | { | 2274 | { |
2321 | struct zoneref *z; | 2275 | struct zoneref *z; |
2322 | struct zone *zone; | 2276 | struct zone *zone; |
2277 | unsigned long nr_soft_reclaimed; | ||
2278 | unsigned long nr_soft_scanned; | ||
2323 | bool aborted_reclaim = false; | 2279 | bool aborted_reclaim = false; |
2324 | 2280 | ||
2325 | /* | 2281 | /* |
@@ -2359,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc) | |||
2359 | continue; | 2315 | continue; |
2360 | } | 2316 | } |
2361 | } | 2317 | } |
2318 | /* | ||
2319 | * This steals pages from memory cgroups over softlimit | ||
2320 | * and returns the number of reclaimed pages and | ||
2321 | * scanned pages. This works for global memory pressure | ||
2322 | * and balancing, not for a memcg's limit. | ||
2323 | */ | ||
2324 | nr_soft_scanned = 0; | ||
2325 | nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, | ||
2326 | sc->order, sc->gfp_mask, | ||
2327 | &nr_soft_scanned); | ||
2328 | sc->nr_reclaimed += nr_soft_reclaimed; | ||
2329 | sc->nr_scanned += nr_soft_scanned; | ||
2362 | /* need some check to avoid more shrink_zone() */ | 2330 | /* need some check to avoid more shrink_zone() */ |
2363 | } | 2331 | } |
2364 | 2332 | ||
@@ -2952,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
2952 | { | 2920 | { |
2953 | int i; | 2921 | int i; |
2954 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ | 2922 | int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ |
2923 | unsigned long nr_soft_reclaimed; | ||
2924 | unsigned long nr_soft_scanned; | ||
2955 | struct scan_control sc = { | 2925 | struct scan_control sc = { |
2956 | .gfp_mask = GFP_KERNEL, | 2926 | .gfp_mask = GFP_KERNEL, |
2957 | .priority = DEF_PRIORITY, | 2927 | .priority = DEF_PRIORITY, |
@@ -3066,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order, | |||
3066 | 3036 | ||
3067 | sc.nr_scanned = 0; | 3037 | sc.nr_scanned = 0; |
3068 | 3038 | ||
3039 | nr_soft_scanned = 0; | ||
3040 | /* | ||
3041 | * Call soft limit reclaim before calling shrink_zone. | ||
3042 | */ | ||
3043 | nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone, | ||
3044 | order, sc.gfp_mask, | ||
3045 | &nr_soft_scanned); | ||
3046 | sc.nr_reclaimed += nr_soft_reclaimed; | ||
3047 | |||
3069 | /* | 3048 | /* |
3070 | * There should be no need to raise the scanning | 3049 | * There should be no need to raise the scanning |
3071 | * priority if enough pages are already being scanned | 3050 | * priority if enough pages are already being scanned |
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 47016c304c84..66cad506b8a2 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl | |||
@@ -3975,8 +3975,8 @@ sub string_find_replace { | |||
3975 | # check for new externs in .h files. | 3975 | # check for new externs in .h files. |
3976 | if ($realfile =~ /\.h$/ && | 3976 | if ($realfile =~ /\.h$/ && |
3977 | $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) { | 3977 | $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) { |
3978 | if (WARN("AVOID_EXTERNS", | 3978 | if (CHK("AVOID_EXTERNS", |
3979 | "extern prototypes should be avoided in .h files\n" . $herecurr) && | 3979 | "extern prototypes should be avoided in .h files\n" . $herecurr) && |
3980 | $fix) { | 3980 | $fix) { |
3981 | $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; | 3981 | $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; |
3982 | } | 3982 | } |
diff --git a/tools/lib/lk/debugfs.c b/tools/lib/lk/debugfs.c index 099e7cd022e4..7c4347962353 100644 --- a/tools/lib/lk/debugfs.c +++ b/tools/lib/lk/debugfs.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <stdbool.h> | 5 | #include <stdbool.h> |
6 | #include <sys/vfs.h> | 6 | #include <sys/vfs.h> |
7 | #include <sys/mount.h> | 7 | #include <sys/mount.h> |
8 | #include <linux/magic.h> | ||
9 | #include <linux/kernel.h> | 8 | #include <linux/kernel.h> |
10 | 9 | ||
11 | #include "debugfs.h" | 10 | #include "debugfs.h" |
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index 9570c2b0f83c..b2519e49424f 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c | |||
@@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc) | |||
32 | int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, | 32 | int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, |
33 | struct perf_tsc_conversion *tc) | 33 | struct perf_tsc_conversion *tc) |
34 | { | 34 | { |
35 | bool cap_usr_time_zero; | 35 | bool cap_user_time_zero; |
36 | u32 seq; | 36 | u32 seq; |
37 | int i = 0; | 37 | int i = 0; |
38 | 38 | ||
@@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, | |||
42 | tc->time_mult = pc->time_mult; | 42 | tc->time_mult = pc->time_mult; |
43 | tc->time_shift = pc->time_shift; | 43 | tc->time_shift = pc->time_shift; |
44 | tc->time_zero = pc->time_zero; | 44 | tc->time_zero = pc->time_zero; |
45 | cap_usr_time_zero = pc->cap_usr_time_zero; | 45 | cap_user_time_zero = pc->cap_user_time_zero; |
46 | rmb(); | 46 | rmb(); |
47 | if (pc->lock == seq && !(seq & 1)) | 47 | if (pc->lock == seq && !(seq & 1)) |
48 | break; | 48 | break; |
@@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, | |||
52 | } | 52 | } |
53 | } | 53 | } |
54 | 54 | ||
55 | if (!cap_usr_time_zero) | 55 | if (!cap_user_time_zero) |
56 | return -EOPNOTSUPP; | 56 | return -EOPNOTSUPP; |
57 | 57 | ||
58 | return 0; | 58 | return 0; |
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 423875c999b2..afe377b2884f 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c | |||
@@ -321,8 +321,6 @@ found: | |||
321 | return perf_event__repipe(tool, event_sw, &sample_sw, machine); | 321 | return perf_event__repipe(tool, event_sw, &sample_sw, machine); |
322 | } | 322 | } |
323 | 323 | ||
324 | extern volatile int session_done; | ||
325 | |||
326 | static void sig_handler(int sig __maybe_unused) | 324 | static void sig_handler(int sig __maybe_unused) |
327 | { | 325 | { |
328 | session_done = 1; | 326 | session_done = 1; |
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8e50d8d77419..72eae7498c09 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c | |||
@@ -401,8 +401,6 @@ static int perf_report__setup_sample_type(struct perf_report *rep) | |||
401 | return 0; | 401 | return 0; |
402 | } | 402 | } |
403 | 403 | ||
404 | extern volatile int session_done; | ||
405 | |||
406 | static void sig_handler(int sig __maybe_unused) | 404 | static void sig_handler(int sig __maybe_unused) |
407 | { | 405 | { |
408 | session_done = 1; | 406 | session_done = 1; |
@@ -568,6 +566,9 @@ static int __cmd_report(struct perf_report *rep) | |||
568 | } | 566 | } |
569 | } | 567 | } |
570 | 568 | ||
569 | if (session_done()) | ||
570 | return 0; | ||
571 | |||
571 | if (nr_samples == 0) { | 572 | if (nr_samples == 0) { |
572 | ui__error("The %s file has no samples!\n", session->filename); | 573 | ui__error("The %s file has no samples!\n", session->filename); |
573 | return 0; | 574 | return 0; |
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7f31a3ded1b6..9c333ff3dfeb 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c | |||
@@ -553,8 +553,6 @@ static struct perf_tool perf_script = { | |||
553 | .ordering_requires_timestamps = true, | 553 | .ordering_requires_timestamps = true, |
554 | }; | 554 | }; |
555 | 555 | ||
556 | extern volatile int session_done; | ||
557 | |||
558 | static void sig_handler(int sig __maybe_unused) | 556 | static void sig_handler(int sig __maybe_unused) |
559 | { | 557 | { |
560 | session_done = 1; | 558 | session_done = 1; |
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f5aa6375e3e9..fd4853404727 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c | |||
@@ -16,6 +16,23 @@ | |||
16 | #include <sys/mman.h> | 16 | #include <sys/mman.h> |
17 | #include <linux/futex.h> | 17 | #include <linux/futex.h> |
18 | 18 | ||
19 | /* For older distros: */ | ||
20 | #ifndef MAP_STACK | ||
21 | # define MAP_STACK 0x20000 | ||
22 | #endif | ||
23 | |||
24 | #ifndef MADV_HWPOISON | ||
25 | # define MADV_HWPOISON 100 | ||
26 | #endif | ||
27 | |||
28 | #ifndef MADV_MERGEABLE | ||
29 | # define MADV_MERGEABLE 12 | ||
30 | #endif | ||
31 | |||
32 | #ifndef MADV_UNMERGEABLE | ||
33 | # define MADV_UNMERGEABLE 13 | ||
34 | #endif | ||
35 | |||
19 | static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, | 36 | static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, |
20 | unsigned long arg, | 37 | unsigned long arg, |
21 | u8 arg_idx __maybe_unused, | 38 | u8 arg_idx __maybe_unused, |
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index 214e17e97e5c..346ee929d250 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile | |||
@@ -180,6 +180,9 @@ FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) | |||
180 | ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y) | 180 | ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y) |
181 | CFLAGS += -DLIBELF_MMAP | 181 | CFLAGS += -DLIBELF_MMAP |
182 | endif | 182 | endif |
183 | ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y) | ||
184 | CFLAGS += -DHAVE_ELF_GETPHDRNUM | ||
185 | endif | ||
183 | 186 | ||
184 | # include ARCH specific config | 187 | # include ARCH specific config |
185 | -include $(src-perf)/arch/$(ARCH)/Makefile | 188 | -include $(src-perf)/arch/$(ARCH)/Makefile |
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak index 708fb8e9822a..d5a8dd44945f 100644 --- a/tools/perf/config/feature-tests.mak +++ b/tools/perf/config/feature-tests.mak | |||
@@ -61,6 +61,15 @@ int main(void) | |||
61 | } | 61 | } |
62 | endef | 62 | endef |
63 | 63 | ||
64 | define SOURCE_ELF_GETPHDRNUM | ||
65 | #include <libelf.h> | ||
66 | int main(void) | ||
67 | { | ||
68 | size_t dst; | ||
69 | return elf_getphdrnum(0, &dst); | ||
70 | } | ||
71 | endef | ||
72 | |||
64 | ifndef NO_SLANG | 73 | ifndef NO_SLANG |
65 | define SOURCE_SLANG | 74 | define SOURCE_SLANG |
66 | #include <slang.h> | 75 | #include <slang.h> |
@@ -210,6 +219,7 @@ define SOURCE_LIBAUDIT | |||
210 | 219 | ||
211 | int main(void) | 220 | int main(void) |
212 | { | 221 | { |
222 | printf(\"error message: %s\n\", audit_errno_to_name(0)); | ||
213 | return audit_open(); | 223 | return audit_open(); |
214 | } | 224 | } |
215 | endef | 225 | endef |
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bfc5a27597d6..7eae5488ecea 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c | |||
@@ -809,7 +809,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, | |||
809 | end = map__rip_2objdump(map, sym->end); | 809 | end = map__rip_2objdump(map, sym->end); |
810 | 810 | ||
811 | offset = line_ip - start; | 811 | offset = line_ip - start; |
812 | if (offset < 0 || (u64)line_ip > end) | 812 | if ((u64)line_ip < start || (u64)line_ip > end) |
813 | offset = -1; | 813 | offset = -1; |
814 | else | 814 | else |
815 | parsed_line = tmp2 + 1; | 815 | parsed_line = tmp2 + 1; |
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 3e5f5430a28a..e23bde19d590 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c | |||
@@ -263,6 +263,21 @@ bool die_is_signed_type(Dwarf_Die *tp_die) | |||
263 | } | 263 | } |
264 | 264 | ||
265 | /** | 265 | /** |
266 | * die_is_func_def - Ensure that this DIE is a subprogram and definition | ||
267 | * @dw_die: a DIE | ||
268 | * | ||
269 | * Ensure that this DIE is a subprogram and NOT a declaration. This | ||
270 | * returns true if @dw_die is a function definition. | ||
271 | **/ | ||
272 | bool die_is_func_def(Dwarf_Die *dw_die) | ||
273 | { | ||
274 | Dwarf_Attribute attr; | ||
275 | |||
276 | return (dwarf_tag(dw_die) == DW_TAG_subprogram && | ||
277 | dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL); | ||
278 | } | ||
279 | |||
280 | /** | ||
266 | * die_get_data_member_location - Get the data-member offset | 281 | * die_get_data_member_location - Get the data-member offset |
267 | * @mb_die: a DIE of a member of a data structure | 282 | * @mb_die: a DIE of a member of a data structure |
268 | * @offs: The offset of the member in the data structure | 283 | * @offs: The offset of the member in the data structure |
@@ -392,6 +407,10 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) | |||
392 | { | 407 | { |
393 | struct __addr_die_search_param *ad = data; | 408 | struct __addr_die_search_param *ad = data; |
394 | 409 | ||
410 | /* | ||
411 | * Since a declaration entry doesn't has given pc, this always returns | ||
412 | * function definition entry. | ||
413 | */ | ||
395 | if (dwarf_tag(fn_die) == DW_TAG_subprogram && | 414 | if (dwarf_tag(fn_die) == DW_TAG_subprogram && |
396 | dwarf_haspc(fn_die, ad->addr)) { | 415 | dwarf_haspc(fn_die, ad->addr)) { |
397 | memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); | 416 | memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); |
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 6ce1717784b7..8658d41697d2 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h | |||
@@ -38,6 +38,9 @@ extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr, | |||
38 | extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, | 38 | extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, |
39 | int (*callback)(Dwarf_Die *, void *), void *data); | 39 | int (*callback)(Dwarf_Die *, void *), void *data); |
40 | 40 | ||
41 | /* Ensure that this DIE is a subprogram and definition (not declaration) */ | ||
42 | extern bool die_is_func_def(Dwarf_Die *dw_die); | ||
43 | |||
41 | /* Compare diename and tname */ | 44 | /* Compare diename and tname */ |
42 | extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); | 45 | extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); |
43 | 46 | ||
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 26441d0e571b..ce69901176d8 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c | |||
@@ -199,9 +199,11 @@ static int write_buildid(char *name, size_t name_len, u8 *build_id, | |||
199 | return write_padded(fd, name, name_len + 1, len); | 199 | return write_padded(fd, name, name_len + 1, len); |
200 | } | 200 | } |
201 | 201 | ||
202 | static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, | 202 | static int __dsos__write_buildid_table(struct list_head *head, |
203 | u16 misc, int fd) | 203 | struct machine *machine, |
204 | pid_t pid, u16 misc, int fd) | ||
204 | { | 205 | { |
206 | char nm[PATH_MAX]; | ||
205 | struct dso *pos; | 207 | struct dso *pos; |
206 | 208 | ||
207 | dsos__for_each_with_build_id(pos, head) { | 209 | dsos__for_each_with_build_id(pos, head) { |
@@ -215,6 +217,10 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, | |||
215 | if (is_vdso_map(pos->short_name)) { | 217 | if (is_vdso_map(pos->short_name)) { |
216 | name = (char *) VDSO__MAP_NAME; | 218 | name = (char *) VDSO__MAP_NAME; |
217 | name_len = sizeof(VDSO__MAP_NAME) + 1; | 219 | name_len = sizeof(VDSO__MAP_NAME) + 1; |
220 | } else if (dso__is_kcore(pos)) { | ||
221 | machine__mmap_name(machine, nm, sizeof(nm)); | ||
222 | name = nm; | ||
223 | name_len = strlen(nm) + 1; | ||
218 | } else { | 224 | } else { |
219 | name = pos->long_name; | 225 | name = pos->long_name; |
220 | name_len = pos->long_name_len + 1; | 226 | name_len = pos->long_name_len + 1; |
@@ -240,10 +246,10 @@ static int machine__write_buildid_table(struct machine *machine, int fd) | |||
240 | umisc = PERF_RECORD_MISC_GUEST_USER; | 246 | umisc = PERF_RECORD_MISC_GUEST_USER; |
241 | } | 247 | } |
242 | 248 | ||
243 | err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid, | 249 | err = __dsos__write_buildid_table(&machine->kernel_dsos, machine, |
244 | kmisc, fd); | 250 | machine->pid, kmisc, fd); |
245 | if (err == 0) | 251 | if (err == 0) |
246 | err = __dsos__write_buildid_table(&machine->user_dsos, | 252 | err = __dsos__write_buildid_table(&machine->user_dsos, machine, |
247 | machine->pid, umisc, fd); | 253 | machine->pid, umisc, fd); |
248 | return err; | 254 | return err; |
249 | } | 255 | } |
@@ -375,23 +381,31 @@ out_free: | |||
375 | return err; | 381 | return err; |
376 | } | 382 | } |
377 | 383 | ||
378 | static int dso__cache_build_id(struct dso *dso, const char *debugdir) | 384 | static int dso__cache_build_id(struct dso *dso, struct machine *machine, |
385 | const char *debugdir) | ||
379 | { | 386 | { |
380 | bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; | 387 | bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; |
381 | bool is_vdso = is_vdso_map(dso->short_name); | 388 | bool is_vdso = is_vdso_map(dso->short_name); |
389 | char *name = dso->long_name; | ||
390 | char nm[PATH_MAX]; | ||
382 | 391 | ||
383 | return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), | 392 | if (dso__is_kcore(dso)) { |
384 | dso->long_name, debugdir, | 393 | is_kallsyms = true; |
385 | is_kallsyms, is_vdso); | 394 | machine__mmap_name(machine, nm, sizeof(nm)); |
395 | name = nm; | ||
396 | } | ||
397 | return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, | ||
398 | debugdir, is_kallsyms, is_vdso); | ||
386 | } | 399 | } |
387 | 400 | ||
388 | static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) | 401 | static int __dsos__cache_build_ids(struct list_head *head, |
402 | struct machine *machine, const char *debugdir) | ||
389 | { | 403 | { |
390 | struct dso *pos; | 404 | struct dso *pos; |
391 | int err = 0; | 405 | int err = 0; |
392 | 406 | ||
393 | dsos__for_each_with_build_id(pos, head) | 407 | dsos__for_each_with_build_id(pos, head) |
394 | if (dso__cache_build_id(pos, debugdir)) | 408 | if (dso__cache_build_id(pos, machine, debugdir)) |
395 | err = -1; | 409 | err = -1; |
396 | 410 | ||
397 | return err; | 411 | return err; |
@@ -399,8 +413,9 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) | |||
399 | 413 | ||
400 | static int machine__cache_build_ids(struct machine *machine, const char *debugdir) | 414 | static int machine__cache_build_ids(struct machine *machine, const char *debugdir) |
401 | { | 415 | { |
402 | int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir); | 416 | int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine, |
403 | ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir); | 417 | debugdir); |
418 | ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir); | ||
404 | return ret; | 419 | return ret; |
405 | } | 420 | } |
406 | 421 | ||
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 46a0d35a05e1..9ff6cf3e9a99 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c | |||
@@ -611,6 +611,8 @@ void hists__collapse_resort(struct hists *hists) | |||
611 | next = rb_first(root); | 611 | next = rb_first(root); |
612 | 612 | ||
613 | while (next) { | 613 | while (next) { |
614 | if (session_done()) | ||
615 | break; | ||
614 | n = rb_entry(next, struct hist_entry, rb_node_in); | 616 | n = rb_entry(next, struct hist_entry, rb_node_in); |
615 | next = rb_next(&n->rb_node_in); | 617 | next = rb_next(&n->rb_node_in); |
616 | 618 | ||
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index be0329394d56..20c7299a9d4e 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c | |||
@@ -734,7 +734,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) | |||
734 | } | 734 | } |
735 | 735 | ||
736 | /* If not a real subprogram, find a real one */ | 736 | /* If not a real subprogram, find a real one */ |
737 | if (dwarf_tag(sc_die) != DW_TAG_subprogram) { | 737 | if (!die_is_func_def(sc_die)) { |
738 | if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { | 738 | if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { |
739 | pr_warning("Failed to find probe point in any " | 739 | pr_warning("Failed to find probe point in any " |
740 | "functions.\n"); | 740 | "functions.\n"); |
@@ -980,12 +980,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) | |||
980 | struct dwarf_callback_param *param = data; | 980 | struct dwarf_callback_param *param = data; |
981 | struct probe_finder *pf = param->data; | 981 | struct probe_finder *pf = param->data; |
982 | struct perf_probe_point *pp = &pf->pev->point; | 982 | struct perf_probe_point *pp = &pf->pev->point; |
983 | Dwarf_Attribute attr; | ||
984 | 983 | ||
985 | /* Check tag and diename */ | 984 | /* Check tag and diename */ |
986 | if (dwarf_tag(sp_die) != DW_TAG_subprogram || | 985 | if (!die_is_func_def(sp_die) || |
987 | !die_compare_name(sp_die, pp->function) || | 986 | !die_compare_name(sp_die, pp->function)) |
988 | dwarf_attr(sp_die, DW_AT_declaration, &attr)) | ||
989 | return DWARF_CB_OK; | 987 | return DWARF_CB_OK; |
990 | 988 | ||
991 | /* Check declared file */ | 989 | /* Check declared file */ |
@@ -1474,7 +1472,7 @@ static int line_range_inline_cb(Dwarf_Die *in_die, void *data) | |||
1474 | return 0; | 1472 | return 0; |
1475 | } | 1473 | } |
1476 | 1474 | ||
1477 | /* Search function from function name */ | 1475 | /* Search function definition from function name */ |
1478 | static int line_range_search_cb(Dwarf_Die *sp_die, void *data) | 1476 | static int line_range_search_cb(Dwarf_Die *sp_die, void *data) |
1479 | { | 1477 | { |
1480 | struct dwarf_callback_param *param = data; | 1478 | struct dwarf_callback_param *param = data; |
@@ -1485,7 +1483,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) | |||
1485 | if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) | 1483 | if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) |
1486 | return DWARF_CB_OK; | 1484 | return DWARF_CB_OK; |
1487 | 1485 | ||
1488 | if (dwarf_tag(sp_die) == DW_TAG_subprogram && | 1486 | if (die_is_func_def(sp_die) && |
1489 | die_compare_name(sp_die, lr->function)) { | 1487 | die_compare_name(sp_die, lr->function)) { |
1490 | lf->fname = dwarf_decl_file(sp_die); | 1488 | lf->fname = dwarf_decl_file(sp_die); |
1491 | dwarf_decl_line(sp_die, &lr->offset); | 1489 | dwarf_decl_line(sp_die, &lr->offset); |
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 51f5edf2a6d0..70ffa41518f3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c | |||
@@ -531,6 +531,9 @@ static int flush_sample_queue(struct perf_session *s, | |||
531 | return 0; | 531 | return 0; |
532 | 532 | ||
533 | list_for_each_entry_safe(iter, tmp, head, list) { | 533 | list_for_each_entry_safe(iter, tmp, head, list) { |
534 | if (session_done()) | ||
535 | return 0; | ||
536 | |||
534 | if (iter->timestamp > limit) | 537 | if (iter->timestamp > limit) |
535 | break; | 538 | break; |
536 | 539 | ||
@@ -1160,7 +1163,6 @@ static void perf_session__warn_about_errors(const struct perf_session *session, | |||
1160 | } | 1163 | } |
1161 | } | 1164 | } |
1162 | 1165 | ||
1163 | #define session_done() (*(volatile int *)(&session_done)) | ||
1164 | volatile int session_done; | 1166 | volatile int session_done; |
1165 | 1167 | ||
1166 | static int __perf_session__process_pipe_events(struct perf_session *self, | 1168 | static int __perf_session__process_pipe_events(struct perf_session *self, |
@@ -1372,10 +1374,13 @@ more: | |||
1372 | "Processing events..."); | 1374 | "Processing events..."); |
1373 | } | 1375 | } |
1374 | 1376 | ||
1377 | err = 0; | ||
1378 | if (session_done()) | ||
1379 | goto out_err; | ||
1380 | |||
1375 | if (file_pos < file_size) | 1381 | if (file_pos < file_size) |
1376 | goto more; | 1382 | goto more; |
1377 | 1383 | ||
1378 | err = 0; | ||
1379 | /* do the final flush for ordered samples */ | 1384 | /* do the final flush for ordered samples */ |
1380 | session->ordered_samples.next_flush = ULLONG_MAX; | 1385 | session->ordered_samples.next_flush = ULLONG_MAX; |
1381 | err = flush_sample_queue(session, tool); | 1386 | err = flush_sample_queue(session, tool); |
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 3aa75fb2225f..04bf7373a7e5 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h | |||
@@ -124,4 +124,8 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session, | |||
124 | 124 | ||
125 | #define perf_session__set_tracepoints_handlers(session, array) \ | 125 | #define perf_session__set_tracepoints_handlers(session, array) \ |
126 | __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) | 126 | __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) |
127 | |||
128 | extern volatile int session_done; | ||
129 | |||
130 | #define session_done() (*(volatile int *)(&session_done)) | ||
127 | #endif /* __PERF_SESSION_H */ | 131 | #endif /* __PERF_SESSION_H */ |
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a7b9ab557380..a9c829be5216 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c | |||
@@ -8,6 +8,22 @@ | |||
8 | #include "symbol.h" | 8 | #include "symbol.h" |
9 | #include "debug.h" | 9 | #include "debug.h" |
10 | 10 | ||
11 | #ifndef HAVE_ELF_GETPHDRNUM | ||
12 | static int elf_getphdrnum(Elf *elf, size_t *dst) | ||
13 | { | ||
14 | GElf_Ehdr gehdr; | ||
15 | GElf_Ehdr *ehdr; | ||
16 | |||
17 | ehdr = gelf_getehdr(elf, &gehdr); | ||
18 | if (!ehdr) | ||
19 | return -1; | ||
20 | |||
21 | *dst = ehdr->e_phnum; | ||
22 | |||
23 | return 0; | ||
24 | } | ||
25 | #endif | ||
26 | |||
11 | #ifndef NT_GNU_BUILD_ID | 27 | #ifndef NT_GNU_BUILD_ID |
12 | #define NT_GNU_BUILD_ID 3 | 28 | #define NT_GNU_BUILD_ID 3 |
13 | #endif | 29 | #endif |
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index fe7a27d67d2b..e9e1c03f927d 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c | |||
@@ -186,7 +186,7 @@ void parse_proc_kallsyms(struct pevent *pevent, | |||
186 | char *next = NULL; | 186 | char *next = NULL; |
187 | char *addr_str; | 187 | char *addr_str; |
188 | char *mod; | 188 | char *mod; |
189 | char *fmt; | 189 | char *fmt = NULL; |
190 | 190 | ||
191 | line = strtok_r(file, "\n", &next); | 191 | line = strtok_r(file, "\n", &next); |
192 | while (line) { | 192 | while (line) { |