summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--CREDITS3
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/powerpc/boot/Makefile4
-rw-r--r--arch/powerpc/boot/epapr-wrapper.c9
-rw-r--r--arch/powerpc/boot/epapr.c4
-rw-r--r--arch/powerpc/boot/of.c16
-rwxr-xr-xarch/powerpc/boot/wrapper9
-rw-r--r--arch/powerpc/include/asm/irq.h4
-rw-r--r--arch/powerpc/include/asm/processor.h4
-rw-r--r--arch/powerpc/kernel/asm-offsets.c3
-rw-r--r--arch/powerpc/kernel/irq.c100
-rw-r--r--arch/powerpc/kernel/misc_32.S25
-rw-r--r--arch/powerpc/kernel/misc_64.S10
-rw-r--r--arch/powerpc/kernel/process.c3
-rw-r--r--arch/powerpc/kernel/prom_init.c21
-rw-r--r--arch/powerpc/lib/sstep.c3
-rw-r--r--arch/powerpc/platforms/pseries/smp.c26
-rw-r--r--arch/x86/kernel/cpu/perf_event.c10
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_uncore.c10
-rw-r--r--arch/x86/kernel/reboot.c18
-rw-r--r--arch/x86/platform/efi/efi.c11
-rw-r--r--drivers/ata/sata_promise.c2
-rw-r--r--drivers/block/cciss.c1
-rw-r--r--drivers/block/cpqarray.c1
-rw-r--r--drivers/gpu/drm/i2c/tda998x_drv.c3
-rw-r--r--drivers/md/bcache/bcache.h7
-rw-r--r--drivers/md/bcache/bset.c39
-rw-r--r--drivers/md/bcache/btree.c4
-rw-r--r--drivers/md/bcache/journal.c33
-rw-r--r--drivers/md/bcache/request.c15
-rw-r--r--drivers/md/bcache/sysfs.c9
-rw-r--r--drivers/md/bcache/util.c11
-rw-r--r--drivers/md/bcache/util.h12
-rw-r--r--drivers/md/bcache/writeback.c42
-rw-r--r--fs/bio.c4
-rw-r--r--fs/ocfs2/super.c2
-rw-r--r--include/linux/memcontrol.h55
-rw-r--r--include/linux/smp.h6
-rw-r--r--include/uapi/linux/perf_event.h15
-rw-r--r--ipc/msg.c19
-rw-r--r--ipc/sem.c34
-rw-r--r--ipc/shm.c17
-rw-r--r--ipc/util.c32
-rw-r--r--ipc/util.h10
-rw-r--r--kernel/audit.c5
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/reboot.c9
-rw-r--r--kernel/sched/fair.c9
-rw-r--r--kernel/watchdog.c60
-rw-r--r--mm/memcontrol.c560
-rw-r--r--mm/mlock.c1
-rw-r--r--mm/vmscan.c83
-rwxr-xr-xscripts/checkpatch.pl4
-rw-r--r--tools/lib/lk/debugfs.c1
-rw-r--r--tools/perf/arch/x86/util/tsc.c6
-rw-r--r--tools/perf/builtin-inject.c2
-rw-r--r--tools/perf/builtin-report.c5
-rw-r--r--tools/perf/builtin-script.c2
-rw-r--r--tools/perf/builtin-trace.c17
-rw-r--r--tools/perf/config/Makefile3
-rw-r--r--tools/perf/config/feature-tests.mak10
-rw-r--r--tools/perf/util/annotate.c2
-rw-r--r--tools/perf/util/dwarf-aux.c19
-rw-r--r--tools/perf/util/dwarf-aux.h3
-rw-r--r--tools/perf/util/header.c41
-rw-r--r--tools/perf/util/hist.c2
-rw-r--r--tools/perf/util/probe-finder.c12
-rw-r--r--tools/perf/util/session.c9
-rw-r--r--tools/perf/util/session.h4
-rw-r--r--tools/perf/util/symbol-elf.c16
-rw-r--r--tools/perf/util/trace-event-parse.c2
72 files changed, 1046 insertions, 534 deletions
diff --git a/CREDITS b/CREDITS
index 9416a9a8b95e..0640e1650483 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2808,8 +2808,7 @@ S: Ottawa, Ontario
2808S: Canada K2P 0X8 2808S: Canada K2P 0X8
2809 2809
2810N: Mikael Pettersson 2810N: Mikael Pettersson
2811E: mikpe@it.uu.se 2811E: mikpelinux@gmail.com
2812W: http://user.it.uu.se/~mikpe/linux/
2813D: Miscellaneous fixes 2812D: Miscellaneous fixes
2814 2813
2815N: Reed H. Petty 2814N: Reed H. Petty
diff --git a/MAINTAINERS b/MAINTAINERS
index e61c2e83fc2b..c53fe9559642 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1812,7 +1812,8 @@ S: Supported
1812F: drivers/net/ethernet/broadcom/bnx2x/ 1812F: drivers/net/ethernet/broadcom/bnx2x/
1813 1813
1814BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE 1814BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE
1815M: Christian Daudt <csd@broadcom.com> 1815M: Christian Daudt <bcm@fixthebug.org>
1816L: bcm-kernel-feedback-list@broadcom.com
1816T: git git://git.github.com/broadcom/bcm11351 1817T: git git://git.github.com/broadcom/bcm11351
1817S: Maintained 1818S: Maintained
1818F: arch/arm/mach-bcm/ 1819F: arch/arm/mach-bcm/
@@ -6595,7 +6596,7 @@ S: Obsolete
6595F: drivers/net/wireless/prism54/ 6596F: drivers/net/wireless/prism54/
6596 6597
6597PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER 6598PROMISE SATA TX2/TX4 CONTROLLER LIBATA DRIVER
6598M: Mikael Pettersson <mikpe@it.uu.se> 6599M: Mikael Pettersson <mikpelinux@gmail.com>
6599L: linux-ide@vger.kernel.org 6600L: linux-ide@vger.kernel.org
6600S: Maintained 6601S: Maintained
6601F: drivers/ata/sata_promise.* 6602F: drivers/ata/sata_promise.*
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 6a15c968d214..15ca2255f438 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -74,7 +74,7 @@ src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c
74src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c 74src-wlib-$(CONFIG_PPC_82xx) += pq2.c fsl-soc.c planetcore.c
75src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c 75src-wlib-$(CONFIG_EMBEDDED6xx) += mv64x60.c mv64x60_i2c.c ugecon.c
76 76
77src-plat-y := of.c 77src-plat-y := of.c epapr.c
78src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \ 78src-plat-$(CONFIG_40x) += fixed-head.S ep405.c cuboot-hotfoot.c \
79 treeboot-walnut.c cuboot-acadia.c \ 79 treeboot-walnut.c cuboot-acadia.c \
80 cuboot-kilauea.c simpleboot.c \ 80 cuboot-kilauea.c simpleboot.c \
@@ -97,7 +97,7 @@ src-plat-$(CONFIG_EMBEDDED6xx) += cuboot-pq2.c cuboot-mpc7448hpc2.c \
97 prpmc2800.c 97 prpmc2800.c
98src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c 98src-plat-$(CONFIG_AMIGAONE) += cuboot-amigaone.c
99src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c 99src-plat-$(CONFIG_PPC_PS3) += ps3-head.S ps3-hvcall.S ps3.c
100src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c 100src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
101 101
102src-wlib := $(sort $(src-wlib-y)) 102src-wlib := $(sort $(src-wlib-y))
103src-plat := $(sort $(src-plat-y)) 103src-plat := $(sort $(src-plat-y))
diff --git a/arch/powerpc/boot/epapr-wrapper.c b/arch/powerpc/boot/epapr-wrapper.c
new file mode 100644
index 000000000000..c10191006673
--- /dev/null
+++ b/arch/powerpc/boot/epapr-wrapper.c
@@ -0,0 +1,9 @@
1extern void epapr_platform_init(unsigned long r3, unsigned long r4,
2 unsigned long r5, unsigned long r6,
3 unsigned long r7);
4
5void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
6 unsigned long r6, unsigned long r7)
7{
8 epapr_platform_init(r3, r4, r5, r6, r7);
9}
diff --git a/arch/powerpc/boot/epapr.c b/arch/powerpc/boot/epapr.c
index 06c1961bd124..02e91aa2194a 100644
--- a/arch/powerpc/boot/epapr.c
+++ b/arch/powerpc/boot/epapr.c
@@ -48,8 +48,8 @@ static void platform_fixups(void)
48 fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size); 48 fdt_addr, fdt_totalsize((void *)fdt_addr), ima_size);
49} 49}
50 50
51void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, 51void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
52 unsigned long r6, unsigned long r7) 52 unsigned long r6, unsigned long r7)
53{ 53{
54 epapr_magic = r6; 54 epapr_magic = r6;
55 ima_size = r7; 55 ima_size = r7;
diff --git a/arch/powerpc/boot/of.c b/arch/powerpc/boot/of.c
index 61d9899aa0d0..62e2f43ec1df 100644
--- a/arch/powerpc/boot/of.c
+++ b/arch/powerpc/boot/of.c
@@ -26,6 +26,9 @@
26 26
27static unsigned long claim_base; 27static unsigned long claim_base;
28 28
29void epapr_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
30 unsigned long r6, unsigned long r7);
31
29static void *of_try_claim(unsigned long size) 32static void *of_try_claim(unsigned long size)
30{ 33{
31 unsigned long addr = 0; 34 unsigned long addr = 0;
@@ -61,7 +64,7 @@ static void of_image_hdr(const void *hdr)
61 } 64 }
62} 65}
63 66
64void platform_init(unsigned long a1, unsigned long a2, void *promptr) 67static void of_platform_init(unsigned long a1, unsigned long a2, void *promptr)
65{ 68{
66 platform_ops.image_hdr = of_image_hdr; 69 platform_ops.image_hdr = of_image_hdr;
67 platform_ops.malloc = of_try_claim; 70 platform_ops.malloc = of_try_claim;
@@ -81,3 +84,14 @@ void platform_init(unsigned long a1, unsigned long a2, void *promptr)
81 loader_info.initrd_size = a2; 84 loader_info.initrd_size = a2;
82 } 85 }
83} 86}
87
88void platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
89 unsigned long r6, unsigned long r7)
90{
91 /* Detect OF vs. ePAPR boot */
92 if (r5)
93 of_platform_init(r3, r4, (void *)r5);
94 else
95 epapr_platform_init(r3, r4, r5, r6, r7);
96}
97
diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 6761c746048d..cd7af841ba05 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -148,18 +148,18 @@ make_space=y
148 148
149case "$platform" in 149case "$platform" in
150pseries) 150pseries)
151 platformo=$object/of.o 151 platformo="$object/of.o $object/epapr.o"
152 link_address='0x4000000' 152 link_address='0x4000000'
153 ;; 153 ;;
154maple) 154maple)
155 platformo=$object/of.o 155 platformo="$object/of.o $object/epapr.o"
156 link_address='0x400000' 156 link_address='0x400000'
157 ;; 157 ;;
158pmac|chrp) 158pmac|chrp)
159 platformo=$object/of.o 159 platformo="$object/of.o $object/epapr.o"
160 ;; 160 ;;
161coff) 161coff)
162 platformo="$object/crt0.o $object/of.o" 162 platformo="$object/crt0.o $object/of.o $object/epapr.o"
163 lds=$object/zImage.coff.lds 163 lds=$object/zImage.coff.lds
164 link_address='0x500000' 164 link_address='0x500000'
165 pie= 165 pie=
@@ -253,6 +253,7 @@ treeboot-iss4xx-mpic)
253 platformo="$object/treeboot-iss4xx.o" 253 platformo="$object/treeboot-iss4xx.o"
254 ;; 254 ;;
255epapr) 255epapr)
256 platformo="$object/epapr.o $object/epapr-wrapper.o"
256 link_address='0x20000000' 257 link_address='0x20000000'
257 pie=-pie 258 pie=-pie
258 ;; 259 ;;
diff --git a/arch/powerpc/include/asm/irq.h b/arch/powerpc/include/asm/irq.h
index 0e40843a1c6e..41f13cec8a8f 100644
--- a/arch/powerpc/include/asm/irq.h
+++ b/arch/powerpc/include/asm/irq.h
@@ -69,9 +69,9 @@ extern struct thread_info *softirq_ctx[NR_CPUS];
69 69
70extern void irq_ctx_init(void); 70extern void irq_ctx_init(void);
71extern void call_do_softirq(struct thread_info *tp); 71extern void call_do_softirq(struct thread_info *tp);
72extern int call_handle_irq(int irq, void *p1, 72extern void call_do_irq(struct pt_regs *regs, struct thread_info *tp);
73 struct thread_info *tp, void *func);
74extern void do_IRQ(struct pt_regs *regs); 73extern void do_IRQ(struct pt_regs *regs);
74extern void __do_irq(struct pt_regs *regs);
75 75
76int irq_choose_cpu(const struct cpumask *mask); 76int irq_choose_cpu(const struct cpumask *mask);
77 77
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index e378cccfca55..ce4de5aed7b5 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -149,8 +149,6 @@ typedef struct {
149 149
150struct thread_struct { 150struct thread_struct {
151 unsigned long ksp; /* Kernel stack pointer */ 151 unsigned long ksp; /* Kernel stack pointer */
152 unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
153
154#ifdef CONFIG_PPC64 152#ifdef CONFIG_PPC64
155 unsigned long ksp_vsid; 153 unsigned long ksp_vsid;
156#endif 154#endif
@@ -162,6 +160,7 @@ struct thread_struct {
162#endif 160#endif
163#ifdef CONFIG_PPC32 161#ifdef CONFIG_PPC32
164 void *pgdir; /* root of page-table tree */ 162 void *pgdir; /* root of page-table tree */
163 unsigned long ksp_limit; /* if ksp <= ksp_limit stack overflow */
165#endif 164#endif
166#ifdef CONFIG_PPC_ADV_DEBUG_REGS 165#ifdef CONFIG_PPC_ADV_DEBUG_REGS
167 /* 166 /*
@@ -321,7 +320,6 @@ struct thread_struct {
321#else 320#else
322#define INIT_THREAD { \ 321#define INIT_THREAD { \
323 .ksp = INIT_SP, \ 322 .ksp = INIT_SP, \
324 .ksp_limit = INIT_SP_LIMIT, \
325 .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \ 323 .regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
326 .fs = KERNEL_DS, \ 324 .fs = KERNEL_DS, \
327 .fpr = {{0}}, \ 325 .fpr = {{0}}, \
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index d8958be5f31a..502c7a4e73f7 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -80,10 +80,11 @@ int main(void)
80 DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr)); 80 DEFINE(TASKTHREADPPR, offsetof(struct task_struct, thread.ppr));
81#else 81#else
82 DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); 82 DEFINE(THREAD_INFO, offsetof(struct task_struct, stack));
83 DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
84 DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
83#endif /* CONFIG_PPC64 */ 85#endif /* CONFIG_PPC64 */
84 86
85 DEFINE(KSP, offsetof(struct thread_struct, ksp)); 87 DEFINE(KSP, offsetof(struct thread_struct, ksp));
86 DEFINE(KSP_LIMIT, offsetof(struct thread_struct, ksp_limit));
87 DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); 88 DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
88#ifdef CONFIG_BOOKE 89#ifdef CONFIG_BOOKE
89 DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0])); 90 DEFINE(THREAD_NORMSAVES, offsetof(struct thread_struct, normsave[0]));
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index c69440cef7af..57d286a78f86 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -441,50 +441,6 @@ void migrate_irqs(void)
441} 441}
442#endif 442#endif
443 443
444static inline void handle_one_irq(unsigned int irq)
445{
446 struct thread_info *curtp, *irqtp;
447 unsigned long saved_sp_limit;
448 struct irq_desc *desc;
449
450 desc = irq_to_desc(irq);
451 if (!desc)
452 return;
453
454 /* Switch to the irq stack to handle this */
455 curtp = current_thread_info();
456 irqtp = hardirq_ctx[smp_processor_id()];
457
458 if (curtp == irqtp) {
459 /* We're already on the irq stack, just handle it */
460 desc->handle_irq(irq, desc);
461 return;
462 }
463
464 saved_sp_limit = current->thread.ksp_limit;
465
466 irqtp->task = curtp->task;
467 irqtp->flags = 0;
468
469 /* Copy the softirq bits in preempt_count so that the
470 * softirq checks work in the hardirq context. */
471 irqtp->preempt_count = (irqtp->preempt_count & ~SOFTIRQ_MASK) |
472 (curtp->preempt_count & SOFTIRQ_MASK);
473
474 current->thread.ksp_limit = (unsigned long)irqtp +
475 _ALIGN_UP(sizeof(struct thread_info), 16);
476
477 call_handle_irq(irq, desc, irqtp, desc->handle_irq);
478 current->thread.ksp_limit = saved_sp_limit;
479 irqtp->task = NULL;
480
481 /* Set any flag that may have been set on the
482 * alternate stack
483 */
484 if (irqtp->flags)
485 set_bits(irqtp->flags, &curtp->flags);
486}
487
488static inline void check_stack_overflow(void) 444static inline void check_stack_overflow(void)
489{ 445{
490#ifdef CONFIG_DEBUG_STACKOVERFLOW 446#ifdef CONFIG_DEBUG_STACKOVERFLOW
@@ -501,9 +457,9 @@ static inline void check_stack_overflow(void)
501#endif 457#endif
502} 458}
503 459
504void do_IRQ(struct pt_regs *regs) 460void __do_irq(struct pt_regs *regs)
505{ 461{
506 struct pt_regs *old_regs = set_irq_regs(regs); 462 struct irq_desc *desc;
507 unsigned int irq; 463 unsigned int irq;
508 464
509 irq_enter(); 465 irq_enter();
@@ -519,18 +475,56 @@ void do_IRQ(struct pt_regs *regs)
519 */ 475 */
520 irq = ppc_md.get_irq(); 476 irq = ppc_md.get_irq();
521 477
522 /* We can hard enable interrupts now */ 478 /* We can hard enable interrupts now to allow perf interrupts */
523 may_hard_irq_enable(); 479 may_hard_irq_enable();
524 480
525 /* And finally process it */ 481 /* And finally process it */
526 if (irq != NO_IRQ) 482 if (unlikely(irq == NO_IRQ))
527 handle_one_irq(irq);
528 else
529 __get_cpu_var(irq_stat).spurious_irqs++; 483 __get_cpu_var(irq_stat).spurious_irqs++;
484 else {
485 desc = irq_to_desc(irq);
486 if (likely(desc))
487 desc->handle_irq(irq, desc);
488 }
530 489
531 trace_irq_exit(regs); 490 trace_irq_exit(regs);
532 491
533 irq_exit(); 492 irq_exit();
493}
494
495void do_IRQ(struct pt_regs *regs)
496{
497 struct pt_regs *old_regs = set_irq_regs(regs);
498 struct thread_info *curtp, *irqtp;
499
500 /* Switch to the irq stack to handle this */
501 curtp = current_thread_info();
502 irqtp = hardirq_ctx[raw_smp_processor_id()];
503
504 /* Already there ? */
505 if (unlikely(curtp == irqtp)) {
506 __do_irq(regs);
507 set_irq_regs(old_regs);
508 return;
509 }
510
511 /* Prepare the thread_info in the irq stack */
512 irqtp->task = curtp->task;
513 irqtp->flags = 0;
514
515 /* Copy the preempt_count so that the [soft]irq checks work. */
516 irqtp->preempt_count = curtp->preempt_count;
517
518 /* Switch stack and call */
519 call_do_irq(regs, irqtp);
520
521 /* Restore stack limit */
522 irqtp->task = NULL;
523
524 /* Copy back updates to the thread_info */
525 if (irqtp->flags)
526 set_bits(irqtp->flags, &curtp->flags);
527
534 set_irq_regs(old_regs); 528 set_irq_regs(old_regs);
535} 529}
536 530
@@ -592,28 +586,22 @@ void irq_ctx_init(void)
592 memset((void *)softirq_ctx[i], 0, THREAD_SIZE); 586 memset((void *)softirq_ctx[i], 0, THREAD_SIZE);
593 tp = softirq_ctx[i]; 587 tp = softirq_ctx[i];
594 tp->cpu = i; 588 tp->cpu = i;
595 tp->preempt_count = 0;
596 589
597 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE); 590 memset((void *)hardirq_ctx[i], 0, THREAD_SIZE);
598 tp = hardirq_ctx[i]; 591 tp = hardirq_ctx[i];
599 tp->cpu = i; 592 tp->cpu = i;
600 tp->preempt_count = HARDIRQ_OFFSET;
601 } 593 }
602} 594}
603 595
604static inline void do_softirq_onstack(void) 596static inline void do_softirq_onstack(void)
605{ 597{
606 struct thread_info *curtp, *irqtp; 598 struct thread_info *curtp, *irqtp;
607 unsigned long saved_sp_limit = current->thread.ksp_limit;
608 599
609 curtp = current_thread_info(); 600 curtp = current_thread_info();
610 irqtp = softirq_ctx[smp_processor_id()]; 601 irqtp = softirq_ctx[smp_processor_id()];
611 irqtp->task = curtp->task; 602 irqtp->task = curtp->task;
612 irqtp->flags = 0; 603 irqtp->flags = 0;
613 current->thread.ksp_limit = (unsigned long)irqtp +
614 _ALIGN_UP(sizeof(struct thread_info), 16);
615 call_do_softirq(irqtp); 604 call_do_softirq(irqtp);
616 current->thread.ksp_limit = saved_sp_limit;
617 irqtp->task = NULL; 605 irqtp->task = NULL;
618 606
619 /* Set any flag that may have been set on the 607 /* Set any flag that may have been set on the
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 777d999f563b..2b0ad9845363 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -36,26 +36,41 @@
36 36
37 .text 37 .text
38 38
39/*
40 * We store the saved ksp_limit in the unused part
41 * of the STACK_FRAME_OVERHEAD
42 */
39_GLOBAL(call_do_softirq) 43_GLOBAL(call_do_softirq)
40 mflr r0 44 mflr r0
41 stw r0,4(r1) 45 stw r0,4(r1)
46 lwz r10,THREAD+KSP_LIMIT(r2)
47 addi r11,r3,THREAD_INFO_GAP
42 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) 48 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
43 mr r1,r3 49 mr r1,r3
50 stw r10,8(r1)
51 stw r11,THREAD+KSP_LIMIT(r2)
44 bl __do_softirq 52 bl __do_softirq
53 lwz r10,8(r1)
45 lwz r1,0(r1) 54 lwz r1,0(r1)
46 lwz r0,4(r1) 55 lwz r0,4(r1)
56 stw r10,THREAD+KSP_LIMIT(r2)
47 mtlr r0 57 mtlr r0
48 blr 58 blr
49 59
50_GLOBAL(call_handle_irq) 60_GLOBAL(call_do_irq)
51 mflr r0 61 mflr r0
52 stw r0,4(r1) 62 stw r0,4(r1)
53 mtctr r6 63 lwz r10,THREAD+KSP_LIMIT(r2)
54 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) 64 addi r11,r3,THREAD_INFO_GAP
55 mr r1,r5 65 stwu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
56 bctrl 66 mr r1,r4
67 stw r10,8(r1)
68 stw r11,THREAD+KSP_LIMIT(r2)
69 bl __do_irq
70 lwz r10,8(r1)
57 lwz r1,0(r1) 71 lwz r1,0(r1)
58 lwz r0,4(r1) 72 lwz r0,4(r1)
73 stw r10,THREAD+KSP_LIMIT(r2)
59 mtlr r0 74 mtlr r0
60 blr 75 blr
61 76
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 971d7e78aff2..e59caf874d05 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -40,14 +40,12 @@ _GLOBAL(call_do_softirq)
40 mtlr r0 40 mtlr r0
41 blr 41 blr
42 42
43_GLOBAL(call_handle_irq) 43_GLOBAL(call_do_irq)
44 ld r8,0(r6)
45 mflr r0 44 mflr r0
46 std r0,16(r1) 45 std r0,16(r1)
47 mtctr r8 46 stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r4)
48 stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) 47 mr r1,r4
49 mr r1,r5 48 bl .__do_irq
50 bctrl
51 ld r1,0(r1) 49 ld r1,0(r1)
52 ld r0,16(r1) 50 ld r0,16(r1)
53 mtlr r0 51 mtlr r0
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6f428da53e20..96d2fdf3aa9e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1000,9 +1000,10 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
1000 kregs = (struct pt_regs *) sp; 1000 kregs = (struct pt_regs *) sp;
1001 sp -= STACK_FRAME_OVERHEAD; 1001 sp -= STACK_FRAME_OVERHEAD;
1002 p->thread.ksp = sp; 1002 p->thread.ksp = sp;
1003#ifdef CONFIG_PPC32
1003 p->thread.ksp_limit = (unsigned long)task_stack_page(p) + 1004 p->thread.ksp_limit = (unsigned long)task_stack_page(p) +
1004 _ALIGN_UP(sizeof(struct thread_info), 16); 1005 _ALIGN_UP(sizeof(struct thread_info), 16);
1005 1006#endif
1006#ifdef CONFIG_HAVE_HW_BREAKPOINT 1007#ifdef CONFIG_HAVE_HW_BREAKPOINT
1007 p->thread.ptrace_bps[0] = NULL; 1008 p->thread.ptrace_bps[0] = NULL;
1008#endif 1009#endif
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 12e656ffe60e..5fe2842e8bab 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -196,6 +196,8 @@ static int __initdata mem_reserve_cnt;
196 196
197static cell_t __initdata regbuf[1024]; 197static cell_t __initdata regbuf[1024];
198 198
199static bool rtas_has_query_cpu_stopped;
200
199 201
200/* 202/*
201 * Error results ... some OF calls will return "-1" on error, some 203 * Error results ... some OF calls will return "-1" on error, some
@@ -1574,6 +1576,11 @@ static void __init prom_instantiate_rtas(void)
1574 prom_setprop(rtas_node, "/rtas", "linux,rtas-entry", 1576 prom_setprop(rtas_node, "/rtas", "linux,rtas-entry",
1575 &val, sizeof(val)); 1577 &val, sizeof(val));
1576 1578
1579 /* Check if it supports "query-cpu-stopped-state" */
1580 if (prom_getprop(rtas_node, "query-cpu-stopped-state",
1581 &val, sizeof(val)) != PROM_ERROR)
1582 rtas_has_query_cpu_stopped = true;
1583
1577#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__) 1584#if defined(CONFIG_PPC_POWERNV) && defined(__BIG_ENDIAN__)
1578 /* PowerVN takeover hack */ 1585 /* PowerVN takeover hack */
1579 prom_rtas_data = base; 1586 prom_rtas_data = base;
@@ -1815,6 +1822,18 @@ static void __init prom_hold_cpus(void)
1815 = (void *) LOW_ADDR(__secondary_hold_acknowledge); 1822 = (void *) LOW_ADDR(__secondary_hold_acknowledge);
1816 unsigned long secondary_hold = LOW_ADDR(__secondary_hold); 1823 unsigned long secondary_hold = LOW_ADDR(__secondary_hold);
1817 1824
1825 /*
1826 * On pseries, if RTAS supports "query-cpu-stopped-state",
1827 * we skip this stage, the CPUs will be started by the
1828 * kernel using RTAS.
1829 */
1830 if ((of_platform == PLATFORM_PSERIES ||
1831 of_platform == PLATFORM_PSERIES_LPAR) &&
1832 rtas_has_query_cpu_stopped) {
1833 prom_printf("prom_hold_cpus: skipped\n");
1834 return;
1835 }
1836
1818 prom_debug("prom_hold_cpus: start...\n"); 1837 prom_debug("prom_hold_cpus: start...\n");
1819 prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop); 1838 prom_debug(" 1) spinloop = 0x%x\n", (unsigned long)spinloop);
1820 prom_debug(" 1) *spinloop = 0x%x\n", *spinloop); 1839 prom_debug(" 1) *spinloop = 0x%x\n", *spinloop);
@@ -3011,6 +3030,8 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4,
3011 * On non-powermacs, put all CPUs in spin-loops. 3030 * On non-powermacs, put all CPUs in spin-loops.
3012 * 3031 *
3013 * PowerMacs use a different mechanism to spin CPUs 3032 * PowerMacs use a different mechanism to spin CPUs
3033 *
3034 * (This must be done after instanciating RTAS)
3014 */ 3035 */
3015 if (of_platform != PLATFORM_POWERMAC && 3036 if (of_platform != PLATFORM_POWERMAC &&
3016 of_platform != PLATFORM_OPAL) 3037 of_platform != PLATFORM_OPAL)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index a7ee978fb860..b1faa1593c90 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1505,6 +1505,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
1505 */ 1505 */
1506 if ((ra == 1) && !(regs->msr & MSR_PR) \ 1506 if ((ra == 1) && !(regs->msr & MSR_PR) \
1507 && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) { 1507 && (val3 >= (regs->gpr[1] - STACK_INT_FRAME_SIZE))) {
1508#ifdef CONFIG_PPC32
1508 /* 1509 /*
1509 * Check if we will touch kernel sack overflow 1510 * Check if we will touch kernel sack overflow
1510 */ 1511 */
@@ -1513,7 +1514,7 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
1513 err = -EINVAL; 1514 err = -EINVAL;
1514 break; 1515 break;
1515 } 1516 }
1516 1517#endif /* CONFIG_PPC32 */
1517 /* 1518 /*
1518 * Check if we already set since that means we'll 1519 * Check if we already set since that means we'll
1519 * lose the previous value. 1520 * lose the previous value.
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 1c1771a40250..24f58cb0a543 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -233,18 +233,24 @@ static void __init smp_init_pseries(void)
233 233
234 alloc_bootmem_cpumask_var(&of_spin_mask); 234 alloc_bootmem_cpumask_var(&of_spin_mask);
235 235
236 /* Mark threads which are still spinning in hold loops. */ 236 /*
237 if (cpu_has_feature(CPU_FTR_SMT)) { 237 * Mark threads which are still spinning in hold loops
238 for_each_present_cpu(i) { 238 *
239 if (cpu_thread_in_core(i) == 0) 239 * We know prom_init will not have started them if RTAS supports
240 cpumask_set_cpu(i, of_spin_mask); 240 * query-cpu-stopped-state.
241 } 241 */
242 } else { 242 if (rtas_token("query-cpu-stopped-state") == RTAS_UNKNOWN_SERVICE) {
243 cpumask_copy(of_spin_mask, cpu_present_mask); 243 if (cpu_has_feature(CPU_FTR_SMT)) {
244 for_each_present_cpu(i) {
245 if (cpu_thread_in_core(i) == 0)
246 cpumask_set_cpu(i, of_spin_mask);
247 }
248 } else
249 cpumask_copy(of_spin_mask, cpu_present_mask);
250
251 cpumask_clear_cpu(boot_cpuid, of_spin_mask);
244 } 252 }
245 253
246 cpumask_clear_cpu(boot_cpuid, of_spin_mask);
247
248 /* Non-lpar has additional take/give timebase */ 254 /* Non-lpar has additional take/give timebase */
249 if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) { 255 if (rtas_token("freeze-time-base") != RTAS_UNKNOWN_SERVICE) {
250 smp_ops->give_timebase = rtas_give_timebase; 256 smp_ops->give_timebase = rtas_give_timebase;
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 8355c84b9729..a9c606bb4945 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1883,9 +1883,9 @@ static struct pmu pmu = {
1883 1883
1884void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 1884void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
1885{ 1885{
1886 userpg->cap_usr_time = 0; 1886 userpg->cap_user_time = 0;
1887 userpg->cap_usr_time_zero = 0; 1887 userpg->cap_user_time_zero = 0;
1888 userpg->cap_usr_rdpmc = x86_pmu.attr_rdpmc; 1888 userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc;
1889 userpg->pmc_width = x86_pmu.cntval_bits; 1889 userpg->pmc_width = x86_pmu.cntval_bits;
1890 1890
1891 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) 1891 if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
@@ -1894,13 +1894,13 @@ void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
1894 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) 1894 if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
1895 return; 1895 return;
1896 1896
1897 userpg->cap_usr_time = 1; 1897 userpg->cap_user_time = 1;
1898 userpg->time_mult = this_cpu_read(cyc2ns); 1898 userpg->time_mult = this_cpu_read(cyc2ns);
1899 userpg->time_shift = CYC2NS_SCALE_FACTOR; 1899 userpg->time_shift = CYC2NS_SCALE_FACTOR;
1900 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; 1900 userpg->time_offset = this_cpu_read(cyc2ns_offset) - now;
1901 1901
1902 if (sched_clock_stable && !check_tsc_disabled()) { 1902 if (sched_clock_stable && !check_tsc_disabled()) {
1903 userpg->cap_usr_time_zero = 1; 1903 userpg->cap_user_time_zero = 1;
1904 userpg->time_zero = this_cpu_read(cyc2ns_offset); 1904 userpg->time_zero = this_cpu_read(cyc2ns_offset);
1905 } 1905 }
1906} 1906}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9db76c31b3c3..f31a1655d1ff 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2325,6 +2325,7 @@ __init int intel_pmu_init(void)
2325 break; 2325 break;
2326 2326
2327 case 55: /* Atom 22nm "Silvermont" */ 2327 case 55: /* Atom 22nm "Silvermont" */
2328 case 77: /* Avoton "Silvermont" */
2328 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, 2329 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
2329 sizeof(hw_cache_event_ids)); 2330 sizeof(hw_cache_event_ids));
2330 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs, 2331 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 8ed44589b0e4..4118f9f68315 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2706,14 +2706,14 @@ static void uncore_pmu_init_hrtimer(struct intel_uncore_box *box)
2706 box->hrtimer.function = uncore_pmu_hrtimer; 2706 box->hrtimer.function = uncore_pmu_hrtimer;
2707} 2707}
2708 2708
2709struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cpu) 2709static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int node)
2710{ 2710{
2711 struct intel_uncore_box *box; 2711 struct intel_uncore_box *box;
2712 int i, size; 2712 int i, size;
2713 2713
2714 size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg); 2714 size = sizeof(*box) + type->num_shared_regs * sizeof(struct intel_uncore_extra_reg);
2715 2715
2716 box = kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu)); 2716 box = kzalloc_node(size, GFP_KERNEL, node);
2717 if (!box) 2717 if (!box)
2718 return NULL; 2718 return NULL;
2719 2719
@@ -3031,7 +3031,7 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu,
3031 struct intel_uncore_box *fake_box; 3031 struct intel_uncore_box *fake_box;
3032 int ret = -EINVAL, n; 3032 int ret = -EINVAL, n;
3033 3033
3034 fake_box = uncore_alloc_box(pmu->type, smp_processor_id()); 3034 fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE);
3035 if (!fake_box) 3035 if (!fake_box)
3036 return -ENOMEM; 3036 return -ENOMEM;
3037 3037
@@ -3294,7 +3294,7 @@ static int uncore_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id
3294 } 3294 }
3295 3295
3296 type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)]; 3296 type = pci_uncores[UNCORE_PCI_DEV_TYPE(id->driver_data)];
3297 box = uncore_alloc_box(type, 0); 3297 box = uncore_alloc_box(type, NUMA_NO_NODE);
3298 if (!box) 3298 if (!box)
3299 return -ENOMEM; 3299 return -ENOMEM;
3300 3300
@@ -3499,7 +3499,7 @@ static int uncore_cpu_prepare(int cpu, int phys_id)
3499 if (pmu->func_id < 0) 3499 if (pmu->func_id < 0)
3500 pmu->func_id = j; 3500 pmu->func_id = j;
3501 3501
3502 box = uncore_alloc_box(type, cpu); 3502 box = uncore_alloc_box(type, cpu_to_node(cpu));
3503 if (!box) 3503 if (!box)
3504 return -ENOMEM; 3504 return -ENOMEM;
3505 3505
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 563ed91e6faa..e643e744e4d8 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -352,12 +352,28 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
352 }, 352 },
353 { /* Handle problems with rebooting on the Precision M6600. */ 353 { /* Handle problems with rebooting on the Precision M6600. */
354 .callback = set_pci_reboot, 354 .callback = set_pci_reboot,
355 .ident = "Dell OptiPlex 990", 355 .ident = "Dell Precision M6600",
356 .matches = { 356 .matches = {
357 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), 357 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
358 DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"), 358 DMI_MATCH(DMI_PRODUCT_NAME, "Precision M6600"),
359 }, 359 },
360 }, 360 },
361 { /* Handle problems with rebooting on the Dell PowerEdge C6100. */
362 .callback = set_pci_reboot,
363 .ident = "Dell PowerEdge C6100",
364 .matches = {
365 DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
366 DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
367 },
368 },
369 { /* Some C6100 machines were shipped with vendor being 'Dell'. */
370 .callback = set_pci_reboot,
371 .ident = "Dell PowerEdge C6100",
372 .matches = {
373 DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
374 DMI_MATCH(DMI_PRODUCT_NAME, "C6100"),
375 },
376 },
361 { } 377 { }
362}; 378};
363 379
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 90f6ed127096..c7e22ab29a5a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -912,10 +912,13 @@ void __init efi_enter_virtual_mode(void)
912 912
913 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { 913 for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
914 md = p; 914 md = p;
915 if (!(md->attribute & EFI_MEMORY_RUNTIME) && 915 if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
916 md->type != EFI_BOOT_SERVICES_CODE && 916#ifdef CONFIG_X86_64
917 md->type != EFI_BOOT_SERVICES_DATA) 917 if (md->type != EFI_BOOT_SERVICES_CODE &&
918 continue; 918 md->type != EFI_BOOT_SERVICES_DATA)
919#endif
920 continue;
921 }
919 922
920 size = md->num_pages << EFI_PAGE_SHIFT; 923 size = md->num_pages << EFI_PAGE_SHIFT;
921 end = md->phys_addr + size; 924 end = md->phys_addr + size;
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 958ba2a420c3..97f4acb54ad6 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -2,7 +2,7 @@
2 * sata_promise.c - Promise SATA 2 * sata_promise.c - Promise SATA
3 * 3 *
4 * Maintained by: Tejun Heo <tj@kernel.org> 4 * Maintained by: Tejun Heo <tj@kernel.org>
5 * Mikael Pettersson <mikpe@it.uu.se> 5 * Mikael Pettersson
6 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * Please ALWAYS copy linux-ide@vger.kernel.org
7 * on emails. 7 * on emails.
8 * 8 *
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index d2d95ff5353b..edfa2515bc86 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1189,6 +1189,7 @@ static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
1189 int err; 1189 int err;
1190 u32 cp; 1190 u32 cp;
1191 1191
1192 memset(&arg64, 0, sizeof(arg64));
1192 err = 0; 1193 err = 0;
1193 err |= 1194 err |=
1194 copy_from_user(&arg64.LUN_info, &arg32->LUN_info, 1195 copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index 639d26b90b91..2b9440384536 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1193,6 +1193,7 @@ out_passthru:
1193 ida_pci_info_struct pciinfo; 1193 ida_pci_info_struct pciinfo;
1194 1194
1195 if (!arg) return -EINVAL; 1195 if (!arg) return -EINVAL;
1196 memset(&pciinfo, 0, sizeof(pciinfo));
1196 pciinfo.bus = host->pci_dev->bus->number; 1197 pciinfo.bus = host->pci_dev->bus->number;
1197 pciinfo.dev_fn = host->pci_dev->devfn; 1198 pciinfo.dev_fn = host->pci_dev->devfn;
1198 pciinfo.board_id = host->board_id; 1199 pciinfo.board_id = host->board_id;
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index b1f8fc69023f..60e84043aa34 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -707,8 +707,7 @@ tda998x_encoder_dpms(struct drm_encoder *encoder, int mode)
707 reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2); 707 reg_write(encoder, REG_VIP_CNTRL_2, priv->vip_cntrl_2);
708 break; 708 break;
709 case DRM_MODE_DPMS_OFF: 709 case DRM_MODE_DPMS_OFF:
710 /* disable audio and video ports */ 710 /* disable video ports */
711 reg_write(encoder, REG_ENA_AP, 0x00);
712 reg_write(encoder, REG_ENA_VP_0, 0x00); 711 reg_write(encoder, REG_ENA_VP_0, 0x00);
713 reg_write(encoder, REG_ENA_VP_1, 0x00); 712 reg_write(encoder, REG_ENA_VP_1, 0x00);
714 reg_write(encoder, REG_ENA_VP_2, 0x00); 713 reg_write(encoder, REG_ENA_VP_2, 0x00);
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index b39f6f0b45f2..0f12382aa35d 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -498,7 +498,7 @@ struct cached_dev {
498 */ 498 */
499 atomic_t has_dirty; 499 atomic_t has_dirty;
500 500
501 struct ratelimit writeback_rate; 501 struct bch_ratelimit writeback_rate;
502 struct delayed_work writeback_rate_update; 502 struct delayed_work writeback_rate_update;
503 503
504 /* 504 /*
@@ -507,10 +507,9 @@ struct cached_dev {
507 */ 507 */
508 sector_t last_read; 508 sector_t last_read;
509 509
510 /* Number of writeback bios in flight */ 510 /* Limit number of writeback bios in flight */
511 atomic_t in_flight; 511 struct semaphore in_flight;
512 struct closure_with_timer writeback; 512 struct closure_with_timer writeback;
513 struct closure_waitlist writeback_wait;
514 513
515 struct keybuf writeback_keys; 514 struct keybuf writeback_keys;
516 515
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 8010eed06a51..22d1ae72c282 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -926,28 +926,45 @@ struct bkey *bch_next_recurse_key(struct btree *b, struct bkey *search)
926 926
927/* Mergesort */ 927/* Mergesort */
928 928
929static void sort_key_next(struct btree_iter *iter,
930 struct btree_iter_set *i)
931{
932 i->k = bkey_next(i->k);
933
934 if (i->k == i->end)
935 *i = iter->data[--iter->used];
936}
937
929static void btree_sort_fixup(struct btree_iter *iter) 938static void btree_sort_fixup(struct btree_iter *iter)
930{ 939{
931 while (iter->used > 1) { 940 while (iter->used > 1) {
932 struct btree_iter_set *top = iter->data, *i = top + 1; 941 struct btree_iter_set *top = iter->data, *i = top + 1;
933 struct bkey *k;
934 942
935 if (iter->used > 2 && 943 if (iter->used > 2 &&
936 btree_iter_cmp(i[0], i[1])) 944 btree_iter_cmp(i[0], i[1]))
937 i++; 945 i++;
938 946
939 for (k = i->k; 947 if (bkey_cmp(top->k, &START_KEY(i->k)) <= 0)
940 k != i->end && bkey_cmp(top->k, &START_KEY(k)) > 0;
941 k = bkey_next(k))
942 if (top->k > i->k)
943 __bch_cut_front(top->k, k);
944 else if (KEY_SIZE(k))
945 bch_cut_back(&START_KEY(k), top->k);
946
947 if (top->k < i->k || k == i->k)
948 break; 948 break;
949 949
950 heap_sift(iter, i - top, btree_iter_cmp); 950 if (!KEY_SIZE(i->k)) {
951 sort_key_next(iter, i);
952 heap_sift(iter, i - top, btree_iter_cmp);
953 continue;
954 }
955
956 if (top->k > i->k) {
957 if (bkey_cmp(top->k, i->k) >= 0)
958 sort_key_next(iter, i);
959 else
960 bch_cut_front(top->k, i->k);
961
962 heap_sift(iter, i - top, btree_iter_cmp);
963 } else {
964 /* can't happen because of comparison func */
965 BUG_ON(!bkey_cmp(&START_KEY(top->k), &START_KEY(i->k)));
966 bch_cut_back(&START_KEY(i->k), top->k);
967 }
951 } 968 }
952} 969}
953 970
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index f9764e61978b..f42fc7ed9cd6 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -255,7 +255,7 @@ void bch_btree_node_read(struct btree *b)
255 255
256 return; 256 return;
257err: 257err:
258 bch_cache_set_error(b->c, "io error reading bucket %lu", 258 bch_cache_set_error(b->c, "io error reading bucket %zu",
259 PTR_BUCKET_NR(b->c, &b->key, 0)); 259 PTR_BUCKET_NR(b->c, &b->key, 0));
260} 260}
261 261
@@ -612,7 +612,7 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
612 return SHRINK_STOP; 612 return SHRINK_STOP;
613 613
614 /* Return -1 if we can't do anything right now */ 614 /* Return -1 if we can't do anything right now */
615 if (sc->gfp_mask & __GFP_WAIT) 615 if (sc->gfp_mask & __GFP_IO)
616 mutex_lock(&c->bucket_lock); 616 mutex_lock(&c->bucket_lock);
617 else if (!mutex_trylock(&c->bucket_lock)) 617 else if (!mutex_trylock(&c->bucket_lock))
618 return -1; 618 return -1;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index ba95ab84b2be..8435f81e5d85 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -153,7 +153,8 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
153 bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); 153 bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
154 pr_debug("%u journal buckets", ca->sb.njournal_buckets); 154 pr_debug("%u journal buckets", ca->sb.njournal_buckets);
155 155
156 /* Read journal buckets ordered by golden ratio hash to quickly 156 /*
157 * Read journal buckets ordered by golden ratio hash to quickly
157 * find a sequence of buckets with valid journal entries 158 * find a sequence of buckets with valid journal entries
158 */ 159 */
159 for (i = 0; i < ca->sb.njournal_buckets; i++) { 160 for (i = 0; i < ca->sb.njournal_buckets; i++) {
@@ -166,18 +167,20 @@ int bch_journal_read(struct cache_set *c, struct list_head *list,
166 goto bsearch; 167 goto bsearch;
167 } 168 }
168 169
169 /* If that fails, check all the buckets we haven't checked 170 /*
171 * If that fails, check all the buckets we haven't checked
170 * already 172 * already
171 */ 173 */
172 pr_debug("falling back to linear search"); 174 pr_debug("falling back to linear search");
173 175
174 for (l = 0; l < ca->sb.njournal_buckets; l++) { 176 for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
175 if (test_bit(l, bitmap)) 177 l < ca->sb.njournal_buckets;
176 continue; 178 l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, l + 1))
177
178 if (read_bucket(l)) 179 if (read_bucket(l))
179 goto bsearch; 180 goto bsearch;
180 } 181
182 if (list_empty(list))
183 continue;
181bsearch: 184bsearch:
182 /* Binary search */ 185 /* Binary search */
183 m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); 186 m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
@@ -197,10 +200,12 @@ bsearch:
197 r = m; 200 r = m;
198 } 201 }
199 202
200 /* Read buckets in reverse order until we stop finding more 203 /*
204 * Read buckets in reverse order until we stop finding more
201 * journal entries 205 * journal entries
202 */ 206 */
203 pr_debug("finishing up"); 207 pr_debug("finishing up: m %u njournal_buckets %u",
208 m, ca->sb.njournal_buckets);
204 l = m; 209 l = m;
205 210
206 while (1) { 211 while (1) {
@@ -228,9 +233,10 @@ bsearch:
228 } 233 }
229 } 234 }
230 235
231 c->journal.seq = list_entry(list->prev, 236 if (!list_empty(list))
232 struct journal_replay, 237 c->journal.seq = list_entry(list->prev,
233 list)->j.seq; 238 struct journal_replay,
239 list)->j.seq;
234 240
235 return 0; 241 return 0;
236#undef read_bucket 242#undef read_bucket
@@ -428,7 +434,7 @@ static void do_journal_discard(struct cache *ca)
428 return; 434 return;
429 } 435 }
430 436
431 switch (atomic_read(&ja->discard_in_flight) == DISCARD_IN_FLIGHT) { 437 switch (atomic_read(&ja->discard_in_flight)) {
432 case DISCARD_IN_FLIGHT: 438 case DISCARD_IN_FLIGHT:
433 return; 439 return;
434 440
@@ -689,6 +695,7 @@ void bch_journal_meta(struct cache_set *c, struct closure *cl)
689 if (cl) 695 if (cl)
690 BUG_ON(!closure_wait(&w->wait, cl)); 696 BUG_ON(!closure_wait(&w->wait, cl));
691 697
698 closure_flush(&c->journal.io);
692 __journal_try_write(c, true); 699 __journal_try_write(c, true);
693 } 700 }
694} 701}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 786a1a4f74d8..71eb233b9ace 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -997,14 +997,17 @@ static void request_write(struct cached_dev *dc, struct search *s)
997 } else { 997 } else {
998 bch_writeback_add(dc); 998 bch_writeback_add(dc);
999 999
1000 if (s->op.flush_journal) { 1000 if (bio->bi_rw & REQ_FLUSH) {
1001 /* Also need to send a flush to the backing device */ 1001 /* Also need to send a flush to the backing device */
1002 s->op.cache_bio = bio_clone_bioset(bio, GFP_NOIO, 1002 struct bio *flush = bio_alloc_bioset(0, GFP_NOIO,
1003 dc->disk.bio_split); 1003 dc->disk.bio_split);
1004 1004
1005 bio->bi_size = 0; 1005 flush->bi_rw = WRITE_FLUSH;
1006 bio->bi_vcnt = 0; 1006 flush->bi_bdev = bio->bi_bdev;
1007 closure_bio_submit(bio, cl, s->d); 1007 flush->bi_end_io = request_endio;
1008 flush->bi_private = cl;
1009
1010 closure_bio_submit(flush, cl, s->d);
1008 } else { 1011 } else {
1009 s->op.cache_bio = bio; 1012 s->op.cache_bio = bio;
1010 } 1013 }
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 4fe6ab2fbe2e..924dcfdae111 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -223,8 +223,13 @@ STORE(__cached_dev)
223 } 223 }
224 224
225 if (attr == &sysfs_label) { 225 if (attr == &sysfs_label) {
226 /* note: endlines are preserved */ 226 if (size > SB_LABEL_SIZE)
227 memcpy(dc->sb.label, buf, SB_LABEL_SIZE); 227 return -EINVAL;
228 memcpy(dc->sb.label, buf, size);
229 if (size < SB_LABEL_SIZE)
230 dc->sb.label[size] = '\0';
231 if (size && dc->sb.label[size - 1] == '\n')
232 dc->sb.label[size - 1] = '\0';
228 bch_write_bdev_super(dc, NULL); 233 bch_write_bdev_super(dc, NULL);
229 if (dc->disk.c) { 234 if (dc->disk.c) {
230 memcpy(dc->disk.c->uuids[dc->disk.id].label, 235 memcpy(dc->disk.c->uuids[dc->disk.id].label,
diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c
index 98eb81159a22..420dad545c7d 100644
--- a/drivers/md/bcache/util.c
+++ b/drivers/md/bcache/util.c
@@ -190,7 +190,16 @@ void bch_time_stats_update(struct time_stats *stats, uint64_t start_time)
190 stats->last = now ?: 1; 190 stats->last = now ?: 1;
191} 191}
192 192
193unsigned bch_next_delay(struct ratelimit *d, uint64_t done) 193/**
194 * bch_next_delay() - increment @d by the amount of work done, and return how
195 * long to delay until the next time to do some work.
196 *
197 * @d - the struct bch_ratelimit to update
198 * @done - the amount of work done, in arbitrary units
199 *
200 * Returns the amount of time to delay by, in jiffies
201 */
202uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done)
194{ 203{
195 uint64_t now = local_clock(); 204 uint64_t now = local_clock();
196 205
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index 1ae2a73ad85f..ea345c6896f4 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -450,17 +450,23 @@ read_attribute(name ## _last_ ## frequency_units)
450 (ewma) >> factor; \ 450 (ewma) >> factor; \
451}) 451})
452 452
453struct ratelimit { 453struct bch_ratelimit {
454 /* Next time we want to do some work, in nanoseconds */
454 uint64_t next; 455 uint64_t next;
456
457 /*
458 * Rate at which we want to do work, in units per nanosecond
459 * The units here correspond to the units passed to bch_next_delay()
460 */
455 unsigned rate; 461 unsigned rate;
456}; 462};
457 463
458static inline void ratelimit_reset(struct ratelimit *d) 464static inline void bch_ratelimit_reset(struct bch_ratelimit *d)
459{ 465{
460 d->next = local_clock(); 466 d->next = local_clock();
461} 467}
462 468
463unsigned bch_next_delay(struct ratelimit *d, uint64_t done); 469uint64_t bch_next_delay(struct bch_ratelimit *d, uint64_t done);
464 470
465#define __DIV_SAFE(n, d, zero) \ 471#define __DIV_SAFE(n, d, zero) \
466({ \ 472({ \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 22cbff551628..ba3ee48320f2 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -94,11 +94,15 @@ static void update_writeback_rate(struct work_struct *work)
94 94
95static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors) 95static unsigned writeback_delay(struct cached_dev *dc, unsigned sectors)
96{ 96{
97 uint64_t ret;
98
97 if (atomic_read(&dc->disk.detaching) || 99 if (atomic_read(&dc->disk.detaching) ||
98 !dc->writeback_percent) 100 !dc->writeback_percent)
99 return 0; 101 return 0;
100 102
101 return bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL); 103 ret = bch_next_delay(&dc->writeback_rate, sectors * 10000000ULL);
104
105 return min_t(uint64_t, ret, HZ);
102} 106}
103 107
104/* Background writeback */ 108/* Background writeback */
@@ -208,7 +212,7 @@ normal_refill:
208 212
209 up_write(&dc->writeback_lock); 213 up_write(&dc->writeback_lock);
210 214
211 ratelimit_reset(&dc->writeback_rate); 215 bch_ratelimit_reset(&dc->writeback_rate);
212 216
213 /* Punt to workqueue only so we don't recurse and blow the stack */ 217 /* Punt to workqueue only so we don't recurse and blow the stack */
214 continue_at(cl, read_dirty, dirty_wq); 218 continue_at(cl, read_dirty, dirty_wq);
@@ -318,9 +322,7 @@ static void write_dirty_finish(struct closure *cl)
318 } 322 }
319 323
320 bch_keybuf_del(&dc->writeback_keys, w); 324 bch_keybuf_del(&dc->writeback_keys, w);
321 atomic_dec_bug(&dc->in_flight); 325 up(&dc->in_flight);
322
323 closure_wake_up(&dc->writeback_wait);
324 326
325 closure_return_with_destructor(cl, dirty_io_destructor); 327 closure_return_with_destructor(cl, dirty_io_destructor);
326} 328}
@@ -349,7 +351,7 @@ static void write_dirty(struct closure *cl)
349 351
350 closure_bio_submit(&io->bio, cl, &io->dc->disk); 352 closure_bio_submit(&io->bio, cl, &io->dc->disk);
351 353
352 continue_at(cl, write_dirty_finish, dirty_wq); 354 continue_at(cl, write_dirty_finish, system_wq);
353} 355}
354 356
355static void read_dirty_endio(struct bio *bio, int error) 357static void read_dirty_endio(struct bio *bio, int error)
@@ -369,7 +371,7 @@ static void read_dirty_submit(struct closure *cl)
369 371
370 closure_bio_submit(&io->bio, cl, &io->dc->disk); 372 closure_bio_submit(&io->bio, cl, &io->dc->disk);
371 373
372 continue_at(cl, write_dirty, dirty_wq); 374 continue_at(cl, write_dirty, system_wq);
373} 375}
374 376
375static void read_dirty(struct closure *cl) 377static void read_dirty(struct closure *cl)
@@ -394,12 +396,8 @@ static void read_dirty(struct closure *cl)
394 396
395 if (delay > 0 && 397 if (delay > 0 &&
396 (KEY_START(&w->key) != dc->last_read || 398 (KEY_START(&w->key) != dc->last_read ||
397 jiffies_to_msecs(delay) > 50)) { 399 jiffies_to_msecs(delay) > 50))
398 w->private = NULL; 400 delay = schedule_timeout_uninterruptible(delay);
399
400 closure_delay(&dc->writeback, delay);
401 continue_at(cl, read_dirty, dirty_wq);
402 }
403 401
404 dc->last_read = KEY_OFFSET(&w->key); 402 dc->last_read = KEY_OFFSET(&w->key);
405 403
@@ -424,15 +422,10 @@ static void read_dirty(struct closure *cl)
424 422
425 trace_bcache_writeback(&w->key); 423 trace_bcache_writeback(&w->key);
426 424
427 closure_call(&io->cl, read_dirty_submit, NULL, &dc->disk.cl); 425 down(&dc->in_flight);
426 closure_call(&io->cl, read_dirty_submit, NULL, cl);
428 427
429 delay = writeback_delay(dc, KEY_SIZE(&w->key)); 428 delay = writeback_delay(dc, KEY_SIZE(&w->key));
430
431 atomic_inc(&dc->in_flight);
432
433 if (!closure_wait_event(&dc->writeback_wait, cl,
434 atomic_read(&dc->in_flight) < 64))
435 continue_at(cl, read_dirty, dirty_wq);
436 } 429 }
437 430
438 if (0) { 431 if (0) {
@@ -442,7 +435,11 @@ err:
442 bch_keybuf_del(&dc->writeback_keys, w); 435 bch_keybuf_del(&dc->writeback_keys, w);
443 } 436 }
444 437
445 refill_dirty(cl); 438 /*
439 * Wait for outstanding writeback IOs to finish (and keybuf slots to be
440 * freed) before refilling again
441 */
442 continue_at(cl, refill_dirty, dirty_wq);
446} 443}
447 444
448/* Init */ 445/* Init */
@@ -484,6 +481,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
484 481
485void bch_cached_dev_writeback_init(struct cached_dev *dc) 482void bch_cached_dev_writeback_init(struct cached_dev *dc)
486{ 483{
484 sema_init(&dc->in_flight, 64);
487 closure_init_unlocked(&dc->writeback); 485 closure_init_unlocked(&dc->writeback);
488 init_rwsem(&dc->writeback_lock); 486 init_rwsem(&dc->writeback_lock);
489 487
@@ -513,7 +511,7 @@ void bch_writeback_exit(void)
513 511
514int __init bch_writeback_init(void) 512int __init bch_writeback_init(void)
515{ 513{
516 dirty_wq = create_singlethread_workqueue("bcache_writeback"); 514 dirty_wq = create_workqueue("bcache_writeback");
517 if (!dirty_wq) 515 if (!dirty_wq)
518 return -ENOMEM; 516 return -ENOMEM;
519 517
diff --git a/fs/bio.c b/fs/bio.c
index b3b20ed9510e..ea5035da4d9a 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
917 src_p = kmap_atomic(src_bv->bv_page); 917 src_p = kmap_atomic(src_bv->bv_page);
918 dst_p = kmap_atomic(dst_bv->bv_page); 918 dst_p = kmap_atomic(dst_bv->bv_page);
919 919
920 memcpy(dst_p + dst_bv->bv_offset, 920 memcpy(dst_p + dst_offset,
921 src_p + src_bv->bv_offset, 921 src_p + src_offset,
922 bytes); 922 bytes);
923 923
924 kunmap_atomic(dst_p); 924 kunmap_atomic(dst_p);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 121da2dc3be8..d4e81e4a9b04 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1924,7 +1924,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1924{ 1924{
1925 int tmp, hangup_needed = 0; 1925 int tmp, hangup_needed = 0;
1926 struct ocfs2_super *osb = NULL; 1926 struct ocfs2_super *osb = NULL;
1927 char nodestr[8]; 1927 char nodestr[12];
1928 1928
1929 trace_ocfs2_dismount_volume(sb); 1929 trace_ocfs2_dismount_volume(sb);
1930 1930
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 60e95872da29..ecc82b37c4cc 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -53,23 +53,6 @@ struct mem_cgroup_reclaim_cookie {
53 unsigned int generation; 53 unsigned int generation;
54}; 54};
55 55
56enum mem_cgroup_filter_t {
57 VISIT, /* visit current node */
58 SKIP, /* skip the current node and continue traversal */
59 SKIP_TREE, /* skip the whole subtree and continue traversal */
60};
61
62/*
63 * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
64 * iterate through the hierarchy tree. Each tree element is checked by the
65 * predicate before it is returned by the iterator. If a filter returns
66 * SKIP or SKIP_TREE then the iterator code continues traversal (with the
67 * next node down the hierarchy or the next node that doesn't belong under the
68 * memcg's subtree).
69 */
70typedef enum mem_cgroup_filter_t
71(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
72
73#ifdef CONFIG_MEMCG 56#ifdef CONFIG_MEMCG
74/* 57/*
75 * All "charge" functions with gfp_mask should use GFP_KERNEL or 58 * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -137,18 +120,9 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
137extern void mem_cgroup_end_migration(struct mem_cgroup *memcg, 120extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
138 struct page *oldpage, struct page *newpage, bool migration_ok); 121 struct page *oldpage, struct page *newpage, bool migration_ok);
139 122
140struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, 123struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
141 struct mem_cgroup *prev, 124 struct mem_cgroup *,
142 struct mem_cgroup_reclaim_cookie *reclaim, 125 struct mem_cgroup_reclaim_cookie *);
143 mem_cgroup_iter_filter cond);
144
145static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
146 struct mem_cgroup *prev,
147 struct mem_cgroup_reclaim_cookie *reclaim)
148{
149 return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
150}
151
152void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *); 126void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
153 127
154/* 128/*
@@ -260,9 +234,9 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
260 mem_cgroup_update_page_stat(page, idx, -1); 234 mem_cgroup_update_page_stat(page, idx, -1);
261} 235}
262 236
263enum mem_cgroup_filter_t 237unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
264mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, 238 gfp_t gfp_mask,
265 struct mem_cgroup *root); 239 unsigned long *total_scanned);
266 240
267void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx); 241void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
268static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, 242static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -376,15 +350,6 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
376 struct page *oldpage, struct page *newpage, bool migration_ok) 350 struct page *oldpage, struct page *newpage, bool migration_ok)
377{ 351{
378} 352}
379static inline struct mem_cgroup *
380mem_cgroup_iter_cond(struct mem_cgroup *root,
381 struct mem_cgroup *prev,
382 struct mem_cgroup_reclaim_cookie *reclaim,
383 mem_cgroup_iter_filter cond)
384{
385 /* first call must return non-NULL, second return NULL */
386 return (struct mem_cgroup *)(unsigned long)!prev;
387}
388 353
389static inline struct mem_cgroup * 354static inline struct mem_cgroup *
390mem_cgroup_iter(struct mem_cgroup *root, 355mem_cgroup_iter(struct mem_cgroup *root,
@@ -471,11 +436,11 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
471} 436}
472 437
473static inline 438static inline
474enum mem_cgroup_filter_t 439unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
475mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg, 440 gfp_t gfp_mask,
476 struct mem_cgroup *root) 441 unsigned long *total_scanned)
477{ 442{
478 return VISIT; 443 return 0;
479} 444}
480 445
481static inline void mem_cgroup_split_huge_fixup(struct page *head) 446static inline void mem_cgroup_split_huge_fixup(struct page *head)
diff --git a/include/linux/smp.h b/include/linux/smp.h
index cfb7ca094b38..731f5237d5f4 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -155,6 +155,12 @@ smp_call_function_any(const struct cpumask *mask, smp_call_func_t func,
155 155
156static inline void kick_all_cpus_sync(void) { } 156static inline void kick_all_cpus_sync(void) { }
157 157
158static inline void __smp_call_function_single(int cpuid,
159 struct call_single_data *data, int wait)
160{
161 on_each_cpu(data->func, data->info, wait);
162}
163
158#endif /* !SMP */ 164#endif /* !SMP */
159 165
160/* 166/*
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 40a1fb807396..009a655a5d35 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -380,10 +380,13 @@ struct perf_event_mmap_page {
380 union { 380 union {
381 __u64 capabilities; 381 __u64 capabilities;
382 struct { 382 struct {
383 __u64 cap_usr_time : 1, 383 __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
384 cap_usr_rdpmc : 1, 384 cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
385 cap_usr_time_zero : 1, 385
386 cap_____res : 61; 386 cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
387 cap_user_time : 1, /* The time_* fields are used */
388 cap_user_time_zero : 1, /* The time_zero field is used */
389 cap_____res : 59;
387 }; 390 };
388 }; 391 };
389 392
@@ -442,12 +445,13 @@ struct perf_event_mmap_page {
442 * ((rem * time_mult) >> time_shift); 445 * ((rem * time_mult) >> time_shift);
443 */ 446 */
444 __u64 time_zero; 447 __u64 time_zero;
448 __u32 size; /* Header size up to __reserved[] fields. */
445 449
446 /* 450 /*
447 * Hole for extension of the self monitor capabilities 451 * Hole for extension of the self monitor capabilities
448 */ 452 */
449 453
450 __u64 __reserved[119]; /* align to 1k */ 454 __u8 __reserved[118*8+4]; /* align to 1k. */
451 455
452 /* 456 /*
453 * Control data for the mmap() data buffer. 457 * Control data for the mmap() data buffer.
@@ -528,6 +532,7 @@ enum perf_event_type {
528 * u64 len; 532 * u64 len;
529 * u64 pgoff; 533 * u64 pgoff;
530 * char filename[]; 534 * char filename[];
535 * struct sample_id sample_id;
531 * }; 536 * };
532 */ 537 */
533 PERF_RECORD_MMAP = 1, 538 PERF_RECORD_MMAP = 1,
diff --git a/ipc/msg.c b/ipc/msg.c
index b0d541d42677..9e4310c546ae 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -165,6 +165,15 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
165 ipc_rmid(&msg_ids(ns), &s->q_perm); 165 ipc_rmid(&msg_ids(ns), &s->q_perm);
166} 166}
167 167
168static void msg_rcu_free(struct rcu_head *head)
169{
170 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
171 struct msg_queue *msq = ipc_rcu_to_struct(p);
172
173 security_msg_queue_free(msq);
174 ipc_rcu_free(head);
175}
176
168/** 177/**
169 * newque - Create a new msg queue 178 * newque - Create a new msg queue
170 * @ns: namespace 179 * @ns: namespace
@@ -189,15 +198,14 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
189 msq->q_perm.security = NULL; 198 msq->q_perm.security = NULL;
190 retval = security_msg_queue_alloc(msq); 199 retval = security_msg_queue_alloc(msq);
191 if (retval) { 200 if (retval) {
192 ipc_rcu_putref(msq); 201 ipc_rcu_putref(msq, ipc_rcu_free);
193 return retval; 202 return retval;
194 } 203 }
195 204
196 /* ipc_addid() locks msq upon success. */ 205 /* ipc_addid() locks msq upon success. */
197 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni); 206 id = ipc_addid(&msg_ids(ns), &msq->q_perm, ns->msg_ctlmni);
198 if (id < 0) { 207 if (id < 0) {
199 security_msg_queue_free(msq); 208 ipc_rcu_putref(msq, msg_rcu_free);
200 ipc_rcu_putref(msq);
201 return id; 209 return id;
202 } 210 }
203 211
@@ -276,8 +284,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
276 free_msg(msg); 284 free_msg(msg);
277 } 285 }
278 atomic_sub(msq->q_cbytes, &ns->msg_bytes); 286 atomic_sub(msq->q_cbytes, &ns->msg_bytes);
279 security_msg_queue_free(msq); 287 ipc_rcu_putref(msq, msg_rcu_free);
280 ipc_rcu_putref(msq);
281} 288}
282 289
283/* 290/*
@@ -717,7 +724,7 @@ long do_msgsnd(int msqid, long mtype, void __user *mtext,
717 rcu_read_lock(); 724 rcu_read_lock();
718 ipc_lock_object(&msq->q_perm); 725 ipc_lock_object(&msq->q_perm);
719 726
720 ipc_rcu_putref(msq); 727 ipc_rcu_putref(msq, ipc_rcu_free);
721 if (msq->q_perm.deleted) { 728 if (msq->q_perm.deleted) {
722 err = -EIDRM; 729 err = -EIDRM;
723 goto out_unlock0; 730 goto out_unlock0;
diff --git a/ipc/sem.c b/ipc/sem.c
index 69b6a21f3844..19c8b980d1fe 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -243,6 +243,15 @@ static void merge_queues(struct sem_array *sma)
243 } 243 }
244} 244}
245 245
246static void sem_rcu_free(struct rcu_head *head)
247{
248 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
249 struct sem_array *sma = ipc_rcu_to_struct(p);
250
251 security_sem_free(sma);
252 ipc_rcu_free(head);
253}
254
246/* 255/*
247 * If the request contains only one semaphore operation, and there are 256 * If the request contains only one semaphore operation, and there are
248 * no complex transactions pending, lock only the semaphore involved. 257 * no complex transactions pending, lock only the semaphore involved.
@@ -374,12 +383,7 @@ static inline struct sem_array *sem_obtain_object_check(struct ipc_namespace *ns
374static inline void sem_lock_and_putref(struct sem_array *sma) 383static inline void sem_lock_and_putref(struct sem_array *sma)
375{ 384{
376 sem_lock(sma, NULL, -1); 385 sem_lock(sma, NULL, -1);
377 ipc_rcu_putref(sma); 386 ipc_rcu_putref(sma, ipc_rcu_free);
378}
379
380static inline void sem_putref(struct sem_array *sma)
381{
382 ipc_rcu_putref(sma);
383} 387}
384 388
385static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s) 389static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
@@ -458,14 +462,13 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
458 sma->sem_perm.security = NULL; 462 sma->sem_perm.security = NULL;
459 retval = security_sem_alloc(sma); 463 retval = security_sem_alloc(sma);
460 if (retval) { 464 if (retval) {
461 ipc_rcu_putref(sma); 465 ipc_rcu_putref(sma, ipc_rcu_free);
462 return retval; 466 return retval;
463 } 467 }
464 468
465 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni); 469 id = ipc_addid(&sem_ids(ns), &sma->sem_perm, ns->sc_semmni);
466 if (id < 0) { 470 if (id < 0) {
467 security_sem_free(sma); 471 ipc_rcu_putref(sma, sem_rcu_free);
468 ipc_rcu_putref(sma);
469 return id; 472 return id;
470 } 473 }
471 ns->used_sems += nsems; 474 ns->used_sems += nsems;
@@ -1047,8 +1050,7 @@ static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
1047 1050
1048 wake_up_sem_queue_do(&tasks); 1051 wake_up_sem_queue_do(&tasks);
1049 ns->used_sems -= sma->sem_nsems; 1052 ns->used_sems -= sma->sem_nsems;
1050 security_sem_free(sma); 1053 ipc_rcu_putref(sma, sem_rcu_free);
1051 ipc_rcu_putref(sma);
1052} 1054}
1053 1055
1054static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version) 1056static unsigned long copy_semid_to_user(void __user *buf, struct semid64_ds *in, int version)
@@ -1292,7 +1294,7 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1292 rcu_read_unlock(); 1294 rcu_read_unlock();
1293 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1295 sem_io = ipc_alloc(sizeof(ushort)*nsems);
1294 if(sem_io == NULL) { 1296 if(sem_io == NULL) {
1295 sem_putref(sma); 1297 ipc_rcu_putref(sma, ipc_rcu_free);
1296 return -ENOMEM; 1298 return -ENOMEM;
1297 } 1299 }
1298 1300
@@ -1328,20 +1330,20 @@ static int semctl_main(struct ipc_namespace *ns, int semid, int semnum,
1328 if(nsems > SEMMSL_FAST) { 1330 if(nsems > SEMMSL_FAST) {
1329 sem_io = ipc_alloc(sizeof(ushort)*nsems); 1331 sem_io = ipc_alloc(sizeof(ushort)*nsems);
1330 if(sem_io == NULL) { 1332 if(sem_io == NULL) {
1331 sem_putref(sma); 1333 ipc_rcu_putref(sma, ipc_rcu_free);
1332 return -ENOMEM; 1334 return -ENOMEM;
1333 } 1335 }
1334 } 1336 }
1335 1337
1336 if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) { 1338 if (copy_from_user (sem_io, p, nsems*sizeof(ushort))) {
1337 sem_putref(sma); 1339 ipc_rcu_putref(sma, ipc_rcu_free);
1338 err = -EFAULT; 1340 err = -EFAULT;
1339 goto out_free; 1341 goto out_free;
1340 } 1342 }
1341 1343
1342 for (i = 0; i < nsems; i++) { 1344 for (i = 0; i < nsems; i++) {
1343 if (sem_io[i] > SEMVMX) { 1345 if (sem_io[i] > SEMVMX) {
1344 sem_putref(sma); 1346 ipc_rcu_putref(sma, ipc_rcu_free);
1345 err = -ERANGE; 1347 err = -ERANGE;
1346 goto out_free; 1348 goto out_free;
1347 } 1349 }
@@ -1629,7 +1631,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
1629 /* step 2: allocate new undo structure */ 1631 /* step 2: allocate new undo structure */
1630 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL); 1632 new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
1631 if (!new) { 1633 if (!new) {
1632 sem_putref(sma); 1634 ipc_rcu_putref(sma, ipc_rcu_free);
1633 return ERR_PTR(-ENOMEM); 1635 return ERR_PTR(-ENOMEM);
1634 } 1636 }
1635 1637
diff --git a/ipc/shm.c b/ipc/shm.c
index 2821cdf93adb..d69739610fd4 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -167,6 +167,15 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
167 ipc_lock_object(&ipcp->shm_perm); 167 ipc_lock_object(&ipcp->shm_perm);
168} 168}
169 169
170static void shm_rcu_free(struct rcu_head *head)
171{
172 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
173 struct shmid_kernel *shp = ipc_rcu_to_struct(p);
174
175 security_shm_free(shp);
176 ipc_rcu_free(head);
177}
178
170static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) 179static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
171{ 180{
172 ipc_rmid(&shm_ids(ns), &s->shm_perm); 181 ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -208,8 +217,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
208 user_shm_unlock(file_inode(shp->shm_file)->i_size, 217 user_shm_unlock(file_inode(shp->shm_file)->i_size,
209 shp->mlock_user); 218 shp->mlock_user);
210 fput (shp->shm_file); 219 fput (shp->shm_file);
211 security_shm_free(shp); 220 ipc_rcu_putref(shp, shm_rcu_free);
212 ipc_rcu_putref(shp);
213} 221}
214 222
215/* 223/*
@@ -497,7 +505,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
497 shp->shm_perm.security = NULL; 505 shp->shm_perm.security = NULL;
498 error = security_shm_alloc(shp); 506 error = security_shm_alloc(shp);
499 if (error) { 507 if (error) {
500 ipc_rcu_putref(shp); 508 ipc_rcu_putref(shp, ipc_rcu_free);
501 return error; 509 return error;
502 } 510 }
503 511
@@ -566,8 +574,7 @@ no_id:
566 user_shm_unlock(size, shp->mlock_user); 574 user_shm_unlock(size, shp->mlock_user);
567 fput(file); 575 fput(file);
568no_file: 576no_file:
569 security_shm_free(shp); 577 ipc_rcu_putref(shp, shm_rcu_free);
570 ipc_rcu_putref(shp);
571 return error; 578 return error;
572} 579}
573 580
diff --git a/ipc/util.c b/ipc/util.c
index e829da9ed01f..fdb8ae740775 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -474,11 +474,6 @@ void ipc_free(void* ptr, int size)
474 kfree(ptr); 474 kfree(ptr);
475} 475}
476 476
477struct ipc_rcu {
478 struct rcu_head rcu;
479 atomic_t refcount;
480} ____cacheline_aligned_in_smp;
481
482/** 477/**
483 * ipc_rcu_alloc - allocate ipc and rcu space 478 * ipc_rcu_alloc - allocate ipc and rcu space
484 * @size: size desired 479 * @size: size desired
@@ -505,27 +500,24 @@ int ipc_rcu_getref(void *ptr)
505 return atomic_inc_not_zero(&p->refcount); 500 return atomic_inc_not_zero(&p->refcount);
506} 501}
507 502
508/** 503void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head))
509 * ipc_schedule_free - free ipc + rcu space
510 * @head: RCU callback structure for queued work
511 */
512static void ipc_schedule_free(struct rcu_head *head)
513{
514 vfree(container_of(head, struct ipc_rcu, rcu));
515}
516
517void ipc_rcu_putref(void *ptr)
518{ 504{
519 struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1; 505 struct ipc_rcu *p = ((struct ipc_rcu *)ptr) - 1;
520 506
521 if (!atomic_dec_and_test(&p->refcount)) 507 if (!atomic_dec_and_test(&p->refcount))
522 return; 508 return;
523 509
524 if (is_vmalloc_addr(ptr)) { 510 call_rcu(&p->rcu, func);
525 call_rcu(&p->rcu, ipc_schedule_free); 511}
526 } else { 512
527 kfree_rcu(p, rcu); 513void ipc_rcu_free(struct rcu_head *head)
528 } 514{
515 struct ipc_rcu *p = container_of(head, struct ipc_rcu, rcu);
516
517 if (is_vmalloc_addr(p))
518 vfree(p);
519 else
520 kfree(p);
529} 521}
530 522
531/** 523/**
diff --git a/ipc/util.h b/ipc/util.h
index c5f3338ba1fa..f2f5036f2eed 100644
--- a/ipc/util.h
+++ b/ipc/util.h
@@ -47,6 +47,13 @@ static inline void msg_exit_ns(struct ipc_namespace *ns) { }
47static inline void shm_exit_ns(struct ipc_namespace *ns) { } 47static inline void shm_exit_ns(struct ipc_namespace *ns) { }
48#endif 48#endif
49 49
50struct ipc_rcu {
51 struct rcu_head rcu;
52 atomic_t refcount;
53} ____cacheline_aligned_in_smp;
54
55#define ipc_rcu_to_struct(p) ((void *)(p+1))
56
50/* 57/*
51 * Structure that holds the parameters needed by the ipc operations 58 * Structure that holds the parameters needed by the ipc operations
52 * (see after) 59 * (see after)
@@ -120,7 +127,8 @@ void ipc_free(void* ptr, int size);
120 */ 127 */
121void* ipc_rcu_alloc(int size); 128void* ipc_rcu_alloc(int size);
122int ipc_rcu_getref(void *ptr); 129int ipc_rcu_getref(void *ptr);
123void ipc_rcu_putref(void *ptr); 130void ipc_rcu_putref(void *ptr, void (*func)(struct rcu_head *head));
131void ipc_rcu_free(struct rcu_head *head);
124 132
125struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int); 133struct kern_ipc_perm *ipc_lock(struct ipc_ids *, int);
126struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id); 134struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id);
diff --git a/kernel/audit.c b/kernel/audit.c
index 91e53d04b6a9..7b0e23a740ce 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1117,9 +1117,10 @@ struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp_mask,
1117 1117
1118 sleep_time = timeout_start + audit_backlog_wait_time - 1118 sleep_time = timeout_start + audit_backlog_wait_time -
1119 jiffies; 1119 jiffies;
1120 if ((long)sleep_time > 0) 1120 if ((long)sleep_time > 0) {
1121 wait_for_auditd(sleep_time); 1121 wait_for_auditd(sleep_time);
1122 continue; 1122 continue;
1123 }
1123 } 1124 }
1124 if (audit_rate_check() && printk_ratelimit()) 1125 if (audit_rate_check() && printk_ratelimit())
1125 printk(KERN_WARNING 1126 printk(KERN_WARNING
diff --git a/kernel/events/core.c b/kernel/events/core.c
index dd236b66ca3a..cb4238e85b38 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3660,6 +3660,26 @@ static void calc_timer_values(struct perf_event *event,
3660 *running = ctx_time - event->tstamp_running; 3660 *running = ctx_time - event->tstamp_running;
3661} 3661}
3662 3662
3663static void perf_event_init_userpage(struct perf_event *event)
3664{
3665 struct perf_event_mmap_page *userpg;
3666 struct ring_buffer *rb;
3667
3668 rcu_read_lock();
3669 rb = rcu_dereference(event->rb);
3670 if (!rb)
3671 goto unlock;
3672
3673 userpg = rb->user_page;
3674
3675 /* Allow new userspace to detect that bit 0 is deprecated */
3676 userpg->cap_bit0_is_deprecated = 1;
3677 userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
3678
3679unlock:
3680 rcu_read_unlock();
3681}
3682
3663void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) 3683void __weak arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now)
3664{ 3684{
3665} 3685}
@@ -4044,6 +4064,7 @@ again:
4044 ring_buffer_attach(event, rb); 4064 ring_buffer_attach(event, rb);
4045 rcu_assign_pointer(event->rb, rb); 4065 rcu_assign_pointer(event->rb, rb);
4046 4066
4067 perf_event_init_userpage(event);
4047 perf_event_update_userpage(event); 4068 perf_event_update_userpage(event);
4048 4069
4049unlock: 4070unlock:
diff --git a/kernel/reboot.c b/kernel/reboot.c
index 269ed9384cc4..f813b3474646 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -32,7 +32,14 @@ EXPORT_SYMBOL(cad_pid);
32#endif 32#endif
33enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE; 33enum reboot_mode reboot_mode DEFAULT_REBOOT_MODE;
34 34
35int reboot_default; 35/*
36 * This variable is used privately to keep track of whether or not
37 * reboot_type is still set to its default value (i.e., reboot= hasn't
38 * been set on the command line). This is needed so that we can
39 * suppress DMI scanning for reboot quirks. Without it, it's
40 * impossible to override a faulty reboot quirk without recompiling.
41 */
42int reboot_default = 1;
36int reboot_cpu; 43int reboot_cpu;
37enum reboot_type reboot_type = BOOT_ACPI; 44enum reboot_type reboot_type = BOOT_ACPI;
38int reboot_force; 45int reboot_force;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 11cd13667359..7c70201fbc61 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4242,7 +4242,7 @@ static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
4242 } 4242 }
4243 4243
4244 if (!se) { 4244 if (!se) {
4245 cfs_rq->h_load = rq->avg.load_avg_contrib; 4245 cfs_rq->h_load = cfs_rq->runnable_load_avg;
4246 cfs_rq->last_h_load_update = now; 4246 cfs_rq->last_h_load_update = now;
4247 } 4247 }
4248 4248
@@ -4823,8 +4823,8 @@ void fix_small_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
4823 (busiest->load_per_task * SCHED_POWER_SCALE) / 4823 (busiest->load_per_task * SCHED_POWER_SCALE) /
4824 busiest->group_power; 4824 busiest->group_power;
4825 4825
4826 if (busiest->avg_load - local->avg_load + scaled_busy_load_per_task >= 4826 if (busiest->avg_load + scaled_busy_load_per_task >=
4827 (scaled_busy_load_per_task * imbn)) { 4827 local->avg_load + (scaled_busy_load_per_task * imbn)) {
4828 env->imbalance = busiest->load_per_task; 4828 env->imbalance = busiest->load_per_task;
4829 return; 4829 return;
4830 } 4830 }
@@ -4896,7 +4896,8 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
4896 * max load less than avg load(as we skip the groups at or below 4896 * max load less than avg load(as we skip the groups at or below
4897 * its cpu_power, while calculating max_load..) 4897 * its cpu_power, while calculating max_load..)
4898 */ 4898 */
4899 if (busiest->avg_load < sds->avg_load) { 4899 if (busiest->avg_load <= sds->avg_load ||
4900 local->avg_load >= sds->avg_load) {
4900 env->imbalance = 0; 4901 env->imbalance = 0;
4901 return fix_small_imbalance(env, sds); 4902 return fix_small_imbalance(env, sds);
4902 } 4903 }
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 51c4f34d258e..4431610f049a 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -486,7 +486,52 @@ static struct smp_hotplug_thread watchdog_threads = {
486 .unpark = watchdog_enable, 486 .unpark = watchdog_enable,
487}; 487};
488 488
489static int watchdog_enable_all_cpus(void) 489static void restart_watchdog_hrtimer(void *info)
490{
491 struct hrtimer *hrtimer = &__raw_get_cpu_var(watchdog_hrtimer);
492 int ret;
493
494 /*
495 * No need to cancel and restart hrtimer if it is currently executing
496 * because it will reprogram itself with the new period now.
497 * We should never see it unqueued here because we are running per-cpu
498 * with interrupts disabled.
499 */
500 ret = hrtimer_try_to_cancel(hrtimer);
501 if (ret == 1)
502 hrtimer_start(hrtimer, ns_to_ktime(sample_period),
503 HRTIMER_MODE_REL_PINNED);
504}
505
506static void update_timers(int cpu)
507{
508 struct call_single_data data = {.func = restart_watchdog_hrtimer};
509 /*
510 * Make sure that perf event counter will adopt to a new
511 * sampling period. Updating the sampling period directly would
512 * be much nicer but we do not have an API for that now so
513 * let's use a big hammer.
514 * Hrtimer will adopt the new period on the next tick but this
515 * might be late already so we have to restart the timer as well.
516 */
517 watchdog_nmi_disable(cpu);
518 __smp_call_function_single(cpu, &data, 1);
519 watchdog_nmi_enable(cpu);
520}
521
522static void update_timers_all_cpus(void)
523{
524 int cpu;
525
526 get_online_cpus();
527 preempt_disable();
528 for_each_online_cpu(cpu)
529 update_timers(cpu);
530 preempt_enable();
531 put_online_cpus();
532}
533
534static int watchdog_enable_all_cpus(bool sample_period_changed)
490{ 535{
491 int err = 0; 536 int err = 0;
492 537
@@ -496,6 +541,8 @@ static int watchdog_enable_all_cpus(void)
496 pr_err("Failed to create watchdog threads, disabled\n"); 541 pr_err("Failed to create watchdog threads, disabled\n");
497 else 542 else
498 watchdog_running = 1; 543 watchdog_running = 1;
544 } else if (sample_period_changed) {
545 update_timers_all_cpus();
499 } 546 }
500 547
501 return err; 548 return err;
@@ -520,13 +567,15 @@ int proc_dowatchdog(struct ctl_table *table, int write,
520 void __user *buffer, size_t *lenp, loff_t *ppos) 567 void __user *buffer, size_t *lenp, loff_t *ppos)
521{ 568{
522 int err, old_thresh, old_enabled; 569 int err, old_thresh, old_enabled;
570 static DEFINE_MUTEX(watchdog_proc_mutex);
523 571
572 mutex_lock(&watchdog_proc_mutex);
524 old_thresh = ACCESS_ONCE(watchdog_thresh); 573 old_thresh = ACCESS_ONCE(watchdog_thresh);
525 old_enabled = ACCESS_ONCE(watchdog_user_enabled); 574 old_enabled = ACCESS_ONCE(watchdog_user_enabled);
526 575
527 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 576 err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
528 if (err || !write) 577 if (err || !write)
529 return err; 578 goto out;
530 579
531 set_sample_period(); 580 set_sample_period();
532 /* 581 /*
@@ -535,7 +584,7 @@ int proc_dowatchdog(struct ctl_table *table, int write,
535 * watchdog_*_all_cpus() function takes care of this. 584 * watchdog_*_all_cpus() function takes care of this.
536 */ 585 */
537 if (watchdog_user_enabled && watchdog_thresh) 586 if (watchdog_user_enabled && watchdog_thresh)
538 err = watchdog_enable_all_cpus(); 587 err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh);
539 else 588 else
540 watchdog_disable_all_cpus(); 589 watchdog_disable_all_cpus();
541 590
@@ -544,7 +593,8 @@ int proc_dowatchdog(struct ctl_table *table, int write,
544 watchdog_thresh = old_thresh; 593 watchdog_thresh = old_thresh;
545 watchdog_user_enabled = old_enabled; 594 watchdog_user_enabled = old_enabled;
546 } 595 }
547 596out:
597 mutex_unlock(&watchdog_proc_mutex);
548 return err; 598 return err;
549} 599}
550#endif /* CONFIG_SYSCTL */ 600#endif /* CONFIG_SYSCTL */
@@ -554,5 +604,5 @@ void __init lockup_detector_init(void)
554 set_sample_period(); 604 set_sample_period();
555 605
556 if (watchdog_user_enabled) 606 if (watchdog_user_enabled)
557 watchdog_enable_all_cpus(); 607 watchdog_enable_all_cpus(false);
558} 608}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d5ff3ce13029..1c52ddbc839b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -39,6 +39,7 @@
39#include <linux/limits.h> 39#include <linux/limits.h>
40#include <linux/export.h> 40#include <linux/export.h>
41#include <linux/mutex.h> 41#include <linux/mutex.h>
42#include <linux/rbtree.h>
42#include <linux/slab.h> 43#include <linux/slab.h>
43#include <linux/swap.h> 44#include <linux/swap.h>
44#include <linux/swapops.h> 45#include <linux/swapops.h>
@@ -160,6 +161,10 @@ struct mem_cgroup_per_zone {
160 161
161 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1]; 162 struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
162 163
164 struct rb_node tree_node; /* RB tree node */
165 unsigned long long usage_in_excess;/* Set to the value by which */
166 /* the soft limit is exceeded*/
167 bool on_tree;
163 struct mem_cgroup *memcg; /* Back pointer, we cannot */ 168 struct mem_cgroup *memcg; /* Back pointer, we cannot */
164 /* use container_of */ 169 /* use container_of */
165}; 170};
@@ -168,6 +173,26 @@ struct mem_cgroup_per_node {
168 struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; 173 struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
169}; 174};
170 175
176/*
177 * Cgroups above their limits are maintained in a RB-Tree, independent of
178 * their hierarchy representation
179 */
180
181struct mem_cgroup_tree_per_zone {
182 struct rb_root rb_root;
183 spinlock_t lock;
184};
185
186struct mem_cgroup_tree_per_node {
187 struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
188};
189
190struct mem_cgroup_tree {
191 struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
192};
193
194static struct mem_cgroup_tree soft_limit_tree __read_mostly;
195
171struct mem_cgroup_threshold { 196struct mem_cgroup_threshold {
172 struct eventfd_ctx *eventfd; 197 struct eventfd_ctx *eventfd;
173 u64 threshold; 198 u64 threshold;
@@ -303,22 +328,6 @@ struct mem_cgroup {
303 atomic_t numainfo_events; 328 atomic_t numainfo_events;
304 atomic_t numainfo_updating; 329 atomic_t numainfo_updating;
305#endif 330#endif
306 /*
307 * Protects soft_contributed transitions.
308 * See mem_cgroup_update_soft_limit
309 */
310 spinlock_t soft_lock;
311
312 /*
313 * If true then this group has increased parents' children_in_excess
314 * when it got over the soft limit.
315 * When a group falls bellow the soft limit, parents' children_in_excess
316 * is decreased and soft_contributed changed to false.
317 */
318 bool soft_contributed;
319
320 /* Number of children that are in soft limit excess */
321 atomic_t children_in_excess;
322 331
323 struct mem_cgroup_per_node *nodeinfo[0]; 332 struct mem_cgroup_per_node *nodeinfo[0];
324 /* WARNING: nodeinfo must be the last member here */ 333 /* WARNING: nodeinfo must be the last member here */
@@ -422,6 +431,7 @@ static bool move_file(void)
422 * limit reclaim to prevent infinite loops, if they ever occur. 431 * limit reclaim to prevent infinite loops, if they ever occur.
423 */ 432 */
424#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100 433#define MEM_CGROUP_MAX_RECLAIM_LOOPS 100
434#define MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
425 435
426enum charge_type { 436enum charge_type {
427 MEM_CGROUP_CHARGE_TYPE_CACHE = 0, 437 MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -648,6 +658,164 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
648 return mem_cgroup_zoneinfo(memcg, nid, zid); 658 return mem_cgroup_zoneinfo(memcg, nid, zid);
649} 659}
650 660
661static struct mem_cgroup_tree_per_zone *
662soft_limit_tree_node_zone(int nid, int zid)
663{
664 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
665}
666
667static struct mem_cgroup_tree_per_zone *
668soft_limit_tree_from_page(struct page *page)
669{
670 int nid = page_to_nid(page);
671 int zid = page_zonenum(page);
672
673 return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
674}
675
676static void
677__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
678 struct mem_cgroup_per_zone *mz,
679 struct mem_cgroup_tree_per_zone *mctz,
680 unsigned long long new_usage_in_excess)
681{
682 struct rb_node **p = &mctz->rb_root.rb_node;
683 struct rb_node *parent = NULL;
684 struct mem_cgroup_per_zone *mz_node;
685
686 if (mz->on_tree)
687 return;
688
689 mz->usage_in_excess = new_usage_in_excess;
690 if (!mz->usage_in_excess)
691 return;
692 while (*p) {
693 parent = *p;
694 mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
695 tree_node);
696 if (mz->usage_in_excess < mz_node->usage_in_excess)
697 p = &(*p)->rb_left;
698 /*
699 * We can't avoid mem cgroups that are over their soft
700 * limit by the same amount
701 */
702 else if (mz->usage_in_excess >= mz_node->usage_in_excess)
703 p = &(*p)->rb_right;
704 }
705 rb_link_node(&mz->tree_node, parent, p);
706 rb_insert_color(&mz->tree_node, &mctz->rb_root);
707 mz->on_tree = true;
708}
709
710static void
711__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
712 struct mem_cgroup_per_zone *mz,
713 struct mem_cgroup_tree_per_zone *mctz)
714{
715 if (!mz->on_tree)
716 return;
717 rb_erase(&mz->tree_node, &mctz->rb_root);
718 mz->on_tree = false;
719}
720
721static void
722mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
723 struct mem_cgroup_per_zone *mz,
724 struct mem_cgroup_tree_per_zone *mctz)
725{
726 spin_lock(&mctz->lock);
727 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
728 spin_unlock(&mctz->lock);
729}
730
731
732static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
733{
734 unsigned long long excess;
735 struct mem_cgroup_per_zone *mz;
736 struct mem_cgroup_tree_per_zone *mctz;
737 int nid = page_to_nid(page);
738 int zid = page_zonenum(page);
739 mctz = soft_limit_tree_from_page(page);
740
741 /*
742 * Necessary to update all ancestors when hierarchy is used.
743 * because their event counter is not touched.
744 */
745 for (; memcg; memcg = parent_mem_cgroup(memcg)) {
746 mz = mem_cgroup_zoneinfo(memcg, nid, zid);
747 excess = res_counter_soft_limit_excess(&memcg->res);
748 /*
749 * We have to update the tree if mz is on RB-tree or
750 * mem is over its softlimit.
751 */
752 if (excess || mz->on_tree) {
753 spin_lock(&mctz->lock);
754 /* if on-tree, remove it */
755 if (mz->on_tree)
756 __mem_cgroup_remove_exceeded(memcg, mz, mctz);
757 /*
758 * Insert again. mz->usage_in_excess will be updated.
759 * If excess is 0, no tree ops.
760 */
761 __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
762 spin_unlock(&mctz->lock);
763 }
764 }
765}
766
767static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
768{
769 int node, zone;
770 struct mem_cgroup_per_zone *mz;
771 struct mem_cgroup_tree_per_zone *mctz;
772
773 for_each_node(node) {
774 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
775 mz = mem_cgroup_zoneinfo(memcg, node, zone);
776 mctz = soft_limit_tree_node_zone(node, zone);
777 mem_cgroup_remove_exceeded(memcg, mz, mctz);
778 }
779 }
780}
781
782static struct mem_cgroup_per_zone *
783__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
784{
785 struct rb_node *rightmost = NULL;
786 struct mem_cgroup_per_zone *mz;
787
788retry:
789 mz = NULL;
790 rightmost = rb_last(&mctz->rb_root);
791 if (!rightmost)
792 goto done; /* Nothing to reclaim from */
793
794 mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
795 /*
796 * Remove the node now but someone else can add it back,
797 * we will to add it back at the end of reclaim to its correct
798 * position in the tree.
799 */
800 __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
801 if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
802 !css_tryget(&mz->memcg->css))
803 goto retry;
804done:
805 return mz;
806}
807
808static struct mem_cgroup_per_zone *
809mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
810{
811 struct mem_cgroup_per_zone *mz;
812
813 spin_lock(&mctz->lock);
814 mz = __mem_cgroup_largest_soft_limit_node(mctz);
815 spin_unlock(&mctz->lock);
816 return mz;
817}
818
651/* 819/*
652 * Implementation Note: reading percpu statistics for memcg. 820 * Implementation Note: reading percpu statistics for memcg.
653 * 821 *
@@ -822,48 +990,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
822} 990}
823 991
824/* 992/*
825 * Called from rate-limited memcg_check_events when enough
826 * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure
827 * that all the parents up the hierarchy will be notified that this group
828 * is in excess or that it is not in excess anymore. mmecg->soft_contributed
829 * makes the transition a single action whenever the state flips from one to
830 * the other.
831 */
832static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
833{
834 unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
835 struct mem_cgroup *parent = memcg;
836 int delta = 0;
837
838 spin_lock(&memcg->soft_lock);
839 if (excess) {
840 if (!memcg->soft_contributed) {
841 delta = 1;
842 memcg->soft_contributed = true;
843 }
844 } else {
845 if (memcg->soft_contributed) {
846 delta = -1;
847 memcg->soft_contributed = false;
848 }
849 }
850
851 /*
852 * Necessary to update all ancestors when hierarchy is used
853 * because their event counter is not touched.
854 * We track children even outside the hierarchy for the root
855 * cgroup because tree walk starting at root should visit
856 * all cgroups and we want to prevent from pointless tree
857 * walk if no children is below the limit.
858 */
859 while (delta && (parent = parent_mem_cgroup(parent)))
860 atomic_add(delta, &parent->children_in_excess);
861 if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
862 atomic_add(delta, &root_mem_cgroup->children_in_excess);
863 spin_unlock(&memcg->soft_lock);
864}
865
866/*
867 * Check events in order. 993 * Check events in order.
868 * 994 *
869 */ 995 */
@@ -886,7 +1012,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
886 1012
887 mem_cgroup_threshold(memcg); 1013 mem_cgroup_threshold(memcg);
888 if (unlikely(do_softlimit)) 1014 if (unlikely(do_softlimit))
889 mem_cgroup_update_soft_limit(memcg); 1015 mem_cgroup_update_tree(memcg, page);
890#if MAX_NUMNODES > 1 1016#if MAX_NUMNODES > 1
891 if (unlikely(do_numainfo)) 1017 if (unlikely(do_numainfo))
892 atomic_inc(&memcg->numainfo_events); 1018 atomic_inc(&memcg->numainfo_events);
@@ -929,15 +1055,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
929 return memcg; 1055 return memcg;
930} 1056}
931 1057
932static enum mem_cgroup_filter_t
933mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
934 mem_cgroup_iter_filter cond)
935{
936 if (!cond)
937 return VISIT;
938 return cond(memcg, root);
939}
940
941/* 1058/*
942 * Returns a next (in a pre-order walk) alive memcg (with elevated css 1059 * Returns a next (in a pre-order walk) alive memcg (with elevated css
943 * ref. count) or NULL if the whole root's subtree has been visited. 1060 * ref. count) or NULL if the whole root's subtree has been visited.
@@ -945,7 +1062,7 @@ mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
945 * helper function to be used by mem_cgroup_iter 1062 * helper function to be used by mem_cgroup_iter
946 */ 1063 */
947static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root, 1064static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
948 struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond) 1065 struct mem_cgroup *last_visited)
949{ 1066{
950 struct cgroup_subsys_state *prev_css, *next_css; 1067 struct cgroup_subsys_state *prev_css, *next_css;
951 1068
@@ -963,31 +1080,11 @@ skip_node:
963 if (next_css) { 1080 if (next_css) {
964 struct mem_cgroup *mem = mem_cgroup_from_css(next_css); 1081 struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
965 1082
966 switch (mem_cgroup_filter(mem, root, cond)) { 1083 if (css_tryget(&mem->css))
967 case SKIP: 1084 return mem;
1085 else {
968 prev_css = next_css; 1086 prev_css = next_css;
969 goto skip_node; 1087 goto skip_node;
970 case SKIP_TREE:
971 if (mem == root)
972 return NULL;
973 /*
974 * css_rightmost_descendant is not an optimal way to
975 * skip through a subtree (especially for imbalanced
976 * trees leaning to right) but that's what we have right
977 * now. More effective solution would be traversing
978 * right-up for first non-NULL without calling
979 * css_next_descendant_pre afterwards.
980 */
981 prev_css = css_rightmost_descendant(next_css);
982 goto skip_node;
983 case VISIT:
984 if (css_tryget(&mem->css))
985 return mem;
986 else {
987 prev_css = next_css;
988 goto skip_node;
989 }
990 break;
991 } 1088 }
992 } 1089 }
993 1090
@@ -1051,7 +1148,6 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
1051 * @root: hierarchy root 1148 * @root: hierarchy root
1052 * @prev: previously returned memcg, NULL on first invocation 1149 * @prev: previously returned memcg, NULL on first invocation
1053 * @reclaim: cookie for shared reclaim walks, NULL for full walks 1150 * @reclaim: cookie for shared reclaim walks, NULL for full walks
1054 * @cond: filter for visited nodes, NULL for no filter
1055 * 1151 *
1056 * Returns references to children of the hierarchy below @root, or 1152 * Returns references to children of the hierarchy below @root, or
1057 * @root itself, or %NULL after a full round-trip. 1153 * @root itself, or %NULL after a full round-trip.
@@ -1064,18 +1160,15 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
1064 * divide up the memcgs in the hierarchy among all concurrent 1160 * divide up the memcgs in the hierarchy among all concurrent
1065 * reclaimers operating on the same zone and priority. 1161 * reclaimers operating on the same zone and priority.
1066 */ 1162 */
1067struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root, 1163struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
1068 struct mem_cgroup *prev, 1164 struct mem_cgroup *prev,
1069 struct mem_cgroup_reclaim_cookie *reclaim, 1165 struct mem_cgroup_reclaim_cookie *reclaim)
1070 mem_cgroup_iter_filter cond)
1071{ 1166{
1072 struct mem_cgroup *memcg = NULL; 1167 struct mem_cgroup *memcg = NULL;
1073 struct mem_cgroup *last_visited = NULL; 1168 struct mem_cgroup *last_visited = NULL;
1074 1169
1075 if (mem_cgroup_disabled()) { 1170 if (mem_cgroup_disabled())
1076 /* first call must return non-NULL, second return NULL */ 1171 return NULL;
1077 return (struct mem_cgroup *)(unsigned long)!prev;
1078 }
1079 1172
1080 if (!root) 1173 if (!root)
1081 root = root_mem_cgroup; 1174 root = root_mem_cgroup;
@@ -1086,9 +1179,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
1086 if (!root->use_hierarchy && root != root_mem_cgroup) { 1179 if (!root->use_hierarchy && root != root_mem_cgroup) {
1087 if (prev) 1180 if (prev)
1088 goto out_css_put; 1181 goto out_css_put;
1089 if (mem_cgroup_filter(root, root, cond) == VISIT) 1182 return root;
1090 return root;
1091 return NULL;
1092 } 1183 }
1093 1184
1094 rcu_read_lock(); 1185 rcu_read_lock();
@@ -1111,7 +1202,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
1111 last_visited = mem_cgroup_iter_load(iter, root, &seq); 1202 last_visited = mem_cgroup_iter_load(iter, root, &seq);
1112 } 1203 }
1113 1204
1114 memcg = __mem_cgroup_iter_next(root, last_visited, cond); 1205 memcg = __mem_cgroup_iter_next(root, last_visited);
1115 1206
1116 if (reclaim) { 1207 if (reclaim) {
1117 mem_cgroup_iter_update(iter, last_visited, memcg, seq); 1208 mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1122,11 +1213,7 @@ struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
1122 reclaim->generation = iter->generation; 1213 reclaim->generation = iter->generation;
1123 } 1214 }
1124 1215
1125 /* 1216 if (prev && !memcg)
1126 * We have finished the whole tree walk or no group has been
1127 * visited because filter told us to skip the root node.
1128 */
1129 if (!memcg && (prev || (cond && !last_visited)))
1130 goto out_unlock; 1217 goto out_unlock;
1131 } 1218 }
1132out_unlock: 1219out_unlock:
@@ -1767,7 +1854,6 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
1767 return total; 1854 return total;
1768} 1855}
1769 1856
1770#if MAX_NUMNODES > 1
1771/** 1857/**
1772 * test_mem_cgroup_node_reclaimable 1858 * test_mem_cgroup_node_reclaimable
1773 * @memcg: the target memcg 1859 * @memcg: the target memcg
@@ -1790,6 +1876,7 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
1790 return false; 1876 return false;
1791 1877
1792} 1878}
1879#if MAX_NUMNODES > 1
1793 1880
1794/* 1881/*
1795 * Always updating the nodemask is not very good - even if we have an empty 1882 * Always updating the nodemask is not very good - even if we have an empty
@@ -1857,50 +1944,104 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1857 return node; 1944 return node;
1858} 1945}
1859 1946
1947/*
1948 * Check all nodes whether it contains reclaimable pages or not.
1949 * For quick scan, we make use of scan_nodes. This will allow us to skip
1950 * unused nodes. But scan_nodes is lazily updated and may not cotain
1951 * enough new information. We need to do double check.
1952 */
1953static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1954{
1955 int nid;
1956
1957 /*
1958 * quick check...making use of scan_node.
1959 * We can skip unused nodes.
1960 */
1961 if (!nodes_empty(memcg->scan_nodes)) {
1962 for (nid = first_node(memcg->scan_nodes);
1963 nid < MAX_NUMNODES;
1964 nid = next_node(nid, memcg->scan_nodes)) {
1965
1966 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1967 return true;
1968 }
1969 }
1970 /*
1971 * Check rest of nodes.
1972 */
1973 for_each_node_state(nid, N_MEMORY) {
1974 if (node_isset(nid, memcg->scan_nodes))
1975 continue;
1976 if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
1977 return true;
1978 }
1979 return false;
1980}
1981
1860#else 1982#else
1861int mem_cgroup_select_victim_node(struct mem_cgroup *memcg) 1983int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
1862{ 1984{
1863 return 0; 1985 return 0;
1864} 1986}
1865 1987
1866#endif 1988static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
1867
1868/*
1869 * A group is eligible for the soft limit reclaim under the given root
1870 * hierarchy if
1871 * a) it is over its soft limit
1872 * b) any parent up the hierarchy is over its soft limit
1873 *
1874 * If the given group doesn't have any children over the limit then it
1875 * doesn't make any sense to iterate its subtree.
1876 */
1877enum mem_cgroup_filter_t
1878mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
1879 struct mem_cgroup *root)
1880{ 1989{
1881 struct mem_cgroup *parent; 1990 return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
1882 1991}
1883 if (!memcg) 1992#endif
1884 memcg = root_mem_cgroup;
1885 parent = memcg;
1886
1887 if (res_counter_soft_limit_excess(&memcg->res))
1888 return VISIT;
1889 1993
1890 /* 1994static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
1891 * If any parent up to the root in the hierarchy is over its soft limit 1995 struct zone *zone,
1892 * then we have to obey and reclaim from this group as well. 1996 gfp_t gfp_mask,
1893 */ 1997 unsigned long *total_scanned)
1894 while ((parent = parent_mem_cgroup(parent))) { 1998{
1895 if (res_counter_soft_limit_excess(&parent->res)) 1999 struct mem_cgroup *victim = NULL;
1896 return VISIT; 2000 int total = 0;
1897 if (parent == root) 2001 int loop = 0;
2002 unsigned long excess;
2003 unsigned long nr_scanned;
2004 struct mem_cgroup_reclaim_cookie reclaim = {
2005 .zone = zone,
2006 .priority = 0,
2007 };
2008
2009 excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
2010
2011 while (1) {
2012 victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
2013 if (!victim) {
2014 loop++;
2015 if (loop >= 2) {
2016 /*
2017 * If we have not been able to reclaim
2018 * anything, it might because there are
2019 * no reclaimable pages under this hierarchy
2020 */
2021 if (!total)
2022 break;
2023 /*
2024 * We want to do more targeted reclaim.
2025 * excess >> 2 is not to excessive so as to
2026 * reclaim too much, nor too less that we keep
2027 * coming back to reclaim from this cgroup
2028 */
2029 if (total >= (excess >> 2) ||
2030 (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
2031 break;
2032 }
2033 continue;
2034 }
2035 if (!mem_cgroup_reclaimable(victim, false))
2036 continue;
2037 total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
2038 zone, &nr_scanned);
2039 *total_scanned += nr_scanned;
2040 if (!res_counter_soft_limit_excess(&root_memcg->res))
1898 break; 2041 break;
1899 } 2042 }
1900 2043 mem_cgroup_iter_break(root_memcg, victim);
1901 if (!atomic_read(&memcg->children_in_excess)) 2044 return total;
1902 return SKIP_TREE;
1903 return SKIP;
1904} 2045}
1905 2046
1906static DEFINE_SPINLOCK(memcg_oom_lock); 2047static DEFINE_SPINLOCK(memcg_oom_lock);
@@ -2812,7 +2953,9 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
2812 unlock_page_cgroup(pc); 2953 unlock_page_cgroup(pc);
2813 2954
2814 /* 2955 /*
2815 * "charge_statistics" updated event counter. 2956 * "charge_statistics" updated event counter. Then, check it.
2957 * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
2958 * if they exceeds softlimit.
2816 */ 2959 */
2817 memcg_check_events(memcg, page); 2960 memcg_check_events(memcg, page);
2818} 2961}
@@ -4647,6 +4790,98 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
4647 return ret; 4790 return ret;
4648} 4791}
4649 4792
4793unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
4794 gfp_t gfp_mask,
4795 unsigned long *total_scanned)
4796{
4797 unsigned long nr_reclaimed = 0;
4798 struct mem_cgroup_per_zone *mz, *next_mz = NULL;
4799 unsigned long reclaimed;
4800 int loop = 0;
4801 struct mem_cgroup_tree_per_zone *mctz;
4802 unsigned long long excess;
4803 unsigned long nr_scanned;
4804
4805 if (order > 0)
4806 return 0;
4807
4808 mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
4809 /*
4810 * This loop can run a while, specially if mem_cgroup's continuously
4811 * keep exceeding their soft limit and putting the system under
4812 * pressure
4813 */
4814 do {
4815 if (next_mz)
4816 mz = next_mz;
4817 else
4818 mz = mem_cgroup_largest_soft_limit_node(mctz);
4819 if (!mz)
4820 break;
4821
4822 nr_scanned = 0;
4823 reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
4824 gfp_mask, &nr_scanned);
4825 nr_reclaimed += reclaimed;
4826 *total_scanned += nr_scanned;
4827 spin_lock(&mctz->lock);
4828
4829 /*
4830 * If we failed to reclaim anything from this memory cgroup
4831 * it is time to move on to the next cgroup
4832 */
4833 next_mz = NULL;
4834 if (!reclaimed) {
4835 do {
4836 /*
4837 * Loop until we find yet another one.
4838 *
4839 * By the time we get the soft_limit lock
4840 * again, someone might have aded the
4841 * group back on the RB tree. Iterate to
4842 * make sure we get a different mem.
4843 * mem_cgroup_largest_soft_limit_node returns
4844 * NULL if no other cgroup is present on
4845 * the tree
4846 */
4847 next_mz =
4848 __mem_cgroup_largest_soft_limit_node(mctz);
4849 if (next_mz == mz)
4850 css_put(&next_mz->memcg->css);
4851 else /* next_mz == NULL or other memcg */
4852 break;
4853 } while (1);
4854 }
4855 __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
4856 excess = res_counter_soft_limit_excess(&mz->memcg->res);
4857 /*
4858 * One school of thought says that we should not add
4859 * back the node to the tree if reclaim returns 0.
4860 * But our reclaim could return 0, simply because due
4861 * to priority we are exposing a smaller subset of
4862 * memory to reclaim from. Consider this as a longer
4863 * term TODO.
4864 */
4865 /* If excess == 0, no tree ops */
4866 __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
4867 spin_unlock(&mctz->lock);
4868 css_put(&mz->memcg->css);
4869 loop++;
4870 /*
4871 * Could not reclaim anything and there are no more
4872 * mem cgroups to try or we seem to be looping without
4873 * reclaiming anything.
4874 */
4875 if (!nr_reclaimed &&
4876 (next_mz == NULL ||
4877 loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
4878 break;
4879 } while (!nr_reclaimed);
4880 if (next_mz)
4881 css_put(&next_mz->memcg->css);
4882 return nr_reclaimed;
4883}
4884
4650/** 4885/**
4651 * mem_cgroup_force_empty_list - clears LRU of a group 4886 * mem_cgroup_force_empty_list - clears LRU of a group
4652 * @memcg: group to clear 4887 * @memcg: group to clear
@@ -5911,6 +6146,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
5911 for (zone = 0; zone < MAX_NR_ZONES; zone++) { 6146 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
5912 mz = &pn->zoneinfo[zone]; 6147 mz = &pn->zoneinfo[zone];
5913 lruvec_init(&mz->lruvec); 6148 lruvec_init(&mz->lruvec);
6149 mz->usage_in_excess = 0;
6150 mz->on_tree = false;
5914 mz->memcg = memcg; 6151 mz->memcg = memcg;
5915 } 6152 }
5916 memcg->nodeinfo[node] = pn; 6153 memcg->nodeinfo[node] = pn;
@@ -5966,6 +6203,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
5966 int node; 6203 int node;
5967 size_t size = memcg_size(); 6204 size_t size = memcg_size();
5968 6205
6206 mem_cgroup_remove_from_trees(memcg);
5969 free_css_id(&mem_cgroup_subsys, &memcg->css); 6207 free_css_id(&mem_cgroup_subsys, &memcg->css);
5970 6208
5971 for_each_node(node) 6209 for_each_node(node)
@@ -6002,6 +6240,29 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
6002} 6240}
6003EXPORT_SYMBOL(parent_mem_cgroup); 6241EXPORT_SYMBOL(parent_mem_cgroup);
6004 6242
6243static void __init mem_cgroup_soft_limit_tree_init(void)
6244{
6245 struct mem_cgroup_tree_per_node *rtpn;
6246 struct mem_cgroup_tree_per_zone *rtpz;
6247 int tmp, node, zone;
6248
6249 for_each_node(node) {
6250 tmp = node;
6251 if (!node_state(node, N_NORMAL_MEMORY))
6252 tmp = -1;
6253 rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
6254 BUG_ON(!rtpn);
6255
6256 soft_limit_tree.rb_tree_per_node[node] = rtpn;
6257
6258 for (zone = 0; zone < MAX_NR_ZONES; zone++) {
6259 rtpz = &rtpn->rb_tree_per_zone[zone];
6260 rtpz->rb_root = RB_ROOT;
6261 spin_lock_init(&rtpz->lock);
6262 }
6263 }
6264}
6265
6005static struct cgroup_subsys_state * __ref 6266static struct cgroup_subsys_state * __ref
6006mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) 6267mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
6007{ 6268{
@@ -6031,7 +6292,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
6031 mutex_init(&memcg->thresholds_lock); 6292 mutex_init(&memcg->thresholds_lock);
6032 spin_lock_init(&memcg->move_lock); 6293 spin_lock_init(&memcg->move_lock);
6033 vmpressure_init(&memcg->vmpressure); 6294 vmpressure_init(&memcg->vmpressure);
6034 spin_lock_init(&memcg->soft_lock);
6035 6295
6036 return &memcg->css; 6296 return &memcg->css;
6037 6297
@@ -6109,13 +6369,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
6109 6369
6110 mem_cgroup_invalidate_reclaim_iterators(memcg); 6370 mem_cgroup_invalidate_reclaim_iterators(memcg);
6111 mem_cgroup_reparent_charges(memcg); 6371 mem_cgroup_reparent_charges(memcg);
6112 if (memcg->soft_contributed) {
6113 while ((memcg = parent_mem_cgroup(memcg)))
6114 atomic_dec(&memcg->children_in_excess);
6115
6116 if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
6117 atomic_dec(&root_mem_cgroup->children_in_excess);
6118 }
6119 mem_cgroup_destroy_all_caches(memcg); 6372 mem_cgroup_destroy_all_caches(memcg);
6120 vmpressure_cleanup(&memcg->vmpressure); 6373 vmpressure_cleanup(&memcg->vmpressure);
6121} 6374}
@@ -6790,6 +7043,7 @@ static int __init mem_cgroup_init(void)
6790{ 7043{
6791 hotcpu_notifier(memcg_cpu_hotplug_callback, 0); 7044 hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
6792 enable_swap_cgroup(); 7045 enable_swap_cgroup();
7046 mem_cgroup_soft_limit_tree_init();
6793 memcg_stock_init(); 7047 memcg_stock_init();
6794 return 0; 7048 return 0;
6795} 7049}
diff --git a/mm/mlock.c b/mm/mlock.c
index d63802663242..67ba6da7d0e3 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -736,6 +736,7 @@ static int do_mlockall(int flags)
736 736
737 /* Ignore errors */ 737 /* Ignore errors */
738 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); 738 mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
739 cond_resched();
739 } 740 }
740out: 741out:
741 return 0; 742 return 0;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8ed1b775bdc9..beb35778c69f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -139,23 +139,11 @@ static bool global_reclaim(struct scan_control *sc)
139{ 139{
140 return !sc->target_mem_cgroup; 140 return !sc->target_mem_cgroup;
141} 141}
142
143static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
144{
145 struct mem_cgroup *root = sc->target_mem_cgroup;
146 return !mem_cgroup_disabled() &&
147 mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
148}
149#else 142#else
150static bool global_reclaim(struct scan_control *sc) 143static bool global_reclaim(struct scan_control *sc)
151{ 144{
152 return true; 145 return true;
153} 146}
154
155static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
156{
157 return false;
158}
159#endif 147#endif
160 148
161unsigned long zone_reclaimable_pages(struct zone *zone) 149unsigned long zone_reclaimable_pages(struct zone *zone)
@@ -2176,11 +2164,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
2176 } 2164 }
2177} 2165}
2178 2166
2179static int 2167static void shrink_zone(struct zone *zone, struct scan_control *sc)
2180__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
2181{ 2168{
2182 unsigned long nr_reclaimed, nr_scanned; 2169 unsigned long nr_reclaimed, nr_scanned;
2183 int groups_scanned = 0;
2184 2170
2185 do { 2171 do {
2186 struct mem_cgroup *root = sc->target_mem_cgroup; 2172 struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2188,17 +2174,15 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
2188 .zone = zone, 2174 .zone = zone,
2189 .priority = sc->priority, 2175 .priority = sc->priority,
2190 }; 2176 };
2191 struct mem_cgroup *memcg = NULL; 2177 struct mem_cgroup *memcg;
2192 mem_cgroup_iter_filter filter = (soft_reclaim) ?
2193 mem_cgroup_soft_reclaim_eligible : NULL;
2194 2178
2195 nr_reclaimed = sc->nr_reclaimed; 2179 nr_reclaimed = sc->nr_reclaimed;
2196 nr_scanned = sc->nr_scanned; 2180 nr_scanned = sc->nr_scanned;
2197 2181
2198 while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) { 2182 memcg = mem_cgroup_iter(root, NULL, &reclaim);
2183 do {
2199 struct lruvec *lruvec; 2184 struct lruvec *lruvec;
2200 2185
2201 groups_scanned++;
2202 lruvec = mem_cgroup_zone_lruvec(zone, memcg); 2186 lruvec = mem_cgroup_zone_lruvec(zone, memcg);
2203 2187
2204 shrink_lruvec(lruvec, sc); 2188 shrink_lruvec(lruvec, sc);
@@ -2218,7 +2202,8 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
2218 mem_cgroup_iter_break(root, memcg); 2202 mem_cgroup_iter_break(root, memcg);
2219 break; 2203 break;
2220 } 2204 }
2221 } 2205 memcg = mem_cgroup_iter(root, memcg, &reclaim);
2206 } while (memcg);
2222 2207
2223 vmpressure(sc->gfp_mask, sc->target_mem_cgroup, 2208 vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
2224 sc->nr_scanned - nr_scanned, 2209 sc->nr_scanned - nr_scanned,
@@ -2226,37 +2211,6 @@ __shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
2226 2211
2227 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed, 2212 } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
2228 sc->nr_scanned - nr_scanned, sc)); 2213 sc->nr_scanned - nr_scanned, sc));
2229
2230 return groups_scanned;
2231}
2232
2233
2234static void shrink_zone(struct zone *zone, struct scan_control *sc)
2235{
2236 bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
2237 unsigned long nr_scanned = sc->nr_scanned;
2238 int scanned_groups;
2239
2240 scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
2241 /*
2242 * memcg iterator might race with other reclaimer or start from
2243 * a incomplete tree walk so the tree walk in __shrink_zone
2244 * might have missed groups that are above the soft limit. Try
2245 * another loop to catch up with others. Do it just once to
2246 * prevent from reclaim latencies when other reclaimers always
2247 * preempt this one.
2248 */
2249 if (do_soft_reclaim && !scanned_groups)
2250 __shrink_zone(zone, sc, do_soft_reclaim);
2251
2252 /*
2253 * No group is over the soft limit or those that are do not have
2254 * pages in the zone we are reclaiming so we have to reclaim everybody
2255 */
2256 if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
2257 __shrink_zone(zone, sc, false);
2258 return;
2259 }
2260} 2214}
2261 2215
2262/* Returns true if compaction should go ahead for a high-order request */ 2216/* Returns true if compaction should go ahead for a high-order request */
@@ -2320,6 +2274,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2320{ 2274{
2321 struct zoneref *z; 2275 struct zoneref *z;
2322 struct zone *zone; 2276 struct zone *zone;
2277 unsigned long nr_soft_reclaimed;
2278 unsigned long nr_soft_scanned;
2323 bool aborted_reclaim = false; 2279 bool aborted_reclaim = false;
2324 2280
2325 /* 2281 /*
@@ -2359,6 +2315,18 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2359 continue; 2315 continue;
2360 } 2316 }
2361 } 2317 }
2318 /*
2319 * This steals pages from memory cgroups over softlimit
2320 * and returns the number of reclaimed pages and
2321 * scanned pages. This works for global memory pressure
2322 * and balancing, not for a memcg's limit.
2323 */
2324 nr_soft_scanned = 0;
2325 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
2326 sc->order, sc->gfp_mask,
2327 &nr_soft_scanned);
2328 sc->nr_reclaimed += nr_soft_reclaimed;
2329 sc->nr_scanned += nr_soft_scanned;
2362 /* need some check for avoid more shrink_zone() */ 2330 /* need some check for avoid more shrink_zone() */
2363 } 2331 }
2364 2332
@@ -2952,6 +2920,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
2952{ 2920{
2953 int i; 2921 int i;
2954 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ 2922 int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
2923 unsigned long nr_soft_reclaimed;
2924 unsigned long nr_soft_scanned;
2955 struct scan_control sc = { 2925 struct scan_control sc = {
2956 .gfp_mask = GFP_KERNEL, 2926 .gfp_mask = GFP_KERNEL,
2957 .priority = DEF_PRIORITY, 2927 .priority = DEF_PRIORITY,
@@ -3066,6 +3036,15 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
3066 3036
3067 sc.nr_scanned = 0; 3037 sc.nr_scanned = 0;
3068 3038
3039 nr_soft_scanned = 0;
3040 /*
3041 * Call soft limit reclaim before calling shrink_zone.
3042 */
3043 nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
3044 order, sc.gfp_mask,
3045 &nr_soft_scanned);
3046 sc.nr_reclaimed += nr_soft_reclaimed;
3047
3069 /* 3048 /*
3070 * There should be no need to raise the scanning 3049 * There should be no need to raise the scanning
3071 * priority if enough pages are already being scanned 3050 * priority if enough pages are already being scanned
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 47016c304c84..66cad506b8a2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3975,8 +3975,8 @@ sub string_find_replace {
3975# check for new externs in .h files. 3975# check for new externs in .h files.
3976 if ($realfile =~ /\.h$/ && 3976 if ($realfile =~ /\.h$/ &&
3977 $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) { 3977 $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
3978 if (WARN("AVOID_EXTERNS", 3978 if (CHK("AVOID_EXTERNS",
3979 "extern prototypes should be avoided in .h files\n" . $herecurr) && 3979 "extern prototypes should be avoided in .h files\n" . $herecurr) &&
3980 $fix) { 3980 $fix) {
3981 $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/; 3981 $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
3982 } 3982 }
diff --git a/tools/lib/lk/debugfs.c b/tools/lib/lk/debugfs.c
index 099e7cd022e4..7c4347962353 100644
--- a/tools/lib/lk/debugfs.c
+++ b/tools/lib/lk/debugfs.c
@@ -5,7 +5,6 @@
5#include <stdbool.h> 5#include <stdbool.h>
6#include <sys/vfs.h> 6#include <sys/vfs.h>
7#include <sys/mount.h> 7#include <sys/mount.h>
8#include <linux/magic.h>
9#include <linux/kernel.h> 8#include <linux/kernel.h>
10 9
11#include "debugfs.h" 10#include "debugfs.h"
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index 9570c2b0f83c..b2519e49424f 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -32,7 +32,7 @@ u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
32int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, 32int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
33 struct perf_tsc_conversion *tc) 33 struct perf_tsc_conversion *tc)
34{ 34{
35 bool cap_usr_time_zero; 35 bool cap_user_time_zero;
36 u32 seq; 36 u32 seq;
37 int i = 0; 37 int i = 0;
38 38
@@ -42,7 +42,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
42 tc->time_mult = pc->time_mult; 42 tc->time_mult = pc->time_mult;
43 tc->time_shift = pc->time_shift; 43 tc->time_shift = pc->time_shift;
44 tc->time_zero = pc->time_zero; 44 tc->time_zero = pc->time_zero;
45 cap_usr_time_zero = pc->cap_usr_time_zero; 45 cap_user_time_zero = pc->cap_user_time_zero;
46 rmb(); 46 rmb();
47 if (pc->lock == seq && !(seq & 1)) 47 if (pc->lock == seq && !(seq & 1))
48 break; 48 break;
@@ -52,7 +52,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
52 } 52 }
53 } 53 }
54 54
55 if (!cap_usr_time_zero) 55 if (!cap_user_time_zero)
56 return -EOPNOTSUPP; 56 return -EOPNOTSUPP;
57 57
58 return 0; 58 return 0;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 423875c999b2..afe377b2884f 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -321,8 +321,6 @@ found:
321 return perf_event__repipe(tool, event_sw, &sample_sw, machine); 321 return perf_event__repipe(tool, event_sw, &sample_sw, machine);
322} 322}
323 323
324extern volatile int session_done;
325
326static void sig_handler(int sig __maybe_unused) 324static void sig_handler(int sig __maybe_unused)
327{ 325{
328 session_done = 1; 326 session_done = 1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8e50d8d77419..72eae7498c09 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -401,8 +401,6 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
401 return 0; 401 return 0;
402} 402}
403 403
404extern volatile int session_done;
405
406static void sig_handler(int sig __maybe_unused) 404static void sig_handler(int sig __maybe_unused)
407{ 405{
408 session_done = 1; 406 session_done = 1;
@@ -568,6 +566,9 @@ static int __cmd_report(struct perf_report *rep)
568 } 566 }
569 } 567 }
570 568
569 if (session_done())
570 return 0;
571
571 if (nr_samples == 0) { 572 if (nr_samples == 0) {
572 ui__error("The %s file has no samples!\n", session->filename); 573 ui__error("The %s file has no samples!\n", session->filename);
573 return 0; 574 return 0;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7f31a3ded1b6..9c333ff3dfeb 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -553,8 +553,6 @@ static struct perf_tool perf_script = {
553 .ordering_requires_timestamps = true, 553 .ordering_requires_timestamps = true,
554}; 554};
555 555
556extern volatile int session_done;
557
558static void sig_handler(int sig __maybe_unused) 556static void sig_handler(int sig __maybe_unused)
559{ 557{
560 session_done = 1; 558 session_done = 1;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index f5aa6375e3e9..fd4853404727 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -16,6 +16,23 @@
16#include <sys/mman.h> 16#include <sys/mman.h>
17#include <linux/futex.h> 17#include <linux/futex.h>
18 18
19/* For older distros: */
20#ifndef MAP_STACK
21# define MAP_STACK 0x20000
22#endif
23
24#ifndef MADV_HWPOISON
25# define MADV_HWPOISON 100
26#endif
27
28#ifndef MADV_MERGEABLE
29# define MADV_MERGEABLE 12
30#endif
31
32#ifndef MADV_UNMERGEABLE
33# define MADV_UNMERGEABLE 13
34#endif
35
19static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, 36static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
20 unsigned long arg, 37 unsigned long arg,
21 u8 arg_idx __maybe_unused, 38 u8 arg_idx __maybe_unused,
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 214e17e97e5c..346ee929d250 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -180,6 +180,9 @@ FLAGS_LIBELF=$(CFLAGS) $(LDFLAGS) $(EXTLIBS)
180ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y) 180ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_LIBELF),-DLIBELF_MMAP),y)
181 CFLAGS += -DLIBELF_MMAP 181 CFLAGS += -DLIBELF_MMAP
182endif 182endif
183ifeq ($(call try-cc,$(SOURCE_ELF_GETPHDRNUM),$(FLAGS_LIBELF),-DHAVE_ELF_GETPHDRNUM),y)
184 CFLAGS += -DHAVE_ELF_GETPHDRNUM
185endif
183 186
184# include ARCH specific config 187# include ARCH specific config
185-include $(src-perf)/arch/$(ARCH)/Makefile 188-include $(src-perf)/arch/$(ARCH)/Makefile
diff --git a/tools/perf/config/feature-tests.mak b/tools/perf/config/feature-tests.mak
index 708fb8e9822a..d5a8dd44945f 100644
--- a/tools/perf/config/feature-tests.mak
+++ b/tools/perf/config/feature-tests.mak
@@ -61,6 +61,15 @@ int main(void)
61} 61}
62endef 62endef
63 63
64define SOURCE_ELF_GETPHDRNUM
65#include <libelf.h>
66int main(void)
67{
68 size_t dst;
69 return elf_getphdrnum(0, &dst);
70}
71endef
72
64ifndef NO_SLANG 73ifndef NO_SLANG
65define SOURCE_SLANG 74define SOURCE_SLANG
66#include <slang.h> 75#include <slang.h>
@@ -210,6 +219,7 @@ define SOURCE_LIBAUDIT
210 219
211int main(void) 220int main(void)
212{ 221{
222 printf(\"error message: %s\n\", audit_errno_to_name(0));
213 return audit_open(); 223 return audit_open();
214} 224}
215endef 225endef
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index bfc5a27597d6..7eae5488ecea 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -809,7 +809,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map,
809 end = map__rip_2objdump(map, sym->end); 809 end = map__rip_2objdump(map, sym->end);
810 810
811 offset = line_ip - start; 811 offset = line_ip - start;
812 if (offset < 0 || (u64)line_ip > end) 812 if ((u64)line_ip < start || (u64)line_ip > end)
813 offset = -1; 813 offset = -1;
814 else 814 else
815 parsed_line = tmp2 + 1; 815 parsed_line = tmp2 + 1;
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 3e5f5430a28a..e23bde19d590 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -263,6 +263,21 @@ bool die_is_signed_type(Dwarf_Die *tp_die)
263} 263}
264 264
265/** 265/**
266 * die_is_func_def - Ensure that this DIE is a subprogram and definition
267 * @dw_die: a DIE
268 *
269 * Ensure that this DIE is a subprogram and NOT a declaration. This
270 * returns true if @dw_die is a function definition.
271 **/
272bool die_is_func_def(Dwarf_Die *dw_die)
273{
274 Dwarf_Attribute attr;
275
276 return (dwarf_tag(dw_die) == DW_TAG_subprogram &&
277 dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL);
278}
279
280/**
266 * die_get_data_member_location - Get the data-member offset 281 * die_get_data_member_location - Get the data-member offset
267 * @mb_die: a DIE of a member of a data structure 282 * @mb_die: a DIE of a member of a data structure
268 * @offs: The offset of the member in the data structure 283 * @offs: The offset of the member in the data structure
@@ -392,6 +407,10 @@ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
392{ 407{
393 struct __addr_die_search_param *ad = data; 408 struct __addr_die_search_param *ad = data;
394 409
410 /*
411 * Since a declaration entry doesn't has given pc, this always returns
412 * function definition entry.
413 */
395 if (dwarf_tag(fn_die) == DW_TAG_subprogram && 414 if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
396 dwarf_haspc(fn_die, ad->addr)) { 415 dwarf_haspc(fn_die, ad->addr)) {
397 memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); 416 memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 6ce1717784b7..8658d41697d2 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -38,6 +38,9 @@ extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
38extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, 38extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
39 int (*callback)(Dwarf_Die *, void *), void *data); 39 int (*callback)(Dwarf_Die *, void *), void *data);
40 40
41/* Ensure that this DIE is a subprogram and definition (not declaration) */
42extern bool die_is_func_def(Dwarf_Die *dw_die);
43
41/* Compare diename and tname */ 44/* Compare diename and tname */
42extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); 45extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
43 46
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 26441d0e571b..ce69901176d8 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -199,9 +199,11 @@ static int write_buildid(char *name, size_t name_len, u8 *build_id,
199 return write_padded(fd, name, name_len + 1, len); 199 return write_padded(fd, name, name_len + 1, len);
200} 200}
201 201
202static int __dsos__write_buildid_table(struct list_head *head, pid_t pid, 202static int __dsos__write_buildid_table(struct list_head *head,
203 u16 misc, int fd) 203 struct machine *machine,
204 pid_t pid, u16 misc, int fd)
204{ 205{
206 char nm[PATH_MAX];
205 struct dso *pos; 207 struct dso *pos;
206 208
207 dsos__for_each_with_build_id(pos, head) { 209 dsos__for_each_with_build_id(pos, head) {
@@ -215,6 +217,10 @@ static int __dsos__write_buildid_table(struct list_head *head, pid_t pid,
215 if (is_vdso_map(pos->short_name)) { 217 if (is_vdso_map(pos->short_name)) {
216 name = (char *) VDSO__MAP_NAME; 218 name = (char *) VDSO__MAP_NAME;
217 name_len = sizeof(VDSO__MAP_NAME) + 1; 219 name_len = sizeof(VDSO__MAP_NAME) + 1;
220 } else if (dso__is_kcore(pos)) {
221 machine__mmap_name(machine, nm, sizeof(nm));
222 name = nm;
223 name_len = strlen(nm) + 1;
218 } else { 224 } else {
219 name = pos->long_name; 225 name = pos->long_name;
220 name_len = pos->long_name_len + 1; 226 name_len = pos->long_name_len + 1;
@@ -240,10 +246,10 @@ static int machine__write_buildid_table(struct machine *machine, int fd)
240 umisc = PERF_RECORD_MISC_GUEST_USER; 246 umisc = PERF_RECORD_MISC_GUEST_USER;
241 } 247 }
242 248
243 err = __dsos__write_buildid_table(&machine->kernel_dsos, machine->pid, 249 err = __dsos__write_buildid_table(&machine->kernel_dsos, machine,
244 kmisc, fd); 250 machine->pid, kmisc, fd);
245 if (err == 0) 251 if (err == 0)
246 err = __dsos__write_buildid_table(&machine->user_dsos, 252 err = __dsos__write_buildid_table(&machine->user_dsos, machine,
247 machine->pid, umisc, fd); 253 machine->pid, umisc, fd);
248 return err; 254 return err;
249} 255}
@@ -375,23 +381,31 @@ out_free:
375 return err; 381 return err;
376} 382}
377 383
378static int dso__cache_build_id(struct dso *dso, const char *debugdir) 384static int dso__cache_build_id(struct dso *dso, struct machine *machine,
385 const char *debugdir)
379{ 386{
380 bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; 387 bool is_kallsyms = dso->kernel && dso->long_name[0] != '/';
381 bool is_vdso = is_vdso_map(dso->short_name); 388 bool is_vdso = is_vdso_map(dso->short_name);
389 char *name = dso->long_name;
390 char nm[PATH_MAX];
382 391
383 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), 392 if (dso__is_kcore(dso)) {
384 dso->long_name, debugdir, 393 is_kallsyms = true;
385 is_kallsyms, is_vdso); 394 machine__mmap_name(machine, nm, sizeof(nm));
395 name = nm;
396 }
397 return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
398 debugdir, is_kallsyms, is_vdso);
386} 399}
387 400
388static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir) 401static int __dsos__cache_build_ids(struct list_head *head,
402 struct machine *machine, const char *debugdir)
389{ 403{
390 struct dso *pos; 404 struct dso *pos;
391 int err = 0; 405 int err = 0;
392 406
393 dsos__for_each_with_build_id(pos, head) 407 dsos__for_each_with_build_id(pos, head)
394 if (dso__cache_build_id(pos, debugdir)) 408 if (dso__cache_build_id(pos, machine, debugdir))
395 err = -1; 409 err = -1;
396 410
397 return err; 411 return err;
@@ -399,8 +413,9 @@ static int __dsos__cache_build_ids(struct list_head *head, const char *debugdir)
399 413
400static int machine__cache_build_ids(struct machine *machine, const char *debugdir) 414static int machine__cache_build_ids(struct machine *machine, const char *debugdir)
401{ 415{
402 int ret = __dsos__cache_build_ids(&machine->kernel_dsos, debugdir); 416 int ret = __dsos__cache_build_ids(&machine->kernel_dsos, machine,
403 ret |= __dsos__cache_build_ids(&machine->user_dsos, debugdir); 417 debugdir);
418 ret |= __dsos__cache_build_ids(&machine->user_dsos, machine, debugdir);
404 return ret; 419 return ret;
405} 420}
406 421
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 46a0d35a05e1..9ff6cf3e9a99 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -611,6 +611,8 @@ void hists__collapse_resort(struct hists *hists)
611 next = rb_first(root); 611 next = rb_first(root);
612 612
613 while (next) { 613 while (next) {
614 if (session_done())
615 break;
614 n = rb_entry(next, struct hist_entry, rb_node_in); 616 n = rb_entry(next, struct hist_entry, rb_node_in);
615 next = rb_next(&n->rb_node_in); 617 next = rb_next(&n->rb_node_in);
616 618
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index be0329394d56..20c7299a9d4e 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -734,7 +734,7 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
734 } 734 }
735 735
736 /* If not a real subprogram, find a real one */ 736 /* If not a real subprogram, find a real one */
737 if (dwarf_tag(sc_die) != DW_TAG_subprogram) { 737 if (!die_is_func_def(sc_die)) {
738 if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { 738 if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
739 pr_warning("Failed to find probe point in any " 739 pr_warning("Failed to find probe point in any "
740 "functions.\n"); 740 "functions.\n");
@@ -980,12 +980,10 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
980 struct dwarf_callback_param *param = data; 980 struct dwarf_callback_param *param = data;
981 struct probe_finder *pf = param->data; 981 struct probe_finder *pf = param->data;
982 struct perf_probe_point *pp = &pf->pev->point; 982 struct perf_probe_point *pp = &pf->pev->point;
983 Dwarf_Attribute attr;
984 983
985 /* Check tag and diename */ 984 /* Check tag and diename */
986 if (dwarf_tag(sp_die) != DW_TAG_subprogram || 985 if (!die_is_func_def(sp_die) ||
987 !die_compare_name(sp_die, pp->function) || 986 !die_compare_name(sp_die, pp->function))
988 dwarf_attr(sp_die, DW_AT_declaration, &attr))
989 return DWARF_CB_OK; 987 return DWARF_CB_OK;
990 988
991 /* Check declared file */ 989 /* Check declared file */
@@ -1474,7 +1472,7 @@ static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
1474 return 0; 1472 return 0;
1475} 1473}
1476 1474
1477/* Search function from function name */ 1475/* Search function definition from function name */
1478static int line_range_search_cb(Dwarf_Die *sp_die, void *data) 1476static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1479{ 1477{
1480 struct dwarf_callback_param *param = data; 1478 struct dwarf_callback_param *param = data;
@@ -1485,7 +1483,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
1485 if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die))) 1483 if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
1486 return DWARF_CB_OK; 1484 return DWARF_CB_OK;
1487 1485
1488 if (dwarf_tag(sp_die) == DW_TAG_subprogram && 1486 if (die_is_func_def(sp_die) &&
1489 die_compare_name(sp_die, lr->function)) { 1487 die_compare_name(sp_die, lr->function)) {
1490 lf->fname = dwarf_decl_file(sp_die); 1488 lf->fname = dwarf_decl_file(sp_die);
1491 dwarf_decl_line(sp_die, &lr->offset); 1489 dwarf_decl_line(sp_die, &lr->offset);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 51f5edf2a6d0..70ffa41518f3 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -531,6 +531,9 @@ static int flush_sample_queue(struct perf_session *s,
531 return 0; 531 return 0;
532 532
533 list_for_each_entry_safe(iter, tmp, head, list) { 533 list_for_each_entry_safe(iter, tmp, head, list) {
534 if (session_done())
535 return 0;
536
534 if (iter->timestamp > limit) 537 if (iter->timestamp > limit)
535 break; 538 break;
536 539
@@ -1160,7 +1163,6 @@ static void perf_session__warn_about_errors(const struct perf_session *session,
1160 } 1163 }
1161} 1164}
1162 1165
1163#define session_done() (*(volatile int *)(&session_done))
1164volatile int session_done; 1166volatile int session_done;
1165 1167
1166static int __perf_session__process_pipe_events(struct perf_session *self, 1168static int __perf_session__process_pipe_events(struct perf_session *self,
@@ -1372,10 +1374,13 @@ more:
1372 "Processing events..."); 1374 "Processing events...");
1373 } 1375 }
1374 1376
1377 err = 0;
1378 if (session_done())
1379 goto out_err;
1380
1375 if (file_pos < file_size) 1381 if (file_pos < file_size)
1376 goto more; 1382 goto more;
1377 1383
1378 err = 0;
1379 /* do the final flush for ordered samples */ 1384 /* do the final flush for ordered samples */
1380 session->ordered_samples.next_flush = ULLONG_MAX; 1385 session->ordered_samples.next_flush = ULLONG_MAX;
1381 err = flush_sample_queue(session, tool); 1386 err = flush_sample_queue(session, tool);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 3aa75fb2225f..04bf7373a7e5 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -124,4 +124,8 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session,
124 124
125#define perf_session__set_tracepoints_handlers(session, array) \ 125#define perf_session__set_tracepoints_handlers(session, array) \
126 __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array)) 126 __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
127
128extern volatile int session_done;
129
130#define session_done() (*(volatile int *)(&session_done))
127#endif /* __PERF_SESSION_H */ 131#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7b9ab557380..a9c829be5216 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -8,6 +8,22 @@
8#include "symbol.h" 8#include "symbol.h"
9#include "debug.h" 9#include "debug.h"
10 10
11#ifndef HAVE_ELF_GETPHDRNUM
12static int elf_getphdrnum(Elf *elf, size_t *dst)
13{
14 GElf_Ehdr gehdr;
15 GElf_Ehdr *ehdr;
16
17 ehdr = gelf_getehdr(elf, &gehdr);
18 if (!ehdr)
19 return -1;
20
21 *dst = ehdr->e_phnum;
22
23 return 0;
24}
25#endif
26
11#ifndef NT_GNU_BUILD_ID 27#ifndef NT_GNU_BUILD_ID
12#define NT_GNU_BUILD_ID 3 28#define NT_GNU_BUILD_ID 3
13#endif 29#endif
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index fe7a27d67d2b..e9e1c03f927d 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -186,7 +186,7 @@ void parse_proc_kallsyms(struct pevent *pevent,
186 char *next = NULL; 186 char *next = NULL;
187 char *addr_str; 187 char *addr_str;
188 char *mod; 188 char *mod;
189 char *fmt; 189 char *fmt = NULL;
190 190
191 line = strtok_r(file, "\n", &next); 191 line = strtok_r(file, "\n", &next);
192 while (line) { 192 while (line) {