aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Makefile2
-rw-r--r--arch/sh/boards/mach-se/7724/setup.c9
-rw-r--r--arch/sh/drivers/heartbeat.c10
-rw-r--r--arch/sh/include/asm/dwarf.h4
-rw-r--r--arch/sh/include/asm/heartbeat.h1
-rw-r--r--arch/sh/include/mach-kfr2r09/mach/partner-jet-setup.txt1
-rw-r--r--arch/sh/kernel/Makefile43
-rw-r--r--arch/sh/kernel/Makefile_3240
-rw-r--r--arch/sh/kernel/Makefile_6421
-rw-r--r--arch/sh/kernel/cpu/init.c34
-rw-r--r--arch/sh/kernel/cpu/sh2/entry.S3
-rw-r--r--arch/sh/kernel/cpu/sh2a/entry.S3
-rw-r--r--arch/sh/kernel/cpu/sh3/entry.S8
-rw-r--r--arch/sh/kernel/cpu/sh4a/clock-sh7724.c2
-rw-r--r--arch/sh/kernel/cpu/shmobile/cpuidle.c11
-rw-r--r--arch/sh/kernel/cpu/shmobile/pm.c16
-rw-r--r--arch/sh/kernel/cpu/shmobile/sleep.S187
-rw-r--r--arch/sh/kernel/dwarf.c54
-rw-r--r--arch/sh/kernel/entry-common.S27
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/reboot.c12
-rw-r--r--drivers/clocksource/sh_cmt.c28
-rw-r--r--drivers/md/md.c32
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid5.c34
-rw-r--r--drivers/video/sh_mobile_lcdcfb.c5
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c35
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c134
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c30
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--include/linux/perf_counter.h49
-rw-r--r--include/linux/wait.h9
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/perf_counter.c338
-rw-r--r--kernel/wait.c5
-rw-r--r--net/socket.c2
-rw-r--r--tools/perf/Makefile29
-rw-r--r--tools/perf/builtin-list.c3
-rw-r--r--tools/perf/builtin-record.c95
-rw-r--r--tools/perf/builtin-report.c12
-rw-r--r--tools/perf/util/parse-events.c10
-rw-r--r--tools/perf/util/symbol.c17
-rw-r--r--tools/perf/util/symbol.h24
61 files changed, 1277 insertions, 571 deletions
diff --git a/Makefile b/Makefile
index 0d46615bffe5..abcfa85f8f82 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 31 3SUBLEVEL = 31
4EXTRAVERSION = -rc5 4EXTRAVERSION = -rc6
5NAME = Man-Eating Seals of Antiquity 5NAME = Man-Eating Seals of Antiquity
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c
index 36a4ca3a9000..9162081504ef 100644
--- a/arch/sh/boards/mach-se/7724/setup.c
+++ b/arch/sh/boards/mach-se/7724/setup.c
@@ -246,7 +246,7 @@ static struct platform_device ceu1_device = {
246 }, 246 },
247}; 247};
248 248
249/* KEYSC */ 249/* KEYSC in SoC (Needs SW33-2 set to ON) */
250static struct sh_keysc_info keysc_info = { 250static struct sh_keysc_info keysc_info = {
251 .mode = SH_KEYSC_MODE_1, 251 .mode = SH_KEYSC_MODE_1,
252 .scan_timing = 10, 252 .scan_timing = 10,
@@ -263,12 +263,13 @@ static struct sh_keysc_info keysc_info = {
263 263
264static struct resource keysc_resources[] = { 264static struct resource keysc_resources[] = {
265 [0] = { 265 [0] = {
266 .start = 0x1a204000, 266 .name = "KEYSC",
267 .end = 0x1a20400f, 267 .start = 0x044b0000,
268 .end = 0x044b000f,
268 .flags = IORESOURCE_MEM, 269 .flags = IORESOURCE_MEM,
269 }, 270 },
270 [1] = { 271 [1] = {
271 .start = IRQ0_KEY, 272 .start = 79,
272 .flags = IORESOURCE_IRQ, 273 .flags = IORESOURCE_IRQ,
273 }, 274 },
274}; 275};
diff --git a/arch/sh/drivers/heartbeat.c b/arch/sh/drivers/heartbeat.c
index 938817e34e2b..a9339a6174fc 100644
--- a/arch/sh/drivers/heartbeat.c
+++ b/arch/sh/drivers/heartbeat.c
@@ -40,14 +40,19 @@ static inline void heartbeat_toggle_bit(struct heartbeat_data *hd,
40 if (inverted) 40 if (inverted)
41 new = ~new; 41 new = ~new;
42 42
43 new &= hd->mask;
44
43 switch (hd->regsize) { 45 switch (hd->regsize) {
44 case 32: 46 case 32:
47 new |= ioread32(hd->base) & ~hd->mask;
45 iowrite32(new, hd->base); 48 iowrite32(new, hd->base);
46 break; 49 break;
47 case 16: 50 case 16:
51 new |= ioread16(hd->base) & ~hd->mask;
48 iowrite16(new, hd->base); 52 iowrite16(new, hd->base);
49 break; 53 break;
50 default: 54 default:
55 new |= ioread8(hd->base) & ~hd->mask;
51 iowrite8(new, hd->base); 56 iowrite8(new, hd->base);
52 break; 57 break;
53 } 58 }
@@ -72,6 +77,7 @@ static int heartbeat_drv_probe(struct platform_device *pdev)
72{ 77{
73 struct resource *res; 78 struct resource *res;
74 struct heartbeat_data *hd; 79 struct heartbeat_data *hd;
80 int i;
75 81
76 if (unlikely(pdev->num_resources != 1)) { 82 if (unlikely(pdev->num_resources != 1)) {
77 dev_err(&pdev->dev, "invalid number of resources\n"); 83 dev_err(&pdev->dev, "invalid number of resources\n");
@@ -107,6 +113,10 @@ static int heartbeat_drv_probe(struct platform_device *pdev)
107 hd->nr_bits = ARRAY_SIZE(default_bit_pos); 113 hd->nr_bits = ARRAY_SIZE(default_bit_pos);
108 } 114 }
109 115
116 hd->mask = 0;
117 for (i = 0; i < hd->nr_bits; i++)
118 hd->mask |= (1 << hd->bit_pos[i]);
119
110 if (!hd->regsize) 120 if (!hd->regsize)
111 hd->regsize = 8; /* default access size */ 121 hd->regsize = 8; /* default access size */
112 122
diff --git a/arch/sh/include/asm/dwarf.h b/arch/sh/include/asm/dwarf.h
index 60b180728d8d..d3d3837c5e1b 100644
--- a/arch/sh/include/asm/dwarf.h
+++ b/arch/sh/include/asm/dwarf.h
@@ -340,6 +340,10 @@ struct dwarf_stack {
340#define DW_CFA_lo_user 0x1c 340#define DW_CFA_lo_user 0x1c
341#define DW_CFA_hi_user 0x3f 341#define DW_CFA_hi_user 0x3f
342 342
343/* GNU extension opcodes */
344#define DW_CFA_GNU_args_size 0x2e
345#define DW_CFA_GNU_negative_offset_extended 0x2f
346
343/* 347/*
344 * Some call frame instructions encode their operands in the opcode. We 348 * Some call frame instructions encode their operands in the opcode. We
345 * need some helper functions to extract both the opcode and operands 349 * need some helper functions to extract both the opcode and operands
diff --git a/arch/sh/include/asm/heartbeat.h b/arch/sh/include/asm/heartbeat.h
index 724a43ed245e..caaafe5a3ef1 100644
--- a/arch/sh/include/asm/heartbeat.h
+++ b/arch/sh/include/asm/heartbeat.h
@@ -11,6 +11,7 @@ struct heartbeat_data {
11 unsigned int nr_bits; 11 unsigned int nr_bits;
12 struct timer_list timer; 12 struct timer_list timer;
13 unsigned int regsize; 13 unsigned int regsize;
14 unsigned int mask;
14 unsigned long flags; 15 unsigned long flags;
15}; 16};
16 17
diff --git a/arch/sh/include/mach-kfr2r09/mach/partner-jet-setup.txt b/arch/sh/include/mach-kfr2r09/mach/partner-jet-setup.txt
index 9c85088728a7..25801d495c5f 100644
--- a/arch/sh/include/mach-kfr2r09/mach/partner-jet-setup.txt
+++ b/arch/sh/include/mach-kfr2r09/mach/partner-jet-setup.txt
@@ -24,6 +24,7 @@ LIST "setup clocks"
24ED 0xa4150004, 0x00000050 24ED 0xa4150004, 0x00000050
25ED 0xa4150000, 0x91053508 25ED 0xa4150000, 0x91053508
26WAIT 1 26WAIT 1
27ED 0xa4150050, 0x00000340
27ED 0xa4150024, 0x00005000 28ED 0xa4150024, 0x00005000
28 29
29LIST "setup pins" 30LIST "setup pins"
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 349d833deab5..f37cf02ad9be 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -1,5 +1,40 @@
1ifeq ($(CONFIG_SUPERH32),y) 1#
2include ${srctree}/arch/sh/kernel/Makefile_32 2# Makefile for the Linux/SuperH kernel.
3else 3#
4include ${srctree}/arch/sh/kernel/Makefile_64 4
5extra-y := head_$(BITS).o init_task.o vmlinux.lds
6
7ifdef CONFIG_FUNCTION_TRACER
8# Do not profile debug and lowlevel utilities
9CFLAGS_REMOVE_ftrace.o = -pg
5endif 10endif
11
12obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \
13 machvec.o process_$(BITS).o ptrace_$(BITS).o setup.o \
14 signal_$(BITS).o sys_sh.o sys_sh$(BITS).o syscalls_$(BITS).o \
15 time.o topology.o traps.o traps_$(BITS).o unwinder.o
16
17obj-y += cpu/
18obj-$(CONFIG_VSYSCALL) += vsyscall/
19obj-$(CONFIG_SMP) += smp.o
20obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o
21obj-$(CONFIG_KGDB) += kgdb.o
22obj-$(CONFIG_SH_CPU_FREQ) += cpufreq.o
23obj-$(CONFIG_MODULES) += sh_ksyms_$(BITS).o module.o
24obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
25obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
26obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
27obj-$(CONFIG_STACKTRACE) += stacktrace.o
28obj-$(CONFIG_IO_TRAPPED) += io_trapped.o
29obj-$(CONFIG_KPROBES) += kprobes.o
30obj-$(CONFIG_GENERIC_GPIO) += gpio.o
31obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
32obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
33obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
34obj-$(CONFIG_DUMP_CODE) += disassemble.o
35obj-$(CONFIG_HIBERNATION) += swsusp.o
36obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o
37
38obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o
39
40EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/Makefile_32 b/arch/sh/kernel/Makefile_32
deleted file mode 100644
index f2245ebf0b31..000000000000
--- a/arch/sh/kernel/Makefile_32
+++ /dev/null
@@ -1,40 +0,0 @@
1#
2# Makefile for the Linux/SuperH kernel.
3#
4
5extra-y := head_32.o init_task.o vmlinux.lds
6
7ifdef CONFIG_FUNCTION_TRACER
8# Do not profile debug and lowlevel utilities
9CFLAGS_REMOVE_ftrace.o = -pg
10endif
11
12obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \
13 machvec.o process_32.o ptrace_32.o setup.o signal_32.o \
14 sys_sh.o sys_sh32.o syscalls_32.o time.o topology.o \
15 traps.o traps_32.o unwinder.o
16
17obj-y += cpu/
18obj-$(CONFIG_VSYSCALL) += vsyscall/
19obj-$(CONFIG_SMP) += smp.o
20obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o
21obj-$(CONFIG_KGDB) += kgdb.o
22obj-$(CONFIG_SH_CPU_FREQ) += cpufreq.o
23obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o
24obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
25obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
26obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
27obj-$(CONFIG_STACKTRACE) += stacktrace.o
28obj-$(CONFIG_IO_TRAPPED) += io_trapped.o
29obj-$(CONFIG_KPROBES) += kprobes.o
30obj-$(CONFIG_GENERIC_GPIO) += gpio.o
31obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
32obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
33obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
34obj-$(CONFIG_DUMP_CODE) += disassemble.o
35obj-$(CONFIG_HIBERNATION) += swsusp.o
36obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o
37
38obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o
39
40EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/Makefile_64 b/arch/sh/kernel/Makefile_64
deleted file mode 100644
index cdfec1e499a0..000000000000
--- a/arch/sh/kernel/Makefile_64
+++ /dev/null
@@ -1,21 +0,0 @@
1extra-y := head_64.o init_task.o vmlinux.lds
2
3obj-y := debugtraps.o dumpstack.o idle.o io.o io_generic.o irq.o \
4 machvec.o process_64.o ptrace_64.o setup.o signal_64.o \
5 sys_sh.o sys_sh64.o syscalls_64.o time.o topology.o \
6 traps.o traps_64.o unwinder.o
7
8obj-y += cpu/
9obj-$(CONFIG_SMP) += smp.o
10obj-$(CONFIG_SH_CPU_FREQ) += cpufreq.o
11obj-$(CONFIG_MODULES) += sh_ksyms_64.o module.o
12obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
13obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
14obj-$(CONFIG_STACKTRACE) += stacktrace.o
15obj-$(CONFIG_IO_TRAPPED) += io_trapped.o
16obj-$(CONFIG_GENERIC_GPIO) += gpio.o
17obj-$(CONFIG_DWARF_UNWINDER) += dwarf.o
18
19obj-$(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) += localtimer.o
20
21EXTRA_CFLAGS += -Werror
diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c
index c832fa4cf8ed..e932ebef4738 100644
--- a/arch/sh/kernel/cpu/init.c
+++ b/arch/sh/kernel/cpu/init.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * CPU init code 4 * CPU init code
5 * 5 *
6 * Copyright (C) 2002 - 2007 Paul Mundt 6 * Copyright (C) 2002 - 2009 Paul Mundt
7 * Copyright (C) 2003 Richard Curnow 7 * Copyright (C) 2003 Richard Curnow
8 * 8 *
9 * This file is subject to the terms and conditions of the GNU General Public 9 * This file is subject to the terms and conditions of the GNU General Public
@@ -62,6 +62,37 @@ static void __init speculative_execution_init(void)
62#define speculative_execution_init() do { } while (0) 62#define speculative_execution_init() do { } while (0)
63#endif 63#endif
64 64
65#ifdef CONFIG_CPU_SH4A
66#define EXPMASK 0xff2f0004
67#define EXPMASK_RTEDS (1 << 0)
68#define EXPMASK_BRDSSLP (1 << 1)
69#define EXPMASK_MMCAW (1 << 4)
70
71static void __init expmask_init(void)
72{
73 unsigned long expmask = __raw_readl(EXPMASK);
74
75 /*
76 * Future proofing.
77 *
78 * Disable support for slottable sleep instruction
79 * and non-nop instructions in the rte delay slot.
80 */
81 expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP);
82
83 /*
84 * Enable associative writes to the memory-mapped cache array
85 * until the cache flush ops have been rewritten.
86 */
87 expmask |= EXPMASK_MMCAW;
88
89 __raw_writel(expmask, EXPMASK);
90 ctrl_barrier();
91}
92#else
93#define expmask_init() do { } while (0)
94#endif
95
65/* 2nd-level cache init */ 96/* 2nd-level cache init */
66void __uses_jump_to_uncached __attribute__ ((weak)) l2_cache_init(void) 97void __uses_jump_to_uncached __attribute__ ((weak)) l2_cache_init(void)
67{ 98{
@@ -319,4 +350,5 @@ asmlinkage void __init sh_cpu_init(void)
319#endif 350#endif
320 351
321 speculative_execution_init(); 352 speculative_execution_init();
353 expmask_init();
322} 354}
diff --git a/arch/sh/kernel/cpu/sh2/entry.S b/arch/sh/kernel/cpu/sh2/entry.S
index becc54c45692..c8a4331d9b8d 100644
--- a/arch/sh/kernel/cpu/sh2/entry.S
+++ b/arch/sh/kernel/cpu/sh2/entry.S
@@ -227,8 +227,9 @@ ENTRY(sh_bios_handler)
227 mov.l @r15+, r14 227 mov.l @r15+, r14
228 add #8,r15 228 add #8,r15
229 lds.l @r15+, pr 229 lds.l @r15+, pr
230 mov.l @r15+,r15
230 rte 231 rte
231 mov.l @r15+,r15 232 nop
232 .align 2 233 .align 2
2331: .long gdb_vbr_vector 2341: .long gdb_vbr_vector
234#endif /* CONFIG_SH_STANDARD_BIOS */ 235#endif /* CONFIG_SH_STANDARD_BIOS */
diff --git a/arch/sh/kernel/cpu/sh2a/entry.S b/arch/sh/kernel/cpu/sh2a/entry.S
index ab3903eeda5c..222742ddc0d6 100644
--- a/arch/sh/kernel/cpu/sh2a/entry.S
+++ b/arch/sh/kernel/cpu/sh2a/entry.S
@@ -176,8 +176,9 @@ ENTRY(sh_bios_handler)
176 movml.l @r15+,r14 176 movml.l @r15+,r14
177 add #8,r15 177 add #8,r15
178 lds.l @r15+, pr 178 lds.l @r15+, pr
179 mov.l @r15+,r15
179 rte 180 rte
180 mov.l @r15+,r15 181 nop
181 .align 2 182 .align 2
1821: .long gdb_vbr_vector 1831: .long gdb_vbr_vector
183#endif /* CONFIG_SH_STANDARD_BIOS */ 184#endif /* CONFIG_SH_STANDARD_BIOS */
diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S
index 854921c6f45b..f94f25e666cc 100644
--- a/arch/sh/kernel/cpu/sh3/entry.S
+++ b/arch/sh/kernel/cpu/sh3/entry.S
@@ -516,6 +516,14 @@ ENTRY(handle_interrupt)
516 bsr save_regs ! needs original pr value in k3 516 bsr save_regs ! needs original pr value in k3
517 mov #-1, k2 ! default vector kept in k2 517 mov #-1, k2 ! default vector kept in k2
518 518
519 stc sr, r0 ! get status register
520 shlr2 r0
521 and #0x3c, r0
522 cmp/eq #0x3c, r0
523 bf 9f
524 TRACE_IRQS_OFF
5259:
526
519 ! Setup return address and jump to do_IRQ 527 ! Setup return address and jump to do_IRQ
520 mov.l 4f, r9 ! fetch return address 528 mov.l 4f, r9 ! fetch return address
521 lds r9, pr ! put return address in pr 529 lds r9, pr ! put return address in pr
diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
index 34611d97378e..627588dfddf0 100644
--- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c
@@ -127,7 +127,7 @@ struct clk *main_clks[] = {
127 &div3_clk, 127 &div3_clk,
128}; 128};
129 129
130static int divisors[] = { 2, 0, 4, 6, 8, 12, 16, 0, 24, 32, 36, 48, 0, 72 }; 130static int divisors[] = { 2, 3, 4, 6, 8, 12, 16, 0, 24, 32, 36, 48, 0, 72 };
131 131
132static struct clk_div_mult_table div4_table = { 132static struct clk_div_mult_table div4_table = {
133 .divisors = divisors, 133 .divisors = divisors,
diff --git a/arch/sh/kernel/cpu/shmobile/cpuidle.c b/arch/sh/kernel/cpu/shmobile/cpuidle.c
index 4afdd975cc66..1c504bd972c3 100644
--- a/arch/sh/kernel/cpu/shmobile/cpuidle.c
+++ b/arch/sh/kernel/cpu/shmobile/cpuidle.c
@@ -21,6 +21,7 @@
21static unsigned long cpuidle_mode[] = { 21static unsigned long cpuidle_mode[] = {
22 SUSP_SH_SLEEP, /* regular sleep mode */ 22 SUSP_SH_SLEEP, /* regular sleep mode */
23 SUSP_SH_SLEEP | SUSP_SH_SF, /* sleep mode + self refresh */ 23 SUSP_SH_SLEEP | SUSP_SH_SF, /* sleep mode + self refresh */
24 SUSP_SH_STANDBY | SUSP_SH_SF, /* software standby mode + self refresh */
24}; 25};
25 26
26static int cpuidle_sleep_enter(struct cpuidle_device *dev, 27static int cpuidle_sleep_enter(struct cpuidle_device *dev,
@@ -96,6 +97,16 @@ void sh_mobile_setup_cpuidle(void)
96 state->flags |= CPUIDLE_FLAG_TIME_VALID; 97 state->flags |= CPUIDLE_FLAG_TIME_VALID;
97 state->enter = cpuidle_sleep_enter; 98 state->enter = cpuidle_sleep_enter;
98 99
100 state = &dev->states[i++];
101 snprintf(state->name, CPUIDLE_NAME_LEN, "C2");
102 strncpy(state->desc, "SuperH Mobile Standby Mode [SF]", CPUIDLE_DESC_LEN);
103 state->exit_latency = 2300;
104 state->target_residency = 1 * 2;
105 state->power_usage = 1;
106 state->flags = 0;
107 state->flags |= CPUIDLE_FLAG_TIME_VALID;
108 state->enter = cpuidle_sleep_enter;
109
99 dev->state_count = i; 110 dev->state_count = i;
100 111
101 cpuidle_register_device(dev); 112 cpuidle_register_device(dev);
diff --git a/arch/sh/kernel/cpu/shmobile/pm.c b/arch/sh/kernel/cpu/shmobile/pm.c
index de078d24ce56..ee3c2aaf66fb 100644
--- a/arch/sh/kernel/cpu/shmobile/pm.c
+++ b/arch/sh/kernel/cpu/shmobile/pm.c
@@ -41,23 +41,11 @@ extern const unsigned int sh_mobile_standby_size;
41 41
42void sh_mobile_call_standby(unsigned long mode) 42void sh_mobile_call_standby(unsigned long mode)
43{ 43{
44 extern void *vbr_base;
45 void *onchip_mem = (void *)ILRAM_BASE; 44 void *onchip_mem = (void *)ILRAM_BASE;
46 void (*standby_onchip_mem)(unsigned long) = onchip_mem; 45 void (*standby_onchip_mem)(unsigned long, unsigned long) = onchip_mem;
47
48 /* Note: Wake up from sleep may generate exceptions!
49 * Setup VBR to point to on-chip ram if self-refresh is
50 * going to be used.
51 */
52 if (mode & SUSP_SH_SF)
53 asm volatile("ldc %0, vbr" : : "r" (onchip_mem) : "memory");
54 46
55 /* Let assembly snippet in on-chip memory handle the rest */ 47 /* Let assembly snippet in on-chip memory handle the rest */
56 standby_onchip_mem(mode); 48 standby_onchip_mem(mode, ILRAM_BASE);
57
58 /* Put VBR back in System RAM again */
59 if (mode & SUSP_SH_SF)
60 asm volatile("ldc %0, vbr" : : "r" (&vbr_base) : "memory");
61} 49}
62 50
63static int sh_pm_enter(suspend_state_t state) 51static int sh_pm_enter(suspend_state_t state)
diff --git a/arch/sh/kernel/cpu/shmobile/sleep.S b/arch/sh/kernel/cpu/shmobile/sleep.S
index 5d888ef53d82..a439e6c7824f 100644
--- a/arch/sh/kernel/cpu/shmobile/sleep.S
+++ b/arch/sh/kernel/cpu/shmobile/sleep.S
@@ -16,18 +16,73 @@
16#include <asm/asm-offsets.h> 16#include <asm/asm-offsets.h>
17#include <asm/suspend.h> 17#include <asm/suspend.h>
18 18
19/*
20 * Kernel mode register usage, see entry.S:
21 * k0 scratch
22 * k1 scratch
23 * k4 scratch
24 */
25#define k0 r0
26#define k1 r1
27#define k4 r4
28
19/* manage self-refresh and enter standby mode. 29/* manage self-refresh and enter standby mode.
20 * this code will be copied to on-chip memory and executed from there. 30 * this code will be copied to on-chip memory and executed from there.
21 */ 31 */
22 32
23 .balign 4096,0,4096 33 .balign 4096,0,4096
24ENTRY(sh_mobile_standby) 34ENTRY(sh_mobile_standby)
35
36 /* save original vbr */
37 stc vbr, r1
38 mova saved_vbr, r0
39 mov.l r1, @r0
40
41 /* point vbr to our on-chip memory page */
42 ldc r5, vbr
43
44 /* save return address */
45 mova saved_spc, r0
46 sts pr, r5
47 mov.l r5, @r0
48
49 /* save sr */
50 mova saved_sr, r0
51 stc sr, r5
52 mov.l r5, @r0
53
54 /* save mode flags */
55 mova saved_mode, r0
56 mov.l r4, @r0
57
58 /* put mode flags in r0 */
25 mov r4, r0 59 mov r4, r0
26 60
27 tst #SUSP_SH_SF, r0 61 tst #SUSP_SH_SF, r0
28 bt skip_set_sf 62 bt skip_set_sf
63#ifdef CONFIG_CPU_SUBTYPE_SH7724
64 /* DBSC: put memory in self-refresh mode */
65 mov.l dben_reg, r4
66 mov.l dben_data0, r1
67 mov.l r1, @r4
29 68
30 /* SDRAM: disable power down and put in self-refresh mode */ 69 mov.l dbrfpdn0_reg, r4
70 mov.l dbrfpdn0_data0, r1
71 mov.l r1, @r4
72
73 mov.l dbcmdcnt_reg, r4
74 mov.l dbcmdcnt_data0, r1
75 mov.l r1, @r4
76
77 mov.l dbcmdcnt_reg, r4
78 mov.l dbcmdcnt_data1, r1
79 mov.l r1, @r4
80
81 mov.l dbrfpdn0_reg, r4
82 mov.l dbrfpdn0_data1, r1
83 mov.l r1, @r4
84#else
85 /* SBSC: disable power down and put in self-refresh mode */
31 mov.l 1f, r4 86 mov.l 1f, r4
32 mov.l 2f, r1 87 mov.l 2f, r1
33 mov.l @r4, r2 88 mov.l @r4, r2
@@ -35,16 +90,9 @@ ENTRY(sh_mobile_standby)
35 mov.l 3f, r3 90 mov.l 3f, r3
36 and r3, r2 91 and r3, r2
37 mov.l r2, @r4 92 mov.l r2, @r4
93#endif
38 94
39skip_set_sf: 95skip_set_sf:
40 tst #SUSP_SH_SLEEP, r0
41 bt test_standby
42
43 /* set mode to "sleep mode" */
44 bra do_sleep
45 mov #0x00, r1
46
47test_standby:
48 tst #SUSP_SH_STANDBY, r0 96 tst #SUSP_SH_STANDBY, r0
49 bt test_rstandby 97 bt test_rstandby
50 98
@@ -62,62 +110,135 @@ test_rstandby:
62 110
63test_ustandby: 111test_ustandby:
64 tst #SUSP_SH_USTANDBY, r0 112 tst #SUSP_SH_USTANDBY, r0
65 bt done_sleep 113 bt force_sleep
66 114
67 /* set mode to "u-standby mode" */ 115 /* set mode to "u-standby mode" */
68 mov #0x10, r1 116 bra do_sleep
117 mov #0x10, r1
118
119force_sleep:
69 120
70 /* fall-through */ 121 /* set mode to "sleep mode" */
122 mov #0x00, r1
71 123
72do_sleep: 124do_sleep:
73 /* setup and enter selected standby mode */ 125 /* setup and enter selected standby mode */
74 mov.l 5f, r4 126 mov.l 5f, r4
75 mov.l r1, @r4 127 mov.l r1, @r4
128again:
76 sleep 129 sleep
130 bra again
131 nop
132
133restore_jump_vbr:
134 /* setup spc with return address to c code */
135 mov.l saved_spc, k0
136 ldc k0, spc
137
138 /* restore vbr */
139 mov.l saved_vbr, k0
140 ldc k0, vbr
141
142 /* setup ssr with saved sr */
143 mov.l saved_sr, k0
144 ldc k0, ssr
145
146 /* get mode flags */
147 mov.l saved_mode, k0
77 148
78done_sleep: 149done_sleep:
79 /* reset standby mode to sleep mode */ 150 /* reset standby mode to sleep mode */
80 mov.l 5f, r4 151 mov.l 5f, k4
81 mov #0x00, r1 152 mov #0x00, k1
82 mov.l r1, @r4 153 mov.l k1, @k4
83 154
84 tst #SUSP_SH_SF, r0 155 tst #SUSP_SH_SF, k0
85 bt skip_restore_sf 156 bt skip_restore_sf
86 157
87 /* SDRAM: set auto-refresh mode */ 158#ifdef CONFIG_CPU_SUBTYPE_SH7724
88 mov.l 1f, r4 159 /* DBSC: put memory in auto-refresh mode */
89 mov.l @r4, r2 160 mov.l dbrfpdn0_reg, k4
90 mov.l 4f, r3 161 mov.l dbrfpdn0_data0, k1
91 and r3, r2 162 mov.l k1, @k4
92 mov.l r2, @r4 163
93 mov.l 6f, r4 164 nop /* sleep 140 ns */
94 mov.l 7f, r1 165 nop
95 mov.l 8f, r2 166 nop
96 mov.l @r4, r3 167 nop
97 mov #-1, r4 168
98 add r4, r3 169 mov.l dbcmdcnt_reg, k4
99 or r2, r3 170 mov.l dbcmdcnt_data0, k1
100 mov.l r3, @r1 171 mov.l k1, @k4
172
173 mov.l dbcmdcnt_reg, k4
174 mov.l dbcmdcnt_data1, k1
175 mov.l k1, @k4
176
177 mov.l dben_reg, k4
178 mov.l dben_data1, k1
179 mov.l k1, @k4
180
181 mov.l dbrfpdn0_reg, k4
182 mov.l dbrfpdn0_data2, k1
183 mov.l k1, @k4
184#else
185 /* SBSC: set auto-refresh mode */
186 mov.l 1f, k4
187 mov.l @k4, k0
188 mov.l 4f, k1
189 and k1, k0
190 mov.l k0, @k4
191 mov.l 6f, k4
192 mov.l 8f, k0
193 mov.l @k4, k1
194 mov #-1, k4
195 add k4, k1
196 or k1, k0
197 mov.l 7f, k1
198 mov.l k0, @k1
199#endif
101skip_restore_sf: 200skip_restore_sf:
102 rts 201 /* jump to vbr vector */
202 mov.l saved_vbr, k0
203 mov.l offset_vbr, k4
204 add k4, k0
205 jmp @k0
103 nop 206 nop
104 207
105 .balign 4 208 .balign 4
209saved_mode: .long 0
210saved_spc: .long 0
211saved_sr: .long 0
212saved_vbr: .long 0
213offset_vbr: .long 0x600
214#ifdef CONFIG_CPU_SUBTYPE_SH7724
215dben_reg: .long 0xfd000010 /* DBEN */
216dben_data0: .long 0
217dben_data1: .long 1
218dbrfpdn0_reg: .long 0xfd000040 /* DBRFPDN0 */
219dbrfpdn0_data0: .long 0
220dbrfpdn0_data1: .long 1
221dbrfpdn0_data2: .long 0x00010000
222dbcmdcnt_reg: .long 0xfd000014 /* DBCMDCNT */
223dbcmdcnt_data0: .long 2
224dbcmdcnt_data1: .long 4
225#else
1061: .long 0xfe400008 /* SDCR0 */ 2261: .long 0xfe400008 /* SDCR0 */
1072: .long 0x00000400 2272: .long 0x00000400
1083: .long 0xffff7fff 2283: .long 0xffff7fff
1094: .long 0xfffffbff 2294: .long 0xfffffbff
230#endif
1105: .long 0xa4150020 /* STBCR */ 2315: .long 0xa4150020 /* STBCR */
1116: .long 0xfe40001c /* RTCOR */ 2326: .long 0xfe40001c /* RTCOR */
1127: .long 0xfe400018 /* RTCNT */ 2337: .long 0xfe400018 /* RTCNT */
1138: .long 0xa55a0000 2348: .long 0xa55a0000
114 235
236
115/* interrupt vector @ 0x600 */ 237/* interrupt vector @ 0x600 */
116 .balign 0x400,0,0x400 238 .balign 0x400,0,0x400
117 .long 0xdeadbeef 239 .long 0xdeadbeef
118 .balign 0x200,0,0x200 240 .balign 0x200,0,0x200
119 /* sh7722 will end up here in sleep mode */ 241 bra restore_jump_vbr
120 rte
121 nop 242 nop
122sh_mobile_standby_end: 243sh_mobile_standby_end:
123 244
diff --git a/arch/sh/kernel/dwarf.c b/arch/sh/kernel/dwarf.c
index db021361b161..5fd6e604816d 100644
--- a/arch/sh/kernel/dwarf.c
+++ b/arch/sh/kernel/dwarf.c
@@ -330,7 +330,6 @@ struct dwarf_fde *dwarf_lookup_fde(unsigned long pc)
330 * @fde: the FDE for this function 330 * @fde: the FDE for this function
331 * @frame: the instructions calculate the CFA for this frame 331 * @frame: the instructions calculate the CFA for this frame
332 * @pc: the program counter of the address we're interested in 332 * @pc: the program counter of the address we're interested in
333 * @define_ra: keep executing insns until the return addr reg is defined?
334 * 333 *
335 * Execute the Call Frame instruction sequence starting at 334 * Execute the Call Frame instruction sequence starting at
336 * @insn_start and ending at @insn_end. The instructions describe 335 * @insn_start and ending at @insn_end. The instructions describe
@@ -342,36 +341,17 @@ static int dwarf_cfa_execute_insns(unsigned char *insn_start,
342 struct dwarf_cie *cie, 341 struct dwarf_cie *cie,
343 struct dwarf_fde *fde, 342 struct dwarf_fde *fde,
344 struct dwarf_frame *frame, 343 struct dwarf_frame *frame,
345 unsigned long pc, 344 unsigned long pc)
346 bool define_ra)
347{ 345{
348 unsigned char insn; 346 unsigned char insn;
349 unsigned char *current_insn; 347 unsigned char *current_insn;
350 unsigned int count, delta, reg, expr_len, offset; 348 unsigned int count, delta, reg, expr_len, offset;
351 bool seen_ra_reg;
352 349
353 current_insn = insn_start; 350 current_insn = insn_start;
354 351
355 /* 352 while (current_insn < insn_end && frame->pc <= pc) {
356 * If we're executing instructions for the dwarf_unwind_stack()
357 * FDE we need to keep executing instructions until the value of
358 * DWARF_ARCH_RA_REG is defined. See the comment in
359 * dwarf_unwind_stack() for more details.
360 */
361 if (define_ra)
362 seen_ra_reg = false;
363 else
364 seen_ra_reg = true;
365
366 while (current_insn < insn_end && (frame->pc <= pc || !seen_ra_reg) ) {
367 insn = __raw_readb(current_insn++); 353 insn = __raw_readb(current_insn++);
368 354
369 if (!seen_ra_reg) {
370 if (frame->num_regs >= DWARF_ARCH_RA_REG &&
371 frame->regs[DWARF_ARCH_RA_REG].flags)
372 seen_ra_reg = true;
373 }
374
375 /* 355 /*
376 * Firstly, handle the opcodes that embed their operands 356 * Firstly, handle the opcodes that embed their operands
377 * in the instructions. 357 * in the instructions.
@@ -484,6 +464,19 @@ static int dwarf_cfa_execute_insns(unsigned char *insn_start,
484 frame->regs[reg].flags |= DWARF_REG_OFFSET; 464 frame->regs[reg].flags |= DWARF_REG_OFFSET;
485 frame->regs[reg].addr = offset; 465 frame->regs[reg].addr = offset;
486 break; 466 break;
467 case DW_CFA_GNU_args_size:
468 count = dwarf_read_uleb128(current_insn, &offset);
469 current_insn += count;
470 break;
471 case DW_CFA_GNU_negative_offset_extended:
472 count = dwarf_read_uleb128(current_insn, &reg);
473 current_insn += count;
474 count = dwarf_read_uleb128(current_insn, &offset);
475 offset *= cie->data_alignment_factor;
476 dwarf_frame_alloc_regs(frame, reg);
477 frame->regs[reg].flags |= DWARF_REG_OFFSET;
478 frame->regs[reg].addr = -offset;
479 break;
487 default: 480 default:
488 pr_debug("unhandled DWARF instruction 0x%x\n", insn); 481 pr_debug("unhandled DWARF instruction 0x%x\n", insn);
489 break; 482 break;
@@ -510,26 +503,17 @@ struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
510 struct dwarf_fde *fde; 503 struct dwarf_fde *fde;
511 unsigned long addr; 504 unsigned long addr;
512 int i, offset; 505 int i, offset;
513 bool define_ra = false;
514 506
515 /* 507 /*
516 * If this is the first invocation of this recursive function we 508 * If this is the first invocation of this recursive function we
517 * need get the contents of a physical register to get the CFA 509 * need get the contents of a physical register to get the CFA
518 * in order to begin the virtual unwinding of the stack. 510 * in order to begin the virtual unwinding of the stack.
519 * 511 *
520 * Setting "define_ra" to true indictates that we want
521 * dwarf_cfa_execute_insns() to continue executing instructions
522 * until we know how to calculate the value of DWARF_ARCH_RA_REG
523 * (which we need in order to kick off the whole unwinding
524 * process).
525 *
526 * NOTE: the return address is guaranteed to be setup by the 512 * NOTE: the return address is guaranteed to be setup by the
527 * time this function makes its first function call. 513 * time this function makes its first function call.
528 */ 514 */
529 if (!pc && !prev) { 515 if (!pc && !prev)
530 pc = (unsigned long)&dwarf_unwind_stack; 516 pc = (unsigned long)current_text_addr();
531 define_ra = true;
532 }
533 517
534 frame = kzalloc(sizeof(*frame), GFP_ATOMIC); 518 frame = kzalloc(sizeof(*frame), GFP_ATOMIC);
535 if (!frame) 519 if (!frame)
@@ -565,11 +549,11 @@ struct dwarf_frame *dwarf_unwind_stack(unsigned long pc,
565 /* CIE initial instructions */ 549 /* CIE initial instructions */
566 dwarf_cfa_execute_insns(cie->initial_instructions, 550 dwarf_cfa_execute_insns(cie->initial_instructions,
567 cie->instructions_end, cie, fde, 551 cie->instructions_end, cie, fde,
568 frame, pc, false); 552 frame, pc);
569 553
570 /* FDE instructions */ 554 /* FDE instructions */
571 dwarf_cfa_execute_insns(fde->instructions, fde->end, cie, 555 dwarf_cfa_execute_insns(fde->instructions, fde->end, cie,
572 fde, frame, pc, define_ra); 556 fde, frame, pc);
573 557
574 /* Calculate the CFA */ 558 /* Calculate the CFA */
575 switch (frame->flags) { 559 switch (frame->flags) {
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index e63178fefb9b..700477601c6f 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -77,15 +77,6 @@ ENTRY(ret_from_irq)
77 ! 77 !
78 mov #OFF_SR, r0 78 mov #OFF_SR, r0
79 mov.l @(r0,r15), r0 ! get status register 79 mov.l @(r0,r15), r0 ! get status register
80
81 shlr2 r0
82 and #0x3c, r0
83 cmp/eq #0x3c, r0
84 bt 9f
85 TRACE_IRQS_ON
869:
87 mov #OFF_SR, r0
88 mov.l @(r0,r15), r0 ! get status register
89 shll r0 80 shll r0
90 shll r0 ! kernel space? 81 shll r0 ! kernel space?
91 get_current_thread_info r8, r0 82 get_current_thread_info r8, r0
@@ -96,6 +87,7 @@ ENTRY(ret_from_irq)
96 nop 87 nop
97ENTRY(resume_kernel) 88ENTRY(resume_kernel)
98 cli 89 cli
90 TRACE_IRQS_OFF
99 mov.l @(TI_PRE_COUNT,r8), r0 ! current_thread_info->preempt_count 91 mov.l @(TI_PRE_COUNT,r8), r0 ! current_thread_info->preempt_count
100 tst r0, r0 92 tst r0, r0
101 bf noresched 93 bf noresched
@@ -213,12 +205,25 @@ syscall_trace_entry:
213 mov.l r0, @(OFF_R0,r15) ! Return value 205 mov.l r0, @(OFF_R0,r15) ! Return value
214 206
215__restore_all: 207__restore_all:
216 mov.l 1f, r0 208 mov #OFF_SR, r0
209 mov.l @(r0,r15), r0 ! get status register
210
211 shlr2 r0
212 and #0x3c, r0
213 cmp/eq #0x3c, r0
214 bt 1f
215 TRACE_IRQS_ON
216 bra 2f
217 nop
2181:
219 TRACE_IRQS_OFF
2202:
221 mov.l 3f, r0
217 jmp @r0 222 jmp @r0
218 nop 223 nop
219 224
220 .align 2 225 .align 2
2211: .long restore_all 2263: .long restore_all
222 227
223 .align 2 228 .align 2
224syscall_badsys: ! Bad syscall number 229syscall_badsys: ! Bad syscall number
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486)
27 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
28 select HAVE_KPROBES 29 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
742config X86_LOCAL_APIC 743config X86_LOCAL_APIC
743 def_bool y 744 def_bool y
744 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 745 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
745 select HAVE_PERF_COUNTERS if (!M386 && !M486)
746 746
747config X86_IO_APIC 747config X86_IO_APIC
748 def_bool y 748 def_bool y
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
400 level = cpuid_eax(1); 400 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
403 } 410 }
404 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
405 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..8bc64cfbe936 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
39 40
40static atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
41 42
@@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
96{ 97{
97 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
98 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
99 102
100 if (curr) 103 if (is_throttled)
101 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
102 105
103 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
104 return 0; 108 return 0;
105 109
106 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
107 111
108 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
109 if (curr) { 113 if (is_throttled) {
110 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
111 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
112 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
113 117
114 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
115 } else { 119 } else if (was_throttled) {
116 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
117 } 121 }
118 122
119 return 1; 123 return 1;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f900954..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
55 int num_counters_fixed; 55 int num_counters_fixed;
56 int counter_bits; 56 int counter_bits;
57 u64 counter_mask; 57 u64 counter_mask;
58 int apic;
58 u64 max_period; 59 u64 max_period;
59 u64 intel_ctrl; 60 u64 intel_ctrl;
60}; 61};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
72{ 73{
73 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 74 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
74 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 75 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
75 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 76 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
76 [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 77 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
77 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 78 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
78 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 79 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
79 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 80 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
613 614
614static bool reserve_pmc_hardware(void) 615static bool reserve_pmc_hardware(void)
615{ 616{
617#ifdef CONFIG_X86_LOCAL_APIC
616 int i; 618 int i;
617 619
618 if (nmi_watchdog == NMI_LOCAL_APIC) 620 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
627 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 629 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
628 goto eventsel_fail; 630 goto eventsel_fail;
629 } 631 }
632#endif
630 633
631 return true; 634 return true;
632 635
636#ifdef CONFIG_X86_LOCAL_APIC
633eventsel_fail: 637eventsel_fail:
634 for (i--; i >= 0; i--) 638 for (i--; i >= 0; i--)
635 release_evntsel_nmi(x86_pmu.eventsel + i); 639 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
644 enable_lapic_nmi_watchdog(); 648 enable_lapic_nmi_watchdog();
645 649
646 return false; 650 return false;
651#endif
647} 652}
648 653
649static void release_pmc_hardware(void) 654static void release_pmc_hardware(void)
650{ 655{
656#ifdef CONFIG_X86_LOCAL_APIC
651 int i; 657 int i;
652 658
653 for (i = 0; i < x86_pmu.num_counters; i++) { 659 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
657 663
658 if (nmi_watchdog == NMI_LOCAL_APIC) 664 if (nmi_watchdog == NMI_LOCAL_APIC)
659 enable_lapic_nmi_watchdog(); 665 enable_lapic_nmi_watchdog();
666#endif
660} 667}
661 668
662static void hw_perf_counter_destroy(struct perf_counter *counter) 669static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
748 hwc->sample_period = x86_pmu.max_period; 755 hwc->sample_period = x86_pmu.max_period;
749 hwc->last_period = hwc->sample_period; 756 hwc->last_period = hwc->sample_period;
750 atomic64_set(&hwc->period_left, hwc->sample_period); 757 atomic64_set(&hwc->period_left, hwc->sample_period);
758 } else {
759 /*
760 * If we have a PMU initialized but no APIC
761 * interrupts, we cannot sample hardware
762 * counters (user-space has to fall back and
763 * sample via a hrtimer based software counter):
764 */
765 if (!x86_pmu.apic)
766 return -EOPNOTSUPP;
751 } 767 }
752 768
753 counter->destroy = hw_perf_counter_destroy; 769 counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1449 1465
1450void set_perf_counter_pending(void) 1466void set_perf_counter_pending(void)
1451{ 1467{
1468#ifdef CONFIG_X86_LOCAL_APIC
1452 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1469 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470#endif
1453} 1471}
1454 1472
1455void perf_counters_lapic_init(void) 1473void perf_counters_lapic_init(void)
1456{ 1474{
1457 if (!x86_pmu_initialized()) 1475#ifdef CONFIG_X86_LOCAL_APIC
1476 if (!x86_pmu.apic || !x86_pmu_initialized())
1458 return; 1477 return;
1459 1478
1460 /* 1479 /*
1461 * Always use NMI for PMU 1480 * Always use NMI for PMU
1462 */ 1481 */
1463 apic_write(APIC_LVTPC, APIC_DM_NMI); 1482 apic_write(APIC_LVTPC, APIC_DM_NMI);
1483#endif
1464} 1484}
1465 1485
1466static int __kprobes 1486static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1484 1504
1485 regs = args->regs; 1505 regs = args->regs;
1486 1506
1507#ifdef CONFIG_X86_LOCAL_APIC
1487 apic_write(APIC_LVTPC, APIC_DM_NMI); 1508 apic_write(APIC_LVTPC, APIC_DM_NMI);
1509#endif
1488 /* 1510 /*
1489 * Can't rely on the handled return value to say it was our NMI, two 1511 * Can't rely on the handled return value to say it was our NMI, two
1490 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1512 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
1515 .event_map = p6_pmu_event_map, 1537 .event_map = p6_pmu_event_map,
1516 .raw_event = p6_pmu_raw_event, 1538 .raw_event = p6_pmu_raw_event,
1517 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1539 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1540 .apic = 1,
1518 .max_period = (1ULL << 31) - 1, 1541 .max_period = (1ULL << 31) - 1,
1519 .version = 0, 1542 .version = 0,
1520 .num_counters = 2, 1543 .num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
1541 .event_map = intel_pmu_event_map, 1564 .event_map = intel_pmu_event_map,
1542 .raw_event = intel_pmu_raw_event, 1565 .raw_event = intel_pmu_raw_event,
1543 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1566 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1567 .apic = 1,
1544 /* 1568 /*
1545 * Intel PMCs cannot be accessed sanely above 32 bit width, 1569 * Intel PMCs cannot be accessed sanely above 32 bit width,
1546 * so we install an artificial 1<<31 period regardless of 1570 * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
1564 .num_counters = 4, 1588 .num_counters = 4,
1565 .counter_bits = 48, 1589 .counter_bits = 48,
1566 .counter_mask = (1ULL << 48) - 1, 1590 .counter_mask = (1ULL << 48) - 1,
1591 .apic = 1,
1567 /* use highest bit to detect overflow */ 1592 /* use highest bit to detect overflow */
1568 .max_period = (1ULL << 47) - 1, 1593 .max_period = (1ULL << 47) - 1,
1569}; 1594};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
1589 return -ENODEV; 1614 return -ENODEV;
1590 } 1615 }
1591 1616
1617 x86_pmu = p6_pmu;
1618
1592 if (!cpu_has_apic) { 1619 if (!cpu_has_apic) {
1593 pr_info("no Local APIC, try rebooting with lapic"); 1620 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1594 return -ENODEV; 1621 pr_info("no hardware sampling interrupt available.\n");
1622 x86_pmu.apic = 0;
1595 } 1623 }
1596 1624
1597 x86_pmu = p6_pmu;
1598
1599 return 0; 1625 return 0;
1600} 1626}
1601 1627
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 9eb897603705..a06e8d101844 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -418,20 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
418} 418}
419 419
420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { 420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
421 { /* Handle problems with rebooting on Apple MacBook5,2 */ 421 { /* Handle problems with rebooting on Apple MacBook5 */
422 .callback = set_pci_reboot, 422 .callback = set_pci_reboot,
423 .ident = "Apple MacBook", 423 .ident = "Apple MacBook5",
424 .matches = { 424 .matches = {
425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), 425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), 426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
427 }, 427 },
428 }, 428 },
429 { /* Handle problems with rebooting on Apple MacBookPro5,1 */ 429 { /* Handle problems with rebooting on Apple MacBookPro5 */
430 .callback = set_pci_reboot, 430 .callback = set_pci_reboot,
431 .ident = "Apple MacBookPro5,1", 431 .ident = "Apple MacBookPro5",
432 .matches = { 432 .matches = {
433 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), 433 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
434 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5,1"), 434 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
435 }, 435 },
436 }, 436 },
437 { } 437 { }
diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 2964f5f4a7ef..6b3e0c2f33e2 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -40,6 +40,7 @@ struct sh_cmt_priv {
40 struct platform_device *pdev; 40 struct platform_device *pdev;
41 41
42 unsigned long flags; 42 unsigned long flags;
43 unsigned long flags_suspend;
43 unsigned long match_value; 44 unsigned long match_value;
44 unsigned long next_match_value; 45 unsigned long next_match_value;
45 unsigned long max_match_value; 46 unsigned long max_match_value;
@@ -667,11 +668,38 @@ static int __devexit sh_cmt_remove(struct platform_device *pdev)
667 return -EBUSY; /* cannot unregister clockevent and clocksource */ 668 return -EBUSY; /* cannot unregister clockevent and clocksource */
668} 669}
669 670
671static int sh_cmt_suspend(struct device *dev)
672{
673 struct platform_device *pdev = to_platform_device(dev);
674 struct sh_cmt_priv *p = platform_get_drvdata(pdev);
675
676 /* save flag state and stop CMT channel */
677 p->flags_suspend = p->flags;
678 sh_cmt_stop(p, p->flags);
679 return 0;
680}
681
682static int sh_cmt_resume(struct device *dev)
683{
684 struct platform_device *pdev = to_platform_device(dev);
685 struct sh_cmt_priv *p = platform_get_drvdata(pdev);
686
687 /* start CMT channel from saved state */
688 sh_cmt_start(p, p->flags_suspend);
689 return 0;
690}
691
692static struct dev_pm_ops sh_cmt_dev_pm_ops = {
693 .suspend = sh_cmt_suspend,
694 .resume = sh_cmt_resume,
695};
696
670static struct platform_driver sh_cmt_device_driver = { 697static struct platform_driver sh_cmt_device_driver = {
671 .probe = sh_cmt_probe, 698 .probe = sh_cmt_probe,
672 .remove = __devexit_p(sh_cmt_remove), 699 .remove = __devexit_p(sh_cmt_remove),
673 .driver = { 700 .driver = {
674 .name = "sh_cmt", 701 .name = "sh_cmt",
702 .pm = &sh_cmt_dev_pm_ops,
675 } 703 }
676}; 704};
677 705
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9b..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
1974 /* otherwise we have to go forward and ... */ 1975 /* otherwise we have to go forward and ... */
1975 mddev->events ++; 1976 mddev->events ++;
1976 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 1977 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1977 /* .. if the array isn't clean, insist on an odd 'events' */ 1978 /* .. if the array isn't clean, an 'even' event must also go
1978 if ((mddev->events&1)==0) { 1979 * to spares. */
1979 mddev->events++; 1980 if ((mddev->events&1)==0)
1980 nospares = 0; 1981 nospares = 0;
1981 }
1982 } else { 1982 } else {
1983 /* otherwise insist on an even 'events' (for clean states) */ 1983 /* otherwise an 'odd' event must go to spares */
1984 if ((mddev->events&1)) { 1984 if ((mddev->events&1))
1985 mddev->events++;
1986 nospares = 0; 1985 nospares = 0;
1987 }
1988 } 1986 }
1989 } 1987 }
1990 1988
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3601 if (max < mddev->resync_min) 3599 if (max < mddev->resync_min)
3602 return -EINVAL; 3600 return -EINVAL;
3603 if (max < mddev->resync_max && 3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3604 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3605 return -EBUSY; 3604 return -EBUSY;
3606 3605
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4304 struct gendisk *disk = mddev->gendisk; 4303 struct gendisk *disk = mddev->gendisk;
4305 mdk_rdev_t *rdev; 4304 mdk_rdev_t *rdev;
4306 4305
4306 mutex_lock(&mddev->open_mutex);
4307 if (atomic_read(&mddev->openers) > is_open) { 4307 if (atomic_read(&mddev->openers) > is_open) {
4308 printk("md: %s still in use.\n",mdname(mddev)); 4308 printk("md: %s still in use.\n",mdname(mddev));
4309 return -EBUSY; 4309 err = -EBUSY;
4310 } 4310 } else if (mddev->pers) {
4311
4312 if (mddev->pers) {
4313 4311
4314 if (mddev->sync_thread) { 4312 if (mddev->sync_thread) {
4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4313 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4367 set_disk_ro(disk, 1); 4365 set_disk_ro(disk, 1);
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4369 } 4367 }
4370 4368out:
4369 mutex_unlock(&mddev->open_mutex);
4370 if (err)
4371 return err;
4371 /* 4372 /*
4372 * Free resources if final stop 4373 * Free resources if final stop
4373 */ 4374 */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4433 blk_integrity_unregister(disk); 4434 blk_integrity_unregister(disk);
4434 md_new_event(mddev); 4435 md_new_event(mddev);
4435 sysfs_notify_dirent(mddev->sysfs_state); 4436 sysfs_notify_dirent(mddev->sysfs_state);
4436out:
4437 return err; 4437 return err;
4438} 4438}
4439 4439
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5518 } 5518 }
5519 BUG_ON(mddev != bdev->bd_disk->private_data); 5519 BUG_ON(mddev != bdev->bd_disk->private_data);
5520 5520
5521 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5521 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5522 goto out; 5522 goto out;
5523 5523
5524 err = 0; 5524 err = 0;
5525 atomic_inc(&mddev->openers); 5525 atomic_inc(&mddev->openers);
5526 mddev_unlock(mddev); 5526 mutex_unlock(&mddev->open_mutex);
5527 5527
5528 check_disk_change(bdev); 5528 check_disk_change(bdev);
5529 out: 5529 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf9..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee67dfa..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3786 sector_nr = raid5_size(mddev, 0, 0) 3786 sector_nr = raid5_size(mddev, 0, 0)
3787 - conf->reshape_progress; 3787 - conf->reshape_progress;
3788 } else if (mddev->delta_disks > 0 && 3788 } else if (mddev->delta_disks >= 0 &&
3789 conf->reshape_progress > 0) 3789 conf->reshape_progress > 0)
3790 sector_nr = conf->reshape_progress; 3790 sector_nr = conf->reshape_progress;
3791 sector_div(sector_nr, new_data_disks); 3791 sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
4509 (old_disks-max_degraded)); 4509 (old_disks-max_degraded));
4510 /* here_old is the first stripe that we might need to read 4510 /* here_old is the first stripe that we might need to read
4511 * from */ 4511 * from */
4512 if (here_new >= here_old) { 4512 if (mddev->delta_disks == 0) {
4513 /* We cannot be sure it is safe to start an in-place
4514 * reshape. It is only safe if user-space if monitoring
4515 * and taking constant backups.
4516 * mdadm always starts a situation like this in
4517 * readonly mode so it can take control before
4518 * allowing any writes. So just check for that.
4519 */
4520 if ((here_new * mddev->new_chunk_sectors !=
4521 here_old * mddev->chunk_sectors) ||
4522 mddev->ro == 0) {
4523 printk(KERN_ERR "raid5: in-place reshape must be started"
4524 " in read-only mode - aborting\n");
4525 return -EINVAL;
4526 }
4527 } else if (mddev->delta_disks < 0
4528 ? (here_new * mddev->new_chunk_sectors <=
4529 here_old * mddev->chunk_sectors)
4530 : (here_new * mddev->new_chunk_sectors >=
4531 here_old * mddev->chunk_sectors)) {
4513 /* Reading from the same stripe as writing to - bad */ 4532 /* Reading from the same stripe as writing to - bad */
4514 printk(KERN_ERR "raid5: reshape_position too early for " 4533 printk(KERN_ERR "raid5: reshape_position too early for "
4515 "auto-recovery - aborting.\n"); 4534 "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
5078 mddev->degraded--; 5097 mddev->degraded--;
5079 for (d = conf->raid_disks ; 5098 for (d = conf->raid_disks ;
5080 d < conf->raid_disks - mddev->delta_disks; 5099 d < conf->raid_disks - mddev->delta_disks;
5081 d++) 5100 d++) {
5082 raid5_remove_disk(mddev, d); 5101 mdk_rdev_t *rdev = conf->disks[d].rdev;
5102 if (rdev && raid5_remove_disk(mddev, d) == 0) {
5103 char nm[20];
5104 sprintf(nm, "rd%d", rdev->raid_disk);
5105 sysfs_remove_link(&mddev->kobj, nm);
5106 rdev->raid_disk = -1;
5107 }
5108 }
5083 } 5109 }
5084 mddev->layout = conf->algorithm; 5110 mddev->layout = conf->algorithm;
5085 mddev->chunk_sectors = conf->chunk_sectors; 5111 mddev->chunk_sectors = conf->chunk_sectors;
diff --git a/drivers/video/sh_mobile_lcdcfb.c b/drivers/video/sh_mobile_lcdcfb.c
index cff406de3d15..fc3f9662ceae 100644
--- a/drivers/video/sh_mobile_lcdcfb.c
+++ b/drivers/video/sh_mobile_lcdcfb.c
@@ -477,6 +477,9 @@ static int sh_mobile_lcdc_start(struct sh_mobile_lcdc_priv *priv)
477 /* tell the board code to enable the panel */ 477 /* tell the board code to enable the panel */
478 for (k = 0; k < ARRAY_SIZE(priv->ch); k++) { 478 for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
479 ch = &priv->ch[k]; 479 ch = &priv->ch[k];
480 if (!ch->enabled)
481 continue;
482
480 board_cfg = &ch->cfg.board_cfg; 483 board_cfg = &ch->cfg.board_cfg;
481 if (board_cfg->display_on) 484 if (board_cfg->display_on)
482 board_cfg->display_on(board_cfg->board_data); 485 board_cfg->display_on(board_cfg->board_data);
@@ -494,6 +497,8 @@ static void sh_mobile_lcdc_stop(struct sh_mobile_lcdc_priv *priv)
494 /* clean up deferred io and ask board code to disable panel */ 497 /* clean up deferred io and ask board code to disable panel */
495 for (k = 0; k < ARRAY_SIZE(priv->ch); k++) { 498 for (k = 0; k < ARRAY_SIZE(priv->ch); k++) {
496 ch = &priv->ch[k]; 499 ch = &priv->ch[k];
500 if (!ch->enabled)
501 continue;
497 502
498 /* deferred io mode: 503 /* deferred io mode:
499 * flush frame, and wait for frame end interrupt 504 * flush frame, and wait for frame end interrupt
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1914 * immediately to their right. 1914 * immediately to their right.
1915 */ 1915 */
1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1917 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1917 if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
1918 BUG_ON(right_child_el->l_tree_depth);
1918 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1919 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1919 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1920 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1920 } 1921 }
@@ -2476,15 +2477,37 @@ out_ret_path:
2476 return ret; 2477 return ret;
2477} 2478}
2478 2479
2479static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2480 struct ocfs2_path *path) 2481 int subtree_index, struct ocfs2_path *path)
2481{ 2482{
2482 int i, idx; 2483 int i, idx, ret;
2483 struct ocfs2_extent_rec *rec; 2484 struct ocfs2_extent_rec *rec;
2484 struct ocfs2_extent_list *el; 2485 struct ocfs2_extent_list *el;
2485 struct ocfs2_extent_block *eb; 2486 struct ocfs2_extent_block *eb;
2486 u32 range; 2487 u32 range;
2487 2488
2489 /*
2490 * In normal tree rotation process, we will never touch the
2491 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
2492 * doesn't reserve the credits for them either.
2493 *
2494 * But we do have a special case here which will update the rightmost
2495 * records for all the bh in the path.
2496 * So we have to allocate extra credits and access them.
2497 */
2498 ret = ocfs2_extend_trans(handle,
2499 handle->h_buffer_credits + subtree_index);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_journal_access_path(inode, handle, path);
2506 if (ret) {
2507 mlog_errno(ret);
2508 goto out;
2509 }
2510
2488 /* Path should always be rightmost. */ 2511 /* Path should always be rightmost. */
2489 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2512 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
2490 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2513 BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2505 2528
2506 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2529 ocfs2_journal_dirty(handle, path->p_node[i].bh);
2507 } 2530 }
2531out:
2532 return ret;
2508} 2533}
2509 2534
2510static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2717 if (del_right_subtree) { 2742 if (del_right_subtree) {
2718 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2719 subtree_index, dealloc); 2744 subtree_index, dealloc);
2720 ocfs2_update_edge_lengths(inode, handle, left_path); 2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
2746 left_path);
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2750 }
2721 2751
2722 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2752 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2723 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2753 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3034 3064
3035 ocfs2_unlink_subtree(inode, handle, left_path, path, 3065 ocfs2_unlink_subtree(inode, handle, left_path, path,
3036 subtree_index, dealloc); 3066 subtree_index, dealloc);
3037 ocfs2_update_edge_lengths(inode, handle, left_path); 3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
3068 left_path);
3069 if (ret) {
3070 mlog_errno(ret);
3071 goto out;
3072 }
3038 3073
3039 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 3074 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
3040 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 3075 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
193 (unsigned long long)OCFS2_I(inode)->ip_blkno); 193 (unsigned long long)OCFS2_I(inode)->ip_blkno);
194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
195 dump_stack(); 195 dump_stack();
196 goto bail;
196 } 197 }
197 198
198 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 199 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
894 */ 895 */
895 unsigned c_new; 896 unsigned c_new;
896 unsigned c_unwritten; 897 unsigned c_unwritten;
898 unsigned c_needs_zero;
897}; 899};
898 900
899static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
900{
901 return d->c_new || d->c_unwritten;
902}
903
904struct ocfs2_write_ctxt { 901struct ocfs2_write_ctxt {
905 /* Logical cluster position / len of write */ 902 /* Logical cluster position / len of write */
906 u32 w_cpos; 903 u32 w_cpos;
907 u32 w_clen; 904 u32 w_clen;
908 905
906 /* First cluster allocated in a nonsparse extend */
907 u32 w_first_new_cpos;
908
909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
910 910
911 /* 911 /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 wc->w_cpos = pos >> osb->s_clustersize_bits; 985 wc->w_cpos = pos >> osb->s_clustersize_bits;
986 wc->w_first_new_cpos = UINT_MAX;
986 cend = (pos + len - 1) >> osb->s_clustersize_bits; 987 cend = (pos + len - 1) >> osb->s_clustersize_bits;
987 wc->w_clen = cend - wc->w_cpos + 1; 988 wc->w_clen = cend - wc->w_cpos + 1;
988 get_bh(di_bh); 989 get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
1217 */ 1218 */
1218static int ocfs2_write_cluster(struct address_space *mapping, 1219static int ocfs2_write_cluster(struct address_space *mapping,
1219 u32 phys, unsigned int unwritten, 1220 u32 phys, unsigned int unwritten,
1221 unsigned int should_zero,
1220 struct ocfs2_alloc_context *data_ac, 1222 struct ocfs2_alloc_context *data_ac,
1221 struct ocfs2_alloc_context *meta_ac, 1223 struct ocfs2_alloc_context *meta_ac,
1222 struct ocfs2_write_ctxt *wc, u32 cpos, 1224 struct ocfs2_write_ctxt *wc, u32 cpos,
1223 loff_t user_pos, unsigned user_len) 1225 loff_t user_pos, unsigned user_len)
1224{ 1226{
1225 int ret, i, new, should_zero = 0; 1227 int ret, i, new;
1226 u64 v_blkno, p_blkno; 1228 u64 v_blkno, p_blkno;
1227 struct inode *inode = mapping->host; 1229 struct inode *inode = mapping->host;
1228 struct ocfs2_extent_tree et; 1230 struct ocfs2_extent_tree et;
1229 1231
1230 new = phys == 0 ? 1 : 0; 1232 new = phys == 0 ? 1 : 0;
1231 if (new || unwritten)
1232 should_zero = 1;
1233
1234 if (new) { 1233 if (new) {
1235 u32 tmp_pos; 1234 u32 tmp_pos;
1236 1235
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1301 if (tmpret) { 1300 if (tmpret) {
1302 mlog_errno(tmpret); 1301 mlog_errno(tmpret);
1303 if (ret == 0) 1302 if (ret == 0)
1304 tmpret = ret; 1303 ret = tmpret;
1305 } 1304 }
1306 } 1305 }
1307 1306
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1341 local_len = osb->s_clustersize - cluster_off; 1340 local_len = osb->s_clustersize - cluster_off;
1342 1341
1343 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1342 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1344 desc->c_unwritten, data_ac, meta_ac, 1343 desc->c_unwritten,
1344 desc->c_needs_zero,
1345 data_ac, meta_ac,
1345 wc, desc->c_cpos, pos, local_len); 1346 wc, desc->c_cpos, pos, local_len);
1346 if (ret) { 1347 if (ret) {
1347 mlog_errno(ret); 1348 mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1391 * newly allocated cluster. 1392 * newly allocated cluster.
1392 */ 1393 */
1393 desc = &wc->w_desc[0]; 1394 desc = &wc->w_desc[0];
1394 if (ocfs2_should_zero_cluster(desc)) 1395 if (desc->c_needs_zero)
1395 ocfs2_figure_cluster_boundaries(osb, 1396 ocfs2_figure_cluster_boundaries(osb,
1396 desc->c_cpos, 1397 desc->c_cpos,
1397 &wc->w_target_from, 1398 &wc->w_target_from,
1398 NULL); 1399 NULL);
1399 1400
1400 desc = &wc->w_desc[wc->w_clen - 1]; 1401 desc = &wc->w_desc[wc->w_clen - 1];
1401 if (ocfs2_should_zero_cluster(desc)) 1402 if (desc->c_needs_zero)
1402 ocfs2_figure_cluster_boundaries(osb, 1403 ocfs2_figure_cluster_boundaries(osb,
1403 desc->c_cpos, 1404 desc->c_cpos,
1404 NULL, 1405 NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1466 phys++; 1467 phys++;
1467 } 1468 }
1468 1469
1470 /*
1471 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
1472 * file that got extended. w_first_new_cpos tells us
1473 * where the newly allocated clusters are so we can
1474 * zero them.
1475 */
1476 if (desc->c_cpos >= wc->w_first_new_cpos) {
1477 BUG_ON(phys == 0);
1478 desc->c_needs_zero = 1;
1479 }
1480
1469 desc->c_phys = phys; 1481 desc->c_phys = phys;
1470 if (phys == 0) { 1482 if (phys == 0) {
1471 desc->c_new = 1; 1483 desc->c_new = 1;
1484 desc->c_needs_zero = 1;
1472 *clusters_to_alloc = *clusters_to_alloc + 1; 1485 *clusters_to_alloc = *clusters_to_alloc + 1;
1473 } 1486 }
1474 if (ext_flags & OCFS2_EXT_UNWRITTEN) 1487
1488 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1475 desc->c_unwritten = 1; 1489 desc->c_unwritten = 1;
1490 desc->c_needs_zero = 1;
1491 }
1476 1492
1477 num_clusters--; 1493 num_clusters--;
1478 } 1494 }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1632 if (newsize <= i_size_read(inode)) 1648 if (newsize <= i_size_read(inode))
1633 return 0; 1649 return 0;
1634 1650
1635 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); 1651 ret = ocfs2_extend_no_holes(inode, newsize, pos);
1636 if (ret) 1652 if (ret)
1637 mlog_errno(ret); 1653 mlog_errno(ret);
1638 1654
1655 wc->w_first_new_cpos =
1656 ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
1657
1639 return ret; 1658 return ret;
1640} 1659}
1641 1660
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1644 struct page **pagep, void **fsdata, 1663 struct page **pagep, void **fsdata,
1645 struct buffer_head *di_bh, struct page *mmap_page) 1664 struct buffer_head *di_bh, struct page *mmap_page)
1646{ 1665{
1647 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1666 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1648 unsigned int clusters_to_alloc, extents_to_split; 1667 unsigned int clusters_to_alloc, extents_to_split;
1649 struct ocfs2_write_ctxt *wc; 1668 struct ocfs2_write_ctxt *wc;
1650 struct inode *inode = mapping->host; 1669 struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1722 1741
1723 } 1742 }
1724 1743
1725 ocfs2_set_target_boundaries(osb, wc, pos, len, 1744 /*
1726 clusters_to_alloc + extents_to_split); 1745 * We have to zero sparse allocated clusters, unwritten extent clusters,
1746 * and non-sparse clusters we just extended. For non-sparse writes,
1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */
1749 if (clusters_to_alloc || extents_to_split ||
1750 wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)
1752 cluster_of_pages = 1;
1753 else
1754 cluster_of_pages = 0;
1755
1756 ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
1727 1757
1728 handle = ocfs2_start_trans(osb, credits); 1758 handle = ocfs2_start_trans(osb, credits);
1729 if (IS_ERR(handle)) { 1759 if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1756 * extent. 1786 * extent.
1757 */ 1787 */
1758 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1788 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1759 clusters_to_alloc + extents_to_split, 1789 cluster_of_pages, mmap_page);
1760 mmap_page);
1761 if (ret) { 1790 if (ret) {
1762 mlog_errno(ret); 1791 mlog_errno(ret);
1763 goto out_quota; 1792 goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
310 return ret; 310 return ret;
311} 311}
312 312
313static DEFINE_SPINLOCK(dentry_list_lock); 313DEFINE_SPINLOCK(dentry_list_lock);
314 314
315/* We limit the number of dentry locks to drop in one go. We have 315/* We limit the number of dentry locks to drop in one go. We have
316 * this limit so that we don't starve other users of ocfs2_wq. */ 316 * this limit so that we don't starve other users of ocfs2_wq. */
317#define DL_INODE_DROP_COUNT 64 317#define DL_INODE_DROP_COUNT 64
318 318
319/* Drop inode references from dentry locks */ 319/* Drop inode references from dentry locks */
320void ocfs2_drop_dl_inodes(struct work_struct *work) 320static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
321{ 321{
322 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
323 dentry_lock_work);
324 struct ocfs2_dentry_lock *dl; 322 struct ocfs2_dentry_lock *dl;
325 int drop_count = DL_INODE_DROP_COUNT;
326 323
327 spin_lock(&dentry_list_lock); 324 spin_lock(&dentry_list_lock);
328 while (osb->dentry_lock_list && drop_count--) { 325 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
329 dl = osb->dentry_lock_list; 326 dl = osb->dentry_lock_list;
330 osb->dentry_lock_list = dl->dl_next; 327 osb->dentry_lock_list = dl->dl_next;
331 spin_unlock(&dentry_list_lock); 328 spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
333 kfree(dl); 330 kfree(dl);
334 spin_lock(&dentry_list_lock); 331 spin_lock(&dentry_list_lock);
335 } 332 }
336 if (osb->dentry_lock_list) 333 spin_unlock(&dentry_list_lock);
334}
335
336void ocfs2_drop_dl_inodes(struct work_struct *work)
337{
338 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
339 dentry_lock_work);
340
341 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
342 /*
343 * Don't queue dropping if umount is in progress. We flush the
344 * list in ocfs2_dismount_volume
345 */
346 spin_lock(&dentry_list_lock);
347 if (osb->dentry_lock_list &&
348 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
337 queue_work(ocfs2_wq, &osb->dentry_lock_work); 349 queue_work(ocfs2_wq, &osb->dentry_lock_work);
338 spin_unlock(&dentry_list_lock); 350 spin_unlock(&dentry_list_lock);
339} 351}
340 352
353/* Flush the whole work queue */
354void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
355{
356 __ocfs2_drop_dl_inodes(osb, -1);
357}
358
341/* 359/*
342 * ocfs2_dentry_iput() and friends. 360 * ocfs2_dentry_iput() and friends.
343 * 361 *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
368 /* We leave dropping of inode reference to ocfs2_wq as that can 386 /* We leave dropping of inode reference to ocfs2_wq as that can
369 * possibly lead to inode deletion which gets tricky */ 387 * possibly lead to inode deletion which gets tricky */
370 spin_lock(&dentry_list_lock); 388 spin_lock(&dentry_list_lock);
371 if (!osb->dentry_lock_list) 389 if (!osb->dentry_lock_list &&
390 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
372 queue_work(ocfs2_wq, &osb->dentry_lock_work); 391 queue_work(ocfs2_wq, &osb->dentry_lock_work);
373 dl->dl_next = osb->dentry_lock_list; 392 dl->dl_next = osb->dentry_lock_list;
374 osb->dentry_lock_list = dl; 393 osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 50 u64 parent_blkno);
51 51
52extern spinlock_t dentry_list_lock;
53
52void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 54void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
53 struct ocfs2_dentry_lock *dl); 55 struct ocfs2_dentry_lock *dl);
54 56
55void ocfs2_drop_dl_inodes(struct work_struct *work); 57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
56 59
57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
58 int skip_unhashed); 61 int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
103 lock->ast_pending, lock->ml.type); 103 lock->ast_pending, lock->ml.type);
104 BUG(); 104 BUG();
105 } 105 }
106 BUG_ON(!list_empty(&lock->ast_list));
107 if (lock->ast_pending) 106 if (lock->ast_pending)
108 mlog(0, "lock has an ast getting flushed right now\n"); 107 mlog(0, "lock has an ast getting flushed right now\n");
109 108
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1118 1118
1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", 1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1120 dlm->name, res->lockname.len, res->lockname.name, 1120 dlm->name, res->lockname.len, res->lockname.name,
1121 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", 1121 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
1122 send_to); 1122 send_to);
1123 1123
1124 /* send it */ 1124 /* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
1851 if (ret) 1851 if (ret)
1852 goto out_dio; 1852 goto out_dio;
1853 1853
1854 count = ocount;
1854 ret = generic_write_checks(file, ppos, &count, 1855 ret = generic_write_checks(file, ppos, &count,
1855 S_ISBLK(inode->i_mode)); 1856 S_ISBLK(inode->i_mode));
1856 if (ret) 1857 if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
1918 1919
1919 mutex_unlock(&inode->i_mutex); 1920 mutex_unlock(&inode->i_mutex);
1920 1921
1922 if (written)
1923 ret = written;
1921 mlog_exit(ret); 1924 mlog_exit(ret);
1922 return written ? written : ret; 1925 return ret;
1923} 1926}
1924 1927
1925static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, 1928static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1954 os->os_osb = osb; 1954 os->os_osb = osb;
1955 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0; 1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1959}
1960 1960
1961void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1962{
1963 struct ocfs2_orphan_scan *os;
1964
1965 os = &osb->osb_orphan_scan;
1966 os->os_scantime = CURRENT_TIME;
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 1967 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1968 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else { 1969 else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 150void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 151
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
329/* extended attribute block update */ 330/* extended attribute block update */
330#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 331#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
331 332
333/* Update of a single quota block */
334#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
335
332/* global quotafile inode update, data block */ 336/* global quotafile inode update, data block */
333#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 337#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
338 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
334 339
340#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
335/* 341/*
336 * The two writes below can accidentally see global info dirty due 342 * The two writes below can accidentally see global info dirty due
337 * to set_info() quotactl so make them prepared for the writes. 343 * to set_info() quotactl so make them prepared for the writes.
338 */ 344 */
339/* quota data block, global info */ 345/* quota data block, global info */
340/* Write to local quota file */ 346/* Write to local quota file */
341#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) 347#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
348 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
342 349
343/* global quota data block, local quota data block, global quota inode, 350/* global quota data block, local quota data block, global quota inode,
344 * global quota info */ 351 * global quota info */
345#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) 352#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
353 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
346 354
347static inline int ocfs2_quota_trans_credits(struct super_block *sb) 355static inline int ocfs2_quota_trans_credits(struct super_block *sb)
348{ 356{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
355 return credits; 363 return credits;
356} 364}
357 365
358/* Number of credits needed for removing quota structure from file */
359int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
360/* Number of credits needed for initialization of new quota structure */
361int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
362
363/* group extend. inode update and last group update. */ 366/* group extend. inode update and last group update. */
364#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 367#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
365 368
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
225}; 225};
226 226
227#define OCFS2_OSB_SOFT_RO 0x0001 227#define OCFS2_OSB_SOFT_RO 0x0001
228#define OCFS2_OSB_HARD_RO 0x0002 228#define OCFS2_OSB_HARD_RO 0x0002
229#define OCFS2_OSB_ERROR_FS 0x0004 229#define OCFS2_OSB_ERROR_FS 0x0004
230#define OCFS2_DEFAULT_ATIME_QUANTUM 60 230#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
231
232#define OCFS2_DEFAULT_ATIME_QUANTUM 60
231 233
232struct ocfs2_journal; 234struct ocfs2_journal;
233struct ocfs2_slot_info; 235struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
490 spin_unlock(&osb->osb_lock); 492 spin_unlock(&osb->osb_lock);
491} 493}
492 494
495
496static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
497 unsigned long flag)
498{
499 unsigned long ret;
500
501 spin_lock(&osb->osb_lock);
502 ret = osb->osb_flags & flag;
503 spin_unlock(&osb->osb_lock);
504 return ret;
505}
506
493static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 507static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
494 int hard) 508 int hard)
495{ 509{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */ 53 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */ 54 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 55 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime); 70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime); 71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72 d->dqb_pad1 = d->dqb_pad2 = 0;
72} 73}
73 74
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot) 75static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
211 212
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); 213 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) { 214 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem); 215 loff_t rounded_end =
215 err = ocfs2_extend_no_holes(gqinode, off + len, off); 216 ocfs2_align_bytes_to_blocks(sb, off + len);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem); 217
217 if (err < 0) 218 /* Space is already allocated in ocfs2_global_read_dquot() */
218 goto out;
219 err = ocfs2_simple_size_update(gqinode, 219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh, 220 oinfo->dqi_gqi_bh,
221 off + len); 221 rounded_end);
222 if (err < 0) 222 if (err < 0)
223 goto out; 223 goto out;
224 new = 1; 224 new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 } 234 }
235 if (err) { 235 if (err) {
236 mlog_errno(err); 236 mlog_errno(err);
237 return err; 237 goto out;
238 } 238 }
239 lock_buffer(bh); 239 lock_buffer(bh);
240 if (new) 240 if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); 344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 345 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); 346 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); 347 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 351 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 352 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 353 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff); 354 msecs_to_jiffies(oinfo->dqi_syncms));
356 355
357out_err: 356out_err:
358 mlog_exit(status); 357 mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
402 return err; 401 return err;
403} 402}
404 403
404static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
405{
406 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
407
408 /*
409 * We may need to allocate tree blocks and a leaf block but not the
410 * root block
411 */
412 return oinfo->dqi_gi.dqi_qtree_depth;
413}
414
415static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
416{
417 /* We modify all the allocated blocks, tree root, and info block */
418 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
419 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
420}
421
405/* Read in information from global quota file and acquire a reference to it. 422/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */ 423 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot) 424int ocfs2_global_read_dquot(struct dquot *dquot)
408{ 425{
409 int err, err2, ex = 0; 426 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info = 427 struct super_block *sb = dquot->dq_sb;
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 428 int type = dquot->dq_type;
429 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
430 struct ocfs2_super *osb = OCFS2_SB(sb);
431 struct inode *gqinode = info->dqi_gqinode;
432 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
433 handle_t *handle = NULL;
412 434
413 err = ocfs2_qinfo_lock(info, 0); 435 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0) 436 if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
419 OCFS2_DQUOT(dquot)->dq_use_count++; 441 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; 442 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 443 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
444 ocfs2_qinfo_unlock(info, 0);
445
422 if (!dquot->dq_off) { /* No real quota entry? */ 446 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
425 err = ocfs2_qinfo_lock(info, 1);
426 if (err < 0)
427 goto out_qlock;
428 ex = 1; 447 ex = 1;
448 /*
449 * Add blocks to quota file before we start a transaction since
450 * locking allocators ranks above a transaction start
451 */
452 WARN_ON(journal_current_handle());
453 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
454 err = ocfs2_extend_no_holes(gqinode,
455 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
456 gqinode->i_size);
457 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
458 if (err < 0)
459 goto out;
429 } 460 }
461
462 handle = ocfs2_start_trans(osb,
463 ocfs2_calc_global_qinit_credits(sb, type));
464 if (IS_ERR(handle)) {
465 err = PTR_ERR(handle);
466 goto out;
467 }
468 err = ocfs2_qinfo_lock(info, ex);
469 if (err < 0)
470 goto out_trans;
430 err = qtree_write_dquot(&info->dqi_gi, dquot); 471 err = qtree_write_dquot(&info->dqi_gi, dquot);
431 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { 472 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
432 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); 473 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
438 ocfs2_qinfo_unlock(info, 1); 479 ocfs2_qinfo_unlock(info, 1);
439 else 480 else
440 ocfs2_qinfo_unlock(info, 0); 481 ocfs2_qinfo_unlock(info, 0);
482out_trans:
483 if (handle)
484 ocfs2_commit_trans(osb, handle);
441out: 485out:
442 if (err < 0) 486 if (err < 0)
443 mlog_errno(err); 487 mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
607 651
608 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 652 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
609 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 653 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
610 oinfo->dqi_syncjiff); 654 msecs_to_jiffies(oinfo->dqi_syncms));
611} 655}
612 656
613/* 657/*
@@ -635,20 +679,18 @@ out:
635 return status; 679 return status;
636} 680}
637 681
638int ocfs2_calc_qdel_credits(struct super_block *sb, int type) 682static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
639{ 683{
640 struct ocfs2_mem_dqinfo *oinfo; 684 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
641 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 685 /*
642 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; 686 * We modify tree, leaf block, global info, local chunk header,
643 687 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
644 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) 688 * accounts for inode update
645 return 0; 689 */
646 690 return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
647 oinfo = sb_dqinfo(sb, type)->dqi_priv; 691 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
648 /* We modify tree, leaf block, global info, local chunk header, 692 OCFS2_QINFO_WRITE_CREDITS +
649 * global and local inode */ 693 OCFS2_INODE_UPDATE_CREDITS;
650 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
651 2 * OCFS2_INODE_UPDATE_CREDITS;
652} 694}
653 695
654static int ocfs2_release_dquot(struct dquot *dquot) 696static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
680 return status; 722 return status;
681} 723}
682 724
683int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
684{
685 struct ocfs2_mem_dqinfo *oinfo;
686 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
687 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
688 struct ocfs2_dinode *lfe, *gfe;
689
690 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
691 return 0;
692
693 oinfo = sb_dqinfo(sb, type)->dqi_priv;
694 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
695 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
696 /* We can extend local file + global file. In local file we
697 * can modify info, chunk header block and dquot block. In
698 * global file we can modify info, tree and leaf block */
699 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
700 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
701 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
702}
703
704static int ocfs2_acquire_dquot(struct dquot *dquot) 725static int ocfs2_acquire_dquot(struct dquot *dquot)
705{ 726{
706 handle_t *handle;
707 struct ocfs2_mem_dqinfo *oinfo = 727 struct ocfs2_mem_dqinfo *oinfo =
708 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 728 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
709 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
710 int status = 0; 729 int status = 0;
711 730
712 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 731 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
715 status = ocfs2_lock_global_qf(oinfo, 1); 734 status = ocfs2_lock_global_qf(oinfo, 1);
716 if (status < 0) 735 if (status < 0)
717 goto out; 736 goto out;
718 handle = ocfs2_start_trans(osb,
719 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
720 if (IS_ERR(handle)) {
721 status = PTR_ERR(handle);
722 mlog_errno(status);
723 goto out_ilock;
724 }
725 status = dquot_acquire(dquot); 737 status = dquot_acquire(dquot);
726 ocfs2_commit_trans(osb, handle);
727out_ilock:
728 ocfs2_unlock_global_qf(oinfo, 1); 738 ocfs2_unlock_global_qf(oinfo, 1);
729out: 739out:
730 mlog_exit(status); 740 mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work);
1227
1228 kill_block_super(sb);
1229}
1230
1216static struct file_system_type ocfs2_fs_type = { 1231static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1232 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1233 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1234 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1235 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1236 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1237
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1238 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1239 .next = NULL
1226}; 1240};
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1833
1820 debugfs_remove(osb->osb_ctxt); 1834 debugfs_remove(osb->osb_ctxt);
1821 1835
1836 /*
1837 * Flush inode dropping work queue so that deletes are
1838 * performed while the filesystem is still working
1839 */
1840 ocfs2_drop_all_dl_inodes(osb);
1841
1822 /* Orphan scan should be stopped as early as possible */ 1842 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1843 ocfs2_orphan_scan_stop(osb);
1824 1844
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2001 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2002 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2003
2004 ocfs2_orphan_scan_init(osb);
2005
1984 status = ocfs2_recovery_init(osb); 2006 status = ocfs2_recovery_init(osb);
1985 if (status) { 2007 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2008 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index a9d823a93fe8..b53f7006cc4e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
115 PERF_SAMPLE_TID = 1U << 1, 115 PERF_SAMPLE_TID = 1U << 1,
116 PERF_SAMPLE_TIME = 1U << 2, 116 PERF_SAMPLE_TIME = 1U << 2,
117 PERF_SAMPLE_ADDR = 1U << 3, 117 PERF_SAMPLE_ADDR = 1U << 3,
118 PERF_SAMPLE_GROUP = 1U << 4, 118 PERF_SAMPLE_READ = 1U << 4,
119 PERF_SAMPLE_CALLCHAIN = 1U << 5, 119 PERF_SAMPLE_CALLCHAIN = 1U << 5,
120 PERF_SAMPLE_ID = 1U << 6, 120 PERF_SAMPLE_ID = 1U << 6,
121 PERF_SAMPLE_CPU = 1U << 7, 121 PERF_SAMPLE_CPU = 1U << 7,
@@ -127,16 +127,32 @@ enum perf_counter_sample_format {
127}; 127};
128 128
129/* 129/*
130 * Bits that can be set in attr.read_format to request that 130 * The format of the data returned by read() on a perf counter fd,
131 * reads on the counter should return the indicated quantities, 131 * as specified by attr.read_format:
132 * in increasing order of bit value, after the counter value. 132 *
133 * struct read_format {
134 * { u64 value;
135 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
136 * { u64 time_running; } && PERF_FORMAT_RUNNING
137 * { u64 id; } && PERF_FORMAT_ID
138 * } && !PERF_FORMAT_GROUP
139 *
140 * { u64 nr;
141 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
142 * { u64 time_running; } && PERF_FORMAT_RUNNING
143 * { u64 value;
144 * { u64 id; } && PERF_FORMAT_ID
145 * } cntr[nr];
146 * } && PERF_FORMAT_GROUP
147 * };
133 */ 148 */
134enum perf_counter_read_format { 149enum perf_counter_read_format {
135 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, 150 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
136 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, 151 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
137 PERF_FORMAT_ID = 1U << 2, 152 PERF_FORMAT_ID = 1U << 2,
153 PERF_FORMAT_GROUP = 1U << 3,
138 154
139 PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ 155 PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
140}; 156};
141 157
142#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 158#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -343,10 +359,8 @@ enum perf_event_type {
343 * struct { 359 * struct {
344 * struct perf_event_header header; 360 * struct perf_event_header header;
345 * u32 pid, tid; 361 * u32 pid, tid;
346 * u64 value; 362 *
347 * { u64 time_enabled; } && PERF_FORMAT_ENABLED 363 * struct read_format values;
348 * { u64 time_running; } && PERF_FORMAT_RUNNING
349 * { u64 parent_id; } && PERF_FORMAT_ID
350 * }; 364 * };
351 */ 365 */
352 PERF_EVENT_READ = 8, 366 PERF_EVENT_READ = 8,
@@ -364,11 +378,22 @@ enum perf_event_type {
364 * { u32 cpu, res; } && PERF_SAMPLE_CPU 378 * { u32 cpu, res; } && PERF_SAMPLE_CPU
365 * { u64 period; } && PERF_SAMPLE_PERIOD 379 * { u64 period; } && PERF_SAMPLE_PERIOD
366 * 380 *
367 * { u64 nr; 381 * { struct read_format values; } && PERF_SAMPLE_READ
368 * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
369 * 382 *
370 * { u64 nr, 383 * { u64 nr,
371 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN 384 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
385 *
386 * #
387 * # The RAW record below is opaque data wrt the ABI
388 * #
389 * # That is, the ABI doesn't make any promises wrt to
390 * # the stability of its content, it may vary depending
391 * # on event, hardware, kernel version and phase of
392 * # the moon.
393 * #
394 * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
395 * #
396 *
372 * { u32 size; 397 * { u32 size;
373 * char data[size];}&& PERF_SAMPLE_RAW 398 * char data[size];}&& PERF_SAMPLE_RAW
374 * }; 399 * };
@@ -694,6 +719,8 @@ struct perf_sample_data {
694 719
695extern int perf_counter_overflow(struct perf_counter *counter, int nmi, 720extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
696 struct perf_sample_data *data); 721 struct perf_sample_data *data);
722extern void perf_counter_output(struct perf_counter *counter, int nmi,
723 struct perf_sample_data *data);
697 724
698/* 725/*
699 * Return 1 for a software counter, 0 for a hardware counter 726 * Return 1 for a software counter, 0 for a hardware counter
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6788e1a4d4ca..cf3c2f5dba51 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,7 +77,14 @@ struct task_struct;
77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
78 { .flags = word, .bit_nr = bit, } 78 { .flags = word, .bit_nr = bit, }
79 79
80extern void init_waitqueue_head(wait_queue_head_t *q); 80extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
81
82#define init_waitqueue_head(q) \
83 do { \
84 static struct lock_class_key __key; \
85 \
86 __init_waitqueue_head((q), &__key); \
87 } while (0)
81 88
82#ifdef CONFIG_LOCKDEP 89#ifdef CONFIG_LOCKDEP
83# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ 90# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
diff --git a/kernel/futex.c b/kernel/futex.c
index 0672ff88f159..e18cfbdc7190 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue 1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1011 * q: the futex_q 1011 * q: the futex_q
1012 * key: the key of the requeue target futex 1012 * key: the key of the requeue target futex
1013 * hb: the hash_bucket of the requeue target futex
1013 * 1014 *
1014 * During futex_requeue, with requeue_pi=1, it is possible to acquire the 1015 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1015 * target futex if it is uncontended or via a lock steal. Set the futex_q key 1016 * target futex if it is uncontended or via a lock steal. Set the futex_q key
1016 * to the requeue target futex so the waiter can detect the wakeup on the right 1017 * to the requeue target futex so the waiter can detect the wakeup on the right
1017 * futex, but remove it from the hb and NULL the rt_waiter so it can detect 1018 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
1018 * atomic lock acquisition. Must be called with the q->lock_ptr held. 1019 * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1020 * to protect access to the pi_state to fixup the owner later. Must be called
1021 * with both q->lock_ptr and hb->lock held.
1019 */ 1022 */
1020static inline 1023static inline
1021void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) 1024void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1025 struct futex_hash_bucket *hb)
1022{ 1026{
1023 drop_futex_key_refs(&q->key); 1027 drop_futex_key_refs(&q->key);
1024 get_futex_key_refs(key); 1028 get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
1030 WARN_ON(!q->rt_waiter); 1034 WARN_ON(!q->rt_waiter);
1031 q->rt_waiter = NULL; 1035 q->rt_waiter = NULL;
1032 1036
1037 q->lock_ptr = &hb->lock;
1038#ifdef CONFIG_DEBUG_PI_LIST
1039 q->list.plist.lock = &hb->lock;
1040#endif
1041
1033 wake_up_state(q->task, TASK_NORMAL); 1042 wake_up_state(q->task, TASK_NORMAL);
1034} 1043}
1035 1044
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1088 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, 1097 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1089 set_waiters); 1098 set_waiters);
1090 if (ret == 1) 1099 if (ret == 1)
1091 requeue_pi_wake_futex(top_waiter, key2); 1100 requeue_pi_wake_futex(top_waiter, key2, hb2);
1092 1101
1093 return ret; 1102 return ret;
1094} 1103}
@@ -1247,8 +1256,15 @@ retry_private:
1247 if (!match_futex(&this->key, &key1)) 1256 if (!match_futex(&this->key, &key1))
1248 continue; 1257 continue;
1249 1258
1250 WARN_ON(!requeue_pi && this->rt_waiter); 1259 /*
1251 WARN_ON(requeue_pi && !this->rt_waiter); 1260 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
1261 * be paired with each other and no other futex ops.
1262 */
1263 if ((requeue_pi && !this->rt_waiter) ||
1264 (!requeue_pi && this->rt_waiter)) {
1265 ret = -EINVAL;
1266 break;
1267 }
1252 1268
1253 /* 1269 /*
1254 * Wake nr_wake waiters. For requeue_pi, if we acquired the 1270 * Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
1273 this->task, 1); 1289 this->task, 1);
1274 if (ret == 1) { 1290 if (ret == 1) {
1275 /* We got the lock. */ 1291 /* We got the lock. */
1276 requeue_pi_wake_futex(this, &key2); 1292 requeue_pi_wake_futex(this, &key2, hb2);
1277 continue; 1293 continue;
1278 } else if (ret) { 1294 } else if (ret) {
1279 /* -EDEADLK */ 1295 /* -EDEADLK */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b9ee29..235716556bf1 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
180 int cmd = op & FUTEX_CMD_MASK; 180 int cmd = op & FUTEX_CMD_MASK;
181 181
182 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 182 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
183 cmd == FUTEX_WAIT_BITSET)) { 183 cmd == FUTEX_WAIT_BITSET ||
184 cmd == FUTEX_WAIT_REQUEUE_PI)) {
184 if (get_compat_timespec(&ts, utime)) 185 if (get_compat_timespec(&ts, utime))
185 return -EFAULT; 186 return -EFAULT;
186 if (!timespec_valid(&ts)) 187 if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
191 t = ktime_add_safe(ktime_get(), t); 192 t = ktime_add_safe(ktime_get(), t);
192 tp = &t; 193 tp = &t;
193 } 194 }
194 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) 195 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
196 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
195 val2 = (int) (unsigned long) utime; 197 val2 = (int) (unsigned long) utime;
196 198
197 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 199 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c679db4687..d222515a5a06 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
761{ 761{
762 struct irq_desc *desc = irq_to_desc(irq); 762 struct irq_desc *desc = irq_to_desc(irq);
763 struct irqaction *action, **action_ptr; 763 struct irqaction *action, **action_ptr;
764 struct task_struct *irqthread;
765 unsigned long flags; 764 unsigned long flags;
766 765
767 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); 766 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
809 desc->chip->disable(irq); 808 desc->chip->disable(irq);
810 } 809 }
811 810
812 irqthread = action->thread;
813 action->thread = NULL;
814
815 spin_unlock_irqrestore(&desc->lock, flags); 811 spin_unlock_irqrestore(&desc->lock, flags);
816 812
817 unregister_handler_proc(irq, action); 813 unregister_handler_proc(irq, action);
@@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
819 /* Make sure it's not being used on another CPU: */ 815 /* Make sure it's not being used on another CPU: */
820 synchronize_irq(irq); 816 synchronize_irq(irq);
821 817
822 if (irqthread) {
823 if (!test_bit(IRQTF_DIED, &action->thread_flags))
824 kthread_stop(irqthread);
825 put_task_struct(irqthread);
826 }
827
828#ifdef CONFIG_DEBUG_SHIRQ 818#ifdef CONFIG_DEBUG_SHIRQ
829 /* 819 /*
830 * It's a shared IRQ -- the driver ought to be prepared for an IRQ 820 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
840 local_irq_restore(flags); 830 local_irq_restore(flags);
841 } 831 }
842#endif 832#endif
833
834 if (action->thread) {
835 if (!test_bit(IRQTF_DIED, &action->thread_flags))
836 kthread_stop(action->thread);
837 put_task_struct(action->thread);
838 }
839
843 return action; 840 return action;
844} 841}
845 842
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b0b20a07f394..534e20d14d63 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
88void __weak hw_perf_enable(void) { barrier(); } 88void __weak hw_perf_enable(void) { barrier(); }
89 89
90void __weak hw_perf_counter_setup(int cpu) { barrier(); } 90void __weak hw_perf_counter_setup(int cpu) { barrier(); }
91void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
91 92
92int __weak 93int __weak
93hw_perf_group_sched_in(struct perf_counter *group_leader, 94hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
306 return; 307 return;
307 308
308 counter->state = PERF_COUNTER_STATE_INACTIVE; 309 counter->state = PERF_COUNTER_STATE_INACTIVE;
310 if (counter->pending_disable) {
311 counter->pending_disable = 0;
312 counter->state = PERF_COUNTER_STATE_OFF;
313 }
309 counter->tstamp_stopped = ctx->time; 314 counter->tstamp_stopped = ctx->time;
310 counter->pmu->disable(counter); 315 counter->pmu->disable(counter);
311 counter->oncpu = -1; 316 counter->oncpu = -1;
@@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
1691 return 0; 1696 return 0;
1692} 1697}
1693 1698
1694static u64 perf_counter_read_tree(struct perf_counter *counter) 1699static int perf_counter_read_size(struct perf_counter *counter)
1700{
1701 int entry = sizeof(u64); /* value */
1702 int size = 0;
1703 int nr = 1;
1704
1705 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1706 size += sizeof(u64);
1707
1708 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1709 size += sizeof(u64);
1710
1711 if (counter->attr.read_format & PERF_FORMAT_ID)
1712 entry += sizeof(u64);
1713
1714 if (counter->attr.read_format & PERF_FORMAT_GROUP) {
1715 nr += counter->group_leader->nr_siblings;
1716 size += sizeof(u64);
1717 }
1718
1719 size += entry * nr;
1720
1721 return size;
1722}
1723
1724static u64 perf_counter_read_value(struct perf_counter *counter)
1695{ 1725{
1696 struct perf_counter *child; 1726 struct perf_counter *child;
1697 u64 total = 0; 1727 u64 total = 0;
@@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
1703 return total; 1733 return total;
1704} 1734}
1705 1735
1736static int perf_counter_read_entry(struct perf_counter *counter,
1737 u64 read_format, char __user *buf)
1738{
1739 int n = 0, count = 0;
1740 u64 values[2];
1741
1742 values[n++] = perf_counter_read_value(counter);
1743 if (read_format & PERF_FORMAT_ID)
1744 values[n++] = primary_counter_id(counter);
1745
1746 count = n * sizeof(u64);
1747
1748 if (copy_to_user(buf, values, count))
1749 return -EFAULT;
1750
1751 return count;
1752}
1753
1754static int perf_counter_read_group(struct perf_counter *counter,
1755 u64 read_format, char __user *buf)
1756{
1757 struct perf_counter *leader = counter->group_leader, *sub;
1758 int n = 0, size = 0, err = -EFAULT;
1759 u64 values[3];
1760
1761 values[n++] = 1 + leader->nr_siblings;
1762 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1763 values[n++] = leader->total_time_enabled +
1764 atomic64_read(&leader->child_total_time_enabled);
1765 }
1766 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1767 values[n++] = leader->total_time_running +
1768 atomic64_read(&leader->child_total_time_running);
1769 }
1770
1771 size = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, size))
1774 return -EFAULT;
1775
1776 err = perf_counter_read_entry(leader, read_format, buf + size);
1777 if (err < 0)
1778 return err;
1779
1780 size += err;
1781
1782 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
1783 err = perf_counter_read_entry(counter, read_format,
1784 buf + size);
1785 if (err < 0)
1786 return err;
1787
1788 size += err;
1789 }
1790
1791 return size;
1792}
1793
1794static int perf_counter_read_one(struct perf_counter *counter,
1795 u64 read_format, char __user *buf)
1796{
1797 u64 values[4];
1798 int n = 0;
1799
1800 values[n++] = perf_counter_read_value(counter);
1801 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1802 values[n++] = counter->total_time_enabled +
1803 atomic64_read(&counter->child_total_time_enabled);
1804 }
1805 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1806 values[n++] = counter->total_time_running +
1807 atomic64_read(&counter->child_total_time_running);
1808 }
1809 if (read_format & PERF_FORMAT_ID)
1810 values[n++] = primary_counter_id(counter);
1811
1812 if (copy_to_user(buf, values, n * sizeof(u64)))
1813 return -EFAULT;
1814
1815 return n * sizeof(u64);
1816}
1817
1706/* 1818/*
1707 * Read the performance counter - simple non blocking version for now 1819 * Read the performance counter - simple non blocking version for now
1708 */ 1820 */
1709static ssize_t 1821static ssize_t
1710perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) 1822perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1711{ 1823{
1712 u64 values[4]; 1824 u64 read_format = counter->attr.read_format;
1713 int n; 1825 int ret;
1714 1826
1715 /* 1827 /*
1716 * Return end-of-file for a read on a counter that is in 1828 * Return end-of-file for a read on a counter that is in
@@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1720 if (counter->state == PERF_COUNTER_STATE_ERROR) 1832 if (counter->state == PERF_COUNTER_STATE_ERROR)
1721 return 0; 1833 return 0;
1722 1834
1835 if (count < perf_counter_read_size(counter))
1836 return -ENOSPC;
1837
1723 WARN_ON_ONCE(counter->ctx->parent_ctx); 1838 WARN_ON_ONCE(counter->ctx->parent_ctx);
1724 mutex_lock(&counter->child_mutex); 1839 mutex_lock(&counter->child_mutex);
1725 values[0] = perf_counter_read_tree(counter); 1840 if (read_format & PERF_FORMAT_GROUP)
1726 n = 1; 1841 ret = perf_counter_read_group(counter, read_format, buf);
1727 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 1842 else
1728 values[n++] = counter->total_time_enabled + 1843 ret = perf_counter_read_one(counter, read_format, buf);
1729 atomic64_read(&counter->child_total_time_enabled);
1730 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1731 values[n++] = counter->total_time_running +
1732 atomic64_read(&counter->child_total_time_running);
1733 if (counter->attr.read_format & PERF_FORMAT_ID)
1734 values[n++] = primary_counter_id(counter);
1735 mutex_unlock(&counter->child_mutex); 1844 mutex_unlock(&counter->child_mutex);
1736 1845
1737 if (count < n * sizeof(u64)) 1846 return ret;
1738 return -EINVAL;
1739 count = n * sizeof(u64);
1740
1741 if (copy_to_user(buf, values, count))
1742 return -EFAULT;
1743
1744 return count;
1745} 1847}
1746 1848
1747static ssize_t 1849static ssize_t
@@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
2245 2347
2246 if (counter->pending_disable) { 2348 if (counter->pending_disable) {
2247 counter->pending_disable = 0; 2349 counter->pending_disable = 0;
2248 perf_counter_disable(counter); 2350 __perf_counter_disable(counter);
2249 } 2351 }
2250 2352
2251 if (counter->pending_wakeup) { 2353 if (counter->pending_wakeup) {
@@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
2630 return task_pid_nr_ns(p, counter->ns); 2732 return task_pid_nr_ns(p, counter->ns);
2631} 2733}
2632 2734
2633static void perf_counter_output(struct perf_counter *counter, int nmi, 2735static void perf_output_read_one(struct perf_output_handle *handle,
2736 struct perf_counter *counter)
2737{
2738 u64 read_format = counter->attr.read_format;
2739 u64 values[4];
2740 int n = 0;
2741
2742 values[n++] = atomic64_read(&counter->count);
2743 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2744 values[n++] = counter->total_time_enabled +
2745 atomic64_read(&counter->child_total_time_enabled);
2746 }
2747 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2748 values[n++] = counter->total_time_running +
2749 atomic64_read(&counter->child_total_time_running);
2750 }
2751 if (read_format & PERF_FORMAT_ID)
2752 values[n++] = primary_counter_id(counter);
2753
2754 perf_output_copy(handle, values, n * sizeof(u64));
2755}
2756
2757/*
2758 * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
2759 */
2760static void perf_output_read_group(struct perf_output_handle *handle,
2761 struct perf_counter *counter)
2762{
2763 struct perf_counter *leader = counter->group_leader, *sub;
2764 u64 read_format = counter->attr.read_format;
2765 u64 values[5];
2766 int n = 0;
2767
2768 values[n++] = 1 + leader->nr_siblings;
2769
2770 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2771 values[n++] = leader->total_time_enabled;
2772
2773 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2774 values[n++] = leader->total_time_running;
2775
2776 if (leader != counter)
2777 leader->pmu->read(leader);
2778
2779 values[n++] = atomic64_read(&leader->count);
2780 if (read_format & PERF_FORMAT_ID)
2781 values[n++] = primary_counter_id(leader);
2782
2783 perf_output_copy(handle, values, n * sizeof(u64));
2784
2785 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2786 n = 0;
2787
2788 if (sub != counter)
2789 sub->pmu->read(sub);
2790
2791 values[n++] = atomic64_read(&sub->count);
2792 if (read_format & PERF_FORMAT_ID)
2793 values[n++] = primary_counter_id(sub);
2794
2795 perf_output_copy(handle, values, n * sizeof(u64));
2796 }
2797}
2798
2799static void perf_output_read(struct perf_output_handle *handle,
2800 struct perf_counter *counter)
2801{
2802 if (counter->attr.read_format & PERF_FORMAT_GROUP)
2803 perf_output_read_group(handle, counter);
2804 else
2805 perf_output_read_one(handle, counter);
2806}
2807
2808void perf_counter_output(struct perf_counter *counter, int nmi,
2634 struct perf_sample_data *data) 2809 struct perf_sample_data *data)
2635{ 2810{
2636 int ret; 2811 int ret;
@@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2641 struct { 2816 struct {
2642 u32 pid, tid; 2817 u32 pid, tid;
2643 } tid_entry; 2818 } tid_entry;
2644 struct {
2645 u64 id;
2646 u64 counter;
2647 } group_entry;
2648 struct perf_callchain_entry *callchain = NULL; 2819 struct perf_callchain_entry *callchain = NULL;
2649 int callchain_size = 0; 2820 int callchain_size = 0;
2650 u64 time; 2821 u64 time;
@@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2699 if (sample_type & PERF_SAMPLE_PERIOD) 2870 if (sample_type & PERF_SAMPLE_PERIOD)
2700 header.size += sizeof(u64); 2871 header.size += sizeof(u64);
2701 2872
2702 if (sample_type & PERF_SAMPLE_GROUP) { 2873 if (sample_type & PERF_SAMPLE_READ)
2703 header.size += sizeof(u64) + 2874 header.size += perf_counter_read_size(counter);
2704 counter->nr_siblings * sizeof(group_entry);
2705 }
2706 2875
2707 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2876 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2708 callchain = perf_callchain(data->regs); 2877 callchain = perf_callchain(data->regs);
@@ -2759,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2759 if (sample_type & PERF_SAMPLE_PERIOD) 2928 if (sample_type & PERF_SAMPLE_PERIOD)
2760 perf_output_put(&handle, data->period); 2929 perf_output_put(&handle, data->period);
2761 2930
2762 /* 2931 if (sample_type & PERF_SAMPLE_READ)
2763 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. 2932 perf_output_read(&handle, counter);
2764 */
2765 if (sample_type & PERF_SAMPLE_GROUP) {
2766 struct perf_counter *leader, *sub;
2767 u64 nr = counter->nr_siblings;
2768
2769 perf_output_put(&handle, nr);
2770
2771 leader = counter->group_leader;
2772 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2773 if (sub != counter)
2774 sub->pmu->read(sub);
2775
2776 group_entry.id = primary_counter_id(sub);
2777 group_entry.counter = atomic64_read(&sub->count);
2778
2779 perf_output_put(&handle, group_entry);
2780 }
2781 }
2782 2933
2783 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2934 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2784 if (callchain) 2935 if (callchain)
@@ -2817,8 +2968,6 @@ struct perf_read_event {
2817 2968
2818 u32 pid; 2969 u32 pid;
2819 u32 tid; 2970 u32 tid;
2820 u64 value;
2821 u64 format[3];
2822}; 2971};
2823 2972
2824static void 2973static void
@@ -2830,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
2830 .header = { 2979 .header = {
2831 .type = PERF_EVENT_READ, 2980 .type = PERF_EVENT_READ,
2832 .misc = 0, 2981 .misc = 0,
2833 .size = sizeof(event) - sizeof(event.format), 2982 .size = sizeof(event) + perf_counter_read_size(counter),
2834 }, 2983 },
2835 .pid = perf_counter_pid(counter, task), 2984 .pid = perf_counter_pid(counter, task),
2836 .tid = perf_counter_tid(counter, task), 2985 .tid = perf_counter_tid(counter, task),
2837 .value = atomic64_read(&counter->count),
2838 }; 2986 };
2839 int ret, i = 0; 2987 int ret;
2840
2841 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2842 event.header.size += sizeof(u64);
2843 event.format[i++] = counter->total_time_enabled;
2844 }
2845
2846 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2847 event.header.size += sizeof(u64);
2848 event.format[i++] = counter->total_time_running;
2849 }
2850
2851 if (counter->attr.read_format & PERF_FORMAT_ID) {
2852 event.header.size += sizeof(u64);
2853 event.format[i++] = primary_counter_id(counter);
2854 }
2855 2988
2856 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); 2989 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
2857 if (ret) 2990 if (ret)
2858 return; 2991 return;
2859 2992
2860 perf_output_copy(&handle, &event, event.header.size); 2993 perf_output_put(&handle, event);
2994 perf_output_read(&handle, counter);
2995
2861 perf_output_end(&handle); 2996 perf_output_end(&handle);
2862} 2997}
2863 2998
@@ -2893,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
2893 return; 3028 return;
2894 3029
2895 task_event->event.pid = perf_counter_pid(counter, task); 3030 task_event->event.pid = perf_counter_pid(counter, task);
2896 task_event->event.ppid = perf_counter_pid(counter, task->real_parent); 3031 task_event->event.ppid = perf_counter_pid(counter, current);
2897 3032
2898 task_event->event.tid = perf_counter_tid(counter, task); 3033 task_event->event.tid = perf_counter_tid(counter, task);
2899 task_event->event.ptid = perf_counter_tid(counter, task->real_parent); 3034 task_event->event.ptid = perf_counter_tid(counter, current);
2900 3035
2901 perf_output_put(&handle, task_event->event); 3036 perf_output_put(&handle, task_event->event);
2902 perf_output_end(&handle); 3037 perf_output_end(&handle);
@@ -3443,40 +3578,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3443 3578
3444static int perf_swcounter_is_counting(struct perf_counter *counter) 3579static int perf_swcounter_is_counting(struct perf_counter *counter)
3445{ 3580{
3446 struct perf_counter_context *ctx; 3581 /*
3447 unsigned long flags; 3582 * The counter is active, we're good!
3448 int count; 3583 */
3449
3450 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 3584 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
3451 return 1; 3585 return 1;
3452 3586
3587 /*
3588 * The counter is off/error, not counting.
3589 */
3453 if (counter->state != PERF_COUNTER_STATE_INACTIVE) 3590 if (counter->state != PERF_COUNTER_STATE_INACTIVE)
3454 return 0; 3591 return 0;
3455 3592
3456 /* 3593 /*
3457 * If the counter is inactive, it could be just because 3594 * The counter is inactive, if the context is active
3458 * its task is scheduled out, or because it's in a group 3595 * we're part of a group that didn't make it on the 'pmu',
3459 * which could not go on the PMU. We want to count in 3596 * not counting.
3460 * the first case but not the second. If the context is
3461 * currently active then an inactive software counter must
3462 * be the second case. If it's not currently active then
3463 * we need to know whether the counter was active when the
3464 * context was last active, which we can determine by
3465 * comparing counter->tstamp_stopped with ctx->time.
3466 *
3467 * We are within an RCU read-side critical section,
3468 * which protects the existence of *ctx.
3469 */ 3597 */
3470 ctx = counter->ctx; 3598 if (counter->ctx->is_active)
3471 spin_lock_irqsave(&ctx->lock, flags); 3599 return 0;
3472 count = 1; 3600
3473 /* Re-check state now we have the lock */ 3601 /*
3474 if (counter->state < PERF_COUNTER_STATE_INACTIVE || 3602 * We're inactive and the context is too, this means the
3475 counter->ctx->is_active || 3603 * task is scheduled out, we're counting events that happen
3476 counter->tstamp_stopped < ctx->time) 3604 * to us, like migration events.
3477 count = 0; 3605 */
3478 spin_unlock_irqrestore(&ctx->lock, flags); 3606 return 1;
3479 return count;
3480} 3607}
3481 3608
3482static int perf_swcounter_match(struct perf_counter *counter, 3609static int perf_swcounter_match(struct perf_counter *counter,
@@ -3928,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3928 atomic64_set(&hwc->period_left, hwc->sample_period); 4055 atomic64_set(&hwc->period_left, hwc->sample_period);
3929 4056
3930 /* 4057 /*
3931 * we currently do not support PERF_SAMPLE_GROUP on inherited counters 4058 * we currently do not support PERF_FORMAT_GROUP on inherited counters
3932 */ 4059 */
3933 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) 4060 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
3934 goto done; 4061 goto done;
3935 4062
3936 switch (attr->type) { 4063 switch (attr->type) {
@@ -4592,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
4592 perf_counter_init_cpu(cpu); 4719 perf_counter_init_cpu(cpu);
4593 break; 4720 break;
4594 4721
4722 case CPU_ONLINE:
4723 case CPU_ONLINE_FROZEN:
4724 hw_perf_counter_setup_online(cpu);
4725 break;
4726
4595 case CPU_DOWN_PREPARE: 4727 case CPU_DOWN_PREPARE:
4596 case CPU_DOWN_PREPARE_FROZEN: 4728 case CPU_DOWN_PREPARE_FROZEN:
4597 perf_counter_exit_cpu(cpu); 4729 perf_counter_exit_cpu(cpu);
@@ -4616,6 +4748,8 @@ void __init perf_counter_init(void)
4616{ 4748{
4617 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 4749 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
4618 (void *)(long)smp_processor_id()); 4750 (void *)(long)smp_processor_id());
4751 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
4752 (void *)(long)smp_processor_id());
4619 register_cpu_notifier(&perf_cpu_nb); 4753 register_cpu_notifier(&perf_cpu_nb);
4620} 4754}
4621 4755
diff --git a/kernel/wait.c b/kernel/wait.c
index ea7c3b4275cf..c4bd3d825f35 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,13 +10,14 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
13void init_waitqueue_head(wait_queue_head_t *q) 13void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
14{ 14{
15 spin_lock_init(&q->lock); 15 spin_lock_init(&q->lock);
16 lockdep_set_class(&q->lock, key);
16 INIT_LIST_HEAD(&q->task_list); 17 INIT_LIST_HEAD(&q->task_list);
17} 18}
18 19
19EXPORT_SYMBOL(init_waitqueue_head); 20EXPORT_SYMBOL(__init_waitqueue_head);
20 21
21void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 22void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
22{ 23{
diff --git a/net/socket.c b/net/socket.c
index 791d71a36a93..6d4716559047 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
736 if (more) 736 if (more)
737 flags |= MSG_MORE; 737 flags |= MSG_MORE;
738 738
739 return sock->ops->sendpage(sock, page, offset, size, flags); 739 return kernel_sendpage(sock, page, offset, size, flags);
740} 740}
741 741
742static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 742static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 60411e94113b..c045b4271e57 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -382,22 +382,29 @@ endif
382ifdef NO_DEMANGLE 382ifdef NO_DEMANGLE
383 BASIC_CFLAGS += -DNO_DEMANGLE 383 BASIC_CFLAGS += -DNO_DEMANGLE
384else 384else
385
386 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") 385 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
387 386
388 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
389
390 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
391
392 ifeq ($(has_bfd),y) 387 ifeq ($(has_bfd),y)
393 EXTLIBS += -lbfd 388 EXTLIBS += -lbfd
394 else ifeq ($(has_bfd_iberty),y)
395 EXTLIBS += -lbfd -liberty
396 else ifeq ($(has_bfd_iberty_z),y)
397 EXTLIBS += -lbfd -liberty -lz
398 else 389 else
399 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) 390 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
400 BASIC_CFLAGS += -DNO_DEMANGLE 391 ifeq ($(has_bfd_iberty),y)
392 EXTLIBS += -lbfd -liberty
393 else
394 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
395 ifeq ($(has_bfd_iberty_z),y)
396 EXTLIBS += -lbfd -liberty -lz
397 else
398 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
399 ifeq ($(has_cplus_demangle),y)
400 EXTLIBS += -liberty
401 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
402 else
403 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
404 BASIC_CFLAGS += -DNO_DEMANGLE
405 endif
406 endif
407 endif
401 endif 408 endif
402endif 409endif
403 410
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8a35c9..d88c6961274c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
10 10
11#include "perf.h" 11#include "perf.h"
12 12
13#include "util/parse-options.h"
14#include "util/parse-events.h" 13#include "util/parse-events.h"
14#include "util/cache.h"
15 15
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
17{ 17{
18 setup_pager();
18 print_events(); 19 print_events();
19 return 0; 20 return 0;
20} 21}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0345aad8eba5..3d051b9cf25f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,7 +34,9 @@ static int output;
34static const char *output_name = "perf.data"; 34static const char *output_name = "perf.data";
35static int group = 0; 35static int group = 0;
36static unsigned int realtime_prio = 0; 36static unsigned int realtime_prio = 0;
37static int raw_samples = 0;
37static int system_wide = 0; 38static int system_wide = 0;
39static int profile_cpu = -1;
38static pid_t target_pid = -1; 40static pid_t target_pid = -1;
39static int inherit = 1; 41static int inherit = 1;
40static int force = 0; 42static int force = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
203 kill(getpid(), signr); 205 kill(getpid(), signr);
204} 206}
205 207
206static void pid_synthesize_comm_event(pid_t pid, int full) 208static pid_t pid_synthesize_comm_event(pid_t pid, int full)
207{ 209{
208 struct comm_event comm_ev; 210 struct comm_event comm_ev;
209 char filename[PATH_MAX]; 211 char filename[PATH_MAX];
210 char bf[BUFSIZ]; 212 char bf[BUFSIZ];
211 int fd; 213 FILE *fp;
212 size_t size; 214 size_t size = 0;
213 char *field, *sep;
214 DIR *tasks; 215 DIR *tasks;
215 struct dirent dirent, *next; 216 struct dirent dirent, *next;
217 pid_t tgid = 0;
216 218
217 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); 219 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
218 220
219 fd = open(filename, O_RDONLY); 221 fp = fopen(filename, "r");
220 if (fd < 0) { 222 if (fd == NULL) {
221 /* 223 /*
222 * We raced with a task exiting - just return: 224 * We raced with a task exiting - just return:
223 */ 225 */
224 if (verbose) 226 if (verbose)
225 fprintf(stderr, "couldn't open %s\n", filename); 227 fprintf(stderr, "couldn't open %s\n", filename);
226 return; 228 return 0;
227 }
228 if (read(fd, bf, sizeof(bf)) < 0) {
229 fprintf(stderr, "couldn't read %s\n", filename);
230 exit(EXIT_FAILURE);
231 } 229 }
232 close(fd);
233 230
234 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
235 memset(&comm_ev, 0, sizeof(comm_ev)); 231 memset(&comm_ev, 0, sizeof(comm_ev));
236 field = strchr(bf, '('); 232 while (!comm_ev.comm[0] || !comm_ev.pid) {
237 if (field == NULL) 233 if (fgets(bf, sizeof(bf), fp) == NULL)
238 goto out_failure; 234 goto out_failure;
239 sep = strchr(++field, ')'); 235
240 if (sep == NULL) 236 if (memcmp(bf, "Name:", 5) == 0) {
241 goto out_failure; 237 char *name = bf + 5;
242 size = sep - field; 238 while (*name && isspace(*name))
243 memcpy(comm_ev.comm, field, size++); 239 ++name;
244 240 size = strlen(name) - 1;
245 comm_ev.pid = pid; 241 memcpy(comm_ev.comm, name, size++);
242 } else if (memcmp(bf, "Tgid:", 5) == 0) {
243 char *tgids = bf + 5;
244 while (*tgids && isspace(*tgids))
245 ++tgids;
246 tgid = comm_ev.pid = atoi(tgids);
247 }
248 }
249
246 comm_ev.header.type = PERF_EVENT_COMM; 250 comm_ev.header.type = PERF_EVENT_COMM;
247 size = ALIGN(size, sizeof(u64)); 251 size = ALIGN(size, sizeof(u64));
248 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); 252 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
251 comm_ev.tid = pid; 255 comm_ev.tid = pid;
252 256
253 write_output(&comm_ev, comm_ev.header.size); 257 write_output(&comm_ev, comm_ev.header.size);
254 return; 258 goto out_fclose;
255 } 259 }
256 260
257 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 261 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
268 write_output(&comm_ev, comm_ev.header.size); 272 write_output(&comm_ev, comm_ev.header.size);
269 } 273 }
270 closedir(tasks); 274 closedir(tasks);
271 return; 275
276out_fclose:
277 fclose(fp);
278 return tgid;
272 279
273out_failure: 280out_failure:
274 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", 281 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
276 exit(EXIT_FAILURE); 283 exit(EXIT_FAILURE);
277} 284}
278 285
279static void pid_synthesize_mmap_samples(pid_t pid) 286static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
280{ 287{
281 char filename[PATH_MAX]; 288 char filename[PATH_MAX];
282 FILE *fp; 289 FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
328 mmap_ev.len -= mmap_ev.start; 335 mmap_ev.len -= mmap_ev.start;
329 mmap_ev.header.size = (sizeof(mmap_ev) - 336 mmap_ev.header.size = (sizeof(mmap_ev) -
330 (sizeof(mmap_ev.filename) - size)); 337 (sizeof(mmap_ev.filename) - size));
331 mmap_ev.pid = pid; 338 mmap_ev.pid = tgid;
332 mmap_ev.tid = pid; 339 mmap_ev.tid = pid;
333 340
334 write_output(&mmap_ev, mmap_ev.header.size); 341 write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
347 354
348 while (!readdir_r(proc, &dirent, &next) && next) { 355 while (!readdir_r(proc, &dirent, &next) && next) {
349 char *end; 356 char *end;
350 pid_t pid; 357 pid_t pid, tgid;
351 358
352 pid = strtol(dirent.d_name, &end, 10); 359 pid = strtol(dirent.d_name, &end, 10);
353 if (*end) /* only interested in proper numerical dirents */ 360 if (*end) /* only interested in proper numerical dirents */
354 continue; 361 continue;
355 362
356 pid_synthesize_comm_event(pid, 1); 363 tgid = pid_synthesize_comm_event(pid, 1);
357 pid_synthesize_mmap_samples(pid); 364 pid_synthesize_mmap_samples(pid, tgid);
358 } 365 }
359 366
360 closedir(proc); 367 closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
392 PERF_FORMAT_TOTAL_TIME_RUNNING | 399 PERF_FORMAT_TOTAL_TIME_RUNNING |
393 PERF_FORMAT_ID; 400 PERF_FORMAT_ID;
394 401
395 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 402 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
396 403
397 if (freq) { 404 if (freq) {
398 attr->sample_type |= PERF_SAMPLE_PERIOD; 405 attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
412 if (call_graph) 419 if (call_graph)
413 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 420 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
414 421
422 if (raw_samples)
423 attr->sample_type |= PERF_SAMPLE_RAW;
415 424
416 attr->mmap = track; 425 attr->mmap = track;
417 attr->comm = track; 426 attr->comm = track;
@@ -426,6 +435,8 @@ try_again:
426 435
427 if (err == EPERM) 436 if (err == EPERM)
428 die("Permission error - are you root?\n"); 437 die("Permission error - are you root?\n");
438 else if (err == ENODEV && profile_cpu != -1)
439 die("No such device - did you specify an out-of-range profile CPU?\n");
429 440
430 /* 441 /*
431 * If it's cycles then fall back to hrtimer 442 * If it's cycles then fall back to hrtimer
@@ -559,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
559 if (pid == -1) 570 if (pid == -1)
560 pid = getpid(); 571 pid = getpid();
561 572
562 open_counters(-1, pid); 573 open_counters(profile_cpu, pid);
563 } else for (i = 0; i < nr_cpus; i++) 574 } else {
564 open_counters(i, target_pid); 575 if (profile_cpu != -1) {
576 open_counters(profile_cpu, target_pid);
577 } else {
578 for (i = 0; i < nr_cpus; i++)
579 open_counters(i, target_pid);
580 }
581 }
565 582
566 if (file_new) 583 if (file_new)
567 perf_header__write(header, output); 584 perf_header__write(header, output);
568 585
569 if (!system_wide) { 586 if (!system_wide) {
570 pid_synthesize_comm_event(pid, 0); 587 pid_t tgid = pid_synthesize_comm_event(pid, 0);
571 pid_synthesize_mmap_samples(pid); 588 pid_synthesize_mmap_samples(pid, tgid);
572 } else 589 } else
573 synthesize_all(); 590 synthesize_all();
574 591
@@ -636,10 +653,14 @@ static const struct option options[] = {
636 "record events on existing pid"), 653 "record events on existing pid"),
637 OPT_INTEGER('r', "realtime", &realtime_prio, 654 OPT_INTEGER('r', "realtime", &realtime_prio,
638 "collect data with this RT SCHED_FIFO priority"), 655 "collect data with this RT SCHED_FIFO priority"),
656 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
657 "collect raw sample records from all opened counters"),
639 OPT_BOOLEAN('a', "all-cpus", &system_wide, 658 OPT_BOOLEAN('a', "all-cpus", &system_wide,
640 "system-wide collection from all CPUs"), 659 "system-wide collection from all CPUs"),
641 OPT_BOOLEAN('A', "append", &append_file, 660 OPT_BOOLEAN('A', "append", &append_file,
642 "append to the output file to do incremental profiling"), 661 "append to the output file to do incremental profiling"),
662 OPT_INTEGER('C', "profile_cpu", &profile_cpu,
663 "CPU to profile on"),
643 OPT_BOOLEAN('f', "force", &force, 664 OPT_BOOLEAN('f', "force", &force,
644 "overwrite existing data file"), 665 "overwrite existing data file"),
645 OPT_LONG('c', "count", &default_interval, 666 OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 99274cec0adb..b53a60fc12de 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1526,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1526 more_data += sizeof(u64); 1526 more_data += sizeof(u64);
1527 } 1527 }
1528 1528
1529 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", 1529 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
1530 (void *)(offset + head), 1530 (void *)(offset + head),
1531 (void *)(long)(event->header.size), 1531 (void *)(long)(event->header.size),
1532 event->header.misc, 1532 event->header.misc,
1533 event->ip.pid, 1533 event->ip.pid, event->ip.tid,
1534 (void *)(long)ip, 1534 (void *)(long)ip,
1535 (long long)period); 1535 (long long)period);
1536 1536
@@ -1590,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1590 if (show & show_mask) { 1590 if (show & show_mask) {
1591 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); 1591 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
1592 1592
1593 if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) 1593 if (dso_list && (!dso || !dso->name ||
1594 !strlist__has_entry(dso_list, dso->name)))
1594 return 0; 1595 return 0;
1595 1596
1596 if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) 1597 if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
1597 return 0; 1598 return 0;
1598 1599
1599 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { 1600 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1612,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
1612 struct thread *thread = threads__findnew(event->mmap.pid); 1613 struct thread *thread = threads__findnew(event->mmap.pid);
1613 struct map *map = map__new(&event->mmap); 1614 struct map *map = map__new(&event->mmap);
1614 1615
1615 dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", 1616 dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
1616 (void *)(offset + head), 1617 (void *)(offset + head),
1617 (void *)(long)(event->header.size), 1618 (void *)(long)(event->header.size),
1618 event->mmap.pid, 1619 event->mmap.pid,
1620 event->mmap.tid,
1619 (void *)(long)event->mmap.start, 1621 (void *)(long)event->mmap.start,
1620 (void *)(long)event->mmap.len, 1622 (void *)(long)event->mmap.len,
1621 (void *)(long)event->mmap.pgoff, 1623 (void *)(long)event->mmap.pgoff,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4858d83b3b67..044178408783 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
379 struct perf_counter_attr *attr) 379 struct perf_counter_attr *attr)
380{ 380{
381 const char *evt_name; 381 const char *evt_name;
382 char *flags;
382 char sys_name[MAX_EVENT_LENGTH]; 383 char sys_name[MAX_EVENT_LENGTH];
383 char id_buf[4]; 384 char id_buf[4];
384 int fd; 385 int fd;
@@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
400 strncpy(sys_name, *strp, sys_length); 401 strncpy(sys_name, *strp, sys_length);
401 sys_name[sys_length] = '\0'; 402 sys_name[sys_length] = '\0';
402 evt_name = evt_name + 1; 403 evt_name = evt_name + 1;
404
405 flags = strchr(evt_name, ':');
406 if (flags) {
407 *flags = '\0';
408 flags++;
409 if (!strncmp(flags, "record", strlen(flags)))
410 attr->sample_type |= PERF_SAMPLE_RAW;
411 }
412
403 evt_length = strlen(evt_name); 413 evt_length = strlen(evt_name);
404 if (evt_length >= MAX_EVENT_LENGTH) 414 if (evt_length >= MAX_EVENT_LENGTH)
405 return 0; 415 return 0;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f1dcede14307..5c0f42e6b33b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -7,23 +7,8 @@
7#include <gelf.h> 7#include <gelf.h>
8#include <elf.h> 8#include <elf.h>
9 9
10#ifndef NO_DEMANGLE
11#include <bfd.h>
12#else
13static inline
14char *bfd_demangle(void __used *v, const char __used *c, int __used i)
15{
16 return NULL;
17}
18#endif
19
20const char *sym_hist_filter; 10const char *sym_hist_filter;
21 11
22#ifndef DMGL_PARAMS
23#define DMGL_PARAMS (1 << 0) /* Include function args */
24#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
25#endif
26
27enum dso_origin { 12enum dso_origin {
28 DSO__ORIG_KERNEL = 0, 13 DSO__ORIG_KERNEL = 0,
29 DSO__ORIG_JAVA_JIT, 14 DSO__ORIG_JAVA_JIT,
@@ -816,6 +801,8 @@ more:
816 } 801 }
817out: 802out:
818 free(name); 803 free(name);
804 if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
805 return 0;
819 return ret; 806 return ret;
820} 807}
821 808
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 1e003ec2f4b1..b53bf0125c1b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -7,6 +7,30 @@
7#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include "module.h" 8#include "module.h"
9 9
10#ifdef HAVE_CPLUS_DEMANGLE
11extern char *cplus_demangle(const char *, int);
12
13static inline char *bfd_demangle(void __used *v, const char *c, int i)
14{
15 return cplus_demangle(c, i);
16}
17#else
18#ifdef NO_DEMANGLE
19static inline char *bfd_demangle(void __used *v, const char __used *c,
20 int __used i)
21{
22 return NULL;
23}
24#else
25#include <bfd.h>
26#endif
27#endif
28
29#ifndef DMGL_PARAMS
30#define DMGL_PARAMS (1 << 0) /* Include function args */
31#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
32#endif
33
10struct symbol { 34struct symbol {
11 struct rb_node rb_node; 35 struct rb_node rb_node;
12 u64 start; 36 u64 start;