aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/lockdep-design.txt6
-rw-r--r--Makefile2
-rw-r--r--arch/ia64/Makefile5
-rw-r--r--arch/ia64/include/asm/bitops.h2
-rw-r--r--arch/ia64/include/asm/pgtable.h1
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c4
-rw-r--r--arch/ia64/kernel/iosapic.c4
-rw-r--r--arch/ia64/kernel/pci-dma.c5
-rw-r--r--arch/ia64/kernel/topology.c6
-rw-r--r--arch/ia64/kvm/mmio.c6
-rw-r--r--arch/ia64/kvm/vcpu.c6
-rw-r--r--arch/ia64/kvm/vcpu.h13
-rw-r--r--arch/mn10300/include/asm/pci.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/kernel/dma.c6
-rw-r--r--arch/powerpc/kernel/perf_counter.c8
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/sh/boards/board-ap325rxa.c2
-rw-r--r--arch/sh/boards/mach-migor/setup.c2
-rw-r--r--arch/sh/kernel/cpu/sh2/setup-sh7619.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-mxg.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7201.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7203.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7206.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7705.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh770x.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7710.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7720.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh4-202.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7760.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7343.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7366.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7722.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7723.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7724.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7763.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7770.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7780.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7785.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7786.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-shx3.c2
-rw-r--r--arch/sh/kernel/cpu/sh5/setup-sh5.c2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c8
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/reboot.c16
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/mmu.c48
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--drivers/ata/ahci.c79
-rw-r--r--drivers/ata/libata-core.c3
-rw-r--r--drivers/ata/pata_at91.c17
-rw-r--r--drivers/ata/pata_atiixp.c19
-rw-r--r--drivers/ata/sata_nv.c8
-rw-r--r--drivers/base/platform.c3
-rw-r--r--drivers/char/pty.c2
-rw-r--r--drivers/gpu/drm/drm_irq.c2
-rw-r--r--drivers/gpu/drm/drm_modes.c2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c4
-rw-r--r--drivers/md/md.c32
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid5.c34
-rw-r--r--drivers/mtd/maps/sbc8240.c0
-rw-r--r--drivers/mtd/ubi/eba.c1
-rw-r--r--drivers/mtd/ubi/scan.c13
-rw-r--r--drivers/pci/hotplug/sgi_hotplug.c7
-rw-r--r--fs/nfs/direct.c20
-rw-r--r--fs/nfs/nfs4xdr.c1374
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c35
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c134
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c30
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/proc/base.c27
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/task_nommu.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_attr.c8
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
-rw-r--r--include/linux/ftrace_event.h4
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/nfs_fs.h5
-rw-r--r--include/linux/perf_counter.h60
-rw-r--r--include/linux/sunrpc/xdr.h10
-rw-r--r--include/linux/wait.h9
-rw-r--r--include/trace/ftrace.h183
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/irq/numa_migrate.c4
-rw-r--r--kernel/lockdep_proc.c3
-rw-r--r--kernel/perf_counter.c581
-rw-r--r--kernel/posix-cpu-timers.c7
-rw-r--r--kernel/rtmutex.c4
-rw-r--r--kernel/trace/blktrace.c12
-rw-r--r--kernel/trace/ring_buffer.c15
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace.h4
-rw-r--r--kernel/trace/trace_events_filter.c20
-rw-r--r--kernel/wait.c5
-rw-r--r--mm/mempool.c4
-rw-r--r--net/socket.c2
-rw-r--r--net/sunrpc/xdr.c12
-rwxr-xr-xscripts/recordmcount.pl9
-rw-r--r--security/selinux/hooks.c3
-rw-r--r--sound/pci/hda/patch_realtek.c20
-rw-r--r--sound/soc/fsl/efika-audio-fabric.c2
-rw-r--r--sound/soc/fsl/pcm030-audio-fabric.c2
-rw-r--r--tools/perf/Documentation/perf-examples.txt225
-rw-r--r--tools/perf/Documentation/perf-stat.txt2
-rw-r--r--tools/perf/Documentation/perf-top.txt112
-rw-r--r--tools/perf/Makefile25
-rw-r--r--tools/perf/builtin-list.c3
-rw-r--r--tools/perf/builtin-record.c108
-rw-r--r--tools/perf/builtin-report.c111
-rw-r--r--tools/perf/builtin-stat.c2
-rw-r--r--tools/perf/builtin-top.c552
-rw-r--r--tools/perf/util/callchain.c32
-rw-r--r--tools/perf/util/callchain.h8
-rw-r--r--tools/perf/util/header.c5
-rw-r--r--tools/perf/util/parse-events.c36
-rw-r--r--tools/perf/util/parse-events.h1
-rw-r--r--tools/perf/util/symbol.c72
-rw-r--r--tools/perf/util/symbol.h26
-rw-r--r--virt/kvm/ioapic.c10
-rw-r--r--virt/kvm/irq_comm.c4
158 files changed, 3577 insertions, 1436 deletions
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7bb0d934b6d8..dbea4f95fc85 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -139,6 +139,7 @@ Code Seq# Include File Comments
139'm' all linux/synclink.h conflict! 139'm' all linux/synclink.h conflict!
140'm' 00-1F net/irda/irmod.h conflict! 140'm' 00-1F net/irda/irmod.h conflict!
141'n' 00-7F linux/ncp_fs.h 141'n' 00-7F linux/ncp_fs.h
142'n' 80-8F linux/nilfs2_fs.h NILFS2
142'n' E0-FF video/matrox.h matroxfb 143'n' E0-FF video/matrox.h matroxfb
143'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 144'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
144'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps) 145'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2f1820683b69..c08813dbfce2 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1115,6 +1115,10 @@ and is between 256 and 4096 characters. It is defined in the file
1115 libata.dma=4 Compact Flash DMA only 1115 libata.dma=4 Compact Flash DMA only
1116 Combinations also work, so libata.dma=3 enables DMA 1116 Combinations also work, so libata.dma=3 enables DMA
1117 for disks and CDROMs, but not CFs. 1117 for disks and CDROMs, but not CFs.
1118
1119 libata.ignore_hpa= [LIBATA] Ignore HPA limit
1120 libata.ignore_hpa=0 keep BIOS limits (default)
1121 libata.ignore_hpa=1 ignore limits, using full disk
1118 1122
1119 libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume 1123 libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
1120 when set. 1124 when set.
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index e20d913d5914..abf768c681e2 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -30,9 +30,9 @@ State
30The validator tracks lock-class usage history into 4n + 1 separate state bits: 30The validator tracks lock-class usage history into 4n + 1 separate state bits:
31 31
32- 'ever held in STATE context' 32- 'ever held in STATE context'
33- 'ever head as readlock in STATE context' 33- 'ever held as readlock in STATE context'
34- 'ever head with STATE enabled' 34- 'ever held with STATE enabled'
35- 'ever head as readlock with STATE enabled' 35- 'ever held as readlock with STATE enabled'
36 36
37Where STATE can be either one of (kernel/lockdep_states.h) 37Where STATE can be either one of (kernel/lockdep_states.h)
38 - hardirq 38 - hardirq
diff --git a/Makefile b/Makefile
index 0d46615bffe5..abcfa85f8f82 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 31 3SUBLEVEL = 31
4EXTRAVERSION = -rc5 4EXTRAVERSION = -rc6
5NAME = Man-Eating Seals of Antiquity 5NAME = Man-Eating Seals of Antiquity
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 58a7e46affda..e7cbaa02cd0b 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -41,11 +41,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from
41 ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) 41 ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
42endif 42endif
43 43
44ifeq ($(call cc-version),0304)
45 cflags-$(CONFIG_ITANIUM) += -mtune=merced
46 cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
47endif
48
49KBUILD_CFLAGS += $(cflags-y) 44KBUILD_CFLAGS += $(cflags-y)
50head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o 45head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
51 46
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index e2ca80037335..57a2787bc9fb 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -286,7 +286,7 @@ __test_and_clear_bit(int nr, volatile void * addr)
286{ 286{
287 __u32 *p = (__u32 *) addr + (nr >> 5); 287 __u32 *p = (__u32 *) addr + (nr >> 5);
288 __u32 m = 1 << (nr & 31); 288 __u32 m = 1 << (nr & 31);
289 int oldbitset = *p & m; 289 int oldbitset = (*p & m) != 0;
290 290
291 *p &= ~m; 291 *p &= ~m;
292 return oldbitset; 292 return oldbitset;
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 0a9cc73d35c7..8840a690d1e7 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -155,7 +155,6 @@
155#include <linux/bitops.h> 155#include <linux/bitops.h>
156#include <asm/cacheflush.h> 156#include <asm/cacheflush.h>
157#include <asm/mmu_context.h> 157#include <asm/mmu_context.h>
158#include <asm/processor.h>
159 158
160/* 159/*
161 * Next come the mappings that determine how mmap() protection bits 160 * Next come the mappings that determine how mmap() protection bits
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 2d311864e359..8ebccb589e1c 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -21,6 +21,7 @@ EXPORT_SYMBOL(csum_ipv6_magic);
21 21
22#include <asm/page.h> 22#include <asm/page.h>
23EXPORT_SYMBOL(clear_page); 23EXPORT_SYMBOL(clear_page);
24EXPORT_SYMBOL(copy_page);
24 25
25#ifdef CONFIG_VIRTUAL_MEM_MAP 26#ifdef CONFIG_VIRTUAL_MEM_MAP
26#include <linux/bootmem.h> 27#include <linux/bootmem.h>
@@ -60,9 +61,6 @@ EXPORT_SYMBOL(__udivdi3);
60EXPORT_SYMBOL(__moddi3); 61EXPORT_SYMBOL(__moddi3);
61EXPORT_SYMBOL(__umoddi3); 62EXPORT_SYMBOL(__umoddi3);
62 63
63#include <asm/page.h>
64EXPORT_SYMBOL(copy_page);
65
66#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) 64#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
67extern void xor_ia64_2(void); 65extern void xor_ia64_2(void);
68extern void xor_ia64_3(void); 66extern void xor_ia64_3(void);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index c48b03f2b61d..dab4d393908c 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -1072,6 +1072,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
1072 } 1072 }
1073 1073
1074 addr = ioremap(phys_addr, 0); 1074 addr = ioremap(phys_addr, 0);
1075 if (addr == NULL) {
1076 spin_unlock_irqrestore(&iosapic_lock, flags);
1077 return -ENOMEM;
1078 }
1075 ver = iosapic_version(addr); 1079 ver = iosapic_version(addr);
1076 if ((err = iosapic_check_gsi_range(gsi_base, ver))) { 1080 if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1077 iounmap(addr); 1081 iounmap(addr);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index 05695962fe44..f6b1ff0aea76 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -69,11 +69,6 @@ iommu_dma_init(void)
69 69
70int iommu_dma_supported(struct device *dev, u64 mask) 70int iommu_dma_supported(struct device *dev, u64 mask)
71{ 71{
72 struct dma_map_ops *ops = platform_dma_get_ops(dev);
73
74 if (ops->dma_supported)
75 return ops->dma_supported(dev, mask);
76
77 /* Copied from i386. Doesn't make much sense, because it will 72 /* Copied from i386. Doesn't make much sense, because it will
78 only work for pci_alloc_coherent. 73 only work for pci_alloc_coherent.
79 The caller just has to use GFP_DMA in this case. */ 74 The caller just has to use GFP_DMA in this case. */
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index bc80dff1df7a..8f060352e129 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -372,6 +372,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
372 retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, 372 retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
373 &cache_ktype_percpu_entry, &sys_dev->kobj, 373 &cache_ktype_percpu_entry, &sys_dev->kobj,
374 "%s", "cache"); 374 "%s", "cache");
375 if (unlikely(retval < 0)) {
376 cpu_cache_sysfs_exit(cpu);
377 return retval;
378 }
375 379
376 for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { 380 for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
377 this_object = LEAF_KOBJECT_PTR(cpu,i); 381 this_object = LEAF_KOBJECT_PTR(cpu,i);
@@ -385,7 +389,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
385 } 389 }
386 kobject_put(&all_cpu_cache_info[cpu].kobj); 390 kobject_put(&all_cpu_cache_info[cpu].kobj);
387 cpu_cache_sysfs_exit(cpu); 391 cpu_cache_sysfs_exit(cpu);
388 break; 392 return retval;
389 } 393 }
390 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 394 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
391 } 395 }
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 21f63fffc379..9bf55afd08d0 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -247,7 +247,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
247 vcpu_get_fpreg(vcpu, inst.M9.f2, &v); 247 vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
248 /* Write high word. FIXME: this is a kludge! */ 248 /* Write high word. FIXME: this is a kludge! */
249 v.u.bits[1] &= 0x3ffff; 249 v.u.bits[1] &= 0x3ffff;
250 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); 250 mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
251 ma, IOREQ_WRITE);
251 data = v.u.bits[0]; 252 data = v.u.bits[0];
252 size = 3; 253 size = 3;
253 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { 254 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
@@ -265,7 +266,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
265 266
266 /* Write high word.FIXME: this is a kludge! */ 267 /* Write high word.FIXME: this is a kludge! */
267 v.u.bits[1] &= 0x3ffff; 268 v.u.bits[1] &= 0x3ffff;
268 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); 269 mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
270 8, ma, IOREQ_WRITE);
269 data = v.u.bits[0]; 271 data = v.u.bits[0];
270 size = 3; 272 size = 3;
271 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { 273 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index 46b02cbcc874..cc406d064a09 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -461,7 +461,7 @@ void setreg(unsigned long regnum, unsigned long val,
461u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) 461u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
462{ 462{
463 struct kvm_pt_regs *regs = vcpu_regs(vcpu); 463 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
464 u64 val; 464 unsigned long val;
465 465
466 if (!reg) 466 if (!reg)
467 return 0; 467 return 0;
@@ -469,7 +469,7 @@ u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
469 return val; 469 return val;
470} 470}
471 471
472void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) 472void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
473{ 473{
474 struct kvm_pt_regs *regs = vcpu_regs(vcpu); 474 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
475 long sof = (regs->cr_ifs) & 0x7f; 475 long sof = (regs->cr_ifs) & 0x7f;
@@ -1072,7 +1072,7 @@ void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
1072 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); 1072 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
1073} 1073}
1074 1074
1075int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) 1075int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
1076{ 1076{
1077 struct thash_data *data; 1077 struct thash_data *data;
1078 union ia64_isr visr, pt_isr; 1078 union ia64_isr visr, pt_isr;
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index 042af92ced83..360724d3ae69 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -686,14 +686,15 @@ static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
686 return highest_bits((int *)&(VMX(vcpu, insvc[0]))); 686 return highest_bits((int *)&(VMX(vcpu, insvc[0])));
687} 687}
688 688
689extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, 689extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
690 struct ia64_fpreg *val); 690 struct ia64_fpreg *val);
691extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, 691extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
692 struct ia64_fpreg *val); 692 struct ia64_fpreg *val);
693extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); 693extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
694extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); 694extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
695extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); 695 u64 val, int nat);
696extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); 696extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
697extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
697extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); 698extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
698extern void vcpu_bsw0(struct kvm_vcpu *vcpu); 699extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
699extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, 700extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h
index 35d2ed6396f6..19aecc90f7a4 100644
--- a/arch/mn10300/include/asm/pci.h
+++ b/arch/mn10300/include/asm/pci.h
@@ -59,7 +59,6 @@ void pcibios_penalize_isa_irq(int irq);
59#include <linux/slab.h> 59#include <linux/slab.h>
60#include <asm/scatterlist.h> 60#include <asm/scatterlist.h>
61#include <linux/string.h> 61#include <linux/string.h>
62#include <linux/mm.h>
63#include <asm/io.h> 62#include <asm/io.h>
64 63
65struct pci_dev; 64struct pci_dev;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index dfdf13c9fefd..fddc3ed715fa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
34#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 34#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
35 35
36/* We don't currently support large pages. */ 36/* We don't currently support large pages. */
37#define KVM_PAGES_PER_HPAGE (1<<31) 37#define KVM_PAGES_PER_HPAGE (1UL << 31)
38 38
39struct kvm; 39struct kvm;
40struct kvm_run; 40struct kvm_run;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 20a60d661ba8..ccf129d47d84 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/device.h> 8#include <linux/device.h>
9#include <linux/dma-mapping.h> 9#include <linux/dma-mapping.h>
10#include <linux/lmb.h>
10#include <asm/bug.h> 11#include <asm/bug.h>
11#include <asm/abs_addr.h> 12#include <asm/abs_addr.h>
12 13
@@ -90,11 +91,10 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
90static int dma_direct_dma_supported(struct device *dev, u64 mask) 91static int dma_direct_dma_supported(struct device *dev, u64 mask)
91{ 92{
92#ifdef CONFIG_PPC64 93#ifdef CONFIG_PPC64
93 /* Could be improved to check for memory though it better be 94 /* Could be improved so platforms can set the limit in case
94 * done via some global so platforms can set the limit in case
95 * they have limited DMA windows 95 * they have limited DMA windows
96 */ 96 */
97 return mask >= DMA_BIT_MASK(32); 97 return mask >= (lmb_end_of_DRAM() - 1);
98#else 98#else
99 return 1; 99 return 1;
100#endif 100#endif
diff --git a/arch/powerpc/kernel/perf_counter.c b/arch/powerpc/kernel/perf_counter.c
index 809fdf94b95f..70e1f57f7dd8 100644
--- a/arch/powerpc/kernel/perf_counter.c
+++ b/arch/powerpc/kernel/perf_counter.c
@@ -518,6 +518,8 @@ void hw_perf_disable(void)
518 struct cpu_hw_counters *cpuhw; 518 struct cpu_hw_counters *cpuhw;
519 unsigned long flags; 519 unsigned long flags;
520 520
521 if (!ppmu)
522 return;
521 local_irq_save(flags); 523 local_irq_save(flags);
522 cpuhw = &__get_cpu_var(cpu_hw_counters); 524 cpuhw = &__get_cpu_var(cpu_hw_counters);
523 525
@@ -572,6 +574,8 @@ void hw_perf_enable(void)
572 int n_lim; 574 int n_lim;
573 int idx; 575 int idx;
574 576
577 if (!ppmu)
578 return;
575 local_irq_save(flags); 579 local_irq_save(flags);
576 cpuhw = &__get_cpu_var(cpu_hw_counters); 580 cpuhw = &__get_cpu_var(cpu_hw_counters);
577 if (!cpuhw->disabled) { 581 if (!cpuhw->disabled) {
@@ -737,6 +741,8 @@ int hw_perf_group_sched_in(struct perf_counter *group_leader,
737 long i, n, n0; 741 long i, n, n0;
738 struct perf_counter *sub; 742 struct perf_counter *sub;
739 743
744 if (!ppmu)
745 return 0;
740 cpuhw = &__get_cpu_var(cpu_hw_counters); 746 cpuhw = &__get_cpu_var(cpu_hw_counters);
741 n0 = cpuhw->n_counters; 747 n0 = cpuhw->n_counters;
742 n = collect_events(group_leader, ppmu->n_counter - n0, 748 n = collect_events(group_leader, ppmu->n_counter - n0,
@@ -1281,6 +1287,8 @@ void hw_perf_counter_setup(int cpu)
1281{ 1287{
1282 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); 1288 struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu);
1283 1289
1290 if (!ppmu)
1291 return;
1284 memset(cpuhw, 0, sizeof(*cpuhw)); 1292 memset(cpuhw, 0, sizeof(*cpuhw));
1285 cpuhw->mmcr[0] = MMCR0_FC; 1293 cpuhw->mmcr[0] = MMCR0_FC;
1286} 1294}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f04f5301b1b4..4d613415c435 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -386,7 +386,7 @@ no_timer:
386 } 386 }
387 __unset_cpu_idle(vcpu); 387 __unset_cpu_idle(vcpu);
388 __set_current_state(TASK_RUNNING); 388 __set_current_state(TASK_RUNNING);
389 remove_wait_queue(&vcpu->wq, &wait); 389 remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
390 spin_unlock_bh(&vcpu->arch.local_int.lock); 390 spin_unlock_bh(&vcpu->arch.local_int.lock);
391 spin_unlock(&vcpu->arch.local_int.float_int->lock); 391 spin_unlock(&vcpu->arch.local_int.float_int->lock);
392 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); 392 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 7ffd1b4315bd..b9c88cc519e2 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -547,7 +547,7 @@ static int __init ap325rxa_devices_setup(void)
547 return platform_add_devices(ap325rxa_devices, 547 return platform_add_devices(ap325rxa_devices,
548 ARRAY_SIZE(ap325rxa_devices)); 548 ARRAY_SIZE(ap325rxa_devices));
549} 549}
550device_initcall(ap325rxa_devices_setup); 550arch_initcall(ap325rxa_devices_setup);
551 551
552/* Return the board specific boot mode pin configuration */ 552/* Return the board specific boot mode pin configuration */
553static int ap325rxa_mode_pins(void) 553static int ap325rxa_mode_pins(void)
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index f70f4644deb4..f9b2e4df35b9 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -608,7 +608,7 @@ static int __init migor_devices_setup(void)
608 608
609 return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices)); 609 return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
610} 610}
611__initcall(migor_devices_setup); 611arch_initcall(migor_devices_setup);
612 612
613/* Return the board specific boot mode pin configuration */ 613/* Return the board specific boot mode pin configuration */
614static int migor_mode_pins(void) 614static int migor_mode_pins(void)
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index 13798733f2db..8555c05e8667 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -187,7 +187,7 @@ static int __init sh7619_devices_setup(void)
187 return platform_add_devices(sh7619_devices, 187 return platform_add_devices(sh7619_devices,
188 ARRAY_SIZE(sh7619_devices)); 188 ARRAY_SIZE(sh7619_devices));
189} 189}
190__initcall(sh7619_devices_setup); 190arch_initcall(sh7619_devices_setup);
191 191
192void __init plat_irq_setup(void) 192void __init plat_irq_setup(void)
193{ 193{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 869c2da4820b..b67376445315 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -238,7 +238,7 @@ static int __init mxg_devices_setup(void)
238 return platform_add_devices(mxg_devices, 238 return platform_add_devices(mxg_devices,
239 ARRAY_SIZE(mxg_devices)); 239 ARRAY_SIZE(mxg_devices));
240} 240}
241__initcall(mxg_devices_setup); 241arch_initcall(mxg_devices_setup);
242 242
243void __init plat_irq_setup(void) 243void __init plat_irq_setup(void)
244{ 244{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index d8febe128066..fbde5b75deb9 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -357,7 +357,7 @@ static int __init sh7201_devices_setup(void)
357 return platform_add_devices(sh7201_devices, 357 return platform_add_devices(sh7201_devices,
358 ARRAY_SIZE(sh7201_devices)); 358 ARRAY_SIZE(sh7201_devices));
359} 359}
360__initcall(sh7201_devices_setup); 360arch_initcall(sh7201_devices_setup);
361 361
362void __init plat_irq_setup(void) 362void __init plat_irq_setup(void)
363{ 363{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 62e3039d2398..d3fd536c9a84 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -367,7 +367,7 @@ static int __init sh7203_devices_setup(void)
367 return platform_add_devices(sh7203_devices, 367 return platform_add_devices(sh7203_devices,
368 ARRAY_SIZE(sh7203_devices)); 368 ARRAY_SIZE(sh7203_devices));
369} 369}
370__initcall(sh7203_devices_setup); 370arch_initcall(sh7203_devices_setup);
371 371
372void __init plat_irq_setup(void) 372void __init plat_irq_setup(void)
373{ 373{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index 3e6f3d7a58be..a9ccc5e8d9e9 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -338,7 +338,7 @@ static int __init sh7206_devices_setup(void)
338 return platform_add_devices(sh7206_devices, 338 return platform_add_devices(sh7206_devices,
339 ARRAY_SIZE(sh7206_devices)); 339 ARRAY_SIZE(sh7206_devices));
340} 340}
341__initcall(sh7206_devices_setup); 341arch_initcall(sh7206_devices_setup);
342 342
343void __init plat_irq_setup(void) 343void __init plat_irq_setup(void)
344{ 344{
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index 88f742fed9ed..c23105983878 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -222,7 +222,7 @@ static int __init sh7705_devices_setup(void)
222 return platform_add_devices(sh7705_devices, 222 return platform_add_devices(sh7705_devices,
223 ARRAY_SIZE(sh7705_devices)); 223 ARRAY_SIZE(sh7705_devices));
224} 224}
225__initcall(sh7705_devices_setup); 225arch_initcall(sh7705_devices_setup);
226 226
227static struct platform_device *sh7705_early_devices[] __initdata = { 227static struct platform_device *sh7705_early_devices[] __initdata = {
228 &tmu0_device, 228 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index c56306798584..347ab35d0697 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -250,7 +250,7 @@ static int __init sh770x_devices_setup(void)
250 return platform_add_devices(sh770x_devices, 250 return platform_add_devices(sh770x_devices,
251 ARRAY_SIZE(sh770x_devices)); 251 ARRAY_SIZE(sh770x_devices));
252} 252}
253__initcall(sh770x_devices_setup); 253arch_initcall(sh770x_devices_setup);
254 254
255static struct platform_device *sh770x_early_devices[] __initdata = { 255static struct platform_device *sh770x_early_devices[] __initdata = {
256 &tmu0_device, 256 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index efa76c8148f4..717e90ae1097 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -226,7 +226,7 @@ static int __init sh7710_devices_setup(void)
226 return platform_add_devices(sh7710_devices, 226 return platform_add_devices(sh7710_devices,
227 ARRAY_SIZE(sh7710_devices)); 227 ARRAY_SIZE(sh7710_devices));
228} 228}
229__initcall(sh7710_devices_setup); 229arch_initcall(sh7710_devices_setup);
230 230
231static struct platform_device *sh7710_early_devices[] __initdata = { 231static struct platform_device *sh7710_early_devices[] __initdata = {
232 &tmu0_device, 232 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 5b2107798edb..74d8baaf8e96 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -388,7 +388,7 @@ static int __init sh7720_devices_setup(void)
388 return platform_add_devices(sh7720_devices, 388 return platform_add_devices(sh7720_devices,
389 ARRAY_SIZE(sh7720_devices)); 389 ARRAY_SIZE(sh7720_devices));
390} 390}
391__initcall(sh7720_devices_setup); 391arch_initcall(sh7720_devices_setup);
392 392
393static struct platform_device *sh7720_early_devices[] __initdata = { 393static struct platform_device *sh7720_early_devices[] __initdata = {
394 &cmt0_device, 394 &cmt0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index 6d088d123591..de4827df19aa 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -138,7 +138,7 @@ static int __init sh4202_devices_setup(void)
138 return platform_add_devices(sh4202_devices, 138 return platform_add_devices(sh4202_devices,
139 ARRAY_SIZE(sh4202_devices)); 139 ARRAY_SIZE(sh4202_devices));
140} 140}
141__initcall(sh4202_devices_setup); 141arch_initcall(sh4202_devices_setup);
142 142
143static struct platform_device *sh4202_early_devices[] __initdata = { 143static struct platform_device *sh4202_early_devices[] __initdata = {
144 &tmu0_device, 144 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 851672d15cf4..1b8b122e8f3d 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -239,7 +239,7 @@ static int __init sh7750_devices_setup(void)
239 return platform_add_devices(sh7750_devices, 239 return platform_add_devices(sh7750_devices,
240 ARRAY_SIZE(sh7750_devices)); 240 ARRAY_SIZE(sh7750_devices));
241} 241}
242__initcall(sh7750_devices_setup); 242arch_initcall(sh7750_devices_setup);
243 243
244static struct platform_device *sh7750_early_devices[] __initdata = { 244static struct platform_device *sh7750_early_devices[] __initdata = {
245 &tmu0_device, 245 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 5b822519bd90..7fbb7be9284c 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -265,7 +265,7 @@ static int __init sh7760_devices_setup(void)
265 return platform_add_devices(sh7760_devices, 265 return platform_add_devices(sh7760_devices,
266 ARRAY_SIZE(sh7760_devices)); 266 ARRAY_SIZE(sh7760_devices));
267} 267}
268__initcall(sh7760_devices_setup); 268arch_initcall(sh7760_devices_setup);
269 269
270static struct platform_device *sh7760_early_devices[] __initdata = { 270static struct platform_device *sh7760_early_devices[] __initdata = {
271 &tmu0_device, 271 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index 6307e087c864..ac4d5672ec1a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -325,7 +325,7 @@ static int __init sh7343_devices_setup(void)
325 return platform_add_devices(sh7343_devices, 325 return platform_add_devices(sh7343_devices,
326 ARRAY_SIZE(sh7343_devices)); 326 ARRAY_SIZE(sh7343_devices));
327} 327}
328__initcall(sh7343_devices_setup); 328arch_initcall(sh7343_devices_setup);
329 329
330static struct platform_device *sh7343_early_devices[] __initdata = { 330static struct platform_device *sh7343_early_devices[] __initdata = {
331 &cmt_device, 331 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index c18f7d09281b..1a956b1beccc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -318,7 +318,7 @@ static int __init sh7366_devices_setup(void)
318 return platform_add_devices(sh7366_devices, 318 return platform_add_devices(sh7366_devices,
319 ARRAY_SIZE(sh7366_devices)); 319 ARRAY_SIZE(sh7366_devices));
320} 320}
321__initcall(sh7366_devices_setup); 321arch_initcall(sh7366_devices_setup);
322 322
323static struct platform_device *sh7366_early_devices[] __initdata = { 323static struct platform_device *sh7366_early_devices[] __initdata = {
324 &cmt_device, 324 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index ea524a2da3e4..cda76ebf87c3 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -359,7 +359,7 @@ static int __init sh7722_devices_setup(void)
359 return platform_add_devices(sh7722_devices, 359 return platform_add_devices(sh7722_devices,
360 ARRAY_SIZE(sh7722_devices)); 360 ARRAY_SIZE(sh7722_devices));
361} 361}
362__initcall(sh7722_devices_setup); 362arch_initcall(sh7722_devices_setup);
363 363
364static struct platform_device *sh7722_early_devices[] __initdata = { 364static struct platform_device *sh7722_early_devices[] __initdata = {
365 &cmt_device, 365 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index e1bb80b2a27b..b45dace9539f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -473,7 +473,7 @@ static int __init sh7723_devices_setup(void)
473 return platform_add_devices(sh7723_devices, 473 return platform_add_devices(sh7723_devices,
474 ARRAY_SIZE(sh7723_devices)); 474 ARRAY_SIZE(sh7723_devices));
475} 475}
476__initcall(sh7723_devices_setup); 476arch_initcall(sh7723_devices_setup);
477 477
478static struct platform_device *sh7723_early_devices[] __initdata = { 478static struct platform_device *sh7723_early_devices[] __initdata = {
479 &cmt_device, 479 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index e5ac9eb11c63..a04edaab9a29 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -508,7 +508,7 @@ static int __init sh7724_devices_setup(void)
508 return platform_add_devices(sh7724_devices, 508 return platform_add_devices(sh7724_devices,
509 ARRAY_SIZE(sh7724_devices)); 509 ARRAY_SIZE(sh7724_devices));
510} 510}
511device_initcall(sh7724_devices_setup); 511arch_initcall(sh7724_devices_setup);
512 512
513static struct platform_device *sh7724_early_devices[] __initdata = { 513static struct platform_device *sh7724_early_devices[] __initdata = {
514 &cmt_device, 514 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index f1e0c0d36da7..4659fff6b842 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -314,7 +314,7 @@ static int __init sh7763_devices_setup(void)
314 return platform_add_devices(sh7763_devices, 314 return platform_add_devices(sh7763_devices,
315 ARRAY_SIZE(sh7763_devices)); 315 ARRAY_SIZE(sh7763_devices));
316} 316}
317__initcall(sh7763_devices_setup); 317arch_initcall(sh7763_devices_setup);
318 318
319static struct platform_device *sh7763_early_devices[] __initdata = { 319static struct platform_device *sh7763_early_devices[] __initdata = {
320 &tmu0_device, 320 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index 1e86209db284..eead08d89d32 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -368,7 +368,7 @@ static int __init sh7770_devices_setup(void)
368 return platform_add_devices(sh7770_devices, 368 return platform_add_devices(sh7770_devices,
369 ARRAY_SIZE(sh7770_devices)); 369 ARRAY_SIZE(sh7770_devices));
370} 370}
371__initcall(sh7770_devices_setup); 371arch_initcall(sh7770_devices_setup);
372 372
373static struct platform_device *sh7770_early_devices[] __initdata = { 373static struct platform_device *sh7770_early_devices[] __initdata = {
374 &tmu0_device, 374 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 715e05b431e5..2c901f446959 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -256,7 +256,7 @@ static int __init sh7780_devices_setup(void)
256 return platform_add_devices(sh7780_devices, 256 return platform_add_devices(sh7780_devices,
257 ARRAY_SIZE(sh7780_devices)); 257 ARRAY_SIZE(sh7780_devices));
258} 258}
259__initcall(sh7780_devices_setup); 259arch_initcall(sh7780_devices_setup);
260 260
261static struct platform_device *sh7780_early_devices[] __initdata = { 261static struct platform_device *sh7780_early_devices[] __initdata = {
262 &tmu0_device, 262 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index af561402570b..7f6c718b6c36 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -263,7 +263,7 @@ static int __init sh7785_devices_setup(void)
263 return platform_add_devices(sh7785_devices, 263 return platform_add_devices(sh7785_devices,
264 ARRAY_SIZE(sh7785_devices)); 264 ARRAY_SIZE(sh7785_devices));
265} 265}
266__initcall(sh7785_devices_setup); 266arch_initcall(sh7785_devices_setup);
267 267
268static struct platform_device *sh7785_early_devices[] __initdata = { 268static struct platform_device *sh7785_early_devices[] __initdata = {
269 &tmu0_device, 269 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index b70049470a0b..0104a8ec5369 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -547,7 +547,7 @@ static int __init sh7786_devices_setup(void)
547 return platform_add_devices(sh7786_devices, 547 return platform_add_devices(sh7786_devices,
548 ARRAY_SIZE(sh7786_devices)); 548 ARRAY_SIZE(sh7786_devices));
549} 549}
550device_initcall(sh7786_devices_setup); 550arch_initcall(sh7786_devices_setup);
551 551
552void __init plat_early_device_setup(void) 552void __init plat_early_device_setup(void)
553{ 553{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 53c65fd9ccef..07f078961c71 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -256,7 +256,7 @@ static int __init shx3_devices_setup(void)
256 return platform_add_devices(shx3_devices, 256 return platform_add_devices(shx3_devices,
257 ARRAY_SIZE(shx3_devices)); 257 ARRAY_SIZE(shx3_devices));
258} 258}
259__initcall(shx3_devices_setup); 259arch_initcall(shx3_devices_setup);
260 260
261void __init plat_early_device_setup(void) 261void __init plat_early_device_setup(void)
262{ 262{
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index f5ff1ac57fc2..6a0f82f70032 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -186,7 +186,7 @@ static int __init sh5_devices_setup(void)
186 return platform_add_devices(sh5_devices, 186 return platform_add_devices(sh5_devices,
187 ARRAY_SIZE(sh5_devices)); 187 ARRAY_SIZE(sh5_devices));
188} 188}
189__initcall(sh5_devices_setup); 189arch_initcall(sh5_devices_setup);
190 190
191void __init plat_early_device_setup(void) 191void __init plat_early_device_setup(void)
192{ 192{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486)
27 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
28 select HAVE_KPROBES 29 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
742config X86_LOCAL_APIC 743config X86_LOCAL_APIC
743 def_bool y 744 def_bool y
744 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 745 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
745 select HAVE_PERF_COUNTERS if (!M386 && !M486)
746 746
747config X86_IO_APIC 747config X86_IO_APIC
748 def_bool y 748 def_bool y
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 2ed4e2bb3b32..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
17 return x2apic_enabled(); 17 return x2apic_enabled();
18} 18}
19 19
20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/*
21 21 * need to use more than cpu 0, because we need more vectors when
22 * MSI-X are used.
23 */
22static const struct cpumask *x2apic_target_cpus(void) 24static const struct cpumask *x2apic_target_cpus(void)
23{ 25{
24 return cpumask_of(0); 26 return cpu_online_mask;
25} 27}
26 28
27/* 29/*
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 0b631c6a2e00..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/*
31 31 * need to use more than cpu 0, because we need more vectors when
32 * MSI-X are used.
33 */
32static const struct cpumask *x2apic_target_cpus(void) 34static const struct cpumask *x2apic_target_cpus(void)
33{ 35{
34 return cpumask_of(0); 36 return cpu_online_mask;
35} 37}
36 38
37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) 39static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
400 level = cpuid_eax(1); 400 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
403 } 410 }
404 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
405 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..8bc64cfbe936 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
39 40
40static atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
41 42
@@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
96{ 97{
97 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
98 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
99 102
100 if (curr) 103 if (is_throttled)
101 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
102 105
103 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
104 return 0; 108 return 0;
105 109
106 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
107 111
108 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
109 if (curr) { 113 if (is_throttled) {
110 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
111 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
112 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
113 117
114 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
115 } else { 119 } else if (was_throttled) {
116 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
117 } 121 }
118 122
119 return 1; 123 return 1;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index a7aa8f900954..900332b800f8 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -55,6 +55,7 @@ struct x86_pmu {
55 int num_counters_fixed; 55 int num_counters_fixed;
56 int counter_bits; 56 int counter_bits;
57 u64 counter_mask; 57 u64 counter_mask;
58 int apic;
58 u64 max_period; 59 u64 max_period;
59 u64 intel_ctrl; 60 u64 intel_ctrl;
60}; 61};
@@ -72,8 +73,8 @@ static const u64 p6_perfmon_event_map[] =
72{ 73{
73 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 74 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
74 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 75 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
75 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 76 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
76 [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 77 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
77 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 78 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
78 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 79 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
79 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 80 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -613,6 +614,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
613 614
614static bool reserve_pmc_hardware(void) 615static bool reserve_pmc_hardware(void)
615{ 616{
617#ifdef CONFIG_X86_LOCAL_APIC
616 int i; 618 int i;
617 619
618 if (nmi_watchdog == NMI_LOCAL_APIC) 620 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -627,9 +629,11 @@ static bool reserve_pmc_hardware(void)
627 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 629 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
628 goto eventsel_fail; 630 goto eventsel_fail;
629 } 631 }
632#endif
630 633
631 return true; 634 return true;
632 635
636#ifdef CONFIG_X86_LOCAL_APIC
633eventsel_fail: 637eventsel_fail:
634 for (i--; i >= 0; i--) 638 for (i--; i >= 0; i--)
635 release_evntsel_nmi(x86_pmu.eventsel + i); 639 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -644,10 +648,12 @@ perfctr_fail:
644 enable_lapic_nmi_watchdog(); 648 enable_lapic_nmi_watchdog();
645 649
646 return false; 650 return false;
651#endif
647} 652}
648 653
649static void release_pmc_hardware(void) 654static void release_pmc_hardware(void)
650{ 655{
656#ifdef CONFIG_X86_LOCAL_APIC
651 int i; 657 int i;
652 658
653 for (i = 0; i < x86_pmu.num_counters; i++) { 659 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -657,6 +663,7 @@ static void release_pmc_hardware(void)
657 663
658 if (nmi_watchdog == NMI_LOCAL_APIC) 664 if (nmi_watchdog == NMI_LOCAL_APIC)
659 enable_lapic_nmi_watchdog(); 665 enable_lapic_nmi_watchdog();
666#endif
660} 667}
661 668
662static void hw_perf_counter_destroy(struct perf_counter *counter) 669static void hw_perf_counter_destroy(struct perf_counter *counter)
@@ -748,6 +755,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
748 hwc->sample_period = x86_pmu.max_period; 755 hwc->sample_period = x86_pmu.max_period;
749 hwc->last_period = hwc->sample_period; 756 hwc->last_period = hwc->sample_period;
750 atomic64_set(&hwc->period_left, hwc->sample_period); 757 atomic64_set(&hwc->period_left, hwc->sample_period);
758 } else {
759 /*
760 * If we have a PMU initialized but no APIC
761 * interrupts, we cannot sample hardware
762 * counters (user-space has to fall back and
763 * sample via a hrtimer based software counter):
764 */
765 if (!x86_pmu.apic)
766 return -EOPNOTSUPP;
751 } 767 }
752 768
753 counter->destroy = hw_perf_counter_destroy; 769 counter->destroy = hw_perf_counter_destroy;
@@ -1449,18 +1465,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1449 1465
1450void set_perf_counter_pending(void) 1466void set_perf_counter_pending(void)
1451{ 1467{
1468#ifdef CONFIG_X86_LOCAL_APIC
1452 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1469 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1470#endif
1453} 1471}
1454 1472
1455void perf_counters_lapic_init(void) 1473void perf_counters_lapic_init(void)
1456{ 1474{
1457 if (!x86_pmu_initialized()) 1475#ifdef CONFIG_X86_LOCAL_APIC
1476 if (!x86_pmu.apic || !x86_pmu_initialized())
1458 return; 1477 return;
1459 1478
1460 /* 1479 /*
1461 * Always use NMI for PMU 1480 * Always use NMI for PMU
1462 */ 1481 */
1463 apic_write(APIC_LVTPC, APIC_DM_NMI); 1482 apic_write(APIC_LVTPC, APIC_DM_NMI);
1483#endif
1464} 1484}
1465 1485
1466static int __kprobes 1486static int __kprobes
@@ -1484,7 +1504,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1484 1504
1485 regs = args->regs; 1505 regs = args->regs;
1486 1506
1507#ifdef CONFIG_X86_LOCAL_APIC
1487 apic_write(APIC_LVTPC, APIC_DM_NMI); 1508 apic_write(APIC_LVTPC, APIC_DM_NMI);
1509#endif
1488 /* 1510 /*
1489 * Can't rely on the handled return value to say it was our NMI, two 1511 * Can't rely on the handled return value to say it was our NMI, two
1490 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1512 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1515,6 +1537,7 @@ static struct x86_pmu p6_pmu = {
1515 .event_map = p6_pmu_event_map, 1537 .event_map = p6_pmu_event_map,
1516 .raw_event = p6_pmu_raw_event, 1538 .raw_event = p6_pmu_raw_event,
1517 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1539 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1540 .apic = 1,
1518 .max_period = (1ULL << 31) - 1, 1541 .max_period = (1ULL << 31) - 1,
1519 .version = 0, 1542 .version = 0,
1520 .num_counters = 2, 1543 .num_counters = 2,
@@ -1541,6 +1564,7 @@ static struct x86_pmu intel_pmu = {
1541 .event_map = intel_pmu_event_map, 1564 .event_map = intel_pmu_event_map,
1542 .raw_event = intel_pmu_raw_event, 1565 .raw_event = intel_pmu_raw_event,
1543 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1566 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1567 .apic = 1,
1544 /* 1568 /*
1545 * Intel PMCs cannot be accessed sanely above 32 bit width, 1569 * Intel PMCs cannot be accessed sanely above 32 bit width,
1546 * so we install an artificial 1<<31 period regardless of 1570 * so we install an artificial 1<<31 period regardless of
@@ -1564,6 +1588,7 @@ static struct x86_pmu amd_pmu = {
1564 .num_counters = 4, 1588 .num_counters = 4,
1565 .counter_bits = 48, 1589 .counter_bits = 48,
1566 .counter_mask = (1ULL << 48) - 1, 1590 .counter_mask = (1ULL << 48) - 1,
1591 .apic = 1,
1567 /* use highest bit to detect overflow */ 1592 /* use highest bit to detect overflow */
1568 .max_period = (1ULL << 47) - 1, 1593 .max_period = (1ULL << 47) - 1,
1569}; 1594};
@@ -1589,13 +1614,14 @@ static int p6_pmu_init(void)
1589 return -ENODEV; 1614 return -ENODEV;
1590 } 1615 }
1591 1616
1617 x86_pmu = p6_pmu;
1618
1592 if (!cpu_has_apic) { 1619 if (!cpu_has_apic) {
1593 pr_info("no Local APIC, try rebooting with lapic"); 1620 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1594 return -ENODEV; 1621 pr_info("no hardware sampling interrupt available.\n");
1622 x86_pmu.apic = 0;
1595 } 1623 }
1596 1624
1597 x86_pmu = p6_pmu;
1598
1599 return 0; 1625 return 0;
1600} 1626}
1601 1627
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 19ccf6d0dccf..fe26ba3e3451 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
354 */ 354 */
355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); 355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
356 if (c16) { 356 if (c16) {
357 for (i = 0; i < sizeof(vendor) && *c16; ++i) 357 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
358 vendor[i] = *c16++; 358 vendor[i] = *c16++;
359 vendor[i] = '\0'; 359 vendor[i] = '\0';
360 } else 360 } else
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 834c9da8bf9d..a06e8d101844 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -405,7 +405,7 @@ EXPORT_SYMBOL(machine_real_restart);
405#endif /* CONFIG_X86_32 */ 405#endif /* CONFIG_X86_32 */
406 406
407/* 407/*
408 * Apple MacBook5,2 (2009 MacBook) needs reboot=p 408 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
409 */ 409 */
410static int __init set_pci_reboot(const struct dmi_system_id *d) 410static int __init set_pci_reboot(const struct dmi_system_id *d)
411{ 411{
@@ -418,12 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
418} 418}
419 419
420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { 420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
421 { /* Handle problems with rebooting on Apple MacBook5,2 */ 421 { /* Handle problems with rebooting on Apple MacBook5 */
422 .callback = set_pci_reboot, 422 .callback = set_pci_reboot,
423 .ident = "Apple MacBook", 423 .ident = "Apple MacBook5",
424 .matches = { 424 .matches = {
425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), 425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), 426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
427 },
428 },
429 { /* Handle problems with rebooting on Apple MacBookPro5 */
430 .callback = set_pci_reboot,
431 .ident = "Apple MacBookPro5",
432 .matches = {
433 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
434 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
427 }, 435 },
428 }, 436 },
429 { } 437 { }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d4..71f4368b357e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
275 * use the TSC value at the transitions to calculate a pretty 275 * use the TSC value at the transitions to calculate a pretty
276 * good value for the TSC frequencty. 276 * good value for the TSC frequencty.
277 */ 277 */
278static inline int pit_verify_msb(unsigned char val)
279{
280 /* Ignore LSB */
281 inb(0x42);
282 return inb(0x42) == val;
283}
284
278static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 285static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
279{ 286{
280 int count; 287 int count;
281 u64 tsc = 0; 288 u64 tsc = 0;
282 289
283 for (count = 0; count < 50000; count++) { 290 for (count = 0; count < 50000; count++) {
284 /* Ignore LSB */ 291 if (!pit_verify_msb(val))
285 inb(0x42);
286 if (inb(0x42) != val)
287 break; 292 break;
288 tsc = get_cycles(); 293 tsc = get_cycles();
289 } 294 }
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
336 * to do that is to just read back the 16-bit counter 341 * to do that is to just read back the 16-bit counter
337 * once from the PIT. 342 * once from the PIT.
338 */ 343 */
339 inb(0x42); 344 pit_verify_msb(0);
340 inb(0x42);
341 345
342 if (pit_expect_msb(0xff, &tsc, &d1)) { 346 if (pit_expect_msb(0xff, &tsc, &d1)) {
343 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 347 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
348 * Iterate until the error is less than 500 ppm 352 * Iterate until the error is less than 500 ppm
349 */ 353 */
350 delta -= tsc; 354 delta -= tsc;
351 if (d1+d2 < delta >> 11) 355 if (d1+d2 >= delta >> 11)
352 goto success; 356 continue;
357
358 /*
359 * Check the PIT one more time to verify that
360 * all TSC reads were stable wrt the PIT.
361 *
362 * This also guarantees serialization of the
363 * last cycle read ('d2') in pit_expect_msb.
364 */
365 if (!pit_verify_msb(0xfe - i))
366 break;
367 goto success;
353 } 368 }
354 } 369 }
355 printk("Fast TSC calibration failed\n"); 370 printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2a..95a7289e4b0c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
441 ap.ds = __USER_DS; 441 ap.ds = __USER_DS;
442 ap.es = __USER_DS; 442 ap.es = __USER_DS;
443 ap.fs = __KERNEL_PERCPU; 443 ap.fs = __KERNEL_PERCPU;
444 ap.gs = 0; 444 ap.gs = __KERNEL_STACK_CANARY;
445 445
446 ap.eflags = 0; 446 ap.eflags = 0;
447 447
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d293ee2..21f68e00524f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
104 ktime_t remaining; 104 ktime_t remaining;
105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
106 106
107 if (!ps->pit_timer.period)
108 return 0;
109
107 /* 110 /*
108 * The Counter does not stop when it reaches zero. In 111 * The Counter does not stop when it reaches zero. In
109 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to 112 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7030b5f911bf..0ef5bb2b4043 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
489 * 489 *
490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc 490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
491 * containing more mappings. 491 * containing more mappings.
492 *
493 * Returns the number of rmap entries before the spte was added or zero if
494 * the spte was not added.
495 *
492 */ 496 */
493static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) 497static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
494{ 498{
495 struct kvm_mmu_page *sp; 499 struct kvm_mmu_page *sp;
496 struct kvm_rmap_desc *desc; 500 struct kvm_rmap_desc *desc;
497 unsigned long *rmapp; 501 unsigned long *rmapp;
498 int i; 502 int i, count = 0;
499 503
500 if (!is_rmap_pte(*spte)) 504 if (!is_rmap_pte(*spte))
501 return; 505 return count;
502 gfn = unalias_gfn(vcpu->kvm, gfn); 506 gfn = unalias_gfn(vcpu->kvm, gfn);
503 sp = page_header(__pa(spte)); 507 sp = page_header(__pa(spte));
504 sp->gfns[spte - sp->spt] = gfn; 508 sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
515 } else { 519 } else {
516 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); 520 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
517 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); 521 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
518 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) 522 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
519 desc = desc->more; 523 desc = desc->more;
524 count += RMAP_EXT;
525 }
520 if (desc->shadow_ptes[RMAP_EXT-1]) { 526 if (desc->shadow_ptes[RMAP_EXT-1]) {
521 desc->more = mmu_alloc_rmap_desc(vcpu); 527 desc->more = mmu_alloc_rmap_desc(vcpu);
522 desc = desc->more; 528 desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
525 ; 531 ;
526 desc->shadow_ptes[i] = spte; 532 desc->shadow_ptes[i] = spte;
527 } 533 }
534 return count;
528} 535}
529 536
530static void rmap_desc_remove_entry(unsigned long *rmapp, 537static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
754 return young; 761 return young;
755} 762}
756 763
764#define RMAP_RECYCLE_THRESHOLD 1000
765
766static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
767{
768 unsigned long *rmapp;
769
770 gfn = unalias_gfn(vcpu->kvm, gfn);
771 rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
772
773 kvm_unmap_rmapp(vcpu->kvm, rmapp);
774 kvm_flush_remote_tlbs(vcpu->kvm);
775}
776
757int kvm_age_hva(struct kvm *kvm, unsigned long hva) 777int kvm_age_hva(struct kvm *kvm, unsigned long hva)
758{ 778{
759 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 779 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1407 */ 1427 */
1408void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1428void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1409{ 1429{
1430 int used_pages;
1431
1432 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
1433 used_pages = max(0, used_pages);
1434
1410 /* 1435 /*
1411 * If we set the number of mmu pages to be smaller be than the 1436 * If we set the number of mmu pages to be smaller be than the
1412 * number of actived pages , we must to free some mmu pages before we 1437 * number of actived pages , we must to free some mmu pages before we
1413 * change the value 1438 * change the value
1414 */ 1439 */
1415 1440
1416 if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > 1441 if (used_pages > kvm_nr_mmu_pages) {
1417 kvm_nr_mmu_pages) { 1442 while (used_pages > kvm_nr_mmu_pages) {
1418 int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
1419 - kvm->arch.n_free_mmu_pages;
1420
1421 while (n_used_mmu_pages > kvm_nr_mmu_pages) {
1422 struct kvm_mmu_page *page; 1443 struct kvm_mmu_page *page;
1423 1444
1424 page = container_of(kvm->arch.active_mmu_pages.prev, 1445 page = container_of(kvm->arch.active_mmu_pages.prev,
1425 struct kvm_mmu_page, link); 1446 struct kvm_mmu_page, link);
1426 kvm_mmu_zap_page(kvm, page); 1447 kvm_mmu_zap_page(kvm, page);
1427 n_used_mmu_pages--; 1448 used_pages--;
1428 } 1449 }
1429 kvm->arch.n_free_mmu_pages = 0; 1450 kvm->arch.n_free_mmu_pages = 0;
1430 } 1451 }
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1740{ 1761{
1741 int was_rmapped = 0; 1762 int was_rmapped = 0;
1742 int was_writeble = is_writeble_pte(*shadow_pte); 1763 int was_writeble = is_writeble_pte(*shadow_pte);
1764 int rmap_count;
1743 1765
1744 pgprintk("%s: spte %llx access %x write_fault %d" 1766 pgprintk("%s: spte %llx access %x write_fault %d"
1745 " user_fault %d gfn %lx\n", 1767 " user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1781 1803
1782 page_header_update_slot(vcpu->kvm, shadow_pte, gfn); 1804 page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
1783 if (!was_rmapped) { 1805 if (!was_rmapped) {
1784 rmap_add(vcpu, shadow_pte, gfn, largepage); 1806 rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
1785 if (!is_rmap_pte(*shadow_pte)) 1807 if (!is_rmap_pte(*shadow_pte))
1786 kvm_release_pfn_clean(pfn); 1808 kvm_release_pfn_clean(pfn);
1809 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1810 rmap_recycle(vcpu, gfn, largepage);
1787 } else { 1811 } else {
1788 if (was_writeble) 1812 if (was_writeble)
1789 kvm_release_pfn_dirty(pfn); 1813 kvm_release_pfn_dirty(pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 71510e07e69e..b1f658ad2f06 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
711 svm->vmcb->control.tsc_offset += delta; 711 svm->vmcb->control.tsc_offset += delta;
712 vcpu->cpu = cpu; 712 vcpu->cpu = cpu;
713 kvm_migrate_timers(vcpu); 713 kvm_migrate_timers(vcpu);
714 svm->asid_generation = 0;
714 } 715 }
715 716
716 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 717 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
1031 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 1032 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1032 } 1033 }
1033 1034
1034 svm->vcpu.cpu = svm_data->cpu;
1035 svm->asid_generation = svm_data->asid_generation; 1035 svm->asid_generation = svm_data->asid_generation;
1036 svm->vmcb->control.asid = svm_data->next_asid++; 1036 svm->vmcb->control.asid = svm_data->next_asid++;
1037} 1037}
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
2301 2301
2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2303 if (svm->vcpu.cpu != cpu || 2303 /* FIXME: handle wraparound of asid_generation */
2304 svm->asid_generation != svm_data->asid_generation) 2304 if (svm->asid_generation != svm_data->asid_generation)
2305 new_asid(svm, svm_data); 2305 new_asid(svm, svm_data);
2306} 2306}
2307 2307
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 356a0ce85c68..29f912927a58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3157 struct vcpu_vmx *vmx = to_vmx(vcpu); 3157 struct vcpu_vmx *vmx = to_vmx(vcpu);
3158 enum emulation_result err = EMULATE_DONE; 3158 enum emulation_result err = EMULATE_DONE;
3159 3159
3160 preempt_enable();
3161 local_irq_enable(); 3160 local_irq_enable();
3161 preempt_enable();
3162 3162
3163 while (!guest_state_valid(vcpu)) { 3163 while (!guest_state_valid(vcpu)) {
3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3168 3168
3169 if (err != EMULATE_DONE) { 3169 if (err != EMULATE_DONE) {
3170 kvm_report_emulation_failure(vcpu, "emulation failure"); 3170 kvm_report_emulation_failure(vcpu, "emulation failure");
3171 return; 3171 break;
3172 } 3172 }
3173 3173
3174 if (signal_pending(current)) 3174 if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3177 schedule(); 3177 schedule();
3178 } 3178 }
3179 3179
3180 local_irq_disable();
3181 preempt_disable(); 3180 preempt_disable();
3181 local_irq_disable();
3182 3182
3183 vmx->invalid_state_emulation_result = err; 3183 vmx->invalid_state_emulation_result = err;
3184} 3184}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe5474aec41a..3d4529011828 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
704 return false; 704 return false;
705} 705}
706 706
707static bool valid_pat_type(unsigned t)
708{
709 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
710}
711
712static bool valid_mtrr_type(unsigned t)
713{
714 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
715}
716
717static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
718{
719 int i;
720
721 if (!msr_mtrr_valid(msr))
722 return false;
723
724 if (msr == MSR_IA32_CR_PAT) {
725 for (i = 0; i < 8; i++)
726 if (!valid_pat_type((data >> (i * 8)) & 0xff))
727 return false;
728 return true;
729 } else if (msr == MSR_MTRRdefType) {
730 if (data & ~0xcff)
731 return false;
732 return valid_mtrr_type(data & 0xff);
733 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
734 for (i = 0; i < 8 ; i++)
735 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
736 return false;
737 return true;
738 }
739
740 /* variable MTRRs */
741 return valid_mtrr_type(data & 0xff);
742}
743
707static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 744static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
708{ 745{
709 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 746 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
710 747
711 if (!msr_mtrr_valid(msr)) 748 if (!mtrr_valid(vcpu, msr, data))
712 return 1; 749 return 1;
713 750
714 if (msr == MSR_MTRRdefType) { 751 if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
1079 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) 1116 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1080 goto out; 1117 goto out;
1081 r = -E2BIG; 1118 r = -E2BIG;
1082 if (n < num_msrs_to_save) 1119 if (n < msr_list.nmsrs)
1083 goto out; 1120 goto out;
1084 r = -EFAULT; 1121 r = -EFAULT;
1085 if (copy_to_user(user_msr_list->indices, &msrs_to_save, 1122 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1086 num_msrs_to_save * sizeof(u32))) 1123 num_msrs_to_save * sizeof(u32)))
1087 goto out; 1124 goto out;
1088 if (copy_to_user(user_msr_list->indices 1125 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
1089 + num_msrs_to_save * sizeof(u32),
1090 &emulated_msrs, 1126 &emulated_msrs,
1091 ARRAY_SIZE(emulated_msrs) * sizeof(u32))) 1127 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
1092 goto out; 1128 goto out;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 958c1fa41900..fe3eba5d6b3e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -219,6 +219,8 @@ enum {
219 AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ 219 AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */
220 AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ 220 AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */
221 AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ 221 AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */
222 AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as
223 link offline */
222 224
223 /* ap->flags bits */ 225 /* ap->flags bits */
224 226
@@ -1663,6 +1665,7 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1663 int (*check_ready)(struct ata_link *link)) 1665 int (*check_ready)(struct ata_link *link))
1664{ 1666{
1665 struct ata_port *ap = link->ap; 1667 struct ata_port *ap = link->ap;
1668 struct ahci_host_priv *hpriv = ap->host->private_data;
1666 const char *reason = NULL; 1669 const char *reason = NULL;
1667 unsigned long now, msecs; 1670 unsigned long now, msecs;
1668 struct ata_taskfile tf; 1671 struct ata_taskfile tf;
@@ -1701,12 +1704,21 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1701 1704
1702 /* wait for link to become ready */ 1705 /* wait for link to become ready */
1703 rc = ata_wait_after_reset(link, deadline, check_ready); 1706 rc = ata_wait_after_reset(link, deadline, check_ready);
1704 /* link occupied, -ENODEV too is an error */ 1707 if (rc == -EBUSY && hpriv->flags & AHCI_HFLAG_SRST_TOUT_IS_OFFLINE) {
1705 if (rc) { 1708 /*
1709 * Workaround for cases where link online status can't
1710 * be trusted. Treat device readiness timeout as link
1711 * offline.
1712 */
1713 ata_link_printk(link, KERN_INFO,
1714 "device not ready, treating as offline\n");
1715 *class = ATA_DEV_NONE;
1716 } else if (rc) {
1717 /* link occupied, -ENODEV too is an error */
1706 reason = "device not ready"; 1718 reason = "device not ready";
1707 goto fail; 1719 goto fail;
1708 } 1720 } else
1709 *class = ahci_dev_classify(ap); 1721 *class = ahci_dev_classify(ap);
1710 1722
1711 DPRINTK("EXIT, class=%u\n", *class); 1723 DPRINTK("EXIT, class=%u\n", *class);
1712 return 0; 1724 return 0;
@@ -1773,7 +1785,8 @@ static int ahci_sb600_softreset(struct ata_link *link, unsigned int *class,
1773 irq_sts = readl(port_mmio + PORT_IRQ_STAT); 1785 irq_sts = readl(port_mmio + PORT_IRQ_STAT);
1774 if (irq_sts & PORT_IRQ_BAD_PMP) { 1786 if (irq_sts & PORT_IRQ_BAD_PMP) {
1775 ata_link_printk(link, KERN_WARNING, 1787 ata_link_printk(link, KERN_WARNING,
1776 "failed due to HW bug, retry pmp=0\n"); 1788 "applying SB600 PMP SRST workaround "
1789 "and retrying\n");
1777 rc = ahci_do_softreset(link, class, 0, deadline, 1790 rc = ahci_do_softreset(link, class, 0, deadline,
1778 ahci_check_ready); 1791 ahci_check_ready);
1779 } 1792 }
@@ -2726,6 +2739,56 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
2726 return !ver || strcmp(ver, dmi->driver_data) < 0; 2739 return !ver || strcmp(ver, dmi->driver_data) < 0;
2727} 2740}
2728 2741
2742static bool ahci_broken_online(struct pci_dev *pdev)
2743{
2744#define ENCODE_BUSDEVFN(bus, slot, func) \
2745 (void *)(unsigned long)(((bus) << 8) | PCI_DEVFN((slot), (func)))
2746 static const struct dmi_system_id sysids[] = {
2747 /*
2748 * There are several gigabyte boards which use
2749 * SIMG5723s configured as hardware RAID. Certain
2750 * 5723 firmware revisions shipped there keep the link
2751 * online but fail to answer properly to SRST or
2752 * IDENTIFY when no device is attached downstream
2753 * causing libata to retry quite a few times leading
2754 * to excessive detection delay.
2755 *
2756 * As these firmwares respond to the second reset try
2757 * with invalid device signature, considering unknown
2758 * sig as offline works around the problem acceptably.
2759 */
2760 {
2761 .ident = "EP45-DQ6",
2762 .matches = {
2763 DMI_MATCH(DMI_BOARD_VENDOR,
2764 "Gigabyte Technology Co., Ltd."),
2765 DMI_MATCH(DMI_BOARD_NAME, "EP45-DQ6"),
2766 },
2767 .driver_data = ENCODE_BUSDEVFN(0x0a, 0x00, 0),
2768 },
2769 {
2770 .ident = "EP45-DS5",
2771 .matches = {
2772 DMI_MATCH(DMI_BOARD_VENDOR,
2773 "Gigabyte Technology Co., Ltd."),
2774 DMI_MATCH(DMI_BOARD_NAME, "EP45-DS5"),
2775 },
2776 .driver_data = ENCODE_BUSDEVFN(0x03, 0x00, 0),
2777 },
2778 { } /* terminate list */
2779 };
2780#undef ENCODE_BUSDEVFN
2781 const struct dmi_system_id *dmi = dmi_first_match(sysids);
2782 unsigned int val;
2783
2784 if (!dmi)
2785 return false;
2786
2787 val = (unsigned long)dmi->driver_data;
2788
2789 return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff);
2790}
2791
2729static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 2792static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2730{ 2793{
2731 static int printed_version; 2794 static int printed_version;
@@ -2841,6 +2904,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2841 "BIOS update required for suspend/resume\n"); 2904 "BIOS update required for suspend/resume\n");
2842 } 2905 }
2843 2906
2907 if (ahci_broken_online(pdev)) {
2908 hpriv->flags |= AHCI_HFLAG_SRST_TOUT_IS_OFFLINE;
2909 dev_info(&pdev->dev,
2910 "online status unreliable, applying workaround\n");
2911 }
2912
2844 /* CAP.NP sometimes indicate the index of the last enabled 2913 /* CAP.NP sometimes indicate the index of the last enabled
2845 * port, at other times, that of the last possible port, so 2914 * port, at other times, that of the last possible port, so
2846 * determining the maximum port number requires looking at 2915 * determining the maximum port number requires looking at
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8ac98ff16d7d..072ba5ea138f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4302,6 +4302,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
4302 { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA }, 4302 { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
4303 { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA }, 4303 { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA },
4304 4304
4305 /* this one allows HPA unlocking but fails IOs on the area */
4306 { "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA },
4307
4305 /* Devices which report 1 sector over size HPA */ 4308 /* Devices which report 1 sector over size HPA */
4306 { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, }, 4309 { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, },
4307 { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, }, 4310 { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, },
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
index 5702affcb325..41c94b1ae493 100644
--- a/drivers/ata/pata_at91.c
+++ b/drivers/ata/pata_at91.c
@@ -250,7 +250,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
250 ata_port_desc(ap, "no IRQ, using PIO polling"); 250 ata_port_desc(ap, "no IRQ, using PIO polling");
251 } 251 }
252 252
253 info = kzalloc(sizeof(*info), GFP_KERNEL); 253 info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
254 254
255 if (!info) { 255 if (!info) {
256 dev_err(dev, "failed to allocate memory for private data\n"); 256 dev_err(dev, "failed to allocate memory for private data\n");
@@ -275,7 +275,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
275 if (!info->ide_addr) { 275 if (!info->ide_addr) {
276 dev_err(dev, "failed to map IO base\n"); 276 dev_err(dev, "failed to map IO base\n");
277 ret = -ENOMEM; 277 ret = -ENOMEM;
278 goto err_ide_ioremap; 278 goto err_put;
279 } 279 }
280 280
281 info->alt_addr = devm_ioremap(dev, 281 info->alt_addr = devm_ioremap(dev,
@@ -284,7 +284,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
284 if (!info->alt_addr) { 284 if (!info->alt_addr) {
285 dev_err(dev, "failed to map CTL base\n"); 285 dev_err(dev, "failed to map CTL base\n");
286 ret = -ENOMEM; 286 ret = -ENOMEM;
287 goto err_alt_ioremap; 287 goto err_put;
288 } 288 }
289 289
290 ap->ioaddr.cmd_addr = info->ide_addr; 290 ap->ioaddr.cmd_addr = info->ide_addr;
@@ -303,13 +303,8 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
303 irq ? ata_sff_interrupt : NULL, 303 irq ? ata_sff_interrupt : NULL,
304 irq_flags, &pata_at91_sht); 304 irq_flags, &pata_at91_sht);
305 305
306err_alt_ioremap: 306err_put:
307 devm_iounmap(dev, info->ide_addr);
308
309err_ide_ioremap:
310 clk_put(info->mck); 307 clk_put(info->mck);
311 kfree(info);
312
313 return ret; 308 return ret;
314} 309}
315 310
@@ -317,7 +312,6 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
317{ 312{
318 struct ata_host *host = dev_get_drvdata(&pdev->dev); 313 struct ata_host *host = dev_get_drvdata(&pdev->dev);
319 struct at91_ide_info *info; 314 struct at91_ide_info *info;
320 struct device *dev = &pdev->dev;
321 315
322 if (!host) 316 if (!host)
323 return 0; 317 return 0;
@@ -328,11 +322,8 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
328 if (!info) 322 if (!info)
329 return 0; 323 return 0;
330 324
331 devm_iounmap(dev, info->ide_addr);
332 devm_iounmap(dev, info->alt_addr);
333 clk_put(info->mck); 325 clk_put(info->mck);
334 326
335 kfree(info);
336 return 0; 327 return 0;
337} 328}
338 329
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index bec0b8ade66d..45915566e4e9 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * pata_atiixp.c - ATI PATA for new ATA layer 2 * pata_atiixp.c - ATI PATA for new ATA layer
3 * (C) 2005 Red Hat Inc 3 * (C) 2005 Red Hat Inc
4 * (C) 2009 Bartlomiej Zolnierkiewicz
4 * 5 *
5 * Based on 6 * Based on
6 * 7 *
@@ -61,20 +62,19 @@ static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev,
61 62
62 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 63 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
63 int dn = 2 * ap->port_no + adev->devno; 64 int dn = 2 * ap->port_no + adev->devno;
64
65 /* Check this is correct - the order is odd in both drivers */
66 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); 65 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
67 u16 pio_mode_data, pio_timing_data; 66 u32 pio_timing_data;
67 u16 pio_mode_data;
68 68
69 pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); 69 pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
70 pio_mode_data &= ~(0x7 << (4 * dn)); 70 pio_mode_data &= ~(0x7 << (4 * dn));
71 pio_mode_data |= pio << (4 * dn); 71 pio_mode_data |= pio << (4 * dn);
72 pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data); 72 pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data);
73 73
74 pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); 74 pci_read_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
75 pio_timing_data &= ~(0xFF << timing_shift); 75 pio_timing_data &= ~(0xFF << timing_shift);
76 pio_timing_data |= (pio_timings[pio] << timing_shift); 76 pio_timing_data |= (pio_timings[pio] << timing_shift);
77 pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); 77 pci_write_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
78} 78}
79 79
80/** 80/**
@@ -119,16 +119,17 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev)
119 udma_mode_data |= dma << (4 * dn); 119 udma_mode_data |= dma << (4 * dn);
120 pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data); 120 pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data);
121 } else { 121 } else {
122 u16 mwdma_timing_data;
123 /* Check this is correct - the order is odd in both drivers */
124 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); 122 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
123 u32 mwdma_timing_data;
125 124
126 dma -= XFER_MW_DMA_0; 125 dma -= XFER_MW_DMA_0;
127 126
128 pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data); 127 pci_read_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
128 &mwdma_timing_data);
129 mwdma_timing_data &= ~(0xFF << timing_shift); 129 mwdma_timing_data &= ~(0xFF << timing_shift);
130 mwdma_timing_data |= (mwdma_timings[dma] << timing_shift); 130 mwdma_timing_data |= (mwdma_timings[dma] << timing_shift);
131 pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data); 131 pci_write_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
132 mwdma_timing_data);
132 } 133 }
133 /* 134 /*
134 * We must now look at the PIO mode situation. We may need to 135 * We must now look at the PIO mode situation. We may need to
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index b2d11f300c39..86a40582999c 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -602,6 +602,7 @@ MODULE_VERSION(DRV_VERSION);
602 602
603static int adma_enabled; 603static int adma_enabled;
604static int swncq_enabled = 1; 604static int swncq_enabled = 1;
605static int msi_enabled;
605 606
606static void nv_adma_register_mode(struct ata_port *ap) 607static void nv_adma_register_mode(struct ata_port *ap)
607{ 608{
@@ -2459,6 +2460,11 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2459 } else if (type == SWNCQ) 2460 } else if (type == SWNCQ)
2460 nv_swncq_host_init(host); 2461 nv_swncq_host_init(host);
2461 2462
2463 if (msi_enabled) {
2464 dev_printk(KERN_NOTICE, &pdev->dev, "Using MSI\n");
2465 pci_enable_msi(pdev);
2466 }
2467
2462 pci_set_master(pdev); 2468 pci_set_master(pdev);
2463 return ata_host_activate(host, pdev->irq, ipriv->irq_handler, 2469 return ata_host_activate(host, pdev->irq, ipriv->irq_handler,
2464 IRQF_SHARED, ipriv->sht); 2470 IRQF_SHARED, ipriv->sht);
@@ -2558,4 +2564,6 @@ module_param_named(adma, adma_enabled, bool, 0444);
2558MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)"); 2564MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)");
2559module_param_named(swncq, swncq_enabled, bool, 0444); 2565module_param_named(swncq, swncq_enabled, bool, 0444);
2560MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)"); 2566MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)");
2567module_param_named(msi, msi_enabled, bool, 0444);
2568MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)");
2561 2569
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 81cb01bfc356..456594bd97bc 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -483,9 +483,6 @@ int platform_driver_register(struct platform_driver *drv)
483 drv->driver.remove = platform_drv_remove; 483 drv->driver.remove = platform_drv_remove;
484 if (drv->shutdown) 484 if (drv->shutdown)
485 drv->driver.shutdown = platform_drv_shutdown; 485 drv->driver.shutdown = platform_drv_shutdown;
486 if (drv->suspend || drv->resume)
487 pr_warning("Platform driver '%s' needs updating - please use "
488 "dev_pm_ops\n", drv->driver.name);
489 486
490 return driver_register(&drv->driver); 487 return driver_register(&drv->driver);
491} 488}
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 6e6942c45f5b..d083c73d784a 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -144,6 +144,8 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
144 144
145static int pty_write_room(struct tty_struct *tty) 145static int pty_write_room(struct tty_struct *tty)
146{ 146{
147 if (tty->stopped)
148 return 0;
147 return pty_space(tty->link); 149 return pty_space(tty->link);
148} 150}
149 151
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index b4a3dbcebe9b..f85aaf21e783 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -566,7 +566,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
566 566
567 ret = drm_vblank_get(dev, crtc); 567 ret = drm_vblank_get(dev, crtc);
568 if (ret) { 568 if (ret) {
569 DRM_ERROR("failed to acquire vblank counter, %d\n", ret); 569 DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
570 return ret; 570 return ret;
571 } 571 }
572 seq = drm_vblank_count(dev, crtc); 572 seq = drm_vblank_count(dev, crtc);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 54f492a488a9..7914097b09c6 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -566,6 +566,8 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
566 found_it = 1; 566 found_it = 1;
567 /* if equal delete the probed mode */ 567 /* if equal delete the probed mode */
568 mode->status = pmode->status; 568 mode->status = pmode->status;
569 /* Merge type bits together */
570 mode->type |= pmode->type;
569 list_del(&pmode->head); 571 list_del(&pmode->head);
570 drm_mode_destroy(connector->dev, pmode); 572 drm_mode_destroy(connector->dev, pmode);
571 break; 573 break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 83aee80e77a6..7ebc84c2881e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -190,7 +190,7 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe)
190 low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL; 190 low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL;
191 191
192 if (!i915_pipe_enabled(dev, pipe)) { 192 if (!i915_pipe_enabled(dev, pipe)) {
193 DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); 193 DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe);
194 return 0; 194 return 0;
195 } 195 }
196 196
@@ -219,7 +219,7 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe)
219 int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45; 219 int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45;
220 220
221 if (!i915_pipe_enabled(dev, pipe)) { 221 if (!i915_pipe_enabled(dev, pipe)) {
222 DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); 222 DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe);
223 return 0; 223 return 0;
224 } 224 }
225 225
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9b..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
1974 /* otherwise we have to go forward and ... */ 1975 /* otherwise we have to go forward and ... */
1975 mddev->events ++; 1976 mddev->events ++;
1976 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 1977 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1977 /* .. if the array isn't clean, insist on an odd 'events' */ 1978 /* .. if the array isn't clean, an 'even' event must also go
1978 if ((mddev->events&1)==0) { 1979 * to spares. */
1979 mddev->events++; 1980 if ((mddev->events&1)==0)
1980 nospares = 0; 1981 nospares = 0;
1981 }
1982 } else { 1982 } else {
1983 /* otherwise insist on an even 'events' (for clean states) */ 1983 /* otherwise an 'odd' event must go to spares */
1984 if ((mddev->events&1)) { 1984 if ((mddev->events&1))
1985 mddev->events++;
1986 nospares = 0; 1985 nospares = 0;
1987 }
1988 } 1986 }
1989 } 1987 }
1990 1988
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3601 if (max < mddev->resync_min) 3599 if (max < mddev->resync_min)
3602 return -EINVAL; 3600 return -EINVAL;
3603 if (max < mddev->resync_max && 3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3604 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3605 return -EBUSY; 3604 return -EBUSY;
3606 3605
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4304 struct gendisk *disk = mddev->gendisk; 4303 struct gendisk *disk = mddev->gendisk;
4305 mdk_rdev_t *rdev; 4304 mdk_rdev_t *rdev;
4306 4305
4306 mutex_lock(&mddev->open_mutex);
4307 if (atomic_read(&mddev->openers) > is_open) { 4307 if (atomic_read(&mddev->openers) > is_open) {
4308 printk("md: %s still in use.\n",mdname(mddev)); 4308 printk("md: %s still in use.\n",mdname(mddev));
4309 return -EBUSY; 4309 err = -EBUSY;
4310 } 4310 } else if (mddev->pers) {
4311
4312 if (mddev->pers) {
4313 4311
4314 if (mddev->sync_thread) { 4312 if (mddev->sync_thread) {
4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4313 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4367 set_disk_ro(disk, 1); 4365 set_disk_ro(disk, 1);
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4369 } 4367 }
4370 4368out:
4369 mutex_unlock(&mddev->open_mutex);
4370 if (err)
4371 return err;
4371 /* 4372 /*
4372 * Free resources if final stop 4373 * Free resources if final stop
4373 */ 4374 */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4433 blk_integrity_unregister(disk); 4434 blk_integrity_unregister(disk);
4434 md_new_event(mddev); 4435 md_new_event(mddev);
4435 sysfs_notify_dirent(mddev->sysfs_state); 4436 sysfs_notify_dirent(mddev->sysfs_state);
4436out:
4437 return err; 4437 return err;
4438} 4438}
4439 4439
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5518 } 5518 }
5519 BUG_ON(mddev != bdev->bd_disk->private_data); 5519 BUG_ON(mddev != bdev->bd_disk->private_data);
5520 5520
5521 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5521 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5522 goto out; 5522 goto out;
5523 5523
5524 err = 0; 5524 err = 0;
5525 atomic_inc(&mddev->openers); 5525 atomic_inc(&mddev->openers);
5526 mddev_unlock(mddev); 5526 mutex_unlock(&mddev->open_mutex);
5527 5527
5528 check_disk_change(bdev); 5528 check_disk_change(bdev);
5529 out: 5529 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf9..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee67dfa..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3786 sector_nr = raid5_size(mddev, 0, 0) 3786 sector_nr = raid5_size(mddev, 0, 0)
3787 - conf->reshape_progress; 3787 - conf->reshape_progress;
3788 } else if (mddev->delta_disks > 0 && 3788 } else if (mddev->delta_disks >= 0 &&
3789 conf->reshape_progress > 0) 3789 conf->reshape_progress > 0)
3790 sector_nr = conf->reshape_progress; 3790 sector_nr = conf->reshape_progress;
3791 sector_div(sector_nr, new_data_disks); 3791 sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
4509 (old_disks-max_degraded)); 4509 (old_disks-max_degraded));
4510 /* here_old is the first stripe that we might need to read 4510 /* here_old is the first stripe that we might need to read
4511 * from */ 4511 * from */
4512 if (here_new >= here_old) { 4512 if (mddev->delta_disks == 0) {
4513 /* We cannot be sure it is safe to start an in-place
4514 * reshape. It is only safe if user-space if monitoring
4515 * and taking constant backups.
4516 * mdadm always starts a situation like this in
4517 * readonly mode so it can take control before
4518 * allowing any writes. So just check for that.
4519 */
4520 if ((here_new * mddev->new_chunk_sectors !=
4521 here_old * mddev->chunk_sectors) ||
4522 mddev->ro == 0) {
4523 printk(KERN_ERR "raid5: in-place reshape must be started"
4524 " in read-only mode - aborting\n");
4525 return -EINVAL;
4526 }
4527 } else if (mddev->delta_disks < 0
4528 ? (here_new * mddev->new_chunk_sectors <=
4529 here_old * mddev->chunk_sectors)
4530 : (here_new * mddev->new_chunk_sectors >=
4531 here_old * mddev->chunk_sectors)) {
4513 /* Reading from the same stripe as writing to - bad */ 4532 /* Reading from the same stripe as writing to - bad */
4514 printk(KERN_ERR "raid5: reshape_position too early for " 4533 printk(KERN_ERR "raid5: reshape_position too early for "
4515 "auto-recovery - aborting.\n"); 4534 "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
5078 mddev->degraded--; 5097 mddev->degraded--;
5079 for (d = conf->raid_disks ; 5098 for (d = conf->raid_disks ;
5080 d < conf->raid_disks - mddev->delta_disks; 5099 d < conf->raid_disks - mddev->delta_disks;
5081 d++) 5100 d++) {
5082 raid5_remove_disk(mddev, d); 5101 mdk_rdev_t *rdev = conf->disks[d].rdev;
5102 if (rdev && raid5_remove_disk(mddev, d) == 0) {
5103 char nm[20];
5104 sprintf(nm, "rd%d", rdev->raid_disk);
5105 sysfs_remove_link(&mddev->kobj, nm);
5106 rdev->raid_disk = -1;
5107 }
5108 }
5083 } 5109 }
5084 mddev->layout = conf->algorithm; 5110 mddev->layout = conf->algorithm;
5085 mddev->chunk_sectors = conf->chunk_sectors; 5111 mddev->chunk_sectors = conf->chunk_sectors;
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/drivers/mtd/maps/sbc8240.c
+++ /dev/null
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 0f2034c3ed2f..e4d9ef0c965a 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1254,6 +1254,7 @@ out_free:
1254 if (!ubi->volumes[i]) 1254 if (!ubi->volumes[i])
1255 continue; 1255 continue;
1256 kfree(ubi->volumes[i]->eba_tbl); 1256 kfree(ubi->volumes[i]->eba_tbl);
1257 ubi->volumes[i]->eba_tbl = NULL;
1257 } 1258 }
1258 return err; 1259 return err;
1259} 1260}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index a423131b6171..b847745394b4 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -781,11 +781,22 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
781 return -EINVAL; 781 return -EINVAL;
782 } 782 }
783 783
784 /*
785 * Make sure that all PEBs have the same image sequence number.
786 * This allows us to detect situations when users flash UBI
787 * images incorrectly, so that the flash has the new UBI image
788 * and leftovers from the old one. This feature was added
789 * relatively recently, and the sequence number was always
790 * zero, because old UBI implementations always set it to zero.
791 * For this reasons, we do not panic if some PEBs have zero
792 * sequence number, while other PEBs have non-zero sequence
793 * number.
794 */
784 image_seq = be32_to_cpu(ech->image_seq); 795 image_seq = be32_to_cpu(ech->image_seq);
785 if (!si->image_seq_set) { 796 if (!si->image_seq_set) {
786 ubi->image_seq = image_seq; 797 ubi->image_seq = image_seq;
787 si->image_seq_set = 1; 798 si->image_seq_set = 1;
788 } else if (ubi->image_seq != image_seq) { 799 } else if (ubi->image_seq && ubi->image_seq != image_seq) {
789 ubi_err("bad image sequence number %d in PEB %d, " 800 ubi_err("bad image sequence number %d in PEB %d, "
790 "expected %d", image_seq, pnum, ubi->image_seq); 801 "expected %d", image_seq, pnum, ubi->image_seq);
791 ubi_dbg_dump_ec_hdr(ech); 802 ubi_dbg_dump_ec_hdr(ech);
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index a4494d78e7c2..8aebe1e9d3d6 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -90,11 +90,10 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = {
90 90
91static DEFINE_MUTEX(sn_hotplug_mutex); 91static DEFINE_MUTEX(sn_hotplug_mutex);
92 92
93static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, 93static ssize_t path_show(struct pci_slot *pci_slot, char *buf)
94 char *buf)
95{ 94{
96 int retval = -ENOENT; 95 int retval = -ENOENT;
97 struct slot *slot = bss_hotplug_slot->private; 96 struct slot *slot = pci_slot->hotplug->private;
98 97
99 if (!slot) 98 if (!slot)
100 return retval; 99 return retval;
@@ -103,7 +102,7 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
103 return retval; 102 return retval;
104} 103}
105 104
106static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path); 105static struct pci_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
107 106
108static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) 107static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device)
109{ 108{
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..e4e089a8f294 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
255 255
256 if (put_dreq(dreq)) 256 if (put_dreq(dreq))
257 nfs_direct_complete(dreq); 257 nfs_direct_complete(dreq);
258 nfs_readdata_release(calldata); 258 nfs_readdata_free(data);
259} 259}
260 260
261static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
314 data->npages, 1, 0, data->pagevec, NULL); 314 data->npages, 1, 0, data->pagevec, NULL);
315 up_read(&current->mm->mmap_sem); 315 up_read(&current->mm->mmap_sem);
316 if (result < 0) { 316 if (result < 0) {
317 nfs_readdata_release(data); 317 nfs_readdata_free(data);
318 break; 318 break;
319 } 319 }
320 if ((unsigned)result < data->npages) { 320 if ((unsigned)result < data->npages) {
321 bytes = result * PAGE_SIZE; 321 bytes = result * PAGE_SIZE;
322 if (bytes <= pgbase) { 322 if (bytes <= pgbase) {
323 nfs_direct_release_pages(data->pagevec, result); 323 nfs_direct_release_pages(data->pagevec, result);
324 nfs_readdata_release(data); 324 nfs_readdata_free(data);
325 break; 325 break;
326 } 326 }
327 bytes -= pgbase; 327 bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
334 data->inode = inode; 334 data->inode = inode;
335 data->cred = msg.rpc_cred; 335 data->cred = msg.rpc_cred;
336 data->args.fh = NFS_FH(inode); 336 data->args.fh = NFS_FH(inode);
337 data->args.context = get_nfs_open_context(ctx); 337 data->args.context = ctx;
338 data->args.offset = pos; 338 data->args.offset = pos;
339 data->args.pgbase = pgbase; 339 data->args.pgbase = pgbase;
340 data->args.pages = data->pagevec; 340 data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); 441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
442 list_del(&data->pages); 442 list_del(&data->pages);
443 nfs_direct_release_pages(data->pagevec, data->npages); 443 nfs_direct_release_pages(data->pagevec, data->npages);
444 nfs_writedata_release(data); 444 nfs_writedata_free(data);
445 } 445 }
446} 446}
447 447
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
534 534
535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
536 nfs_direct_write_complete(dreq, data->inode); 536 nfs_direct_write_complete(dreq, data->inode);
537 nfs_commitdata_release(calldata); 537 nfs_commit_free(data);
538} 538}
539 539
540static const struct rpc_call_ops nfs_commit_direct_ops = { 540static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
570 data->args.fh = NFS_FH(data->inode); 570 data->args.fh = NFS_FH(data->inode);
571 data->args.offset = 0; 571 data->args.offset = 0;
572 data->args.count = 0; 572 data->args.count = 0;
573 data->args.context = get_nfs_open_context(dreq->ctx); 573 data->args.context = dreq->ctx;
574 data->res.count = 0; 574 data->res.count = 0;
575 data->res.fattr = &data->fattr; 575 data->res.fattr = &data->fattr;
576 data->res.verf = &data->verf; 576 data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
734 data->npages, 0, 0, data->pagevec, NULL); 734 data->npages, 0, 0, data->pagevec, NULL);
735 up_read(&current->mm->mmap_sem); 735 up_read(&current->mm->mmap_sem);
736 if (result < 0) { 736 if (result < 0) {
737 nfs_writedata_release(data); 737 nfs_writedata_free(data);
738 break; 738 break;
739 } 739 }
740 if ((unsigned)result < data->npages) { 740 if ((unsigned)result < data->npages) {
741 bytes = result * PAGE_SIZE; 741 bytes = result * PAGE_SIZE;
742 if (bytes <= pgbase) { 742 if (bytes <= pgbase) {
743 nfs_direct_release_pages(data->pagevec, result); 743 nfs_direct_release_pages(data->pagevec, result);
744 nfs_writedata_release(data); 744 nfs_writedata_free(data);
745 break; 745 break;
746 } 746 }
747 bytes -= pgbase; 747 bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
756 data->inode = inode; 756 data->inode = inode;
757 data->cred = msg.rpc_cred; 757 data->cred = msg.rpc_cred;
758 data->args.fh = NFS_FH(inode); 758 data->args.fh = NFS_FH(inode);
759 data->args.context = get_nfs_open_context(ctx); 759 data->args.context = ctx;
760 data->args.offset = pos; 760 data->args.offset = pos;
761 data->args.pgbase = pgbase; 761 data->args.pgbase = pgbase;
762 data->args.pages = data->pagevec; 762 data->args.pages = data->pagevec;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e65cc2e650c8..cfc30d362f94 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -702,29 +702,12 @@ struct compound_hdr {
702 u32 minorversion; 702 u32 minorversion;
703}; 703};
704 704
705/* 705static __be32 *reserve_space(struct xdr_stream *xdr, size_t nbytes)
706 * START OF "GENERIC" ENCODE ROUTINES. 706{
707 * These may look a little ugly since they are imported from a "generic" 707 __be32 *p = xdr_reserve_space(xdr, nbytes);
708 * set of XDR encode/decode routines which are intended to be shared by 708 BUG_ON(!p);
709 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 709 return p;
710 * 710}
711 * If the pain of reading these is too great, it should be a straightforward
712 * task to translate them into Linux-specific versions which are more
713 * consistent with the style used in NFSv2/v3...
714 */
715#define WRITE32(n) *p++ = htonl(n)
716#define WRITE64(n) do { \
717 *p++ = htonl((uint32_t)((n) >> 32)); \
718 *p++ = htonl((uint32_t)(n)); \
719} while (0)
720#define WRITEMEM(ptr,nbytes) do { \
721 p = xdr_encode_opaque_fixed(p, ptr, nbytes); \
722} while (0)
723
724#define RESERVE_SPACE(nbytes) do { \
725 p = xdr_reserve_space(xdr, nbytes); \
726 BUG_ON(!p); \
727} while (0)
728 711
729static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) 712static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
730{ 713{
@@ -749,12 +732,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
749 732
750 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); 733 dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag);
751 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); 734 BUG_ON(hdr->taglen > NFS4_MAXTAGLEN);
752 RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); 735 p = reserve_space(xdr, 4 + hdr->taglen + 8);
753 WRITE32(hdr->taglen); 736 p = xdr_encode_opaque(p, hdr->tag, hdr->taglen);
754 WRITEMEM(hdr->tag, hdr->taglen); 737 *p++ = cpu_to_be32(hdr->minorversion);
755 WRITE32(hdr->minorversion);
756 hdr->nops_p = p; 738 hdr->nops_p = p;
757 WRITE32(hdr->nops); 739 *p = cpu_to_be32(hdr->nops);
758} 740}
759 741
760static void encode_nops(struct compound_hdr *hdr) 742static void encode_nops(struct compound_hdr *hdr)
@@ -829,55 +811,53 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
829 len += 16; 811 len += 16;
830 else if (iap->ia_valid & ATTR_MTIME) 812 else if (iap->ia_valid & ATTR_MTIME)
831 len += 4; 813 len += 4;
832 RESERVE_SPACE(len); 814 p = reserve_space(xdr, len);
833 815
834 /* 816 /*
835 * We write the bitmap length now, but leave the bitmap and the attribute 817 * We write the bitmap length now, but leave the bitmap and the attribute
836 * buffer length to be backfilled at the end of this routine. 818 * buffer length to be backfilled at the end of this routine.
837 */ 819 */
838 WRITE32(2); 820 *p++ = cpu_to_be32(2);
839 q = p; 821 q = p;
840 p += 3; 822 p += 3;
841 823
842 if (iap->ia_valid & ATTR_SIZE) { 824 if (iap->ia_valid & ATTR_SIZE) {
843 bmval0 |= FATTR4_WORD0_SIZE; 825 bmval0 |= FATTR4_WORD0_SIZE;
844 WRITE64(iap->ia_size); 826 p = xdr_encode_hyper(p, iap->ia_size);
845 } 827 }
846 if (iap->ia_valid & ATTR_MODE) { 828 if (iap->ia_valid & ATTR_MODE) {
847 bmval1 |= FATTR4_WORD1_MODE; 829 bmval1 |= FATTR4_WORD1_MODE;
848 WRITE32(iap->ia_mode & S_IALLUGO); 830 *p++ = cpu_to_be32(iap->ia_mode & S_IALLUGO);
849 } 831 }
850 if (iap->ia_valid & ATTR_UID) { 832 if (iap->ia_valid & ATTR_UID) {
851 bmval1 |= FATTR4_WORD1_OWNER; 833 bmval1 |= FATTR4_WORD1_OWNER;
852 WRITE32(owner_namelen); 834 p = xdr_encode_opaque(p, owner_name, owner_namelen);
853 WRITEMEM(owner_name, owner_namelen);
854 } 835 }
855 if (iap->ia_valid & ATTR_GID) { 836 if (iap->ia_valid & ATTR_GID) {
856 bmval1 |= FATTR4_WORD1_OWNER_GROUP; 837 bmval1 |= FATTR4_WORD1_OWNER_GROUP;
857 WRITE32(owner_grouplen); 838 p = xdr_encode_opaque(p, owner_group, owner_grouplen);
858 WRITEMEM(owner_group, owner_grouplen);
859 } 839 }
860 if (iap->ia_valid & ATTR_ATIME_SET) { 840 if (iap->ia_valid & ATTR_ATIME_SET) {
861 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 841 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
862 WRITE32(NFS4_SET_TO_CLIENT_TIME); 842 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
863 WRITE32(0); 843 *p++ = cpu_to_be32(0);
864 WRITE32(iap->ia_mtime.tv_sec); 844 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
865 WRITE32(iap->ia_mtime.tv_nsec); 845 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
866 } 846 }
867 else if (iap->ia_valid & ATTR_ATIME) { 847 else if (iap->ia_valid & ATTR_ATIME) {
868 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET; 848 bmval1 |= FATTR4_WORD1_TIME_ACCESS_SET;
869 WRITE32(NFS4_SET_TO_SERVER_TIME); 849 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
870 } 850 }
871 if (iap->ia_valid & ATTR_MTIME_SET) { 851 if (iap->ia_valid & ATTR_MTIME_SET) {
872 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 852 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
873 WRITE32(NFS4_SET_TO_CLIENT_TIME); 853 *p++ = cpu_to_be32(NFS4_SET_TO_CLIENT_TIME);
874 WRITE32(0); 854 *p++ = cpu_to_be32(0);
875 WRITE32(iap->ia_mtime.tv_sec); 855 *p++ = cpu_to_be32(iap->ia_mtime.tv_sec);
876 WRITE32(iap->ia_mtime.tv_nsec); 856 *p++ = cpu_to_be32(iap->ia_mtime.tv_nsec);
877 } 857 }
878 else if (iap->ia_valid & ATTR_MTIME) { 858 else if (iap->ia_valid & ATTR_MTIME) {
879 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET; 859 bmval1 |= FATTR4_WORD1_TIME_MODIFY_SET;
880 WRITE32(NFS4_SET_TO_SERVER_TIME); 860 *p++ = cpu_to_be32(NFS4_SET_TO_SERVER_TIME);
881 } 861 }
882 862
883 /* 863 /*
@@ -891,7 +871,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
891 len = (char *)p - (char *)q - 12; 871 len = (char *)p - (char *)q - 12;
892 *q++ = htonl(bmval0); 872 *q++ = htonl(bmval0);
893 *q++ = htonl(bmval1); 873 *q++ = htonl(bmval1);
894 *q++ = htonl(len); 874 *q = htonl(len);
895 875
896/* out: */ 876/* out: */
897} 877}
@@ -900,9 +880,9 @@ static void encode_access(struct xdr_stream *xdr, u32 access, struct compound_hd
900{ 880{
901 __be32 *p; 881 __be32 *p;
902 882
903 RESERVE_SPACE(8); 883 p = reserve_space(xdr, 8);
904 WRITE32(OP_ACCESS); 884 *p++ = cpu_to_be32(OP_ACCESS);
905 WRITE32(access); 885 *p = cpu_to_be32(access);
906 hdr->nops++; 886 hdr->nops++;
907 hdr->replen += decode_access_maxsz; 887 hdr->replen += decode_access_maxsz;
908} 888}
@@ -911,10 +891,10 @@ static void encode_close(struct xdr_stream *xdr, const struct nfs_closeargs *arg
911{ 891{
912 __be32 *p; 892 __be32 *p;
913 893
914 RESERVE_SPACE(8+NFS4_STATEID_SIZE); 894 p = reserve_space(xdr, 8+NFS4_STATEID_SIZE);
915 WRITE32(OP_CLOSE); 895 *p++ = cpu_to_be32(OP_CLOSE);
916 WRITE32(arg->seqid->sequence->counter); 896 *p++ = cpu_to_be32(arg->seqid->sequence->counter);
917 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 897 xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
918 hdr->nops++; 898 hdr->nops++;
919 hdr->replen += decode_close_maxsz; 899 hdr->replen += decode_close_maxsz;
920} 900}
@@ -923,10 +903,10 @@ static void encode_commit(struct xdr_stream *xdr, const struct nfs_writeargs *ar
923{ 903{
924 __be32 *p; 904 __be32 *p;
925 905
926 RESERVE_SPACE(16); 906 p = reserve_space(xdr, 16);
927 WRITE32(OP_COMMIT); 907 *p++ = cpu_to_be32(OP_COMMIT);
928 WRITE64(args->offset); 908 p = xdr_encode_hyper(p, args->offset);
929 WRITE32(args->count); 909 *p = cpu_to_be32(args->count);
930 hdr->nops++; 910 hdr->nops++;
931 hdr->replen += decode_commit_maxsz; 911 hdr->replen += decode_commit_maxsz;
932} 912}
@@ -935,30 +915,28 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
935{ 915{
936 __be32 *p; 916 __be32 *p;
937 917
938 RESERVE_SPACE(8); 918 p = reserve_space(xdr, 8);
939 WRITE32(OP_CREATE); 919 *p++ = cpu_to_be32(OP_CREATE);
940 WRITE32(create->ftype); 920 *p = cpu_to_be32(create->ftype);
941 921
942 switch (create->ftype) { 922 switch (create->ftype) {
943 case NF4LNK: 923 case NF4LNK:
944 RESERVE_SPACE(4); 924 p = reserve_space(xdr, 4);
945 WRITE32(create->u.symlink.len); 925 *p = cpu_to_be32(create->u.symlink.len);
946 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len); 926 xdr_write_pages(xdr, create->u.symlink.pages, 0, create->u.symlink.len);
947 break; 927 break;
948 928
949 case NF4BLK: case NF4CHR: 929 case NF4BLK: case NF4CHR:
950 RESERVE_SPACE(8); 930 p = reserve_space(xdr, 8);
951 WRITE32(create->u.device.specdata1); 931 *p++ = cpu_to_be32(create->u.device.specdata1);
952 WRITE32(create->u.device.specdata2); 932 *p = cpu_to_be32(create->u.device.specdata2);
953 break; 933 break;
954 934
955 default: 935 default:
956 break; 936 break;
957 } 937 }
958 938
959 RESERVE_SPACE(4 + create->name->len); 939 encode_string(xdr, create->name->len, create->name->name);
960 WRITE32(create->name->len);
961 WRITEMEM(create->name->name, create->name->len);
962 hdr->nops++; 940 hdr->nops++;
963 hdr->replen += decode_create_maxsz; 941 hdr->replen += decode_create_maxsz;
964 942
@@ -969,10 +947,10 @@ static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct c
969{ 947{
970 __be32 *p; 948 __be32 *p;
971 949
972 RESERVE_SPACE(12); 950 p = reserve_space(xdr, 12);
973 WRITE32(OP_GETATTR); 951 *p++ = cpu_to_be32(OP_GETATTR);
974 WRITE32(1); 952 *p++ = cpu_to_be32(1);
975 WRITE32(bitmap); 953 *p = cpu_to_be32(bitmap);
976 hdr->nops++; 954 hdr->nops++;
977 hdr->replen += decode_getattr_maxsz; 955 hdr->replen += decode_getattr_maxsz;
978} 956}
@@ -981,11 +959,11 @@ static void encode_getattr_two(struct xdr_stream *xdr, uint32_t bm0, uint32_t bm
981{ 959{
982 __be32 *p; 960 __be32 *p;
983 961
984 RESERVE_SPACE(16); 962 p = reserve_space(xdr, 16);
985 WRITE32(OP_GETATTR); 963 *p++ = cpu_to_be32(OP_GETATTR);
986 WRITE32(2); 964 *p++ = cpu_to_be32(2);
987 WRITE32(bm0); 965 *p++ = cpu_to_be32(bm0);
988 WRITE32(bm1); 966 *p = cpu_to_be32(bm1);
989 hdr->nops++; 967 hdr->nops++;
990 hdr->replen += decode_getattr_maxsz; 968 hdr->replen += decode_getattr_maxsz;
991} 969}
@@ -1012,8 +990,8 @@ static void encode_getfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1012{ 990{
1013 __be32 *p; 991 __be32 *p;
1014 992
1015 RESERVE_SPACE(4); 993 p = reserve_space(xdr, 4);
1016 WRITE32(OP_GETFH); 994 *p = cpu_to_be32(OP_GETFH);
1017 hdr->nops++; 995 hdr->nops++;
1018 hdr->replen += decode_getfh_maxsz; 996 hdr->replen += decode_getfh_maxsz;
1019} 997}
@@ -1022,10 +1000,9 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
1022{ 1000{
1023 __be32 *p; 1001 __be32 *p;
1024 1002
1025 RESERVE_SPACE(8 + name->len); 1003 p = reserve_space(xdr, 8 + name->len);
1026 WRITE32(OP_LINK); 1004 *p++ = cpu_to_be32(OP_LINK);
1027 WRITE32(name->len); 1005 xdr_encode_opaque(p, name->name, name->len);
1028 WRITEMEM(name->name, name->len);
1029 hdr->nops++; 1006 hdr->nops++;
1030 hdr->replen += decode_link_maxsz; 1007 hdr->replen += decode_link_maxsz;
1031} 1008}
@@ -1052,27 +1029,27 @@ static void encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args
1052{ 1029{
1053 __be32 *p; 1030 __be32 *p;
1054 1031
1055 RESERVE_SPACE(32); 1032 p = reserve_space(xdr, 32);
1056 WRITE32(OP_LOCK); 1033 *p++ = cpu_to_be32(OP_LOCK);
1057 WRITE32(nfs4_lock_type(args->fl, args->block)); 1034 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, args->block));
1058 WRITE32(args->reclaim); 1035 *p++ = cpu_to_be32(args->reclaim);
1059 WRITE64(args->fl->fl_start); 1036 p = xdr_encode_hyper(p, args->fl->fl_start);
1060 WRITE64(nfs4_lock_length(args->fl)); 1037 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1061 WRITE32(args->new_lock_owner); 1038 *p = cpu_to_be32(args->new_lock_owner);
1062 if (args->new_lock_owner){ 1039 if (args->new_lock_owner){
1063 RESERVE_SPACE(4+NFS4_STATEID_SIZE+32); 1040 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+32);
1064 WRITE32(args->open_seqid->sequence->counter); 1041 *p++ = cpu_to_be32(args->open_seqid->sequence->counter);
1065 WRITEMEM(args->open_stateid->data, NFS4_STATEID_SIZE); 1042 p = xdr_encode_opaque_fixed(p, args->open_stateid->data, NFS4_STATEID_SIZE);
1066 WRITE32(args->lock_seqid->sequence->counter); 1043 *p++ = cpu_to_be32(args->lock_seqid->sequence->counter);
1067 WRITE64(args->lock_owner.clientid); 1044 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1068 WRITE32(16); 1045 *p++ = cpu_to_be32(16);
1069 WRITEMEM("lock id:", 8); 1046 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1070 WRITE64(args->lock_owner.id); 1047 xdr_encode_hyper(p, args->lock_owner.id);
1071 } 1048 }
1072 else { 1049 else {
1073 RESERVE_SPACE(NFS4_STATEID_SIZE+4); 1050 p = reserve_space(xdr, NFS4_STATEID_SIZE+4);
1074 WRITEMEM(args->lock_stateid->data, NFS4_STATEID_SIZE); 1051 p = xdr_encode_opaque_fixed(p, args->lock_stateid->data, NFS4_STATEID_SIZE);
1075 WRITE32(args->lock_seqid->sequence->counter); 1052 *p = cpu_to_be32(args->lock_seqid->sequence->counter);
1076 } 1053 }
1077 hdr->nops++; 1054 hdr->nops++;
1078 hdr->replen += decode_lock_maxsz; 1055 hdr->replen += decode_lock_maxsz;
@@ -1082,15 +1059,15 @@ static void encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *ar
1082{ 1059{
1083 __be32 *p; 1060 __be32 *p;
1084 1061
1085 RESERVE_SPACE(52); 1062 p = reserve_space(xdr, 52);
1086 WRITE32(OP_LOCKT); 1063 *p++ = cpu_to_be32(OP_LOCKT);
1087 WRITE32(nfs4_lock_type(args->fl, 0)); 1064 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1088 WRITE64(args->fl->fl_start); 1065 p = xdr_encode_hyper(p, args->fl->fl_start);
1089 WRITE64(nfs4_lock_length(args->fl)); 1066 p = xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1090 WRITE64(args->lock_owner.clientid); 1067 p = xdr_encode_hyper(p, args->lock_owner.clientid);
1091 WRITE32(16); 1068 *p++ = cpu_to_be32(16);
1092 WRITEMEM("lock id:", 8); 1069 p = xdr_encode_opaque_fixed(p, "lock id:", 8);
1093 WRITE64(args->lock_owner.id); 1070 xdr_encode_hyper(p, args->lock_owner.id);
1094 hdr->nops++; 1071 hdr->nops++;
1095 hdr->replen += decode_lockt_maxsz; 1072 hdr->replen += decode_lockt_maxsz;
1096} 1073}
@@ -1099,13 +1076,13 @@ static void encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *ar
1099{ 1076{
1100 __be32 *p; 1077 __be32 *p;
1101 1078
1102 RESERVE_SPACE(12+NFS4_STATEID_SIZE+16); 1079 p = reserve_space(xdr, 12+NFS4_STATEID_SIZE+16);
1103 WRITE32(OP_LOCKU); 1080 *p++ = cpu_to_be32(OP_LOCKU);
1104 WRITE32(nfs4_lock_type(args->fl, 0)); 1081 *p++ = cpu_to_be32(nfs4_lock_type(args->fl, 0));
1105 WRITE32(args->seqid->sequence->counter); 1082 *p++ = cpu_to_be32(args->seqid->sequence->counter);
1106 WRITEMEM(args->stateid->data, NFS4_STATEID_SIZE); 1083 p = xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
1107 WRITE64(args->fl->fl_start); 1084 p = xdr_encode_hyper(p, args->fl->fl_start);
1108 WRITE64(nfs4_lock_length(args->fl)); 1085 xdr_encode_hyper(p, nfs4_lock_length(args->fl));
1109 hdr->nops++; 1086 hdr->nops++;
1110 hdr->replen += decode_locku_maxsz; 1087 hdr->replen += decode_locku_maxsz;
1111} 1088}
@@ -1115,10 +1092,9 @@ static void encode_lookup(struct xdr_stream *xdr, const struct qstr *name, struc
1115 int len = name->len; 1092 int len = name->len;
1116 __be32 *p; 1093 __be32 *p;
1117 1094
1118 RESERVE_SPACE(8 + len); 1095 p = reserve_space(xdr, 8 + len);
1119 WRITE32(OP_LOOKUP); 1096 *p++ = cpu_to_be32(OP_LOOKUP);
1120 WRITE32(len); 1097 xdr_encode_opaque(p, name->name, len);
1121 WRITEMEM(name->name, len);
1122 hdr->nops++; 1098 hdr->nops++;
1123 hdr->replen += decode_lookup_maxsz; 1099 hdr->replen += decode_lookup_maxsz;
1124} 1100}
@@ -1127,21 +1103,21 @@ static void encode_share_access(struct xdr_stream *xdr, fmode_t fmode)
1127{ 1103{
1128 __be32 *p; 1104 __be32 *p;
1129 1105
1130 RESERVE_SPACE(8); 1106 p = reserve_space(xdr, 8);
1131 switch (fmode & (FMODE_READ|FMODE_WRITE)) { 1107 switch (fmode & (FMODE_READ|FMODE_WRITE)) {
1132 case FMODE_READ: 1108 case FMODE_READ:
1133 WRITE32(NFS4_SHARE_ACCESS_READ); 1109 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_READ);
1134 break; 1110 break;
1135 case FMODE_WRITE: 1111 case FMODE_WRITE:
1136 WRITE32(NFS4_SHARE_ACCESS_WRITE); 1112 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_WRITE);
1137 break; 1113 break;
1138 case FMODE_READ|FMODE_WRITE: 1114 case FMODE_READ|FMODE_WRITE:
1139 WRITE32(NFS4_SHARE_ACCESS_BOTH); 1115 *p++ = cpu_to_be32(NFS4_SHARE_ACCESS_BOTH);
1140 break; 1116 break;
1141 default: 1117 default:
1142 WRITE32(0); 1118 *p++ = cpu_to_be32(0);
1143 } 1119 }
1144 WRITE32(0); /* for linux, share_deny = 0 always */ 1120 *p = cpu_to_be32(0); /* for linux, share_deny = 0 always */
1145} 1121}
1146 1122
1147static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1123static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg)
@@ -1151,29 +1127,29 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena
1151 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, 1127 * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4,
1152 * owner 4 = 32 1128 * owner 4 = 32
1153 */ 1129 */
1154 RESERVE_SPACE(8); 1130 p = reserve_space(xdr, 8);
1155 WRITE32(OP_OPEN); 1131 *p++ = cpu_to_be32(OP_OPEN);
1156 WRITE32(arg->seqid->sequence->counter); 1132 *p = cpu_to_be32(arg->seqid->sequence->counter);
1157 encode_share_access(xdr, arg->fmode); 1133 encode_share_access(xdr, arg->fmode);
1158 RESERVE_SPACE(28); 1134 p = reserve_space(xdr, 28);
1159 WRITE64(arg->clientid); 1135 p = xdr_encode_hyper(p, arg->clientid);
1160 WRITE32(16); 1136 *p++ = cpu_to_be32(16);
1161 WRITEMEM("open id:", 8); 1137 p = xdr_encode_opaque_fixed(p, "open id:", 8);
1162 WRITE64(arg->id); 1138 xdr_encode_hyper(p, arg->id);
1163} 1139}
1164 1140
1165static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) 1141static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg)
1166{ 1142{
1167 __be32 *p; 1143 __be32 *p;
1168 1144
1169 RESERVE_SPACE(4); 1145 p = reserve_space(xdr, 4);
1170 switch(arg->open_flags & O_EXCL) { 1146 switch(arg->open_flags & O_EXCL) {
1171 case 0: 1147 case 0:
1172 WRITE32(NFS4_CREATE_UNCHECKED); 1148 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
1173 encode_attrs(xdr, arg->u.attrs, arg->server); 1149 encode_attrs(xdr, arg->u.attrs, arg->server);
1174 break; 1150 break;
1175 default: 1151 default:
1176 WRITE32(NFS4_CREATE_EXCLUSIVE); 1152 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
1177 encode_nfs4_verifier(xdr, &arg->u.verifier); 1153 encode_nfs4_verifier(xdr, &arg->u.verifier);
1178 } 1154 }
1179} 1155}
@@ -1182,14 +1158,14 @@ static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *a
1182{ 1158{
1183 __be32 *p; 1159 __be32 *p;
1184 1160
1185 RESERVE_SPACE(4); 1161 p = reserve_space(xdr, 4);
1186 switch (arg->open_flags & O_CREAT) { 1162 switch (arg->open_flags & O_CREAT) {
1187 case 0: 1163 case 0:
1188 WRITE32(NFS4_OPEN_NOCREATE); 1164 *p = cpu_to_be32(NFS4_OPEN_NOCREATE);
1189 break; 1165 break;
1190 default: 1166 default:
1191 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); 1167 BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL);
1192 WRITE32(NFS4_OPEN_CREATE); 1168 *p = cpu_to_be32(NFS4_OPEN_CREATE);
1193 encode_createmode(xdr, arg); 1169 encode_createmode(xdr, arg);
1194 } 1170 }
1195} 1171}
@@ -1198,16 +1174,16 @@ static inline void encode_delegation_type(struct xdr_stream *xdr, fmode_t delega
1198{ 1174{
1199 __be32 *p; 1175 __be32 *p;
1200 1176
1201 RESERVE_SPACE(4); 1177 p = reserve_space(xdr, 4);
1202 switch (delegation_type) { 1178 switch (delegation_type) {
1203 case 0: 1179 case 0:
1204 WRITE32(NFS4_OPEN_DELEGATE_NONE); 1180 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_NONE);
1205 break; 1181 break;
1206 case FMODE_READ: 1182 case FMODE_READ:
1207 WRITE32(NFS4_OPEN_DELEGATE_READ); 1183 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_READ);
1208 break; 1184 break;
1209 case FMODE_WRITE|FMODE_READ: 1185 case FMODE_WRITE|FMODE_READ:
1210 WRITE32(NFS4_OPEN_DELEGATE_WRITE); 1186 *p = cpu_to_be32(NFS4_OPEN_DELEGATE_WRITE);
1211 break; 1187 break;
1212 default: 1188 default:
1213 BUG(); 1189 BUG();
@@ -1218,8 +1194,8 @@ static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *
1218{ 1194{
1219 __be32 *p; 1195 __be32 *p;
1220 1196
1221 RESERVE_SPACE(4); 1197 p = reserve_space(xdr, 4);
1222 WRITE32(NFS4_OPEN_CLAIM_NULL); 1198 *p = cpu_to_be32(NFS4_OPEN_CLAIM_NULL);
1223 encode_string(xdr, name->len, name->name); 1199 encode_string(xdr, name->len, name->name);
1224} 1200}
1225 1201
@@ -1227,8 +1203,8 @@ static inline void encode_claim_previous(struct xdr_stream *xdr, fmode_t type)
1227{ 1203{
1228 __be32 *p; 1204 __be32 *p;
1229 1205
1230 RESERVE_SPACE(4); 1206 p = reserve_space(xdr, 4);
1231 WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); 1207 *p = cpu_to_be32(NFS4_OPEN_CLAIM_PREVIOUS);
1232 encode_delegation_type(xdr, type); 1208 encode_delegation_type(xdr, type);
1233} 1209}
1234 1210
@@ -1236,9 +1212,9 @@ static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struc
1236{ 1212{
1237 __be32 *p; 1213 __be32 *p;
1238 1214
1239 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1215 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1240 WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); 1216 *p++ = cpu_to_be32(NFS4_OPEN_CLAIM_DELEGATE_CUR);
1241 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1217 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1242 encode_string(xdr, name->len, name->name); 1218 encode_string(xdr, name->len, name->name);
1243} 1219}
1244 1220
@@ -1267,10 +1243,10 @@ static void encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_co
1267{ 1243{
1268 __be32 *p; 1244 __be32 *p;
1269 1245
1270 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1246 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1271 WRITE32(OP_OPEN_CONFIRM); 1247 *p++ = cpu_to_be32(OP_OPEN_CONFIRM);
1272 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1248 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1273 WRITE32(arg->seqid->sequence->counter); 1249 *p = cpu_to_be32(arg->seqid->sequence->counter);
1274 hdr->nops++; 1250 hdr->nops++;
1275 hdr->replen += decode_open_confirm_maxsz; 1251 hdr->replen += decode_open_confirm_maxsz;
1276} 1252}
@@ -1279,10 +1255,10 @@ static void encode_open_downgrade(struct xdr_stream *xdr, const struct nfs_close
1279{ 1255{
1280 __be32 *p; 1256 __be32 *p;
1281 1257
1282 RESERVE_SPACE(4+NFS4_STATEID_SIZE+4); 1258 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE+4);
1283 WRITE32(OP_OPEN_DOWNGRADE); 1259 *p++ = cpu_to_be32(OP_OPEN_DOWNGRADE);
1284 WRITEMEM(arg->stateid->data, NFS4_STATEID_SIZE); 1260 p = xdr_encode_opaque_fixed(p, arg->stateid->data, NFS4_STATEID_SIZE);
1285 WRITE32(arg->seqid->sequence->counter); 1261 *p = cpu_to_be32(arg->seqid->sequence->counter);
1286 encode_share_access(xdr, arg->fmode); 1262 encode_share_access(xdr, arg->fmode);
1287 hdr->nops++; 1263 hdr->nops++;
1288 hdr->replen += decode_open_downgrade_maxsz; 1264 hdr->replen += decode_open_downgrade_maxsz;
@@ -1294,10 +1270,9 @@ encode_putfh(struct xdr_stream *xdr, const struct nfs_fh *fh, struct compound_hd
1294 int len = fh->size; 1270 int len = fh->size;
1295 __be32 *p; 1271 __be32 *p;
1296 1272
1297 RESERVE_SPACE(8 + len); 1273 p = reserve_space(xdr, 8 + len);
1298 WRITE32(OP_PUTFH); 1274 *p++ = cpu_to_be32(OP_PUTFH);
1299 WRITE32(len); 1275 xdr_encode_opaque(p, fh->data, len);
1300 WRITEMEM(fh->data, len);
1301 hdr->nops++; 1276 hdr->nops++;
1302 hdr->replen += decode_putfh_maxsz; 1277 hdr->replen += decode_putfh_maxsz;
1303} 1278}
@@ -1306,8 +1281,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1306{ 1281{
1307 __be32 *p; 1282 __be32 *p;
1308 1283
1309 RESERVE_SPACE(4); 1284 p = reserve_space(xdr, 4);
1310 WRITE32(OP_PUTROOTFH); 1285 *p = cpu_to_be32(OP_PUTROOTFH);
1311 hdr->nops++; 1286 hdr->nops++;
1312 hdr->replen += decode_putrootfh_maxsz; 1287 hdr->replen += decode_putrootfh_maxsz;
1313} 1288}
@@ -1317,26 +1292,26 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1317 nfs4_stateid stateid; 1292 nfs4_stateid stateid;
1318 __be32 *p; 1293 __be32 *p;
1319 1294
1320 RESERVE_SPACE(NFS4_STATEID_SIZE); 1295 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1321 if (ctx->state != NULL) { 1296 if (ctx->state != NULL) {
1322 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner); 1297 nfs4_copy_stateid(&stateid, ctx->state, ctx->lockowner);
1323 WRITEMEM(stateid.data, NFS4_STATEID_SIZE); 1298 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1324 } else 1299 } else
1325 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1300 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1326} 1301}
1327 1302
1328static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr) 1303static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
1329{ 1304{
1330 __be32 *p; 1305 __be32 *p;
1331 1306
1332 RESERVE_SPACE(4); 1307 p = reserve_space(xdr, 4);
1333 WRITE32(OP_READ); 1308 *p = cpu_to_be32(OP_READ);
1334 1309
1335 encode_stateid(xdr, args->context); 1310 encode_stateid(xdr, args->context);
1336 1311
1337 RESERVE_SPACE(12); 1312 p = reserve_space(xdr, 12);
1338 WRITE64(args->offset); 1313 p = xdr_encode_hyper(p, args->offset);
1339 WRITE32(args->count); 1314 *p = cpu_to_be32(args->count);
1340 hdr->nops++; 1315 hdr->nops++;
1341 hdr->replen += decode_read_maxsz; 1316 hdr->replen += decode_read_maxsz;
1342} 1317}
@@ -1349,20 +1324,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
1349 }; 1324 };
1350 __be32 *p; 1325 __be32 *p;
1351 1326
1352 RESERVE_SPACE(12+NFS4_VERIFIER_SIZE+20); 1327 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1353 WRITE32(OP_READDIR); 1328 *p++ = cpu_to_be32(OP_READDIR);
1354 WRITE64(readdir->cookie); 1329 p = xdr_encode_hyper(p, readdir->cookie);
1355 WRITEMEM(readdir->verifier.data, NFS4_VERIFIER_SIZE); 1330 p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE);
1356 WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ 1331 *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */
1357 WRITE32(readdir->count); 1332 *p++ = cpu_to_be32(readdir->count);
1358 WRITE32(2); 1333 *p++ = cpu_to_be32(2);
1359 /* Switch to mounted_on_fileid if the server supports it */ 1334 /* Switch to mounted_on_fileid if the server supports it */
1360 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) 1335 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1361 attrs[0] &= ~FATTR4_WORD0_FILEID; 1336 attrs[0] &= ~FATTR4_WORD0_FILEID;
1362 else 1337 else
1363 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 1338 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1364 WRITE32(attrs[0] & readdir->bitmask[0]); 1339 *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]);
1365 WRITE32(attrs[1] & readdir->bitmask[1]); 1340 *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]);
1366 hdr->nops++; 1341 hdr->nops++;
1367 hdr->replen += decode_readdir_maxsz; 1342 hdr->replen += decode_readdir_maxsz;
1368 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n", 1343 dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
@@ -1378,8 +1353,8 @@ static void encode_readlink(struct xdr_stream *xdr, const struct nfs4_readlink *
1378{ 1353{
1379 __be32 *p; 1354 __be32 *p;
1380 1355
1381 RESERVE_SPACE(4); 1356 p = reserve_space(xdr, 4);
1382 WRITE32(OP_READLINK); 1357 *p = cpu_to_be32(OP_READLINK);
1383 hdr->nops++; 1358 hdr->nops++;
1384 hdr->replen += decode_readlink_maxsz; 1359 hdr->replen += decode_readlink_maxsz;
1385} 1360}
@@ -1388,10 +1363,9 @@ static void encode_remove(struct xdr_stream *xdr, const struct qstr *name, struc
1388{ 1363{
1389 __be32 *p; 1364 __be32 *p;
1390 1365
1391 RESERVE_SPACE(8 + name->len); 1366 p = reserve_space(xdr, 8 + name->len);
1392 WRITE32(OP_REMOVE); 1367 *p++ = cpu_to_be32(OP_REMOVE);
1393 WRITE32(name->len); 1368 xdr_encode_opaque(p, name->name, name->len);
1394 WRITEMEM(name->name, name->len);
1395 hdr->nops++; 1369 hdr->nops++;
1396 hdr->replen += decode_remove_maxsz; 1370 hdr->replen += decode_remove_maxsz;
1397} 1371}
@@ -1400,14 +1374,10 @@ static void encode_rename(struct xdr_stream *xdr, const struct qstr *oldname, co
1400{ 1374{
1401 __be32 *p; 1375 __be32 *p;
1402 1376
1403 RESERVE_SPACE(8 + oldname->len); 1377 p = reserve_space(xdr, 4);
1404 WRITE32(OP_RENAME); 1378 *p = cpu_to_be32(OP_RENAME);
1405 WRITE32(oldname->len); 1379 encode_string(xdr, oldname->len, oldname->name);
1406 WRITEMEM(oldname->name, oldname->len); 1380 encode_string(xdr, newname->len, newname->name);
1407
1408 RESERVE_SPACE(4 + newname->len);
1409 WRITE32(newname->len);
1410 WRITEMEM(newname->name, newname->len);
1411 hdr->nops++; 1381 hdr->nops++;
1412 hdr->replen += decode_rename_maxsz; 1382 hdr->replen += decode_rename_maxsz;
1413} 1383}
@@ -1416,9 +1386,9 @@ static void encode_renew(struct xdr_stream *xdr, const struct nfs_client *client
1416{ 1386{
1417 __be32 *p; 1387 __be32 *p;
1418 1388
1419 RESERVE_SPACE(12); 1389 p = reserve_space(xdr, 12);
1420 WRITE32(OP_RENEW); 1390 *p++ = cpu_to_be32(OP_RENEW);
1421 WRITE64(client_stateid->cl_clientid); 1391 xdr_encode_hyper(p, client_stateid->cl_clientid);
1422 hdr->nops++; 1392 hdr->nops++;
1423 hdr->replen += decode_renew_maxsz; 1393 hdr->replen += decode_renew_maxsz;
1424} 1394}
@@ -1428,8 +1398,8 @@ encode_restorefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1428{ 1398{
1429 __be32 *p; 1399 __be32 *p;
1430 1400
1431 RESERVE_SPACE(4); 1401 p = reserve_space(xdr, 4);
1432 WRITE32(OP_RESTOREFH); 1402 *p = cpu_to_be32(OP_RESTOREFH);
1433 hdr->nops++; 1403 hdr->nops++;
1434 hdr->replen += decode_restorefh_maxsz; 1404 hdr->replen += decode_restorefh_maxsz;
1435} 1405}
@@ -1439,16 +1409,16 @@ encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg, struct compoun
1439{ 1409{
1440 __be32 *p; 1410 __be32 *p;
1441 1411
1442 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1412 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1443 WRITE32(OP_SETATTR); 1413 *p++ = cpu_to_be32(OP_SETATTR);
1444 WRITEMEM(zero_stateid.data, NFS4_STATEID_SIZE); 1414 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
1445 RESERVE_SPACE(2*4); 1415 p = reserve_space(xdr, 2*4);
1446 WRITE32(1); 1416 *p++ = cpu_to_be32(1);
1447 WRITE32(FATTR4_WORD0_ACL); 1417 *p = cpu_to_be32(FATTR4_WORD0_ACL);
1448 if (arg->acl_len % 4) 1418 if (arg->acl_len % 4)
1449 return -EINVAL; 1419 return -EINVAL;
1450 RESERVE_SPACE(4); 1420 p = reserve_space(xdr, 4);
1451 WRITE32(arg->acl_len); 1421 *p = cpu_to_be32(arg->acl_len);
1452 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); 1422 xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len);
1453 hdr->nops++; 1423 hdr->nops++;
1454 hdr->replen += decode_setacl_maxsz; 1424 hdr->replen += decode_setacl_maxsz;
@@ -1460,8 +1430,8 @@ encode_savefh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1460{ 1430{
1461 __be32 *p; 1431 __be32 *p;
1462 1432
1463 RESERVE_SPACE(4); 1433 p = reserve_space(xdr, 4);
1464 WRITE32(OP_SAVEFH); 1434 *p = cpu_to_be32(OP_SAVEFH);
1465 hdr->nops++; 1435 hdr->nops++;
1466 hdr->replen += decode_savefh_maxsz; 1436 hdr->replen += decode_savefh_maxsz;
1467} 1437}
@@ -1470,9 +1440,9 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
1470{ 1440{
1471 __be32 *p; 1441 __be32 *p;
1472 1442
1473 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1443 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1474 WRITE32(OP_SETATTR); 1444 *p++ = cpu_to_be32(OP_SETATTR);
1475 WRITEMEM(arg->stateid.data, NFS4_STATEID_SIZE); 1445 xdr_encode_opaque_fixed(p, arg->stateid.data, NFS4_STATEID_SIZE);
1476 hdr->nops++; 1446 hdr->nops++;
1477 hdr->replen += decode_setattr_maxsz; 1447 hdr->replen += decode_setattr_maxsz;
1478 encode_attrs(xdr, arg->iap, server); 1448 encode_attrs(xdr, arg->iap, server);
@@ -1482,17 +1452,17 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
1482{ 1452{
1483 __be32 *p; 1453 __be32 *p;
1484 1454
1485 RESERVE_SPACE(4 + NFS4_VERIFIER_SIZE); 1455 p = reserve_space(xdr, 4 + NFS4_VERIFIER_SIZE);
1486 WRITE32(OP_SETCLIENTID); 1456 *p++ = cpu_to_be32(OP_SETCLIENTID);
1487 WRITEMEM(setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE); 1457 xdr_encode_opaque_fixed(p, setclientid->sc_verifier->data, NFS4_VERIFIER_SIZE);
1488 1458
1489 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); 1459 encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name);
1490 RESERVE_SPACE(4); 1460 p = reserve_space(xdr, 4);
1491 WRITE32(setclientid->sc_prog); 1461 *p = cpu_to_be32(setclientid->sc_prog);
1492 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); 1462 encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid);
1493 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); 1463 encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr);
1494 RESERVE_SPACE(4); 1464 p = reserve_space(xdr, 4);
1495 WRITE32(setclientid->sc_cb_ident); 1465 *p = cpu_to_be32(setclientid->sc_cb_ident);
1496 hdr->nops++; 1466 hdr->nops++;
1497 hdr->replen += decode_setclientid_maxsz; 1467 hdr->replen += decode_setclientid_maxsz;
1498} 1468}
@@ -1501,10 +1471,10 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_
1501{ 1471{
1502 __be32 *p; 1472 __be32 *p;
1503 1473
1504 RESERVE_SPACE(12 + NFS4_VERIFIER_SIZE); 1474 p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
1505 WRITE32(OP_SETCLIENTID_CONFIRM); 1475 *p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
1506 WRITE64(client_state->cl_clientid); 1476 p = xdr_encode_hyper(p, client_state->cl_clientid);
1507 WRITEMEM(client_state->cl_confirm.data, NFS4_VERIFIER_SIZE); 1477 xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
1508 hdr->nops++; 1478 hdr->nops++;
1509 hdr->replen += decode_setclientid_confirm_maxsz; 1479 hdr->replen += decode_setclientid_confirm_maxsz;
1510} 1480}
@@ -1513,15 +1483,15 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1513{ 1483{
1514 __be32 *p; 1484 __be32 *p;
1515 1485
1516 RESERVE_SPACE(4); 1486 p = reserve_space(xdr, 4);
1517 WRITE32(OP_WRITE); 1487 *p = cpu_to_be32(OP_WRITE);
1518 1488
1519 encode_stateid(xdr, args->context); 1489 encode_stateid(xdr, args->context);
1520 1490
1521 RESERVE_SPACE(16); 1491 p = reserve_space(xdr, 16);
1522 WRITE64(args->offset); 1492 p = xdr_encode_hyper(p, args->offset);
1523 WRITE32(args->stable); 1493 *p++ = cpu_to_be32(args->stable);
1524 WRITE32(args->count); 1494 *p = cpu_to_be32(args->count);
1525 1495
1526 xdr_write_pages(xdr, args->pages, args->pgbase, args->count); 1496 xdr_write_pages(xdr, args->pages, args->pgbase, args->count);
1527 hdr->nops++; 1497 hdr->nops++;
@@ -1532,10 +1502,10 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1532{ 1502{
1533 __be32 *p; 1503 __be32 *p;
1534 1504
1535 RESERVE_SPACE(4+NFS4_STATEID_SIZE); 1505 p = reserve_space(xdr, 4+NFS4_STATEID_SIZE);
1536 1506
1537 WRITE32(OP_DELEGRETURN); 1507 *p++ = cpu_to_be32(OP_DELEGRETURN);
1538 WRITEMEM(stateid->data, NFS4_STATEID_SIZE); 1508 xdr_encode_opaque_fixed(p, stateid->data, NFS4_STATEID_SIZE);
1539 hdr->nops++; 1509 hdr->nops++;
1540 hdr->replen += decode_delegreturn_maxsz; 1510 hdr->replen += decode_delegreturn_maxsz;
1541} 1511}
@@ -1548,16 +1518,16 @@ static void encode_exchange_id(struct xdr_stream *xdr,
1548{ 1518{
1549 __be32 *p; 1519 __be32 *p;
1550 1520
1551 RESERVE_SPACE(4 + sizeof(args->verifier->data)); 1521 p = reserve_space(xdr, 4 + sizeof(args->verifier->data));
1552 WRITE32(OP_EXCHANGE_ID); 1522 *p++ = cpu_to_be32(OP_EXCHANGE_ID);
1553 WRITEMEM(args->verifier->data, sizeof(args->verifier->data)); 1523 xdr_encode_opaque_fixed(p, args->verifier->data, sizeof(args->verifier->data));
1554 1524
1555 encode_string(xdr, args->id_len, args->id); 1525 encode_string(xdr, args->id_len, args->id);
1556 1526
1557 RESERVE_SPACE(12); 1527 p = reserve_space(xdr, 12);
1558 WRITE32(args->flags); 1528 *p++ = cpu_to_be32(args->flags);
1559 WRITE32(0); /* zero length state_protect4_a */ 1529 *p++ = cpu_to_be32(0); /* zero length state_protect4_a */
1560 WRITE32(0); /* zero length implementation id array */ 1530 *p = cpu_to_be32(0); /* zero length implementation id array */
1561 hdr->nops++; 1531 hdr->nops++;
1562 hdr->replen += decode_exchange_id_maxsz; 1532 hdr->replen += decode_exchange_id_maxsz;
1563} 1533}
@@ -1571,55 +1541,43 @@ static void encode_create_session(struct xdr_stream *xdr,
1571 uint32_t len; 1541 uint32_t len;
1572 struct nfs_client *clp = args->client; 1542 struct nfs_client *clp = args->client;
1573 1543
1574 RESERVE_SPACE(4); 1544 len = scnprintf(machine_name, sizeof(machine_name), "%s",
1575 WRITE32(OP_CREATE_SESSION); 1545 clp->cl_ipaddr);
1576
1577 RESERVE_SPACE(8);
1578 WRITE64(clp->cl_ex_clid);
1579 1546
1580 RESERVE_SPACE(8); 1547 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1581 WRITE32(clp->cl_seqid); /*Sequence id */ 1548 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1582 WRITE32(args->flags); /*flags */ 1549 p = xdr_encode_hyper(p, clp->cl_ex_clid);
1550 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1551 *p++ = cpu_to_be32(args->flags); /*flags */
1583 1552
1584 RESERVE_SPACE(2*28); /* 2 channel_attrs */
1585 /* Fore Channel */ 1553 /* Fore Channel */
1586 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1554 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1587 WRITE32(args->fc_attrs.max_rqst_sz); /* max req size */ 1555 *p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz); /* max req size */
1588 WRITE32(args->fc_attrs.max_resp_sz); /* max resp size */ 1556 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz); /* max resp size */
1589 WRITE32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1557 *p++ = cpu_to_be32(args->fc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1590 WRITE32(args->fc_attrs.max_ops); /* max operations */ 1558 *p++ = cpu_to_be32(args->fc_attrs.max_ops); /* max operations */
1591 WRITE32(args->fc_attrs.max_reqs); /* max requests */ 1559 *p++ = cpu_to_be32(args->fc_attrs.max_reqs); /* max requests */
1592 WRITE32(0); /* rdmachannel_attrs */ 1560 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1593 1561
1594 /* Back Channel */ 1562 /* Back Channel */
1595 WRITE32(args->fc_attrs.headerpadsz); /* header padding size */ 1563 *p++ = cpu_to_be32(args->fc_attrs.headerpadsz); /* header padding size */
1596 WRITE32(args->bc_attrs.max_rqst_sz); /* max req size */ 1564 *p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz); /* max req size */
1597 WRITE32(args->bc_attrs.max_resp_sz); /* max resp size */ 1565 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz); /* max resp size */
1598 WRITE32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */ 1566 *p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached); /* Max resp sz cached */
1599 WRITE32(args->bc_attrs.max_ops); /* max operations */ 1567 *p++ = cpu_to_be32(args->bc_attrs.max_ops); /* max operations */
1600 WRITE32(args->bc_attrs.max_reqs); /* max requests */ 1568 *p++ = cpu_to_be32(args->bc_attrs.max_reqs); /* max requests */
1601 WRITE32(0); /* rdmachannel_attrs */ 1569 *p++ = cpu_to_be32(0); /* rdmachannel_attrs */
1602 1570
1603 RESERVE_SPACE(4); 1571 *p++ = cpu_to_be32(args->cb_program); /* cb_program */
1604 WRITE32(args->cb_program); /* cb_program */ 1572 *p++ = cpu_to_be32(1);
1605 1573 *p++ = cpu_to_be32(RPC_AUTH_UNIX); /* auth_sys */
1606 RESERVE_SPACE(4); /* # of security flavors */
1607 WRITE32(1);
1608
1609 RESERVE_SPACE(4);
1610 WRITE32(RPC_AUTH_UNIX); /* auth_sys */
1611 1574
1612 /* authsys_parms rfc1831 */ 1575 /* authsys_parms rfc1831 */
1613 RESERVE_SPACE(4); 1576 *p++ = cpu_to_be32((u32)clp->cl_boot_time.tv_nsec); /* stamp */
1614 WRITE32((u32)clp->cl_boot_time.tv_nsec); /* stamp */ 1577 p = xdr_encode_opaque(p, machine_name, len);
1615 len = scnprintf(machine_name, sizeof(machine_name), "%s", 1578 *p++ = cpu_to_be32(0); /* UID */
1616 clp->cl_ipaddr); 1579 *p++ = cpu_to_be32(0); /* GID */
1617 RESERVE_SPACE(16 + len); 1580 *p = cpu_to_be32(0); /* No more gids */
1618 WRITE32(len);
1619 WRITEMEM(machine_name, len);
1620 WRITE32(0); /* UID */
1621 WRITE32(0); /* GID */
1622 WRITE32(0); /* No more gids */
1623 hdr->nops++; 1581 hdr->nops++;
1624 hdr->replen += decode_create_session_maxsz; 1582 hdr->replen += decode_create_session_maxsz;
1625} 1583}
@@ -1629,9 +1587,9 @@ static void encode_destroy_session(struct xdr_stream *xdr,
1629 struct compound_hdr *hdr) 1587 struct compound_hdr *hdr)
1630{ 1588{
1631 __be32 *p; 1589 __be32 *p;
1632 RESERVE_SPACE(4 + NFS4_MAX_SESSIONID_LEN); 1590 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN);
1633 WRITE32(OP_DESTROY_SESSION); 1591 *p++ = cpu_to_be32(OP_DESTROY_SESSION);
1634 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1592 xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1635 hdr->nops++; 1593 hdr->nops++;
1636 hdr->replen += decode_destroy_session_maxsz; 1594 hdr->replen += decode_destroy_session_maxsz;
1637} 1595}
@@ -1655,8 +1613,8 @@ static void encode_sequence(struct xdr_stream *xdr,
1655 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE); 1613 WARN_ON(args->sa_slotid == NFS4_MAX_SLOT_TABLE);
1656 slot = tp->slots + args->sa_slotid; 1614 slot = tp->slots + args->sa_slotid;
1657 1615
1658 RESERVE_SPACE(4); 1616 p = reserve_space(xdr, 4 + NFS4_MAX_SESSIONID_LEN + 16);
1659 WRITE32(OP_SEQUENCE); 1617 *p++ = cpu_to_be32(OP_SEQUENCE);
1660 1618
1661 /* 1619 /*
1662 * Sessionid + seqid + slotid + max slotid + cache_this 1620 * Sessionid + seqid + slotid + max slotid + cache_this
@@ -1670,12 +1628,11 @@ static void encode_sequence(struct xdr_stream *xdr,
1670 ((u32 *)session->sess_id.data)[3], 1628 ((u32 *)session->sess_id.data)[3],
1671 slot->seq_nr, args->sa_slotid, 1629 slot->seq_nr, args->sa_slotid,
1672 tp->highest_used_slotid, args->sa_cache_this); 1630 tp->highest_used_slotid, args->sa_cache_this);
1673 RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 16); 1631 p = xdr_encode_opaque_fixed(p, session->sess_id.data, NFS4_MAX_SESSIONID_LEN);
1674 WRITEMEM(session->sess_id.data, NFS4_MAX_SESSIONID_LEN); 1632 *p++ = cpu_to_be32(slot->seq_nr);
1675 WRITE32(slot->seq_nr); 1633 *p++ = cpu_to_be32(args->sa_slotid);
1676 WRITE32(args->sa_slotid); 1634 *p++ = cpu_to_be32(tp->highest_used_slotid);
1677 WRITE32(tp->highest_used_slotid); 1635 *p = cpu_to_be32(args->sa_cache_this);
1678 WRITE32(args->sa_cache_this);
1679 hdr->nops++; 1636 hdr->nops++;
1680 hdr->replen += decode_sequence_maxsz; 1637 hdr->replen += decode_sequence_maxsz;
1681#endif /* CONFIG_NFS_V4_1 */ 1638#endif /* CONFIG_NFS_V4_1 */
@@ -2466,68 +2423,53 @@ static int nfs4_xdr_enc_get_lease_time(struct rpc_rqst *req, uint32_t *p,
2466} 2423}
2467#endif /* CONFIG_NFS_V4_1 */ 2424#endif /* CONFIG_NFS_V4_1 */
2468 2425
2469/* 2426static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
2470 * START OF "GENERIC" DECODE ROUTINES. 2427{
2471 * These may look a little ugly since they are imported from a "generic" 2428 dprintk("nfs: %s: prematurely hit end of receive buffer. "
2472 * set of XDR encode/decode routines which are intended to be shared by 2429 "Remaining buffer length is %tu words.\n",
2473 * all of our NFSv4 implementations (OpenBSD, MacOS X...). 2430 func, xdr->end - xdr->p);
2474 * 2431}
2475 * If the pain of reading these is too great, it should be a straightforward
2476 * task to translate them into Linux-specific versions which are more
2477 * consistent with the style used in NFSv2/v3...
2478 */
2479#define READ32(x) (x) = ntohl(*p++)
2480#define READ64(x) do { \
2481 (x) = (u64)ntohl(*p++) << 32; \
2482 (x) |= ntohl(*p++); \
2483} while (0)
2484#define READTIME(x) do { \
2485 p++; \
2486 (x.tv_sec) = ntohl(*p++); \
2487 (x.tv_nsec) = ntohl(*p++); \
2488} while (0)
2489#define COPYMEM(x,nbytes) do { \
2490 memcpy((x), p, nbytes); \
2491 p += XDR_QUADLEN(nbytes); \
2492} while (0)
2493
2494#define READ_BUF(nbytes) do { \
2495 p = xdr_inline_decode(xdr, nbytes); \
2496 if (unlikely(!p)) { \
2497 dprintk("nfs: %s: prematurely hit end of receive" \
2498 " buffer\n", __func__); \
2499 dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \
2500 __func__, xdr->p, nbytes, xdr->end); \
2501 return -EIO; \
2502 } \
2503} while (0)
2504 2432
2505static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) 2433static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
2506{ 2434{
2507 __be32 *p; 2435 __be32 *p;
2508 2436
2509 READ_BUF(4); 2437 p = xdr_inline_decode(xdr, 4);
2510 READ32(*len); 2438 if (unlikely(!p))
2511 READ_BUF(*len); 2439 goto out_overflow;
2440 *len = be32_to_cpup(p);
2441 p = xdr_inline_decode(xdr, *len);
2442 if (unlikely(!p))
2443 goto out_overflow;
2512 *string = (char *)p; 2444 *string = (char *)p;
2513 return 0; 2445 return 0;
2446out_overflow:
2447 print_overflow_msg(__func__, xdr);
2448 return -EIO;
2514} 2449}
2515 2450
2516static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) 2451static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)
2517{ 2452{
2518 __be32 *p; 2453 __be32 *p;
2519 2454
2520 READ_BUF(8); 2455 p = xdr_inline_decode(xdr, 8);
2521 READ32(hdr->status); 2456 if (unlikely(!p))
2522 READ32(hdr->taglen); 2457 goto out_overflow;
2458 hdr->status = be32_to_cpup(p++);
2459 hdr->taglen = be32_to_cpup(p);
2523 2460
2524 READ_BUF(hdr->taglen + 4); 2461 p = xdr_inline_decode(xdr, hdr->taglen + 4);
2462 if (unlikely(!p))
2463 goto out_overflow;
2525 hdr->tag = (char *)p; 2464 hdr->tag = (char *)p;
2526 p += XDR_QUADLEN(hdr->taglen); 2465 p += XDR_QUADLEN(hdr->taglen);
2527 READ32(hdr->nops); 2466 hdr->nops = be32_to_cpup(p);
2528 if (unlikely(hdr->nops < 1)) 2467 if (unlikely(hdr->nops < 1))
2529 return nfs4_stat_to_errno(hdr->status); 2468 return nfs4_stat_to_errno(hdr->status);
2530 return 0; 2469 return 0;
2470out_overflow:
2471 print_overflow_msg(__func__, xdr);
2472 return -EIO;
2531} 2473}
2532 2474
2533static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) 2475static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
@@ -2536,18 +2478,23 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
2536 uint32_t opnum; 2478 uint32_t opnum;
2537 int32_t nfserr; 2479 int32_t nfserr;
2538 2480
2539 READ_BUF(8); 2481 p = xdr_inline_decode(xdr, 8);
2540 READ32(opnum); 2482 if (unlikely(!p))
2483 goto out_overflow;
2484 opnum = be32_to_cpup(p++);
2541 if (opnum != expected) { 2485 if (opnum != expected) {
2542 dprintk("nfs: Server returned operation" 2486 dprintk("nfs: Server returned operation"
2543 " %d but we issued a request for %d\n", 2487 " %d but we issued a request for %d\n",
2544 opnum, expected); 2488 opnum, expected);
2545 return -EIO; 2489 return -EIO;
2546 } 2490 }
2547 READ32(nfserr); 2491 nfserr = be32_to_cpup(p);
2548 if (nfserr != NFS_OK) 2492 if (nfserr != NFS_OK)
2549 return nfs4_stat_to_errno(nfserr); 2493 return nfs4_stat_to_errno(nfserr);
2550 return 0; 2494 return 0;
2495out_overflow:
2496 print_overflow_msg(__func__, xdr);
2497 return -EIO;
2551} 2498}
2552 2499
2553/* Dummy routine */ 2500/* Dummy routine */
@@ -2557,8 +2504,11 @@ static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs_client *clp)
2557 unsigned int strlen; 2504 unsigned int strlen;
2558 char *str; 2505 char *str;
2559 2506
2560 READ_BUF(12); 2507 p = xdr_inline_decode(xdr, 12);
2561 return decode_opaque_inline(xdr, &strlen, &str); 2508 if (likely(p))
2509 return decode_opaque_inline(xdr, &strlen, &str);
2510 print_overflow_msg(__func__, xdr);
2511 return -EIO;
2562} 2512}
2563 2513
2564static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) 2514static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -2566,27 +2516,39 @@ static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
2566 uint32_t bmlen; 2516 uint32_t bmlen;
2567 __be32 *p; 2517 __be32 *p;
2568 2518
2569 READ_BUF(4); 2519 p = xdr_inline_decode(xdr, 4);
2570 READ32(bmlen); 2520 if (unlikely(!p))
2521 goto out_overflow;
2522 bmlen = be32_to_cpup(p);
2571 2523
2572 bitmap[0] = bitmap[1] = 0; 2524 bitmap[0] = bitmap[1] = 0;
2573 READ_BUF((bmlen << 2)); 2525 p = xdr_inline_decode(xdr, (bmlen << 2));
2526 if (unlikely(!p))
2527 goto out_overflow;
2574 if (bmlen > 0) { 2528 if (bmlen > 0) {
2575 READ32(bitmap[0]); 2529 bitmap[0] = be32_to_cpup(p++);
2576 if (bmlen > 1) 2530 if (bmlen > 1)
2577 READ32(bitmap[1]); 2531 bitmap[1] = be32_to_cpup(p);
2578 } 2532 }
2579 return 0; 2533 return 0;
2534out_overflow:
2535 print_overflow_msg(__func__, xdr);
2536 return -EIO;
2580} 2537}
2581 2538
2582static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep) 2539static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
2583{ 2540{
2584 __be32 *p; 2541 __be32 *p;
2585 2542
2586 READ_BUF(4); 2543 p = xdr_inline_decode(xdr, 4);
2587 READ32(*attrlen); 2544 if (unlikely(!p))
2545 goto out_overflow;
2546 *attrlen = be32_to_cpup(p);
2588 *savep = xdr->p; 2547 *savep = xdr->p;
2589 return 0; 2548 return 0;
2549out_overflow:
2550 print_overflow_msg(__func__, xdr);
2551 return -EIO;
2590} 2552}
2591 2553
2592static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) 2554static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -2609,8 +2571,10 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2609 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U))) 2571 if (unlikely(bitmap[0] & (FATTR4_WORD0_TYPE - 1U)))
2610 return -EIO; 2572 return -EIO;
2611 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) { 2573 if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
2612 READ_BUF(4); 2574 p = xdr_inline_decode(xdr, 4);
2613 READ32(*type); 2575 if (unlikely(!p))
2576 goto out_overflow;
2577 *type = be32_to_cpup(p);
2614 if (*type < NF4REG || *type > NF4NAMEDATTR) { 2578 if (*type < NF4REG || *type > NF4NAMEDATTR) {
2615 dprintk("%s: bad type %d\n", __func__, *type); 2579 dprintk("%s: bad type %d\n", __func__, *type);
2616 return -EIO; 2580 return -EIO;
@@ -2620,6 +2584,9 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
2620 } 2584 }
2621 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]); 2585 dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
2622 return ret; 2586 return ret;
2587out_overflow:
2588 print_overflow_msg(__func__, xdr);
2589 return -EIO;
2623} 2590}
2624 2591
2625static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change) 2592static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -2631,14 +2598,19 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2631 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U))) 2598 if (unlikely(bitmap[0] & (FATTR4_WORD0_CHANGE - 1U)))
2632 return -EIO; 2599 return -EIO;
2633 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) { 2600 if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
2634 READ_BUF(8); 2601 p = xdr_inline_decode(xdr, 8);
2635 READ64(*change); 2602 if (unlikely(!p))
2603 goto out_overflow;
2604 xdr_decode_hyper(p, change);
2636 bitmap[0] &= ~FATTR4_WORD0_CHANGE; 2605 bitmap[0] &= ~FATTR4_WORD0_CHANGE;
2637 ret = NFS_ATTR_FATTR_CHANGE; 2606 ret = NFS_ATTR_FATTR_CHANGE;
2638 } 2607 }
2639 dprintk("%s: change attribute=%Lu\n", __func__, 2608 dprintk("%s: change attribute=%Lu\n", __func__,
2640 (unsigned long long)*change); 2609 (unsigned long long)*change);
2641 return ret; 2610 return ret;
2611out_overflow:
2612 print_overflow_msg(__func__, xdr);
2613 return -EIO;
2642} 2614}
2643 2615
2644static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size) 2616static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -2650,13 +2622,18 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
2650 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U))) 2622 if (unlikely(bitmap[0] & (FATTR4_WORD0_SIZE - 1U)))
2651 return -EIO; 2623 return -EIO;
2652 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) { 2624 if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
2653 READ_BUF(8); 2625 p = xdr_inline_decode(xdr, 8);
2654 READ64(*size); 2626 if (unlikely(!p))
2627 goto out_overflow;
2628 xdr_decode_hyper(p, size);
2655 bitmap[0] &= ~FATTR4_WORD0_SIZE; 2629 bitmap[0] &= ~FATTR4_WORD0_SIZE;
2656 ret = NFS_ATTR_FATTR_SIZE; 2630 ret = NFS_ATTR_FATTR_SIZE;
2657 } 2631 }
2658 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size); 2632 dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
2659 return ret; 2633 return ret;
2634out_overflow:
2635 print_overflow_msg(__func__, xdr);
2636 return -EIO;
2660} 2637}
2661 2638
2662static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2639static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2667,12 +2644,17 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
2667 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U))) 2644 if (unlikely(bitmap[0] & (FATTR4_WORD0_LINK_SUPPORT - 1U)))
2668 return -EIO; 2645 return -EIO;
2669 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) { 2646 if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
2670 READ_BUF(4); 2647 p = xdr_inline_decode(xdr, 4);
2671 READ32(*res); 2648 if (unlikely(!p))
2649 goto out_overflow;
2650 *res = be32_to_cpup(p);
2672 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT; 2651 bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
2673 } 2652 }
2674 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true"); 2653 dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
2675 return 0; 2654 return 0;
2655out_overflow:
2656 print_overflow_msg(__func__, xdr);
2657 return -EIO;
2676} 2658}
2677 2659
2678static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2660static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2683,12 +2665,17 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
2683 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U))) 2665 if (unlikely(bitmap[0] & (FATTR4_WORD0_SYMLINK_SUPPORT - 1U)))
2684 return -EIO; 2666 return -EIO;
2685 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) { 2667 if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
2686 READ_BUF(4); 2668 p = xdr_inline_decode(xdr, 4);
2687 READ32(*res); 2669 if (unlikely(!p))
2670 goto out_overflow;
2671 *res = be32_to_cpup(p);
2688 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT; 2672 bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
2689 } 2673 }
2690 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true"); 2674 dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
2691 return 0; 2675 return 0;
2676out_overflow:
2677 print_overflow_msg(__func__, xdr);
2678 return -EIO;
2692} 2679}
2693 2680
2694static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) 2681static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -2701,9 +2688,11 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2701 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U))) 2688 if (unlikely(bitmap[0] & (FATTR4_WORD0_FSID - 1U)))
2702 return -EIO; 2689 return -EIO;
2703 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) { 2690 if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
2704 READ_BUF(16); 2691 p = xdr_inline_decode(xdr, 16);
2705 READ64(fsid->major); 2692 if (unlikely(!p))
2706 READ64(fsid->minor); 2693 goto out_overflow;
2694 p = xdr_decode_hyper(p, &fsid->major);
2695 xdr_decode_hyper(p, &fsid->minor);
2707 bitmap[0] &= ~FATTR4_WORD0_FSID; 2696 bitmap[0] &= ~FATTR4_WORD0_FSID;
2708 ret = NFS_ATTR_FATTR_FSID; 2697 ret = NFS_ATTR_FATTR_FSID;
2709 } 2698 }
@@ -2711,6 +2700,9 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
2711 (unsigned long long)fsid->major, 2700 (unsigned long long)fsid->major,
2712 (unsigned long long)fsid->minor); 2701 (unsigned long long)fsid->minor);
2713 return ret; 2702 return ret;
2703out_overflow:
2704 print_overflow_msg(__func__, xdr);
2705 return -EIO;
2714} 2706}
2715 2707
2716static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2708static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2721,12 +2713,17 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
2721 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U))) 2713 if (unlikely(bitmap[0] & (FATTR4_WORD0_LEASE_TIME - 1U)))
2722 return -EIO; 2714 return -EIO;
2723 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) { 2715 if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
2724 READ_BUF(4); 2716 p = xdr_inline_decode(xdr, 4);
2725 READ32(*res); 2717 if (unlikely(!p))
2718 goto out_overflow;
2719 *res = be32_to_cpup(p);
2726 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME; 2720 bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
2727 } 2721 }
2728 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res); 2722 dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
2729 return 0; 2723 return 0;
2724out_overflow:
2725 print_overflow_msg(__func__, xdr);
2726 return -EIO;
2730} 2727}
2731 2728
2732static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 2729static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -2737,12 +2734,17 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
2737 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U))) 2734 if (unlikely(bitmap[0] & (FATTR4_WORD0_ACLSUPPORT - 1U)))
2738 return -EIO; 2735 return -EIO;
2739 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) { 2736 if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
2740 READ_BUF(4); 2737 p = xdr_inline_decode(xdr, 4);
2741 READ32(*res); 2738 if (unlikely(!p))
2739 goto out_overflow;
2740 *res = be32_to_cpup(p);
2742 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT; 2741 bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
2743 } 2742 }
2744 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res); 2743 dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
2745 return 0; 2744 return 0;
2745out_overflow:
2746 print_overflow_msg(__func__, xdr);
2747 return -EIO;
2746} 2748}
2747 2749
2748static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2750static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2754,13 +2756,18 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
2754 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U))) 2756 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEID - 1U)))
2755 return -EIO; 2757 return -EIO;
2756 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) { 2758 if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
2757 READ_BUF(8); 2759 p = xdr_inline_decode(xdr, 8);
2758 READ64(*fileid); 2760 if (unlikely(!p))
2761 goto out_overflow;
2762 xdr_decode_hyper(p, fileid);
2759 bitmap[0] &= ~FATTR4_WORD0_FILEID; 2763 bitmap[0] &= ~FATTR4_WORD0_FILEID;
2760 ret = NFS_ATTR_FATTR_FILEID; 2764 ret = NFS_ATTR_FATTR_FILEID;
2761 } 2765 }
2762 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2766 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2763 return ret; 2767 return ret;
2768out_overflow:
2769 print_overflow_msg(__func__, xdr);
2770 return -EIO;
2764} 2771}
2765 2772
2766static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid) 2773static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -2772,13 +2779,18 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
2772 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U))) 2779 if (unlikely(bitmap[1] & (FATTR4_WORD1_MOUNTED_ON_FILEID - 1U)))
2773 return -EIO; 2780 return -EIO;
2774 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) { 2781 if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
2775 READ_BUF(8); 2782 p = xdr_inline_decode(xdr, 8);
2776 READ64(*fileid); 2783 if (unlikely(!p))
2784 goto out_overflow;
2785 xdr_decode_hyper(p, fileid);
2777 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 2786 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
2778 ret = NFS_ATTR_FATTR_FILEID; 2787 ret = NFS_ATTR_FATTR_FILEID;
2779 } 2788 }
2780 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 2789 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
2781 return ret; 2790 return ret;
2791out_overflow:
2792 print_overflow_msg(__func__, xdr);
2793 return -EIO;
2782} 2794}
2783 2795
2784static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2796static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2790,12 +2802,17 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
2790 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U))) 2802 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_AVAIL - 1U)))
2791 return -EIO; 2803 return -EIO;
2792 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) { 2804 if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
2793 READ_BUF(8); 2805 p = xdr_inline_decode(xdr, 8);
2794 READ64(*res); 2806 if (unlikely(!p))
2807 goto out_overflow;
2808 xdr_decode_hyper(p, res);
2795 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL; 2809 bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
2796 } 2810 }
2797 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res); 2811 dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
2798 return status; 2812 return status;
2813out_overflow:
2814 print_overflow_msg(__func__, xdr);
2815 return -EIO;
2799} 2816}
2800 2817
2801static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2818static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2807,12 +2824,17 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
2807 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U))) 2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_FREE - 1U)))
2808 return -EIO; 2825 return -EIO;
2809 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) { 2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
2810 READ_BUF(8); 2827 p = xdr_inline_decode(xdr, 8);
2811 READ64(*res); 2828 if (unlikely(!p))
2829 goto out_overflow;
2830 xdr_decode_hyper(p, res);
2812 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE; 2831 bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
2813 } 2832 }
2814 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res); 2833 dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
2815 return status; 2834 return status;
2835out_overflow:
2836 print_overflow_msg(__func__, xdr);
2837 return -EIO;
2816} 2838}
2817 2839
2818static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 2840static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -2824,12 +2846,17 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
2824 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U))) 2846 if (unlikely(bitmap[0] & (FATTR4_WORD0_FILES_TOTAL - 1U)))
2825 return -EIO; 2847 return -EIO;
2826 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) { 2848 if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
2827 READ_BUF(8); 2849 p = xdr_inline_decode(xdr, 8);
2828 READ64(*res); 2850 if (unlikely(!p))
2851 goto out_overflow;
2852 xdr_decode_hyper(p, res);
2829 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL; 2853 bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
2830 } 2854 }
2831 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res); 2855 dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
2832 return status; 2856 return status;
2857out_overflow:
2858 print_overflow_msg(__func__, xdr);
2859 return -EIO;
2833} 2860}
2834 2861
2835static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path) 2862static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -2838,8 +2865,10 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
2838 __be32 *p; 2865 __be32 *p;
2839 int status = 0; 2866 int status = 0;
2840 2867
2841 READ_BUF(4); 2868 p = xdr_inline_decode(xdr, 4);
2842 READ32(n); 2869 if (unlikely(!p))
2870 goto out_overflow;
2871 n = be32_to_cpup(p);
2843 if (n == 0) 2872 if (n == 0)
2844 goto root_path; 2873 goto root_path;
2845 dprintk("path "); 2874 dprintk("path ");
@@ -2873,6 +2902,9 @@ out_eio:
2873 dprintk(" status %d", status); 2902 dprintk(" status %d", status);
2874 status = -EIO; 2903 status = -EIO;
2875 goto out; 2904 goto out;
2905out_overflow:
2906 print_overflow_msg(__func__, xdr);
2907 return -EIO;
2876} 2908}
2877 2909
2878static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res) 2910static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -2890,8 +2922,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2890 status = decode_pathname(xdr, &res->fs_path); 2922 status = decode_pathname(xdr, &res->fs_path);
2891 if (unlikely(status != 0)) 2923 if (unlikely(status != 0))
2892 goto out; 2924 goto out;
2893 READ_BUF(4); 2925 p = xdr_inline_decode(xdr, 4);
2894 READ32(n); 2926 if (unlikely(!p))
2927 goto out_overflow;
2928 n = be32_to_cpup(p);
2895 if (n <= 0) 2929 if (n <= 0)
2896 goto out_eio; 2930 goto out_eio;
2897 res->nlocations = 0; 2931 res->nlocations = 0;
@@ -2899,8 +2933,10 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2899 u32 m; 2933 u32 m;
2900 struct nfs4_fs_location *loc = &res->locations[res->nlocations]; 2934 struct nfs4_fs_location *loc = &res->locations[res->nlocations];
2901 2935
2902 READ_BUF(4); 2936 p = xdr_inline_decode(xdr, 4);
2903 READ32(m); 2937 if (unlikely(!p))
2938 goto out_overflow;
2939 m = be32_to_cpup(p);
2904 2940
2905 loc->nservers = 0; 2941 loc->nservers = 0;
2906 dprintk("%s: servers ", __func__); 2942 dprintk("%s: servers ", __func__);
@@ -2939,6 +2975,8 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
2939out: 2975out:
2940 dprintk("%s: fs_locations done, error = %d\n", __func__, status); 2976 dprintk("%s: fs_locations done, error = %d\n", __func__, status);
2941 return status; 2977 return status;
2978out_overflow:
2979 print_overflow_msg(__func__, xdr);
2942out_eio: 2980out_eio:
2943 status = -EIO; 2981 status = -EIO;
2944 goto out; 2982 goto out;
@@ -2953,12 +2991,17 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
2953 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U))) 2991 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXFILESIZE - 1U)))
2954 return -EIO; 2992 return -EIO;
2955 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) { 2993 if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
2956 READ_BUF(8); 2994 p = xdr_inline_decode(xdr, 8);
2957 READ64(*res); 2995 if (unlikely(!p))
2996 goto out_overflow;
2997 xdr_decode_hyper(p, res);
2958 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE; 2998 bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
2959 } 2999 }
2960 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res); 3000 dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
2961 return status; 3001 return status;
3002out_overflow:
3003 print_overflow_msg(__func__, xdr);
3004 return -EIO;
2962} 3005}
2963 3006
2964static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink) 3007static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -2970,12 +3013,17 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2970 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U))) 3013 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXLINK - 1U)))
2971 return -EIO; 3014 return -EIO;
2972 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) { 3015 if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
2973 READ_BUF(4); 3016 p = xdr_inline_decode(xdr, 4);
2974 READ32(*maxlink); 3017 if (unlikely(!p))
3018 goto out_overflow;
3019 *maxlink = be32_to_cpup(p);
2975 bitmap[0] &= ~FATTR4_WORD0_MAXLINK; 3020 bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
2976 } 3021 }
2977 dprintk("%s: maxlink=%u\n", __func__, *maxlink); 3022 dprintk("%s: maxlink=%u\n", __func__, *maxlink);
2978 return status; 3023 return status;
3024out_overflow:
3025 print_overflow_msg(__func__, xdr);
3026 return -EIO;
2979} 3027}
2980 3028
2981static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname) 3029static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -2987,12 +3035,17 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
2987 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U))) 3035 if (unlikely(bitmap[0] & (FATTR4_WORD0_MAXNAME - 1U)))
2988 return -EIO; 3036 return -EIO;
2989 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) { 3037 if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
2990 READ_BUF(4); 3038 p = xdr_inline_decode(xdr, 4);
2991 READ32(*maxname); 3039 if (unlikely(!p))
3040 goto out_overflow;
3041 *maxname = be32_to_cpup(p);
2992 bitmap[0] &= ~FATTR4_WORD0_MAXNAME; 3042 bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
2993 } 3043 }
2994 dprintk("%s: maxname=%u\n", __func__, *maxname); 3044 dprintk("%s: maxname=%u\n", __func__, *maxname);
2995 return status; 3045 return status;
3046out_overflow:
3047 print_overflow_msg(__func__, xdr);
3048 return -EIO;
2996} 3049}
2997 3050
2998static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3051static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3005,8 +3058,10 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3005 return -EIO; 3058 return -EIO;
3006 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) { 3059 if (likely(bitmap[0] & FATTR4_WORD0_MAXREAD)) {
3007 uint64_t maxread; 3060 uint64_t maxread;
3008 READ_BUF(8); 3061 p = xdr_inline_decode(xdr, 8);
3009 READ64(maxread); 3062 if (unlikely(!p))
3063 goto out_overflow;
3064 xdr_decode_hyper(p, &maxread);
3010 if (maxread > 0x7FFFFFFF) 3065 if (maxread > 0x7FFFFFFF)
3011 maxread = 0x7FFFFFFF; 3066 maxread = 0x7FFFFFFF;
3012 *res = (uint32_t)maxread; 3067 *res = (uint32_t)maxread;
@@ -3014,6 +3069,9 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
3014 } 3069 }
3015 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res); 3070 dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
3016 return status; 3071 return status;
3072out_overflow:
3073 print_overflow_msg(__func__, xdr);
3074 return -EIO;
3017} 3075}
3018 3076
3019static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) 3077static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3026,8 +3084,10 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3026 return -EIO; 3084 return -EIO;
3027 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) { 3085 if (likely(bitmap[0] & FATTR4_WORD0_MAXWRITE)) {
3028 uint64_t maxwrite; 3086 uint64_t maxwrite;
3029 READ_BUF(8); 3087 p = xdr_inline_decode(xdr, 8);
3030 READ64(maxwrite); 3088 if (unlikely(!p))
3089 goto out_overflow;
3090 xdr_decode_hyper(p, &maxwrite);
3031 if (maxwrite > 0x7FFFFFFF) 3091 if (maxwrite > 0x7FFFFFFF)
3032 maxwrite = 0x7FFFFFFF; 3092 maxwrite = 0x7FFFFFFF;
3033 *res = (uint32_t)maxwrite; 3093 *res = (uint32_t)maxwrite;
@@ -3035,6 +3095,9 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
3035 } 3095 }
3036 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res); 3096 dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
3037 return status; 3097 return status;
3098out_overflow:
3099 print_overflow_msg(__func__, xdr);
3100 return -EIO;
3038} 3101}
3039 3102
3040static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode) 3103static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3047,14 +3110,19 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
3047 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U))) 3110 if (unlikely(bitmap[1] & (FATTR4_WORD1_MODE - 1U)))
3048 return -EIO; 3111 return -EIO;
3049 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) { 3112 if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
3050 READ_BUF(4); 3113 p = xdr_inline_decode(xdr, 4);
3051 READ32(tmp); 3114 if (unlikely(!p))
3115 goto out_overflow;
3116 tmp = be32_to_cpup(p);
3052 *mode = tmp & ~S_IFMT; 3117 *mode = tmp & ~S_IFMT;
3053 bitmap[1] &= ~FATTR4_WORD1_MODE; 3118 bitmap[1] &= ~FATTR4_WORD1_MODE;
3054 ret = NFS_ATTR_FATTR_MODE; 3119 ret = NFS_ATTR_FATTR_MODE;
3055 } 3120 }
3056 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode); 3121 dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
3057 return ret; 3122 return ret;
3123out_overflow:
3124 print_overflow_msg(__func__, xdr);
3125 return -EIO;
3058} 3126}
3059 3127
3060static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink) 3128static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3066,13 +3134,18 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
3066 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U))) 3134 if (unlikely(bitmap[1] & (FATTR4_WORD1_NUMLINKS - 1U)))
3067 return -EIO; 3135 return -EIO;
3068 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) { 3136 if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
3069 READ_BUF(4); 3137 p = xdr_inline_decode(xdr, 4);
3070 READ32(*nlink); 3138 if (unlikely(!p))
3139 goto out_overflow;
3140 *nlink = be32_to_cpup(p);
3071 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS; 3141 bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
3072 ret = NFS_ATTR_FATTR_NLINK; 3142 ret = NFS_ATTR_FATTR_NLINK;
3073 } 3143 }
3074 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink); 3144 dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
3075 return ret; 3145 return ret;
3146out_overflow:
3147 print_overflow_msg(__func__, xdr);
3148 return -EIO;
3076} 3149}
3077 3150
3078static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3151static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -3086,9 +3159,13 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3086 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U))) 3159 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER - 1U)))
3087 return -EIO; 3160 return -EIO;
3088 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) { 3161 if (likely(bitmap[1] & FATTR4_WORD1_OWNER)) {
3089 READ_BUF(4); 3162 p = xdr_inline_decode(xdr, 4);
3090 READ32(len); 3163 if (unlikely(!p))
3091 READ_BUF(len); 3164 goto out_overflow;
3165 len = be32_to_cpup(p);
3166 p = xdr_inline_decode(xdr, len);
3167 if (unlikely(!p))
3168 goto out_overflow;
3092 if (!may_sleep) { 3169 if (!may_sleep) {
3093 /* do nothing */ 3170 /* do nothing */
3094 } else if (len < XDR_MAX_NETOBJ) { 3171 } else if (len < XDR_MAX_NETOBJ) {
@@ -3104,6 +3181,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3104 } 3181 }
3105 dprintk("%s: uid=%d\n", __func__, (int)*uid); 3182 dprintk("%s: uid=%d\n", __func__, (int)*uid);
3106 return ret; 3183 return ret;
3184out_overflow:
3185 print_overflow_msg(__func__, xdr);
3186 return -EIO;
3107} 3187}
3108 3188
3109static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3189static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -3117,9 +3197,13 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3117 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U))) 3197 if (unlikely(bitmap[1] & (FATTR4_WORD1_OWNER_GROUP - 1U)))
3118 return -EIO; 3198 return -EIO;
3119 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) { 3199 if (likely(bitmap[1] & FATTR4_WORD1_OWNER_GROUP)) {
3120 READ_BUF(4); 3200 p = xdr_inline_decode(xdr, 4);
3121 READ32(len); 3201 if (unlikely(!p))
3122 READ_BUF(len); 3202 goto out_overflow;
3203 len = be32_to_cpup(p);
3204 p = xdr_inline_decode(xdr, len);
3205 if (unlikely(!p))
3206 goto out_overflow;
3123 if (!may_sleep) { 3207 if (!may_sleep) {
3124 /* do nothing */ 3208 /* do nothing */
3125 } else if (len < XDR_MAX_NETOBJ) { 3209 } else if (len < XDR_MAX_NETOBJ) {
@@ -3135,6 +3219,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3135 } 3219 }
3136 dprintk("%s: gid=%d\n", __func__, (int)*gid); 3220 dprintk("%s: gid=%d\n", __func__, (int)*gid);
3137 return ret; 3221 return ret;
3222out_overflow:
3223 print_overflow_msg(__func__, xdr);
3224 return -EIO;
3138} 3225}
3139 3226
3140static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev) 3227static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -3149,9 +3236,11 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3149 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) { 3236 if (likely(bitmap[1] & FATTR4_WORD1_RAWDEV)) {
3150 dev_t tmp; 3237 dev_t tmp;
3151 3238
3152 READ_BUF(8); 3239 p = xdr_inline_decode(xdr, 8);
3153 READ32(major); 3240 if (unlikely(!p))
3154 READ32(minor); 3241 goto out_overflow;
3242 major = be32_to_cpup(p++);
3243 minor = be32_to_cpup(p);
3155 tmp = MKDEV(major, minor); 3244 tmp = MKDEV(major, minor);
3156 if (MAJOR(tmp) == major && MINOR(tmp) == minor) 3245 if (MAJOR(tmp) == major && MINOR(tmp) == minor)
3157 *rdev = tmp; 3246 *rdev = tmp;
@@ -3160,6 +3249,9 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
3160 } 3249 }
3161 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor); 3250 dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
3162 return ret; 3251 return ret;
3252out_overflow:
3253 print_overflow_msg(__func__, xdr);
3254 return -EIO;
3163} 3255}
3164 3256
3165static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3257static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3171,12 +3263,17 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
3171 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U))) 3263 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_AVAIL - 1U)))
3172 return -EIO; 3264 return -EIO;
3173 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) { 3265 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
3174 READ_BUF(8); 3266 p = xdr_inline_decode(xdr, 8);
3175 READ64(*res); 3267 if (unlikely(!p))
3268 goto out_overflow;
3269 xdr_decode_hyper(p, res);
3176 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL; 3270 bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
3177 } 3271 }
3178 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res); 3272 dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
3179 return status; 3273 return status;
3274out_overflow:
3275 print_overflow_msg(__func__, xdr);
3276 return -EIO;
3180} 3277}
3181 3278
3182static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3279static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3188,12 +3285,17 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
3188 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U))) 3285 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_FREE - 1U)))
3189 return -EIO; 3286 return -EIO;
3190 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) { 3287 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
3191 READ_BUF(8); 3288 p = xdr_inline_decode(xdr, 8);
3192 READ64(*res); 3289 if (unlikely(!p))
3290 goto out_overflow;
3291 xdr_decode_hyper(p, res);
3193 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE; 3292 bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
3194 } 3293 }
3195 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res); 3294 dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
3196 return status; 3295 return status;
3296out_overflow:
3297 print_overflow_msg(__func__, xdr);
3298 return -EIO;
3197} 3299}
3198 3300
3199static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) 3301static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3205,12 +3307,17 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
3205 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U))) 3307 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_TOTAL - 1U)))
3206 return -EIO; 3308 return -EIO;
3207 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) { 3309 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
3208 READ_BUF(8); 3310 p = xdr_inline_decode(xdr, 8);
3209 READ64(*res); 3311 if (unlikely(!p))
3312 goto out_overflow;
3313 xdr_decode_hyper(p, res);
3210 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL; 3314 bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
3211 } 3315 }
3212 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res); 3316 dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
3213 return status; 3317 return status;
3318out_overflow:
3319 print_overflow_msg(__func__, xdr);
3320 return -EIO;
3214} 3321}
3215 3322
3216static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used) 3323static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -3222,14 +3329,19 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
3222 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U))) 3329 if (unlikely(bitmap[1] & (FATTR4_WORD1_SPACE_USED - 1U)))
3223 return -EIO; 3330 return -EIO;
3224 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) { 3331 if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
3225 READ_BUF(8); 3332 p = xdr_inline_decode(xdr, 8);
3226 READ64(*used); 3333 if (unlikely(!p))
3334 goto out_overflow;
3335 xdr_decode_hyper(p, used);
3227 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED; 3336 bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
3228 ret = NFS_ATTR_FATTR_SPACE_USED; 3337 ret = NFS_ATTR_FATTR_SPACE_USED;
3229 } 3338 }
3230 dprintk("%s: space used=%Lu\n", __func__, 3339 dprintk("%s: space used=%Lu\n", __func__,
3231 (unsigned long long)*used); 3340 (unsigned long long)*used);
3232 return ret; 3341 return ret;
3342out_overflow:
3343 print_overflow_msg(__func__, xdr);
3344 return -EIO;
3233} 3345}
3234 3346
3235static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time) 3347static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
@@ -3238,12 +3350,17 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)
3238 uint64_t sec; 3350 uint64_t sec;
3239 uint32_t nsec; 3351 uint32_t nsec;
3240 3352
3241 READ_BUF(12); 3353 p = xdr_inline_decode(xdr, 12);
3242 READ64(sec); 3354 if (unlikely(!p))
3243 READ32(nsec); 3355 goto out_overflow;
3356 p = xdr_decode_hyper(p, &sec);
3357 nsec = be32_to_cpup(p);
3244 time->tv_sec = (time_t)sec; 3358 time->tv_sec = (time_t)sec;
3245 time->tv_nsec = (long)nsec; 3359 time->tv_nsec = (long)nsec;
3246 return 0; 3360 return 0;
3361out_overflow:
3362 print_overflow_msg(__func__, xdr);
3363 return -EIO;
3247} 3364}
3248 3365
3249static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) 3366static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -3321,11 +3438,16 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c
3321{ 3438{
3322 __be32 *p; 3439 __be32 *p;
3323 3440
3324 READ_BUF(20); 3441 p = xdr_inline_decode(xdr, 20);
3325 READ32(cinfo->atomic); 3442 if (unlikely(!p))
3326 READ64(cinfo->before); 3443 goto out_overflow;
3327 READ64(cinfo->after); 3444 cinfo->atomic = be32_to_cpup(p++);
3445 p = xdr_decode_hyper(p, &cinfo->before);
3446 xdr_decode_hyper(p, &cinfo->after);
3328 return 0; 3447 return 0;
3448out_overflow:
3449 print_overflow_msg(__func__, xdr);
3450 return -EIO;
3329} 3451}
3330 3452
3331static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) 3453static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
@@ -3337,40 +3459,62 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
3337 status = decode_op_hdr(xdr, OP_ACCESS); 3459 status = decode_op_hdr(xdr, OP_ACCESS);
3338 if (status) 3460 if (status)
3339 return status; 3461 return status;
3340 READ_BUF(8); 3462 p = xdr_inline_decode(xdr, 8);
3341 READ32(supp); 3463 if (unlikely(!p))
3342 READ32(acc); 3464 goto out_overflow;
3465 supp = be32_to_cpup(p++);
3466 acc = be32_to_cpup(p);
3343 access->supported = supp; 3467 access->supported = supp;
3344 access->access = acc; 3468 access->access = acc;
3345 return 0; 3469 return 0;
3470out_overflow:
3471 print_overflow_msg(__func__, xdr);
3472 return -EIO;
3346} 3473}
3347 3474
3348static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res) 3475static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
3349{ 3476{
3350 __be32 *p; 3477 __be32 *p;
3478
3479 p = xdr_inline_decode(xdr, len);
3480 if (likely(p)) {
3481 memcpy(buf, p, len);
3482 return 0;
3483 }
3484 print_overflow_msg(__func__, xdr);
3485 return -EIO;
3486}
3487
3488static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
3489{
3490 return decode_opaque_fixed(xdr, stateid->data, NFS4_STATEID_SIZE);
3491}
3492
3493static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
3494{
3351 int status; 3495 int status;
3352 3496
3353 status = decode_op_hdr(xdr, OP_CLOSE); 3497 status = decode_op_hdr(xdr, OP_CLOSE);
3354 if (status != -EIO) 3498 if (status != -EIO)
3355 nfs_increment_open_seqid(status, res->seqid); 3499 nfs_increment_open_seqid(status, res->seqid);
3356 if (status) 3500 if (!status)
3357 return status; 3501 status = decode_stateid(xdr, &res->stateid);
3358 READ_BUF(NFS4_STATEID_SIZE); 3502 return status;
3359 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3503}
3360 return 0; 3504
3505static int decode_verifier(struct xdr_stream *xdr, void *verifier)
3506{
3507 return decode_opaque_fixed(xdr, verifier, 8);
3361} 3508}
3362 3509
3363static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res) 3510static int decode_commit(struct xdr_stream *xdr, struct nfs_writeres *res)
3364{ 3511{
3365 __be32 *p;
3366 int status; 3512 int status;
3367 3513
3368 status = decode_op_hdr(xdr, OP_COMMIT); 3514 status = decode_op_hdr(xdr, OP_COMMIT);
3369 if (status) 3515 if (!status)
3370 return status; 3516 status = decode_verifier(xdr, res->verf->verifier);
3371 READ_BUF(8); 3517 return status;
3372 COPYMEM(res->verf->verifier, 8);
3373 return 0;
3374} 3518}
3375 3519
3376static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3520static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3384,10 +3528,16 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
3384 return status; 3528 return status;
3385 if ((status = decode_change_info(xdr, cinfo))) 3529 if ((status = decode_change_info(xdr, cinfo)))
3386 return status; 3530 return status;
3387 READ_BUF(4); 3531 p = xdr_inline_decode(xdr, 4);
3388 READ32(bmlen); 3532 if (unlikely(!p))
3389 READ_BUF(bmlen << 2); 3533 goto out_overflow;
3390 return 0; 3534 bmlen = be32_to_cpup(p);
3535 p = xdr_inline_decode(xdr, bmlen << 2);
3536 if (likely(p))
3537 return 0;
3538out_overflow:
3539 print_overflow_msg(__func__, xdr);
3540 return -EIO;
3391} 3541}
3392 3542
3393static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res) 3543static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
@@ -3642,14 +3792,21 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
3642 if (status) 3792 if (status)
3643 return status; 3793 return status;
3644 3794
3645 READ_BUF(4); 3795 p = xdr_inline_decode(xdr, 4);
3646 READ32(len); 3796 if (unlikely(!p))
3797 goto out_overflow;
3798 len = be32_to_cpup(p);
3647 if (len > NFS4_FHSIZE) 3799 if (len > NFS4_FHSIZE)
3648 return -EIO; 3800 return -EIO;
3649 fh->size = len; 3801 fh->size = len;
3650 READ_BUF(len); 3802 p = xdr_inline_decode(xdr, len);
3651 COPYMEM(fh->data, len); 3803 if (unlikely(!p))
3804 goto out_overflow;
3805 memcpy(fh->data, p, len);
3652 return 0; 3806 return 0;
3807out_overflow:
3808 print_overflow_msg(__func__, xdr);
3809 return -EIO;
3653} 3810}
3654 3811
3655static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 3812static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -3671,10 +3828,12 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3671 __be32 *p; 3828 __be32 *p;
3672 uint32_t namelen, type; 3829 uint32_t namelen, type;
3673 3830
3674 READ_BUF(32); 3831 p = xdr_inline_decode(xdr, 32);
3675 READ64(offset); 3832 if (unlikely(!p))
3676 READ64(length); 3833 goto out_overflow;
3677 READ32(type); 3834 p = xdr_decode_hyper(p, &offset);
3835 p = xdr_decode_hyper(p, &length);
3836 type = be32_to_cpup(p++);
3678 if (fl != NULL) { 3837 if (fl != NULL) {
3679 fl->fl_start = (loff_t)offset; 3838 fl->fl_start = (loff_t)offset;
3680 fl->fl_end = fl->fl_start + (loff_t)length - 1; 3839 fl->fl_end = fl->fl_start + (loff_t)length - 1;
@@ -3685,23 +3844,27 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
3685 fl->fl_type = F_RDLCK; 3844 fl->fl_type = F_RDLCK;
3686 fl->fl_pid = 0; 3845 fl->fl_pid = 0;
3687 } 3846 }
3688 READ64(clientid); 3847 p = xdr_decode_hyper(p, &clientid);
3689 READ32(namelen); 3848 namelen = be32_to_cpup(p);
3690 READ_BUF(namelen); 3849 p = xdr_inline_decode(xdr, namelen);
3691 return -NFS4ERR_DENIED; 3850 if (likely(p))
3851 return -NFS4ERR_DENIED;
3852out_overflow:
3853 print_overflow_msg(__func__, xdr);
3854 return -EIO;
3692} 3855}
3693 3856
3694static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res) 3857static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
3695{ 3858{
3696 __be32 *p;
3697 int status; 3859 int status;
3698 3860
3699 status = decode_op_hdr(xdr, OP_LOCK); 3861 status = decode_op_hdr(xdr, OP_LOCK);
3700 if (status == -EIO) 3862 if (status == -EIO)
3701 goto out; 3863 goto out;
3702 if (status == 0) { 3864 if (status == 0) {
3703 READ_BUF(NFS4_STATEID_SIZE); 3865 status = decode_stateid(xdr, &res->stateid);
3704 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE); 3866 if (unlikely(status))
3867 goto out;
3705 } else if (status == -NFS4ERR_DENIED) 3868 } else if (status == -NFS4ERR_DENIED)
3706 status = decode_lock_denied(xdr, NULL); 3869 status = decode_lock_denied(xdr, NULL);
3707 if (res->open_seqid != NULL) 3870 if (res->open_seqid != NULL)
@@ -3722,16 +3885,13 @@ static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
3722 3885
3723static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res) 3886static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
3724{ 3887{
3725 __be32 *p;
3726 int status; 3888 int status;
3727 3889
3728 status = decode_op_hdr(xdr, OP_LOCKU); 3890 status = decode_op_hdr(xdr, OP_LOCKU);
3729 if (status != -EIO) 3891 if (status != -EIO)
3730 nfs_increment_lock_seqid(status, res->seqid); 3892 nfs_increment_lock_seqid(status, res->seqid);
3731 if (status == 0) { 3893 if (status == 0)
3732 READ_BUF(NFS4_STATEID_SIZE); 3894 status = decode_stateid(xdr, &res->stateid);
3733 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3734 }
3735 return status; 3895 return status;
3736} 3896}
3737 3897
@@ -3746,34 +3906,46 @@ static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize)
3746 __be32 *p; 3906 __be32 *p;
3747 uint32_t limit_type, nblocks, blocksize; 3907 uint32_t limit_type, nblocks, blocksize;
3748 3908
3749 READ_BUF(12); 3909 p = xdr_inline_decode(xdr, 12);
3750 READ32(limit_type); 3910 if (unlikely(!p))
3911 goto out_overflow;
3912 limit_type = be32_to_cpup(p++);
3751 switch (limit_type) { 3913 switch (limit_type) {
3752 case 1: 3914 case 1:
3753 READ64(*maxsize); 3915 xdr_decode_hyper(p, maxsize);
3754 break; 3916 break;
3755 case 2: 3917 case 2:
3756 READ32(nblocks); 3918 nblocks = be32_to_cpup(p++);
3757 READ32(blocksize); 3919 blocksize = be32_to_cpup(p);
3758 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; 3920 *maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
3759 } 3921 }
3760 return 0; 3922 return 0;
3923out_overflow:
3924 print_overflow_msg(__func__, xdr);
3925 return -EIO;
3761} 3926}
3762 3927
3763static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) 3928static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3764{ 3929{
3765 __be32 *p; 3930 __be32 *p;
3766 uint32_t delegation_type; 3931 uint32_t delegation_type;
3932 int status;
3767 3933
3768 READ_BUF(4); 3934 p = xdr_inline_decode(xdr, 4);
3769 READ32(delegation_type); 3935 if (unlikely(!p))
3936 goto out_overflow;
3937 delegation_type = be32_to_cpup(p);
3770 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { 3938 if (delegation_type == NFS4_OPEN_DELEGATE_NONE) {
3771 res->delegation_type = 0; 3939 res->delegation_type = 0;
3772 return 0; 3940 return 0;
3773 } 3941 }
3774 READ_BUF(NFS4_STATEID_SIZE+4); 3942 status = decode_stateid(xdr, &res->delegation);
3775 COPYMEM(res->delegation.data, NFS4_STATEID_SIZE); 3943 if (unlikely(status))
3776 READ32(res->do_recall); 3944 return status;
3945 p = xdr_inline_decode(xdr, 4);
3946 if (unlikely(!p))
3947 goto out_overflow;
3948 res->do_recall = be32_to_cpup(p);
3777 3949
3778 switch (delegation_type) { 3950 switch (delegation_type) {
3779 case NFS4_OPEN_DELEGATE_READ: 3951 case NFS4_OPEN_DELEGATE_READ:
@@ -3785,6 +3957,9 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
3785 return -EIO; 3957 return -EIO;
3786 } 3958 }
3787 return decode_ace(xdr, NULL, res->server->nfs_client); 3959 return decode_ace(xdr, NULL, res->server->nfs_client);
3960out_overflow:
3961 print_overflow_msg(__func__, xdr);
3962 return -EIO;
3788} 3963}
3789 3964
3790static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) 3965static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -3796,23 +3971,27 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3796 status = decode_op_hdr(xdr, OP_OPEN); 3971 status = decode_op_hdr(xdr, OP_OPEN);
3797 if (status != -EIO) 3972 if (status != -EIO)
3798 nfs_increment_open_seqid(status, res->seqid); 3973 nfs_increment_open_seqid(status, res->seqid);
3799 if (status) 3974 if (!status)
3975 status = decode_stateid(xdr, &res->stateid);
3976 if (unlikely(status))
3800 return status; 3977 return status;
3801 READ_BUF(NFS4_STATEID_SIZE);
3802 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3803 3978
3804 decode_change_info(xdr, &res->cinfo); 3979 decode_change_info(xdr, &res->cinfo);
3805 3980
3806 READ_BUF(8); 3981 p = xdr_inline_decode(xdr, 8);
3807 READ32(res->rflags); 3982 if (unlikely(!p))
3808 READ32(bmlen); 3983 goto out_overflow;
3984 res->rflags = be32_to_cpup(p++);
3985 bmlen = be32_to_cpup(p);
3809 if (bmlen > 10) 3986 if (bmlen > 10)
3810 goto xdr_error; 3987 goto xdr_error;
3811 3988
3812 READ_BUF(bmlen << 2); 3989 p = xdr_inline_decode(xdr, bmlen << 2);
3990 if (unlikely(!p))
3991 goto out_overflow;
3813 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE); 3992 savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
3814 for (i = 0; i < savewords; ++i) 3993 for (i = 0; i < savewords; ++i)
3815 READ32(res->attrset[i]); 3994 res->attrset[i] = be32_to_cpup(p++);
3816 for (; i < NFS4_BITMAP_SIZE; i++) 3995 for (; i < NFS4_BITMAP_SIZE; i++)
3817 res->attrset[i] = 0; 3996 res->attrset[i] = 0;
3818 3997
@@ -3820,36 +3999,33 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
3820xdr_error: 3999xdr_error:
3821 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen); 4000 dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
3822 return -EIO; 4001 return -EIO;
4002out_overflow:
4003 print_overflow_msg(__func__, xdr);
4004 return -EIO;
3823} 4005}
3824 4006
3825static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) 4007static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
3826{ 4008{
3827 __be32 *p;
3828 int status; 4009 int status;
3829 4010
3830 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); 4011 status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
3831 if (status != -EIO) 4012 if (status != -EIO)
3832 nfs_increment_open_seqid(status, res->seqid); 4013 nfs_increment_open_seqid(status, res->seqid);
3833 if (status) 4014 if (!status)
3834 return status; 4015 status = decode_stateid(xdr, &res->stateid);
3835 READ_BUF(NFS4_STATEID_SIZE); 4016 return status;
3836 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3837 return 0;
3838} 4017}
3839 4018
3840static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) 4019static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res)
3841{ 4020{
3842 __be32 *p;
3843 int status; 4021 int status;
3844 4022
3845 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); 4023 status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
3846 if (status != -EIO) 4024 if (status != -EIO)
3847 nfs_increment_open_seqid(status, res->seqid); 4025 nfs_increment_open_seqid(status, res->seqid);
3848 if (status) 4026 if (!status)
3849 return status; 4027 status = decode_stateid(xdr, &res->stateid);
3850 READ_BUF(NFS4_STATEID_SIZE); 4028 return status;
3851 COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
3852 return 0;
3853} 4029}
3854 4030
3855static int decode_putfh(struct xdr_stream *xdr) 4031static int decode_putfh(struct xdr_stream *xdr)
@@ -3872,9 +4048,11 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3872 status = decode_op_hdr(xdr, OP_READ); 4048 status = decode_op_hdr(xdr, OP_READ);
3873 if (status) 4049 if (status)
3874 return status; 4050 return status;
3875 READ_BUF(8); 4051 p = xdr_inline_decode(xdr, 8);
3876 READ32(eof); 4052 if (unlikely(!p))
3877 READ32(count); 4053 goto out_overflow;
4054 eof = be32_to_cpup(p++);
4055 count = be32_to_cpup(p);
3878 hdrlen = (u8 *) p - (u8 *) iov->iov_base; 4056 hdrlen = (u8 *) p - (u8 *) iov->iov_base;
3879 recvd = req->rq_rcv_buf.len - hdrlen; 4057 recvd = req->rq_rcv_buf.len - hdrlen;
3880 if (count > recvd) { 4058 if (count > recvd) {
@@ -3887,6 +4065,9 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
3887 res->eof = eof; 4065 res->eof = eof;
3888 res->count = count; 4066 res->count = count;
3889 return 0; 4067 return 0;
4068out_overflow:
4069 print_overflow_msg(__func__, xdr);
4070 return -EIO;
3890} 4071}
3891 4072
3892static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) 4073static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -3901,17 +4082,17 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
3901 int status; 4082 int status;
3902 4083
3903 status = decode_op_hdr(xdr, OP_READDIR); 4084 status = decode_op_hdr(xdr, OP_READDIR);
3904 if (status) 4085 if (!status)
4086 status = decode_verifier(xdr, readdir->verifier.data);
4087 if (unlikely(status))
3905 return status; 4088 return status;
3906 READ_BUF(8);
3907 COPYMEM(readdir->verifier.data, 8);
3908 dprintk("%s: verifier = %08x:%08x\n", 4089 dprintk("%s: verifier = %08x:%08x\n",
3909 __func__, 4090 __func__,
3910 ((u32 *)readdir->verifier.data)[0], 4091 ((u32 *)readdir->verifier.data)[0],
3911 ((u32 *)readdir->verifier.data)[1]); 4092 ((u32 *)readdir->verifier.data)[1]);
3912 4093
3913 4094
3914 hdrlen = (char *) p - (char *) iov->iov_base; 4095 hdrlen = (char *) xdr->p - (char *) iov->iov_base;
3915 recvd = rcvbuf->len - hdrlen; 4096 recvd = rcvbuf->len - hdrlen;
3916 if (pglen > recvd) 4097 if (pglen > recvd)
3917 pglen = recvd; 4098 pglen = recvd;
@@ -3999,8 +4180,10 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
3999 return status; 4180 return status;
4000 4181
4001 /* Convert length of symlink */ 4182 /* Convert length of symlink */
4002 READ_BUF(4); 4183 p = xdr_inline_decode(xdr, 4);
4003 READ32(len); 4184 if (unlikely(!p))
4185 goto out_overflow;
4186 len = be32_to_cpup(p);
4004 if (len >= rcvbuf->page_len || len <= 0) { 4187 if (len >= rcvbuf->page_len || len <= 0) {
4005 dprintk("nfs: server returned giant symlink!\n"); 4188 dprintk("nfs: server returned giant symlink!\n");
4006 return -ENAMETOOLONG; 4189 return -ENAMETOOLONG;
@@ -4024,6 +4207,9 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
4024 kaddr[len+rcvbuf->page_base] = '\0'; 4207 kaddr[len+rcvbuf->page_base] = '\0';
4025 kunmap_atomic(kaddr, KM_USER0); 4208 kunmap_atomic(kaddr, KM_USER0);
4026 return 0; 4209 return 0;
4210out_overflow:
4211 print_overflow_msg(__func__, xdr);
4212 return -EIO;
4027} 4213}
4028 4214
4029static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) 4215static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -4121,10 +4307,16 @@ static int decode_setattr(struct xdr_stream *xdr)
4121 status = decode_op_hdr(xdr, OP_SETATTR); 4307 status = decode_op_hdr(xdr, OP_SETATTR);
4122 if (status) 4308 if (status)
4123 return status; 4309 return status;
4124 READ_BUF(4); 4310 p = xdr_inline_decode(xdr, 4);
4125 READ32(bmlen); 4311 if (unlikely(!p))
4126 READ_BUF(bmlen << 2); 4312 goto out_overflow;
4127 return 0; 4313 bmlen = be32_to_cpup(p);
4314 p = xdr_inline_decode(xdr, bmlen << 2);
4315 if (likely(p))
4316 return 0;
4317out_overflow:
4318 print_overflow_msg(__func__, xdr);
4319 return -EIO;
4128} 4320}
4129 4321
4130static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) 4322static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
@@ -4133,35 +4325,50 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
4133 uint32_t opnum; 4325 uint32_t opnum;
4134 int32_t nfserr; 4326 int32_t nfserr;
4135 4327
4136 READ_BUF(8); 4328 p = xdr_inline_decode(xdr, 8);
4137 READ32(opnum); 4329 if (unlikely(!p))
4330 goto out_overflow;
4331 opnum = be32_to_cpup(p++);
4138 if (opnum != OP_SETCLIENTID) { 4332 if (opnum != OP_SETCLIENTID) {
4139 dprintk("nfs: decode_setclientid: Server returned operation" 4333 dprintk("nfs: decode_setclientid: Server returned operation"
4140 " %d\n", opnum); 4334 " %d\n", opnum);
4141 return -EIO; 4335 return -EIO;
4142 } 4336 }
4143 READ32(nfserr); 4337 nfserr = be32_to_cpup(p);
4144 if (nfserr == NFS_OK) { 4338 if (nfserr == NFS_OK) {
4145 READ_BUF(8 + NFS4_VERIFIER_SIZE); 4339 p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
4146 READ64(clp->cl_clientid); 4340 if (unlikely(!p))
4147 COPYMEM(clp->cl_confirm.data, NFS4_VERIFIER_SIZE); 4341 goto out_overflow;
4342 p = xdr_decode_hyper(p, &clp->cl_clientid);
4343 memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
4148 } else if (nfserr == NFSERR_CLID_INUSE) { 4344 } else if (nfserr == NFSERR_CLID_INUSE) {
4149 uint32_t len; 4345 uint32_t len;
4150 4346
4151 /* skip netid string */ 4347 /* skip netid string */
4152 READ_BUF(4); 4348 p = xdr_inline_decode(xdr, 4);
4153 READ32(len); 4349 if (unlikely(!p))
4154 READ_BUF(len); 4350 goto out_overflow;
4351 len = be32_to_cpup(p);
4352 p = xdr_inline_decode(xdr, len);
4353 if (unlikely(!p))
4354 goto out_overflow;
4155 4355
4156 /* skip uaddr string */ 4356 /* skip uaddr string */
4157 READ_BUF(4); 4357 p = xdr_inline_decode(xdr, 4);
4158 READ32(len); 4358 if (unlikely(!p))
4159 READ_BUF(len); 4359 goto out_overflow;
4360 len = be32_to_cpup(p);
4361 p = xdr_inline_decode(xdr, len);
4362 if (unlikely(!p))
4363 goto out_overflow;
4160 return -NFSERR_CLID_INUSE; 4364 return -NFSERR_CLID_INUSE;
4161 } else 4365 } else
4162 return nfs4_stat_to_errno(nfserr); 4366 return nfs4_stat_to_errno(nfserr);
4163 4367
4164 return 0; 4368 return 0;
4369out_overflow:
4370 print_overflow_msg(__func__, xdr);
4371 return -EIO;
4165} 4372}
4166 4373
4167static int decode_setclientid_confirm(struct xdr_stream *xdr) 4374static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -4178,11 +4385,16 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
4178 if (status) 4385 if (status)
4179 return status; 4386 return status;
4180 4387
4181 READ_BUF(16); 4388 p = xdr_inline_decode(xdr, 16);
4182 READ32(res->count); 4389 if (unlikely(!p))
4183 READ32(res->verf->committed); 4390 goto out_overflow;
4184 COPYMEM(res->verf->verifier, 8); 4391 res->count = be32_to_cpup(p++);
4392 res->verf->committed = be32_to_cpup(p++);
4393 memcpy(res->verf->verifier, p, 8);
4185 return 0; 4394 return 0;
4395out_overflow:
4396 print_overflow_msg(__func__, xdr);
4397 return -EIO;
4186} 4398}
4187 4399
4188static int decode_delegreturn(struct xdr_stream *xdr) 4400static int decode_delegreturn(struct xdr_stream *xdr)
@@ -4196,6 +4408,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4196{ 4408{
4197 __be32 *p; 4409 __be32 *p;
4198 uint32_t dummy; 4410 uint32_t dummy;
4411 char *dummy_str;
4199 int status; 4412 int status;
4200 struct nfs_client *clp = res->client; 4413 struct nfs_client *clp = res->client;
4201 4414
@@ -4203,36 +4416,45 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4203 if (status) 4416 if (status)
4204 return status; 4417 return status;
4205 4418
4206 READ_BUF(8); 4419 p = xdr_inline_decode(xdr, 8);
4207 READ64(clp->cl_ex_clid); 4420 if (unlikely(!p))
4208 READ_BUF(12); 4421 goto out_overflow;
4209 READ32(clp->cl_seqid); 4422 xdr_decode_hyper(p, &clp->cl_ex_clid);
4210 READ32(clp->cl_exchange_flags); 4423 p = xdr_inline_decode(xdr, 12);
4424 if (unlikely(!p))
4425 goto out_overflow;
4426 clp->cl_seqid = be32_to_cpup(p++);
4427 clp->cl_exchange_flags = be32_to_cpup(p++);
4211 4428
4212 /* We ask for SP4_NONE */ 4429 /* We ask for SP4_NONE */
4213 READ32(dummy); 4430 dummy = be32_to_cpup(p);
4214 if (dummy != SP4_NONE) 4431 if (dummy != SP4_NONE)
4215 return -EIO; 4432 return -EIO;
4216 4433
4217 /* Throw away minor_id */ 4434 /* Throw away minor_id */
4218 READ_BUF(8); 4435 p = xdr_inline_decode(xdr, 8);
4436 if (unlikely(!p))
4437 goto out_overflow;
4219 4438
4220 /* Throw away Major id */ 4439 /* Throw away Major id */
4221 READ_BUF(4); 4440 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4222 READ32(dummy); 4441 if (unlikely(status))
4223 READ_BUF(dummy); 4442 return status;
4224 4443
4225 /* Throw away server_scope */ 4444 /* Throw away server_scope */
4226 READ_BUF(4); 4445 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4227 READ32(dummy); 4446 if (unlikely(status))
4228 READ_BUF(dummy); 4447 return status;
4229 4448
4230 /* Throw away Implementation id array */ 4449 /* Throw away Implementation id array */
4231 READ_BUF(4); 4450 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4232 READ32(dummy); 4451 if (unlikely(status))
4233 READ_BUF(dummy); 4452 return status;
4234 4453
4235 return 0; 4454 return 0;
4455out_overflow:
4456 print_overflow_msg(__func__, xdr);
4457 return -EIO;
4236} 4458}
4237 4459
4238static int decode_chan_attrs(struct xdr_stream *xdr, 4460static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -4241,22 +4463,35 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
4241 __be32 *p; 4463 __be32 *p;
4242 u32 nr_attrs; 4464 u32 nr_attrs;
4243 4465
4244 READ_BUF(28); 4466 p = xdr_inline_decode(xdr, 28);
4245 READ32(attrs->headerpadsz); 4467 if (unlikely(!p))
4246 READ32(attrs->max_rqst_sz); 4468 goto out_overflow;
4247 READ32(attrs->max_resp_sz); 4469 attrs->headerpadsz = be32_to_cpup(p++);
4248 READ32(attrs->max_resp_sz_cached); 4470 attrs->max_rqst_sz = be32_to_cpup(p++);
4249 READ32(attrs->max_ops); 4471 attrs->max_resp_sz = be32_to_cpup(p++);
4250 READ32(attrs->max_reqs); 4472 attrs->max_resp_sz_cached = be32_to_cpup(p++);
4251 READ32(nr_attrs); 4473 attrs->max_ops = be32_to_cpup(p++);
4474 attrs->max_reqs = be32_to_cpup(p++);
4475 nr_attrs = be32_to_cpup(p);
4252 if (unlikely(nr_attrs > 1)) { 4476 if (unlikely(nr_attrs > 1)) {
4253 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n", 4477 printk(KERN_WARNING "%s: Invalid rdma channel attrs count %u\n",
4254 __func__, nr_attrs); 4478 __func__, nr_attrs);
4255 return -EINVAL; 4479 return -EINVAL;
4256 } 4480 }
4257 if (nr_attrs == 1) 4481 if (nr_attrs == 1) {
4258 READ_BUF(4); /* skip rdma_attrs */ 4482 p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
4483 if (unlikely(!p))
4484 goto out_overflow;
4485 }
4259 return 0; 4486 return 0;
4487out_overflow:
4488 print_overflow_msg(__func__, xdr);
4489 return -EIO;
4490}
4491
4492static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
4493{
4494 return decode_opaque_fixed(xdr, sid->data, NFS4_MAX_SESSIONID_LEN);
4260} 4495}
4261 4496
4262static int decode_create_session(struct xdr_stream *xdr, 4497static int decode_create_session(struct xdr_stream *xdr,
@@ -4268,24 +4503,26 @@ static int decode_create_session(struct xdr_stream *xdr,
4268 struct nfs4_session *session = clp->cl_session; 4503 struct nfs4_session *session = clp->cl_session;
4269 4504
4270 status = decode_op_hdr(xdr, OP_CREATE_SESSION); 4505 status = decode_op_hdr(xdr, OP_CREATE_SESSION);
4271 4506 if (!status)
4272 if (status) 4507 status = decode_sessionid(xdr, &session->sess_id);
4508 if (unlikely(status))
4273 return status; 4509 return status;
4274 4510
4275 /* sessionid */
4276 READ_BUF(NFS4_MAX_SESSIONID_LEN);
4277 COPYMEM(&session->sess_id, NFS4_MAX_SESSIONID_LEN);
4278
4279 /* seqid, flags */ 4511 /* seqid, flags */
4280 READ_BUF(8); 4512 p = xdr_inline_decode(xdr, 8);
4281 READ32(clp->cl_seqid); 4513 if (unlikely(!p))
4282 READ32(session->flags); 4514 goto out_overflow;
4515 clp->cl_seqid = be32_to_cpup(p++);
4516 session->flags = be32_to_cpup(p);
4283 4517
4284 /* Channel attributes */ 4518 /* Channel attributes */
4285 status = decode_chan_attrs(xdr, &session->fc_attrs); 4519 status = decode_chan_attrs(xdr, &session->fc_attrs);
4286 if (!status) 4520 if (!status)
4287 status = decode_chan_attrs(xdr, &session->bc_attrs); 4521 status = decode_chan_attrs(xdr, &session->bc_attrs);
4288 return status; 4522 return status;
4523out_overflow:
4524 print_overflow_msg(__func__, xdr);
4525 return -EIO;
4289} 4526}
4290 4527
4291static int decode_destroy_session(struct xdr_stream *xdr, void *dummy) 4528static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -4309,7 +4546,9 @@ static int decode_sequence(struct xdr_stream *xdr,
4309 return 0; 4546 return 0;
4310 4547
4311 status = decode_op_hdr(xdr, OP_SEQUENCE); 4548 status = decode_op_hdr(xdr, OP_SEQUENCE);
4312 if (status) 4549 if (!status)
4550 status = decode_sessionid(xdr, &id);
4551 if (unlikely(status))
4313 goto out_err; 4552 goto out_err;
4314 4553
4315 /* 4554 /*
@@ -4318,36 +4557,43 @@ static int decode_sequence(struct xdr_stream *xdr,
4318 */ 4557 */
4319 status = -ESERVERFAULT; 4558 status = -ESERVERFAULT;
4320 4559
4321 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4322 READ_BUF(NFS4_MAX_SESSIONID_LEN + 20);
4323 COPYMEM(id.data, NFS4_MAX_SESSIONID_LEN);
4324 if (memcmp(id.data, res->sr_session->sess_id.data, 4560 if (memcmp(id.data, res->sr_session->sess_id.data,
4325 NFS4_MAX_SESSIONID_LEN)) { 4561 NFS4_MAX_SESSIONID_LEN)) {
4326 dprintk("%s Invalid session id\n", __func__); 4562 dprintk("%s Invalid session id\n", __func__);
4327 goto out_err; 4563 goto out_err;
4328 } 4564 }
4565
4566 p = xdr_inline_decode(xdr, 20);
4567 if (unlikely(!p))
4568 goto out_overflow;
4569
4329 /* seqid */ 4570 /* seqid */
4330 READ32(dummy); 4571 slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid];
4572 dummy = be32_to_cpup(p++);
4331 if (dummy != slot->seq_nr) { 4573 if (dummy != slot->seq_nr) {
4332 dprintk("%s Invalid sequence number\n", __func__); 4574 dprintk("%s Invalid sequence number\n", __func__);
4333 goto out_err; 4575 goto out_err;
4334 } 4576 }
4335 /* slot id */ 4577 /* slot id */
4336 READ32(dummy); 4578 dummy = be32_to_cpup(p++);
4337 if (dummy != res->sr_slotid) { 4579 if (dummy != res->sr_slotid) {
4338 dprintk("%s Invalid slot id\n", __func__); 4580 dprintk("%s Invalid slot id\n", __func__);
4339 goto out_err; 4581 goto out_err;
4340 } 4582 }
4341 /* highest slot id - currently not processed */ 4583 /* highest slot id - currently not processed */
4342 READ32(dummy); 4584 dummy = be32_to_cpup(p++);
4343 /* target highest slot id - currently not processed */ 4585 /* target highest slot id - currently not processed */
4344 READ32(dummy); 4586 dummy = be32_to_cpup(p++);
4345 /* result flags - currently not processed */ 4587 /* result flags - currently not processed */
4346 READ32(dummy); 4588 dummy = be32_to_cpup(p);
4347 status = 0; 4589 status = 0;
4348out_err: 4590out_err:
4349 res->sr_status = status; 4591 res->sr_status = status;
4350 return status; 4592 return status;
4593out_overflow:
4594 print_overflow_msg(__func__, xdr);
4595 status = -EIO;
4596 goto out_err;
4351#else /* CONFIG_NFS_V4_1 */ 4597#else /* CONFIG_NFS_V4_1 */
4352 return 0; 4598 return 0;
4353#endif /* CONFIG_NFS_V4_1 */ 4599#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 73ea5e8d66ce..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
60 return p; 60 return p;
61} 61}
62 62
63static void nfs_readdata_free(struct nfs_read_data *p) 63void nfs_readdata_free(struct nfs_read_data *p)
64{ 64{
65 if (p && (p->pagevec != &p->page_array[0])) 65 if (p && (p->pagevec != &p->page_array[0]))
66 kfree(p->pagevec); 66 kfree(p->pagevec);
67 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
68} 68}
69 69
70void nfs_readdata_release(void *data) 70static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 71{
72 struct nfs_read_data *rdata = data;
73
74 put_nfs_open_context(rdata->args.context); 72 put_nfs_open_context(rdata->args.context);
75 nfs_readdata_free(rdata); 73 nfs_readdata_free(rdata);
76} 74}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 6240e644f249..120acadc6a84 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,17 +89,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
89 return p; 89 return p;
90} 90}
91 91
92static void nfs_writedata_free(struct nfs_write_data *p) 92void nfs_writedata_free(struct nfs_write_data *p)
93{ 93{
94 if (p && (p->pagevec != &p->page_array[0])) 94 if (p && (p->pagevec != &p->page_array[0]))
95 kfree(p->pagevec); 95 kfree(p->pagevec);
96 mempool_free(p, nfs_wdata_mempool); 96 mempool_free(p, nfs_wdata_mempool);
97} 97}
98 98
99void nfs_writedata_release(void *data) 99static void nfs_writedata_release(struct nfs_write_data *wdata)
100{ 100{
101 struct nfs_write_data *wdata = data;
102
103 put_nfs_open_context(wdata->args.context); 101 put_nfs_open_context(wdata->args.context);
104 nfs_writedata_free(wdata); 102 nfs_writedata_free(wdata);
105} 103}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1914 * immediately to their right. 1914 * immediately to their right.
1915 */ 1915 */
1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1917 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1917 if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
1918 BUG_ON(right_child_el->l_tree_depth);
1918 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1919 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1919 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1920 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1920 } 1921 }
@@ -2476,15 +2477,37 @@ out_ret_path:
2476 return ret; 2477 return ret;
2477} 2478}
2478 2479
2479static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2480 struct ocfs2_path *path) 2481 int subtree_index, struct ocfs2_path *path)
2481{ 2482{
2482 int i, idx; 2483 int i, idx, ret;
2483 struct ocfs2_extent_rec *rec; 2484 struct ocfs2_extent_rec *rec;
2484 struct ocfs2_extent_list *el; 2485 struct ocfs2_extent_list *el;
2485 struct ocfs2_extent_block *eb; 2486 struct ocfs2_extent_block *eb;
2486 u32 range; 2487 u32 range;
2487 2488
2489 /*
2490 * In normal tree rotation process, we will never touch the
2491 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
2492 * doesn't reserve the credits for them either.
2493 *
2494 * But we do have a special case here which will update the rightmost
2495 * records for all the bh in the path.
2496 * So we have to allocate extra credits and access them.
2497 */
2498 ret = ocfs2_extend_trans(handle,
2499 handle->h_buffer_credits + subtree_index);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_journal_access_path(inode, handle, path);
2506 if (ret) {
2507 mlog_errno(ret);
2508 goto out;
2509 }
2510
2488 /* Path should always be rightmost. */ 2511 /* Path should always be rightmost. */
2489 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2512 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
2490 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2513 BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2505 2528
2506 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2529 ocfs2_journal_dirty(handle, path->p_node[i].bh);
2507 } 2530 }
2531out:
2532 return ret;
2508} 2533}
2509 2534
2510static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2717 if (del_right_subtree) { 2742 if (del_right_subtree) {
2718 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2719 subtree_index, dealloc); 2744 subtree_index, dealloc);
2720 ocfs2_update_edge_lengths(inode, handle, left_path); 2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
2746 left_path);
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2750 }
2721 2751
2722 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2752 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2723 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2753 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3034 3064
3035 ocfs2_unlink_subtree(inode, handle, left_path, path, 3065 ocfs2_unlink_subtree(inode, handle, left_path, path,
3036 subtree_index, dealloc); 3066 subtree_index, dealloc);
3037 ocfs2_update_edge_lengths(inode, handle, left_path); 3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
3068 left_path);
3069 if (ret) {
3070 mlog_errno(ret);
3071 goto out;
3072 }
3038 3073
3039 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 3074 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
3040 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 3075 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
193 (unsigned long long)OCFS2_I(inode)->ip_blkno); 193 (unsigned long long)OCFS2_I(inode)->ip_blkno);
194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
195 dump_stack(); 195 dump_stack();
196 goto bail;
196 } 197 }
197 198
198 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 199 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
894 */ 895 */
895 unsigned c_new; 896 unsigned c_new;
896 unsigned c_unwritten; 897 unsigned c_unwritten;
898 unsigned c_needs_zero;
897}; 899};
898 900
899static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
900{
901 return d->c_new || d->c_unwritten;
902}
903
904struct ocfs2_write_ctxt { 901struct ocfs2_write_ctxt {
905 /* Logical cluster position / len of write */ 902 /* Logical cluster position / len of write */
906 u32 w_cpos; 903 u32 w_cpos;
907 u32 w_clen; 904 u32 w_clen;
908 905
906 /* First cluster allocated in a nonsparse extend */
907 u32 w_first_new_cpos;
908
909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
910 910
911 /* 911 /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 wc->w_cpos = pos >> osb->s_clustersize_bits; 985 wc->w_cpos = pos >> osb->s_clustersize_bits;
986 wc->w_first_new_cpos = UINT_MAX;
986 cend = (pos + len - 1) >> osb->s_clustersize_bits; 987 cend = (pos + len - 1) >> osb->s_clustersize_bits;
987 wc->w_clen = cend - wc->w_cpos + 1; 988 wc->w_clen = cend - wc->w_cpos + 1;
988 get_bh(di_bh); 989 get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
1217 */ 1218 */
1218static int ocfs2_write_cluster(struct address_space *mapping, 1219static int ocfs2_write_cluster(struct address_space *mapping,
1219 u32 phys, unsigned int unwritten, 1220 u32 phys, unsigned int unwritten,
1221 unsigned int should_zero,
1220 struct ocfs2_alloc_context *data_ac, 1222 struct ocfs2_alloc_context *data_ac,
1221 struct ocfs2_alloc_context *meta_ac, 1223 struct ocfs2_alloc_context *meta_ac,
1222 struct ocfs2_write_ctxt *wc, u32 cpos, 1224 struct ocfs2_write_ctxt *wc, u32 cpos,
1223 loff_t user_pos, unsigned user_len) 1225 loff_t user_pos, unsigned user_len)
1224{ 1226{
1225 int ret, i, new, should_zero = 0; 1227 int ret, i, new;
1226 u64 v_blkno, p_blkno; 1228 u64 v_blkno, p_blkno;
1227 struct inode *inode = mapping->host; 1229 struct inode *inode = mapping->host;
1228 struct ocfs2_extent_tree et; 1230 struct ocfs2_extent_tree et;
1229 1231
1230 new = phys == 0 ? 1 : 0; 1232 new = phys == 0 ? 1 : 0;
1231 if (new || unwritten)
1232 should_zero = 1;
1233
1234 if (new) { 1233 if (new) {
1235 u32 tmp_pos; 1234 u32 tmp_pos;
1236 1235
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1301 if (tmpret) { 1300 if (tmpret) {
1302 mlog_errno(tmpret); 1301 mlog_errno(tmpret);
1303 if (ret == 0) 1302 if (ret == 0)
1304 tmpret = ret; 1303 ret = tmpret;
1305 } 1304 }
1306 } 1305 }
1307 1306
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1341 local_len = osb->s_clustersize - cluster_off; 1340 local_len = osb->s_clustersize - cluster_off;
1342 1341
1343 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1342 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1344 desc->c_unwritten, data_ac, meta_ac, 1343 desc->c_unwritten,
1344 desc->c_needs_zero,
1345 data_ac, meta_ac,
1345 wc, desc->c_cpos, pos, local_len); 1346 wc, desc->c_cpos, pos, local_len);
1346 if (ret) { 1347 if (ret) {
1347 mlog_errno(ret); 1348 mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1391 * newly allocated cluster. 1392 * newly allocated cluster.
1392 */ 1393 */
1393 desc = &wc->w_desc[0]; 1394 desc = &wc->w_desc[0];
1394 if (ocfs2_should_zero_cluster(desc)) 1395 if (desc->c_needs_zero)
1395 ocfs2_figure_cluster_boundaries(osb, 1396 ocfs2_figure_cluster_boundaries(osb,
1396 desc->c_cpos, 1397 desc->c_cpos,
1397 &wc->w_target_from, 1398 &wc->w_target_from,
1398 NULL); 1399 NULL);
1399 1400
1400 desc = &wc->w_desc[wc->w_clen - 1]; 1401 desc = &wc->w_desc[wc->w_clen - 1];
1401 if (ocfs2_should_zero_cluster(desc)) 1402 if (desc->c_needs_zero)
1402 ocfs2_figure_cluster_boundaries(osb, 1403 ocfs2_figure_cluster_boundaries(osb,
1403 desc->c_cpos, 1404 desc->c_cpos,
1404 NULL, 1405 NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1466 phys++; 1467 phys++;
1467 } 1468 }
1468 1469
1470 /*
1471 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
1472 * file that got extended. w_first_new_cpos tells us
1473 * where the newly allocated clusters are so we can
1474 * zero them.
1475 */
1476 if (desc->c_cpos >= wc->w_first_new_cpos) {
1477 BUG_ON(phys == 0);
1478 desc->c_needs_zero = 1;
1479 }
1480
1469 desc->c_phys = phys; 1481 desc->c_phys = phys;
1470 if (phys == 0) { 1482 if (phys == 0) {
1471 desc->c_new = 1; 1483 desc->c_new = 1;
1484 desc->c_needs_zero = 1;
1472 *clusters_to_alloc = *clusters_to_alloc + 1; 1485 *clusters_to_alloc = *clusters_to_alloc + 1;
1473 } 1486 }
1474 if (ext_flags & OCFS2_EXT_UNWRITTEN) 1487
1488 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1475 desc->c_unwritten = 1; 1489 desc->c_unwritten = 1;
1490 desc->c_needs_zero = 1;
1491 }
1476 1492
1477 num_clusters--; 1493 num_clusters--;
1478 } 1494 }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1632 if (newsize <= i_size_read(inode)) 1648 if (newsize <= i_size_read(inode))
1633 return 0; 1649 return 0;
1634 1650
1635 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); 1651 ret = ocfs2_extend_no_holes(inode, newsize, pos);
1636 if (ret) 1652 if (ret)
1637 mlog_errno(ret); 1653 mlog_errno(ret);
1638 1654
1655 wc->w_first_new_cpos =
1656 ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
1657
1639 return ret; 1658 return ret;
1640} 1659}
1641 1660
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1644 struct page **pagep, void **fsdata, 1663 struct page **pagep, void **fsdata,
1645 struct buffer_head *di_bh, struct page *mmap_page) 1664 struct buffer_head *di_bh, struct page *mmap_page)
1646{ 1665{
1647 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1666 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1648 unsigned int clusters_to_alloc, extents_to_split; 1667 unsigned int clusters_to_alloc, extents_to_split;
1649 struct ocfs2_write_ctxt *wc; 1668 struct ocfs2_write_ctxt *wc;
1650 struct inode *inode = mapping->host; 1669 struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1722 1741
1723 } 1742 }
1724 1743
1725 ocfs2_set_target_boundaries(osb, wc, pos, len, 1744 /*
1726 clusters_to_alloc + extents_to_split); 1745 * We have to zero sparse allocated clusters, unwritten extent clusters,
1746 * and non-sparse clusters we just extended. For non-sparse writes,
1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */
1749 if (clusters_to_alloc || extents_to_split ||
1750 wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)
1752 cluster_of_pages = 1;
1753 else
1754 cluster_of_pages = 0;
1755
1756 ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
1727 1757
1728 handle = ocfs2_start_trans(osb, credits); 1758 handle = ocfs2_start_trans(osb, credits);
1729 if (IS_ERR(handle)) { 1759 if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1756 * extent. 1786 * extent.
1757 */ 1787 */
1758 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1788 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1759 clusters_to_alloc + extents_to_split, 1789 cluster_of_pages, mmap_page);
1760 mmap_page);
1761 if (ret) { 1790 if (ret) {
1762 mlog_errno(ret); 1791 mlog_errno(ret);
1763 goto out_quota; 1792 goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
310 return ret; 310 return ret;
311} 311}
312 312
313static DEFINE_SPINLOCK(dentry_list_lock); 313DEFINE_SPINLOCK(dentry_list_lock);
314 314
315/* We limit the number of dentry locks to drop in one go. We have 315/* We limit the number of dentry locks to drop in one go. We have
316 * this limit so that we don't starve other users of ocfs2_wq. */ 316 * this limit so that we don't starve other users of ocfs2_wq. */
317#define DL_INODE_DROP_COUNT 64 317#define DL_INODE_DROP_COUNT 64
318 318
319/* Drop inode references from dentry locks */ 319/* Drop inode references from dentry locks */
320void ocfs2_drop_dl_inodes(struct work_struct *work) 320static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
321{ 321{
322 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
323 dentry_lock_work);
324 struct ocfs2_dentry_lock *dl; 322 struct ocfs2_dentry_lock *dl;
325 int drop_count = DL_INODE_DROP_COUNT;
326 323
327 spin_lock(&dentry_list_lock); 324 spin_lock(&dentry_list_lock);
328 while (osb->dentry_lock_list && drop_count--) { 325 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
329 dl = osb->dentry_lock_list; 326 dl = osb->dentry_lock_list;
330 osb->dentry_lock_list = dl->dl_next; 327 osb->dentry_lock_list = dl->dl_next;
331 spin_unlock(&dentry_list_lock); 328 spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
333 kfree(dl); 330 kfree(dl);
334 spin_lock(&dentry_list_lock); 331 spin_lock(&dentry_list_lock);
335 } 332 }
336 if (osb->dentry_lock_list) 333 spin_unlock(&dentry_list_lock);
334}
335
336void ocfs2_drop_dl_inodes(struct work_struct *work)
337{
338 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
339 dentry_lock_work);
340
341 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
342 /*
343 * Don't queue dropping if umount is in progress. We flush the
344 * list in ocfs2_dismount_volume
345 */
346 spin_lock(&dentry_list_lock);
347 if (osb->dentry_lock_list &&
348 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
337 queue_work(ocfs2_wq, &osb->dentry_lock_work); 349 queue_work(ocfs2_wq, &osb->dentry_lock_work);
338 spin_unlock(&dentry_list_lock); 350 spin_unlock(&dentry_list_lock);
339} 351}
340 352
353/* Flush the whole work queue */
354void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
355{
356 __ocfs2_drop_dl_inodes(osb, -1);
357}
358
341/* 359/*
342 * ocfs2_dentry_iput() and friends. 360 * ocfs2_dentry_iput() and friends.
343 * 361 *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
368 /* We leave dropping of inode reference to ocfs2_wq as that can 386 /* We leave dropping of inode reference to ocfs2_wq as that can
369 * possibly lead to inode deletion which gets tricky */ 387 * possibly lead to inode deletion which gets tricky */
370 spin_lock(&dentry_list_lock); 388 spin_lock(&dentry_list_lock);
371 if (!osb->dentry_lock_list) 389 if (!osb->dentry_lock_list &&
390 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
372 queue_work(ocfs2_wq, &osb->dentry_lock_work); 391 queue_work(ocfs2_wq, &osb->dentry_lock_work);
373 dl->dl_next = osb->dentry_lock_list; 392 dl->dl_next = osb->dentry_lock_list;
374 osb->dentry_lock_list = dl; 393 osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 50 u64 parent_blkno);
51 51
52extern spinlock_t dentry_list_lock;
53
52void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 54void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
53 struct ocfs2_dentry_lock *dl); 55 struct ocfs2_dentry_lock *dl);
54 56
55void ocfs2_drop_dl_inodes(struct work_struct *work); 57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
56 59
57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
58 int skip_unhashed); 61 int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
103 lock->ast_pending, lock->ml.type); 103 lock->ast_pending, lock->ml.type);
104 BUG(); 104 BUG();
105 } 105 }
106 BUG_ON(!list_empty(&lock->ast_list));
107 if (lock->ast_pending) 106 if (lock->ast_pending)
108 mlog(0, "lock has an ast getting flushed right now\n"); 107 mlog(0, "lock has an ast getting flushed right now\n");
109 108
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1118 1118
1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", 1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1120 dlm->name, res->lockname.len, res->lockname.name, 1120 dlm->name, res->lockname.len, res->lockname.name,
1121 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", 1121 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
1122 send_to); 1122 send_to);
1123 1123
1124 /* send it */ 1124 /* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
1851 if (ret) 1851 if (ret)
1852 goto out_dio; 1852 goto out_dio;
1853 1853
1854 count = ocount;
1854 ret = generic_write_checks(file, ppos, &count, 1855 ret = generic_write_checks(file, ppos, &count,
1855 S_ISBLK(inode->i_mode)); 1856 S_ISBLK(inode->i_mode));
1856 if (ret) 1857 if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
1918 1919
1919 mutex_unlock(&inode->i_mutex); 1920 mutex_unlock(&inode->i_mutex);
1920 1921
1922 if (written)
1923 ret = written;
1921 mlog_exit(ret); 1924 mlog_exit(ret);
1922 return written ? written : ret; 1925 return ret;
1923} 1926}
1924 1927
1925static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, 1928static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1954 os->os_osb = osb; 1954 os->os_osb = osb;
1955 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0; 1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1959}
1960 1960
1961void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1962{
1963 struct ocfs2_orphan_scan *os;
1964
1965 os = &osb->osb_orphan_scan;
1966 os->os_scantime = CURRENT_TIME;
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 1967 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1968 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else { 1969 else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 150void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 151
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
329/* extended attribute block update */ 330/* extended attribute block update */
330#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 331#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
331 332
333/* Update of a single quota block */
334#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
335
332/* global quotafile inode update, data block */ 336/* global quotafile inode update, data block */
333#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 337#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
338 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
334 339
340#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
335/* 341/*
336 * The two writes below can accidentally see global info dirty due 342 * The two writes below can accidentally see global info dirty due
337 * to set_info() quotactl so make them prepared for the writes. 343 * to set_info() quotactl so make them prepared for the writes.
338 */ 344 */
339/* quota data block, global info */ 345/* quota data block, global info */
340/* Write to local quota file */ 346/* Write to local quota file */
341#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) 347#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
348 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
342 349
343/* global quota data block, local quota data block, global quota inode, 350/* global quota data block, local quota data block, global quota inode,
344 * global quota info */ 351 * global quota info */
345#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) 352#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
353 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
346 354
347static inline int ocfs2_quota_trans_credits(struct super_block *sb) 355static inline int ocfs2_quota_trans_credits(struct super_block *sb)
348{ 356{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
355 return credits; 363 return credits;
356} 364}
357 365
358/* Number of credits needed for removing quota structure from file */
359int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
360/* Number of credits needed for initialization of new quota structure */
361int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
362
363/* group extend. inode update and last group update. */ 366/* group extend. inode update and last group update. */
364#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 367#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
365 368
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
225}; 225};
226 226
227#define OCFS2_OSB_SOFT_RO 0x0001 227#define OCFS2_OSB_SOFT_RO 0x0001
228#define OCFS2_OSB_HARD_RO 0x0002 228#define OCFS2_OSB_HARD_RO 0x0002
229#define OCFS2_OSB_ERROR_FS 0x0004 229#define OCFS2_OSB_ERROR_FS 0x0004
230#define OCFS2_DEFAULT_ATIME_QUANTUM 60 230#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
231
232#define OCFS2_DEFAULT_ATIME_QUANTUM 60
231 233
232struct ocfs2_journal; 234struct ocfs2_journal;
233struct ocfs2_slot_info; 235struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
490 spin_unlock(&osb->osb_lock); 492 spin_unlock(&osb->osb_lock);
491} 493}
492 494
495
496static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
497 unsigned long flag)
498{
499 unsigned long ret;
500
501 spin_lock(&osb->osb_lock);
502 ret = osb->osb_flags & flag;
503 spin_unlock(&osb->osb_lock);
504 return ret;
505}
506
493static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 507static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
494 int hard) 508 int hard)
495{ 509{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */ 53 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */ 54 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 55 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime); 70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime); 71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72 d->dqb_pad1 = d->dqb_pad2 = 0;
72} 73}
73 74
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot) 75static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
211 212
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); 213 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) { 214 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem); 215 loff_t rounded_end =
215 err = ocfs2_extend_no_holes(gqinode, off + len, off); 216 ocfs2_align_bytes_to_blocks(sb, off + len);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem); 217
217 if (err < 0) 218 /* Space is already allocated in ocfs2_global_read_dquot() */
218 goto out;
219 err = ocfs2_simple_size_update(gqinode, 219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh, 220 oinfo->dqi_gqi_bh,
221 off + len); 221 rounded_end);
222 if (err < 0) 222 if (err < 0)
223 goto out; 223 goto out;
224 new = 1; 224 new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 } 234 }
235 if (err) { 235 if (err) {
236 mlog_errno(err); 236 mlog_errno(err);
237 return err; 237 goto out;
238 } 238 }
239 lock_buffer(bh); 239 lock_buffer(bh);
240 if (new) 240 if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); 344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 345 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); 346 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); 347 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 351 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 352 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 353 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff); 354 msecs_to_jiffies(oinfo->dqi_syncms));
356 355
357out_err: 356out_err:
358 mlog_exit(status); 357 mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
402 return err; 401 return err;
403} 402}
404 403
404static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
405{
406 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
407
408 /*
409 * We may need to allocate tree blocks and a leaf block but not the
410 * root block
411 */
412 return oinfo->dqi_gi.dqi_qtree_depth;
413}
414
415static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
416{
417 /* We modify all the allocated blocks, tree root, and info block */
418 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
419 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
420}
421
405/* Read in information from global quota file and acquire a reference to it. 422/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */ 423 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot) 424int ocfs2_global_read_dquot(struct dquot *dquot)
408{ 425{
409 int err, err2, ex = 0; 426 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info = 427 struct super_block *sb = dquot->dq_sb;
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 428 int type = dquot->dq_type;
429 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
430 struct ocfs2_super *osb = OCFS2_SB(sb);
431 struct inode *gqinode = info->dqi_gqinode;
432 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
433 handle_t *handle = NULL;
412 434
413 err = ocfs2_qinfo_lock(info, 0); 435 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0) 436 if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
419 OCFS2_DQUOT(dquot)->dq_use_count++; 441 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; 442 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 443 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
444 ocfs2_qinfo_unlock(info, 0);
445
422 if (!dquot->dq_off) { /* No real quota entry? */ 446 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
425 err = ocfs2_qinfo_lock(info, 1);
426 if (err < 0)
427 goto out_qlock;
428 ex = 1; 447 ex = 1;
448 /*
449 * Add blocks to quota file before we start a transaction since
450 * locking allocators ranks above a transaction start
451 */
452 WARN_ON(journal_current_handle());
453 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
454 err = ocfs2_extend_no_holes(gqinode,
455 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
456 gqinode->i_size);
457 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
458 if (err < 0)
459 goto out;
429 } 460 }
461
462 handle = ocfs2_start_trans(osb,
463 ocfs2_calc_global_qinit_credits(sb, type));
464 if (IS_ERR(handle)) {
465 err = PTR_ERR(handle);
466 goto out;
467 }
468 err = ocfs2_qinfo_lock(info, ex);
469 if (err < 0)
470 goto out_trans;
430 err = qtree_write_dquot(&info->dqi_gi, dquot); 471 err = qtree_write_dquot(&info->dqi_gi, dquot);
431 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { 472 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
432 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); 473 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
438 ocfs2_qinfo_unlock(info, 1); 479 ocfs2_qinfo_unlock(info, 1);
439 else 480 else
440 ocfs2_qinfo_unlock(info, 0); 481 ocfs2_qinfo_unlock(info, 0);
482out_trans:
483 if (handle)
484 ocfs2_commit_trans(osb, handle);
441out: 485out:
442 if (err < 0) 486 if (err < 0)
443 mlog_errno(err); 487 mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
607 651
608 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 652 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
609 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 653 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
610 oinfo->dqi_syncjiff); 654 msecs_to_jiffies(oinfo->dqi_syncms));
611} 655}
612 656
613/* 657/*
@@ -635,20 +679,18 @@ out:
635 return status; 679 return status;
636} 680}
637 681
638int ocfs2_calc_qdel_credits(struct super_block *sb, int type) 682static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
639{ 683{
640 struct ocfs2_mem_dqinfo *oinfo; 684 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
641 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 685 /*
642 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; 686 * We modify tree, leaf block, global info, local chunk header,
643 687 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
644 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) 688 * accounts for inode update
645 return 0; 689 */
646 690 return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
647 oinfo = sb_dqinfo(sb, type)->dqi_priv; 691 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
648 /* We modify tree, leaf block, global info, local chunk header, 692 OCFS2_QINFO_WRITE_CREDITS +
649 * global and local inode */ 693 OCFS2_INODE_UPDATE_CREDITS;
650 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
651 2 * OCFS2_INODE_UPDATE_CREDITS;
652} 694}
653 695
654static int ocfs2_release_dquot(struct dquot *dquot) 696static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
680 return status; 722 return status;
681} 723}
682 724
683int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
684{
685 struct ocfs2_mem_dqinfo *oinfo;
686 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
687 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
688 struct ocfs2_dinode *lfe, *gfe;
689
690 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
691 return 0;
692
693 oinfo = sb_dqinfo(sb, type)->dqi_priv;
694 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
695 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
696 /* We can extend local file + global file. In local file we
697 * can modify info, chunk header block and dquot block. In
698 * global file we can modify info, tree and leaf block */
699 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
700 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
701 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
702}
703
704static int ocfs2_acquire_dquot(struct dquot *dquot) 725static int ocfs2_acquire_dquot(struct dquot *dquot)
705{ 726{
706 handle_t *handle;
707 struct ocfs2_mem_dqinfo *oinfo = 727 struct ocfs2_mem_dqinfo *oinfo =
708 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 728 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
709 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
710 int status = 0; 729 int status = 0;
711 730
712 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 731 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
715 status = ocfs2_lock_global_qf(oinfo, 1); 734 status = ocfs2_lock_global_qf(oinfo, 1);
716 if (status < 0) 735 if (status < 0)
717 goto out; 736 goto out;
718 handle = ocfs2_start_trans(osb,
719 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
720 if (IS_ERR(handle)) {
721 status = PTR_ERR(handle);
722 mlog_errno(status);
723 goto out_ilock;
724 }
725 status = dquot_acquire(dquot); 737 status = dquot_acquire(dquot);
726 ocfs2_commit_trans(osb, handle);
727out_ilock:
728 ocfs2_unlock_global_qf(oinfo, 1); 738 ocfs2_unlock_global_qf(oinfo, 1);
729out: 739out:
730 mlog_exit(status); 740 mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work);
1227
1228 kill_block_super(sb);
1229}
1230
1216static struct file_system_type ocfs2_fs_type = { 1231static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1232 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1233 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1234 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1235 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1236 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1237
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1238 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1239 .next = NULL
1226}; 1240};
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1833
1820 debugfs_remove(osb->osb_ctxt); 1834 debugfs_remove(osb->osb_ctxt);
1821 1835
1836 /*
1837 * Flush inode dropping work queue so that deletes are
1838 * performed while the filesystem is still working
1839 */
1840 ocfs2_drop_all_dl_inodes(osb);
1841
1822 /* Orphan scan should be stopped as early as possible */ 1842 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1843 ocfs2_orphan_scan_stop(osb);
1824 1844
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2001 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2002 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2003
2004 ocfs2_orphan_scan_init(osb);
2005
1984 status = ocfs2_recovery_init(osb); 2006 status = ocfs2_recovery_init(osb);
1985 if (status) { 2007 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2008 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3ce5ae9e3d2d..175db258942f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task)
234 234
235struct mm_struct *mm_for_maps(struct task_struct *task) 235struct mm_struct *mm_for_maps(struct task_struct *task)
236{ 236{
237 struct mm_struct *mm = get_task_mm(task); 237 struct mm_struct *mm;
238 if (!mm) 238
239 if (mutex_lock_killable(&task->cred_guard_mutex))
239 return NULL; 240 return NULL;
240 down_read(&mm->mmap_sem); 241
241 task_lock(task); 242 mm = get_task_mm(task);
242 if (task->mm != mm) 243 if (mm && mm != current->mm &&
243 goto out; 244 !ptrace_may_access(task, PTRACE_MODE_READ)) {
244 if (task->mm != current->mm && 245 mmput(mm);
245 __ptrace_may_access(task, PTRACE_MODE_READ) < 0) 246 mm = NULL;
246 goto out; 247 }
247 task_unlock(task); 248 mutex_unlock(&task->cred_guard_mutex);
249
248 return mm; 250 return mm;
249out:
250 task_unlock(task);
251 up_read(&mm->mmap_sem);
252 mmput(mm);
253 return NULL;
254} 251}
255 252
256static int proc_pid_cmdline(struct task_struct *task, char * buffer) 253static int proc_pid_cmdline(struct task_struct *task, char * buffer)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6f61b7cc32e0..9bd8be1d235c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 mm = mm_for_maps(priv->task); 119 mm = mm_for_maps(priv->task);
120 if (!mm) 120 if (!mm)
121 return NULL; 121 return NULL;
122 down_read(&mm->mmap_sem);
122 123
123 tail_vma = get_gate_vma(priv->task); 124 tail_vma = get_gate_vma(priv->task);
124 priv->tail_vma = tail_vma; 125 priv->tail_vma = tail_vma;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 64a72e2e7650..8f5c05d3dbd3 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
189 priv->task = NULL; 189 priv->task = NULL;
190 return NULL; 190 return NULL;
191 } 191 }
192 down_read(&mm->mmap_sem);
192 193
193 /* start from the Nth VMA */ 194 /* start from the Nth VMA */
194 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) 195 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0c93c7ef3d18..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
770 bp->b_pages = NULL; 770 bp->b_pages = NULL;
771 bp->b_addr = mem; 771 bp->b_addr = mem;
772 772
773 rval = _xfs_buf_get_pages(bp, page_count, 0); 773 rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
774 if (rval) 774 if (rval)
775 return rval; 775 return rval;
776 776
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2013 blkcnt, XFS_BUF_LOCK, &bp); 2013 blkcnt,
2014 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2015 &bp);
2014 if (error) 2016 if (error)
2015 return(error); 2017 return(error);
2016 2018
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2142 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2143 2145
2144 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, 2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
2145 blkcnt, XFS_BUF_LOCK); 2147 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2146 ASSERT(bp); 2148 ASSERT(bp);
2147 ASSERT(!XFS_BUF_GETERROR(bp)); 2149 ASSERT(!XFS_BUF_GETERROR(bp));
2148 2150
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
6009 */ 6009 */
6010 error = ENOMEM; 6010 error = ENOMEM;
6011 subnex = 16; 6011 subnex = 16;
6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); 6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
6013 if (!map) 6013 if (!map)
6014 goto out_unlock_ilock; 6014 goto out_unlock_ilock;
6015 6015
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 121 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 122 xfs_buftrace("SBTREE ERROR", bp);
123 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, 123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 cur->bc_mp); 124 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 125 return XFS_ERROR(EFSCORRUPTED);
126 } 126 }
127 return 0; 127 return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2201xfs_da_state_t * 2201xfs_da_state_t *
2202xfs_da_state_alloc(void) 2202xfs_da_state_alloc(void)
2203{ 2203{
2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2205} 2205}
2206 2206
2207/* 2207/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2261 int off; 2261 int off;
2262 2262
2263 if (nbuf == 1) 2263 if (nbuf == 1)
2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); 2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2265 else 2265 else
2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); 2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2267 dabuf->dirty = 0; 2267 dabuf->dirty = 0;
2268#ifdef XFS_DABUF_DEBUG 2268#ifdef XFS_DABUF_DEBUG
2269 dabuf->ra = ra; 2269 dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
256 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 256 !(args->op_flags & XFS_DA_OP_CILOOKUP))
257 return EEXIST; 257 return EEXIST;
258 258
259 args->value = kmem_alloc(len, KM_MAYFAIL); 259 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
260 if (!args->value) 260 if (!args->value)
261 return ENOMEM; 261 return ENOMEM;
262 262
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
167 new = nb - mp->m_sb.sb_dblocks; 167 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 168 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) { 169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
170 xfs_filestream_flush(mp); 172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
171 down_write(&mp->m_peraglock); 179 down_write(&mp->m_peraglock);
172 mp->m_perag = kmem_realloc(mp->m_perag, 180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
173 sizeof(xfs_perag_t) * nagcount, 181 old_perag = mp->m_perag;
174 sizeof(xfs_perag_t) * oagcount, 182 mp->m_perag = new_perag;
175 KM_SLEEP); 183
176 memset(&mp->m_perag[oagcount], 0,
177 (nagcount - oagcount) * sizeof(xfs_perag_t));
178 mp->m_flags |= XFS_MOUNT_32BITINODES; 184 mp->m_flags |= XFS_MOUNT_32BITINODES;
179 nagimax = xfs_initialize_perag(mp, nagcount); 185 nagimax = xfs_initialize_perag(mp, nagcount);
180 up_write(&mp->m_peraglock); 186 up_write(&mp->m_peraglock);
187
188 kmem_free(old_perag);
181 } 189 }
182 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
183 tp->t_flags |= XFS_TRANS_RESERVE; 191 tp->t_flags |= XFS_TRANS_RESERVE;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
343 return XFS_ERROR(EFSCORRUPTED); 343 return XFS_ERROR(EFSCORRUPTED);
344 } 344 }
345 345
346 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
347 !ip->i_mount->m_rtdev_targp)) {
348 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
349 "corrupt dinode %Lu, has realtime flag set.",
350 ip->i_ino);
351 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
352 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355
346 switch (ip->i_d.di_mode & S_IFMT) { 356 switch (ip->i_d.di_mode & S_IFMT) {
347 case S_IFIFO: 357 case S_IFIFO:
348 case S_IFCHR: 358 case S_IFCHR:
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
3180STATIC void 3180STATIC void
3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3182{ 3182{
3183 ASSERT(spin_is_locked(&log->l_icloglock)); 3183 assert_spin_locked(&log->l_icloglock);
3184 3184
3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3186 xlog_state_switch_iclogs(log, iclog, 0); 3186 xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED |
543 XBF_DONT_BLOCK);
542 error = XFS_BUF_GETERROR(bp); 544 error = XFS_BUF_GETERROR(bp);
543 if (error) { 545 if (error) {
544 xfs_ioerror_alert("xfs_readlink", 546 xfs_ioerror_alert("xfs_readlink",
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index d7cd193c2277..a81170de7f6b 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -89,7 +89,9 @@ enum print_line_t {
89 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ 89 TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */
90}; 90};
91 91
92 92void tracing_generic_entry_update(struct trace_entry *entry,
93 unsigned long flags,
94 int pc);
93struct ring_buffer_event * 95struct ring_buffer_event *
94trace_current_buffer_lock_reserve(int type, unsigned long len, 96trace_current_buffer_lock_reserve(int type, unsigned long len,
95 unsigned long flags, int pc); 97 unsigned long flags, int pc);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 16713dc672e4..3060bdc35ffe 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -110,6 +110,7 @@ struct kvm_memory_slot {
110 110
111struct kvm_kernel_irq_routing_entry { 111struct kvm_kernel_irq_routing_entry {
112 u32 gsi; 112 u32 gsi;
113 u32 type;
113 int (*set)(struct kvm_kernel_irq_routing_entry *e, 114 int (*set)(struct kvm_kernel_irq_routing_entry *e,
114 struct kvm *kvm, int level); 115 struct kvm *kvm, int level);
115 union { 116 union {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fdffb413b192..f6b90240dd41 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -473,7 +473,6 @@ extern int nfs_writepages(struct address_space *, struct writeback_control *);
473extern int nfs_flush_incompatible(struct file *file, struct page *page); 473extern int nfs_flush_incompatible(struct file *file, struct page *page);
474extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); 474extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
475extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); 475extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
476extern void nfs_writedata_release(void *);
477 476
478/* 477/*
479 * Try to write back everything synchronously (but check the 478 * Try to write back everything synchronously (but check the
@@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
488extern int nfs_commit_inode(struct inode *, int); 487extern int nfs_commit_inode(struct inode *, int);
489extern struct nfs_write_data *nfs_commitdata_alloc(void); 488extern struct nfs_write_data *nfs_commitdata_alloc(void);
490extern void nfs_commit_free(struct nfs_write_data *wdata); 489extern void nfs_commit_free(struct nfs_write_data *wdata);
491extern void nfs_commitdata_release(void *wdata);
492#else 490#else
493static inline int 491static inline int
494nfs_commit_inode(struct inode *inode, int how) 492nfs_commit_inode(struct inode *inode, int how)
@@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode)
507 * Allocate nfs_write_data structures 505 * Allocate nfs_write_data structures
508 */ 506 */
509extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); 507extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
508extern void nfs_writedata_free(struct nfs_write_data *);
510 509
511/* 510/*
512 * linux/fs/nfs/read.c 511 * linux/fs/nfs/read.c
@@ -515,7 +514,6 @@ extern int nfs_readpage(struct file *, struct page *);
515extern int nfs_readpages(struct file *, struct address_space *, 514extern int nfs_readpages(struct file *, struct address_space *,
516 struct list_head *, unsigned); 515 struct list_head *, unsigned);
517extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); 516extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
518extern void nfs_readdata_release(void *data);
519extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, 517extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
520 struct page *); 518 struct page *);
521 519
@@ -523,6 +521,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
523 * Allocate nfs_read_data structures 521 * Allocate nfs_read_data structures
524 */ 522 */
525extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); 523extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
524extern void nfs_readdata_free(struct nfs_read_data *);
526 525
527/* 526/*
528 * linux/fs/nfs3proc.c 527 * linux/fs/nfs3proc.c
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e604e6ef72dd..b53f7006cc4e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -115,27 +115,44 @@ enum perf_counter_sample_format {
115 PERF_SAMPLE_TID = 1U << 1, 115 PERF_SAMPLE_TID = 1U << 1,
116 PERF_SAMPLE_TIME = 1U << 2, 116 PERF_SAMPLE_TIME = 1U << 2,
117 PERF_SAMPLE_ADDR = 1U << 3, 117 PERF_SAMPLE_ADDR = 1U << 3,
118 PERF_SAMPLE_GROUP = 1U << 4, 118 PERF_SAMPLE_READ = 1U << 4,
119 PERF_SAMPLE_CALLCHAIN = 1U << 5, 119 PERF_SAMPLE_CALLCHAIN = 1U << 5,
120 PERF_SAMPLE_ID = 1U << 6, 120 PERF_SAMPLE_ID = 1U << 6,
121 PERF_SAMPLE_CPU = 1U << 7, 121 PERF_SAMPLE_CPU = 1U << 7,
122 PERF_SAMPLE_PERIOD = 1U << 8, 122 PERF_SAMPLE_PERIOD = 1U << 8,
123 PERF_SAMPLE_STREAM_ID = 1U << 9, 123 PERF_SAMPLE_STREAM_ID = 1U << 9,
124 PERF_SAMPLE_RAW = 1U << 10,
124 125
125 PERF_SAMPLE_MAX = 1U << 10, /* non-ABI */ 126 PERF_SAMPLE_MAX = 1U << 11, /* non-ABI */
126}; 127};
127 128
128/* 129/*
129 * Bits that can be set in attr.read_format to request that 130 * The format of the data returned by read() on a perf counter fd,
130 * reads on the counter should return the indicated quantities, 131 * as specified by attr.read_format:
131 * in increasing order of bit value, after the counter value. 132 *
133 * struct read_format {
134 * { u64 value;
135 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
136 * { u64 time_running; } && PERF_FORMAT_RUNNING
137 * { u64 id; } && PERF_FORMAT_ID
138 * } && !PERF_FORMAT_GROUP
139 *
140 * { u64 nr;
141 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
142 * { u64 time_running; } && PERF_FORMAT_RUNNING
143 * { u64 value;
144 * { u64 id; } && PERF_FORMAT_ID
145 * } cntr[nr];
146 * } && PERF_FORMAT_GROUP
147 * };
132 */ 148 */
133enum perf_counter_read_format { 149enum perf_counter_read_format {
134 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, 150 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
135 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, 151 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
136 PERF_FORMAT_ID = 1U << 2, 152 PERF_FORMAT_ID = 1U << 2,
153 PERF_FORMAT_GROUP = 1U << 3,
137 154
138 PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ 155 PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
139}; 156};
140 157
141#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 158#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -342,10 +359,8 @@ enum perf_event_type {
342 * struct { 359 * struct {
343 * struct perf_event_header header; 360 * struct perf_event_header header;
344 * u32 pid, tid; 361 * u32 pid, tid;
345 * u64 value; 362 *
346 * { u64 time_enabled; } && PERF_FORMAT_ENABLED 363 * struct read_format values;
347 * { u64 time_running; } && PERF_FORMAT_RUNNING
348 * { u64 parent_id; } && PERF_FORMAT_ID
349 * }; 364 * };
350 */ 365 */
351 PERF_EVENT_READ = 8, 366 PERF_EVENT_READ = 8,
@@ -363,11 +378,24 @@ enum perf_event_type {
363 * { u32 cpu, res; } && PERF_SAMPLE_CPU 378 * { u32 cpu, res; } && PERF_SAMPLE_CPU
364 * { u64 period; } && PERF_SAMPLE_PERIOD 379 * { u64 period; } && PERF_SAMPLE_PERIOD
365 * 380 *
366 * { u64 nr; 381 * { struct read_format values; } && PERF_SAMPLE_READ
367 * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
368 * 382 *
369 * { u64 nr, 383 * { u64 nr,
370 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN 384 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
385 *
386 * #
387 * # The RAW record below is opaque data wrt the ABI
388 * #
389 * # That is, the ABI doesn't make any promises wrt to
390 * # the stability of its content, it may vary depending
391 * # on event, hardware, kernel version and phase of
392 * # the moon.
393 * #
394 * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
395 * #
396 *
397 * { u32 size;
398 * char data[size];}&& PERF_SAMPLE_RAW
371 * }; 399 * };
372 */ 400 */
373 PERF_EVENT_SAMPLE = 9, 401 PERF_EVENT_SAMPLE = 9,
@@ -413,6 +441,11 @@ struct perf_callchain_entry {
413 __u64 ip[PERF_MAX_STACK_DEPTH]; 441 __u64 ip[PERF_MAX_STACK_DEPTH];
414}; 442};
415 443
444struct perf_raw_record {
445 u32 size;
446 void *data;
447};
448
416struct task_struct; 449struct task_struct;
417 450
418/** 451/**
@@ -681,10 +714,13 @@ struct perf_sample_data {
681 struct pt_regs *regs; 714 struct pt_regs *regs;
682 u64 addr; 715 u64 addr;
683 u64 period; 716 u64 period;
717 struct perf_raw_record *raw;
684}; 718};
685 719
686extern int perf_counter_overflow(struct perf_counter *counter, int nmi, 720extern int perf_counter_overflow(struct perf_counter *counter, int nmi,
687 struct perf_sample_data *data); 721 struct perf_sample_data *data);
722extern void perf_counter_output(struct perf_counter *counter, int nmi,
723 struct perf_sample_data *data);
688 724
689/* 725/*
690 * Return 1 for a software counter, 0 for a hardware counter 726 * Return 1 for a software counter, 0 for a hardware counter
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index b99c625fddfe..7da466ba4b0d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -117,17 +117,15 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le
117static inline __be32 * 117static inline __be32 *
118xdr_encode_hyper(__be32 *p, __u64 val) 118xdr_encode_hyper(__be32 *p, __u64 val)
119{ 119{
120 *p++ = htonl(val >> 32); 120 *(__be64 *)p = cpu_to_be64(val);
121 *p++ = htonl(val & 0xFFFFFFFF); 121 return p + 2;
122 return p;
123} 122}
124 123
125static inline __be32 * 124static inline __be32 *
126xdr_decode_hyper(__be32 *p, __u64 *valp) 125xdr_decode_hyper(__be32 *p, __u64 *valp)
127{ 126{
128 *valp = ((__u64) ntohl(*p++)) << 32; 127 *valp = be64_to_cpup((__be64 *)p);
129 *valp |= ntohl(*p++); 128 return p + 2;
130 return p;
131} 129}
132 130
133/* 131/*
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6788e1a4d4ca..cf3c2f5dba51 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,7 +77,14 @@ struct task_struct;
77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
78 { .flags = word, .bit_nr = bit, } 78 { .flags = word, .bit_nr = bit, }
79 79
80extern void init_waitqueue_head(wait_queue_head_t *q); 80extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
81
82#define init_waitqueue_head(q) \
83 do { \
84 static struct lock_class_key __key; \
85 \
86 __init_waitqueue_head((q), &__key); \
87 } while (0)
81 88
82#ifdef CONFIG_LOCKDEP 89#ifdef CONFIG_LOCKDEP
83# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ 90# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 1867553c61e5..f64fbaae781a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -144,6 +144,9 @@
144#undef TP_fast_assign 144#undef TP_fast_assign
145#define TP_fast_assign(args...) args 145#define TP_fast_assign(args...) args
146 146
147#undef TP_perf_assign
148#define TP_perf_assign(args...)
149
147#undef TRACE_EVENT 150#undef TRACE_EVENT
148#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ 151#define TRACE_EVENT(call, proto, args, tstruct, func, print) \
149static int \ 152static int \
@@ -345,6 +348,56 @@ static inline int ftrace_get_offsets_##call( \
345 348
346#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 349#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
347 350
351#ifdef CONFIG_EVENT_PROFILE
352
353/*
354 * Generate the functions needed for tracepoint perf_counter support.
355 *
356 * NOTE: The insertion profile callback (ftrace_profile_<call>) is defined later
357 *
358 * static int ftrace_profile_enable_<call>(struct ftrace_event_call *event_call)
359 * {
360 * int ret = 0;
361 *
362 * if (!atomic_inc_return(&event_call->profile_count))
363 * ret = register_trace_<call>(ftrace_profile_<call>);
364 *
365 * return ret;
366 * }
367 *
368 * static void ftrace_profile_disable_<call>(struct ftrace_event_call *event_call)
369 * {
370 * if (atomic_add_negative(-1, &event->call->profile_count))
371 * unregister_trace_<call>(ftrace_profile_<call>);
372 * }
373 *
374 */
375
376#undef TRACE_EVENT
377#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
378 \
379static void ftrace_profile_##call(proto); \
380 \
381static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
382{ \
383 int ret = 0; \
384 \
385 if (!atomic_inc_return(&event_call->profile_count)) \
386 ret = register_trace_##call(ftrace_profile_##call); \
387 \
388 return ret; \
389} \
390 \
391static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
392{ \
393 if (atomic_add_negative(-1, &event_call->profile_count)) \
394 unregister_trace_##call(ftrace_profile_##call); \
395}
396
397#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
398
399#endif
400
348/* 401/*
349 * Stage 4 of the trace events. 402 * Stage 4 of the trace events.
350 * 403 *
@@ -447,28 +500,6 @@ static inline int ftrace_get_offsets_##call( \
447#define TP_FMT(fmt, args...) fmt "\n", ##args 500#define TP_FMT(fmt, args...) fmt "\n", ##args
448 501
449#ifdef CONFIG_EVENT_PROFILE 502#ifdef CONFIG_EVENT_PROFILE
450#define _TRACE_PROFILE(call, proto, args) \
451static void ftrace_profile_##call(proto) \
452{ \
453 extern void perf_tpcounter_event(int); \
454 perf_tpcounter_event(event_##call.id); \
455} \
456 \
457static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
458{ \
459 int ret = 0; \
460 \
461 if (!atomic_inc_return(&event_call->profile_count)) \
462 ret = register_trace_##call(ftrace_profile_##call); \
463 \
464 return ret; \
465} \
466 \
467static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
468{ \
469 if (atomic_add_negative(-1, &event_call->profile_count)) \
470 unregister_trace_##call(ftrace_profile_##call); \
471}
472 503
473#define _TRACE_PROFILE_INIT(call) \ 504#define _TRACE_PROFILE_INIT(call) \
474 .profile_count = ATOMIC_INIT(-1), \ 505 .profile_count = ATOMIC_INIT(-1), \
@@ -476,7 +507,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
476 .profile_disable = ftrace_profile_disable_##call, 507 .profile_disable = ftrace_profile_disable_##call,
477 508
478#else 509#else
479#define _TRACE_PROFILE(call, proto, args)
480#define _TRACE_PROFILE_INIT(call) 510#define _TRACE_PROFILE_INIT(call)
481#endif 511#endif
482 512
@@ -502,7 +532,6 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
502 532
503#undef TRACE_EVENT 533#undef TRACE_EVENT
504#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ 534#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
505_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
506 \ 535 \
507static struct ftrace_event_call event_##call; \ 536static struct ftrace_event_call event_##call; \
508 \ 537 \
@@ -586,6 +615,110 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
586 615
587#include TRACE_INCLUDE(TRACE_INCLUDE_FILE) 616#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
588 617
589#undef _TRACE_PROFILE 618/*
619 * Define the insertion callback to profile events
620 *
621 * The job is very similar to ftrace_raw_event_<call> except that we don't
622 * insert in the ring buffer but in a perf counter.
623 *
624 * static void ftrace_profile_<call>(proto)
625 * {
626 * struct ftrace_data_offsets_<call> __maybe_unused __data_offsets;
627 * struct ftrace_event_call *event_call = &event_<call>;
628 * extern void perf_tpcounter_event(int, u64, u64, void *, int);
629 * struct ftrace_raw_##call *entry;
630 * u64 __addr = 0, __count = 1;
631 * unsigned long irq_flags;
632 * int __entry_size;
633 * int __data_size;
634 * int pc;
635 *
636 * local_save_flags(irq_flags);
637 * pc = preempt_count();
638 *
639 * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
640 *
641 * // Below we want to get the aligned size by taking into account
642 * // the u32 field that will later store the buffer size
643 * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
644 * sizeof(u64));
645 * __entry_size -= sizeof(u32);
646 *
647 * do {
648 * char raw_data[__entry_size]; <- allocate our sample in the stack
649 * struct trace_entry *ent;
650 *
651 * zero dead bytes from alignment to avoid stack leak to userspace:
652 *
653 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
654 * entry = (struct ftrace_raw_<call> *)raw_data;
655 * ent = &entry->ent;
656 * tracing_generic_entry_update(ent, irq_flags, pc);
657 * ent->type = event_call->id;
658 *
659 * <tstruct> <- do some jobs with dynamic arrays
660 *
661 * <assign> <- affect our values
662 *
663 * perf_tpcounter_event(event_call->id, __addr, __count, entry,
664 * __entry_size); <- submit them to perf counter
665 * } while (0);
666 *
667 * }
668 */
669
670#ifdef CONFIG_EVENT_PROFILE
671
672#undef __perf_addr
673#define __perf_addr(a) __addr = (a)
674
675#undef __perf_count
676#define __perf_count(c) __count = (c)
677
678#undef TRACE_EVENT
679#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
680static void ftrace_profile_##call(proto) \
681{ \
682 struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\
683 struct ftrace_event_call *event_call = &event_##call; \
684 extern void perf_tpcounter_event(int, u64, u64, void *, int); \
685 struct ftrace_raw_##call *entry; \
686 u64 __addr = 0, __count = 1; \
687 unsigned long irq_flags; \
688 int __entry_size; \
689 int __data_size; \
690 int pc; \
691 \
692 local_save_flags(irq_flags); \
693 pc = preempt_count(); \
694 \
695 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
696 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
697 sizeof(u64)); \
698 __entry_size -= sizeof(u32); \
699 \
700 do { \
701 char raw_data[__entry_size]; \
702 struct trace_entry *ent; \
703 \
704 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
705 entry = (struct ftrace_raw_##call *)raw_data; \
706 ent = &entry->ent; \
707 tracing_generic_entry_update(ent, irq_flags, pc); \
708 ent->type = event_call->id; \
709 \
710 tstruct \
711 \
712 { assign; } \
713 \
714 perf_tpcounter_event(event_call->id, __addr, __count, entry,\
715 __entry_size); \
716 } while (0); \
717 \
718}
719
720#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
721#endif /* CONFIG_EVENT_PROFILE */
722
590#undef _TRACE_PROFILE_INIT 723#undef _TRACE_PROFILE_INIT
591 724
diff --git a/kernel/futex.c b/kernel/futex.c
index 0672ff88f159..e18cfbdc7190 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue 1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1011 * q: the futex_q 1011 * q: the futex_q
1012 * key: the key of the requeue target futex 1012 * key: the key of the requeue target futex
1013 * hb: the hash_bucket of the requeue target futex
1013 * 1014 *
1014 * During futex_requeue, with requeue_pi=1, it is possible to acquire the 1015 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1015 * target futex if it is uncontended or via a lock steal. Set the futex_q key 1016 * target futex if it is uncontended or via a lock steal. Set the futex_q key
1016 * to the requeue target futex so the waiter can detect the wakeup on the right 1017 * to the requeue target futex so the waiter can detect the wakeup on the right
1017 * futex, but remove it from the hb and NULL the rt_waiter so it can detect 1018 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
1018 * atomic lock acquisition. Must be called with the q->lock_ptr held. 1019 * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1020 * to protect access to the pi_state to fixup the owner later. Must be called
1021 * with both q->lock_ptr and hb->lock held.
1019 */ 1022 */
1020static inline 1023static inline
1021void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) 1024void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1025 struct futex_hash_bucket *hb)
1022{ 1026{
1023 drop_futex_key_refs(&q->key); 1027 drop_futex_key_refs(&q->key);
1024 get_futex_key_refs(key); 1028 get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
1030 WARN_ON(!q->rt_waiter); 1034 WARN_ON(!q->rt_waiter);
1031 q->rt_waiter = NULL; 1035 q->rt_waiter = NULL;
1032 1036
1037 q->lock_ptr = &hb->lock;
1038#ifdef CONFIG_DEBUG_PI_LIST
1039 q->list.plist.lock = &hb->lock;
1040#endif
1041
1033 wake_up_state(q->task, TASK_NORMAL); 1042 wake_up_state(q->task, TASK_NORMAL);
1034} 1043}
1035 1044
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1088 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, 1097 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1089 set_waiters); 1098 set_waiters);
1090 if (ret == 1) 1099 if (ret == 1)
1091 requeue_pi_wake_futex(top_waiter, key2); 1100 requeue_pi_wake_futex(top_waiter, key2, hb2);
1092 1101
1093 return ret; 1102 return ret;
1094} 1103}
@@ -1247,8 +1256,15 @@ retry_private:
1247 if (!match_futex(&this->key, &key1)) 1256 if (!match_futex(&this->key, &key1))
1248 continue; 1257 continue;
1249 1258
1250 WARN_ON(!requeue_pi && this->rt_waiter); 1259 /*
1251 WARN_ON(requeue_pi && !this->rt_waiter); 1260 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
1261 * be paired with each other and no other futex ops.
1262 */
1263 if ((requeue_pi && !this->rt_waiter) ||
1264 (!requeue_pi && this->rt_waiter)) {
1265 ret = -EINVAL;
1266 break;
1267 }
1252 1268
1253 /* 1269 /*
1254 * Wake nr_wake waiters. For requeue_pi, if we acquired the 1270 * Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
1273 this->task, 1); 1289 this->task, 1);
1274 if (ret == 1) { 1290 if (ret == 1) {
1275 /* We got the lock. */ 1291 /* We got the lock. */
1276 requeue_pi_wake_futex(this, &key2); 1292 requeue_pi_wake_futex(this, &key2, hb2);
1277 continue; 1293 continue;
1278 } else if (ret) { 1294 } else if (ret) {
1279 /* -EDEADLK */ 1295 /* -EDEADLK */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b9ee29..235716556bf1 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
180 int cmd = op & FUTEX_CMD_MASK; 180 int cmd = op & FUTEX_CMD_MASK;
181 181
182 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 182 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
183 cmd == FUTEX_WAIT_BITSET)) { 183 cmd == FUTEX_WAIT_BITSET ||
184 cmd == FUTEX_WAIT_REQUEUE_PI)) {
184 if (get_compat_timespec(&ts, utime)) 185 if (get_compat_timespec(&ts, utime))
185 return -EFAULT; 186 return -EFAULT;
186 if (!timespec_valid(&ts)) 187 if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
191 t = ktime_add_safe(ktime_get(), t); 192 t = ktime_add_safe(ktime_get(), t);
192 tp = &t; 193 tp = &t;
193 } 194 }
194 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) 195 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
196 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
195 val2 = (int) (unsigned long) utime; 197 val2 = (int) (unsigned long) utime;
196 198
197 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 199 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c679db4687..d222515a5a06 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
761{ 761{
762 struct irq_desc *desc = irq_to_desc(irq); 762 struct irq_desc *desc = irq_to_desc(irq);
763 struct irqaction *action, **action_ptr; 763 struct irqaction *action, **action_ptr;
764 struct task_struct *irqthread;
765 unsigned long flags; 764 unsigned long flags;
766 765
767 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); 766 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
809 desc->chip->disable(irq); 808 desc->chip->disable(irq);
810 } 809 }
811 810
812 irqthread = action->thread;
813 action->thread = NULL;
814
815 spin_unlock_irqrestore(&desc->lock, flags); 811 spin_unlock_irqrestore(&desc->lock, flags);
816 812
817 unregister_handler_proc(irq, action); 813 unregister_handler_proc(irq, action);
@@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
819 /* Make sure it's not being used on another CPU: */ 815 /* Make sure it's not being used on another CPU: */
820 synchronize_irq(irq); 816 synchronize_irq(irq);
821 817
822 if (irqthread) {
823 if (!test_bit(IRQTF_DIED, &action->thread_flags))
824 kthread_stop(irqthread);
825 put_task_struct(irqthread);
826 }
827
828#ifdef CONFIG_DEBUG_SHIRQ 818#ifdef CONFIG_DEBUG_SHIRQ
829 /* 819 /*
830 * It's a shared IRQ -- the driver ought to be prepared for an IRQ 820 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
840 local_irq_restore(flags); 830 local_irq_restore(flags);
841 } 831 }
842#endif 832#endif
833
834 if (action->thread) {
835 if (!test_bit(IRQTF_DIED, &action->thread_flags))
836 kthread_stop(action->thread);
837 put_task_struct(action->thread);
838 }
839
843 return action; 840 return action;
844} 841}
845 842
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 2f69bee57bf2..3fd30197da2e 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -107,8 +107,8 @@ out_unlock:
107 107
108struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) 108struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
109{ 109{
110 /* those all static, do move them */ 110 /* those static or target node is -1, do not move them */
111 if (desc->irq < NR_IRQS_LEGACY) 111 if (desc->irq < NR_IRQS_LEGACY || node == -1)
112 return desc; 112 return desc;
113 113
114 if (desc->node != node) 114 if (desc->node != node)
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d7135aa2d2c4..e94caa666dba 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
758 &proc_lockdep_stats_operations); 758 &proc_lockdep_stats_operations);
759 759
760#ifdef CONFIG_LOCK_STAT 760#ifdef CONFIG_LOCK_STAT
761 proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); 761 proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
762 &proc_lock_stat_operations);
762#endif 763#endif
763 764
764 return 0; 765 return 0;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 673c1aaf7332..534e20d14d63 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -88,6 +88,7 @@ void __weak hw_perf_disable(void) { barrier(); }
88void __weak hw_perf_enable(void) { barrier(); } 88void __weak hw_perf_enable(void) { barrier(); }
89 89
90void __weak hw_perf_counter_setup(int cpu) { barrier(); } 90void __weak hw_perf_counter_setup(int cpu) { barrier(); }
91void __weak hw_perf_counter_setup_online(int cpu) { barrier(); }
91 92
92int __weak 93int __weak
93hw_perf_group_sched_in(struct perf_counter *group_leader, 94hw_perf_group_sched_in(struct perf_counter *group_leader,
@@ -306,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
306 return; 307 return;
307 308
308 counter->state = PERF_COUNTER_STATE_INACTIVE; 309 counter->state = PERF_COUNTER_STATE_INACTIVE;
310 if (counter->pending_disable) {
311 counter->pending_disable = 0;
312 counter->state = PERF_COUNTER_STATE_OFF;
313 }
309 counter->tstamp_stopped = ctx->time; 314 counter->tstamp_stopped = ctx->time;
310 counter->pmu->disable(counter); 315 counter->pmu->disable(counter);
311 counter->oncpu = -1; 316 counter->oncpu = -1;
@@ -1691,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
1691 return 0; 1696 return 0;
1692} 1697}
1693 1698
1694static u64 perf_counter_read_tree(struct perf_counter *counter) 1699static int perf_counter_read_size(struct perf_counter *counter)
1700{
1701 int entry = sizeof(u64); /* value */
1702 int size = 0;
1703 int nr = 1;
1704
1705 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1706 size += sizeof(u64);
1707
1708 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1709 size += sizeof(u64);
1710
1711 if (counter->attr.read_format & PERF_FORMAT_ID)
1712 entry += sizeof(u64);
1713
1714 if (counter->attr.read_format & PERF_FORMAT_GROUP) {
1715 nr += counter->group_leader->nr_siblings;
1716 size += sizeof(u64);
1717 }
1718
1719 size += entry * nr;
1720
1721 return size;
1722}
1723
1724static u64 perf_counter_read_value(struct perf_counter *counter)
1695{ 1725{
1696 struct perf_counter *child; 1726 struct perf_counter *child;
1697 u64 total = 0; 1727 u64 total = 0;
@@ -1703,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
1703 return total; 1733 return total;
1704} 1734}
1705 1735
1736static int perf_counter_read_entry(struct perf_counter *counter,
1737 u64 read_format, char __user *buf)
1738{
1739 int n = 0, count = 0;
1740 u64 values[2];
1741
1742 values[n++] = perf_counter_read_value(counter);
1743 if (read_format & PERF_FORMAT_ID)
1744 values[n++] = primary_counter_id(counter);
1745
1746 count = n * sizeof(u64);
1747
1748 if (copy_to_user(buf, values, count))
1749 return -EFAULT;
1750
1751 return count;
1752}
1753
1754static int perf_counter_read_group(struct perf_counter *counter,
1755 u64 read_format, char __user *buf)
1756{
1757 struct perf_counter *leader = counter->group_leader, *sub;
1758 int n = 0, size = 0, err = -EFAULT;
1759 u64 values[3];
1760
1761 values[n++] = 1 + leader->nr_siblings;
1762 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1763 values[n++] = leader->total_time_enabled +
1764 atomic64_read(&leader->child_total_time_enabled);
1765 }
1766 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1767 values[n++] = leader->total_time_running +
1768 atomic64_read(&leader->child_total_time_running);
1769 }
1770
1771 size = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, size))
1774 return -EFAULT;
1775
1776 err = perf_counter_read_entry(leader, read_format, buf + size);
1777 if (err < 0)
1778 return err;
1779
1780 size += err;
1781
1782 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
1783 err = perf_counter_read_entry(counter, read_format,
1784 buf + size);
1785 if (err < 0)
1786 return err;
1787
1788 size += err;
1789 }
1790
1791 return size;
1792}
1793
1794static int perf_counter_read_one(struct perf_counter *counter,
1795 u64 read_format, char __user *buf)
1796{
1797 u64 values[4];
1798 int n = 0;
1799
1800 values[n++] = perf_counter_read_value(counter);
1801 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1802 values[n++] = counter->total_time_enabled +
1803 atomic64_read(&counter->child_total_time_enabled);
1804 }
1805 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1806 values[n++] = counter->total_time_running +
1807 atomic64_read(&counter->child_total_time_running);
1808 }
1809 if (read_format & PERF_FORMAT_ID)
1810 values[n++] = primary_counter_id(counter);
1811
1812 if (copy_to_user(buf, values, n * sizeof(u64)))
1813 return -EFAULT;
1814
1815 return n * sizeof(u64);
1816}
1817
1706/* 1818/*
1707 * Read the performance counter - simple non blocking version for now 1819 * Read the performance counter - simple non blocking version for now
1708 */ 1820 */
1709static ssize_t 1821static ssize_t
1710perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) 1822perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1711{ 1823{
1712 u64 values[4]; 1824 u64 read_format = counter->attr.read_format;
1713 int n; 1825 int ret;
1714 1826
1715 /* 1827 /*
1716 * Return end-of-file for a read on a counter that is in 1828 * Return end-of-file for a read on a counter that is in
@@ -1720,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1720 if (counter->state == PERF_COUNTER_STATE_ERROR) 1832 if (counter->state == PERF_COUNTER_STATE_ERROR)
1721 return 0; 1833 return 0;
1722 1834
1835 if (count < perf_counter_read_size(counter))
1836 return -ENOSPC;
1837
1723 WARN_ON_ONCE(counter->ctx->parent_ctx); 1838 WARN_ON_ONCE(counter->ctx->parent_ctx);
1724 mutex_lock(&counter->child_mutex); 1839 mutex_lock(&counter->child_mutex);
1725 values[0] = perf_counter_read_tree(counter); 1840 if (read_format & PERF_FORMAT_GROUP)
1726 n = 1; 1841 ret = perf_counter_read_group(counter, read_format, buf);
1727 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 1842 else
1728 values[n++] = counter->total_time_enabled + 1843 ret = perf_counter_read_one(counter, read_format, buf);
1729 atomic64_read(&counter->child_total_time_enabled);
1730 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1731 values[n++] = counter->total_time_running +
1732 atomic64_read(&counter->child_total_time_running);
1733 if (counter->attr.read_format & PERF_FORMAT_ID)
1734 values[n++] = primary_counter_id(counter);
1735 mutex_unlock(&counter->child_mutex); 1844 mutex_unlock(&counter->child_mutex);
1736 1845
1737 if (count < n * sizeof(u64)) 1846 return ret;
1738 return -EINVAL;
1739 count = n * sizeof(u64);
1740
1741 if (copy_to_user(buf, values, count))
1742 return -EFAULT;
1743
1744 return count;
1745} 1847}
1746 1848
1747static ssize_t 1849static ssize_t
@@ -2245,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
2245 2347
2246 if (counter->pending_disable) { 2348 if (counter->pending_disable) {
2247 counter->pending_disable = 0; 2349 counter->pending_disable = 0;
2248 perf_counter_disable(counter); 2350 __perf_counter_disable(counter);
2249 } 2351 }
2250 2352
2251 if (counter->pending_wakeup) { 2353 if (counter->pending_wakeup) {
@@ -2630,7 +2732,80 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
2630 return task_pid_nr_ns(p, counter->ns); 2732 return task_pid_nr_ns(p, counter->ns);
2631} 2733}
2632 2734
2633static void perf_counter_output(struct perf_counter *counter, int nmi, 2735static void perf_output_read_one(struct perf_output_handle *handle,
2736 struct perf_counter *counter)
2737{
2738 u64 read_format = counter->attr.read_format;
2739 u64 values[4];
2740 int n = 0;
2741
2742 values[n++] = atomic64_read(&counter->count);
2743 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2744 values[n++] = counter->total_time_enabled +
2745 atomic64_read(&counter->child_total_time_enabled);
2746 }
2747 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2748 values[n++] = counter->total_time_running +
2749 atomic64_read(&counter->child_total_time_running);
2750 }
2751 if (read_format & PERF_FORMAT_ID)
2752 values[n++] = primary_counter_id(counter);
2753
2754 perf_output_copy(handle, values, n * sizeof(u64));
2755}
2756
2757/*
2758 * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
2759 */
2760static void perf_output_read_group(struct perf_output_handle *handle,
2761 struct perf_counter *counter)
2762{
2763 struct perf_counter *leader = counter->group_leader, *sub;
2764 u64 read_format = counter->attr.read_format;
2765 u64 values[5];
2766 int n = 0;
2767
2768 values[n++] = 1 + leader->nr_siblings;
2769
2770 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2771 values[n++] = leader->total_time_enabled;
2772
2773 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2774 values[n++] = leader->total_time_running;
2775
2776 if (leader != counter)
2777 leader->pmu->read(leader);
2778
2779 values[n++] = atomic64_read(&leader->count);
2780 if (read_format & PERF_FORMAT_ID)
2781 values[n++] = primary_counter_id(leader);
2782
2783 perf_output_copy(handle, values, n * sizeof(u64));
2784
2785 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2786 n = 0;
2787
2788 if (sub != counter)
2789 sub->pmu->read(sub);
2790
2791 values[n++] = atomic64_read(&sub->count);
2792 if (read_format & PERF_FORMAT_ID)
2793 values[n++] = primary_counter_id(sub);
2794
2795 perf_output_copy(handle, values, n * sizeof(u64));
2796 }
2797}
2798
2799static void perf_output_read(struct perf_output_handle *handle,
2800 struct perf_counter *counter)
2801{
2802 if (counter->attr.read_format & PERF_FORMAT_GROUP)
2803 perf_output_read_group(handle, counter);
2804 else
2805 perf_output_read_one(handle, counter);
2806}
2807
2808void perf_counter_output(struct perf_counter *counter, int nmi,
2634 struct perf_sample_data *data) 2809 struct perf_sample_data *data)
2635{ 2810{
2636 int ret; 2811 int ret;
@@ -2641,10 +2816,6 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2641 struct { 2816 struct {
2642 u32 pid, tid; 2817 u32 pid, tid;
2643 } tid_entry; 2818 } tid_entry;
2644 struct {
2645 u64 id;
2646 u64 counter;
2647 } group_entry;
2648 struct perf_callchain_entry *callchain = NULL; 2819 struct perf_callchain_entry *callchain = NULL;
2649 int callchain_size = 0; 2820 int callchain_size = 0;
2650 u64 time; 2821 u64 time;
@@ -2699,10 +2870,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2699 if (sample_type & PERF_SAMPLE_PERIOD) 2870 if (sample_type & PERF_SAMPLE_PERIOD)
2700 header.size += sizeof(u64); 2871 header.size += sizeof(u64);
2701 2872
2702 if (sample_type & PERF_SAMPLE_GROUP) { 2873 if (sample_type & PERF_SAMPLE_READ)
2703 header.size += sizeof(u64) + 2874 header.size += perf_counter_read_size(counter);
2704 counter->nr_siblings * sizeof(group_entry);
2705 }
2706 2875
2707 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2876 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2708 callchain = perf_callchain(data->regs); 2877 callchain = perf_callchain(data->regs);
@@ -2714,6 +2883,18 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2714 header.size += sizeof(u64); 2883 header.size += sizeof(u64);
2715 } 2884 }
2716 2885
2886 if (sample_type & PERF_SAMPLE_RAW) {
2887 int size = sizeof(u32);
2888
2889 if (data->raw)
2890 size += data->raw->size;
2891 else
2892 size += sizeof(u32);
2893
2894 WARN_ON_ONCE(size & (sizeof(u64)-1));
2895 header.size += size;
2896 }
2897
2717 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2898 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
2718 if (ret) 2899 if (ret)
2719 return; 2900 return;
@@ -2747,26 +2928,8 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2747 if (sample_type & PERF_SAMPLE_PERIOD) 2928 if (sample_type & PERF_SAMPLE_PERIOD)
2748 perf_output_put(&handle, data->period); 2929 perf_output_put(&handle, data->period);
2749 2930
2750 /* 2931 if (sample_type & PERF_SAMPLE_READ)
2751 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. 2932 perf_output_read(&handle, counter);
2752 */
2753 if (sample_type & PERF_SAMPLE_GROUP) {
2754 struct perf_counter *leader, *sub;
2755 u64 nr = counter->nr_siblings;
2756
2757 perf_output_put(&handle, nr);
2758
2759 leader = counter->group_leader;
2760 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2761 if (sub != counter)
2762 sub->pmu->read(sub);
2763
2764 group_entry.id = primary_counter_id(sub);
2765 group_entry.counter = atomic64_read(&sub->count);
2766
2767 perf_output_put(&handle, group_entry);
2768 }
2769 }
2770 2933
2771 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2934 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2772 if (callchain) 2935 if (callchain)
@@ -2777,6 +2940,22 @@ static void perf_counter_output(struct perf_counter *counter, int nmi,
2777 } 2940 }
2778 } 2941 }
2779 2942
2943 if (sample_type & PERF_SAMPLE_RAW) {
2944 if (data->raw) {
2945 perf_output_put(&handle, data->raw->size);
2946 perf_output_copy(&handle, data->raw->data, data->raw->size);
2947 } else {
2948 struct {
2949 u32 size;
2950 u32 data;
2951 } raw = {
2952 .size = sizeof(u32),
2953 .data = 0,
2954 };
2955 perf_output_put(&handle, raw);
2956 }
2957 }
2958
2780 perf_output_end(&handle); 2959 perf_output_end(&handle);
2781} 2960}
2782 2961
@@ -2789,8 +2968,6 @@ struct perf_read_event {
2789 2968
2790 u32 pid; 2969 u32 pid;
2791 u32 tid; 2970 u32 tid;
2792 u64 value;
2793 u64 format[3];
2794}; 2971};
2795 2972
2796static void 2973static void
@@ -2802,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
2802 .header = { 2979 .header = {
2803 .type = PERF_EVENT_READ, 2980 .type = PERF_EVENT_READ,
2804 .misc = 0, 2981 .misc = 0,
2805 .size = sizeof(event) - sizeof(event.format), 2982 .size = sizeof(event) + perf_counter_read_size(counter),
2806 }, 2983 },
2807 .pid = perf_counter_pid(counter, task), 2984 .pid = perf_counter_pid(counter, task),
2808 .tid = perf_counter_tid(counter, task), 2985 .tid = perf_counter_tid(counter, task),
2809 .value = atomic64_read(&counter->count),
2810 }; 2986 };
2811 int ret, i = 0; 2987 int ret;
2812
2813 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2814 event.header.size += sizeof(u64);
2815 event.format[i++] = counter->total_time_enabled;
2816 }
2817
2818 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2819 event.header.size += sizeof(u64);
2820 event.format[i++] = counter->total_time_running;
2821 }
2822
2823 if (counter->attr.read_format & PERF_FORMAT_ID) {
2824 event.header.size += sizeof(u64);
2825 event.format[i++] = primary_counter_id(counter);
2826 }
2827 2988
2828 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); 2989 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
2829 if (ret) 2990 if (ret)
2830 return; 2991 return;
2831 2992
2832 perf_output_copy(&handle, &event, event.header.size); 2993 perf_output_put(&handle, event);
2994 perf_output_read(&handle, counter);
2995
2833 perf_output_end(&handle); 2996 perf_output_end(&handle);
2834} 2997}
2835 2998
@@ -2840,7 +3003,8 @@ perf_counter_read_event(struct perf_counter *counter,
2840 */ 3003 */
2841 3004
2842struct perf_task_event { 3005struct perf_task_event {
2843 struct task_struct *task; 3006 struct task_struct *task;
3007 struct perf_counter_context *task_ctx;
2844 3008
2845 struct { 3009 struct {
2846 struct perf_event_header header; 3010 struct perf_event_header header;
@@ -2864,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
2864 return; 3028 return;
2865 3029
2866 task_event->event.pid = perf_counter_pid(counter, task); 3030 task_event->event.pid = perf_counter_pid(counter, task);
2867 task_event->event.ppid = perf_counter_pid(counter, task->real_parent); 3031 task_event->event.ppid = perf_counter_pid(counter, current);
2868 3032
2869 task_event->event.tid = perf_counter_tid(counter, task); 3033 task_event->event.tid = perf_counter_tid(counter, task);
2870 task_event->event.ptid = perf_counter_tid(counter, task->real_parent); 3034 task_event->event.ptid = perf_counter_tid(counter, current);
2871 3035
2872 perf_output_put(&handle, task_event->event); 3036 perf_output_put(&handle, task_event->event);
2873 perf_output_end(&handle); 3037 perf_output_end(&handle);
@@ -2900,24 +3064,23 @@ static void perf_counter_task_ctx(struct perf_counter_context *ctx,
2900static void perf_counter_task_event(struct perf_task_event *task_event) 3064static void perf_counter_task_event(struct perf_task_event *task_event)
2901{ 3065{
2902 struct perf_cpu_context *cpuctx; 3066 struct perf_cpu_context *cpuctx;
2903 struct perf_counter_context *ctx; 3067 struct perf_counter_context *ctx = task_event->task_ctx;
2904 3068
2905 cpuctx = &get_cpu_var(perf_cpu_context); 3069 cpuctx = &get_cpu_var(perf_cpu_context);
2906 perf_counter_task_ctx(&cpuctx->ctx, task_event); 3070 perf_counter_task_ctx(&cpuctx->ctx, task_event);
2907 put_cpu_var(perf_cpu_context); 3071 put_cpu_var(perf_cpu_context);
2908 3072
2909 rcu_read_lock(); 3073 rcu_read_lock();
2910 /* 3074 if (!ctx)
2911 * doesn't really matter which of the child contexts the 3075 ctx = rcu_dereference(task_event->task->perf_counter_ctxp);
2912 * events ends up in.
2913 */
2914 ctx = rcu_dereference(current->perf_counter_ctxp);
2915 if (ctx) 3076 if (ctx)
2916 perf_counter_task_ctx(ctx, task_event); 3077 perf_counter_task_ctx(ctx, task_event);
2917 rcu_read_unlock(); 3078 rcu_read_unlock();
2918} 3079}
2919 3080
2920static void perf_counter_task(struct task_struct *task, int new) 3081static void perf_counter_task(struct task_struct *task,
3082 struct perf_counter_context *task_ctx,
3083 int new)
2921{ 3084{
2922 struct perf_task_event task_event; 3085 struct perf_task_event task_event;
2923 3086
@@ -2927,8 +3090,9 @@ static void perf_counter_task(struct task_struct *task, int new)
2927 return; 3090 return;
2928 3091
2929 task_event = (struct perf_task_event){ 3092 task_event = (struct perf_task_event){
2930 .task = task, 3093 .task = task,
2931 .event = { 3094 .task_ctx = task_ctx,
3095 .event = {
2932 .header = { 3096 .header = {
2933 .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT, 3097 .type = new ? PERF_EVENT_FORK : PERF_EVENT_EXIT,
2934 .misc = 0, 3098 .misc = 0,
@@ -2946,7 +3110,7 @@ static void perf_counter_task(struct task_struct *task, int new)
2946 3110
2947void perf_counter_fork(struct task_struct *task) 3111void perf_counter_fork(struct task_struct *task)
2948{ 3112{
2949 perf_counter_task(task, 1); 3113 perf_counter_task(task, NULL, 1);
2950} 3114}
2951 3115
2952/* 3116/*
@@ -3335,125 +3499,111 @@ int perf_counter_overflow(struct perf_counter *counter, int nmi,
3335 * Generic software counter infrastructure 3499 * Generic software counter infrastructure
3336 */ 3500 */
3337 3501
3338static void perf_swcounter_update(struct perf_counter *counter) 3502/*
3503 * We directly increment counter->count and keep a second value in
3504 * counter->hw.period_left to count intervals. This period counter
3505 * is kept in the range [-sample_period, 0] so that we can use the
3506 * sign as trigger.
3507 */
3508
3509static u64 perf_swcounter_set_period(struct perf_counter *counter)
3339{ 3510{
3340 struct hw_perf_counter *hwc = &counter->hw; 3511 struct hw_perf_counter *hwc = &counter->hw;
3341 u64 prev, now; 3512 u64 period = hwc->last_period;
3342 s64 delta; 3513 u64 nr, offset;
3514 s64 old, val;
3515
3516 hwc->last_period = hwc->sample_period;
3343 3517
3344again: 3518again:
3345 prev = atomic64_read(&hwc->prev_count); 3519 old = val = atomic64_read(&hwc->period_left);
3346 now = atomic64_read(&hwc->count); 3520 if (val < 0)
3347 if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) 3521 return 0;
3348 goto again;
3349 3522
3350 delta = now - prev; 3523 nr = div64_u64(period + val, period);
3524 offset = nr * period;
3525 val -= offset;
3526 if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
3527 goto again;
3351 3528
3352 atomic64_add(delta, &counter->count); 3529 return nr;
3353 atomic64_sub(delta, &hwc->period_left);
3354} 3530}
3355 3531
3356static void perf_swcounter_set_period(struct perf_counter *counter) 3532static void perf_swcounter_overflow(struct perf_counter *counter,
3533 int nmi, struct perf_sample_data *data)
3357{ 3534{
3358 struct hw_perf_counter *hwc = &counter->hw; 3535 struct hw_perf_counter *hwc = &counter->hw;
3359 s64 left = atomic64_read(&hwc->period_left); 3536 u64 overflow;
3360 s64 period = hwc->sample_period;
3361 3537
3362 if (unlikely(left <= -period)) { 3538 data->period = counter->hw.last_period;
3363 left = period; 3539 overflow = perf_swcounter_set_period(counter);
3364 atomic64_set(&hwc->period_left, left);
3365 hwc->last_period = period;
3366 }
3367 3540
3368 if (unlikely(left <= 0)) { 3541 if (hwc->interrupts == MAX_INTERRUPTS)
3369 left += period; 3542 return;
3370 atomic64_add(period, &hwc->period_left);
3371 hwc->last_period = period;
3372 }
3373 3543
3374 atomic64_set(&hwc->prev_count, -left); 3544 for (; overflow; overflow--) {
3375 atomic64_set(&hwc->count, -left); 3545 if (perf_counter_overflow(counter, nmi, data)) {
3546 /*
3547 * We inhibit the overflow from happening when
3548 * hwc->interrupts == MAX_INTERRUPTS.
3549 */
3550 break;
3551 }
3552 }
3376} 3553}
3377 3554
3378static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) 3555static void perf_swcounter_unthrottle(struct perf_counter *counter)
3379{ 3556{
3380 enum hrtimer_restart ret = HRTIMER_RESTART;
3381 struct perf_sample_data data;
3382 struct perf_counter *counter;
3383 u64 period;
3384
3385 counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
3386 counter->pmu->read(counter);
3387
3388 data.addr = 0;
3389 data.regs = get_irq_regs();
3390 /* 3557 /*
3391 * In case we exclude kernel IPs or are somehow not in interrupt 3558 * Nothing to do, we already reset hwc->interrupts.
3392 * context, provide the next best thing, the user IP.
3393 */ 3559 */
3394 if ((counter->attr.exclude_kernel || !data.regs) && 3560}
3395 !counter->attr.exclude_user)
3396 data.regs = task_pt_regs(current);
3397 3561
3398 if (data.regs) { 3562static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3399 if (perf_counter_overflow(counter, 0, &data)) 3563 int nmi, struct perf_sample_data *data)
3400 ret = HRTIMER_NORESTART; 3564{
3401 } 3565 struct hw_perf_counter *hwc = &counter->hw;
3402 3566
3403 period = max_t(u64, 10000, counter->hw.sample_period); 3567 atomic64_add(nr, &counter->count);
3404 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
3405 3568
3406 return ret; 3569 if (!hwc->sample_period)
3407} 3570 return;
3408 3571
3409static void perf_swcounter_overflow(struct perf_counter *counter, 3572 if (!data->regs)
3410 int nmi, struct perf_sample_data *data) 3573 return;
3411{
3412 data->period = counter->hw.last_period;
3413 3574
3414 perf_swcounter_update(counter); 3575 if (!atomic64_add_negative(nr, &hwc->period_left))
3415 perf_swcounter_set_period(counter); 3576 perf_swcounter_overflow(counter, nmi, data);
3416 if (perf_counter_overflow(counter, nmi, data))
3417 /* soft-disable the counter */
3418 ;
3419} 3577}
3420 3578
3421static int perf_swcounter_is_counting(struct perf_counter *counter) 3579static int perf_swcounter_is_counting(struct perf_counter *counter)
3422{ 3580{
3423 struct perf_counter_context *ctx; 3581 /*
3424 unsigned long flags; 3582 * The counter is active, we're good!
3425 int count; 3583 */
3426
3427 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 3584 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
3428 return 1; 3585 return 1;
3429 3586
3587 /*
3588 * The counter is off/error, not counting.
3589 */
3430 if (counter->state != PERF_COUNTER_STATE_INACTIVE) 3590 if (counter->state != PERF_COUNTER_STATE_INACTIVE)
3431 return 0; 3591 return 0;
3432 3592
3433 /* 3593 /*
3434 * If the counter is inactive, it could be just because 3594 * The counter is inactive, if the context is active
3435 * its task is scheduled out, or because it's in a group 3595 * we're part of a group that didn't make it on the 'pmu',
3436 * which could not go on the PMU. We want to count in 3596 * not counting.
3437 * the first case but not the second. If the context is
3438 * currently active then an inactive software counter must
3439 * be the second case. If it's not currently active then
3440 * we need to know whether the counter was active when the
3441 * context was last active, which we can determine by
3442 * comparing counter->tstamp_stopped with ctx->time.
3443 *
3444 * We are within an RCU read-side critical section,
3445 * which protects the existence of *ctx.
3446 */ 3597 */
3447 ctx = counter->ctx; 3598 if (counter->ctx->is_active)
3448 spin_lock_irqsave(&ctx->lock, flags); 3599 return 0;
3449 count = 1; 3600
3450 /* Re-check state now we have the lock */ 3601 /*
3451 if (counter->state < PERF_COUNTER_STATE_INACTIVE || 3602 * We're inactive and the context is too, this means the
3452 counter->ctx->is_active || 3603 * task is scheduled out, we're counting events that happen
3453 counter->tstamp_stopped < ctx->time) 3604 * to us, like migration events.
3454 count = 0; 3605 */
3455 spin_unlock_irqrestore(&ctx->lock, flags); 3606 return 1;
3456 return count;
3457} 3607}
3458 3608
3459static int perf_swcounter_match(struct perf_counter *counter, 3609static int perf_swcounter_match(struct perf_counter *counter,
@@ -3479,15 +3629,6 @@ static int perf_swcounter_match(struct perf_counter *counter,
3479 return 1; 3629 return 1;
3480} 3630}
3481 3631
3482static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3483 int nmi, struct perf_sample_data *data)
3484{
3485 int neg = atomic64_add_negative(nr, &counter->hw.count);
3486
3487 if (counter->hw.sample_period && !neg && data->regs)
3488 perf_swcounter_overflow(counter, nmi, data);
3489}
3490
3491static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, 3632static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
3492 enum perf_type_id type, 3633 enum perf_type_id type,
3493 u32 event, u64 nr, int nmi, 3634 u32 event, u64 nr, int nmi,
@@ -3566,27 +3707,66 @@ void __perf_swcounter_event(u32 event, u64 nr, int nmi,
3566 3707
3567static void perf_swcounter_read(struct perf_counter *counter) 3708static void perf_swcounter_read(struct perf_counter *counter)
3568{ 3709{
3569 perf_swcounter_update(counter);
3570} 3710}
3571 3711
3572static int perf_swcounter_enable(struct perf_counter *counter) 3712static int perf_swcounter_enable(struct perf_counter *counter)
3573{ 3713{
3574 perf_swcounter_set_period(counter); 3714 struct hw_perf_counter *hwc = &counter->hw;
3715
3716 if (hwc->sample_period) {
3717 hwc->last_period = hwc->sample_period;
3718 perf_swcounter_set_period(counter);
3719 }
3575 return 0; 3720 return 0;
3576} 3721}
3577 3722
3578static void perf_swcounter_disable(struct perf_counter *counter) 3723static void perf_swcounter_disable(struct perf_counter *counter)
3579{ 3724{
3580 perf_swcounter_update(counter);
3581} 3725}
3582 3726
3583static const struct pmu perf_ops_generic = { 3727static const struct pmu perf_ops_generic = {
3584 .enable = perf_swcounter_enable, 3728 .enable = perf_swcounter_enable,
3585 .disable = perf_swcounter_disable, 3729 .disable = perf_swcounter_disable,
3586 .read = perf_swcounter_read, 3730 .read = perf_swcounter_read,
3731 .unthrottle = perf_swcounter_unthrottle,
3587}; 3732};
3588 3733
3589/* 3734/*
3735 * hrtimer based swcounter callback
3736 */
3737
3738static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
3739{
3740 enum hrtimer_restart ret = HRTIMER_RESTART;
3741 struct perf_sample_data data;
3742 struct perf_counter *counter;
3743 u64 period;
3744
3745 counter = container_of(hrtimer, struct perf_counter, hw.hrtimer);
3746 counter->pmu->read(counter);
3747
3748 data.addr = 0;
3749 data.regs = get_irq_regs();
3750 /*
3751 * In case we exclude kernel IPs or are somehow not in interrupt
3752 * context, provide the next best thing, the user IP.
3753 */
3754 if ((counter->attr.exclude_kernel || !data.regs) &&
3755 !counter->attr.exclude_user)
3756 data.regs = task_pt_regs(current);
3757
3758 if (data.regs) {
3759 if (perf_counter_overflow(counter, 0, &data))
3760 ret = HRTIMER_NORESTART;
3761 }
3762
3763 period = max_t(u64, 10000, counter->hw.sample_period);
3764 hrtimer_forward_now(hrtimer, ns_to_ktime(period));
3765
3766 return ret;
3767}
3768
3769/*
3590 * Software counter: cpu wall time clock 3770 * Software counter: cpu wall time clock
3591 */ 3771 */
3592 3772
@@ -3703,17 +3883,24 @@ static const struct pmu perf_ops_task_clock = {
3703}; 3883};
3704 3884
3705#ifdef CONFIG_EVENT_PROFILE 3885#ifdef CONFIG_EVENT_PROFILE
3706void perf_tpcounter_event(int event_id) 3886void perf_tpcounter_event(int event_id, u64 addr, u64 count, void *record,
3887 int entry_size)
3707{ 3888{
3889 struct perf_raw_record raw = {
3890 .size = entry_size,
3891 .data = record,
3892 };
3893
3708 struct perf_sample_data data = { 3894 struct perf_sample_data data = {
3709 .regs = get_irq_regs(), 3895 .regs = get_irq_regs(),
3710 .addr = 0, 3896 .addr = addr,
3897 .raw = &raw,
3711 }; 3898 };
3712 3899
3713 if (!data.regs) 3900 if (!data.regs)
3714 data.regs = task_pt_regs(current); 3901 data.regs = task_pt_regs(current);
3715 3902
3716 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data); 3903 do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
3717} 3904}
3718EXPORT_SYMBOL_GPL(perf_tpcounter_event); 3905EXPORT_SYMBOL_GPL(perf_tpcounter_event);
3719 3906
@@ -3727,6 +3914,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
3727 3914
3728static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) 3915static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
3729{ 3916{
3917 /*
3918 * Raw tracepoint data is a severe data leak, only allow root to
3919 * have these.
3920 */
3921 if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
3922 !capable(CAP_SYS_ADMIN))
3923 return ERR_PTR(-EPERM);
3924
3730 if (ftrace_profile_enable(counter->attr.config)) 3925 if (ftrace_profile_enable(counter->attr.config))
3731 return NULL; 3926 return NULL;
3732 3927
@@ -3860,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3860 atomic64_set(&hwc->period_left, hwc->sample_period); 4055 atomic64_set(&hwc->period_left, hwc->sample_period);
3861 4056
3862 /* 4057 /*
3863 * we currently do not support PERF_SAMPLE_GROUP on inherited counters 4058 * we currently do not support PERF_FORMAT_GROUP on inherited counters
3864 */ 4059 */
3865 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) 4060 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
3866 goto done; 4061 goto done;
3867 4062
3868 switch (attr->type) { 4063 switch (attr->type) {
@@ -4269,7 +4464,7 @@ void perf_counter_exit_task(struct task_struct *child)
4269 unsigned long flags; 4464 unsigned long flags;
4270 4465
4271 if (likely(!child->perf_counter_ctxp)) { 4466 if (likely(!child->perf_counter_ctxp)) {
4272 perf_counter_task(child, 0); 4467 perf_counter_task(child, NULL, 0);
4273 return; 4468 return;
4274 } 4469 }
4275 4470
@@ -4289,6 +4484,7 @@ void perf_counter_exit_task(struct task_struct *child)
4289 * incremented the context's refcount before we do put_ctx below. 4484 * incremented the context's refcount before we do put_ctx below.
4290 */ 4485 */
4291 spin_lock(&child_ctx->lock); 4486 spin_lock(&child_ctx->lock);
4487 child->perf_counter_ctxp = NULL;
4292 /* 4488 /*
4293 * If this context is a clone; unclone it so it can't get 4489 * If this context is a clone; unclone it so it can't get
4294 * swapped to another process while we're removing all 4490 * swapped to another process while we're removing all
@@ -4302,9 +4498,7 @@ void perf_counter_exit_task(struct task_struct *child)
4302 * won't get any samples after PERF_EVENT_EXIT. We can however still 4498 * won't get any samples after PERF_EVENT_EXIT. We can however still
4303 * get a few PERF_EVENT_READ events. 4499 * get a few PERF_EVENT_READ events.
4304 */ 4500 */
4305 perf_counter_task(child, 0); 4501 perf_counter_task(child, child_ctx, 0);
4306
4307 child->perf_counter_ctxp = NULL;
4308 4502
4309 /* 4503 /*
4310 * We can recurse on the same lock type through: 4504 * We can recurse on the same lock type through:
@@ -4525,6 +4719,11 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
4525 perf_counter_init_cpu(cpu); 4719 perf_counter_init_cpu(cpu);
4526 break; 4720 break;
4527 4721
4722 case CPU_ONLINE:
4723 case CPU_ONLINE_FROZEN:
4724 hw_perf_counter_setup_online(cpu);
4725 break;
4726
4528 case CPU_DOWN_PREPARE: 4727 case CPU_DOWN_PREPARE:
4529 case CPU_DOWN_PREPARE_FROZEN: 4728 case CPU_DOWN_PREPARE_FROZEN:
4530 perf_counter_exit_cpu(cpu); 4729 perf_counter_exit_cpu(cpu);
@@ -4549,6 +4748,8 @@ void __init perf_counter_init(void)
4549{ 4748{
4550 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, 4749 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
4551 (void *)(long)smp_processor_id()); 4750 (void *)(long)smp_processor_id());
4751 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE,
4752 (void *)(long)smp_processor_id());
4552 register_cpu_notifier(&perf_cpu_nb); 4753 register_cpu_notifier(&perf_cpu_nb);
4553} 4754}
4554 4755
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bece7c0b67b2..e33a21cb9407 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
521} 521}
522void posix_cpu_timers_exit_group(struct task_struct *tsk) 522void posix_cpu_timers_exit_group(struct task_struct *tsk)
523{ 523{
524 struct task_cputime cputime; 524 struct signal_struct *const sig = tsk->signal;
525 525
526 thread_group_cputimer(tsk, &cputime);
527 cleanup_timers(tsk->signal->cpu_timers, 526 cleanup_timers(tsk->signal->cpu_timers,
528 cputime.utime, cputime.stime, cputime.sum_exec_runtime); 527 cputime_add(tsk->utime, sig->utime),
528 cputime_add(tsk->stime, sig->stime),
529 tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
529} 530}
530 531
531static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 532static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index fcd107a78c5a..29bd4baf9e75 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { 1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */ 1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1041 debug_rt_mutex_lock(lock);
1042
1043 rt_mutex_set_owner(lock, task, 0); 1042 rt_mutex_set_owner(lock, task, 0);
1044 1043 spin_unlock(&lock->wait_lock);
1045 rt_mutex_deadlock_account_lock(lock, task); 1044 rt_mutex_deadlock_account_lock(lock, task);
1046 return 1; 1045 return 1;
1047 } 1046 }
1048 1047
1049 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1050 1049
1051
1052 if (ret && !waiter->task) { 1050 if (ret && !waiter->task) {
1053 /* 1051 /*
1054 * Reset the return value. We might have 1052 * Reset the return value. We might have
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1090b0aed9ba..7a34cb563fec 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -267,8 +267,8 @@ static void blk_trace_free(struct blk_trace *bt)
267{ 267{
268 debugfs_remove(bt->msg_file); 268 debugfs_remove(bt->msg_file);
269 debugfs_remove(bt->dropped_file); 269 debugfs_remove(bt->dropped_file);
270 debugfs_remove(bt->dir);
271 relay_close(bt->rchan); 270 relay_close(bt->rchan);
271 debugfs_remove(bt->dir);
272 free_percpu(bt->sequence); 272 free_percpu(bt->sequence);
273 free_percpu(bt->msg_data); 273 free_percpu(bt->msg_data);
274 kfree(bt); 274 kfree(bt);
@@ -378,18 +378,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
378 378
379static int blk_remove_buf_file_callback(struct dentry *dentry) 379static int blk_remove_buf_file_callback(struct dentry *dentry)
380{ 380{
381 struct dentry *parent = dentry->d_parent;
382 debugfs_remove(dentry); 381 debugfs_remove(dentry);
383 382
384 /*
385 * this will fail for all but the last file, but that is ok. what we
386 * care about is the top level buts->name directory going away, when
387 * the last trace file is gone. Then we don't have to rmdir() that
388 * manually on trace stop, so it nicely solves the issue with
389 * force killing of running traces.
390 */
391
392 debugfs_remove(parent);
393 return 0; 383 return 0;
394} 384}
395 385
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index bf27bb7a63e2..a330513d96ce 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -735,6 +735,7 @@ ring_buffer_free(struct ring_buffer *buffer)
735 735
736 put_online_cpus(); 736 put_online_cpus();
737 737
738 kfree(buffer->buffers);
738 free_cpumask_var(buffer->cpumask); 739 free_cpumask_var(buffer->cpumask);
739 740
740 kfree(buffer); 741 kfree(buffer);
@@ -1785,7 +1786,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
1785 */ 1786 */
1786 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing)); 1787 RB_WARN_ON(buffer, !local_read(&cpu_buffer->committing));
1787 1788
1788 if (!rb_try_to_discard(cpu_buffer, event)) 1789 if (rb_try_to_discard(cpu_buffer, event))
1789 goto out; 1790 goto out;
1790 1791
1791 /* 1792 /*
@@ -2383,7 +2384,6 @@ rb_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2383 * the box. Return the padding, and we will release 2384 * the box. Return the padding, and we will release
2384 * the current locks, and try again. 2385 * the current locks, and try again.
2385 */ 2386 */
2386 rb_advance_reader(cpu_buffer);
2387 return event; 2387 return event;
2388 2388
2389 case RINGBUF_TYPE_TIME_EXTEND: 2389 case RINGBUF_TYPE_TIME_EXTEND:
@@ -2486,7 +2486,7 @@ static inline int rb_ok_to_lock(void)
2486 * buffer too. A one time deal is all you get from reading 2486 * buffer too. A one time deal is all you get from reading
2487 * the ring buffer from an NMI. 2487 * the ring buffer from an NMI.
2488 */ 2488 */
2489 if (likely(!in_nmi() && !oops_in_progress)) 2489 if (likely(!in_nmi()))
2490 return 1; 2490 return 1;
2491 2491
2492 tracing_off_permanent(); 2492 tracing_off_permanent();
@@ -2519,6 +2519,8 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
2519 if (dolock) 2519 if (dolock)
2520 spin_lock(&cpu_buffer->reader_lock); 2520 spin_lock(&cpu_buffer->reader_lock);
2521 event = rb_buffer_peek(buffer, cpu, ts); 2521 event = rb_buffer_peek(buffer, cpu, ts);
2522 if (event && event->type_len == RINGBUF_TYPE_PADDING)
2523 rb_advance_reader(cpu_buffer);
2522 if (dolock) 2524 if (dolock)
2523 spin_unlock(&cpu_buffer->reader_lock); 2525 spin_unlock(&cpu_buffer->reader_lock);
2524 local_irq_restore(flags); 2526 local_irq_restore(flags);
@@ -2590,12 +2592,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
2590 spin_lock(&cpu_buffer->reader_lock); 2592 spin_lock(&cpu_buffer->reader_lock);
2591 2593
2592 event = rb_buffer_peek(buffer, cpu, ts); 2594 event = rb_buffer_peek(buffer, cpu, ts);
2593 if (!event) 2595 if (event)
2594 goto out_unlock; 2596 rb_advance_reader(cpu_buffer);
2595
2596 rb_advance_reader(cpu_buffer);
2597 2597
2598 out_unlock:
2599 if (dolock) 2598 if (dolock)
2600 spin_unlock(&cpu_buffer->reader_lock); 2599 spin_unlock(&cpu_buffer->reader_lock);
2601 local_irq_restore(flags); 2600 local_irq_restore(flags);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8930e39b9d8c..c22b40f8f576 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -848,6 +848,7 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
848 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) | 848 ((pc & SOFTIRQ_MASK) ? TRACE_FLAG_SOFTIRQ : 0) |
849 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0); 849 (need_resched() ? TRACE_FLAG_NEED_RESCHED : 0);
850} 850}
851EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
851 852
852struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, 853struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr,
853 int type, 854 int type,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 3548ae5cc780..8b9f4f6e9559 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -438,10 +438,6 @@ struct trace_entry *tracing_get_trace_entry(struct trace_array *tr,
438struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, 438struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
439 int *ent_cpu, u64 *ent_ts); 439 int *ent_cpu, u64 *ent_ts);
440 440
441void tracing_generic_entry_update(struct trace_entry *entry,
442 unsigned long flags,
443 int pc);
444
445void default_wait_pipe(struct trace_iterator *iter); 441void default_wait_pipe(struct trace_iterator *iter);
446void poll_wait_pipe(struct trace_iterator *iter); 442void poll_wait_pipe(struct trace_iterator *iter);
447 443
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 936c621bbf46..f32dc9d1ea7b 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -624,9 +624,6 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
624 return -ENOSPC; 624 return -ENOSPC;
625 } 625 }
626 626
627 filter->preds[filter->n_preds] = pred;
628 filter->n_preds++;
629
630 list_for_each_entry(call, &ftrace_events, list) { 627 list_for_each_entry(call, &ftrace_events, list) {
631 628
632 if (!call->define_fields) 629 if (!call->define_fields)
@@ -643,6 +640,9 @@ static int filter_add_subsystem_pred(struct filter_parse_state *ps,
643 } 640 }
644 replace_filter_string(call->filter, filter_string); 641 replace_filter_string(call->filter, filter_string);
645 } 642 }
643
644 filter->preds[filter->n_preds] = pred;
645 filter->n_preds++;
646out: 646out:
647 return err; 647 return err;
648} 648}
@@ -1029,12 +1029,17 @@ static int replace_preds(struct event_subsystem *system,
1029 1029
1030 if (elt->op == OP_AND || elt->op == OP_OR) { 1030 if (elt->op == OP_AND || elt->op == OP_OR) {
1031 pred = create_logical_pred(elt->op); 1031 pred = create_logical_pred(elt->op);
1032 if (!pred)
1033 return -ENOMEM;
1032 if (call) { 1034 if (call) {
1033 err = filter_add_pred(ps, call, pred); 1035 err = filter_add_pred(ps, call, pred);
1034 filter_free_pred(pred); 1036 filter_free_pred(pred);
1035 } else 1037 } else {
1036 err = filter_add_subsystem_pred(ps, system, 1038 err = filter_add_subsystem_pred(ps, system,
1037 pred, filter_string); 1039 pred, filter_string);
1040 if (err)
1041 filter_free_pred(pred);
1042 }
1038 if (err) 1043 if (err)
1039 return err; 1044 return err;
1040 1045
@@ -1048,12 +1053,17 @@ static int replace_preds(struct event_subsystem *system,
1048 } 1053 }
1049 1054
1050 pred = create_pred(elt->op, operand1, operand2); 1055 pred = create_pred(elt->op, operand1, operand2);
1056 if (!pred)
1057 return -ENOMEM;
1051 if (call) { 1058 if (call) {
1052 err = filter_add_pred(ps, call, pred); 1059 err = filter_add_pred(ps, call, pred);
1053 filter_free_pred(pred); 1060 filter_free_pred(pred);
1054 } else 1061 } else {
1055 err = filter_add_subsystem_pred(ps, system, pred, 1062 err = filter_add_subsystem_pred(ps, system, pred,
1056 filter_string); 1063 filter_string);
1064 if (err)
1065 filter_free_pred(pred);
1066 }
1057 if (err) 1067 if (err)
1058 return err; 1068 return err;
1059 1069
diff --git a/kernel/wait.c b/kernel/wait.c
index ea7c3b4275cf..c4bd3d825f35 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,13 +10,14 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
13void init_waitqueue_head(wait_queue_head_t *q) 13void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
14{ 14{
15 spin_lock_init(&q->lock); 15 spin_lock_init(&q->lock);
16 lockdep_set_class(&q->lock, key);
16 INIT_LIST_HEAD(&q->task_list); 17 INIT_LIST_HEAD(&q->task_list);
17} 18}
18 19
19EXPORT_SYMBOL(init_waitqueue_head); 20EXPORT_SYMBOL(__init_waitqueue_head);
20 21
21void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 22void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
22{ 23{
diff --git a/mm/mempool.c b/mm/mempool.c
index a46eb1b4bb66..32e75d400503 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab);
303 */ 303 */
304void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) 304void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
305{ 305{
306 size_t size = (size_t)(long)pool_data; 306 size_t size = (size_t)pool_data;
307 return kmalloc(size, gfp_mask); 307 return kmalloc(size, gfp_mask);
308} 308}
309EXPORT_SYMBOL(mempool_kmalloc); 309EXPORT_SYMBOL(mempool_kmalloc);
310 310
311void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) 311void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
312{ 312{
313 size_t size = (size_t) pool_data; 313 size_t size = (size_t)pool_data;
314 return kzalloc(size, gfp_mask); 314 return kzalloc(size, gfp_mask);
315} 315}
316EXPORT_SYMBOL(mempool_kzalloc); 316EXPORT_SYMBOL(mempool_kzalloc);
diff --git a/net/socket.c b/net/socket.c
index 791d71a36a93..6d4716559047 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
736 if (more) 736 if (more)
737 flags |= MSG_MORE; 737 flags |= MSG_MORE;
738 738
739 return sock->ops->sendpage(sock, page, offset, size, flags); 739 return kernel_sendpage(sock, page, offset, size, flags);
740} 740}
741 741
742static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 742static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 406e26de584e..8bd690c48b69 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -24,7 +24,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
24 unsigned int quadlen = XDR_QUADLEN(obj->len); 24 unsigned int quadlen = XDR_QUADLEN(obj->len);
25 25
26 p[quadlen] = 0; /* zero trailing bytes */ 26 p[quadlen] = 0; /* zero trailing bytes */
27 *p++ = htonl(obj->len); 27 *p++ = cpu_to_be32(obj->len);
28 memcpy(p, obj->data, obj->len); 28 memcpy(p, obj->data, obj->len);
29 return p + XDR_QUADLEN(obj->len); 29 return p + XDR_QUADLEN(obj->len);
30} 30}
@@ -35,7 +35,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
35{ 35{
36 unsigned int len; 36 unsigned int len;
37 37
38 if ((len = ntohl(*p++)) > XDR_MAX_NETOBJ) 38 if ((len = be32_to_cpu(*p++)) > XDR_MAX_NETOBJ)
39 return NULL; 39 return NULL;
40 obj->len = len; 40 obj->len = len;
41 obj->data = (u8 *) p; 41 obj->data = (u8 *) p;
@@ -83,7 +83,7 @@ EXPORT_SYMBOL_GPL(xdr_encode_opaque_fixed);
83 */ 83 */
84__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes) 84__be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
85{ 85{
86 *p++ = htonl(nbytes); 86 *p++ = cpu_to_be32(nbytes);
87 return xdr_encode_opaque_fixed(p, ptr, nbytes); 87 return xdr_encode_opaque_fixed(p, ptr, nbytes);
88} 88}
89EXPORT_SYMBOL_GPL(xdr_encode_opaque); 89EXPORT_SYMBOL_GPL(xdr_encode_opaque);
@@ -101,7 +101,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp,
101{ 101{
102 u32 len; 102 u32 len;
103 103
104 len = ntohl(*p++); 104 len = be32_to_cpu(*p++);
105 if (len > maxlen) 105 if (len > maxlen)
106 return NULL; 106 return NULL;
107 *lenp = len; 107 *lenp = len;
@@ -771,7 +771,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
771 status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); 771 status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
772 if (status) 772 if (status)
773 return status; 773 return status;
774 *obj = ntohl(raw); 774 *obj = be32_to_cpu(raw);
775 return 0; 775 return 0;
776} 776}
777EXPORT_SYMBOL_GPL(xdr_decode_word); 777EXPORT_SYMBOL_GPL(xdr_decode_word);
@@ -779,7 +779,7 @@ EXPORT_SYMBOL_GPL(xdr_decode_word);
779int 779int
780xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj) 780xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
781{ 781{
782 __be32 raw = htonl(obj); 782 __be32 raw = cpu_to_be32(obj);
783 783
784 return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); 784 return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
785} 785}
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index d29baa2e063a..911ba7ffab84 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -393,7 +393,7 @@ while (<IN>) {
393 $read_function = 0; 393 $read_function = 0;
394 } 394 }
395 # print out any recorded offsets 395 # print out any recorded offsets
396 update_funcs() if ($text_found); 396 update_funcs() if (defined($ref_func));
397 397
398 # reset all markers and arrays 398 # reset all markers and arrays
399 $text_found = 0; 399 $text_found = 0;
@@ -414,7 +414,10 @@ while (<IN>) {
414 $offset = hex $1; 414 $offset = hex $1;
415 } else { 415 } else {
416 # if we already have a function, and this is weak, skip it 416 # if we already have a function, and this is weak, skip it
417 if (!defined($ref_func) && !defined($weak{$text})) { 417 if (!defined($ref_func) && !defined($weak{$text}) &&
418 # PPC64 can have symbols that start with .L and
419 # gcc considers these special. Don't use them!
420 $text !~ /^\.L/) {
418 $ref_func = $text; 421 $ref_func = $text;
419 $offset = hex $1; 422 $offset = hex $1;
420 } 423 }
@@ -441,7 +444,7 @@ while (<IN>) {
441} 444}
442 445
443# dump out anymore offsets that may have been found 446# dump out anymore offsets that may have been found
444update_funcs() if ($text_found); 447update_funcs() if (defined($ref_func));
445 448
446# If we did not find any mcount callers, we are done (do nothing). 449# If we did not find any mcount callers, we are done (do nothing).
447if (!$opened) { 450if (!$opened) {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 15c2a08a66f1..1e8cfc4c2ed6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1285,6 +1285,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1285 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 1285 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
1286 context, len); 1286 context, len);
1287 if (rc == -ERANGE) { 1287 if (rc == -ERANGE) {
1288 kfree(context);
1289
1288 /* Need a larger buffer. Query for the right size. */ 1290 /* Need a larger buffer. Query for the right size. */
1289 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 1291 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
1290 NULL, 0); 1292 NULL, 0);
@@ -1292,7 +1294,6 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1292 dput(dentry); 1294 dput(dentry);
1293 goto out_unlock; 1295 goto out_unlock;
1294 } 1296 }
1295 kfree(context);
1296 len = rc; 1297 len = rc;
1297 context = kmalloc(len+1, GFP_NOFS); 1298 context = kmalloc(len+1, GFP_NOFS);
1298 if (!context) { 1299 if (!context) {
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 51c44fdbc0f0..fea976793ae5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -13563,6 +13563,8 @@ static int patch_alc269(struct hda_codec *codec)
13563 set_capture_mixer(spec); 13563 set_capture_mixer(spec);
13564 set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT); 13564 set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT);
13565 13565
13566 spec->vmaster_nid = 0x02;
13567
13566 codec->patch_ops = alc_patch_ops; 13568 codec->patch_ops = alc_patch_ops;
13567 if (board_config == ALC269_AUTO) 13569 if (board_config == ALC269_AUTO)
13568 spec->init_hook = alc269_auto_init; 13570 spec->init_hook = alc269_auto_init;
@@ -15577,9 +15579,12 @@ static int patch_alc861vd(struct hda_codec *codec)
15577 spec->stream_digital_playback = &alc861vd_pcm_digital_playback; 15579 spec->stream_digital_playback = &alc861vd_pcm_digital_playback;
15578 spec->stream_digital_capture = &alc861vd_pcm_digital_capture; 15580 spec->stream_digital_capture = &alc861vd_pcm_digital_capture;
15579 15581
15580 spec->adc_nids = alc861vd_adc_nids; 15582 if (!spec->adc_nids) {
15581 spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids); 15583 spec->adc_nids = alc861vd_adc_nids;
15582 spec->capsrc_nids = alc861vd_capsrc_nids; 15584 spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids);
15585 }
15586 if (!spec->capsrc_nids)
15587 spec->capsrc_nids = alc861vd_capsrc_nids;
15583 15588
15584 set_capture_mixer(spec); 15589 set_capture_mixer(spec);
15585 set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT); 15590 set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT);
@@ -17496,9 +17501,12 @@ static int patch_alc662(struct hda_codec *codec)
17496 spec->stream_digital_playback = &alc662_pcm_digital_playback; 17501 spec->stream_digital_playback = &alc662_pcm_digital_playback;
17497 spec->stream_digital_capture = &alc662_pcm_digital_capture; 17502 spec->stream_digital_capture = &alc662_pcm_digital_capture;
17498 17503
17499 spec->adc_nids = alc662_adc_nids; 17504 if (!spec->adc_nids) {
17500 spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids); 17505 spec->adc_nids = alc662_adc_nids;
17501 spec->capsrc_nids = alc662_capsrc_nids; 17506 spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids);
17507 }
17508 if (!spec->capsrc_nids)
17509 spec->capsrc_nids = alc662_capsrc_nids;
17502 17510
17503 if (!spec->cap_mixer) 17511 if (!spec->cap_mixer)
17504 set_capture_mixer(spec); 17512 set_capture_mixer(spec);
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
index 85b0e7569504..3326e2a1e863 100644
--- a/sound/soc/fsl/efika-audio-fabric.c
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -30,6 +30,8 @@
30#include "mpc5200_psc_ac97.h" 30#include "mpc5200_psc_ac97.h"
31#include "../codecs/stac9766.h" 31#include "../codecs/stac9766.h"
32 32
33#define DRV_NAME "efika-audio-fabric"
34
33static struct snd_soc_device device; 35static struct snd_soc_device device;
34static struct snd_soc_card card; 36static struct snd_soc_card card;
35 37
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
index 8766f7a3893d..b928ef7d28eb 100644
--- a/sound/soc/fsl/pcm030-audio-fabric.c
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -30,6 +30,8 @@
30#include "mpc5200_psc_ac97.h" 30#include "mpc5200_psc_ac97.h"
31#include "../codecs/wm9712.h" 31#include "../codecs/wm9712.h"
32 32
33#define DRV_NAME "pcm030-audio-fabric"
34
33static struct snd_soc_device device; 35static struct snd_soc_device device;
34static struct snd_soc_card card; 36static struct snd_soc_card card;
35 37
diff --git a/tools/perf/Documentation/perf-examples.txt b/tools/perf/Documentation/perf-examples.txt
new file mode 100644
index 000000000000..8eb6c489fb15
--- /dev/null
+++ b/tools/perf/Documentation/perf-examples.txt
@@ -0,0 +1,225 @@
1
2 ------------------------------
3 ****** perf by examples ******
4 ------------------------------
5
6[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
7
8
9First, discovery/enumeration of available counters can be done via
10'perf list':
11
12titan:~> perf list
13 [...]
14 kmem:kmalloc [Tracepoint event]
15 kmem:kmem_cache_alloc [Tracepoint event]
16 kmem:kmalloc_node [Tracepoint event]
17 kmem:kmem_cache_alloc_node [Tracepoint event]
18 kmem:kfree [Tracepoint event]
19 kmem:kmem_cache_free [Tracepoint event]
20 kmem:mm_page_free_direct [Tracepoint event]
21 kmem:mm_pagevec_free [Tracepoint event]
22 kmem:mm_page_alloc [Tracepoint event]
23 kmem:mm_page_alloc_zone_locked [Tracepoint event]
24 kmem:mm_page_pcpu_drain [Tracepoint event]
25 kmem:mm_page_alloc_extfrag [Tracepoint event]
26
27Then any (or all) of the above event sources can be activated and
28measured. For example the page alloc/free properties of a 'hackbench
29run' are:
30
31 titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
32 -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10
33 Time: 0.575
34
35 Performance counter stats for './hackbench 10':
36
37 13857 kmem:mm_page_pcpu_drain
38 27576 kmem:mm_page_alloc
39 6025 kmem:mm_pagevec_free
40 20934 kmem:mm_page_free_direct
41
42 0.613972165 seconds time elapsed
43
44You can observe the statistical properties as well, by using the
45'repeat the workload N times' feature of perf stat:
46
47 titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
48 kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
49 kmem:mm_page_free_direct ./hackbench 10
50 Time: 0.627
51 Time: 0.644
52 Time: 0.564
53 Time: 0.559
54 Time: 0.626
55
56 Performance counter stats for './hackbench 10' (5 runs):
57
58 12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
59 25035 kmem:mm_page_alloc ( +- 3.783% )
60 6104 kmem:mm_pagevec_free ( +- 0.934% )
61 18376 kmem:mm_page_free_direct ( +- 4.941% )
62
63 0.643954516 seconds time elapsed ( +- 2.363% )
64
65Furthermore, these tracepoints can be used to sample the workload as
66well. For example the page allocations done by a 'git gc' can be
67captured the following way:
68
69 titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
70 Counting objects: 1148, done.
71 Delta compression using up to 2 threads.
72 Compressing objects: 100% (450/450), done.
73 Writing objects: 100% (1148/1148), done.
74 Total 1148 (delta 690), reused 1148 (delta 690)
75 [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
76
77To check which functions generated page allocations:
78
79 titan:~/git> perf report
80 # Samples: 10646
81 #
82 # Overhead Command Shared Object
83 # ........ ............... ..........................
84 #
85 23.57% git-repack /lib64/libc-2.5.so
86 21.81% git /lib64/libc-2.5.so
87 14.59% git ./git
88 11.79% git-repack ./git
89 7.12% git /lib64/ld-2.5.so
90 3.16% git-repack /lib64/libpthread-2.5.so
91 2.09% git-repack /bin/bash
92 1.97% rm /lib64/libc-2.5.so
93 1.39% mv /lib64/ld-2.5.so
94 1.37% mv /lib64/libc-2.5.so
95 1.12% git-repack /lib64/ld-2.5.so
96 0.95% rm /lib64/ld-2.5.so
97 0.90% git-update-serv /lib64/libc-2.5.so
98 0.73% git-update-serv /lib64/ld-2.5.so
99 0.68% perf /lib64/libpthread-2.5.so
100 0.64% git-repack /usr/lib64/libz.so.1.2.3
101
102Or to see it on a more finegrained level:
103
104titan:~/git> perf report --sort comm,dso,symbol
105# Samples: 10646
106#
107# Overhead Command Shared Object Symbol
108# ........ ............... .......................... ......
109#
110 9.35% git-repack ./git [.] insert_obj_hash
111 9.12% git ./git [.] insert_obj_hash
112 7.31% git /lib64/libc-2.5.so [.] memcpy
113 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
114 6.24% git-repack /lib64/libc-2.5.so [.] memcpy
115 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
116 5.47% git /lib64/libc-2.5.so [.] _int_malloc
117 2.99% git /lib64/libc-2.5.so [.] memset
118
119Furthermore, call-graph sampling can be done too, of page
120allocations - to see precisely what kind of page allocations there
121are:
122
123 titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
124 Counting objects: 1148, done.
125 Delta compression using up to 2 threads.
126 Compressing objects: 100% (450/450), done.
127 Writing objects: 100% (1148/1148), done.
128 Total 1148 (delta 690), reused 1148 (delta 690)
129 [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
130
131 titan:~/git> perf report -g
132 # Samples: 10686
133 #
134 # Overhead Command Shared Object
135 # ........ ............... ..........................
136 #
137 23.25% git-repack /lib64/libc-2.5.so
138 |
139 |--50.00%-- _int_free
140 |
141 |--37.50%-- __GI___fork
142 | make_child
143 |
144 |--12.50%-- ptmalloc_unlock_all2
145 | make_child
146 |
147 --6.25%-- __GI_strcpy
148 21.61% git /lib64/libc-2.5.so
149 |
150 |--30.00%-- __GI_read
151 | |
152 | --83.33%-- git_config_from_file
153 | git_config
154 | |
155 [...]
156
157Or you can observe the whole system's page allocations for 10
158seconds:
159
160titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
161kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
162kmem:mm_page_free_direct sleep 10
163
164 Performance counter stats for 'sleep 10':
165
166 171585 kmem:mm_page_pcpu_drain
167 322114 kmem:mm_page_alloc
168 73623 kmem:mm_pagevec_free
169 254115 kmem:mm_page_free_direct
170
171 10.000591410 seconds time elapsed
172
173Or observe how fluctuating the page allocations are, via statistical
174analysis done over ten 1-second intervals:
175
176 titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
177 kmem:mm_page_alloc -e kmem:mm_pagevec_free -e
178 kmem:mm_page_free_direct sleep 1
179
180 Performance counter stats for 'sleep 1' (10 runs):
181
182 17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
183 34394 kmem:mm_page_alloc ( +- 4.617% )
184 7509 kmem:mm_pagevec_free ( +- 4.820% )
185 25653 kmem:mm_page_free_direct ( +- 3.672% )
186
187 1.058135029 seconds time elapsed ( +- 3.089% )
188
189Or you can annotate the recorded 'git gc' run on a per symbol basis
190and check which instructions/source-code generated page allocations:
191
192 titan:~/git> perf annotate __GI___fork
193 ------------------------------------------------
194 Percent | Source code & Disassembly of libc-2.5.so
195 ------------------------------------------------
196 :
197 :
198 : Disassembly of section .plt:
199 : Disassembly of section .text:
200 :
201 : 00000031a2e95560 <__fork>:
202 [...]
203 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
204 0.00 : 31a2e95607: 0f 05 syscall
205 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
206 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
207 0.00 : 31a2e95615: 85 c0 test %eax,%eax
208
209( this shows that 83.42% of __GI___fork's page allocations come from
210 the 0x38 system call it performs. )
211
212etc. etc. - a lot more is possible. I could list a dozen of
213other different usecases straight away - neither of which is
214possible via /proc/vmstat.
215
216/proc/vmstat is not in the same league really, in terms of
217expressive power of system analysis and performance
218analysis.
219
220All that the above results needed were those new tracepoints
221in include/tracing/events/kmem.h.
222
223 Ingo
224
225
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 0d74346d21ab..484080dd5b6f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -40,7 +40,7 @@ OPTIONS
40-a:: 40-a::
41 system-wide collection 41 system-wide collection
42 42
43-S:: 43-c::
44 scale counter values 44 scale counter values
45 45
46EXAMPLES 46EXAMPLES
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 539d01289725..4a7d558dc309 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -3,36 +3,122 @@ perf-top(1)
3 3
4NAME 4NAME
5---- 5----
6perf-top - Run a command and profile it 6perf-top - System profiling tool.
7 7
8SYNOPSIS 8SYNOPSIS
9-------- 9--------
10[verse] 10[verse]
11'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> 11'perf top' [-e <EVENT> | --event=EVENT] [<options>]
12 12
13DESCRIPTION 13DESCRIPTION
14----------- 14-----------
15This command runs a command and gathers a performance counter profile 15This command generates and displays a performance counter profile in realtime.
16from it.
17 16
18 17
19OPTIONS 18OPTIONS
20------- 19-------
21<command>...:: 20-a::
22 Any command you can specify in a shell. 21--all-cpus::
22 System-wide collection. (default)
23
24-c <count>::
25--count=<count>::
26 Event period to sample.
27
28-C <cpu>::
29--CPU=<cpu>::
30 CPU to profile.
31
32-d <seconds>::
33--delay=<seconds>::
34 Number of seconds to delay between refreshes.
23 35
24-e:: 36-e <event>::
25--event=:: 37--event=<event>::
26 Select the PMU event. Selection can be a symbolic event name 38 Select the PMU event. Selection can be a symbolic event name
27 (use 'perf list' to list all events) or a raw PMU 39 (use 'perf list' to list all events) or a raw PMU
28 event (eventsel+umask) in the form of rNNN where NNN is a 40 event (eventsel+umask) in the form of rNNN where NNN is a
29 hexadecimal event descriptor. 41 hexadecimal event descriptor.
30 42
31-a:: 43-E <entries>::
32 system-wide collection 44--entries=<entries>::
45 Display this many functions.
46
47-f <count>::
48--count-filter=<count>::
49 Only display functions with more events than this.
50
51-F <freq>::
52--freq=<freq>::
53 Profile at this frequency.
54
55-i::
56--inherit::
57 Child tasks inherit counters, only makes sens with -p option.
58
59-k <path>::
60--vmlinux=<path>::
61 Path to vmlinux. Required for annotation functionality.
62
63-m <pages>::
64--mmap-pages=<pages>::
65 Number of mmapped data pages.
66
67-p <pid>::
68--pid=<pid>::
69 Profile events on existing pid.
70
71-r <priority>::
72--realtime=<priority>::
73 Collect data with this RT SCHED_FIFO priority.
74
75-s <symbol>::
76--sym-annotate=<symbol>::
77 Annotate this symbol. Requires -k option.
78
79-v::
80--verbose::
81 Be more verbose (show counter open errors, etc).
82
83-z::
84--zero::
85 Zero history across display updates.
86
87INTERACTIVE PROMPTING KEYS
88--------------------------
89
90[d]::
91 Display refresh delay.
92
93[e]::
94 Number of entries to display.
95
96[E]::
97 Event to display when multiple counters are active.
98
99[f]::
100 Profile display filter (>= hit count).
101
102[F]::
103 Annotation display filter (>= % of total).
104
105[s]::
106 Annotate symbol.
107
108[S]::
109 Stop annotation, return to full profile display.
110
111[w]::
112 Toggle between weighted sum and individual count[E]r profile.
113
114[z]::
115 Toggle event count zeroing across display updates.
116
117[qQ]::
118 Quit.
119
120Pressing any unmapped key displays a menu, and prompts for input.
33 121
34-l::
35 scale counter values
36 122
37SEE ALSO 123SEE ALSO
38-------- 124--------
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1916e44b9bb0..c045b4271e57 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -382,18 +382,29 @@ endif
382ifdef NO_DEMANGLE 382ifdef NO_DEMANGLE
383 BASIC_CFLAGS += -DNO_DEMANGLE 383 BASIC_CFLAGS += -DNO_DEMANGLE
384else 384else
385
386 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") 385 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
387 386
388 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
389
390 ifeq ($(has_bfd),y) 387 ifeq ($(has_bfd),y)
391 EXTLIBS += -lbfd 388 EXTLIBS += -lbfd
392 else ifeq ($(has_bfd_iberty),y)
393 EXTLIBS += -lbfd -liberty
394 else 389 else
395 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) 390 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
396 BASIC_CFLAGS += -DNO_DEMANGLE 391 ifeq ($(has_bfd_iberty),y)
392 EXTLIBS += -lbfd -liberty
393 else
394 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
395 ifeq ($(has_bfd_iberty_z),y)
396 EXTLIBS += -lbfd -liberty -lz
397 else
398 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
399 ifeq ($(has_cplus_demangle),y)
400 EXTLIBS += -liberty
401 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
402 else
403 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
404 BASIC_CFLAGS += -DNO_DEMANGLE
405 endif
406 endif
407 endif
397 endif 408 endif
398endif 409endif
399 410
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8a35c9..d88c6961274c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
10 10
11#include "perf.h" 11#include "perf.h"
12 12
13#include "util/parse-options.h"
14#include "util/parse-events.h" 13#include "util/parse-events.h"
14#include "util/cache.h"
15 15
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
17{ 17{
18 setup_pager();
18 print_events(); 19 print_events();
19 return 0; 20 return 0;
20} 21}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6da09928130f..3d051b9cf25f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,7 +34,9 @@ static int output;
34static const char *output_name = "perf.data"; 34static const char *output_name = "perf.data";
35static int group = 0; 35static int group = 0;
36static unsigned int realtime_prio = 0; 36static unsigned int realtime_prio = 0;
37static int raw_samples = 0;
37static int system_wide = 0; 38static int system_wide = 0;
39static int profile_cpu = -1;
38static pid_t target_pid = -1; 40static pid_t target_pid = -1;
39static int inherit = 1; 41static int inherit = 1;
40static int force = 0; 42static int force = 0;
@@ -203,46 +205,48 @@ static void sig_atexit(void)
203 kill(getpid(), signr); 205 kill(getpid(), signr);
204} 206}
205 207
206static void pid_synthesize_comm_event(pid_t pid, int full) 208static pid_t pid_synthesize_comm_event(pid_t pid, int full)
207{ 209{
208 struct comm_event comm_ev; 210 struct comm_event comm_ev;
209 char filename[PATH_MAX]; 211 char filename[PATH_MAX];
210 char bf[BUFSIZ]; 212 char bf[BUFSIZ];
211 int fd; 213 FILE *fp;
212 size_t size; 214 size_t size = 0;
213 char *field, *sep;
214 DIR *tasks; 215 DIR *tasks;
215 struct dirent dirent, *next; 216 struct dirent dirent, *next;
217 pid_t tgid = 0;
216 218
217 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); 219 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
218 220
219 fd = open(filename, O_RDONLY); 221 fp = fopen(filename, "r");
220 if (fd < 0) { 222 if (fd == NULL) {
221 /* 223 /*
222 * We raced with a task exiting - just return: 224 * We raced with a task exiting - just return:
223 */ 225 */
224 if (verbose) 226 if (verbose)
225 fprintf(stderr, "couldn't open %s\n", filename); 227 fprintf(stderr, "couldn't open %s\n", filename);
226 return; 228 return 0;
227 } 229 }
228 if (read(fd, bf, sizeof(bf)) < 0) {
229 fprintf(stderr, "couldn't read %s\n", filename);
230 exit(EXIT_FAILURE);
231 }
232 close(fd);
233 230
234 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
235 memset(&comm_ev, 0, sizeof(comm_ev)); 231 memset(&comm_ev, 0, sizeof(comm_ev));
236 field = strchr(bf, '('); 232 while (!comm_ev.comm[0] || !comm_ev.pid) {
237 if (field == NULL) 233 if (fgets(bf, sizeof(bf), fp) == NULL)
238 goto out_failure; 234 goto out_failure;
239 sep = strchr(++field, ')'); 235
240 if (sep == NULL) 236 if (memcmp(bf, "Name:", 5) == 0) {
241 goto out_failure; 237 char *name = bf + 5;
242 size = sep - field; 238 while (*name && isspace(*name))
243 memcpy(comm_ev.comm, field, size++); 239 ++name;
244 240 size = strlen(name) - 1;
245 comm_ev.pid = pid; 241 memcpy(comm_ev.comm, name, size++);
242 } else if (memcmp(bf, "Tgid:", 5) == 0) {
243 char *tgids = bf + 5;
244 while (*tgids && isspace(*tgids))
245 ++tgids;
246 tgid = comm_ev.pid = atoi(tgids);
247 }
248 }
249
246 comm_ev.header.type = PERF_EVENT_COMM; 250 comm_ev.header.type = PERF_EVENT_COMM;
247 size = ALIGN(size, sizeof(u64)); 251 size = ALIGN(size, sizeof(u64));
248 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); 252 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -251,7 +255,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
251 comm_ev.tid = pid; 255 comm_ev.tid = pid;
252 256
253 write_output(&comm_ev, comm_ev.header.size); 257 write_output(&comm_ev, comm_ev.header.size);
254 return; 258 goto out_fclose;
255 } 259 }
256 260
257 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 261 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -268,7 +272,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
268 write_output(&comm_ev, comm_ev.header.size); 272 write_output(&comm_ev, comm_ev.header.size);
269 } 273 }
270 closedir(tasks); 274 closedir(tasks);
271 return; 275
276out_fclose:
277 fclose(fp);
278 return tgid;
272 279
273out_failure: 280out_failure:
274 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", 281 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -276,7 +283,7 @@ out_failure:
276 exit(EXIT_FAILURE); 283 exit(EXIT_FAILURE);
277} 284}
278 285
279static void pid_synthesize_mmap_samples(pid_t pid) 286static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
280{ 287{
281 char filename[PATH_MAX]; 288 char filename[PATH_MAX];
282 FILE *fp; 289 FILE *fp;
@@ -328,7 +335,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
328 mmap_ev.len -= mmap_ev.start; 335 mmap_ev.len -= mmap_ev.start;
329 mmap_ev.header.size = (sizeof(mmap_ev) - 336 mmap_ev.header.size = (sizeof(mmap_ev) -
330 (sizeof(mmap_ev.filename) - size)); 337 (sizeof(mmap_ev.filename) - size));
331 mmap_ev.pid = pid; 338 mmap_ev.pid = tgid;
332 mmap_ev.tid = pid; 339 mmap_ev.tid = pid;
333 340
334 write_output(&mmap_ev, mmap_ev.header.size); 341 write_output(&mmap_ev, mmap_ev.header.size);
@@ -347,14 +354,14 @@ static void synthesize_all(void)
347 354
348 while (!readdir_r(proc, &dirent, &next) && next) { 355 while (!readdir_r(proc, &dirent, &next) && next) {
349 char *end; 356 char *end;
350 pid_t pid; 357 pid_t pid, tgid;
351 358
352 pid = strtol(dirent.d_name, &end, 10); 359 pid = strtol(dirent.d_name, &end, 10);
353 if (*end) /* only interested in proper numerical dirents */ 360 if (*end) /* only interested in proper numerical dirents */
354 continue; 361 continue;
355 362
356 pid_synthesize_comm_event(pid, 1); 363 tgid = pid_synthesize_comm_event(pid, 1);
357 pid_synthesize_mmap_samples(pid); 364 pid_synthesize_mmap_samples(pid, tgid);
358 } 365 }
359 366
360 closedir(proc); 367 closedir(proc);
@@ -392,7 +399,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
392 PERF_FORMAT_TOTAL_TIME_RUNNING | 399 PERF_FORMAT_TOTAL_TIME_RUNNING |
393 PERF_FORMAT_ID; 400 PERF_FORMAT_ID;
394 401
395 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 402 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
396 403
397 if (freq) { 404 if (freq) {
398 attr->sample_type |= PERF_SAMPLE_PERIOD; 405 attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -412,6 +419,9 @@ static void create_counter(int counter, int cpu, pid_t pid)
412 if (call_graph) 419 if (call_graph)
413 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 420 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
414 421
422 if (raw_samples)
423 attr->sample_type |= PERF_SAMPLE_RAW;
424
415 attr->mmap = track; 425 attr->mmap = track;
416 attr->comm = track; 426 attr->comm = track;
417 attr->inherit = (cpu < 0) && inherit; 427 attr->inherit = (cpu < 0) && inherit;
@@ -425,6 +435,8 @@ try_again:
425 435
426 if (err == EPERM) 436 if (err == EPERM)
427 die("Permission error - are you root?\n"); 437 die("Permission error - are you root?\n");
438 else if (err == ENODEV && profile_cpu != -1)
439 die("No such device - did you specify an out-of-range profile CPU?\n");
428 440
429 /* 441 /*
430 * If it's cycles then fall back to hrtimer 442 * If it's cycles then fall back to hrtimer
@@ -524,10 +536,14 @@ static int __cmd_record(int argc, const char **argv)
524 signal(SIGCHLD, sig_handler); 536 signal(SIGCHLD, sig_handler);
525 signal(SIGINT, sig_handler); 537 signal(SIGINT, sig_handler);
526 538
527 if (!stat(output_name, &st) && !force && !append_file) { 539 if (!stat(output_name, &st) && st.st_size) {
528 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n", 540 if (!force && !append_file) {
529 output_name); 541 fprintf(stderr, "Error, output file %s exists, use -A to append or -f to overwrite.\n",
530 exit(-1); 542 output_name);
543 exit(-1);
544 }
545 } else {
546 append_file = 0;
531 } 547 }
532 548
533 flags = O_CREAT|O_RDWR; 549 flags = O_CREAT|O_RDWR;
@@ -554,16 +570,22 @@ static int __cmd_record(int argc, const char **argv)
554 if (pid == -1) 570 if (pid == -1)
555 pid = getpid(); 571 pid = getpid();
556 572
557 open_counters(-1, pid); 573 open_counters(profile_cpu, pid);
558 } else for (i = 0; i < nr_cpus; i++) 574 } else {
559 open_counters(i, target_pid); 575 if (profile_cpu != -1) {
576 open_counters(profile_cpu, target_pid);
577 } else {
578 for (i = 0; i < nr_cpus; i++)
579 open_counters(i, target_pid);
580 }
581 }
560 582
561 if (file_new) 583 if (file_new)
562 perf_header__write(header, output); 584 perf_header__write(header, output);
563 585
564 if (!system_wide) { 586 if (!system_wide) {
565 pid_synthesize_comm_event(pid, 0); 587 pid_t tgid = pid_synthesize_comm_event(pid, 0);
566 pid_synthesize_mmap_samples(pid); 588 pid_synthesize_mmap_samples(pid, tgid);
567 } else 589 } else
568 synthesize_all(); 590 synthesize_all();
569 591
@@ -631,10 +653,14 @@ static const struct option options[] = {
631 "record events on existing pid"), 653 "record events on existing pid"),
632 OPT_INTEGER('r', "realtime", &realtime_prio, 654 OPT_INTEGER('r', "realtime", &realtime_prio,
633 "collect data with this RT SCHED_FIFO priority"), 655 "collect data with this RT SCHED_FIFO priority"),
656 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
657 "collect raw sample records from all opened counters"),
634 OPT_BOOLEAN('a', "all-cpus", &system_wide, 658 OPT_BOOLEAN('a', "all-cpus", &system_wide,
635 "system-wide collection from all CPUs"), 659 "system-wide collection from all CPUs"),
636 OPT_BOOLEAN('A', "append", &append_file, 660 OPT_BOOLEAN('A', "append", &append_file,
637 "append to the output file to do incremental profiling"), 661 "append to the output file to do incremental profiling"),
662 OPT_INTEGER('C', "profile_cpu", &profile_cpu,
663 "CPU to profile on"),
638 OPT_BOOLEAN('f', "force", &force, 664 OPT_BOOLEAN('f', "force", &force,
639 "overwrite existing data file"), 665 "overwrite existing data file"),
640 OPT_LONG('c', "count", &default_interval, 666 OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8cb58d68a006..b53a60fc12de 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -68,7 +68,7 @@ static int callchain;
68 68
69static 69static
70struct callchain_param callchain_param = { 70struct callchain_param callchain_param = {
71 .mode = CHAIN_GRAPH_ABS, 71 .mode = CHAIN_GRAPH_REL,
72 .min_percent = 0.5 72 .min_percent = 0.5
73}; 73};
74 74
@@ -112,7 +112,9 @@ struct read_event {
112 struct perf_event_header header; 112 struct perf_event_header header;
113 u32 pid,tid; 113 u32 pid,tid;
114 u64 value; 114 u64 value;
115 u64 format[3]; 115 u64 time_enabled;
116 u64 time_running;
117 u64 id;
116}; 118};
117 119
118typedef union event_union { 120typedef union event_union {
@@ -698,7 +700,8 @@ sort__sym_print(FILE *fp, struct hist_entry *self, unsigned int width __used)
698 size_t ret = 0; 700 size_t ret = 0;
699 701
700 if (verbose) 702 if (verbose)
701 ret += repsep_fprintf(fp, "%#018llx ", (u64)self->ip); 703 ret += repsep_fprintf(fp, "%#018llx %c ", (u64)self->ip,
704 dso__symtab_origin(self->dso));
702 705
703 ret += repsep_fprintf(fp, "[%c] ", self->level); 706 ret += repsep_fprintf(fp, "[%c] ", self->level);
704 if (self->sym) { 707 if (self->sym) {
@@ -888,6 +891,21 @@ ipchain__fprintf_graph(FILE *fp, struct callchain_list *chain, int depth,
888 return ret; 891 return ret;
889} 892}
890 893
894static struct symbol *rem_sq_bracket;
895static struct callchain_list rem_hits;
896
897static void init_rem_hits(void)
898{
899 rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
900 if (!rem_sq_bracket) {
901 fprintf(stderr, "Not enough memory to display remaining hits\n");
902 return;
903 }
904
905 strcpy(rem_sq_bracket->name, "[...]");
906 rem_hits.sym = rem_sq_bracket;
907}
908
891static size_t 909static size_t
892callchain__fprintf_graph(FILE *fp, struct callchain_node *self, 910callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
893 u64 total_samples, int depth, int depth_mask) 911 u64 total_samples, int depth, int depth_mask)
@@ -897,25 +915,34 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
897 struct callchain_list *chain; 915 struct callchain_list *chain;
898 int new_depth_mask = depth_mask; 916 int new_depth_mask = depth_mask;
899 u64 new_total; 917 u64 new_total;
918 u64 remaining;
900 size_t ret = 0; 919 size_t ret = 0;
901 int i; 920 int i;
902 921
903 if (callchain_param.mode == CHAIN_GRAPH_REL) 922 if (callchain_param.mode == CHAIN_GRAPH_REL)
904 new_total = self->cumul_hit; 923 new_total = self->children_hit;
905 else 924 else
906 new_total = total_samples; 925 new_total = total_samples;
907 926
927 remaining = new_total;
928
908 node = rb_first(&self->rb_root); 929 node = rb_first(&self->rb_root);
909 while (node) { 930 while (node) {
931 u64 cumul;
932
910 child = rb_entry(node, struct callchain_node, rb_node); 933 child = rb_entry(node, struct callchain_node, rb_node);
934 cumul = cumul_hits(child);
935 remaining -= cumul;
911 936
912 /* 937 /*
913 * The depth mask manages the output of pipes that show 938 * The depth mask manages the output of pipes that show
914 * the depth. We don't want to keep the pipes of the current 939 * the depth. We don't want to keep the pipes of the current
915 * level for the last child of this depth 940 * level for the last child of this depth.
941 * Except if we have remaining filtered hits. They will
942 * supersede the last child
916 */ 943 */
917 next = rb_next(node); 944 next = rb_next(node);
918 if (!next) 945 if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
919 new_depth_mask &= ~(1 << (depth - 1)); 946 new_depth_mask &= ~(1 << (depth - 1));
920 947
921 /* 948 /*
@@ -930,7 +957,7 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
930 ret += ipchain__fprintf_graph(fp, chain, depth, 957 ret += ipchain__fprintf_graph(fp, chain, depth,
931 new_depth_mask, i++, 958 new_depth_mask, i++,
932 new_total, 959 new_total,
933 child->cumul_hit); 960 cumul);
934 } 961 }
935 ret += callchain__fprintf_graph(fp, child, new_total, 962 ret += callchain__fprintf_graph(fp, child, new_total,
936 depth + 1, 963 depth + 1,
@@ -938,6 +965,19 @@ callchain__fprintf_graph(FILE *fp, struct callchain_node *self,
938 node = next; 965 node = next;
939 } 966 }
940 967
968 if (callchain_param.mode == CHAIN_GRAPH_REL &&
969 remaining && remaining != new_total) {
970
971 if (!rem_sq_bracket)
972 return ret;
973
974 new_depth_mask &= ~(1 << (depth - 1));
975
976 ret += ipchain__fprintf_graph(fp, &rem_hits, depth,
977 new_depth_mask, 0, new_total,
978 remaining);
979 }
980
941 return ret; 981 return ret;
942} 982}
943 983
@@ -1358,6 +1398,8 @@ static size_t output__fprintf(FILE *fp, u64 total_samples)
1358 unsigned int width; 1398 unsigned int width;
1359 char *col_width = col_width_list_str; 1399 char *col_width = col_width_list_str;
1360 1400
1401 init_rem_hits();
1402
1361 fprintf(fp, "# Samples: %Ld\n", (u64)total_samples); 1403 fprintf(fp, "# Samples: %Ld\n", (u64)total_samples);
1362 fprintf(fp, "#\n"); 1404 fprintf(fp, "#\n");
1363 1405
@@ -1429,6 +1471,8 @@ print_entries:
1429 } 1471 }
1430 fprintf(fp, "\n"); 1472 fprintf(fp, "\n");
1431 1473
1474 free(rem_sq_bracket);
1475
1432 return ret; 1476 return ret;
1433} 1477}
1434 1478
@@ -1482,11 +1526,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1482 more_data += sizeof(u64); 1526 more_data += sizeof(u64);
1483 } 1527 }
1484 1528
1485 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", 1529 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
1486 (void *)(offset + head), 1530 (void *)(offset + head),
1487 (void *)(long)(event->header.size), 1531 (void *)(long)(event->header.size),
1488 event->header.misc, 1532 event->header.misc,
1489 event->ip.pid, 1533 event->ip.pid, event->ip.tid,
1490 (void *)(long)ip, 1534 (void *)(long)ip,
1491 (long long)period); 1535 (long long)period);
1492 1536
@@ -1546,10 +1590,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1546 if (show & show_mask) { 1590 if (show & show_mask) {
1547 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); 1591 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
1548 1592
1549 if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) 1593 if (dso_list && (!dso || !dso->name ||
1594 !strlist__has_entry(dso_list, dso->name)))
1550 return 0; 1595 return 0;
1551 1596
1552 if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) 1597 if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
1553 return 0; 1598 return 0;
1554 1599
1555 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { 1600 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1568,10 +1613,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
1568 struct thread *thread = threads__findnew(event->mmap.pid); 1613 struct thread *thread = threads__findnew(event->mmap.pid);
1569 struct map *map = map__new(&event->mmap); 1614 struct map *map = map__new(&event->mmap);
1570 1615
1571 dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", 1616 dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
1572 (void *)(offset + head), 1617 (void *)(offset + head),
1573 (void *)(long)(event->header.size), 1618 (void *)(long)(event->header.size),
1574 event->mmap.pid, 1619 event->mmap.pid,
1620 event->mmap.tid,
1575 (void *)(long)event->mmap.start, 1621 (void *)(long)event->mmap.start,
1576 (void *)(long)event->mmap.len, 1622 (void *)(long)event->mmap.len,
1577 (void *)(long)event->mmap.pgoff, 1623 (void *)(long)event->mmap.pgoff,
@@ -1690,14 +1736,37 @@ static void trace_event(event_t *event)
1690 dprintf(".\n"); 1736 dprintf(".\n");
1691} 1737}
1692 1738
1739static struct perf_header *header;
1740
1741static struct perf_counter_attr *perf_header__find_attr(u64 id)
1742{
1743 int i;
1744
1745 for (i = 0; i < header->attrs; i++) {
1746 struct perf_header_attr *attr = header->attr[i];
1747 int j;
1748
1749 for (j = 0; j < attr->ids; j++) {
1750 if (attr->id[j] == id)
1751 return &attr->attr;
1752 }
1753 }
1754
1755 return NULL;
1756}
1757
1693static int 1758static int
1694process_read_event(event_t *event, unsigned long offset, unsigned long head) 1759process_read_event(event_t *event, unsigned long offset, unsigned long head)
1695{ 1760{
1696 dprintf("%p [%p]: PERF_EVENT_READ: %d %d %Lu\n", 1761 struct perf_counter_attr *attr = perf_header__find_attr(event->read.id);
1762
1763 dprintf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n",
1697 (void *)(offset + head), 1764 (void *)(offset + head),
1698 (void *)(long)(event->header.size), 1765 (void *)(long)(event->header.size),
1699 event->read.pid, 1766 event->read.pid,
1700 event->read.tid, 1767 event->read.tid,
1768 attr ? __event_name(attr->type, attr->config)
1769 : "FAIL",
1701 event->read.value); 1770 event->read.value);
1702 1771
1703 return 0; 1772 return 0;
@@ -1743,8 +1812,6 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
1743 return 0; 1812 return 0;
1744} 1813}
1745 1814
1746static struct perf_header *header;
1747
1748static u64 perf_header__sample_type(void) 1815static u64 perf_header__sample_type(void)
1749{ 1816{
1750 u64 sample_type = 0; 1817 u64 sample_type = 0;
@@ -1812,6 +1879,13 @@ static int __cmd_report(void)
1812 " -g?\n"); 1879 " -g?\n");
1813 exit(-1); 1880 exit(-1);
1814 } 1881 }
1882 } else if (callchain_param.mode != CHAIN_NONE && !callchain) {
1883 callchain = 1;
1884 if (register_callchain_param(&callchain_param) < 0) {
1885 fprintf(stderr, "Can't register callchain"
1886 " params\n");
1887 exit(-1);
1888 }
1815 } 1889 }
1816 1890
1817 if (load_kernel() < 0) { 1891 if (load_kernel() < 0) {
@@ -1950,6 +2024,13 @@ parse_callchain_opt(const struct option *opt __used, const char *arg,
1950 else if (!strncmp(tok, "fractal", strlen(arg))) 2024 else if (!strncmp(tok, "fractal", strlen(arg)))
1951 callchain_param.mode = CHAIN_GRAPH_REL; 2025 callchain_param.mode = CHAIN_GRAPH_REL;
1952 2026
2027 else if (!strncmp(tok, "none", strlen(arg))) {
2028 callchain_param.mode = CHAIN_NONE;
2029 callchain = 0;
2030
2031 return 0;
2032 }
2033
1953 else 2034 else
1954 return -1; 2035 return -1;
1955 2036
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f9510eeeb6c7..b4b06c7903e1 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -496,7 +496,7 @@ static const struct option options[] = {
496 "stat events on existing pid"), 496 "stat events on existing pid"),
497 OPT_BOOLEAN('a', "all-cpus", &system_wide, 497 OPT_BOOLEAN('a', "all-cpus", &system_wide,
498 "system-wide collection from all CPUs"), 498 "system-wide collection from all CPUs"),
499 OPT_BOOLEAN('S', "scale", &scale, 499 OPT_BOOLEAN('c', "scale", &scale,
500 "scale/normalize counters"), 500 "scale/normalize counters"),
501 OPT_BOOLEAN('v', "verbose", &verbose, 501 OPT_BOOLEAN('v', "verbose", &verbose,
502 "be more verbose (show counter open errors, etc)"), 502 "be more verbose (show counter open errors, etc)"),
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index f139f1ab9333..7de28ce9ca26 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -31,6 +31,8 @@
31#include <fcntl.h> 31#include <fcntl.h>
32 32
33#include <stdio.h> 33#include <stdio.h>
34#include <termios.h>
35#include <unistd.h>
34 36
35#include <errno.h> 37#include <errno.h>
36#include <time.h> 38#include <time.h>
@@ -54,7 +56,7 @@ static int system_wide = 0;
54 56
55static int default_interval = 100000; 57static int default_interval = 100000;
56 58
57static u64 count_filter = 5; 59static int count_filter = 5;
58static int print_entries = 15; 60static int print_entries = 15;
59 61
60static int target_pid = -1; 62static int target_pid = -1;
@@ -69,15 +71,28 @@ static int freq = 0;
69static int verbose = 0; 71static int verbose = 0;
70static char *vmlinux = NULL; 72static char *vmlinux = NULL;
71 73
72static char *sym_filter;
73static unsigned long filter_start;
74static unsigned long filter_end;
75
76static int delay_secs = 2; 74static int delay_secs = 2;
77static int zero; 75static int zero;
78static int dump_symtab; 76static int dump_symtab;
79 77
80/* 78/*
79 * Source
80 */
81
82struct source_line {
83 u64 eip;
84 unsigned long count[MAX_COUNTERS];
85 char *line;
86 struct source_line *next;
87};
88
89static char *sym_filter = NULL;
90struct sym_entry *sym_filter_entry = NULL;
91static int sym_pcnt_filter = 5;
92static int sym_counter = 0;
93static int display_weighted = -1;
94
95/*
81 * Symbols 96 * Symbols
82 */ 97 */
83 98
@@ -91,9 +106,237 @@ struct sym_entry {
91 unsigned long snap_count; 106 unsigned long snap_count;
92 double weight; 107 double weight;
93 int skip; 108 int skip;
109 struct source_line *source;
110 struct source_line *lines;
111 struct source_line **lines_tail;
112 pthread_mutex_t source_lock;
94}; 113};
95 114
96struct sym_entry *sym_filter_entry; 115/*
116 * Source functions
117 */
118
119static void parse_source(struct sym_entry *syme)
120{
121 struct symbol *sym;
122 struct module *module;
123 struct section *section = NULL;
124 FILE *file;
125 char command[PATH_MAX*2], *path = vmlinux;
126 u64 start, end, len;
127
128 if (!syme)
129 return;
130
131 if (syme->lines) {
132 pthread_mutex_lock(&syme->source_lock);
133 goto out_assign;
134 }
135
136 sym = (struct symbol *)(syme + 1);
137 module = sym->module;
138
139 if (module)
140 path = module->path;
141 if (!path)
142 return;
143
144 start = sym->obj_start;
145 if (!start)
146 start = sym->start;
147
148 if (module) {
149 section = module->sections->find_section(module->sections, ".text");
150 if (section)
151 start -= section->vma;
152 }
153
154 end = start + sym->end - sym->start + 1;
155 len = sym->end - sym->start;
156
157 sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", start, end, path);
158
159 file = popen(command, "r");
160 if (!file)
161 return;
162
163 pthread_mutex_lock(&syme->source_lock);
164 syme->lines_tail = &syme->lines;
165 while (!feof(file)) {
166 struct source_line *src;
167 size_t dummy = 0;
168 char *c;
169
170 src = malloc(sizeof(struct source_line));
171 assert(src != NULL);
172 memset(src, 0, sizeof(struct source_line));
173
174 if (getline(&src->line, &dummy, file) < 0)
175 break;
176 if (!src->line)
177 break;
178
179 c = strchr(src->line, '\n');
180 if (c)
181 *c = 0;
182
183 src->next = NULL;
184 *syme->lines_tail = src;
185 syme->lines_tail = &src->next;
186
187 if (strlen(src->line)>8 && src->line[8] == ':') {
188 src->eip = strtoull(src->line, NULL, 16);
189 if (section)
190 src->eip += section->vma;
191 }
192 if (strlen(src->line)>8 && src->line[16] == ':') {
193 src->eip = strtoull(src->line, NULL, 16);
194 if (section)
195 src->eip += section->vma;
196 }
197 }
198 pclose(file);
199out_assign:
200 sym_filter_entry = syme;
201 pthread_mutex_unlock(&syme->source_lock);
202}
203
204static void __zero_source_counters(struct sym_entry *syme)
205{
206 int i;
207 struct source_line *line;
208
209 line = syme->lines;
210 while (line) {
211 for (i = 0; i < nr_counters; i++)
212 line->count[i] = 0;
213 line = line->next;
214 }
215}
216
217static void record_precise_ip(struct sym_entry *syme, int counter, u64 ip)
218{
219 struct source_line *line;
220
221 if (syme != sym_filter_entry)
222 return;
223
224 if (pthread_mutex_trylock(&syme->source_lock))
225 return;
226
227 if (!syme->source)
228 goto out_unlock;
229
230 for (line = syme->lines; line; line = line->next) {
231 if (line->eip == ip) {
232 line->count[counter]++;
233 break;
234 }
235 if (line->eip > ip)
236 break;
237 }
238out_unlock:
239 pthread_mutex_unlock(&syme->source_lock);
240}
241
242static void lookup_sym_source(struct sym_entry *syme)
243{
244 struct symbol *symbol = (struct symbol *)(syme + 1);
245 struct source_line *line;
246 char pattern[PATH_MAX];
247 char *idx;
248
249 sprintf(pattern, "<%s>:", symbol->name);
250
251 if (symbol->module) {
252 idx = strstr(pattern, "\t");
253 if (idx)
254 *idx = 0;
255 }
256
257 pthread_mutex_lock(&syme->source_lock);
258 for (line = syme->lines; line; line = line->next) {
259 if (strstr(line->line, pattern)) {
260 syme->source = line;
261 break;
262 }
263 }
264 pthread_mutex_unlock(&syme->source_lock);
265}
266
267static void show_lines(struct source_line *queue, int count, int total)
268{
269 int i;
270 struct source_line *line;
271
272 line = queue;
273 for (i = 0; i < count; i++) {
274 float pcnt = 100.0*(float)line->count[sym_counter]/(float)total;
275
276 printf("%8li %4.1f%%\t%s\n", line->count[sym_counter], pcnt, line->line);
277 line = line->next;
278 }
279}
280
281#define TRACE_COUNT 3
282
283static void show_details(struct sym_entry *syme)
284{
285 struct symbol *symbol;
286 struct source_line *line;
287 struct source_line *line_queue = NULL;
288 int displayed = 0;
289 int line_queue_count = 0, total = 0, more = 0;
290
291 if (!syme)
292 return;
293
294 if (!syme->source)
295 lookup_sym_source(syme);
296
297 if (!syme->source)
298 return;
299
300 symbol = (struct symbol *)(syme + 1);
301 printf("Showing %s for %s\n", event_name(sym_counter), symbol->name);
302 printf(" Events Pcnt (>=%d%%)\n", sym_pcnt_filter);
303
304 pthread_mutex_lock(&syme->source_lock);
305 line = syme->source;
306 while (line) {
307 total += line->count[sym_counter];
308 line = line->next;
309 }
310
311 line = syme->source;
312 while (line) {
313 float pcnt = 0.0;
314
315 if (!line_queue_count)
316 line_queue = line;
317 line_queue_count++;
318
319 if (line->count[sym_counter])
320 pcnt = 100.0 * line->count[sym_counter] / (float)total;
321 if (pcnt >= (float)sym_pcnt_filter) {
322 if (displayed <= print_entries)
323 show_lines(line_queue, line_queue_count, total);
324 else more++;
325 displayed += line_queue_count;
326 line_queue_count = 0;
327 line_queue = NULL;
328 } else if (line_queue_count > TRACE_COUNT) {
329 line_queue = line_queue->next;
330 line_queue_count--;
331 }
332
333 line->count[sym_counter] = zero ? 0 : line->count[sym_counter] * 7 / 8;
334 line = line->next;
335 }
336 pthread_mutex_unlock(&syme->source_lock);
337 if (more)
338 printf("%d lines not displayed, maybe increase display entries [e]\n", more);
339}
97 340
98struct dso *kernel_dso; 341struct dso *kernel_dso;
99 342
@@ -112,6 +355,9 @@ static double sym_weight(const struct sym_entry *sym)
112 double weight = sym->snap_count; 355 double weight = sym->snap_count;
113 int counter; 356 int counter;
114 357
358 if (!display_weighted)
359 return weight;
360
115 for (counter = 1; counter < nr_counters-1; counter++) 361 for (counter = 1; counter < nr_counters-1; counter++)
116 weight *= sym->count[counter]; 362 weight *= sym->count[counter];
117 363
@@ -159,7 +405,7 @@ static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se)
159static void print_sym_table(void) 405static void print_sym_table(void)
160{ 406{
161 int printed = 0, j; 407 int printed = 0, j;
162 int counter; 408 int counter, snap = !display_weighted ? sym_counter : 0;
163 float samples_per_sec = samples/delay_secs; 409 float samples_per_sec = samples/delay_secs;
164 float ksamples_per_sec = (samples-userspace_samples)/delay_secs; 410 float ksamples_per_sec = (samples-userspace_samples)/delay_secs;
165 float sum_ksamples = 0.0; 411 float sum_ksamples = 0.0;
@@ -175,7 +421,7 @@ static void print_sym_table(void)
175 pthread_mutex_unlock(&active_symbols_lock); 421 pthread_mutex_unlock(&active_symbols_lock);
176 422
177 list_for_each_entry_safe_from(syme, n, &active_symbols, node) { 423 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
178 syme->snap_count = syme->count[0]; 424 syme->snap_count = syme->count[snap];
179 if (syme->snap_count != 0) { 425 if (syme->snap_count != 0) {
180 syme->weight = sym_weight(syme); 426 syme->weight = sym_weight(syme);
181 rb_insert_active_sym(&tmp, syme); 427 rb_insert_active_sym(&tmp, syme);
@@ -195,7 +441,7 @@ static void print_sym_table(void)
195 samples_per_sec, 441 samples_per_sec,
196 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); 442 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec)));
197 443
198 if (nr_counters == 1) { 444 if (nr_counters == 1 || !display_weighted) {
199 printf("%Ld", (u64)attrs[0].sample_period); 445 printf("%Ld", (u64)attrs[0].sample_period);
200 if (freq) 446 if (freq)
201 printf("Hz "); 447 printf("Hz ");
@@ -203,7 +449,9 @@ static void print_sym_table(void)
203 printf(" "); 449 printf(" ");
204 } 450 }
205 451
206 for (counter = 0; counter < nr_counters; counter++) { 452 if (!display_weighted)
453 printf("%s", event_name(sym_counter));
454 else for (counter = 0; counter < nr_counters; counter++) {
207 if (counter) 455 if (counter)
208 printf("/"); 456 printf("/");
209 457
@@ -228,6 +476,11 @@ static void print_sym_table(void)
228 476
229 printf("------------------------------------------------------------------------------\n\n"); 477 printf("------------------------------------------------------------------------------\n\n");
230 478
479 if (sym_filter_entry) {
480 show_details(sym_filter_entry);
481 return;
482 }
483
231 if (nr_counters == 1) 484 if (nr_counters == 1)
232 printf(" samples pcnt"); 485 printf(" samples pcnt");
233 else 486 else
@@ -242,13 +495,13 @@ static void print_sym_table(void)
242 struct symbol *sym = (struct symbol *)(syme + 1); 495 struct symbol *sym = (struct symbol *)(syme + 1);
243 double pcnt; 496 double pcnt;
244 497
245 if (++printed > print_entries || syme->snap_count < count_filter) 498 if (++printed > print_entries || (int)syme->snap_count < count_filter)
246 continue; 499 continue;
247 500
248 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / 501 pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) /
249 sum_ksamples)); 502 sum_ksamples));
250 503
251 if (nr_counters == 1) 504 if (nr_counters == 1 || !display_weighted)
252 printf("%20.2f - ", syme->weight); 505 printf("%20.2f - ", syme->weight);
253 else 506 else
254 printf("%9.1f %10ld - ", syme->weight, syme->snap_count); 507 printf("%9.1f %10ld - ", syme->weight, syme->snap_count);
@@ -261,19 +514,250 @@ static void print_sym_table(void)
261 } 514 }
262} 515}
263 516
517static void prompt_integer(int *target, const char *msg)
518{
519 char *buf = malloc(0), *p;
520 size_t dummy = 0;
521 int tmp;
522
523 fprintf(stdout, "\n%s: ", msg);
524 if (getline(&buf, &dummy, stdin) < 0)
525 return;
526
527 p = strchr(buf, '\n');
528 if (p)
529 *p = 0;
530
531 p = buf;
532 while(*p) {
533 if (!isdigit(*p))
534 goto out_free;
535 p++;
536 }
537 tmp = strtoul(buf, NULL, 10);
538 *target = tmp;
539out_free:
540 free(buf);
541}
542
543static void prompt_percent(int *target, const char *msg)
544{
545 int tmp = 0;
546
547 prompt_integer(&tmp, msg);
548 if (tmp >= 0 && tmp <= 100)
549 *target = tmp;
550}
551
552static void prompt_symbol(struct sym_entry **target, const char *msg)
553{
554 char *buf = malloc(0), *p;
555 struct sym_entry *syme = *target, *n, *found = NULL;
556 size_t dummy = 0;
557
558 /* zero counters of active symbol */
559 if (syme) {
560 pthread_mutex_lock(&syme->source_lock);
561 __zero_source_counters(syme);
562 *target = NULL;
563 pthread_mutex_unlock(&syme->source_lock);
564 }
565
566 fprintf(stdout, "\n%s: ", msg);
567 if (getline(&buf, &dummy, stdin) < 0)
568 goto out_free;
569
570 p = strchr(buf, '\n');
571 if (p)
572 *p = 0;
573
574 pthread_mutex_lock(&active_symbols_lock);
575 syme = list_entry(active_symbols.next, struct sym_entry, node);
576 pthread_mutex_unlock(&active_symbols_lock);
577
578 list_for_each_entry_safe_from(syme, n, &active_symbols, node) {
579 struct symbol *sym = (struct symbol *)(syme + 1);
580
581 if (!strcmp(buf, sym->name)) {
582 found = syme;
583 break;
584 }
585 }
586
587 if (!found) {
588 fprintf(stderr, "Sorry, %s is not active.\n", sym_filter);
589 sleep(1);
590 return;
591 } else
592 parse_source(found);
593
594out_free:
595 free(buf);
596}
597
598static void print_mapped_keys(void)
599{
600 char *name = NULL;
601
602 if (sym_filter_entry) {
603 struct symbol *sym = (struct symbol *)(sym_filter_entry+1);
604 name = sym->name;
605 }
606
607 fprintf(stdout, "\nMapped keys:\n");
608 fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", delay_secs);
609 fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", print_entries);
610
611 if (nr_counters > 1)
612 fprintf(stdout, "\t[E] active event counter. \t(%s)\n", event_name(sym_counter));
613
614 fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", count_filter);
615
616 if (vmlinux) {
617 fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", sym_pcnt_filter);
618 fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
619 fprintf(stdout, "\t[S] stop annotation.\n");
620 }
621
622 if (nr_counters > 1)
623 fprintf(stdout, "\t[w] toggle display weighted/count[E]r. \t(%d)\n", display_weighted ? 1 : 0);
624
625 fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", zero ? 1 : 0);
626 fprintf(stdout, "\t[qQ] quit.\n");
627}
628
629static int key_mapped(int c)
630{
631 switch (c) {
632 case 'd':
633 case 'e':
634 case 'f':
635 case 'z':
636 case 'q':
637 case 'Q':
638 return 1;
639 case 'E':
640 case 'w':
641 return nr_counters > 1 ? 1 : 0;
642 case 'F':
643 case 's':
644 case 'S':
645 return vmlinux ? 1 : 0;
646 }
647
648 return 0;
649}
650
651static void handle_keypress(int c)
652{
653 if (!key_mapped(c)) {
654 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
655 struct termios tc, save;
656
657 print_mapped_keys();
658 fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
659 fflush(stdout);
660
661 tcgetattr(0, &save);
662 tc = save;
663 tc.c_lflag &= ~(ICANON | ECHO);
664 tc.c_cc[VMIN] = 0;
665 tc.c_cc[VTIME] = 0;
666 tcsetattr(0, TCSANOW, &tc);
667
668 poll(&stdin_poll, 1, -1);
669 c = getc(stdin);
670
671 tcsetattr(0, TCSAFLUSH, &save);
672 if (!key_mapped(c))
673 return;
674 }
675
676 switch (c) {
677 case 'd':
678 prompt_integer(&delay_secs, "Enter display delay");
679 break;
680 case 'e':
681 prompt_integer(&print_entries, "Enter display entries (lines)");
682 break;
683 case 'E':
684 if (nr_counters > 1) {
685 int i;
686
687 fprintf(stderr, "\nAvailable events:");
688 for (i = 0; i < nr_counters; i++)
689 fprintf(stderr, "\n\t%d %s", i, event_name(i));
690
691 prompt_integer(&sym_counter, "Enter details event counter");
692
693 if (sym_counter >= nr_counters) {
694 fprintf(stderr, "Sorry, no such event, using %s.\n", event_name(0));
695 sym_counter = 0;
696 sleep(1);
697 }
698 } else sym_counter = 0;
699 break;
700 case 'f':
701 prompt_integer(&count_filter, "Enter display event count filter");
702 break;
703 case 'F':
704 prompt_percent(&sym_pcnt_filter, "Enter details display event filter (percent)");
705 break;
706 case 'q':
707 case 'Q':
708 printf("exiting.\n");
709 exit(0);
710 case 's':
711 prompt_symbol(&sym_filter_entry, "Enter details symbol");
712 break;
713 case 'S':
714 if (!sym_filter_entry)
715 break;
716 else {
717 struct sym_entry *syme = sym_filter_entry;
718
719 pthread_mutex_lock(&syme->source_lock);
720 sym_filter_entry = NULL;
721 __zero_source_counters(syme);
722 pthread_mutex_unlock(&syme->source_lock);
723 }
724 break;
725 case 'w':
726 display_weighted = ~display_weighted;
727 break;
728 case 'z':
729 zero = ~zero;
730 break;
731 }
732}
733
264static void *display_thread(void *arg __used) 734static void *display_thread(void *arg __used)
265{ 735{
266 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; 736 struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
267 int delay_msecs = delay_secs * 1000; 737 struct termios tc, save;
738 int delay_msecs, c;
739
740 tcgetattr(0, &save);
741 tc = save;
742 tc.c_lflag &= ~(ICANON | ECHO);
743 tc.c_cc[VMIN] = 0;
744 tc.c_cc[VTIME] = 0;
268 745
269 printf("PerfTop refresh period: %d seconds\n", delay_secs); 746repeat:
747 delay_msecs = delay_secs * 1000;
748 tcsetattr(0, TCSANOW, &tc);
749 /* trash return*/
750 getc(stdin);
270 751
271 do { 752 do {
272 print_sym_table(); 753 print_sym_table();
273 } while (!poll(&stdin_poll, 1, delay_msecs) == 1); 754 } while (!poll(&stdin_poll, 1, delay_msecs) == 1);
274 755
275 printf("key pressed - exiting.\n"); 756 c = getc(stdin);
276 exit(0); 757 tcsetattr(0, TCSAFLUSH, &save);
758
759 handle_keypress(c);
760 goto repeat;
277 761
278 return NULL; 762 return NULL;
279} 763}
@@ -293,7 +777,6 @@ static const char *skip_symbols[] = {
293 777
294static int symbol_filter(struct dso *self, struct symbol *sym) 778static int symbol_filter(struct dso *self, struct symbol *sym)
295{ 779{
296 static int filter_match;
297 struct sym_entry *syme; 780 struct sym_entry *syme;
298 const char *name = sym->name; 781 const char *name = sym->name;
299 int i; 782 int i;
@@ -315,6 +798,10 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
315 return 1; 798 return 1;
316 799
317 syme = dso__sym_priv(self, sym); 800 syme = dso__sym_priv(self, sym);
801 pthread_mutex_init(&syme->source_lock, NULL);
802 if (!sym_filter_entry && sym_filter && !strcmp(name, sym_filter))
803 sym_filter_entry = syme;
804
318 for (i = 0; skip_symbols[i]; i++) { 805 for (i = 0; skip_symbols[i]; i++) {
319 if (!strcmp(skip_symbols[i], name)) { 806 if (!strcmp(skip_symbols[i], name)) {
320 syme->skip = 1; 807 syme->skip = 1;
@@ -322,29 +809,6 @@ static int symbol_filter(struct dso *self, struct symbol *sym)
322 } 809 }
323 } 810 }
324 811
325 if (filter_match == 1) {
326 filter_end = sym->start;
327 filter_match = -1;
328 if (filter_end - filter_start > 10000) {
329 fprintf(stderr,
330 "hm, too large filter symbol <%s> - skipping.\n",
331 sym_filter);
332 fprintf(stderr, "symbol filter start: %016lx\n",
333 filter_start);
334 fprintf(stderr, " end: %016lx\n",
335 filter_end);
336 filter_end = filter_start = 0;
337 sym_filter = NULL;
338 sleep(1);
339 }
340 }
341
342 if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) {
343 filter_match = 1;
344 filter_start = sym->start;
345 }
346
347
348 return 0; 812 return 0;
349} 813}
350 814
@@ -380,8 +844,6 @@ out_delete_dso:
380 return -1; 844 return -1;
381} 845}
382 846
383#define TRACE_COUNT 3
384
385/* 847/*
386 * Binary search in the histogram table and record the hit: 848 * Binary search in the histogram table and record the hit:
387 */ 849 */
@@ -394,6 +856,7 @@ static void record_ip(u64 ip, int counter)
394 856
395 if (!syme->skip) { 857 if (!syme->skip) {
396 syme->count[counter]++; 858 syme->count[counter]++;
859 record_precise_ip(syme, counter, ip);
397 pthread_mutex_lock(&active_symbols_lock); 860 pthread_mutex_lock(&active_symbols_lock);
398 if (list_empty(&syme->node) || !syme->node.next) 861 if (list_empty(&syme->node) || !syme->node.next)
399 __list_insert_active_sym(syme); 862 __list_insert_active_sym(syme);
@@ -690,8 +1153,8 @@ static const struct option options[] = {
690 "put the counters into a counter group"), 1153 "put the counters into a counter group"),
691 OPT_BOOLEAN('i', "inherit", &inherit, 1154 OPT_BOOLEAN('i', "inherit", &inherit,
692 "child tasks inherit counters"), 1155 "child tasks inherit counters"),
693 OPT_STRING('s', "sym-filter", &sym_filter, "pattern", 1156 OPT_STRING('s', "sym-annotate", &sym_filter, "symbol name",
694 "only display symbols matchig this pattern"), 1157 "symbol to annotate - requires -k option"),
695 OPT_BOOLEAN('z', "zero", &zero, 1158 OPT_BOOLEAN('z', "zero", &zero,
696 "zero history across updates"), 1159 "zero history across updates"),
697 OPT_INTEGER('F', "freq", &freq, 1160 OPT_INTEGER('F', "freq", &freq,
@@ -734,6 +1197,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
734 delay_secs = 1; 1197 delay_secs = 1;
735 1198
736 parse_symbols(); 1199 parse_symbols();
1200 parse_source(sym_filter_entry);
737 1201
738 /* 1202 /*
739 * Fill in the ones not specifically initialized via -c: 1203 * Fill in the ones not specifically initialized via -c:
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 9d3c8141b8c1..011473411642 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -13,6 +13,7 @@
13#include <stdio.h> 13#include <stdio.h>
14#include <stdbool.h> 14#include <stdbool.h>
15#include <errno.h> 15#include <errno.h>
16#include <math.h>
16 17
17#include "callchain.h" 18#include "callchain.h"
18 19
@@ -26,10 +27,14 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
26 struct rb_node **p = &root->rb_node; 27 struct rb_node **p = &root->rb_node;
27 struct rb_node *parent = NULL; 28 struct rb_node *parent = NULL;
28 struct callchain_node *rnode; 29 struct callchain_node *rnode;
30 u64 chain_cumul = cumul_hits(chain);
29 31
30 while (*p) { 32 while (*p) {
33 u64 rnode_cumul;
34
31 parent = *p; 35 parent = *p;
32 rnode = rb_entry(parent, struct callchain_node, rb_node); 36 rnode = rb_entry(parent, struct callchain_node, rb_node);
37 rnode_cumul = cumul_hits(rnode);
33 38
34 switch (mode) { 39 switch (mode) {
35 case CHAIN_FLAT: 40 case CHAIN_FLAT:
@@ -40,7 +45,7 @@ rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
40 break; 45 break;
41 case CHAIN_GRAPH_ABS: /* Falldown */ 46 case CHAIN_GRAPH_ABS: /* Falldown */
42 case CHAIN_GRAPH_REL: 47 case CHAIN_GRAPH_REL:
43 if (rnode->cumul_hit < chain->cumul_hit) 48 if (rnode_cumul < chain_cumul)
44 p = &(*p)->rb_left; 49 p = &(*p)->rb_left;
45 else 50 else
46 p = &(*p)->rb_right; 51 p = &(*p)->rb_right;
@@ -87,7 +92,7 @@ static void __sort_chain_graph_abs(struct callchain_node *node,
87 92
88 chain_for_each_child(child, node) { 93 chain_for_each_child(child, node) {
89 __sort_chain_graph_abs(child, min_hit); 94 __sort_chain_graph_abs(child, min_hit);
90 if (child->cumul_hit >= min_hit) 95 if (cumul_hits(child) >= min_hit)
91 rb_insert_callchain(&node->rb_root, child, 96 rb_insert_callchain(&node->rb_root, child,
92 CHAIN_GRAPH_ABS); 97 CHAIN_GRAPH_ABS);
93 } 98 }
@@ -108,11 +113,11 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
108 u64 min_hit; 113 u64 min_hit;
109 114
110 node->rb_root = RB_ROOT; 115 node->rb_root = RB_ROOT;
111 min_hit = node->cumul_hit * min_percent / 100.0; 116 min_hit = ceil(node->children_hit * min_percent);
112 117
113 chain_for_each_child(child, node) { 118 chain_for_each_child(child, node) {
114 __sort_chain_graph_rel(child, min_percent); 119 __sort_chain_graph_rel(child, min_percent);
115 if (child->cumul_hit >= min_hit) 120 if (cumul_hits(child) >= min_hit)
116 rb_insert_callchain(&node->rb_root, child, 121 rb_insert_callchain(&node->rb_root, child,
117 CHAIN_GRAPH_REL); 122 CHAIN_GRAPH_REL);
118 } 123 }
@@ -122,7 +127,7 @@ static void
122sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 127sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
123 u64 min_hit __used, struct callchain_param *param) 128 u64 min_hit __used, struct callchain_param *param)
124{ 129{
125 __sort_chain_graph_rel(chain_root, param->min_percent); 130 __sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
126 rb_root->rb_node = chain_root->rb_root.rb_node; 131 rb_root->rb_node = chain_root->rb_root.rb_node;
127} 132}
128 133
@@ -211,7 +216,8 @@ add_child(struct callchain_node *parent, struct ip_callchain *chain,
211 new = create_child(parent, false); 216 new = create_child(parent, false);
212 fill_node(new, chain, start, syms); 217 fill_node(new, chain, start, syms);
213 218
214 new->cumul_hit = new->hit = 1; 219 new->children_hit = 0;
220 new->hit = 1;
215} 221}
216 222
217/* 223/*
@@ -241,7 +247,8 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
241 247
242 /* split the hits */ 248 /* split the hits */
243 new->hit = parent->hit; 249 new->hit = parent->hit;
244 new->cumul_hit = parent->cumul_hit; 250 new->children_hit = parent->children_hit;
251 parent->children_hit = cumul_hits(new);
245 new->val_nr = parent->val_nr - idx_local; 252 new->val_nr = parent->val_nr - idx_local;
246 parent->val_nr = idx_local; 253 parent->val_nr = idx_local;
247 254
@@ -249,6 +256,7 @@ split_add_child(struct callchain_node *parent, struct ip_callchain *chain,
249 if (idx_total < chain->nr) { 256 if (idx_total < chain->nr) {
250 parent->hit = 0; 257 parent->hit = 0;
251 add_child(parent, chain, idx_total, syms); 258 add_child(parent, chain, idx_total, syms);
259 parent->children_hit++;
252 } else { 260 } else {
253 parent->hit = 1; 261 parent->hit = 1;
254 } 262 }
@@ -269,13 +277,13 @@ __append_chain_children(struct callchain_node *root, struct ip_callchain *chain,
269 unsigned int ret = __append_chain(rnode, chain, start, syms); 277 unsigned int ret = __append_chain(rnode, chain, start, syms);
270 278
271 if (!ret) 279 if (!ret)
272 goto cumul; 280 goto inc_children_hit;
273 } 281 }
274 /* nothing in children, add to the current node */ 282 /* nothing in children, add to the current node */
275 add_child(root, chain, start, syms); 283 add_child(root, chain, start, syms);
276 284
277cumul: 285inc_children_hit:
278 root->cumul_hit++; 286 root->children_hit++;
279} 287}
280 288
281static int 289static int
@@ -317,8 +325,6 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
317 /* we match 100% of the path, increment the hit */ 325 /* we match 100% of the path, increment the hit */
318 if (i - start == root->val_nr && i == chain->nr) { 326 if (i - start == root->val_nr && i == chain->nr) {
319 root->hit++; 327 root->hit++;
320 root->cumul_hit++;
321
322 return 0; 328 return 0;
323 } 329 }
324 330
@@ -331,5 +337,7 @@ __append_chain(struct callchain_node *root, struct ip_callchain *chain,
331void append_chain(struct callchain_node *root, struct ip_callchain *chain, 337void append_chain(struct callchain_node *root, struct ip_callchain *chain,
332 struct symbol **syms) 338 struct symbol **syms)
333{ 339{
340 if (!chain->nr)
341 return;
334 __append_chain_children(root, chain, syms, 0); 342 __append_chain_children(root, chain, syms, 0);
335} 343}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 7812122bea1d..a926ae4f5a16 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -7,6 +7,7 @@
7#include "symbol.h" 7#include "symbol.h"
8 8
9enum chain_mode { 9enum chain_mode {
10 CHAIN_NONE,
10 CHAIN_FLAT, 11 CHAIN_FLAT,
11 CHAIN_GRAPH_ABS, 12 CHAIN_GRAPH_ABS,
12 CHAIN_GRAPH_REL 13 CHAIN_GRAPH_REL
@@ -21,7 +22,7 @@ struct callchain_node {
21 struct rb_root rb_root; /* sorted tree of children */ 22 struct rb_root rb_root; /* sorted tree of children */
22 unsigned int val_nr; 23 unsigned int val_nr;
23 u64 hit; 24 u64 hit;
24 u64 cumul_hit; /* hit + hits of children */ 25 u64 children_hit;
25}; 26};
26 27
27struct callchain_param; 28struct callchain_param;
@@ -48,6 +49,11 @@ static inline void callchain_init(struct callchain_node *node)
48 INIT_LIST_HEAD(&node->val); 49 INIT_LIST_HEAD(&node->val);
49} 50}
50 51
52static inline u64 cumul_hits(struct callchain_node *node)
53{
54 return node->hit + node->children_hit;
55}
56
51int register_callchain_param(struct callchain_param *param); 57int register_callchain_param(struct callchain_param *param);
52void append_chain(struct callchain_node *root, struct ip_callchain *chain, 58void append_chain(struct callchain_node *root, struct ip_callchain *chain,
53 struct symbol **syms); 59 struct symbol **syms);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 450384b3bbe5..b92a457ca32e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -185,6 +185,8 @@ static void do_read(int fd, void *buf, size_t size)
185 185
186 if (ret < 0) 186 if (ret < 0)
187 die("failed to read"); 187 die("failed to read");
188 if (ret == 0)
189 die("failed to read: missing data");
188 190
189 size -= ret; 191 size -= ret;
190 buf += ret; 192 buf += ret;
@@ -213,9 +215,10 @@ struct perf_header *perf_header__read(int fd)
213 215
214 for (i = 0; i < nr_attrs; i++) { 216 for (i = 0; i < nr_attrs; i++) {
215 struct perf_header_attr *attr; 217 struct perf_header_attr *attr;
216 off_t tmp = lseek(fd, 0, SEEK_CUR); 218 off_t tmp;
217 219
218 do_read(fd, &f_attr, sizeof(f_attr)); 220 do_read(fd, &f_attr, sizeof(f_attr));
221 tmp = lseek(fd, 0, SEEK_CUR);
219 222
220 attr = perf_header_attr__new(&f_attr.attr); 223 attr = perf_header_attr__new(&f_attr.attr);
221 224
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 7bdad8df22a6..044178408783 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -121,13 +121,29 @@ static unsigned long hw_cache_stat[C(MAX)] = {
121 (strcmp(sys_dirent.d_name, ".")) && \ 121 (strcmp(sys_dirent.d_name, ".")) && \
122 (strcmp(sys_dirent.d_name, ".."))) 122 (strcmp(sys_dirent.d_name, "..")))
123 123
124static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir)
125{
126 char evt_path[MAXPATHLEN];
127 int fd;
128
129 snprintf(evt_path, MAXPATHLEN, "%s/%s/%s/id", debugfs_path,
130 sys_dir->d_name, evt_dir->d_name);
131 fd = open(evt_path, O_RDONLY);
132 if (fd < 0)
133 return -EINVAL;
134 close(fd);
135
136 return 0;
137}
138
124#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \ 139#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next, file, st) \
125 while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ 140 while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \
126 if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \ 141 if (snprintf(file, MAXPATHLEN, "%s/%s/%s", debugfs_path, \
127 sys_dirent.d_name, evt_dirent.d_name) && \ 142 sys_dirent.d_name, evt_dirent.d_name) && \
128 (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \ 143 (!stat(file, &st)) && (S_ISDIR(st.st_mode)) && \
129 (strcmp(evt_dirent.d_name, ".")) && \ 144 (strcmp(evt_dirent.d_name, ".")) && \
130 (strcmp(evt_dirent.d_name, ".."))) 145 (strcmp(evt_dirent.d_name, "..")) && \
146 (!tp_event_has_id(&sys_dirent, &evt_dirent)))
131 147
132#define MAX_EVENT_LENGTH 30 148#define MAX_EVENT_LENGTH 30
133 149
@@ -223,9 +239,15 @@ char *event_name(int counter)
223{ 239{
224 u64 config = attrs[counter].config; 240 u64 config = attrs[counter].config;
225 int type = attrs[counter].type; 241 int type = attrs[counter].type;
242
243 return __event_name(type, config);
244}
245
246char *__event_name(int type, u64 config)
247{
226 static char buf[32]; 248 static char buf[32];
227 249
228 if (attrs[counter].type == PERF_TYPE_RAW) { 250 if (type == PERF_TYPE_RAW) {
229 sprintf(buf, "raw 0x%llx", config); 251 sprintf(buf, "raw 0x%llx", config);
230 return buf; 252 return buf;
231 } 253 }
@@ -357,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
357 struct perf_counter_attr *attr) 379 struct perf_counter_attr *attr)
358{ 380{
359 const char *evt_name; 381 const char *evt_name;
382 char *flags;
360 char sys_name[MAX_EVENT_LENGTH]; 383 char sys_name[MAX_EVENT_LENGTH];
361 char id_buf[4]; 384 char id_buf[4];
362 int fd; 385 int fd;
@@ -378,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
378 strncpy(sys_name, *strp, sys_length); 401 strncpy(sys_name, *strp, sys_length);
379 sys_name[sys_length] = '\0'; 402 sys_name[sys_length] = '\0';
380 evt_name = evt_name + 1; 403 evt_name = evt_name + 1;
404
405 flags = strchr(evt_name, ':');
406 if (flags) {
407 *flags = '\0';
408 flags++;
409 if (!strncmp(flags, "record", strlen(flags)))
410 attr->sample_type |= PERF_SAMPLE_RAW;
411 }
412
381 evt_length = strlen(evt_name); 413 evt_length = strlen(evt_name);
382 if (evt_length >= MAX_EVENT_LENGTH) 414 if (evt_length >= MAX_EVENT_LENGTH)
383 return 0; 415 return 0;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1ea5d09b6eb1..192a962e3a0f 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -10,6 +10,7 @@ extern int nr_counters;
10extern struct perf_counter_attr attrs[MAX_COUNTERS]; 10extern struct perf_counter_attr attrs[MAX_COUNTERS];
11 11
12extern char *event_name(int ctr); 12extern char *event_name(int ctr);
13extern char *__event_name(int type, u64 config);
13 14
14extern int parse_events(const struct option *opt, const char *str, int unset); 15extern int parse_events(const struct option *opt, const char *str, int unset);
15 16
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 16ddca202948..5c0f42e6b33b 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -7,22 +7,17 @@
7#include <gelf.h> 7#include <gelf.h>
8#include <elf.h> 8#include <elf.h>
9 9
10#ifndef NO_DEMANGLE
11#include <bfd.h>
12#else
13static inline
14char *bfd_demangle(void __used *v, const char __used *c, int __used i)
15{
16 return NULL;
17}
18#endif
19
20const char *sym_hist_filter; 10const char *sym_hist_filter;
21 11
22#ifndef DMGL_PARAMS 12enum dso_origin {
23#define DMGL_PARAMS (1 << 0) /* Include function args */ 13 DSO__ORIG_KERNEL = 0,
24#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ 14 DSO__ORIG_JAVA_JIT,
25#endif 15 DSO__ORIG_FEDORA,
16 DSO__ORIG_UBUNTU,
17 DSO__ORIG_BUILDID,
18 DSO__ORIG_DSO,
19 DSO__ORIG_NOT_FOUND,
20};
26 21
27static struct symbol *symbol__new(u64 start, u64 len, 22static struct symbol *symbol__new(u64 start, u64 len,
28 const char *name, unsigned int priv_size, 23 const char *name, unsigned int priv_size,
@@ -81,6 +76,7 @@ struct dso *dso__new(const char *name, unsigned int sym_priv_size)
81 self->sym_priv_size = sym_priv_size; 76 self->sym_priv_size = sym_priv_size;
82 self->find_symbol = dso__find_symbol; 77 self->find_symbol = dso__find_symbol;
83 self->slen_calculated = 0; 78 self->slen_calculated = 0;
79 self->origin = DSO__ORIG_NOT_FOUND;
84 } 80 }
85 81
86 return self; 82 return self;
@@ -710,7 +706,7 @@ static char *dso__read_build_id(struct dso *self, int verbose)
710 ++raw; 706 ++raw;
711 bid += 2; 707 bid += 2;
712 } 708 }
713 if (verbose) 709 if (verbose >= 2)
714 printf("%s(%s): %s\n", __func__, self->name, build_id); 710 printf("%s(%s): %s\n", __func__, self->name, build_id);
715out_elf_end: 711out_elf_end:
716 elf_end(elf); 712 elf_end(elf);
@@ -720,11 +716,26 @@ out:
720 return build_id; 716 return build_id;
721} 717}
722 718
719char dso__symtab_origin(const struct dso *self)
720{
721 static const char origin[] = {
722 [DSO__ORIG_KERNEL] = 'k',
723 [DSO__ORIG_JAVA_JIT] = 'j',
724 [DSO__ORIG_FEDORA] = 'f',
725 [DSO__ORIG_UBUNTU] = 'u',
726 [DSO__ORIG_BUILDID] = 'b',
727 [DSO__ORIG_DSO] = 'd',
728 };
729
730 if (self == NULL || self->origin == DSO__ORIG_NOT_FOUND)
731 return '!';
732 return origin[self->origin];
733}
734
723int dso__load(struct dso *self, symbol_filter_t filter, int verbose) 735int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
724{ 736{
725 int size = PATH_MAX; 737 int size = PATH_MAX;
726 char *name = malloc(size), *build_id = NULL; 738 char *name = malloc(size), *build_id = NULL;
727 int variant = 0;
728 int ret = -1; 739 int ret = -1;
729 int fd; 740 int fd;
730 741
@@ -733,19 +744,26 @@ int dso__load(struct dso *self, symbol_filter_t filter, int verbose)
733 744
734 self->adjust_symbols = 0; 745 self->adjust_symbols = 0;
735 746
736 if (strncmp(self->name, "/tmp/perf-", 10) == 0) 747 if (strncmp(self->name, "/tmp/perf-", 10) == 0) {
737 return dso__load_perf_map(self, filter, verbose); 748 ret = dso__load_perf_map(self, filter, verbose);
749 self->origin = ret > 0 ? DSO__ORIG_JAVA_JIT :
750 DSO__ORIG_NOT_FOUND;
751 return ret;
752 }
753
754 self->origin = DSO__ORIG_FEDORA - 1;
738 755
739more: 756more:
740 do { 757 do {
741 switch (variant) { 758 self->origin++;
742 case 0: /* Fedora */ 759 switch (self->origin) {
760 case DSO__ORIG_FEDORA:
743 snprintf(name, size, "/usr/lib/debug%s.debug", self->name); 761 snprintf(name, size, "/usr/lib/debug%s.debug", self->name);
744 break; 762 break;
745 case 1: /* Ubuntu */ 763 case DSO__ORIG_UBUNTU:
746 snprintf(name, size, "/usr/lib/debug%s", self->name); 764 snprintf(name, size, "/usr/lib/debug%s", self->name);
747 break; 765 break;
748 case 2: 766 case DSO__ORIG_BUILDID:
749 build_id = dso__read_build_id(self, verbose); 767 build_id = dso__read_build_id(self, verbose);
750 if (build_id != NULL) { 768 if (build_id != NULL) {
751 snprintf(name, size, 769 snprintf(name, size,
@@ -754,16 +772,15 @@ more:
754 free(build_id); 772 free(build_id);
755 break; 773 break;
756 } 774 }
757 variant++; 775 self->origin++;
758 /* Fall thru */ 776 /* Fall thru */
759 case 3: /* Sane people */ 777 case DSO__ORIG_DSO:
760 snprintf(name, size, "%s", self->name); 778 snprintf(name, size, "%s", self->name);
761 break; 779 break;
762 780
763 default: 781 default:
764 goto out; 782 goto out;
765 } 783 }
766 variant++;
767 784
768 fd = open(name, O_RDONLY); 785 fd = open(name, O_RDONLY);
769 } while (fd < 0); 786 } while (fd < 0);
@@ -784,6 +801,8 @@ more:
784 } 801 }
785out: 802out:
786 free(name); 803 free(name);
804 if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
805 return 0;
787 return ret; 806 return ret;
788} 807}
789 808
@@ -899,6 +918,9 @@ int dso__load_kernel(struct dso *self, const char *vmlinux,
899 if (err <= 0) 918 if (err <= 0)
900 err = dso__load_kallsyms(self, filter, verbose); 919 err = dso__load_kallsyms(self, filter, verbose);
901 920
921 if (err > 0)
922 self->origin = DSO__ORIG_KERNEL;
923
902 return err; 924 return err;
903} 925}
904 926
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 2f92b21c712d..b53bf0125c1b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -7,6 +7,30 @@
7#include <linux/rbtree.h> 7#include <linux/rbtree.h>
8#include "module.h" 8#include "module.h"
9 9
10#ifdef HAVE_CPLUS_DEMANGLE
11extern char *cplus_demangle(const char *, int);
12
13static inline char *bfd_demangle(void __used *v, const char *c, int i)
14{
15 return cplus_demangle(c, i);
16}
17#else
18#ifdef NO_DEMANGLE
19static inline char *bfd_demangle(void __used *v, const char __used *c,
20 int __used i)
21{
22 return NULL;
23}
24#else
25#include <bfd.h>
26#endif
27#endif
28
29#ifndef DMGL_PARAMS
30#define DMGL_PARAMS (1 << 0) /* Include function args */
31#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
32#endif
33
10struct symbol { 34struct symbol {
11 struct rb_node rb_node; 35 struct rb_node rb_node;
12 u64 start; 36 u64 start;
@@ -26,6 +50,7 @@ struct dso {
26 unsigned int sym_priv_size; 50 unsigned int sym_priv_size;
27 unsigned char adjust_symbols; 51 unsigned char adjust_symbols;
28 unsigned char slen_calculated; 52 unsigned char slen_calculated;
53 unsigned char origin;
29 char name[0]; 54 char name[0];
30}; 55};
31 56
@@ -49,6 +74,7 @@ int dso__load_modules(struct dso *self, symbol_filter_t filter, int verbose);
49int dso__load(struct dso *self, symbol_filter_t filter, int verbose); 74int dso__load(struct dso *self, symbol_filter_t filter, int verbose);
50 75
51size_t dso__fprintf(struct dso *self, FILE *fp); 76size_t dso__fprintf(struct dso *self, FILE *fp);
77char dso__symtab_origin(const struct dso *self);
52 78
53void symbol__init(void); 79void symbol__init(void);
54#endif /* _PERF_SYMBOL_ */ 80#endif /* _PERF_SYMBOL_ */
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1eddae94bab3..1150c6d5c7b8 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
95 if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) 95 if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
96 pent->fields.remote_irr = 1; 96 pent->fields.remote_irr = 1;
97 } 97 }
98 if (!pent->fields.trig_mode)
99 ioapic->irr &= ~(1 << idx);
100 98
101 return injected; 99 return injected;
102} 100}
@@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
136 mask_after = ioapic->redirtbl[index].fields.mask; 134 mask_after = ioapic->redirtbl[index].fields.mask;
137 if (mask_before != mask_after) 135 if (mask_before != mask_after)
138 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); 136 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
139 if (ioapic->irr & (1 << index)) 137 if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
138 && ioapic->irr & (1 << index))
140 ioapic_service(ioapic, index); 139 ioapic_service(ioapic, index);
141 break; 140 break;
142 } 141 }
@@ -184,9 +183,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
184 if (!level) 183 if (!level)
185 ioapic->irr &= ~mask; 184 ioapic->irr &= ~mask;
186 else { 185 else {
186 int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
187 ioapic->irr |= mask; 187 ioapic->irr |= mask;
188 if ((!entry.fields.trig_mode && old_irr != ioapic->irr) 188 if ((edge && old_irr != ioapic->irr) ||
189 || !entry.fields.remote_irr) 189 (!edge && !entry.fields.remote_irr))
190 ret = ioapic_service(ioapic, irq); 190 ret = ioapic_service(ioapic, irq);
191 } 191 }
192 } 192 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a8bd466d00cc..ddc17f0e2f35 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
160 unsigned gsi = pin; 160 unsigned gsi = pin;
161 161
162 list_for_each_entry(e, &kvm->irq_routing, link) 162 list_for_each_entry(e, &kvm->irq_routing, link)
163 if (e->irqchip.irqchip == irqchip && 163 if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
164 e->irqchip.irqchip == irqchip &&
164 e->irqchip.pin == pin) { 165 e->irqchip.pin == pin) {
165 gsi = e->gsi; 166 gsi = e->gsi;
166 break; 167 break;
@@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
259 int delta; 260 int delta;
260 261
261 e->gsi = ue->gsi; 262 e->gsi = ue->gsi;
263 e->type = ue->type;
262 switch (ue->type) { 264 switch (ue->type) {
263 case KVM_IRQ_ROUTING_IRQCHIP: 265 case KVM_IRQ_ROUTING_IRQCHIP:
264 delta = 0; 266 delta = 0;