aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/kernel-parameters.txt4
-rw-r--r--Documentation/lockdep-design.txt6
-rw-r--r--Makefile2
-rw-r--r--arch/ia64/Makefile5
-rw-r--r--arch/ia64/include/asm/bitops.h2
-rw-r--r--arch/ia64/include/asm/pgtable.h1
-rw-r--r--arch/ia64/kernel/ia64_ksyms.c4
-rw-r--r--arch/ia64/kernel/iosapic.c4
-rw-r--r--arch/ia64/kernel/pci-dma.c5
-rw-r--r--arch/ia64/kernel/topology.c6
-rw-r--r--arch/ia64/kvm/mmio.c6
-rw-r--r--arch/ia64/kvm/vcpu.c6
-rw-r--r--arch/ia64/kvm/vcpu.h13
-rw-r--r--arch/mn10300/include/asm/pci.h1
-rw-r--r--arch/powerpc/include/asm/kvm_host.h2
-rw-r--r--arch/powerpc/kernel/dma.c6
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/sh/boards/board-ap325rxa.c2
-rw-r--r--arch/sh/boards/mach-migor/setup.c2
-rw-r--r--arch/sh/kernel/cpu/sh2/setup-sh7619.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-mxg.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7201.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7203.c2
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7206.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7705.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh770x.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7710.c2
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7720.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh4-202.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c2
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7760.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7343.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7366.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7722.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7723.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7724.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7763.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7770.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7780.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7785.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7786.c2
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-shx3.c2
-rw-r--r--arch/sh/kernel/cpu/sh5/setup-sh5.c2
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c8
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c8
-rw-r--r--arch/x86/kernel/cpu/amd.c7
-rw-r--r--arch/x86/kernel/cpu/common.c48
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c18
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c40
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/reboot.c16
-rw-r--r--arch/x86/kernel/tsc.c29
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kvm/i8254.c3
-rw-r--r--arch/x86/kvm/mmu.c48
-rw-r--r--arch/x86/kvm/svm.c6
-rw-r--r--arch/x86/kvm/vmx.c6
-rw-r--r--arch/x86/kvm/x86.c44
-rw-r--r--drivers/ata/ahci.c79
-rw-r--r--drivers/ata/libata-core.c3
-rw-r--r--drivers/ata/pata_at91.c17
-rw-r--r--drivers/ata/pata_atiixp.c19
-rw-r--r--drivers/ata/sata_nv.c8
-rw-r--r--drivers/base/platform.c3
-rw-r--r--drivers/char/pty.c2
-rw-r--r--drivers/gpu/drm/drm_irq.c2
-rw-r--r--drivers/gpu/drm/drm_modes.c2
-rw-r--r--drivers/gpu/drm/i915/i915_irq.c4
-rw-r--r--drivers/md/md.c32
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid5.c34
-rw-r--r--drivers/mtd/maps/sbc8240.c0
-rw-r--r--drivers/mtd/ubi/eba.c1
-rw-r--r--drivers/mtd/ubi/scan.c13
-rw-r--r--drivers/pci/hotplug/sgi_hotplug.c7
-rw-r--r--fs/nfs/direct.c20
-rw-r--r--fs/nfs/read.c6
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/ocfs2/alloc.c47
-rw-r--r--fs/ocfs2/aops.c69
-rw-r--r--fs/ocfs2/dcache.c35
-rw-r--r--fs/ocfs2/dcache.h3
-rw-r--r--fs/ocfs2/dlm/dlmast.c1
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c2
-rw-r--r--fs/ocfs2/file.c5
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/journal.h19
-rw-r--r--fs/ocfs2/ocfs2.h22
-rw-r--r--fs/ocfs2/quota.h1
-rw-r--r--fs/ocfs2/quota_global.c134
-rw-r--r--fs/ocfs2/quota_local.c110
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/super.c30
-rw-r--r--fs/ocfs2/xattr.c3
-rw-r--r--fs/proc/base.c27
-rw-r--r--fs/proc/task_mmu.c1
-rw-r--r--fs/proc/task_nommu.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_attr.c8
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_btree.c4
-rw-r--r--fs/xfs/xfs_da_btree.c6
-rw-r--r--fs/xfs/xfs_dir2.c2
-rw-r--r--fs/xfs/xfs_fsops.c20
-rw-r--r--fs/xfs/xfs_inode.c10
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c4
-rw-r--r--include/linux/kvm_host.h1
-rw-r--r--include/linux/nfs_fs.h5
-rw-r--r--include/linux/perf_counter.h49
-rw-r--r--include/linux/wait.h9
-rw-r--r--include/trace/ftrace.h15
-rw-r--r--kernel/futex.c28
-rw-r--r--kernel/futex_compat.c6
-rw-r--r--kernel/irq/manage.c17
-rw-r--r--kernel/irq/numa_migrate.c4
-rw-r--r--kernel/lockdep_proc.c3
-rw-r--r--kernel/perf_counter.c366
-rw-r--r--kernel/posix-cpu-timers.c7
-rw-r--r--kernel/rtmutex.c4
-rw-r--r--kernel/trace/blktrace.c12
-rw-r--r--kernel/wait.c5
-rw-r--r--mm/mempool.c4
-rw-r--r--net/socket.c2
-rw-r--r--security/selinux/hooks.c3
-rw-r--r--sound/pci/hda/patch_realtek.c20
-rw-r--r--sound/soc/fsl/efika-audio-fabric.c2
-rw-r--r--sound/soc/fsl/pcm030-audio-fabric.c2
-rw-r--r--tools/perf/Makefile29
-rw-r--r--tools/perf/builtin-list.c3
-rw-r--r--tools/perf/builtin-record.c95
-rw-r--r--tools/perf/builtin-report.c12
-rw-r--r--tools/perf/util/callchain.c5
-rw-r--r--tools/perf/util/parse-events.c10
-rw-r--r--tools/perf/util/symbol.c17
-rw-r--r--tools/perf/util/symbol.h24
-rw-r--r--virt/kvm/ioapic.c10
-rw-r--r--virt/kvm/irq_comm.c4
140 files changed, 1398 insertions, 639 deletions
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7bb0d934b6d8..dbea4f95fc85 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -139,6 +139,7 @@ Code Seq# Include File Comments
139'm' all linux/synclink.h conflict! 139'm' all linux/synclink.h conflict!
140'm' 00-1F net/irda/irmod.h conflict! 140'm' 00-1F net/irda/irmod.h conflict!
141'n' 00-7F linux/ncp_fs.h 141'n' 00-7F linux/ncp_fs.h
142'n' 80-8F linux/nilfs2_fs.h NILFS2
142'n' E0-FF video/matrox.h matroxfb 143'n' E0-FF video/matrox.h matroxfb
143'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2 144'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
144'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps) 145'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index dd1a6d4bb747..7936b801fe6a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1115,6 +1115,10 @@ and is between 256 and 4096 characters. It is defined in the file
1115 libata.dma=4 Compact Flash DMA only 1115 libata.dma=4 Compact Flash DMA only
1116 Combinations also work, so libata.dma=3 enables DMA 1116 Combinations also work, so libata.dma=3 enables DMA
1117 for disks and CDROMs, but not CFs. 1117 for disks and CDROMs, but not CFs.
1118
1119 libata.ignore_hpa= [LIBATA] Ignore HPA limit
1120 libata.ignore_hpa=0 keep BIOS limits (default)
1121 libata.ignore_hpa=1 ignore limits, using full disk
1118 1122
1119 libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume 1123 libata.noacpi [LIBATA] Disables use of ACPI in libata suspend/resume
1120 when set. 1124 when set.
diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt
index e20d913d5914..abf768c681e2 100644
--- a/Documentation/lockdep-design.txt
+++ b/Documentation/lockdep-design.txt
@@ -30,9 +30,9 @@ State
30The validator tracks lock-class usage history into 4n + 1 separate state bits: 30The validator tracks lock-class usage history into 4n + 1 separate state bits:
31 31
32- 'ever held in STATE context' 32- 'ever held in STATE context'
33- 'ever head as readlock in STATE context' 33- 'ever held as readlock in STATE context'
34- 'ever head with STATE enabled' 34- 'ever held with STATE enabled'
35- 'ever head as readlock with STATE enabled' 35- 'ever held as readlock with STATE enabled'
36 36
37Where STATE can be either one of (kernel/lockdep_states.h) 37Where STATE can be either one of (kernel/lockdep_states.h)
38 - hardirq 38 - hardirq
diff --git a/Makefile b/Makefile
index 0d46615bffe5..abcfa85f8f82 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
1VERSION = 2 1VERSION = 2
2PATCHLEVEL = 6 2PATCHLEVEL = 6
3SUBLEVEL = 31 3SUBLEVEL = 31
4EXTRAVERSION = -rc5 4EXTRAVERSION = -rc6
5NAME = Man-Eating Seals of Antiquity 5NAME = Man-Eating Seals of Antiquity
6 6
7# *DOCUMENTATION* 7# *DOCUMENTATION*
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 58a7e46affda..e7cbaa02cd0b 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -41,11 +41,6 @@ $(error Sorry, you need a newer version of the assember, one that is built from
41 ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) 41 ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
42endif 42endif
43 43
44ifeq ($(call cc-version),0304)
45 cflags-$(CONFIG_ITANIUM) += -mtune=merced
46 cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
47endif
48
49KBUILD_CFLAGS += $(cflags-y) 44KBUILD_CFLAGS += $(cflags-y)
50head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o 45head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
51 46
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index e2ca80037335..57a2787bc9fb 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -286,7 +286,7 @@ __test_and_clear_bit(int nr, volatile void * addr)
286{ 286{
287 __u32 *p = (__u32 *) addr + (nr >> 5); 287 __u32 *p = (__u32 *) addr + (nr >> 5);
288 __u32 m = 1 << (nr & 31); 288 __u32 m = 1 << (nr & 31);
289 int oldbitset = *p & m; 289 int oldbitset = (*p & m) != 0;
290 290
291 *p &= ~m; 291 *p &= ~m;
292 return oldbitset; 292 return oldbitset;
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 0a9cc73d35c7..8840a690d1e7 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -155,7 +155,6 @@
155#include <linux/bitops.h> 155#include <linux/bitops.h>
156#include <asm/cacheflush.h> 156#include <asm/cacheflush.h>
157#include <asm/mmu_context.h> 157#include <asm/mmu_context.h>
158#include <asm/processor.h>
159 158
160/* 159/*
161 * Next come the mappings that determine how mmap() protection bits 160 * Next come the mappings that determine how mmap() protection bits
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
index 2d311864e359..8ebccb589e1c 100644
--- a/arch/ia64/kernel/ia64_ksyms.c
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -21,6 +21,7 @@ EXPORT_SYMBOL(csum_ipv6_magic);
21 21
22#include <asm/page.h> 22#include <asm/page.h>
23EXPORT_SYMBOL(clear_page); 23EXPORT_SYMBOL(clear_page);
24EXPORT_SYMBOL(copy_page);
24 25
25#ifdef CONFIG_VIRTUAL_MEM_MAP 26#ifdef CONFIG_VIRTUAL_MEM_MAP
26#include <linux/bootmem.h> 27#include <linux/bootmem.h>
@@ -60,9 +61,6 @@ EXPORT_SYMBOL(__udivdi3);
60EXPORT_SYMBOL(__moddi3); 61EXPORT_SYMBOL(__moddi3);
61EXPORT_SYMBOL(__umoddi3); 62EXPORT_SYMBOL(__umoddi3);
62 63
63#include <asm/page.h>
64EXPORT_SYMBOL(copy_page);
65
66#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE) 64#if defined(CONFIG_MD_RAID456) || defined(CONFIG_MD_RAID456_MODULE)
67extern void xor_ia64_2(void); 65extern void xor_ia64_2(void);
68extern void xor_ia64_3(void); 66extern void xor_ia64_3(void);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index c48b03f2b61d..dab4d393908c 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -1072,6 +1072,10 @@ iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
1072 } 1072 }
1073 1073
1074 addr = ioremap(phys_addr, 0); 1074 addr = ioremap(phys_addr, 0);
1075 if (addr == NULL) {
1076 spin_unlock_irqrestore(&iosapic_lock, flags);
1077 return -ENOMEM;
1078 }
1075 ver = iosapic_version(addr); 1079 ver = iosapic_version(addr);
1076 if ((err = iosapic_check_gsi_range(gsi_base, ver))) { 1080 if ((err = iosapic_check_gsi_range(gsi_base, ver))) {
1077 iounmap(addr); 1081 iounmap(addr);
diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c
index 05695962fe44..f6b1ff0aea76 100644
--- a/arch/ia64/kernel/pci-dma.c
+++ b/arch/ia64/kernel/pci-dma.c
@@ -69,11 +69,6 @@ iommu_dma_init(void)
69 69
70int iommu_dma_supported(struct device *dev, u64 mask) 70int iommu_dma_supported(struct device *dev, u64 mask)
71{ 71{
72 struct dma_map_ops *ops = platform_dma_get_ops(dev);
73
74 if (ops->dma_supported)
75 return ops->dma_supported(dev, mask);
76
77 /* Copied from i386. Doesn't make much sense, because it will 72 /* Copied from i386. Doesn't make much sense, because it will
78 only work for pci_alloc_coherent. 73 only work for pci_alloc_coherent.
79 The caller just has to use GFP_DMA in this case. */ 74 The caller just has to use GFP_DMA in this case. */
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index bc80dff1df7a..8f060352e129 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -372,6 +372,10 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
372 retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, 372 retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj,
373 &cache_ktype_percpu_entry, &sys_dev->kobj, 373 &cache_ktype_percpu_entry, &sys_dev->kobj,
374 "%s", "cache"); 374 "%s", "cache");
375 if (unlikely(retval < 0)) {
376 cpu_cache_sysfs_exit(cpu);
377 return retval;
378 }
375 379
376 for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { 380 for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) {
377 this_object = LEAF_KOBJECT_PTR(cpu,i); 381 this_object = LEAF_KOBJECT_PTR(cpu,i);
@@ -385,7 +389,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
385 } 389 }
386 kobject_put(&all_cpu_cache_info[cpu].kobj); 390 kobject_put(&all_cpu_cache_info[cpu].kobj);
387 cpu_cache_sysfs_exit(cpu); 391 cpu_cache_sysfs_exit(cpu);
388 break; 392 return retval;
389 } 393 }
390 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 394 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
391 } 395 }
diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
index 21f63fffc379..9bf55afd08d0 100644
--- a/arch/ia64/kvm/mmio.c
+++ b/arch/ia64/kvm/mmio.c
@@ -247,7 +247,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
247 vcpu_get_fpreg(vcpu, inst.M9.f2, &v); 247 vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
248 /* Write high word. FIXME: this is a kludge! */ 248 /* Write high word. FIXME: this is a kludge! */
249 v.u.bits[1] &= 0x3ffff; 249 v.u.bits[1] &= 0x3ffff;
250 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); 250 mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
251 ma, IOREQ_WRITE);
251 data = v.u.bits[0]; 252 data = v.u.bits[0];
252 size = 3; 253 size = 3;
253 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { 254 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
@@ -265,7 +266,8 @@ void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
265 266
266 /* Write high word.FIXME: this is a kludge! */ 267 /* Write high word.FIXME: this is a kludge! */
267 v.u.bits[1] &= 0x3ffff; 268 v.u.bits[1] &= 0x3ffff;
268 mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); 269 mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
270 8, ma, IOREQ_WRITE);
269 data = v.u.bits[0]; 271 data = v.u.bits[0];
270 size = 3; 272 size = 3;
271 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { 273 } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
index 46b02cbcc874..cc406d064a09 100644
--- a/arch/ia64/kvm/vcpu.c
+++ b/arch/ia64/kvm/vcpu.c
@@ -461,7 +461,7 @@ void setreg(unsigned long regnum, unsigned long val,
461u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) 461u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
462{ 462{
463 struct kvm_pt_regs *regs = vcpu_regs(vcpu); 463 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
464 u64 val; 464 unsigned long val;
465 465
466 if (!reg) 466 if (!reg)
467 return 0; 467 return 0;
@@ -469,7 +469,7 @@ u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
469 return val; 469 return val;
470} 470}
471 471
472void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) 472void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
473{ 473{
474 struct kvm_pt_regs *regs = vcpu_regs(vcpu); 474 struct kvm_pt_regs *regs = vcpu_regs(vcpu);
475 long sof = (regs->cr_ifs) & 0x7f; 475 long sof = (regs->cr_ifs) & 0x7f;
@@ -1072,7 +1072,7 @@ void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
1072 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); 1072 vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
1073} 1073}
1074 1074
1075int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) 1075int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
1076{ 1076{
1077 struct thash_data *data; 1077 struct thash_data *data;
1078 union ia64_isr visr, pt_isr; 1078 union ia64_isr visr, pt_isr;
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index 042af92ced83..360724d3ae69 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -686,14 +686,15 @@ static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
686 return highest_bits((int *)&(VMX(vcpu, insvc[0]))); 686 return highest_bits((int *)&(VMX(vcpu, insvc[0])));
687} 687}
688 688
689extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, u64 reg, 689extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
690 struct ia64_fpreg *val); 690 struct ia64_fpreg *val);
691extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, u64 reg, 691extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
692 struct ia64_fpreg *val); 692 struct ia64_fpreg *val);
693extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, u64 reg); 693extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
694extern void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 val, int nat); 694extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
695extern u64 vcpu_get_psr(struct kvm_vcpu *vcpu); 695 u64 val, int nat);
696extern void vcpu_set_psr(struct kvm_vcpu *vcpu, u64 val); 696extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
697extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
697extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr); 698extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
698extern void vcpu_bsw0(struct kvm_vcpu *vcpu); 699extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
699extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, 700extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
diff --git a/arch/mn10300/include/asm/pci.h b/arch/mn10300/include/asm/pci.h
index 35d2ed6396f6..19aecc90f7a4 100644
--- a/arch/mn10300/include/asm/pci.h
+++ b/arch/mn10300/include/asm/pci.h
@@ -59,7 +59,6 @@ void pcibios_penalize_isa_irq(int irq);
59#include <linux/slab.h> 59#include <linux/slab.h>
60#include <asm/scatterlist.h> 60#include <asm/scatterlist.h>
61#include <linux/string.h> 61#include <linux/string.h>
62#include <linux/mm.h>
63#include <asm/io.h> 62#include <asm/io.h>
64 63
65struct pci_dev; 64struct pci_dev;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index dfdf13c9fefd..fddc3ed715fa 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -34,7 +34,7 @@
34#define KVM_COALESCED_MMIO_PAGE_OFFSET 1 34#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
35 35
36/* We don't currently support large pages. */ 36/* We don't currently support large pages. */
37#define KVM_PAGES_PER_HPAGE (1<<31) 37#define KVM_PAGES_PER_HPAGE (1UL << 31)
38 38
39struct kvm; 39struct kvm;
40struct kvm_run; 40struct kvm_run;
diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c
index 20a60d661ba8..ccf129d47d84 100644
--- a/arch/powerpc/kernel/dma.c
+++ b/arch/powerpc/kernel/dma.c
@@ -7,6 +7,7 @@
7 7
8#include <linux/device.h> 8#include <linux/device.h>
9#include <linux/dma-mapping.h> 9#include <linux/dma-mapping.h>
10#include <linux/lmb.h>
10#include <asm/bug.h> 11#include <asm/bug.h>
11#include <asm/abs_addr.h> 12#include <asm/abs_addr.h>
12 13
@@ -90,11 +91,10 @@ static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
90static int dma_direct_dma_supported(struct device *dev, u64 mask) 91static int dma_direct_dma_supported(struct device *dev, u64 mask)
91{ 92{
92#ifdef CONFIG_PPC64 93#ifdef CONFIG_PPC64
93 /* Could be improved to check for memory though it better be 94 /* Could be improved so platforms can set the limit in case
94 * done via some global so platforms can set the limit in case
95 * they have limited DMA windows 95 * they have limited DMA windows
96 */ 96 */
97 return mask >= DMA_BIT_MASK(32); 97 return mask >= (lmb_end_of_DRAM() - 1);
98#else 98#else
99 return 1; 99 return 1;
100#endif 100#endif
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index f04f5301b1b4..4d613415c435 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -386,7 +386,7 @@ no_timer:
386 } 386 }
387 __unset_cpu_idle(vcpu); 387 __unset_cpu_idle(vcpu);
388 __set_current_state(TASK_RUNNING); 388 __set_current_state(TASK_RUNNING);
389 remove_wait_queue(&vcpu->wq, &wait); 389 remove_wait_queue(&vcpu->arch.local_int.wq, &wait);
390 spin_unlock_bh(&vcpu->arch.local_int.lock); 390 spin_unlock_bh(&vcpu->arch.local_int.lock);
391 spin_unlock(&vcpu->arch.local_int.float_int->lock); 391 spin_unlock(&vcpu->arch.local_int.float_int->lock);
392 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer); 392 hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
diff --git a/arch/sh/boards/board-ap325rxa.c b/arch/sh/boards/board-ap325rxa.c
index 7ffd1b4315bd..b9c88cc519e2 100644
--- a/arch/sh/boards/board-ap325rxa.c
+++ b/arch/sh/boards/board-ap325rxa.c
@@ -547,7 +547,7 @@ static int __init ap325rxa_devices_setup(void)
547 return platform_add_devices(ap325rxa_devices, 547 return platform_add_devices(ap325rxa_devices,
548 ARRAY_SIZE(ap325rxa_devices)); 548 ARRAY_SIZE(ap325rxa_devices));
549} 549}
550device_initcall(ap325rxa_devices_setup); 550arch_initcall(ap325rxa_devices_setup);
551 551
552/* Return the board specific boot mode pin configuration */ 552/* Return the board specific boot mode pin configuration */
553static int ap325rxa_mode_pins(void) 553static int ap325rxa_mode_pins(void)
diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c
index f70f4644deb4..f9b2e4df35b9 100644
--- a/arch/sh/boards/mach-migor/setup.c
+++ b/arch/sh/boards/mach-migor/setup.c
@@ -608,7 +608,7 @@ static int __init migor_devices_setup(void)
608 608
609 return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices)); 609 return platform_add_devices(migor_devices, ARRAY_SIZE(migor_devices));
610} 610}
611__initcall(migor_devices_setup); 611arch_initcall(migor_devices_setup);
612 612
613/* Return the board specific boot mode pin configuration */ 613/* Return the board specific boot mode pin configuration */
614static int migor_mode_pins(void) 614static int migor_mode_pins(void)
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index 13798733f2db..8555c05e8667 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -187,7 +187,7 @@ static int __init sh7619_devices_setup(void)
187 return platform_add_devices(sh7619_devices, 187 return platform_add_devices(sh7619_devices,
188 ARRAY_SIZE(sh7619_devices)); 188 ARRAY_SIZE(sh7619_devices));
189} 189}
190__initcall(sh7619_devices_setup); 190arch_initcall(sh7619_devices_setup);
191 191
192void __init plat_irq_setup(void) 192void __init plat_irq_setup(void)
193{ 193{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 869c2da4820b..b67376445315 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -238,7 +238,7 @@ static int __init mxg_devices_setup(void)
238 return platform_add_devices(mxg_devices, 238 return platform_add_devices(mxg_devices,
239 ARRAY_SIZE(mxg_devices)); 239 ARRAY_SIZE(mxg_devices));
240} 240}
241__initcall(mxg_devices_setup); 241arch_initcall(mxg_devices_setup);
242 242
243void __init plat_irq_setup(void) 243void __init plat_irq_setup(void)
244{ 244{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index d8febe128066..fbde5b75deb9 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -357,7 +357,7 @@ static int __init sh7201_devices_setup(void)
357 return platform_add_devices(sh7201_devices, 357 return platform_add_devices(sh7201_devices,
358 ARRAY_SIZE(sh7201_devices)); 358 ARRAY_SIZE(sh7201_devices));
359} 359}
360__initcall(sh7201_devices_setup); 360arch_initcall(sh7201_devices_setup);
361 361
362void __init plat_irq_setup(void) 362void __init plat_irq_setup(void)
363{ 363{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 62e3039d2398..d3fd536c9a84 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -367,7 +367,7 @@ static int __init sh7203_devices_setup(void)
367 return platform_add_devices(sh7203_devices, 367 return platform_add_devices(sh7203_devices,
368 ARRAY_SIZE(sh7203_devices)); 368 ARRAY_SIZE(sh7203_devices));
369} 369}
370__initcall(sh7203_devices_setup); 370arch_initcall(sh7203_devices_setup);
371 371
372void __init plat_irq_setup(void) 372void __init plat_irq_setup(void)
373{ 373{
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index 3e6f3d7a58be..a9ccc5e8d9e9 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -338,7 +338,7 @@ static int __init sh7206_devices_setup(void)
338 return platform_add_devices(sh7206_devices, 338 return platform_add_devices(sh7206_devices,
339 ARRAY_SIZE(sh7206_devices)); 339 ARRAY_SIZE(sh7206_devices));
340} 340}
341__initcall(sh7206_devices_setup); 341arch_initcall(sh7206_devices_setup);
342 342
343void __init plat_irq_setup(void) 343void __init plat_irq_setup(void)
344{ 344{
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index 88f742fed9ed..c23105983878 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -222,7 +222,7 @@ static int __init sh7705_devices_setup(void)
222 return platform_add_devices(sh7705_devices, 222 return platform_add_devices(sh7705_devices,
223 ARRAY_SIZE(sh7705_devices)); 223 ARRAY_SIZE(sh7705_devices));
224} 224}
225__initcall(sh7705_devices_setup); 225arch_initcall(sh7705_devices_setup);
226 226
227static struct platform_device *sh7705_early_devices[] __initdata = { 227static struct platform_device *sh7705_early_devices[] __initdata = {
228 &tmu0_device, 228 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index c56306798584..347ab35d0697 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -250,7 +250,7 @@ static int __init sh770x_devices_setup(void)
250 return platform_add_devices(sh770x_devices, 250 return platform_add_devices(sh770x_devices,
251 ARRAY_SIZE(sh770x_devices)); 251 ARRAY_SIZE(sh770x_devices));
252} 252}
253__initcall(sh770x_devices_setup); 253arch_initcall(sh770x_devices_setup);
254 254
255static struct platform_device *sh770x_early_devices[] __initdata = { 255static struct platform_device *sh770x_early_devices[] __initdata = {
256 &tmu0_device, 256 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index efa76c8148f4..717e90ae1097 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -226,7 +226,7 @@ static int __init sh7710_devices_setup(void)
226 return platform_add_devices(sh7710_devices, 226 return platform_add_devices(sh7710_devices,
227 ARRAY_SIZE(sh7710_devices)); 227 ARRAY_SIZE(sh7710_devices));
228} 228}
229__initcall(sh7710_devices_setup); 229arch_initcall(sh7710_devices_setup);
230 230
231static struct platform_device *sh7710_early_devices[] __initdata = { 231static struct platform_device *sh7710_early_devices[] __initdata = {
232 &tmu0_device, 232 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 5b2107798edb..74d8baaf8e96 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -388,7 +388,7 @@ static int __init sh7720_devices_setup(void)
388 return platform_add_devices(sh7720_devices, 388 return platform_add_devices(sh7720_devices,
389 ARRAY_SIZE(sh7720_devices)); 389 ARRAY_SIZE(sh7720_devices));
390} 390}
391__initcall(sh7720_devices_setup); 391arch_initcall(sh7720_devices_setup);
392 392
393static struct platform_device *sh7720_early_devices[] __initdata = { 393static struct platform_device *sh7720_early_devices[] __initdata = {
394 &cmt0_device, 394 &cmt0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index 6d088d123591..de4827df19aa 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -138,7 +138,7 @@ static int __init sh4202_devices_setup(void)
138 return platform_add_devices(sh4202_devices, 138 return platform_add_devices(sh4202_devices,
139 ARRAY_SIZE(sh4202_devices)); 139 ARRAY_SIZE(sh4202_devices));
140} 140}
141__initcall(sh4202_devices_setup); 141arch_initcall(sh4202_devices_setup);
142 142
143static struct platform_device *sh4202_early_devices[] __initdata = { 143static struct platform_device *sh4202_early_devices[] __initdata = {
144 &tmu0_device, 144 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index 851672d15cf4..1b8b122e8f3d 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -239,7 +239,7 @@ static int __init sh7750_devices_setup(void)
239 return platform_add_devices(sh7750_devices, 239 return platform_add_devices(sh7750_devices,
240 ARRAY_SIZE(sh7750_devices)); 240 ARRAY_SIZE(sh7750_devices));
241} 241}
242__initcall(sh7750_devices_setup); 242arch_initcall(sh7750_devices_setup);
243 243
244static struct platform_device *sh7750_early_devices[] __initdata = { 244static struct platform_device *sh7750_early_devices[] __initdata = {
245 &tmu0_device, 245 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 5b822519bd90..7fbb7be9284c 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -265,7 +265,7 @@ static int __init sh7760_devices_setup(void)
265 return platform_add_devices(sh7760_devices, 265 return platform_add_devices(sh7760_devices,
266 ARRAY_SIZE(sh7760_devices)); 266 ARRAY_SIZE(sh7760_devices));
267} 267}
268__initcall(sh7760_devices_setup); 268arch_initcall(sh7760_devices_setup);
269 269
270static struct platform_device *sh7760_early_devices[] __initdata = { 270static struct platform_device *sh7760_early_devices[] __initdata = {
271 &tmu0_device, 271 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index 6307e087c864..ac4d5672ec1a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -325,7 +325,7 @@ static int __init sh7343_devices_setup(void)
325 return platform_add_devices(sh7343_devices, 325 return platform_add_devices(sh7343_devices,
326 ARRAY_SIZE(sh7343_devices)); 326 ARRAY_SIZE(sh7343_devices));
327} 327}
328__initcall(sh7343_devices_setup); 328arch_initcall(sh7343_devices_setup);
329 329
330static struct platform_device *sh7343_early_devices[] __initdata = { 330static struct platform_device *sh7343_early_devices[] __initdata = {
331 &cmt_device, 331 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index c18f7d09281b..1a956b1beccc 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -318,7 +318,7 @@ static int __init sh7366_devices_setup(void)
318 return platform_add_devices(sh7366_devices, 318 return platform_add_devices(sh7366_devices,
319 ARRAY_SIZE(sh7366_devices)); 319 ARRAY_SIZE(sh7366_devices));
320} 320}
321__initcall(sh7366_devices_setup); 321arch_initcall(sh7366_devices_setup);
322 322
323static struct platform_device *sh7366_early_devices[] __initdata = { 323static struct platform_device *sh7366_early_devices[] __initdata = {
324 &cmt_device, 324 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index ea524a2da3e4..cda76ebf87c3 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -359,7 +359,7 @@ static int __init sh7722_devices_setup(void)
359 return platform_add_devices(sh7722_devices, 359 return platform_add_devices(sh7722_devices,
360 ARRAY_SIZE(sh7722_devices)); 360 ARRAY_SIZE(sh7722_devices));
361} 361}
362__initcall(sh7722_devices_setup); 362arch_initcall(sh7722_devices_setup);
363 363
364static struct platform_device *sh7722_early_devices[] __initdata = { 364static struct platform_device *sh7722_early_devices[] __initdata = {
365 &cmt_device, 365 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index e1bb80b2a27b..b45dace9539f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -473,7 +473,7 @@ static int __init sh7723_devices_setup(void)
473 return platform_add_devices(sh7723_devices, 473 return platform_add_devices(sh7723_devices,
474 ARRAY_SIZE(sh7723_devices)); 474 ARRAY_SIZE(sh7723_devices));
475} 475}
476__initcall(sh7723_devices_setup); 476arch_initcall(sh7723_devices_setup);
477 477
478static struct platform_device *sh7723_early_devices[] __initdata = { 478static struct platform_device *sh7723_early_devices[] __initdata = {
479 &cmt_device, 479 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index e5ac9eb11c63..a04edaab9a29 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -508,7 +508,7 @@ static int __init sh7724_devices_setup(void)
508 return platform_add_devices(sh7724_devices, 508 return platform_add_devices(sh7724_devices,
509 ARRAY_SIZE(sh7724_devices)); 509 ARRAY_SIZE(sh7724_devices));
510} 510}
511device_initcall(sh7724_devices_setup); 511arch_initcall(sh7724_devices_setup);
512 512
513static struct platform_device *sh7724_early_devices[] __initdata = { 513static struct platform_device *sh7724_early_devices[] __initdata = {
514 &cmt_device, 514 &cmt_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index f1e0c0d36da7..4659fff6b842 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -314,7 +314,7 @@ static int __init sh7763_devices_setup(void)
314 return platform_add_devices(sh7763_devices, 314 return platform_add_devices(sh7763_devices,
315 ARRAY_SIZE(sh7763_devices)); 315 ARRAY_SIZE(sh7763_devices));
316} 316}
317__initcall(sh7763_devices_setup); 317arch_initcall(sh7763_devices_setup);
318 318
319static struct platform_device *sh7763_early_devices[] __initdata = { 319static struct platform_device *sh7763_early_devices[] __initdata = {
320 &tmu0_device, 320 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index 1e86209db284..eead08d89d32 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -368,7 +368,7 @@ static int __init sh7770_devices_setup(void)
368 return platform_add_devices(sh7770_devices, 368 return platform_add_devices(sh7770_devices,
369 ARRAY_SIZE(sh7770_devices)); 369 ARRAY_SIZE(sh7770_devices));
370} 370}
371__initcall(sh7770_devices_setup); 371arch_initcall(sh7770_devices_setup);
372 372
373static struct platform_device *sh7770_early_devices[] __initdata = { 373static struct platform_device *sh7770_early_devices[] __initdata = {
374 &tmu0_device, 374 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 715e05b431e5..2c901f446959 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -256,7 +256,7 @@ static int __init sh7780_devices_setup(void)
256 return platform_add_devices(sh7780_devices, 256 return platform_add_devices(sh7780_devices,
257 ARRAY_SIZE(sh7780_devices)); 257 ARRAY_SIZE(sh7780_devices));
258} 258}
259__initcall(sh7780_devices_setup); 259arch_initcall(sh7780_devices_setup);
260 260
261static struct platform_device *sh7780_early_devices[] __initdata = { 261static struct platform_device *sh7780_early_devices[] __initdata = {
262 &tmu0_device, 262 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index af561402570b..7f6c718b6c36 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -263,7 +263,7 @@ static int __init sh7785_devices_setup(void)
263 return platform_add_devices(sh7785_devices, 263 return platform_add_devices(sh7785_devices,
264 ARRAY_SIZE(sh7785_devices)); 264 ARRAY_SIZE(sh7785_devices));
265} 265}
266__initcall(sh7785_devices_setup); 266arch_initcall(sh7785_devices_setup);
267 267
268static struct platform_device *sh7785_early_devices[] __initdata = { 268static struct platform_device *sh7785_early_devices[] __initdata = {
269 &tmu0_device, 269 &tmu0_device,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index b70049470a0b..0104a8ec5369 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -547,7 +547,7 @@ static int __init sh7786_devices_setup(void)
547 return platform_add_devices(sh7786_devices, 547 return platform_add_devices(sh7786_devices,
548 ARRAY_SIZE(sh7786_devices)); 548 ARRAY_SIZE(sh7786_devices));
549} 549}
550device_initcall(sh7786_devices_setup); 550arch_initcall(sh7786_devices_setup);
551 551
552void __init plat_early_device_setup(void) 552void __init plat_early_device_setup(void)
553{ 553{
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 53c65fd9ccef..07f078961c71 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -256,7 +256,7 @@ static int __init shx3_devices_setup(void)
256 return platform_add_devices(shx3_devices, 256 return platform_add_devices(shx3_devices,
257 ARRAY_SIZE(shx3_devices)); 257 ARRAY_SIZE(shx3_devices));
258} 258}
259__initcall(shx3_devices_setup); 259arch_initcall(shx3_devices_setup);
260 260
261void __init plat_early_device_setup(void) 261void __init plat_early_device_setup(void)
262{ 262{
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index f5ff1ac57fc2..6a0f82f70032 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -186,7 +186,7 @@ static int __init sh5_devices_setup(void)
186 return platform_add_devices(sh5_devices, 186 return platform_add_devices(sh5_devices,
187 ARRAY_SIZE(sh5_devices)); 187 ARRAY_SIZE(sh5_devices));
188} 188}
189__initcall(sh5_devices_setup); 189arch_initcall(sh5_devices_setup);
190 190
191void __init plat_early_device_setup(void) 191void __init plat_early_device_setup(void)
192{ 192{
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 738bdc6b0f8b..13ffa5df37d7 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,6 +24,7 @@ config X86
24 select HAVE_UNSTABLE_SCHED_CLOCK 24 select HAVE_UNSTABLE_SCHED_CLOCK
25 select HAVE_IDE 25 select HAVE_IDE
26 select HAVE_OPROFILE 26 select HAVE_OPROFILE
27 select HAVE_PERF_COUNTERS if (!M386 && !M486)
27 select HAVE_IOREMAP_PROT 28 select HAVE_IOREMAP_PROT
28 select HAVE_KPROBES 29 select HAVE_KPROBES
29 select ARCH_WANT_OPTIONAL_GPIOLIB 30 select ARCH_WANT_OPTIONAL_GPIOLIB
@@ -742,7 +743,6 @@ config X86_UP_IOAPIC
742config X86_LOCAL_APIC 743config X86_LOCAL_APIC
743 def_bool y 744 def_bool y
744 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC 745 depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC
745 select HAVE_PERF_COUNTERS if (!M386 && !M486)
746 746
747config X86_IO_APIC 747config X86_IO_APIC
748 def_bool y 748 def_bool y
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 2ed4e2bb3b32..a5371ec36776 100644
--- a/arch/x86/kernel/apic/x2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -17,11 +17,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
17 return x2apic_enabled(); 17 return x2apic_enabled();
18} 18}
19 19
20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/*
21 21 * need to use more than cpu 0, because we need more vectors when
22 * MSI-X are used.
23 */
22static const struct cpumask *x2apic_target_cpus(void) 24static const struct cpumask *x2apic_target_cpus(void)
23{ 25{
24 return cpumask_of(0); 26 return cpu_online_mask;
25} 27}
26 28
27/* 29/*
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 0b631c6a2e00..a8989aadc99a 100644
--- a/arch/x86/kernel/apic/x2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -27,11 +27,13 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/*
31 31 * need to use more than cpu 0, because we need more vectors when
32 * MSI-X are used.
33 */
32static const struct cpumask *x2apic_target_cpus(void) 34static const struct cpumask *x2apic_target_cpus(void)
33{ 35{
34 return cpumask_of(0); 36 return cpu_online_mask;
35} 37}
36 38
37static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) 39static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index e2485b03f1cf..63fddcd082cd 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -400,6 +400,13 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
400 level = cpuid_eax(1); 400 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 402 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403
404 /*
405 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it.
407 */
408 if (c->x86_model < 0x14)
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
403 } 410 }
404 if (c->x86 == 0x10 || c->x86 == 0x11) 411 if (c->x86 == 0x10 || c->x86 == 0x11)
405 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 412 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1961c07af9a..5ce60a88027b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -59,7 +59,30 @@ void __init setup_cpu_local_masks(void)
59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); 59 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
60} 60}
61 61
62static const struct cpu_dev *this_cpu __cpuinitdata; 62static void __cpuinit default_init(struct cpuinfo_x86 *c)
63{
64#ifdef CONFIG_X86_64
65 display_cacheinfo(c);
66#else
67 /* Not much we can do here... */
68 /* Check if at least it has cpuid */
69 if (c->cpuid_level == -1) {
70 /* No cpuid. It must be an ancient CPU */
71 if (c->x86 == 4)
72 strcpy(c->x86_model_id, "486");
73 else if (c->x86 == 3)
74 strcpy(c->x86_model_id, "386");
75 }
76#endif
77}
78
79static const struct cpu_dev __cpuinitconst default_cpu = {
80 .c_init = default_init,
81 .c_vendor = "Unknown",
82 .c_x86_vendor = X86_VENDOR_UNKNOWN,
83};
84
85static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
63 86
64DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { 87DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 88#ifdef CONFIG_X86_64
@@ -332,29 +355,6 @@ void switch_to_new_gdt(int cpu)
332 355
333static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; 356static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
334 357
335static void __cpuinit default_init(struct cpuinfo_x86 *c)
336{
337#ifdef CONFIG_X86_64
338 display_cacheinfo(c);
339#else
340 /* Not much we can do here... */
341 /* Check if at least it has cpuid */
342 if (c->cpuid_level == -1) {
343 /* No cpuid. It must be an ancient CPU */
344 if (c->x86 == 4)
345 strcpy(c->x86_model_id, "486");
346 else if (c->x86 == 3)
347 strcpy(c->x86_model_id, "386");
348 }
349#endif
350}
351
352static const struct cpu_dev __cpuinitconst default_cpu = {
353 .c_init = default_init,
354 .c_vendor = "Unknown",
355 .c_x86_vendor = X86_VENDOR_UNKNOWN,
356};
357
358static void __cpuinit get_model_name(struct cpuinfo_x86 *c) 358static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
359{ 359{
360 unsigned int *v; 360 unsigned int *v;
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index bff8dd191dd5..8bc64cfbe936 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -36,6 +36,7 @@
36 36
37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; 37static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); 38static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
39static DEFINE_PER_CPU(bool, thermal_throttle_active);
39 40
40static atomic_t therm_throt_en = ATOMIC_INIT(0); 41static atomic_t therm_throt_en = ATOMIC_INIT(0);
41 42
@@ -96,24 +97,27 @@ static int therm_throt_process(int curr)
96{ 97{
97 unsigned int cpu = smp_processor_id(); 98 unsigned int cpu = smp_processor_id();
98 __u64 tmp_jiffs = get_jiffies_64(); 99 __u64 tmp_jiffs = get_jiffies_64();
100 bool was_throttled = __get_cpu_var(thermal_throttle_active);
101 bool is_throttled = __get_cpu_var(thermal_throttle_active) = curr;
99 102
100 if (curr) 103 if (is_throttled)
101 __get_cpu_var(thermal_throttle_count)++; 104 __get_cpu_var(thermal_throttle_count)++;
102 105
103 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 106 if (!(was_throttled ^ is_throttled) &&
107 time_before64(tmp_jiffs, __get_cpu_var(next_check)))
104 return 0; 108 return 0;
105 109
106 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; 110 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
107 111
108 /* if we just entered the thermal event */ 112 /* if we just entered the thermal event */
109 if (curr) { 113 if (is_throttled) {
110 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 114 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
111 "cpu clock throttled (total events = %lu)\n", cpu, 115 "cpu clock throttled (total events = %lu)\n",
112 __get_cpu_var(thermal_throttle_count)); 116 cpu, __get_cpu_var(thermal_throttle_count));
113 117
114 add_taint(TAINT_MACHINE_CHECK); 118 add_taint(TAINT_MACHINE_CHECK);
115 } else { 119 } else if (was_throttled) {
116 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 120 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
117 } 121 }
118 122
119 return 1; 123 return 1;
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index b237c181aa41..396e35db7058 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -97,6 +97,7 @@ struct x86_pmu {
97 int num_counters_fixed; 97 int num_counters_fixed;
98 int counter_bits; 98 int counter_bits;
99 u64 counter_mask; 99 u64 counter_mask;
100 int apic;
100 u64 max_period; 101 u64 max_period;
101 u64 intel_ctrl; 102 u64 intel_ctrl;
102 void (*enable_bts)(u64 config); 103 void (*enable_bts)(u64 config);
@@ -116,8 +117,8 @@ static const u64 p6_perfmon_event_map[] =
116{ 117{
117 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, 118 [PERF_COUNT_HW_CPU_CYCLES] = 0x0079,
118 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, 119 [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0,
119 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0000, 120 [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e,
120 [PERF_COUNT_HW_CACHE_MISSES] = 0x0000, 121 [PERF_COUNT_HW_CACHE_MISSES] = 0x012e,
121 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, 122 [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4,
122 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, 123 [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5,
123 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, 124 [PERF_COUNT_HW_BUS_CYCLES] = 0x0062,
@@ -660,6 +661,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
660 661
661static bool reserve_pmc_hardware(void) 662static bool reserve_pmc_hardware(void)
662{ 663{
664#ifdef CONFIG_X86_LOCAL_APIC
663 int i; 665 int i;
664 666
665 if (nmi_watchdog == NMI_LOCAL_APIC) 667 if (nmi_watchdog == NMI_LOCAL_APIC)
@@ -674,9 +676,11 @@ static bool reserve_pmc_hardware(void)
674 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) 676 if (!reserve_evntsel_nmi(x86_pmu.eventsel + i))
675 goto eventsel_fail; 677 goto eventsel_fail;
676 } 678 }
679#endif
677 680
678 return true; 681 return true;
679 682
683#ifdef CONFIG_X86_LOCAL_APIC
680eventsel_fail: 684eventsel_fail:
681 for (i--; i >= 0; i--) 685 for (i--; i >= 0; i--)
682 release_evntsel_nmi(x86_pmu.eventsel + i); 686 release_evntsel_nmi(x86_pmu.eventsel + i);
@@ -691,10 +695,12 @@ perfctr_fail:
691 enable_lapic_nmi_watchdog(); 695 enable_lapic_nmi_watchdog();
692 696
693 return false; 697 return false;
698#endif
694} 699}
695 700
696static void release_pmc_hardware(void) 701static void release_pmc_hardware(void)
697{ 702{
703#ifdef CONFIG_X86_LOCAL_APIC
698 int i; 704 int i;
699 705
700 for (i = 0; i < x86_pmu.num_counters; i++) { 706 for (i = 0; i < x86_pmu.num_counters; i++) {
@@ -704,6 +710,7 @@ static void release_pmc_hardware(void)
704 710
705 if (nmi_watchdog == NMI_LOCAL_APIC) 711 if (nmi_watchdog == NMI_LOCAL_APIC)
706 enable_lapic_nmi_watchdog(); 712 enable_lapic_nmi_watchdog();
713#endif
707} 714}
708 715
709static inline bool bts_available(void) 716static inline bool bts_available(void)
@@ -934,6 +941,15 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
934 hwc->sample_period = x86_pmu.max_period; 941 hwc->sample_period = x86_pmu.max_period;
935 hwc->last_period = hwc->sample_period; 942 hwc->last_period = hwc->sample_period;
936 atomic64_set(&hwc->period_left, hwc->sample_period); 943 atomic64_set(&hwc->period_left, hwc->sample_period);
944 } else {
945 /*
946 * If we have a PMU initialized but no APIC
947 * interrupts, we cannot sample hardware
948 * counters (user-space has to fall back and
949 * sample via a hrtimer based software counter):
950 */
951 if (!x86_pmu.apic)
952 return -EOPNOTSUPP;
937 } 953 }
938 954
939 counter->destroy = hw_perf_counter_destroy; 955 counter->destroy = hw_perf_counter_destroy;
@@ -1755,18 +1771,22 @@ void smp_perf_pending_interrupt(struct pt_regs *regs)
1755 1771
1756void set_perf_counter_pending(void) 1772void set_perf_counter_pending(void)
1757{ 1773{
1774#ifdef CONFIG_X86_LOCAL_APIC
1758 apic->send_IPI_self(LOCAL_PENDING_VECTOR); 1775 apic->send_IPI_self(LOCAL_PENDING_VECTOR);
1776#endif
1759} 1777}
1760 1778
1761void perf_counters_lapic_init(void) 1779void perf_counters_lapic_init(void)
1762{ 1780{
1763 if (!x86_pmu_initialized()) 1781#ifdef CONFIG_X86_LOCAL_APIC
1782 if (!x86_pmu.apic || !x86_pmu_initialized())
1764 return; 1783 return;
1765 1784
1766 /* 1785 /*
1767 * Always use NMI for PMU 1786 * Always use NMI for PMU
1768 */ 1787 */
1769 apic_write(APIC_LVTPC, APIC_DM_NMI); 1788 apic_write(APIC_LVTPC, APIC_DM_NMI);
1789#endif
1770} 1790}
1771 1791
1772static int __kprobes 1792static int __kprobes
@@ -1790,7 +1810,9 @@ perf_counter_nmi_handler(struct notifier_block *self,
1790 1810
1791 regs = args->regs; 1811 regs = args->regs;
1792 1812
1813#ifdef CONFIG_X86_LOCAL_APIC
1793 apic_write(APIC_LVTPC, APIC_DM_NMI); 1814 apic_write(APIC_LVTPC, APIC_DM_NMI);
1815#endif
1794 /* 1816 /*
1795 * Can't rely on the handled return value to say it was our NMI, two 1817 * Can't rely on the handled return value to say it was our NMI, two
1796 * counters could trigger 'simultaneously' raising two back-to-back NMIs. 1818 * counters could trigger 'simultaneously' raising two back-to-back NMIs.
@@ -1821,6 +1843,7 @@ static struct x86_pmu p6_pmu = {
1821 .event_map = p6_pmu_event_map, 1843 .event_map = p6_pmu_event_map,
1822 .raw_event = p6_pmu_raw_event, 1844 .raw_event = p6_pmu_raw_event,
1823 .max_events = ARRAY_SIZE(p6_perfmon_event_map), 1845 .max_events = ARRAY_SIZE(p6_perfmon_event_map),
1846 .apic = 1,
1824 .max_period = (1ULL << 31) - 1, 1847 .max_period = (1ULL << 31) - 1,
1825 .version = 0, 1848 .version = 0,
1826 .num_counters = 2, 1849 .num_counters = 2,
@@ -1847,6 +1870,7 @@ static struct x86_pmu intel_pmu = {
1847 .event_map = intel_pmu_event_map, 1870 .event_map = intel_pmu_event_map,
1848 .raw_event = intel_pmu_raw_event, 1871 .raw_event = intel_pmu_raw_event,
1849 .max_events = ARRAY_SIZE(intel_perfmon_event_map), 1872 .max_events = ARRAY_SIZE(intel_perfmon_event_map),
1873 .apic = 1,
1850 /* 1874 /*
1851 * Intel PMCs cannot be accessed sanely above 32 bit width, 1875 * Intel PMCs cannot be accessed sanely above 32 bit width,
1852 * so we install an artificial 1<<31 period regardless of 1876 * so we install an artificial 1<<31 period regardless of
@@ -1872,6 +1896,7 @@ static struct x86_pmu amd_pmu = {
1872 .num_counters = 4, 1896 .num_counters = 4,
1873 .counter_bits = 48, 1897 .counter_bits = 48,
1874 .counter_mask = (1ULL << 48) - 1, 1898 .counter_mask = (1ULL << 48) - 1,
1899 .apic = 1,
1875 /* use highest bit to detect overflow */ 1900 /* use highest bit to detect overflow */
1876 .max_period = (1ULL << 47) - 1, 1901 .max_period = (1ULL << 47) - 1,
1877}; 1902};
@@ -1897,13 +1922,14 @@ static int p6_pmu_init(void)
1897 return -ENODEV; 1922 return -ENODEV;
1898 } 1923 }
1899 1924
1925 x86_pmu = p6_pmu;
1926
1900 if (!cpu_has_apic) { 1927 if (!cpu_has_apic) {
1901 pr_info("no Local APIC, try rebooting with lapic"); 1928 pr_info("no APIC, boot with the \"lapic\" boot parameter to force-enable it.\n");
1902 return -ENODEV; 1929 pr_info("no hardware sampling interrupt available.\n");
1930 x86_pmu.apic = 0;
1903 } 1931 }
1904 1932
1905 x86_pmu = p6_pmu;
1906
1907 return 0; 1933 return 0;
1908} 1934}
1909 1935
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 19ccf6d0dccf..fe26ba3e3451 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -354,7 +354,7 @@ void __init efi_init(void)
354 */ 354 */
355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2); 355 c16 = tmp = early_ioremap(efi.systab->fw_vendor, 2);
356 if (c16) { 356 if (c16) {
357 for (i = 0; i < sizeof(vendor) && *c16; ++i) 357 for (i = 0; i < sizeof(vendor) - 1 && *c16; ++i)
358 vendor[i] = *c16++; 358 vendor[i] = *c16++;
359 vendor[i] = '\0'; 359 vendor[i] = '\0';
360 } else 360 } else
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 834c9da8bf9d..a06e8d101844 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -405,7 +405,7 @@ EXPORT_SYMBOL(machine_real_restart);
405#endif /* CONFIG_X86_32 */ 405#endif /* CONFIG_X86_32 */
406 406
407/* 407/*
408 * Apple MacBook5,2 (2009 MacBook) needs reboot=p 408 * Some Apple MacBook and MacBookPro's needs reboot=p to be able to reboot
409 */ 409 */
410static int __init set_pci_reboot(const struct dmi_system_id *d) 410static int __init set_pci_reboot(const struct dmi_system_id *d)
411{ 411{
@@ -418,12 +418,20 @@ static int __init set_pci_reboot(const struct dmi_system_id *d)
418} 418}
419 419
420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { 420static struct dmi_system_id __initdata pci_reboot_dmi_table[] = {
421 { /* Handle problems with rebooting on Apple MacBook5,2 */ 421 { /* Handle problems with rebooting on Apple MacBook5 */
422 .callback = set_pci_reboot, 422 .callback = set_pci_reboot,
423 .ident = "Apple MacBook", 423 .ident = "Apple MacBook5",
424 .matches = { 424 .matches = {
425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), 425 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5,2"), 426 DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
427 },
428 },
429 { /* Handle problems with rebooting on Apple MacBookPro5 */
430 .callback = set_pci_reboot,
431 .ident = "Apple MacBookPro5",
432 .matches = {
433 DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
434 DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro5"),
427 }, 435 },
428 }, 436 },
429 { } 437 { }
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e1a368d21d4..71f4368b357e 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -275,15 +275,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
275 * use the TSC value at the transitions to calculate a pretty 275 * use the TSC value at the transitions to calculate a pretty
276 * good value for the TSC frequencty. 276 * good value for the TSC frequencty.
277 */ 277 */
278static inline int pit_verify_msb(unsigned char val)
279{
280 /* Ignore LSB */
281 inb(0x42);
282 return inb(0x42) == val;
283}
284
278static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap) 285static inline int pit_expect_msb(unsigned char val, u64 *tscp, unsigned long *deltap)
279{ 286{
280 int count; 287 int count;
281 u64 tsc = 0; 288 u64 tsc = 0;
282 289
283 for (count = 0; count < 50000; count++) { 290 for (count = 0; count < 50000; count++) {
284 /* Ignore LSB */ 291 if (!pit_verify_msb(val))
285 inb(0x42);
286 if (inb(0x42) != val)
287 break; 292 break;
288 tsc = get_cycles(); 293 tsc = get_cycles();
289 } 294 }
@@ -336,8 +341,7 @@ static unsigned long quick_pit_calibrate(void)
336 * to do that is to just read back the 16-bit counter 341 * to do that is to just read back the 16-bit counter
337 * once from the PIT. 342 * once from the PIT.
338 */ 343 */
339 inb(0x42); 344 pit_verify_msb(0);
340 inb(0x42);
341 345
342 if (pit_expect_msb(0xff, &tsc, &d1)) { 346 if (pit_expect_msb(0xff, &tsc, &d1)) {
343 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) { 347 for (i = 1; i <= MAX_QUICK_PIT_ITERATIONS; i++) {
@@ -348,8 +352,19 @@ static unsigned long quick_pit_calibrate(void)
348 * Iterate until the error is less than 500 ppm 352 * Iterate until the error is less than 500 ppm
349 */ 353 */
350 delta -= tsc; 354 delta -= tsc;
351 if (d1+d2 < delta >> 11) 355 if (d1+d2 >= delta >> 11)
352 goto success; 356 continue;
357
358 /*
359 * Check the PIT one more time to verify that
360 * all TSC reads were stable wrt the PIT.
361 *
362 * This also guarantees serialization of the
363 * last cycle read ('d2') in pit_expect_msb.
364 */
365 if (!pit_verify_msb(0xfe - i))
366 break;
367 goto success;
353 } 368 }
354 } 369 }
355 printk("Fast TSC calibration failed\n"); 370 printk("Fast TSC calibration failed\n");
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index b263423fbe2a..95a7289e4b0c 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -441,7 +441,7 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip,
441 ap.ds = __USER_DS; 441 ap.ds = __USER_DS;
442 ap.es = __USER_DS; 442 ap.es = __USER_DS;
443 ap.fs = __KERNEL_PERCPU; 443 ap.fs = __KERNEL_PERCPU;
444 ap.gs = 0; 444 ap.gs = __KERNEL_STACK_CANARY;
445 445
446 ap.eflags = 0; 446 ap.eflags = 0;
447 447
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4d6f0d293ee2..21f68e00524f 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -104,6 +104,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
104 ktime_t remaining; 104 ktime_t remaining;
105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; 105 struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
106 106
107 if (!ps->pit_timer.period)
108 return 0;
109
107 /* 110 /*
108 * The Counter does not stop when it reaches zero. In 111 * The Counter does not stop when it reaches zero. In
109 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to 112 * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7030b5f911bf..0ef5bb2b4043 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -489,16 +489,20 @@ static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, int lpage)
489 * 489 *
490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc 490 * If rmapp bit zero is one, (then rmap & ~1) points to a struct kvm_rmap_desc
491 * containing more mappings. 491 * containing more mappings.
492 *
493 * Returns the number of rmap entries before the spte was added or zero if
494 * the spte was not added.
495 *
492 */ 496 */
493static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage) 497static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
494{ 498{
495 struct kvm_mmu_page *sp; 499 struct kvm_mmu_page *sp;
496 struct kvm_rmap_desc *desc; 500 struct kvm_rmap_desc *desc;
497 unsigned long *rmapp; 501 unsigned long *rmapp;
498 int i; 502 int i, count = 0;
499 503
500 if (!is_rmap_pte(*spte)) 504 if (!is_rmap_pte(*spte))
501 return; 505 return count;
502 gfn = unalias_gfn(vcpu->kvm, gfn); 506 gfn = unalias_gfn(vcpu->kvm, gfn);
503 sp = page_header(__pa(spte)); 507 sp = page_header(__pa(spte));
504 sp->gfns[spte - sp->spt] = gfn; 508 sp->gfns[spte - sp->spt] = gfn;
@@ -515,8 +519,10 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
515 } else { 519 } else {
516 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte); 520 rmap_printk("rmap_add: %p %llx many->many\n", spte, *spte);
517 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul); 521 desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
518 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) 522 while (desc->shadow_ptes[RMAP_EXT-1] && desc->more) {
519 desc = desc->more; 523 desc = desc->more;
524 count += RMAP_EXT;
525 }
520 if (desc->shadow_ptes[RMAP_EXT-1]) { 526 if (desc->shadow_ptes[RMAP_EXT-1]) {
521 desc->more = mmu_alloc_rmap_desc(vcpu); 527 desc->more = mmu_alloc_rmap_desc(vcpu);
522 desc = desc->more; 528 desc = desc->more;
@@ -525,6 +531,7 @@ static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn, int lpage)
525 ; 531 ;
526 desc->shadow_ptes[i] = spte; 532 desc->shadow_ptes[i] = spte;
527 } 533 }
534 return count;
528} 535}
529 536
530static void rmap_desc_remove_entry(unsigned long *rmapp, 537static void rmap_desc_remove_entry(unsigned long *rmapp,
@@ -754,6 +761,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp)
754 return young; 761 return young;
755} 762}
756 763
764#define RMAP_RECYCLE_THRESHOLD 1000
765
766static void rmap_recycle(struct kvm_vcpu *vcpu, gfn_t gfn, int lpage)
767{
768 unsigned long *rmapp;
769
770 gfn = unalias_gfn(vcpu->kvm, gfn);
771 rmapp = gfn_to_rmap(vcpu->kvm, gfn, lpage);
772
773 kvm_unmap_rmapp(vcpu->kvm, rmapp);
774 kvm_flush_remote_tlbs(vcpu->kvm);
775}
776
757int kvm_age_hva(struct kvm *kvm, unsigned long hva) 777int kvm_age_hva(struct kvm *kvm, unsigned long hva)
758{ 778{
759 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 779 return kvm_handle_hva(kvm, hva, kvm_age_rmapp);
@@ -1407,24 +1427,25 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp)
1407 */ 1427 */
1408void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages) 1428void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
1409{ 1429{
1430 int used_pages;
1431
1432 used_pages = kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages;
1433 used_pages = max(0, used_pages);
1434
1410 /* 1435 /*
1411 * If we set the number of mmu pages to be smaller be than the 1436 * If we set the number of mmu pages to be smaller be than the
1412 * number of actived pages , we must to free some mmu pages before we 1437 * number of actived pages , we must to free some mmu pages before we
1413 * change the value 1438 * change the value
1414 */ 1439 */
1415 1440
1416 if ((kvm->arch.n_alloc_mmu_pages - kvm->arch.n_free_mmu_pages) > 1441 if (used_pages > kvm_nr_mmu_pages) {
1417 kvm_nr_mmu_pages) { 1442 while (used_pages > kvm_nr_mmu_pages) {
1418 int n_used_mmu_pages = kvm->arch.n_alloc_mmu_pages
1419 - kvm->arch.n_free_mmu_pages;
1420
1421 while (n_used_mmu_pages > kvm_nr_mmu_pages) {
1422 struct kvm_mmu_page *page; 1443 struct kvm_mmu_page *page;
1423 1444
1424 page = container_of(kvm->arch.active_mmu_pages.prev, 1445 page = container_of(kvm->arch.active_mmu_pages.prev,
1425 struct kvm_mmu_page, link); 1446 struct kvm_mmu_page, link);
1426 kvm_mmu_zap_page(kvm, page); 1447 kvm_mmu_zap_page(kvm, page);
1427 n_used_mmu_pages--; 1448 used_pages--;
1428 } 1449 }
1429 kvm->arch.n_free_mmu_pages = 0; 1450 kvm->arch.n_free_mmu_pages = 0;
1430 } 1451 }
@@ -1740,6 +1761,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1740{ 1761{
1741 int was_rmapped = 0; 1762 int was_rmapped = 0;
1742 int was_writeble = is_writeble_pte(*shadow_pte); 1763 int was_writeble = is_writeble_pte(*shadow_pte);
1764 int rmap_count;
1743 1765
1744 pgprintk("%s: spte %llx access %x write_fault %d" 1766 pgprintk("%s: spte %llx access %x write_fault %d"
1745 " user_fault %d gfn %lx\n", 1767 " user_fault %d gfn %lx\n",
@@ -1781,9 +1803,11 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
1781 1803
1782 page_header_update_slot(vcpu->kvm, shadow_pte, gfn); 1804 page_header_update_slot(vcpu->kvm, shadow_pte, gfn);
1783 if (!was_rmapped) { 1805 if (!was_rmapped) {
1784 rmap_add(vcpu, shadow_pte, gfn, largepage); 1806 rmap_count = rmap_add(vcpu, shadow_pte, gfn, largepage);
1785 if (!is_rmap_pte(*shadow_pte)) 1807 if (!is_rmap_pte(*shadow_pte))
1786 kvm_release_pfn_clean(pfn); 1808 kvm_release_pfn_clean(pfn);
1809 if (rmap_count > RMAP_RECYCLE_THRESHOLD)
1810 rmap_recycle(vcpu, gfn, largepage);
1787 } else { 1811 } else {
1788 if (was_writeble) 1812 if (was_writeble)
1789 kvm_release_pfn_dirty(pfn); 1813 kvm_release_pfn_dirty(pfn);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 71510e07e69e..b1f658ad2f06 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -711,6 +711,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
711 svm->vmcb->control.tsc_offset += delta; 711 svm->vmcb->control.tsc_offset += delta;
712 vcpu->cpu = cpu; 712 vcpu->cpu = cpu;
713 kvm_migrate_timers(vcpu); 713 kvm_migrate_timers(vcpu);
714 svm->asid_generation = 0;
714 } 715 }
715 716
716 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++) 717 for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
@@ -1031,7 +1032,6 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data)
1031 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID; 1032 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1032 } 1033 }
1033 1034
1034 svm->vcpu.cpu = svm_data->cpu;
1035 svm->asid_generation = svm_data->asid_generation; 1035 svm->asid_generation = svm_data->asid_generation;
1036 svm->vmcb->control.asid = svm_data->next_asid++; 1036 svm->vmcb->control.asid = svm_data->next_asid++;
1037} 1037}
@@ -2300,8 +2300,8 @@ static void pre_svm_run(struct vcpu_svm *svm)
2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu); 2300 struct svm_cpu_data *svm_data = per_cpu(svm_data, cpu);
2301 2301
2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 2302 svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
2303 if (svm->vcpu.cpu != cpu || 2303 /* FIXME: handle wraparound of asid_generation */
2304 svm->asid_generation != svm_data->asid_generation) 2304 if (svm->asid_generation != svm_data->asid_generation)
2305 new_asid(svm, svm_data); 2305 new_asid(svm, svm_data);
2306} 2306}
2307 2307
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 356a0ce85c68..29f912927a58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3157,8 +3157,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3157 struct vcpu_vmx *vmx = to_vmx(vcpu); 3157 struct vcpu_vmx *vmx = to_vmx(vcpu);
3158 enum emulation_result err = EMULATE_DONE; 3158 enum emulation_result err = EMULATE_DONE;
3159 3159
3160 preempt_enable();
3161 local_irq_enable(); 3160 local_irq_enable();
3161 preempt_enable();
3162 3162
3163 while (!guest_state_valid(vcpu)) { 3163 while (!guest_state_valid(vcpu)) {
3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); 3164 err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
@@ -3168,7 +3168,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3168 3168
3169 if (err != EMULATE_DONE) { 3169 if (err != EMULATE_DONE) {
3170 kvm_report_emulation_failure(vcpu, "emulation failure"); 3170 kvm_report_emulation_failure(vcpu, "emulation failure");
3171 return; 3171 break;
3172 } 3172 }
3173 3173
3174 if (signal_pending(current)) 3174 if (signal_pending(current))
@@ -3177,8 +3177,8 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
3177 schedule(); 3177 schedule();
3178 } 3178 }
3179 3179
3180 local_irq_disable();
3181 preempt_disable(); 3180 preempt_disable();
3181 local_irq_disable();
3182 3182
3183 vmx->invalid_state_emulation_result = err; 3183 vmx->invalid_state_emulation_result = err;
3184} 3184}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fe5474aec41a..3d4529011828 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -704,11 +704,48 @@ static bool msr_mtrr_valid(unsigned msr)
704 return false; 704 return false;
705} 705}
706 706
707static bool valid_pat_type(unsigned t)
708{
709 return t < 8 && (1 << t) & 0xf3; /* 0, 1, 4, 5, 6, 7 */
710}
711
712static bool valid_mtrr_type(unsigned t)
713{
714 return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
715}
716
717static bool mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
718{
719 int i;
720
721 if (!msr_mtrr_valid(msr))
722 return false;
723
724 if (msr == MSR_IA32_CR_PAT) {
725 for (i = 0; i < 8; i++)
726 if (!valid_pat_type((data >> (i * 8)) & 0xff))
727 return false;
728 return true;
729 } else if (msr == MSR_MTRRdefType) {
730 if (data & ~0xcff)
731 return false;
732 return valid_mtrr_type(data & 0xff);
733 } else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
734 for (i = 0; i < 8 ; i++)
735 if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
736 return false;
737 return true;
738 }
739
740 /* variable MTRRs */
741 return valid_mtrr_type(data & 0xff);
742}
743
707static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) 744static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
708{ 745{
709 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; 746 u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges;
710 747
711 if (!msr_mtrr_valid(msr)) 748 if (!mtrr_valid(vcpu, msr, data))
712 return 1; 749 return 1;
713 750
714 if (msr == MSR_MTRRdefType) { 751 if (msr == MSR_MTRRdefType) {
@@ -1079,14 +1116,13 @@ long kvm_arch_dev_ioctl(struct file *filp,
1079 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list)) 1116 if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
1080 goto out; 1117 goto out;
1081 r = -E2BIG; 1118 r = -E2BIG;
1082 if (n < num_msrs_to_save) 1119 if (n < msr_list.nmsrs)
1083 goto out; 1120 goto out;
1084 r = -EFAULT; 1121 r = -EFAULT;
1085 if (copy_to_user(user_msr_list->indices, &msrs_to_save, 1122 if (copy_to_user(user_msr_list->indices, &msrs_to_save,
1086 num_msrs_to_save * sizeof(u32))) 1123 num_msrs_to_save * sizeof(u32)))
1087 goto out; 1124 goto out;
1088 if (copy_to_user(user_msr_list->indices 1125 if (copy_to_user(user_msr_list->indices + num_msrs_to_save,
1089 + num_msrs_to_save * sizeof(u32),
1090 &emulated_msrs, 1126 &emulated_msrs,
1091 ARRAY_SIZE(emulated_msrs) * sizeof(u32))) 1127 ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
1092 goto out; 1128 goto out;
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 958c1fa41900..fe3eba5d6b3e 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -219,6 +219,8 @@ enum {
219 AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */ 219 AHCI_HFLAG_SECT255 = (1 << 8), /* max 255 sectors */
220 AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */ 220 AHCI_HFLAG_YES_NCQ = (1 << 9), /* force NCQ cap on */
221 AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */ 221 AHCI_HFLAG_NO_SUSPEND = (1 << 10), /* don't suspend */
222 AHCI_HFLAG_SRST_TOUT_IS_OFFLINE = (1 << 11), /* treat SRST timeout as
223 link offline */
222 224
223 /* ap->flags bits */ 225 /* ap->flags bits */
224 226
@@ -1663,6 +1665,7 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1663 int (*check_ready)(struct ata_link *link)) 1665 int (*check_ready)(struct ata_link *link))
1664{ 1666{
1665 struct ata_port *ap = link->ap; 1667 struct ata_port *ap = link->ap;
1668 struct ahci_host_priv *hpriv = ap->host->private_data;
1666 const char *reason = NULL; 1669 const char *reason = NULL;
1667 unsigned long now, msecs; 1670 unsigned long now, msecs;
1668 struct ata_taskfile tf; 1671 struct ata_taskfile tf;
@@ -1701,12 +1704,21 @@ static int ahci_do_softreset(struct ata_link *link, unsigned int *class,
1701 1704
1702 /* wait for link to become ready */ 1705 /* wait for link to become ready */
1703 rc = ata_wait_after_reset(link, deadline, check_ready); 1706 rc = ata_wait_after_reset(link, deadline, check_ready);
1704 /* link occupied, -ENODEV too is an error */ 1707 if (rc == -EBUSY && hpriv->flags & AHCI_HFLAG_SRST_TOUT_IS_OFFLINE) {
1705 if (rc) { 1708 /*
1709 * Workaround for cases where link online status can't
1710 * be trusted. Treat device readiness timeout as link
1711 * offline.
1712 */
1713 ata_link_printk(link, KERN_INFO,
1714 "device not ready, treating as offline\n");
1715 *class = ATA_DEV_NONE;
1716 } else if (rc) {
1717 /* link occupied, -ENODEV too is an error */
1706 reason = "device not ready"; 1718 reason = "device not ready";
1707 goto fail; 1719 goto fail;
1708 } 1720 } else
1709 *class = ahci_dev_classify(ap); 1721 *class = ahci_dev_classify(ap);
1710 1722
1711 DPRINTK("EXIT, class=%u\n", *class); 1723 DPRINTK("EXIT, class=%u\n", *class);
1712 return 0; 1724 return 0;
@@ -1773,7 +1785,8 @@ static int ahci_sb600_softreset(struct ata_link *link, unsigned int *class,
1773 irq_sts = readl(port_mmio + PORT_IRQ_STAT); 1785 irq_sts = readl(port_mmio + PORT_IRQ_STAT);
1774 if (irq_sts & PORT_IRQ_BAD_PMP) { 1786 if (irq_sts & PORT_IRQ_BAD_PMP) {
1775 ata_link_printk(link, KERN_WARNING, 1787 ata_link_printk(link, KERN_WARNING,
1776 "failed due to HW bug, retry pmp=0\n"); 1788 "applying SB600 PMP SRST workaround "
1789 "and retrying\n");
1777 rc = ahci_do_softreset(link, class, 0, deadline, 1790 rc = ahci_do_softreset(link, class, 0, deadline,
1778 ahci_check_ready); 1791 ahci_check_ready);
1779 } 1792 }
@@ -2726,6 +2739,56 @@ static bool ahci_broken_suspend(struct pci_dev *pdev)
2726 return !ver || strcmp(ver, dmi->driver_data) < 0; 2739 return !ver || strcmp(ver, dmi->driver_data) < 0;
2727} 2740}
2728 2741
2742static bool ahci_broken_online(struct pci_dev *pdev)
2743{
2744#define ENCODE_BUSDEVFN(bus, slot, func) \
2745 (void *)(unsigned long)(((bus) << 8) | PCI_DEVFN((slot), (func)))
2746 static const struct dmi_system_id sysids[] = {
2747 /*
2748 * There are several gigabyte boards which use
2749 * SIMG5723s configured as hardware RAID. Certain
2750 * 5723 firmware revisions shipped there keep the link
2751 * online but fail to answer properly to SRST or
2752 * IDENTIFY when no device is attached downstream
2753 * causing libata to retry quite a few times leading
2754 * to excessive detection delay.
2755 *
2756 * As these firmwares respond to the second reset try
2757 * with invalid device signature, considering unknown
2758 * sig as offline works around the problem acceptably.
2759 */
2760 {
2761 .ident = "EP45-DQ6",
2762 .matches = {
2763 DMI_MATCH(DMI_BOARD_VENDOR,
2764 "Gigabyte Technology Co., Ltd."),
2765 DMI_MATCH(DMI_BOARD_NAME, "EP45-DQ6"),
2766 },
2767 .driver_data = ENCODE_BUSDEVFN(0x0a, 0x00, 0),
2768 },
2769 {
2770 .ident = "EP45-DS5",
2771 .matches = {
2772 DMI_MATCH(DMI_BOARD_VENDOR,
2773 "Gigabyte Technology Co., Ltd."),
2774 DMI_MATCH(DMI_BOARD_NAME, "EP45-DS5"),
2775 },
2776 .driver_data = ENCODE_BUSDEVFN(0x03, 0x00, 0),
2777 },
2778 { } /* terminate list */
2779 };
2780#undef ENCODE_BUSDEVFN
2781 const struct dmi_system_id *dmi = dmi_first_match(sysids);
2782 unsigned int val;
2783
2784 if (!dmi)
2785 return false;
2786
2787 val = (unsigned long)dmi->driver_data;
2788
2789 return pdev->bus->number == (val >> 8) && pdev->devfn == (val & 0xff);
2790}
2791
2729static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) 2792static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2730{ 2793{
2731 static int printed_version; 2794 static int printed_version;
@@ -2841,6 +2904,12 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2841 "BIOS update required for suspend/resume\n"); 2904 "BIOS update required for suspend/resume\n");
2842 } 2905 }
2843 2906
2907 if (ahci_broken_online(pdev)) {
2908 hpriv->flags |= AHCI_HFLAG_SRST_TOUT_IS_OFFLINE;
2909 dev_info(&pdev->dev,
2910 "online status unreliable, applying workaround\n");
2911 }
2912
2844 /* CAP.NP sometimes indicate the index of the last enabled 2913 /* CAP.NP sometimes indicate the index of the last enabled
2845 * port, at other times, that of the last possible port, so 2914 * port, at other times, that of the last possible port, so
2846 * determining the maximum port number requires looking at 2915 * determining the maximum port number requires looking at
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 8ac98ff16d7d..072ba5ea138f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4302,6 +4302,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
4302 { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA }, 4302 { "WDC WD2500JD-00HBB0", "WD-WMAL71490727", ATA_HORKAGE_BROKEN_HPA },
4303 { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA }, 4303 { "MAXTOR 6L080L4", "A93.0500", ATA_HORKAGE_BROKEN_HPA },
4304 4304
4305 /* this one allows HPA unlocking but fails IOs on the area */
4306 { "OCZ-VERTEX", "1.30", ATA_HORKAGE_BROKEN_HPA },
4307
4305 /* Devices which report 1 sector over size HPA */ 4308 /* Devices which report 1 sector over size HPA */
4306 { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, }, 4309 { "ST340823A", NULL, ATA_HORKAGE_HPA_SIZE, },
4307 { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, }, 4310 { "ST320413A", NULL, ATA_HORKAGE_HPA_SIZE, },
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
index 5702affcb325..41c94b1ae493 100644
--- a/drivers/ata/pata_at91.c
+++ b/drivers/ata/pata_at91.c
@@ -250,7 +250,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
250 ata_port_desc(ap, "no IRQ, using PIO polling"); 250 ata_port_desc(ap, "no IRQ, using PIO polling");
251 } 251 }
252 252
253 info = kzalloc(sizeof(*info), GFP_KERNEL); 253 info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
254 254
255 if (!info) { 255 if (!info) {
256 dev_err(dev, "failed to allocate memory for private data\n"); 256 dev_err(dev, "failed to allocate memory for private data\n");
@@ -275,7 +275,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
275 if (!info->ide_addr) { 275 if (!info->ide_addr) {
276 dev_err(dev, "failed to map IO base\n"); 276 dev_err(dev, "failed to map IO base\n");
277 ret = -ENOMEM; 277 ret = -ENOMEM;
278 goto err_ide_ioremap; 278 goto err_put;
279 } 279 }
280 280
281 info->alt_addr = devm_ioremap(dev, 281 info->alt_addr = devm_ioremap(dev,
@@ -284,7 +284,7 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
284 if (!info->alt_addr) { 284 if (!info->alt_addr) {
285 dev_err(dev, "failed to map CTL base\n"); 285 dev_err(dev, "failed to map CTL base\n");
286 ret = -ENOMEM; 286 ret = -ENOMEM;
287 goto err_alt_ioremap; 287 goto err_put;
288 } 288 }
289 289
290 ap->ioaddr.cmd_addr = info->ide_addr; 290 ap->ioaddr.cmd_addr = info->ide_addr;
@@ -303,13 +303,8 @@ static int __devinit pata_at91_probe(struct platform_device *pdev)
303 irq ? ata_sff_interrupt : NULL, 303 irq ? ata_sff_interrupt : NULL,
304 irq_flags, &pata_at91_sht); 304 irq_flags, &pata_at91_sht);
305 305
306err_alt_ioremap: 306err_put:
307 devm_iounmap(dev, info->ide_addr);
308
309err_ide_ioremap:
310 clk_put(info->mck); 307 clk_put(info->mck);
311 kfree(info);
312
313 return ret; 308 return ret;
314} 309}
315 310
@@ -317,7 +312,6 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
317{ 312{
318 struct ata_host *host = dev_get_drvdata(&pdev->dev); 313 struct ata_host *host = dev_get_drvdata(&pdev->dev);
319 struct at91_ide_info *info; 314 struct at91_ide_info *info;
320 struct device *dev = &pdev->dev;
321 315
322 if (!host) 316 if (!host)
323 return 0; 317 return 0;
@@ -328,11 +322,8 @@ static int __devexit pata_at91_remove(struct platform_device *pdev)
328 if (!info) 322 if (!info)
329 return 0; 323 return 0;
330 324
331 devm_iounmap(dev, info->ide_addr);
332 devm_iounmap(dev, info->alt_addr);
333 clk_put(info->mck); 325 clk_put(info->mck);
334 326
335 kfree(info);
336 return 0; 327 return 0;
337} 328}
338 329
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index bec0b8ade66d..45915566e4e9 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -1,6 +1,7 @@
1/* 1/*
2 * pata_atiixp.c - ATI PATA for new ATA layer 2 * pata_atiixp.c - ATI PATA for new ATA layer
3 * (C) 2005 Red Hat Inc 3 * (C) 2005 Red Hat Inc
4 * (C) 2009 Bartlomiej Zolnierkiewicz
4 * 5 *
5 * Based on 6 * Based on
6 * 7 *
@@ -61,20 +62,19 @@ static void atiixp_set_pio_timing(struct ata_port *ap, struct ata_device *adev,
61 62
62 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 63 struct pci_dev *pdev = to_pci_dev(ap->host->dev);
63 int dn = 2 * ap->port_no + adev->devno; 64 int dn = 2 * ap->port_no + adev->devno;
64
65 /* Check this is correct - the order is odd in both drivers */
66 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); 65 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
67 u16 pio_mode_data, pio_timing_data; 66 u32 pio_timing_data;
67 u16 pio_mode_data;
68 68
69 pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data); 69 pci_read_config_word(pdev, ATIIXP_IDE_PIO_MODE, &pio_mode_data);
70 pio_mode_data &= ~(0x7 << (4 * dn)); 70 pio_mode_data &= ~(0x7 << (4 * dn));
71 pio_mode_data |= pio << (4 * dn); 71 pio_mode_data |= pio << (4 * dn);
72 pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data); 72 pci_write_config_word(pdev, ATIIXP_IDE_PIO_MODE, pio_mode_data);
73 73
74 pci_read_config_word(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data); 74 pci_read_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, &pio_timing_data);
75 pio_timing_data &= ~(0xFF << timing_shift); 75 pio_timing_data &= ~(0xFF << timing_shift);
76 pio_timing_data |= (pio_timings[pio] << timing_shift); 76 pio_timing_data |= (pio_timings[pio] << timing_shift);
77 pci_write_config_word(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data); 77 pci_write_config_dword(pdev, ATIIXP_IDE_PIO_TIMING, pio_timing_data);
78} 78}
79 79
80/** 80/**
@@ -119,16 +119,17 @@ static void atiixp_set_dmamode(struct ata_port *ap, struct ata_device *adev)
119 udma_mode_data |= dma << (4 * dn); 119 udma_mode_data |= dma << (4 * dn);
120 pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data); 120 pci_write_config_word(pdev, ATIIXP_IDE_UDMA_MODE, udma_mode_data);
121 } else { 121 } else {
122 u16 mwdma_timing_data;
123 /* Check this is correct - the order is odd in both drivers */
124 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1); 122 int timing_shift = (16 * ap->port_no) + 8 * (adev->devno ^ 1);
123 u32 mwdma_timing_data;
125 124
126 dma -= XFER_MW_DMA_0; 125 dma -= XFER_MW_DMA_0;
127 126
128 pci_read_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, &mwdma_timing_data); 127 pci_read_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
128 &mwdma_timing_data);
129 mwdma_timing_data &= ~(0xFF << timing_shift); 129 mwdma_timing_data &= ~(0xFF << timing_shift);
130 mwdma_timing_data |= (mwdma_timings[dma] << timing_shift); 130 mwdma_timing_data |= (mwdma_timings[dma] << timing_shift);
131 pci_write_config_word(pdev, ATIIXP_IDE_MWDMA_TIMING, mwdma_timing_data); 131 pci_write_config_dword(pdev, ATIIXP_IDE_MWDMA_TIMING,
132 mwdma_timing_data);
132 } 133 }
133 /* 134 /*
134 * We must now look at the PIO mode situation. We may need to 135 * We must now look at the PIO mode situation. We may need to
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index b2d11f300c39..86a40582999c 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -602,6 +602,7 @@ MODULE_VERSION(DRV_VERSION);
602 602
603static int adma_enabled; 603static int adma_enabled;
604static int swncq_enabled = 1; 604static int swncq_enabled = 1;
605static int msi_enabled;
605 606
606static void nv_adma_register_mode(struct ata_port *ap) 607static void nv_adma_register_mode(struct ata_port *ap)
607{ 608{
@@ -2459,6 +2460,11 @@ static int nv_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
2459 } else if (type == SWNCQ) 2460 } else if (type == SWNCQ)
2460 nv_swncq_host_init(host); 2461 nv_swncq_host_init(host);
2461 2462
2463 if (msi_enabled) {
2464 dev_printk(KERN_NOTICE, &pdev->dev, "Using MSI\n");
2465 pci_enable_msi(pdev);
2466 }
2467
2462 pci_set_master(pdev); 2468 pci_set_master(pdev);
2463 return ata_host_activate(host, pdev->irq, ipriv->irq_handler, 2469 return ata_host_activate(host, pdev->irq, ipriv->irq_handler,
2464 IRQF_SHARED, ipriv->sht); 2470 IRQF_SHARED, ipriv->sht);
@@ -2558,4 +2564,6 @@ module_param_named(adma, adma_enabled, bool, 0444);
2558MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)"); 2564MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: false)");
2559module_param_named(swncq, swncq_enabled, bool, 0444); 2565module_param_named(swncq, swncq_enabled, bool, 0444);
2560MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)"); 2566MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: true)");
2567module_param_named(msi, msi_enabled, bool, 0444);
2568MODULE_PARM_DESC(msi, "Enable use of MSI (Default: false)");
2561 2569
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 81cb01bfc356..456594bd97bc 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -483,9 +483,6 @@ int platform_driver_register(struct platform_driver *drv)
483 drv->driver.remove = platform_drv_remove; 483 drv->driver.remove = platform_drv_remove;
484 if (drv->shutdown) 484 if (drv->shutdown)
485 drv->driver.shutdown = platform_drv_shutdown; 485 drv->driver.shutdown = platform_drv_shutdown;
486 if (drv->suspend || drv->resume)
487 pr_warning("Platform driver '%s' needs updating - please use "
488 "dev_pm_ops\n", drv->driver.name);
489 486
490 return driver_register(&drv->driver); 487 return driver_register(&drv->driver);
491} 488}
diff --git a/drivers/char/pty.c b/drivers/char/pty.c
index 6e6942c45f5b..d083c73d784a 100644
--- a/drivers/char/pty.c
+++ b/drivers/char/pty.c
@@ -144,6 +144,8 @@ static int pty_write(struct tty_struct *tty, const unsigned char *buf,
144 144
145static int pty_write_room(struct tty_struct *tty) 145static int pty_write_room(struct tty_struct *tty)
146{ 146{
147 if (tty->stopped)
148 return 0;
147 return pty_space(tty->link); 149 return pty_space(tty->link);
148} 150}
149 151
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index b4a3dbcebe9b..f85aaf21e783 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -566,7 +566,7 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
566 566
567 ret = drm_vblank_get(dev, crtc); 567 ret = drm_vblank_get(dev, crtc);
568 if (ret) { 568 if (ret) {
569 DRM_ERROR("failed to acquire vblank counter, %d\n", ret); 569 DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
570 return ret; 570 return ret;
571 } 571 }
572 seq = drm_vblank_count(dev, crtc); 572 seq = drm_vblank_count(dev, crtc);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 54f492a488a9..7914097b09c6 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -566,6 +566,8 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
566 found_it = 1; 566 found_it = 1;
567 /* if equal delete the probed mode */ 567 /* if equal delete the probed mode */
568 mode->status = pmode->status; 568 mode->status = pmode->status;
569 /* Merge type bits together */
570 mode->type |= pmode->type;
569 list_del(&pmode->head); 571 list_del(&pmode->head);
570 drm_mode_destroy(connector->dev, pmode); 572 drm_mode_destroy(connector->dev, pmode);
571 break; 573 break;
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 83aee80e77a6..7ebc84c2881e 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -190,7 +190,7 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int pipe)
190 low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL; 190 low_frame = pipe ? PIPEBFRAMEPIXEL : PIPEAFRAMEPIXEL;
191 191
192 if (!i915_pipe_enabled(dev, pipe)) { 192 if (!i915_pipe_enabled(dev, pipe)) {
193 DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); 193 DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe);
194 return 0; 194 return 0;
195 } 195 }
196 196
@@ -219,7 +219,7 @@ u32 gm45_get_vblank_counter(struct drm_device *dev, int pipe)
219 int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45; 219 int reg = pipe ? PIPEB_FRMCOUNT_GM45 : PIPEA_FRMCOUNT_GM45;
220 220
221 if (!i915_pipe_enabled(dev, pipe)) { 221 if (!i915_pipe_enabled(dev, pipe)) {
222 DRM_ERROR("trying to get vblank count for disabled pipe %d\n", pipe); 222 DRM_DEBUG("trying to get vblank count for disabled pipe %d\n", pipe);
223 return 0; 223 return 0;
224 } 224 }
225 225
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9b..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
1974 /* otherwise we have to go forward and ... */ 1975 /* otherwise we have to go forward and ... */
1975 mddev->events ++; 1976 mddev->events ++;
1976 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 1977 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1977 /* .. if the array isn't clean, insist on an odd 'events' */ 1978 /* .. if the array isn't clean, an 'even' event must also go
1978 if ((mddev->events&1)==0) { 1979 * to spares. */
1979 mddev->events++; 1980 if ((mddev->events&1)==0)
1980 nospares = 0; 1981 nospares = 0;
1981 }
1982 } else { 1982 } else {
1983 /* otherwise insist on an even 'events' (for clean states) */ 1983 /* otherwise an 'odd' event must go to spares */
1984 if ((mddev->events&1)) { 1984 if ((mddev->events&1))
1985 mddev->events++;
1986 nospares = 0; 1985 nospares = 0;
1987 }
1988 } 1986 }
1989 } 1987 }
1990 1988
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3601 if (max < mddev->resync_min) 3599 if (max < mddev->resync_min)
3602 return -EINVAL; 3600 return -EINVAL;
3603 if (max < mddev->resync_max && 3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3604 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3605 return -EBUSY; 3604 return -EBUSY;
3606 3605
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4304 struct gendisk *disk = mddev->gendisk; 4303 struct gendisk *disk = mddev->gendisk;
4305 mdk_rdev_t *rdev; 4304 mdk_rdev_t *rdev;
4306 4305
4306 mutex_lock(&mddev->open_mutex);
4307 if (atomic_read(&mddev->openers) > is_open) { 4307 if (atomic_read(&mddev->openers) > is_open) {
4308 printk("md: %s still in use.\n",mdname(mddev)); 4308 printk("md: %s still in use.\n",mdname(mddev));
4309 return -EBUSY; 4309 err = -EBUSY;
4310 } 4310 } else if (mddev->pers) {
4311
4312 if (mddev->pers) {
4313 4311
4314 if (mddev->sync_thread) { 4312 if (mddev->sync_thread) {
4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4313 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4367 set_disk_ro(disk, 1); 4365 set_disk_ro(disk, 1);
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4369 } 4367 }
4370 4368out:
4369 mutex_unlock(&mddev->open_mutex);
4370 if (err)
4371 return err;
4371 /* 4372 /*
4372 * Free resources if final stop 4373 * Free resources if final stop
4373 */ 4374 */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4433 blk_integrity_unregister(disk); 4434 blk_integrity_unregister(disk);
4434 md_new_event(mddev); 4435 md_new_event(mddev);
4435 sysfs_notify_dirent(mddev->sysfs_state); 4436 sysfs_notify_dirent(mddev->sysfs_state);
4436out:
4437 return err; 4437 return err;
4438} 4438}
4439 4439
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5518 } 5518 }
5519 BUG_ON(mddev != bdev->bd_disk->private_data); 5519 BUG_ON(mddev != bdev->bd_disk->private_data);
5520 5520
5521 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5521 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5522 goto out; 5522 goto out;
5523 5523
5524 err = 0; 5524 err = 0;
5525 atomic_inc(&mddev->openers); 5525 atomic_inc(&mddev->openers);
5526 mddev_unlock(mddev); 5526 mutex_unlock(&mddev->open_mutex);
5527 5527
5528 check_disk_change(bdev); 5528 check_disk_change(bdev);
5529 out: 5529 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf9..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee67dfa..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3786 sector_nr = raid5_size(mddev, 0, 0) 3786 sector_nr = raid5_size(mddev, 0, 0)
3787 - conf->reshape_progress; 3787 - conf->reshape_progress;
3788 } else if (mddev->delta_disks > 0 && 3788 } else if (mddev->delta_disks >= 0 &&
3789 conf->reshape_progress > 0) 3789 conf->reshape_progress > 0)
3790 sector_nr = conf->reshape_progress; 3790 sector_nr = conf->reshape_progress;
3791 sector_div(sector_nr, new_data_disks); 3791 sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
4509 (old_disks-max_degraded)); 4509 (old_disks-max_degraded));
4510 /* here_old is the first stripe that we might need to read 4510 /* here_old is the first stripe that we might need to read
4511 * from */ 4511 * from */
4512 if (here_new >= here_old) { 4512 if (mddev->delta_disks == 0) {
4513 /* We cannot be sure it is safe to start an in-place
4514 * reshape. It is only safe if user-space if monitoring
4515 * and taking constant backups.
4516 * mdadm always starts a situation like this in
4517 * readonly mode so it can take control before
4518 * allowing any writes. So just check for that.
4519 */
4520 if ((here_new * mddev->new_chunk_sectors !=
4521 here_old * mddev->chunk_sectors) ||
4522 mddev->ro == 0) {
4523 printk(KERN_ERR "raid5: in-place reshape must be started"
4524 " in read-only mode - aborting\n");
4525 return -EINVAL;
4526 }
4527 } else if (mddev->delta_disks < 0
4528 ? (here_new * mddev->new_chunk_sectors <=
4529 here_old * mddev->chunk_sectors)
4530 : (here_new * mddev->new_chunk_sectors >=
4531 here_old * mddev->chunk_sectors)) {
4513 /* Reading from the same stripe as writing to - bad */ 4532 /* Reading from the same stripe as writing to - bad */
4514 printk(KERN_ERR "raid5: reshape_position too early for " 4533 printk(KERN_ERR "raid5: reshape_position too early for "
4515 "auto-recovery - aborting.\n"); 4534 "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
5078 mddev->degraded--; 5097 mddev->degraded--;
5079 for (d = conf->raid_disks ; 5098 for (d = conf->raid_disks ;
5080 d < conf->raid_disks - mddev->delta_disks; 5099 d < conf->raid_disks - mddev->delta_disks;
5081 d++) 5100 d++) {
5082 raid5_remove_disk(mddev, d); 5101 mdk_rdev_t *rdev = conf->disks[d].rdev;
5102 if (rdev && raid5_remove_disk(mddev, d) == 0) {
5103 char nm[20];
5104 sprintf(nm, "rd%d", rdev->raid_disk);
5105 sysfs_remove_link(&mddev->kobj, nm);
5106 rdev->raid_disk = -1;
5107 }
5108 }
5083 } 5109 }
5084 mddev->layout = conf->algorithm; 5110 mddev->layout = conf->algorithm;
5085 mddev->chunk_sectors = conf->chunk_sectors; 5111 mddev->chunk_sectors = conf->chunk_sectors;
diff --git a/drivers/mtd/maps/sbc8240.c b/drivers/mtd/maps/sbc8240.c
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/drivers/mtd/maps/sbc8240.c
+++ /dev/null
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 0f2034c3ed2f..e4d9ef0c965a 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -1254,6 +1254,7 @@ out_free:
1254 if (!ubi->volumes[i]) 1254 if (!ubi->volumes[i])
1255 continue; 1255 continue;
1256 kfree(ubi->volumes[i]->eba_tbl); 1256 kfree(ubi->volumes[i]->eba_tbl);
1257 ubi->volumes[i]->eba_tbl = NULL;
1257 } 1258 }
1258 return err; 1259 return err;
1259} 1260}
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index a423131b6171..b847745394b4 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -781,11 +781,22 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
781 return -EINVAL; 781 return -EINVAL;
782 } 782 }
783 783
784 /*
785 * Make sure that all PEBs have the same image sequence number.
786 * This allows us to detect situations when users flash UBI
787 * images incorrectly, so that the flash has the new UBI image
788 * and leftovers from the old one. This feature was added
789 * relatively recently, and the sequence number was always
790 * zero, because old UBI implementations always set it to zero.
791 * For this reasons, we do not panic if some PEBs have zero
792 * sequence number, while other PEBs have non-zero sequence
793 * number.
794 */
784 image_seq = be32_to_cpu(ech->image_seq); 795 image_seq = be32_to_cpu(ech->image_seq);
785 if (!si->image_seq_set) { 796 if (!si->image_seq_set) {
786 ubi->image_seq = image_seq; 797 ubi->image_seq = image_seq;
787 si->image_seq_set = 1; 798 si->image_seq_set = 1;
788 } else if (ubi->image_seq != image_seq) { 799 } else if (ubi->image_seq && ubi->image_seq != image_seq) {
789 ubi_err("bad image sequence number %d in PEB %d, " 800 ubi_err("bad image sequence number %d in PEB %d, "
790 "expected %d", image_seq, pnum, ubi->image_seq); 801 "expected %d", image_seq, pnum, ubi->image_seq);
791 ubi_dbg_dump_ec_hdr(ech); 802 ubi_dbg_dump_ec_hdr(ech);
diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c
index a4494d78e7c2..8aebe1e9d3d6 100644
--- a/drivers/pci/hotplug/sgi_hotplug.c
+++ b/drivers/pci/hotplug/sgi_hotplug.c
@@ -90,11 +90,10 @@ static struct hotplug_slot_ops sn_hotplug_slot_ops = {
90 90
91static DEFINE_MUTEX(sn_hotplug_mutex); 91static DEFINE_MUTEX(sn_hotplug_mutex);
92 92
93static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot, 93static ssize_t path_show(struct pci_slot *pci_slot, char *buf)
94 char *buf)
95{ 94{
96 int retval = -ENOENT; 95 int retval = -ENOENT;
97 struct slot *slot = bss_hotplug_slot->private; 96 struct slot *slot = pci_slot->hotplug->private;
98 97
99 if (!slot) 98 if (!slot)
100 return retval; 99 return retval;
@@ -103,7 +102,7 @@ static ssize_t path_show (struct hotplug_slot *bss_hotplug_slot,
103 return retval; 102 return retval;
104} 103}
105 104
106static struct hotplug_slot_attribute sn_slot_path_attr = __ATTR_RO(path); 105static struct pci_slot_attribute sn_slot_path_attr = __ATTR_RO(path);
107 106
108static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device) 107static int sn_pci_slot_valid(struct pci_bus *pci_bus, int device)
109{ 108{
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 489fc01a3204..e4e089a8f294 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -255,7 +255,7 @@ static void nfs_direct_read_release(void *calldata)
255 255
256 if (put_dreq(dreq)) 256 if (put_dreq(dreq))
257 nfs_direct_complete(dreq); 257 nfs_direct_complete(dreq);
258 nfs_readdata_release(calldata); 258 nfs_readdata_free(data);
259} 259}
260 260
261static const struct rpc_call_ops nfs_read_direct_ops = { 261static const struct rpc_call_ops nfs_read_direct_ops = {
@@ -314,14 +314,14 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
314 data->npages, 1, 0, data->pagevec, NULL); 314 data->npages, 1, 0, data->pagevec, NULL);
315 up_read(&current->mm->mmap_sem); 315 up_read(&current->mm->mmap_sem);
316 if (result < 0) { 316 if (result < 0) {
317 nfs_readdata_release(data); 317 nfs_readdata_free(data);
318 break; 318 break;
319 } 319 }
320 if ((unsigned)result < data->npages) { 320 if ((unsigned)result < data->npages) {
321 bytes = result * PAGE_SIZE; 321 bytes = result * PAGE_SIZE;
322 if (bytes <= pgbase) { 322 if (bytes <= pgbase) {
323 nfs_direct_release_pages(data->pagevec, result); 323 nfs_direct_release_pages(data->pagevec, result);
324 nfs_readdata_release(data); 324 nfs_readdata_free(data);
325 break; 325 break;
326 } 326 }
327 bytes -= pgbase; 327 bytes -= pgbase;
@@ -334,7 +334,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
334 data->inode = inode; 334 data->inode = inode;
335 data->cred = msg.rpc_cred; 335 data->cred = msg.rpc_cred;
336 data->args.fh = NFS_FH(inode); 336 data->args.fh = NFS_FH(inode);
337 data->args.context = get_nfs_open_context(ctx); 337 data->args.context = ctx;
338 data->args.offset = pos; 338 data->args.offset = pos;
339 data->args.pgbase = pgbase; 339 data->args.pgbase = pgbase;
340 data->args.pages = data->pagevec; 340 data->args.pages = data->pagevec;
@@ -441,7 +441,7 @@ static void nfs_direct_free_writedata(struct nfs_direct_req *dreq)
441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages); 441 struct nfs_write_data *data = list_entry(dreq->rewrite_list.next, struct nfs_write_data, pages);
442 list_del(&data->pages); 442 list_del(&data->pages);
443 nfs_direct_release_pages(data->pagevec, data->npages); 443 nfs_direct_release_pages(data->pagevec, data->npages);
444 nfs_writedata_release(data); 444 nfs_writedata_free(data);
445 } 445 }
446} 446}
447 447
@@ -534,7 +534,7 @@ static void nfs_direct_commit_release(void *calldata)
534 534
535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status); 535 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
536 nfs_direct_write_complete(dreq, data->inode); 536 nfs_direct_write_complete(dreq, data->inode);
537 nfs_commitdata_release(calldata); 537 nfs_commit_free(data);
538} 538}
539 539
540static const struct rpc_call_ops nfs_commit_direct_ops = { 540static const struct rpc_call_ops nfs_commit_direct_ops = {
@@ -570,7 +570,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
570 data->args.fh = NFS_FH(data->inode); 570 data->args.fh = NFS_FH(data->inode);
571 data->args.offset = 0; 571 data->args.offset = 0;
572 data->args.count = 0; 572 data->args.count = 0;
573 data->args.context = get_nfs_open_context(dreq->ctx); 573 data->args.context = dreq->ctx;
574 data->res.count = 0; 574 data->res.count = 0;
575 data->res.fattr = &data->fattr; 575 data->res.fattr = &data->fattr;
576 data->res.verf = &data->verf; 576 data->res.verf = &data->verf;
@@ -734,14 +734,14 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
734 data->npages, 0, 0, data->pagevec, NULL); 734 data->npages, 0, 0, data->pagevec, NULL);
735 up_read(&current->mm->mmap_sem); 735 up_read(&current->mm->mmap_sem);
736 if (result < 0) { 736 if (result < 0) {
737 nfs_writedata_release(data); 737 nfs_writedata_free(data);
738 break; 738 break;
739 } 739 }
740 if ((unsigned)result < data->npages) { 740 if ((unsigned)result < data->npages) {
741 bytes = result * PAGE_SIZE; 741 bytes = result * PAGE_SIZE;
742 if (bytes <= pgbase) { 742 if (bytes <= pgbase) {
743 nfs_direct_release_pages(data->pagevec, result); 743 nfs_direct_release_pages(data->pagevec, result);
744 nfs_writedata_release(data); 744 nfs_writedata_free(data);
745 break; 745 break;
746 } 746 }
747 bytes -= pgbase; 747 bytes -= pgbase;
@@ -756,7 +756,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
756 data->inode = inode; 756 data->inode = inode;
757 data->cred = msg.rpc_cred; 757 data->cred = msg.rpc_cred;
758 data->args.fh = NFS_FH(inode); 758 data->args.fh = NFS_FH(inode);
759 data->args.context = get_nfs_open_context(ctx); 759 data->args.context = ctx;
760 data->args.offset = pos; 760 data->args.offset = pos;
761 data->args.pgbase = pgbase; 761 data->args.pgbase = pgbase;
762 data->args.pages = data->pagevec; 762 data->args.pages = data->pagevec;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 73ea5e8d66ce..12c9e66d3f1d 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -60,17 +60,15 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
60 return p; 60 return p;
61} 61}
62 62
63static void nfs_readdata_free(struct nfs_read_data *p) 63void nfs_readdata_free(struct nfs_read_data *p)
64{ 64{
65 if (p && (p->pagevec != &p->page_array[0])) 65 if (p && (p->pagevec != &p->page_array[0]))
66 kfree(p->pagevec); 66 kfree(p->pagevec);
67 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
68} 68}
69 69
70void nfs_readdata_release(void *data) 70static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 71{
72 struct nfs_read_data *rdata = data;
73
74 put_nfs_open_context(rdata->args.context); 72 put_nfs_open_context(rdata->args.context);
75 nfs_readdata_free(rdata); 73 nfs_readdata_free(rdata);
76} 74}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0a0a2ff767c3..a34fae21fe10 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -87,17 +87,15 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
87 return p; 87 return p;
88} 88}
89 89
90static void nfs_writedata_free(struct nfs_write_data *p) 90void nfs_writedata_free(struct nfs_write_data *p)
91{ 91{
92 if (p && (p->pagevec != &p->page_array[0])) 92 if (p && (p->pagevec != &p->page_array[0]))
93 kfree(p->pagevec); 93 kfree(p->pagevec);
94 mempool_free(p, nfs_wdata_mempool); 94 mempool_free(p, nfs_wdata_mempool);
95} 95}
96 96
97void nfs_writedata_release(void *data) 97static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 98{
99 struct nfs_write_data *wdata = data;
100
101 put_nfs_open_context(wdata->args.context); 99 put_nfs_open_context(wdata->args.context);
102 nfs_writedata_free(wdata); 100 nfs_writedata_free(wdata);
103} 101}
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9edcde4974aa..f9a3e8942669 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1914,7 +1914,8 @@ static void ocfs2_adjust_adjacent_records(struct ocfs2_extent_rec *left_rec,
1914 * immediately to their right. 1914 * immediately to their right.
1915 */ 1915 */
1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos); 1916 left_clusters = le32_to_cpu(right_child_el->l_recs[0].e_cpos);
1917 if (ocfs2_is_empty_extent(&right_child_el->l_recs[0])) { 1917 if (!ocfs2_rec_clusters(right_child_el, &right_child_el->l_recs[0])) {
1918 BUG_ON(right_child_el->l_tree_depth);
1918 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1); 1919 BUG_ON(le16_to_cpu(right_child_el->l_next_free_rec) <= 1);
1919 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos); 1920 left_clusters = le32_to_cpu(right_child_el->l_recs[1].e_cpos);
1920 } 1921 }
@@ -2476,15 +2477,37 @@ out_ret_path:
2476 return ret; 2477 return ret;
2477} 2478}
2478 2479
2479static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2480 struct ocfs2_path *path) 2481 int subtree_index, struct ocfs2_path *path)
2481{ 2482{
2482 int i, idx; 2483 int i, idx, ret;
2483 struct ocfs2_extent_rec *rec; 2484 struct ocfs2_extent_rec *rec;
2484 struct ocfs2_extent_list *el; 2485 struct ocfs2_extent_list *el;
2485 struct ocfs2_extent_block *eb; 2486 struct ocfs2_extent_block *eb;
2486 u32 range; 2487 u32 range;
2487 2488
2489 /*
2490 * In normal tree rotation process, we will never touch the
2491 * tree branch above subtree_index and ocfs2_extend_rotate_transaction
2492 * doesn't reserve the credits for them either.
2493 *
2494 * But we do have a special case here which will update the rightmost
2495 * records for all the bh in the path.
2496 * So we have to allocate extra credits and access them.
2497 */
2498 ret = ocfs2_extend_trans(handle,
2499 handle->h_buffer_credits + subtree_index);
2500 if (ret) {
2501 mlog_errno(ret);
2502 goto out;
2503 }
2504
2505 ret = ocfs2_journal_access_path(inode, handle, path);
2506 if (ret) {
2507 mlog_errno(ret);
2508 goto out;
2509 }
2510
2488 /* Path should always be rightmost. */ 2511 /* Path should always be rightmost. */
2489 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 2512 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
2490 BUG_ON(eb->h_next_leaf_blk != 0ULL); 2513 BUG_ON(eb->h_next_leaf_blk != 0ULL);
@@ -2505,6 +2528,8 @@ static void ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2505 2528
2506 ocfs2_journal_dirty(handle, path->p_node[i].bh); 2529 ocfs2_journal_dirty(handle, path->p_node[i].bh);
2507 } 2530 }
2531out:
2532 return ret;
2508} 2533}
2509 2534
2510static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
@@ -2717,7 +2742,12 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2717 if (del_right_subtree) { 2742 if (del_right_subtree) {
2718 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path,
2719 subtree_index, dealloc); 2744 subtree_index, dealloc);
2720 ocfs2_update_edge_lengths(inode, handle, left_path); 2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
2746 left_path);
2747 if (ret) {
2748 mlog_errno(ret);
2749 goto out;
2750 }
2721 2751
2722 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2752 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2723 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 2753 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
@@ -3034,7 +3064,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3034 3064
3035 ocfs2_unlink_subtree(inode, handle, left_path, path, 3065 ocfs2_unlink_subtree(inode, handle, left_path, path,
3036 subtree_index, dealloc); 3066 subtree_index, dealloc);
3037 ocfs2_update_edge_lengths(inode, handle, left_path); 3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index,
3068 left_path);
3069 if (ret) {
3070 mlog_errno(ret);
3071 goto out;
3072 }
3038 3073
3039 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 3074 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
3040 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 3075 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index b2c52b3a1484..b401654011a2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -193,6 +193,7 @@ static int ocfs2_get_block(struct inode *inode, sector_t iblock,
193 (unsigned long long)OCFS2_I(inode)->ip_blkno); 193 (unsigned long long)OCFS2_I(inode)->ip_blkno);
194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters); 194 mlog(ML_ERROR, "Size %llu, clusters %u\n", (unsigned long long)i_size_read(inode), OCFS2_I(inode)->ip_clusters);
195 dump_stack(); 195 dump_stack();
196 goto bail;
196 } 197 }
197 198
198 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); 199 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
@@ -894,18 +895,17 @@ struct ocfs2_write_cluster_desc {
894 */ 895 */
895 unsigned c_new; 896 unsigned c_new;
896 unsigned c_unwritten; 897 unsigned c_unwritten;
898 unsigned c_needs_zero;
897}; 899};
898 900
899static inline int ocfs2_should_zero_cluster(struct ocfs2_write_cluster_desc *d)
900{
901 return d->c_new || d->c_unwritten;
902}
903
904struct ocfs2_write_ctxt { 901struct ocfs2_write_ctxt {
905 /* Logical cluster position / len of write */ 902 /* Logical cluster position / len of write */
906 u32 w_cpos; 903 u32 w_cpos;
907 u32 w_clen; 904 u32 w_clen;
908 905
906 /* First cluster allocated in a nonsparse extend */
907 u32 w_first_new_cpos;
908
909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE]; 909 struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
910 910
911 /* 911 /*
@@ -983,6 +983,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
983 return -ENOMEM; 983 return -ENOMEM;
984 984
985 wc->w_cpos = pos >> osb->s_clustersize_bits; 985 wc->w_cpos = pos >> osb->s_clustersize_bits;
986 wc->w_first_new_cpos = UINT_MAX;
986 cend = (pos + len - 1) >> osb->s_clustersize_bits; 987 cend = (pos + len - 1) >> osb->s_clustersize_bits;
987 wc->w_clen = cend - wc->w_cpos + 1; 988 wc->w_clen = cend - wc->w_cpos + 1;
988 get_bh(di_bh); 989 get_bh(di_bh);
@@ -1217,20 +1218,18 @@ out:
1217 */ 1218 */
1218static int ocfs2_write_cluster(struct address_space *mapping, 1219static int ocfs2_write_cluster(struct address_space *mapping,
1219 u32 phys, unsigned int unwritten, 1220 u32 phys, unsigned int unwritten,
1221 unsigned int should_zero,
1220 struct ocfs2_alloc_context *data_ac, 1222 struct ocfs2_alloc_context *data_ac,
1221 struct ocfs2_alloc_context *meta_ac, 1223 struct ocfs2_alloc_context *meta_ac,
1222 struct ocfs2_write_ctxt *wc, u32 cpos, 1224 struct ocfs2_write_ctxt *wc, u32 cpos,
1223 loff_t user_pos, unsigned user_len) 1225 loff_t user_pos, unsigned user_len)
1224{ 1226{
1225 int ret, i, new, should_zero = 0; 1227 int ret, i, new;
1226 u64 v_blkno, p_blkno; 1228 u64 v_blkno, p_blkno;
1227 struct inode *inode = mapping->host; 1229 struct inode *inode = mapping->host;
1228 struct ocfs2_extent_tree et; 1230 struct ocfs2_extent_tree et;
1229 1231
1230 new = phys == 0 ? 1 : 0; 1232 new = phys == 0 ? 1 : 0;
1231 if (new || unwritten)
1232 should_zero = 1;
1233
1234 if (new) { 1233 if (new) {
1235 u32 tmp_pos; 1234 u32 tmp_pos;
1236 1235
@@ -1301,7 +1300,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1301 if (tmpret) { 1300 if (tmpret) {
1302 mlog_errno(tmpret); 1301 mlog_errno(tmpret);
1303 if (ret == 0) 1302 if (ret == 0)
1304 tmpret = ret; 1303 ret = tmpret;
1305 } 1304 }
1306 } 1305 }
1307 1306
@@ -1341,7 +1340,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1341 local_len = osb->s_clustersize - cluster_off; 1340 local_len = osb->s_clustersize - cluster_off;
1342 1341
1343 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1342 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1344 desc->c_unwritten, data_ac, meta_ac, 1343 desc->c_unwritten,
1344 desc->c_needs_zero,
1345 data_ac, meta_ac,
1345 wc, desc->c_cpos, pos, local_len); 1346 wc, desc->c_cpos, pos, local_len);
1346 if (ret) { 1347 if (ret) {
1347 mlog_errno(ret); 1348 mlog_errno(ret);
@@ -1391,14 +1392,14 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
1391 * newly allocated cluster. 1392 * newly allocated cluster.
1392 */ 1393 */
1393 desc = &wc->w_desc[0]; 1394 desc = &wc->w_desc[0];
1394 if (ocfs2_should_zero_cluster(desc)) 1395 if (desc->c_needs_zero)
1395 ocfs2_figure_cluster_boundaries(osb, 1396 ocfs2_figure_cluster_boundaries(osb,
1396 desc->c_cpos, 1397 desc->c_cpos,
1397 &wc->w_target_from, 1398 &wc->w_target_from,
1398 NULL); 1399 NULL);
1399 1400
1400 desc = &wc->w_desc[wc->w_clen - 1]; 1401 desc = &wc->w_desc[wc->w_clen - 1];
1401 if (ocfs2_should_zero_cluster(desc)) 1402 if (desc->c_needs_zero)
1402 ocfs2_figure_cluster_boundaries(osb, 1403 ocfs2_figure_cluster_boundaries(osb,
1403 desc->c_cpos, 1404 desc->c_cpos,
1404 NULL, 1405 NULL,
@@ -1466,13 +1467,28 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1466 phys++; 1467 phys++;
1467 } 1468 }
1468 1469
1470 /*
1471 * If w_first_new_cpos is < UINT_MAX, we have a non-sparse
1472 * file that got extended. w_first_new_cpos tells us
1473 * where the newly allocated clusters are so we can
1474 * zero them.
1475 */
1476 if (desc->c_cpos >= wc->w_first_new_cpos) {
1477 BUG_ON(phys == 0);
1478 desc->c_needs_zero = 1;
1479 }
1480
1469 desc->c_phys = phys; 1481 desc->c_phys = phys;
1470 if (phys == 0) { 1482 if (phys == 0) {
1471 desc->c_new = 1; 1483 desc->c_new = 1;
1484 desc->c_needs_zero = 1;
1472 *clusters_to_alloc = *clusters_to_alloc + 1; 1485 *clusters_to_alloc = *clusters_to_alloc + 1;
1473 } 1486 }
1474 if (ext_flags & OCFS2_EXT_UNWRITTEN) 1487
1488 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1475 desc->c_unwritten = 1; 1489 desc->c_unwritten = 1;
1490 desc->c_needs_zero = 1;
1491 }
1476 1492
1477 num_clusters--; 1493 num_clusters--;
1478 } 1494 }
@@ -1632,10 +1648,13 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1632 if (newsize <= i_size_read(inode)) 1648 if (newsize <= i_size_read(inode))
1633 return 0; 1649 return 0;
1634 1650
1635 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len); 1651 ret = ocfs2_extend_no_holes(inode, newsize, pos);
1636 if (ret) 1652 if (ret)
1637 mlog_errno(ret); 1653 mlog_errno(ret);
1638 1654
1655 wc->w_first_new_cpos =
1656 ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
1657
1639 return ret; 1658 return ret;
1640} 1659}
1641 1660
@@ -1644,7 +1663,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1644 struct page **pagep, void **fsdata, 1663 struct page **pagep, void **fsdata,
1645 struct buffer_head *di_bh, struct page *mmap_page) 1664 struct buffer_head *di_bh, struct page *mmap_page)
1646{ 1665{
1647 int ret, credits = OCFS2_INODE_UPDATE_CREDITS; 1666 int ret, cluster_of_pages, credits = OCFS2_INODE_UPDATE_CREDITS;
1648 unsigned int clusters_to_alloc, extents_to_split; 1667 unsigned int clusters_to_alloc, extents_to_split;
1649 struct ocfs2_write_ctxt *wc; 1668 struct ocfs2_write_ctxt *wc;
1650 struct inode *inode = mapping->host; 1669 struct inode *inode = mapping->host;
@@ -1722,8 +1741,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1722 1741
1723 } 1742 }
1724 1743
1725 ocfs2_set_target_boundaries(osb, wc, pos, len, 1744 /*
1726 clusters_to_alloc + extents_to_split); 1745 * We have to zero sparse allocated clusters, unwritten extent clusters,
1746 * and non-sparse clusters we just extended. For non-sparse writes,
1747 * we know zeros will only be needed in the first and/or last cluster.
1748 */
1749 if (clusters_to_alloc || extents_to_split ||
1750 wc->w_desc[0].c_needs_zero ||
1751 wc->w_desc[wc->w_clen - 1].c_needs_zero)
1752 cluster_of_pages = 1;
1753 else
1754 cluster_of_pages = 0;
1755
1756 ocfs2_set_target_boundaries(osb, wc, pos, len, cluster_of_pages);
1727 1757
1728 handle = ocfs2_start_trans(osb, credits); 1758 handle = ocfs2_start_trans(osb, credits);
1729 if (IS_ERR(handle)) { 1759 if (IS_ERR(handle)) {
@@ -1756,8 +1786,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1756 * extent. 1786 * extent.
1757 */ 1787 */
1758 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, 1788 ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos,
1759 clusters_to_alloc + extents_to_split, 1789 cluster_of_pages, mmap_page);
1760 mmap_page);
1761 if (ret) { 1790 if (ret) {
1762 mlog_errno(ret); 1791 mlog_errno(ret);
1763 goto out_quota; 1792 goto out_quota;
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index b574431a031d..2f28b7de2c8d 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -310,22 +310,19 @@ out_attach:
310 return ret; 310 return ret;
311} 311}
312 312
313static DEFINE_SPINLOCK(dentry_list_lock); 313DEFINE_SPINLOCK(dentry_list_lock);
314 314
315/* We limit the number of dentry locks to drop in one go. We have 315/* We limit the number of dentry locks to drop in one go. We have
316 * this limit so that we don't starve other users of ocfs2_wq. */ 316 * this limit so that we don't starve other users of ocfs2_wq. */
317#define DL_INODE_DROP_COUNT 64 317#define DL_INODE_DROP_COUNT 64
318 318
319/* Drop inode references from dentry locks */ 319/* Drop inode references from dentry locks */
320void ocfs2_drop_dl_inodes(struct work_struct *work) 320static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count)
321{ 321{
322 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
323 dentry_lock_work);
324 struct ocfs2_dentry_lock *dl; 322 struct ocfs2_dentry_lock *dl;
325 int drop_count = DL_INODE_DROP_COUNT;
326 323
327 spin_lock(&dentry_list_lock); 324 spin_lock(&dentry_list_lock);
328 while (osb->dentry_lock_list && drop_count--) { 325 while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) {
329 dl = osb->dentry_lock_list; 326 dl = osb->dentry_lock_list;
330 osb->dentry_lock_list = dl->dl_next; 327 osb->dentry_lock_list = dl->dl_next;
331 spin_unlock(&dentry_list_lock); 328 spin_unlock(&dentry_list_lock);
@@ -333,11 +330,32 @@ void ocfs2_drop_dl_inodes(struct work_struct *work)
333 kfree(dl); 330 kfree(dl);
334 spin_lock(&dentry_list_lock); 331 spin_lock(&dentry_list_lock);
335 } 332 }
336 if (osb->dentry_lock_list) 333 spin_unlock(&dentry_list_lock);
334}
335
336void ocfs2_drop_dl_inodes(struct work_struct *work)
337{
338 struct ocfs2_super *osb = container_of(work, struct ocfs2_super,
339 dentry_lock_work);
340
341 __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT);
342 /*
343 * Don't queue dropping if umount is in progress. We flush the
344 * list in ocfs2_dismount_volume
345 */
346 spin_lock(&dentry_list_lock);
347 if (osb->dentry_lock_list &&
348 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
337 queue_work(ocfs2_wq, &osb->dentry_lock_work); 349 queue_work(ocfs2_wq, &osb->dentry_lock_work);
338 spin_unlock(&dentry_list_lock); 350 spin_unlock(&dentry_list_lock);
339} 351}
340 352
353/* Flush the whole work queue */
354void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb)
355{
356 __ocfs2_drop_dl_inodes(osb, -1);
357}
358
341/* 359/*
342 * ocfs2_dentry_iput() and friends. 360 * ocfs2_dentry_iput() and friends.
343 * 361 *
@@ -368,7 +386,8 @@ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb,
368 /* We leave dropping of inode reference to ocfs2_wq as that can 386 /* We leave dropping of inode reference to ocfs2_wq as that can
369 * possibly lead to inode deletion which gets tricky */ 387 * possibly lead to inode deletion which gets tricky */
370 spin_lock(&dentry_list_lock); 388 spin_lock(&dentry_list_lock);
371 if (!osb->dentry_lock_list) 389 if (!osb->dentry_lock_list &&
390 !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED))
372 queue_work(ocfs2_wq, &osb->dentry_lock_work); 391 queue_work(ocfs2_wq, &osb->dentry_lock_work);
373 dl->dl_next = osb->dentry_lock_list; 392 dl->dl_next = osb->dentry_lock_list;
374 osb->dentry_lock_list = dl; 393 osb->dentry_lock_list = dl;
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h
index faa12e75f98d..f5dd1789acf1 100644
--- a/fs/ocfs2/dcache.h
+++ b/fs/ocfs2/dcache.h
@@ -49,10 +49,13 @@ struct ocfs2_dentry_lock {
49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, 49int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode,
50 u64 parent_blkno); 50 u64 parent_blkno);
51 51
52extern spinlock_t dentry_list_lock;
53
52void ocfs2_dentry_lock_put(struct ocfs2_super *osb, 54void ocfs2_dentry_lock_put(struct ocfs2_super *osb,
53 struct ocfs2_dentry_lock *dl); 55 struct ocfs2_dentry_lock *dl);
54 56
55void ocfs2_drop_dl_inodes(struct work_struct *work); 57void ocfs2_drop_dl_inodes(struct work_struct *work);
58void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb);
56 59
57struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, 60struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno,
58 int skip_unhashed); 61 int skip_unhashed);
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index d07ddbe4b283..81eff8e58322 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -103,7 +103,6 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
103 lock->ast_pending, lock->ml.type); 103 lock->ast_pending, lock->ml.type);
104 BUG(); 104 BUG();
105 } 105 }
106 BUG_ON(!list_empty(&lock->ast_list));
107 if (lock->ast_pending) 106 if (lock->ast_pending)
108 mlog(0, "lock has an ast getting flushed right now\n"); 107 mlog(0, "lock has an ast getting flushed right now\n");
109 108
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index bcb9260c3735..43e6e3280569 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1118,7 +1118,7 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1118 1118
1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n", 1119 mlog(0, "%s:%.*s: sending mig lockres (%s) to %u\n",
1120 dlm->name, res->lockname.len, res->lockname.name, 1120 dlm->name, res->lockname.len, res->lockname.name,
1121 orig_flags & DLM_MRES_MIGRATION ? "migrate" : "recovery", 1121 orig_flags & DLM_MRES_MIGRATION ? "migration" : "recovery",
1122 send_to); 1122 send_to);
1123 1123
1124 /* send it */ 1124 /* send it */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 62442e413a00..aa501d3f93f1 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1851,6 +1851,7 @@ relock:
1851 if (ret) 1851 if (ret)
1852 goto out_dio; 1852 goto out_dio;
1853 1853
1854 count = ocount;
1854 ret = generic_write_checks(file, ppos, &count, 1855 ret = generic_write_checks(file, ppos, &count,
1855 S_ISBLK(inode->i_mode)); 1856 S_ISBLK(inode->i_mode));
1856 if (ret) 1857 if (ret)
@@ -1918,8 +1919,10 @@ out_sems:
1918 1919
1919 mutex_unlock(&inode->i_mutex); 1920 mutex_unlock(&inode->i_mutex);
1920 1921
1922 if (written)
1923 ret = written;
1921 mlog_exit(ret); 1924 mlog_exit(ret);
1922 return written ? written : ret; 1925 return ret;
1923} 1926}
1924 1927
1925static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, 1928static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index f033760ecbea..c48b93ac6b65 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1954,10 +1954,16 @@ void ocfs2_orphan_scan_init(struct ocfs2_super *osb)
1954 os->os_osb = osb; 1954 os->os_osb = osb;
1955 os->os_count = 0; 1955 os->os_count = 0;
1956 os->os_seqno = 0; 1956 os->os_seqno = 0;
1957 os->os_scantime = CURRENT_TIME;
1958 mutex_init(&os->os_lock); 1957 mutex_init(&os->os_lock);
1959 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work); 1958 INIT_DELAYED_WORK(&os->os_orphan_scan_work, ocfs2_orphan_scan_work);
1959}
1960 1960
1961void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
1962{
1963 struct ocfs2_orphan_scan *os;
1964
1965 os = &osb->osb_orphan_scan;
1966 os->os_scantime = CURRENT_TIME;
1961 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb)) 1967 if (ocfs2_is_hard_readonly(osb) || ocfs2_mount_local(osb))
1962 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); 1968 atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
1963 else { 1969 else {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 5432c7f79cc6..2c3222aec622 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -145,6 +145,7 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
145 145
146/* Exported only for the journal struct init code in super.c. Do not call. */ 146/* Exported only for the journal struct init code in super.c. Do not call. */
147void ocfs2_orphan_scan_init(struct ocfs2_super *osb); 147void ocfs2_orphan_scan_init(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_start(struct ocfs2_super *osb);
148void ocfs2_orphan_scan_stop(struct ocfs2_super *osb); 149void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
149void ocfs2_orphan_scan_exit(struct ocfs2_super *osb); 150void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
150 151
@@ -329,20 +330,27 @@ int ocfs2_journal_dirty(handle_t *handle,
329/* extended attribute block update */ 330/* extended attribute block update */
330#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 331#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
331 332
333/* Update of a single quota block */
334#define OCFS2_QUOTA_BLOCK_UPDATE_CREDITS 1
335
332/* global quotafile inode update, data block */ 336/* global quotafile inode update, data block */
333#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 337#define OCFS2_QINFO_WRITE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + \
338 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
334 339
340#define OCFS2_LOCAL_QINFO_WRITE_CREDITS OCFS2_QUOTA_BLOCK_UPDATE_CREDITS
335/* 341/*
336 * The two writes below can accidentally see global info dirty due 342 * The two writes below can accidentally see global info dirty due
337 * to set_info() quotactl so make them prepared for the writes. 343 * to set_info() quotactl so make them prepared for the writes.
338 */ 344 */
339/* quota data block, global info */ 345/* quota data block, global info */
340/* Write to local quota file */ 346/* Write to local quota file */
341#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + 1) 347#define OCFS2_QWRITE_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
348 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
342 349
343/* global quota data block, local quota data block, global quota inode, 350/* global quota data block, local quota data block, global quota inode,
344 * global quota info */ 351 * global quota info */
345#define OCFS2_QSYNC_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 3) 352#define OCFS2_QSYNC_CREDITS (OCFS2_QINFO_WRITE_CREDITS + \
353 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS)
346 354
347static inline int ocfs2_quota_trans_credits(struct super_block *sb) 355static inline int ocfs2_quota_trans_credits(struct super_block *sb)
348{ 356{
@@ -355,11 +363,6 @@ static inline int ocfs2_quota_trans_credits(struct super_block *sb)
355 return credits; 363 return credits;
356} 364}
357 365
358/* Number of credits needed for removing quota structure from file */
359int ocfs2_calc_qdel_credits(struct super_block *sb, int type);
360/* Number of credits needed for initialization of new quota structure */
361int ocfs2_calc_qinit_credits(struct super_block *sb, int type);
362
363/* group extend. inode update and last group update. */ 366/* group extend. inode update and last group update. */
364#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 367#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
365 368
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index c9345ebb8493..39e1d5a39505 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -224,10 +224,12 @@ enum ocfs2_mount_options
224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */ 224 OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
225}; 225};
226 226
227#define OCFS2_OSB_SOFT_RO 0x0001 227#define OCFS2_OSB_SOFT_RO 0x0001
228#define OCFS2_OSB_HARD_RO 0x0002 228#define OCFS2_OSB_HARD_RO 0x0002
229#define OCFS2_OSB_ERROR_FS 0x0004 229#define OCFS2_OSB_ERROR_FS 0x0004
230#define OCFS2_DEFAULT_ATIME_QUANTUM 60 230#define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008
231
232#define OCFS2_DEFAULT_ATIME_QUANTUM 60
231 233
232struct ocfs2_journal; 234struct ocfs2_journal;
233struct ocfs2_slot_info; 235struct ocfs2_slot_info;
@@ -490,6 +492,18 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb,
490 spin_unlock(&osb->osb_lock); 492 spin_unlock(&osb->osb_lock);
491} 493}
492 494
495
496static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb,
497 unsigned long flag)
498{
499 unsigned long ret;
500
501 spin_lock(&osb->osb_lock);
502 ret = osb->osb_flags & flag;
503 spin_unlock(&osb->osb_lock);
504 return ret;
505}
506
493static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, 507static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb,
494 int hard) 508 int hard)
495{ 509{
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h
index 7365e2e08706..3fb96fcd4c81 100644
--- a/fs/ocfs2/quota.h
+++ b/fs/ocfs2/quota.h
@@ -50,7 +50,6 @@ struct ocfs2_mem_dqinfo {
50 unsigned int dqi_chunks; /* Number of chunks in local quota file */ 50 unsigned int dqi_chunks; /* Number of chunks in local quota file */
51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */ 51 unsigned int dqi_blocks; /* Number of blocks allocated for local quota file */
52 unsigned int dqi_syncms; /* How often should we sync with other nodes */ 52 unsigned int dqi_syncms; /* How often should we sync with other nodes */
53 unsigned int dqi_syncjiff; /* Precomputed dqi_syncms in jiffies */
54 struct list_head dqi_chunk; /* List of chunks */ 53 struct list_head dqi_chunk; /* List of chunks */
55 struct inode *dqi_gqinode; /* Global quota file inode */ 54 struct inode *dqi_gqinode; /* Global quota file inode */
56 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */ 55 struct ocfs2_lock_res dqi_gqlock; /* Lock protecting quota information structure */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index edfa60cd155c..bf7742d0ee3b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -69,6 +69,7 @@ static void ocfs2_global_mem2diskdqb(void *dp, struct dquot *dquot)
69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace); 69 d->dqb_curspace = cpu_to_le64(m->dqb_curspace);
70 d->dqb_btime = cpu_to_le64(m->dqb_btime); 70 d->dqb_btime = cpu_to_le64(m->dqb_btime);
71 d->dqb_itime = cpu_to_le64(m->dqb_itime); 71 d->dqb_itime = cpu_to_le64(m->dqb_itime);
72 d->dqb_pad1 = d->dqb_pad2 = 0;
72} 73}
73 74
74static int ocfs2_global_is_id(void *dp, struct dquot *dquot) 75static int ocfs2_global_is_id(void *dp, struct dquot *dquot)
@@ -211,14 +212,13 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
211 212
212 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA); 213 mutex_lock_nested(&gqinode->i_mutex, I_MUTEX_QUOTA);
213 if (gqinode->i_size < off + len) { 214 if (gqinode->i_size < off + len) {
214 down_write(&OCFS2_I(gqinode)->ip_alloc_sem); 215 loff_t rounded_end =
215 err = ocfs2_extend_no_holes(gqinode, off + len, off); 216 ocfs2_align_bytes_to_blocks(sb, off + len);
216 up_write(&OCFS2_I(gqinode)->ip_alloc_sem); 217
217 if (err < 0) 218 /* Space is already allocated in ocfs2_global_read_dquot() */
218 goto out;
219 err = ocfs2_simple_size_update(gqinode, 219 err = ocfs2_simple_size_update(gqinode,
220 oinfo->dqi_gqi_bh, 220 oinfo->dqi_gqi_bh,
221 off + len); 221 rounded_end);
222 if (err < 0) 222 if (err < 0)
223 goto out; 223 goto out;
224 new = 1; 224 new = 1;
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
234 } 234 }
235 if (err) { 235 if (err) {
236 mlog_errno(err); 236 mlog_errno(err);
237 return err; 237 goto out;
238 } 238 }
239 lock_buffer(bh); 239 lock_buffer(bh);
240 if (new) 240 if (new)
@@ -342,7 +342,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace); 342 info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace); 343 info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms); 344 oinfo->dqi_syncms = le32_to_cpu(dinfo.dqi_syncms);
345 oinfo->dqi_syncjiff = msecs_to_jiffies(oinfo->dqi_syncms);
346 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks); 345 oinfo->dqi_gi.dqi_blocks = le32_to_cpu(dinfo.dqi_blocks);
347 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk); 346 oinfo->dqi_gi.dqi_free_blk = le32_to_cpu(dinfo.dqi_free_blk);
348 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry); 347 oinfo->dqi_gi.dqi_free_entry = le32_to_cpu(dinfo.dqi_free_entry);
@@ -352,7 +351,7 @@ int ocfs2_global_read_info(struct super_block *sb, int type)
352 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi); 351 oinfo->dqi_gi.dqi_qtree_depth = qtree_depth(&oinfo->dqi_gi);
353 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn); 352 INIT_DELAYED_WORK(&oinfo->dqi_sync_work, qsync_work_fn);
354 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 353 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
355 oinfo->dqi_syncjiff); 354 msecs_to_jiffies(oinfo->dqi_syncms));
356 355
357out_err: 356out_err:
358 mlog_exit(status); 357 mlog_exit(status);
@@ -402,13 +401,36 @@ int ocfs2_global_write_info(struct super_block *sb, int type)
402 return err; 401 return err;
403} 402}
404 403
404static int ocfs2_global_qinit_alloc(struct super_block *sb, int type)
405{
406 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
407
408 /*
409 * We may need to allocate tree blocks and a leaf block but not the
410 * root block
411 */
412 return oinfo->dqi_gi.dqi_qtree_depth;
413}
414
415static int ocfs2_calc_global_qinit_credits(struct super_block *sb, int type)
416{
417 /* We modify all the allocated blocks, tree root, and info block */
418 return (ocfs2_global_qinit_alloc(sb, type) + 2) *
419 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS;
420}
421
405/* Read in information from global quota file and acquire a reference to it. 422/* Read in information from global quota file and acquire a reference to it.
406 * dquot_acquire() has already started the transaction and locked quota file */ 423 * dquot_acquire() has already started the transaction and locked quota file */
407int ocfs2_global_read_dquot(struct dquot *dquot) 424int ocfs2_global_read_dquot(struct dquot *dquot)
408{ 425{
409 int err, err2, ex = 0; 426 int err, err2, ex = 0;
410 struct ocfs2_mem_dqinfo *info = 427 struct super_block *sb = dquot->dq_sb;
411 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 428 int type = dquot->dq_type;
429 struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
430 struct ocfs2_super *osb = OCFS2_SB(sb);
431 struct inode *gqinode = info->dqi_gqinode;
432 int need_alloc = ocfs2_global_qinit_alloc(sb, type);
433 handle_t *handle = NULL;
412 434
413 err = ocfs2_qinfo_lock(info, 0); 435 err = ocfs2_qinfo_lock(info, 0);
414 if (err < 0) 436 if (err < 0)
@@ -419,14 +441,33 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
419 OCFS2_DQUOT(dquot)->dq_use_count++; 441 OCFS2_DQUOT(dquot)->dq_use_count++;
420 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; 442 OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
421 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; 443 OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
444 ocfs2_qinfo_unlock(info, 0);
445
422 if (!dquot->dq_off) { /* No real quota entry? */ 446 if (!dquot->dq_off) { /* No real quota entry? */
423 /* Upgrade to exclusive lock for allocation */
424 ocfs2_qinfo_unlock(info, 0);
425 err = ocfs2_qinfo_lock(info, 1);
426 if (err < 0)
427 goto out_qlock;
428 ex = 1; 447 ex = 1;
448 /*
449 * Add blocks to quota file before we start a transaction since
450 * locking allocators ranks above a transaction start
451 */
452 WARN_ON(journal_current_handle());
453 down_write(&OCFS2_I(gqinode)->ip_alloc_sem);
454 err = ocfs2_extend_no_holes(gqinode,
455 gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
456 gqinode->i_size);
457 up_write(&OCFS2_I(gqinode)->ip_alloc_sem);
458 if (err < 0)
459 goto out;
429 } 460 }
461
462 handle = ocfs2_start_trans(osb,
463 ocfs2_calc_global_qinit_credits(sb, type));
464 if (IS_ERR(handle)) {
465 err = PTR_ERR(handle);
466 goto out;
467 }
468 err = ocfs2_qinfo_lock(info, ex);
469 if (err < 0)
470 goto out_trans;
430 err = qtree_write_dquot(&info->dqi_gi, dquot); 471 err = qtree_write_dquot(&info->dqi_gi, dquot);
431 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) { 472 if (ex && info_dirty(sb_dqinfo(dquot->dq_sb, dquot->dq_type))) {
432 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type); 473 err2 = __ocfs2_global_write_info(dquot->dq_sb, dquot->dq_type);
@@ -438,6 +479,9 @@ out_qlock:
438 ocfs2_qinfo_unlock(info, 1); 479 ocfs2_qinfo_unlock(info, 1);
439 else 480 else
440 ocfs2_qinfo_unlock(info, 0); 481 ocfs2_qinfo_unlock(info, 0);
482out_trans:
483 if (handle)
484 ocfs2_commit_trans(osb, handle);
441out: 485out:
442 if (err < 0) 486 if (err < 0)
443 mlog_errno(err); 487 mlog_errno(err);
@@ -607,7 +651,7 @@ static void qsync_work_fn(struct work_struct *work)
607 651
608 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type); 652 dquot_scan_active(sb, ocfs2_sync_dquot_helper, oinfo->dqi_type);
609 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work, 653 queue_delayed_work(ocfs2_quota_wq, &oinfo->dqi_sync_work,
610 oinfo->dqi_syncjiff); 654 msecs_to_jiffies(oinfo->dqi_syncms));
611} 655}
612 656
613/* 657/*
@@ -635,20 +679,18 @@ out:
635 return status; 679 return status;
636} 680}
637 681
638int ocfs2_calc_qdel_credits(struct super_block *sb, int type) 682static int ocfs2_calc_qdel_credits(struct super_block *sb, int type)
639{ 683{
640 struct ocfs2_mem_dqinfo *oinfo; 684 struct ocfs2_mem_dqinfo *oinfo = sb_dqinfo(sb, type)->dqi_priv;
641 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA, 685 /*
642 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA }; 686 * We modify tree, leaf block, global info, local chunk header,
643 687 * global and local inode; OCFS2_QINFO_WRITE_CREDITS already
644 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type])) 688 * accounts for inode update
645 return 0; 689 */
646 690 return (oinfo->dqi_gi.dqi_qtree_depth + 2) *
647 oinfo = sb_dqinfo(sb, type)->dqi_priv; 691 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS +
648 /* We modify tree, leaf block, global info, local chunk header, 692 OCFS2_QINFO_WRITE_CREDITS +
649 * global and local inode */ 693 OCFS2_INODE_UPDATE_CREDITS;
650 return oinfo->dqi_gi.dqi_qtree_depth + 2 + 1 +
651 2 * OCFS2_INODE_UPDATE_CREDITS;
652} 694}
653 695
654static int ocfs2_release_dquot(struct dquot *dquot) 696static int ocfs2_release_dquot(struct dquot *dquot)
@@ -680,33 +722,10 @@ out:
680 return status; 722 return status;
681} 723}
682 724
683int ocfs2_calc_qinit_credits(struct super_block *sb, int type)
684{
685 struct ocfs2_mem_dqinfo *oinfo;
686 int features[MAXQUOTAS] = { OCFS2_FEATURE_RO_COMPAT_USRQUOTA,
687 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA };
688 struct ocfs2_dinode *lfe, *gfe;
689
690 if (!OCFS2_HAS_RO_COMPAT_FEATURE(sb, features[type]))
691 return 0;
692
693 oinfo = sb_dqinfo(sb, type)->dqi_priv;
694 gfe = (struct ocfs2_dinode *)oinfo->dqi_gqi_bh->b_data;
695 lfe = (struct ocfs2_dinode *)oinfo->dqi_lqi_bh->b_data;
696 /* We can extend local file + global file. In local file we
697 * can modify info, chunk header block and dquot block. In
698 * global file we can modify info, tree and leaf block */
699 return ocfs2_calc_extend_credits(sb, &lfe->id2.i_list, 0) +
700 ocfs2_calc_extend_credits(sb, &gfe->id2.i_list, 0) +
701 3 + oinfo->dqi_gi.dqi_qtree_depth + 2;
702}
703
704static int ocfs2_acquire_dquot(struct dquot *dquot) 725static int ocfs2_acquire_dquot(struct dquot *dquot)
705{ 726{
706 handle_t *handle;
707 struct ocfs2_mem_dqinfo *oinfo = 727 struct ocfs2_mem_dqinfo *oinfo =
708 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv; 728 sb_dqinfo(dquot->dq_sb, dquot->dq_type)->dqi_priv;
709 struct ocfs2_super *osb = OCFS2_SB(dquot->dq_sb);
710 int status = 0; 729 int status = 0;
711 730
712 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type); 731 mlog_entry("id=%u, type=%d", dquot->dq_id, dquot->dq_type);
@@ -715,16 +734,7 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
715 status = ocfs2_lock_global_qf(oinfo, 1); 734 status = ocfs2_lock_global_qf(oinfo, 1);
716 if (status < 0) 735 if (status < 0)
717 goto out; 736 goto out;
718 handle = ocfs2_start_trans(osb,
719 ocfs2_calc_qinit_credits(dquot->dq_sb, dquot->dq_type));
720 if (IS_ERR(handle)) {
721 status = PTR_ERR(handle);
722 mlog_errno(status);
723 goto out_ilock;
724 }
725 status = dquot_acquire(dquot); 737 status = dquot_acquire(dquot);
726 ocfs2_commit_trans(osb, handle);
727out_ilock:
728 ocfs2_unlock_global_qf(oinfo, 1); 738 ocfs2_unlock_global_qf(oinfo, 1);
729out: 739out:
730 mlog_exit(status); 740 mlog_exit(status);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 5a460fa82553..bdb09cb6e1fe 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -20,6 +20,7 @@
20#include "sysfile.h" 20#include "sysfile.h"
21#include "dlmglue.h" 21#include "dlmglue.h"
22#include "quota.h" 22#include "quota.h"
23#include "uptodate.h"
23 24
24/* Number of local quota structures per block */ 25/* Number of local quota structures per block */
25static inline unsigned int ol_quota_entries_per_block(struct super_block *sb) 26static inline unsigned int ol_quota_entries_per_block(struct super_block *sb)
@@ -100,7 +101,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
100 handle_t *handle; 101 handle_t *handle;
101 int status; 102 int status;
102 103
103 handle = ocfs2_start_trans(OCFS2_SB(sb), 1); 104 handle = ocfs2_start_trans(OCFS2_SB(sb),
105 OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
104 if (IS_ERR(handle)) { 106 if (IS_ERR(handle)) {
105 status = PTR_ERR(handle); 107 status = PTR_ERR(handle);
106 mlog_errno(status); 108 mlog_errno(status);
@@ -610,7 +612,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
610 goto out_bh; 612 goto out_bh;
611 /* Mark quota file as clean if we are recovering quota file of 613 /* Mark quota file as clean if we are recovering quota file of
612 * some other node. */ 614 * some other node. */
613 handle = ocfs2_start_trans(osb, 1); 615 handle = ocfs2_start_trans(osb,
616 OCFS2_LOCAL_QINFO_WRITE_CREDITS);
614 if (IS_ERR(handle)) { 617 if (IS_ERR(handle)) {
615 status = PTR_ERR(handle); 618 status = PTR_ERR(handle);
616 mlog_errno(status); 619 mlog_errno(status);
@@ -940,7 +943,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
940 struct ocfs2_local_disk_chunk *dchunk; 943 struct ocfs2_local_disk_chunk *dchunk;
941 int status; 944 int status;
942 handle_t *handle; 945 handle_t *handle;
943 struct buffer_head *bh = NULL; 946 struct buffer_head *bh = NULL, *dbh = NULL;
944 u64 p_blkno; 947 u64 p_blkno;
945 948
946 /* We are protected by dqio_sem so no locking needed */ 949 /* We are protected by dqio_sem so no locking needed */
@@ -964,32 +967,35 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
964 mlog_errno(status); 967 mlog_errno(status);
965 goto out; 968 goto out;
966 } 969 }
970 /* Local quota info and two new blocks we initialize */
971 handle = ocfs2_start_trans(OCFS2_SB(sb),
972 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
973 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
974 if (IS_ERR(handle)) {
975 status = PTR_ERR(handle);
976 mlog_errno(status);
977 goto out;
978 }
967 979
980 /* Initialize chunk header */
968 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 981 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
969 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks, 982 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
970 &p_blkno, NULL, NULL); 983 &p_blkno, NULL, NULL);
971 up_read(&OCFS2_I(lqinode)->ip_alloc_sem); 984 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
972 if (status < 0) { 985 if (status < 0) {
973 mlog_errno(status); 986 mlog_errno(status);
974 goto out; 987 goto out_trans;
975 } 988 }
976 bh = sb_getblk(sb, p_blkno); 989 bh = sb_getblk(sb, p_blkno);
977 if (!bh) { 990 if (!bh) {
978 status = -ENOMEM; 991 status = -ENOMEM;
979 mlog_errno(status); 992 mlog_errno(status);
980 goto out; 993 goto out_trans;
981 } 994 }
982 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
983 996 ocfs2_set_new_buffer_uptodate(lqinode, bh);
984 handle = ocfs2_start_trans(OCFS2_SB(sb), 2);
985 if (IS_ERR(handle)) {
986 status = PTR_ERR(handle);
987 mlog_errno(status);
988 goto out;
989 }
990
991 status = ocfs2_journal_access_dq(handle, lqinode, bh, 997 status = ocfs2_journal_access_dq(handle, lqinode, bh,
992 OCFS2_JOURNAL_ACCESS_WRITE); 998 OCFS2_JOURNAL_ACCESS_CREATE);
993 if (status < 0) { 999 if (status < 0) {
994 mlog_errno(status); 1000 mlog_errno(status);
995 goto out_trans; 1001 goto out_trans;
@@ -999,7 +1005,6 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
999 memset(dchunk->dqc_bitmap, 0, 1005 memset(dchunk->dqc_bitmap, 0,
1000 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1006 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1001 OCFS2_QBLK_RESERVED_SPACE); 1007 OCFS2_QBLK_RESERVED_SPACE);
1002 set_buffer_uptodate(bh);
1003 unlock_buffer(bh); 1008 unlock_buffer(bh);
1004 status = ocfs2_journal_dirty(handle, bh); 1009 status = ocfs2_journal_dirty(handle, bh);
1005 if (status < 0) { 1010 if (status < 0) {
@@ -1007,6 +1012,38 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1007 goto out_trans; 1012 goto out_trans;
1008 } 1013 }
1009 1014
1015 /* Initialize new block with structures */
1016 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1017 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks + 1,
1018 &p_blkno, NULL, NULL);
1019 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1020 if (status < 0) {
1021 mlog_errno(status);
1022 goto out_trans;
1023 }
1024 dbh = sb_getblk(sb, p_blkno);
1025 if (!dbh) {
1026 status = -ENOMEM;
1027 mlog_errno(status);
1028 goto out_trans;
1029 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) {
1034 mlog_errno(status);
1035 goto out_trans;
1036 }
1037 lock_buffer(dbh);
1038 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1039 unlock_buffer(dbh);
1040 status = ocfs2_journal_dirty(handle, dbh);
1041 if (status < 0) {
1042 mlog_errno(status);
1043 goto out_trans;
1044 }
1045
1046 /* Update local quotafile info */
1010 oinfo->dqi_blocks += 2; 1047 oinfo->dqi_blocks += 2;
1011 oinfo->dqi_chunks++; 1048 oinfo->dqi_chunks++;
1012 status = ocfs2_local_write_info(sb, type); 1049 status = ocfs2_local_write_info(sb, type);
@@ -1031,6 +1068,7 @@ out_trans:
1031 ocfs2_commit_trans(OCFS2_SB(sb), handle); 1068 ocfs2_commit_trans(OCFS2_SB(sb), handle);
1032out: 1069out:
1033 brelse(bh); 1070 brelse(bh);
1071 brelse(dbh);
1034 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk); 1072 kmem_cache_free(ocfs2_qf_chunk_cachep, chunk);
1035 return ERR_PTR(status); 1073 return ERR_PTR(status);
1036} 1074}
@@ -1048,6 +1086,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1048 struct ocfs2_local_disk_chunk *dchunk; 1086 struct ocfs2_local_disk_chunk *dchunk;
1049 int epb = ol_quota_entries_per_block(sb); 1087 int epb = ol_quota_entries_per_block(sb);
1050 unsigned int chunk_blocks; 1088 unsigned int chunk_blocks;
1089 struct buffer_head *bh;
1090 u64 p_blkno;
1051 int status; 1091 int status;
1052 handle_t *handle; 1092 handle_t *handle;
1053 1093
@@ -1075,12 +1115,49 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1075 mlog_errno(status); 1115 mlog_errno(status);
1076 goto out; 1116 goto out;
1077 } 1117 }
1078 handle = ocfs2_start_trans(OCFS2_SB(sb), 2); 1118
1119 /* Get buffer from the just added block */
1120 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1121 status = ocfs2_extent_map_get_blocks(lqinode, oinfo->dqi_blocks,
1122 &p_blkno, NULL, NULL);
1123 up_read(&OCFS2_I(lqinode)->ip_alloc_sem);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto out;
1127 }
1128 bh = sb_getblk(sb, p_blkno);
1129 if (!bh) {
1130 status = -ENOMEM;
1131 mlog_errno(status);
1132 goto out;
1133 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh);
1135
1136 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb),
1138 OCFS2_LOCAL_QINFO_WRITE_CREDITS +
1139 2 * OCFS2_QUOTA_BLOCK_UPDATE_CREDITS);
1079 if (IS_ERR(handle)) { 1140 if (IS_ERR(handle)) {
1080 status = PTR_ERR(handle); 1141 status = PTR_ERR(handle);
1081 mlog_errno(status); 1142 mlog_errno(status);
1082 goto out; 1143 goto out;
1083 } 1144 }
1145 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) {
1149 mlog_errno(status);
1150 goto out_trans;
1151 }
1152 lock_buffer(bh);
1153 memset(bh->b_data, 0, sb->s_blocksize);
1154 unlock_buffer(bh);
1155 status = ocfs2_journal_dirty(handle, bh);
1156 if (status < 0) {
1157 mlog_errno(status);
1158 goto out_trans;
1159 }
1160 /* Update chunk header */
1084 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh,
1085 OCFS2_JOURNAL_ACCESS_WRITE); 1162 OCFS2_JOURNAL_ACCESS_WRITE);
1086 if (status < 0) { 1163 if (status < 0) {
@@ -1097,6 +1174,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1097 mlog_errno(status); 1174 mlog_errno(status);
1098 goto out_trans; 1175 goto out_trans;
1099 } 1176 }
1177 /* Update file header */
1100 oinfo->dqi_blocks++; 1178 oinfo->dqi_blocks++;
1101 status = ocfs2_local_write_info(sb, type); 1179 status = ocfs2_local_write_info(sb, type);
1102 if (status < 0) { 1180 if (status < 0) {
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index 3f661376a2de..e49c41050264 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -17,6 +17,7 @@
17 * General Public License for more details. 17 * General Public License for more details.
18 */ 18 */
19 19
20#include <linux/kernel.h>
20#include <linux/crc32.h> 21#include <linux/crc32.h>
21#include <linux/module.h> 22#include <linux/module.h>
22 23
@@ -153,7 +154,7 @@ static int status_map[] = {
153 154
154static int dlm_status_to_errno(enum dlm_status status) 155static int dlm_status_to_errno(enum dlm_status status)
155{ 156{
156 BUG_ON(status > (sizeof(status_map) / sizeof(status_map[0]))); 157 BUG_ON(status < 0 || status >= ARRAY_SIZE(status_map));
157 158
158 return status_map[status]; 159 return status_map[status];
159} 160}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 7efb349fb9bd..b0ee0fdf799a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -777,6 +777,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
777 } 777 }
778 di = (struct ocfs2_dinode *) (*bh)->b_data; 778 di = (struct ocfs2_dinode *) (*bh)->b_data;
779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats)); 779 memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
780 spin_lock_init(&stats->b_lock);
780 status = ocfs2_verify_volume(di, *bh, blksize, stats); 781 status = ocfs2_verify_volume(di, *bh, blksize, stats);
781 if (status >= 0) 782 if (status >= 0)
782 goto bail; 783 goto bail;
@@ -1182,7 +1183,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1182 wake_up(&osb->osb_mount_event); 1183 wake_up(&osb->osb_mount_event);
1183 1184
1184 /* Start this when the mount is almost sure of being successful */ 1185 /* Start this when the mount is almost sure of being successful */
1185 ocfs2_orphan_scan_init(osb); 1186 ocfs2_orphan_scan_start(osb);
1186 1187
1187 mlog_exit(status); 1188 mlog_exit(status);
1188 return status; 1189 return status;
@@ -1213,14 +1214,27 @@ static int ocfs2_get_sb(struct file_system_type *fs_type,
1213 mnt); 1214 mnt);
1214} 1215}
1215 1216
1217static void ocfs2_kill_sb(struct super_block *sb)
1218{
1219 struct ocfs2_super *osb = OCFS2_SB(sb);
1220
1221 /* Prevent further queueing of inode drop events */
1222 spin_lock(&dentry_list_lock);
1223 ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED);
1224 spin_unlock(&dentry_list_lock);
1225 /* Wait for work to finish and/or remove it */
1226 cancel_work_sync(&osb->dentry_lock_work);
1227
1228 kill_block_super(sb);
1229}
1230
1216static struct file_system_type ocfs2_fs_type = { 1231static struct file_system_type ocfs2_fs_type = {
1217 .owner = THIS_MODULE, 1232 .owner = THIS_MODULE,
1218 .name = "ocfs2", 1233 .name = "ocfs2",
1219 .get_sb = ocfs2_get_sb, /* is this called when we mount 1234 .get_sb = ocfs2_get_sb, /* is this called when we mount
1220 * the fs? */ 1235 * the fs? */
1221 .kill_sb = kill_block_super, /* set to the generic one 1236 .kill_sb = ocfs2_kill_sb,
1222 * right now, but do we 1237
1223 * need to change that? */
1224 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, 1238 .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
1225 .next = NULL 1239 .next = NULL
1226}; 1240};
@@ -1819,6 +1833,12 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1819 1833
1820 debugfs_remove(osb->osb_ctxt); 1834 debugfs_remove(osb->osb_ctxt);
1821 1835
1836 /*
1837 * Flush inode dropping work queue so that deletes are
1838 * performed while the filesystem is still working
1839 */
1840 ocfs2_drop_all_dl_inodes(osb);
1841
1822 /* Orphan scan should be stopped as early as possible */ 1842 /* Orphan scan should be stopped as early as possible */
1823 ocfs2_orphan_scan_stop(osb); 1843 ocfs2_orphan_scan_stop(osb);
1824 1844
@@ -1981,6 +2001,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
1981 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", 2001 snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
1982 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); 2002 MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev));
1983 2003
2004 ocfs2_orphan_scan_init(osb);
2005
1984 status = ocfs2_recovery_init(osb); 2006 status = ocfs2_recovery_init(osb);
1985 if (status) { 2007 if (status) {
1986 mlog(ML_ERROR, "Unable to initialize recovery state\n"); 2008 mlog(ML_ERROR, "Unable to initialize recovery state\n");
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index ba320e250747..d1a27cda984f 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -1052,7 +1052,8 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1052 struct ocfs2_xattr_block *xb; 1052 struct ocfs2_xattr_block *xb;
1053 struct ocfs2_xattr_value_root *xv; 1053 struct ocfs2_xattr_value_root *xv;
1054 size_t size; 1054 size_t size;
1055 int ret = -ENODATA, name_offset, name_len, block_off, i; 1055 int ret = -ENODATA, name_offset, name_len, i;
1056 int uninitialized_var(block_off);
1056 1057
1057 xs->bucket = ocfs2_xattr_bucket_new(inode); 1058 xs->bucket = ocfs2_xattr_bucket_new(inode);
1058 if (!xs->bucket) { 1059 if (!xs->bucket) {
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 3ce5ae9e3d2d..175db258942f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -234,23 +234,20 @@ static int check_mem_permission(struct task_struct *task)
234 234
235struct mm_struct *mm_for_maps(struct task_struct *task) 235struct mm_struct *mm_for_maps(struct task_struct *task)
236{ 236{
237 struct mm_struct *mm = get_task_mm(task); 237 struct mm_struct *mm;
238 if (!mm) 238
239 if (mutex_lock_killable(&task->cred_guard_mutex))
239 return NULL; 240 return NULL;
240 down_read(&mm->mmap_sem); 241
241 task_lock(task); 242 mm = get_task_mm(task);
242 if (task->mm != mm) 243 if (mm && mm != current->mm &&
243 goto out; 244 !ptrace_may_access(task, PTRACE_MODE_READ)) {
244 if (task->mm != current->mm && 245 mmput(mm);
245 __ptrace_may_access(task, PTRACE_MODE_READ) < 0) 246 mm = NULL;
246 goto out; 247 }
247 task_unlock(task); 248 mutex_unlock(&task->cred_guard_mutex);
249
248 return mm; 250 return mm;
249out:
250 task_unlock(task);
251 up_read(&mm->mmap_sem);
252 mmput(mm);
253 return NULL;
254} 251}
255 252
256static int proc_pid_cmdline(struct task_struct *task, char * buffer) 253static int proc_pid_cmdline(struct task_struct *task, char * buffer)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6f61b7cc32e0..9bd8be1d235c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -119,6 +119,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
119 mm = mm_for_maps(priv->task); 119 mm = mm_for_maps(priv->task);
120 if (!mm) 120 if (!mm)
121 return NULL; 121 return NULL;
122 down_read(&mm->mmap_sem);
122 123
123 tail_vma = get_gate_vma(priv->task); 124 tail_vma = get_gate_vma(priv->task);
124 priv->tail_vma = tail_vma; 125 priv->tail_vma = tail_vma;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 64a72e2e7650..8f5c05d3dbd3 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -189,6 +189,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
189 priv->task = NULL; 189 priv->task = NULL;
190 return NULL; 190 return NULL;
191 } 191 }
192 down_read(&mm->mmap_sem);
192 193
193 /* start from the Nth VMA */ 194 /* start from the Nth VMA */
194 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p)) 195 for (p = rb_first(&mm->mm_rb); p; p = rb_next(p))
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 0c93c7ef3d18..965df1227d64 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -770,7 +770,7 @@ xfs_buf_associate_memory(
770 bp->b_pages = NULL; 770 bp->b_pages = NULL;
771 bp->b_addr = mem; 771 bp->b_addr = mem;
772 772
773 rval = _xfs_buf_get_pages(bp, page_count, 0); 773 rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
774 if (rval) 774 if (rval)
775 return rval; 775 return rval;
776 776
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index db15feb906ff..4ece1906bd41 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -2010,7 +2010,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); 2010 dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); 2011 blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno, 2012 error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2013 blkcnt, XFS_BUF_LOCK, &bp); 2013 blkcnt,
2014 XFS_BUF_LOCK | XBF_DONT_BLOCK,
2015 &bp);
2014 if (error) 2016 if (error)
2015 return(error); 2017 return(error);
2016 2018
@@ -2141,8 +2143,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
2141 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 2143 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2142 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 2144 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2143 2145
2144 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, 2146 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno, blkcnt,
2145 blkcnt, XFS_BUF_LOCK); 2147 XFS_BUF_LOCK | XBF_DONT_BLOCK);
2146 ASSERT(bp); 2148 ASSERT(bp);
2147 ASSERT(!XFS_BUF_GETERROR(bp)); 2149 ASSERT(!XFS_BUF_GETERROR(bp));
2148 2150
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 7928b9983c1d..8ee5b5a76a2a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -6009,7 +6009,7 @@ xfs_getbmap(
6009 */ 6009 */
6010 error = ENOMEM; 6010 error = ENOMEM;
6011 subnex = 16; 6011 subnex = 16;
6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL); 6012 map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
6013 if (!map) 6013 if (!map)
6014 goto out_unlock_ilock; 6014 goto out_unlock_ilock;
6015 6015
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index e9df99574829..26717388acf5 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -120,8 +120,8 @@ xfs_btree_check_sblock(
120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) { 120 XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
121 if (bp) 121 if (bp)
122 xfs_buftrace("SBTREE ERROR", bp); 122 xfs_buftrace("SBTREE ERROR", bp);
123 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW, 123 XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
124 cur->bc_mp); 124 XFS_ERRLEVEL_LOW, cur->bc_mp, block);
125 return XFS_ERROR(EFSCORRUPTED); 125 return XFS_ERROR(EFSCORRUPTED);
126 } 126 }
127 return 0; 127 return 0;
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9ff6e57a5075..2847bbc1c534 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2201,7 +2201,7 @@ kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
2201xfs_da_state_t * 2201xfs_da_state_t *
2202xfs_da_state_alloc(void) 2202xfs_da_state_alloc(void)
2203{ 2203{
2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP); 2204 return kmem_zone_zalloc(xfs_da_state_zone, KM_NOFS);
2205} 2205}
2206 2206
2207/* 2207/*
@@ -2261,9 +2261,9 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2261 int off; 2261 int off;
2262 2262
2263 if (nbuf == 1) 2263 if (nbuf == 1)
2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP); 2264 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
2265 else 2265 else
2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP); 2266 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2267 dabuf->dirty = 0; 2267 dabuf->dirty = 0;
2268#ifdef XFS_DABUF_DEBUG 2268#ifdef XFS_DABUF_DEBUG
2269 dabuf->ra = ra; 2269 dabuf->ra = ra;
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index c657bec6d951..bb1d58eb3982 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -256,7 +256,7 @@ xfs_dir_cilookup_result(
256 !(args->op_flags & XFS_DA_OP_CILOOKUP)) 256 !(args->op_flags & XFS_DA_OP_CILOOKUP))
257 return EEXIST; 257 return EEXIST;
258 258
259 args->value = kmem_alloc(len, KM_MAYFAIL); 259 args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
260 if (!args->value) 260 if (!args->value)
261 return ENOMEM; 261 return ENOMEM;
262 262
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index cbd451bb4848..2d0b3e1da9e6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -167,17 +167,25 @@ xfs_growfs_data_private(
167 new = nb - mp->m_sb.sb_dblocks; 167 new = nb - mp->m_sb.sb_dblocks;
168 oagcount = mp->m_sb.sb_agcount; 168 oagcount = mp->m_sb.sb_agcount;
169 if (nagcount > oagcount) { 169 if (nagcount > oagcount) {
170 void *new_perag, *old_perag;
171
170 xfs_filestream_flush(mp); 172 xfs_filestream_flush(mp);
173
174 new_perag = kmem_zalloc(sizeof(xfs_perag_t) * nagcount,
175 KM_MAYFAIL);
176 if (!new_perag)
177 return XFS_ERROR(ENOMEM);
178
171 down_write(&mp->m_peraglock); 179 down_write(&mp->m_peraglock);
172 mp->m_perag = kmem_realloc(mp->m_perag, 180 memcpy(new_perag, mp->m_perag, sizeof(xfs_perag_t) * oagcount);
173 sizeof(xfs_perag_t) * nagcount, 181 old_perag = mp->m_perag;
174 sizeof(xfs_perag_t) * oagcount, 182 mp->m_perag = new_perag;
175 KM_SLEEP); 183
176 memset(&mp->m_perag[oagcount], 0,
177 (nagcount - oagcount) * sizeof(xfs_perag_t));
178 mp->m_flags |= XFS_MOUNT_32BITINODES; 184 mp->m_flags |= XFS_MOUNT_32BITINODES;
179 nagimax = xfs_initialize_perag(mp, nagcount); 185 nagimax = xfs_initialize_perag(mp, nagcount);
180 up_write(&mp->m_peraglock); 186 up_write(&mp->m_peraglock);
187
188 kmem_free(old_perag);
181 } 189 }
182 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS); 190 tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFS);
183 tp->t_flags |= XFS_TRANS_RESERVE; 191 tp->t_flags |= XFS_TRANS_RESERVE;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1f22d65fed0a..da428b3fe0f5 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -343,6 +343,16 @@ xfs_iformat(
343 return XFS_ERROR(EFSCORRUPTED); 343 return XFS_ERROR(EFSCORRUPTED);
344 } 344 }
345 345
346 if (unlikely((ip->i_d.di_flags & XFS_DIFLAG_REALTIME) &&
347 !ip->i_mount->m_rtdev_targp)) {
348 xfs_fs_repair_cmn_err(CE_WARN, ip->i_mount,
349 "corrupt dinode %Lu, has realtime flag set.",
350 ip->i_ino);
351 XFS_CORRUPTION_ERROR("xfs_iformat(realtime)",
352 XFS_ERRLEVEL_LOW, ip->i_mount, dip);
353 return XFS_ERROR(EFSCORRUPTED);
354 }
355
346 switch (ip->i_d.di_mode & S_IFMT) { 356 switch (ip->i_d.di_mode & S_IFMT) {
347 case S_IFIFO: 357 case S_IFIFO:
348 case S_IFCHR: 358 case S_IFCHR:
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 3750f04ede0b..9dbdff3ea484 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3180,7 +3180,7 @@ try_again:
3180STATIC void 3180STATIC void
3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) 3181xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
3182{ 3182{
3183 ASSERT(spin_is_locked(&log->l_icloglock)); 3183 assert_spin_locked(&log->l_icloglock);
3184 3184
3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) { 3185 if (iclog->ic_state == XLOG_STATE_ACTIVE) {
3186 xlog_state_switch_iclogs(log, iclog, 0); 3186 xlog_state_switch_iclogs(log, iclog, 0);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index c4eca5ed5dab..492d75bae2bf 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -538,7 +538,9 @@ xfs_readlink_bmap(
538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock); 538 d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount); 539 byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
540 540
541 bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0); 541 bp = xfs_buf_read_flags(mp->m_ddev_targp, d, BTOBB(byte_cnt),
542 XBF_LOCK | XBF_MAPPED |
543 XBF_DONT_BLOCK);
542 error = XFS_BUF_GETERROR(bp); 544 error = XFS_BUF_GETERROR(bp);
543 if (error) { 545 if (error) {
544 xfs_ioerror_alert("xfs_readlink", 546 xfs_ioerror_alert("xfs_readlink",
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 16713dc672e4..3060bdc35ffe 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -110,6 +110,7 @@ struct kvm_memory_slot {
110 110
111struct kvm_kernel_irq_routing_entry { 111struct kvm_kernel_irq_routing_entry {
112 u32 gsi; 112 u32 gsi;
113 u32 type;
113 int (*set)(struct kvm_kernel_irq_routing_entry *e, 114 int (*set)(struct kvm_kernel_irq_routing_entry *e,
114 struct kvm *kvm, int level); 115 struct kvm *kvm, int level);
115 union { 116 union {
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fdffb413b192..f6b90240dd41 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -473,7 +473,6 @@ extern int nfs_writepages(struct address_space *, struct writeback_control *);
473extern int nfs_flush_incompatible(struct file *file, struct page *page); 473extern int nfs_flush_incompatible(struct file *file, struct page *page);
474extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); 474extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
475extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); 475extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
476extern void nfs_writedata_release(void *);
477 476
478/* 477/*
479 * Try to write back everything synchronously (but check the 478 * Try to write back everything synchronously (but check the
@@ -488,7 +487,6 @@ extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
488extern int nfs_commit_inode(struct inode *, int); 487extern int nfs_commit_inode(struct inode *, int);
489extern struct nfs_write_data *nfs_commitdata_alloc(void); 488extern struct nfs_write_data *nfs_commitdata_alloc(void);
490extern void nfs_commit_free(struct nfs_write_data *wdata); 489extern void nfs_commit_free(struct nfs_write_data *wdata);
491extern void nfs_commitdata_release(void *wdata);
492#else 490#else
493static inline int 491static inline int
494nfs_commit_inode(struct inode *inode, int how) 492nfs_commit_inode(struct inode *inode, int how)
@@ -507,6 +505,7 @@ nfs_have_writebacks(struct inode *inode)
507 * Allocate nfs_write_data structures 505 * Allocate nfs_write_data structures
508 */ 506 */
509extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages); 507extern struct nfs_write_data *nfs_writedata_alloc(unsigned int npages);
508extern void nfs_writedata_free(struct nfs_write_data *);
510 509
511/* 510/*
512 * linux/fs/nfs/read.c 511 * linux/fs/nfs/read.c
@@ -515,7 +514,6 @@ extern int nfs_readpage(struct file *, struct page *);
515extern int nfs_readpages(struct file *, struct address_space *, 514extern int nfs_readpages(struct file *, struct address_space *,
516 struct list_head *, unsigned); 515 struct list_head *, unsigned);
517extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *); 516extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
518extern void nfs_readdata_release(void *data);
519extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, 517extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
520 struct page *); 518 struct page *);
521 519
@@ -523,6 +521,7 @@ extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
523 * Allocate nfs_read_data structures 521 * Allocate nfs_read_data structures
524 */ 522 */
525extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages); 523extern struct nfs_read_data *nfs_readdata_alloc(unsigned int npages);
524extern void nfs_readdata_free(struct nfs_read_data *);
526 525
527/* 526/*
528 * linux/fs/nfs3proc.c 527 * linux/fs/nfs3proc.c
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 0a6f3209c9de..b53f7006cc4e 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -115,7 +115,7 @@ enum perf_counter_sample_format {
115 PERF_SAMPLE_TID = 1U << 1, 115 PERF_SAMPLE_TID = 1U << 1,
116 PERF_SAMPLE_TIME = 1U << 2, 116 PERF_SAMPLE_TIME = 1U << 2,
117 PERF_SAMPLE_ADDR = 1U << 3, 117 PERF_SAMPLE_ADDR = 1U << 3,
118 PERF_SAMPLE_GROUP = 1U << 4, 118 PERF_SAMPLE_READ = 1U << 4,
119 PERF_SAMPLE_CALLCHAIN = 1U << 5, 119 PERF_SAMPLE_CALLCHAIN = 1U << 5,
120 PERF_SAMPLE_ID = 1U << 6, 120 PERF_SAMPLE_ID = 1U << 6,
121 PERF_SAMPLE_CPU = 1U << 7, 121 PERF_SAMPLE_CPU = 1U << 7,
@@ -127,16 +127,32 @@ enum perf_counter_sample_format {
127}; 127};
128 128
129/* 129/*
130 * Bits that can be set in attr.read_format to request that 130 * The format of the data returned by read() on a perf counter fd,
131 * reads on the counter should return the indicated quantities, 131 * as specified by attr.read_format:
132 * in increasing order of bit value, after the counter value. 132 *
133 * struct read_format {
134 * { u64 value;
135 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
136 * { u64 time_running; } && PERF_FORMAT_RUNNING
137 * { u64 id; } && PERF_FORMAT_ID
138 * } && !PERF_FORMAT_GROUP
139 *
140 * { u64 nr;
141 * { u64 time_enabled; } && PERF_FORMAT_ENABLED
142 * { u64 time_running; } && PERF_FORMAT_RUNNING
143 * { u64 value;
144 * { u64 id; } && PERF_FORMAT_ID
145 * } cntr[nr];
146 * } && PERF_FORMAT_GROUP
147 * };
133 */ 148 */
134enum perf_counter_read_format { 149enum perf_counter_read_format {
135 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, 150 PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
136 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, 151 PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
137 PERF_FORMAT_ID = 1U << 2, 152 PERF_FORMAT_ID = 1U << 2,
153 PERF_FORMAT_GROUP = 1U << 3,
138 154
139 PERF_FORMAT_MAX = 1U << 3, /* non-ABI */ 155 PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
140}; 156};
141 157
142#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ 158#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -343,10 +359,8 @@ enum perf_event_type {
343 * struct { 359 * struct {
344 * struct perf_event_header header; 360 * struct perf_event_header header;
345 * u32 pid, tid; 361 * u32 pid, tid;
346 * u64 value; 362 *
347 * { u64 time_enabled; } && PERF_FORMAT_ENABLED 363 * struct read_format values;
348 * { u64 time_running; } && PERF_FORMAT_RUNNING
349 * { u64 parent_id; } && PERF_FORMAT_ID
350 * }; 364 * };
351 */ 365 */
352 PERF_EVENT_READ = 8, 366 PERF_EVENT_READ = 8,
@@ -364,11 +378,24 @@ enum perf_event_type {
364 * { u32 cpu, res; } && PERF_SAMPLE_CPU 378 * { u32 cpu, res; } && PERF_SAMPLE_CPU
365 * { u64 period; } && PERF_SAMPLE_PERIOD 379 * { u64 period; } && PERF_SAMPLE_PERIOD
366 * 380 *
367 * { u64 nr; 381 * { struct read_format values; } && PERF_SAMPLE_READ
368 * { u64 id, val; } cnt[nr]; } && PERF_SAMPLE_GROUP
369 * 382 *
370 * { u64 nr, 383 * { u64 nr,
371 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN 384 * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
385 *
386 * #
387 * # The RAW record below is opaque data wrt the ABI
388 * #
389 * # That is, the ABI doesn't make any promises wrt to
390 * # the stability of its content, it may vary depending
391 * # on event, hardware, kernel version and phase of
392 * # the moon.
393 * #
394 * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
395 * #
396 *
397 * { u32 size;
398 * char data[size];}&& PERF_SAMPLE_RAW
372 * }; 399 * };
373 */ 400 */
374 PERF_EVENT_SAMPLE = 9, 401 PERF_EVENT_SAMPLE = 9,
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 6788e1a4d4ca..cf3c2f5dba51 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -77,7 +77,14 @@ struct task_struct;
77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ 77#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \
78 { .flags = word, .bit_nr = bit, } 78 { .flags = word, .bit_nr = bit, }
79 79
80extern void init_waitqueue_head(wait_queue_head_t *q); 80extern void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *);
81
82#define init_waitqueue_head(q) \
83 do { \
84 static struct lock_class_key __key; \
85 \
86 __init_waitqueue_head((q), &__key); \
87 } while (0)
81 88
82#ifdef CONFIG_LOCKDEP 89#ifdef CONFIG_LOCKDEP
83# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ 90# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 7fb16d90e7b1..f64fbaae781a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -637,12 +637,20 @@ __attribute__((section("_ftrace_events"))) event_##call = { \
637 * pc = preempt_count(); 637 * pc = preempt_count();
638 * 638 *
639 * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args); 639 * __data_size = ftrace_get_offsets_<call>(&__data_offsets, args);
640 * __entry_size = __data_size + sizeof(*entry); 640 *
641 * // Below we want to get the aligned size by taking into account
642 * // the u32 field that will later store the buffer size
643 * __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
644 * sizeof(u64));
645 * __entry_size -= sizeof(u32);
641 * 646 *
642 * do { 647 * do {
643 * char raw_data[__entry_size]; <- allocate our sample in the stack 648 * char raw_data[__entry_size]; <- allocate our sample in the stack
644 * struct trace_entry *ent; 649 * struct trace_entry *ent;
645 * 650 *
651 * zero dead bytes from alignment to avoid stack leak to userspace:
652 *
653 * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL;
646 * entry = (struct ftrace_raw_<call> *)raw_data; 654 * entry = (struct ftrace_raw_<call> *)raw_data;
647 * ent = &entry->ent; 655 * ent = &entry->ent;
648 * tracing_generic_entry_update(ent, irq_flags, pc); 656 * tracing_generic_entry_update(ent, irq_flags, pc);
@@ -685,12 +693,15 @@ static void ftrace_profile_##call(proto) \
685 pc = preempt_count(); \ 693 pc = preempt_count(); \
686 \ 694 \
687 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ 695 __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
688 __entry_size = ALIGN(__data_size + sizeof(*entry), sizeof(u64));\ 696 __entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
697 sizeof(u64)); \
698 __entry_size -= sizeof(u32); \
689 \ 699 \
690 do { \ 700 do { \
691 char raw_data[__entry_size]; \ 701 char raw_data[__entry_size]; \
692 struct trace_entry *ent; \ 702 struct trace_entry *ent; \
693 \ 703 \
704 *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \
694 entry = (struct ftrace_raw_##call *)raw_data; \ 705 entry = (struct ftrace_raw_##call *)raw_data; \
695 ent = &entry->ent; \ 706 ent = &entry->ent; \
696 tracing_generic_entry_update(ent, irq_flags, pc); \ 707 tracing_generic_entry_update(ent, irq_flags, pc); \
diff --git a/kernel/futex.c b/kernel/futex.c
index 0672ff88f159..e18cfbdc7190 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1010,15 +1010,19 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue 1010 * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue
1011 * q: the futex_q 1011 * q: the futex_q
1012 * key: the key of the requeue target futex 1012 * key: the key of the requeue target futex
1013 * hb: the hash_bucket of the requeue target futex
1013 * 1014 *
1014 * During futex_requeue, with requeue_pi=1, it is possible to acquire the 1015 * During futex_requeue, with requeue_pi=1, it is possible to acquire the
1015 * target futex if it is uncontended or via a lock steal. Set the futex_q key 1016 * target futex if it is uncontended or via a lock steal. Set the futex_q key
1016 * to the requeue target futex so the waiter can detect the wakeup on the right 1017 * to the requeue target futex so the waiter can detect the wakeup on the right
1017 * futex, but remove it from the hb and NULL the rt_waiter so it can detect 1018 * futex, but remove it from the hb and NULL the rt_waiter so it can detect
1018 * atomic lock acquisition. Must be called with the q->lock_ptr held. 1019 * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
1020 * to protect access to the pi_state to fixup the owner later. Must be called
1021 * with both q->lock_ptr and hb->lock held.
1019 */ 1022 */
1020static inline 1023static inline
1021void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) 1024void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
1025 struct futex_hash_bucket *hb)
1022{ 1026{
1023 drop_futex_key_refs(&q->key); 1027 drop_futex_key_refs(&q->key);
1024 get_futex_key_refs(key); 1028 get_futex_key_refs(key);
@@ -1030,6 +1034,11 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key)
1030 WARN_ON(!q->rt_waiter); 1034 WARN_ON(!q->rt_waiter);
1031 q->rt_waiter = NULL; 1035 q->rt_waiter = NULL;
1032 1036
1037 q->lock_ptr = &hb->lock;
1038#ifdef CONFIG_DEBUG_PI_LIST
1039 q->list.plist.lock = &hb->lock;
1040#endif
1041
1033 wake_up_state(q->task, TASK_NORMAL); 1042 wake_up_state(q->task, TASK_NORMAL);
1034} 1043}
1035 1044
@@ -1088,7 +1097,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
1088 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, 1097 ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
1089 set_waiters); 1098 set_waiters);
1090 if (ret == 1) 1099 if (ret == 1)
1091 requeue_pi_wake_futex(top_waiter, key2); 1100 requeue_pi_wake_futex(top_waiter, key2, hb2);
1092 1101
1093 return ret; 1102 return ret;
1094} 1103}
@@ -1247,8 +1256,15 @@ retry_private:
1247 if (!match_futex(&this->key, &key1)) 1256 if (!match_futex(&this->key, &key1))
1248 continue; 1257 continue;
1249 1258
1250 WARN_ON(!requeue_pi && this->rt_waiter); 1259 /*
1251 WARN_ON(requeue_pi && !this->rt_waiter); 1260 * FUTEX_WAIT_REQEUE_PI and FUTEX_CMP_REQUEUE_PI should always
1261 * be paired with each other and no other futex ops.
1262 */
1263 if ((requeue_pi && !this->rt_waiter) ||
1264 (!requeue_pi && this->rt_waiter)) {
1265 ret = -EINVAL;
1266 break;
1267 }
1252 1268
1253 /* 1269 /*
1254 * Wake nr_wake waiters. For requeue_pi, if we acquired the 1270 * Wake nr_wake waiters. For requeue_pi, if we acquired the
@@ -1273,7 +1289,7 @@ retry_private:
1273 this->task, 1); 1289 this->task, 1);
1274 if (ret == 1) { 1290 if (ret == 1) {
1275 /* We got the lock. */ 1291 /* We got the lock. */
1276 requeue_pi_wake_futex(this, &key2); 1292 requeue_pi_wake_futex(this, &key2, hb2);
1277 continue; 1293 continue;
1278 } else if (ret) { 1294 } else if (ret) {
1279 /* -EDEADLK */ 1295 /* -EDEADLK */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d607a5b9ee29..235716556bf1 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -180,7 +180,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
180 int cmd = op & FUTEX_CMD_MASK; 180 int cmd = op & FUTEX_CMD_MASK;
181 181
182 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || 182 if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
183 cmd == FUTEX_WAIT_BITSET)) { 183 cmd == FUTEX_WAIT_BITSET ||
184 cmd == FUTEX_WAIT_REQUEUE_PI)) {
184 if (get_compat_timespec(&ts, utime)) 185 if (get_compat_timespec(&ts, utime))
185 return -EFAULT; 186 return -EFAULT;
186 if (!timespec_valid(&ts)) 187 if (!timespec_valid(&ts))
@@ -191,7 +192,8 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
191 t = ktime_add_safe(ktime_get(), t); 192 t = ktime_add_safe(ktime_get(), t);
192 tp = &t; 193 tp = &t;
193 } 194 }
194 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE) 195 if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
196 cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
195 val2 = (int) (unsigned long) utime; 197 val2 = (int) (unsigned long) utime;
196 198
197 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); 199 return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 61c679db4687..d222515a5a06 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -761,7 +761,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
761{ 761{
762 struct irq_desc *desc = irq_to_desc(irq); 762 struct irq_desc *desc = irq_to_desc(irq);
763 struct irqaction *action, **action_ptr; 763 struct irqaction *action, **action_ptr;
764 struct task_struct *irqthread;
765 unsigned long flags; 764 unsigned long flags;
766 765
767 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); 766 WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq);
@@ -809,9 +808,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
809 desc->chip->disable(irq); 808 desc->chip->disable(irq);
810 } 809 }
811 810
812 irqthread = action->thread;
813 action->thread = NULL;
814
815 spin_unlock_irqrestore(&desc->lock, flags); 811 spin_unlock_irqrestore(&desc->lock, flags);
816 812
817 unregister_handler_proc(irq, action); 813 unregister_handler_proc(irq, action);
@@ -819,12 +815,6 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
819 /* Make sure it's not being used on another CPU: */ 815 /* Make sure it's not being used on another CPU: */
820 synchronize_irq(irq); 816 synchronize_irq(irq);
821 817
822 if (irqthread) {
823 if (!test_bit(IRQTF_DIED, &action->thread_flags))
824 kthread_stop(irqthread);
825 put_task_struct(irqthread);
826 }
827
828#ifdef CONFIG_DEBUG_SHIRQ 818#ifdef CONFIG_DEBUG_SHIRQ
829 /* 819 /*
830 * It's a shared IRQ -- the driver ought to be prepared for an IRQ 820 * It's a shared IRQ -- the driver ought to be prepared for an IRQ
@@ -840,6 +830,13 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
840 local_irq_restore(flags); 830 local_irq_restore(flags);
841 } 831 }
842#endif 832#endif
833
834 if (action->thread) {
835 if (!test_bit(IRQTF_DIED, &action->thread_flags))
836 kthread_stop(action->thread);
837 put_task_struct(action->thread);
838 }
839
843 return action; 840 return action;
844} 841}
845 842
diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c
index 2f69bee57bf2..3fd30197da2e 100644
--- a/kernel/irq/numa_migrate.c
+++ b/kernel/irq/numa_migrate.c
@@ -107,8 +107,8 @@ out_unlock:
107 107
108struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) 108struct irq_desc *move_irq_desc(struct irq_desc *desc, int node)
109{ 109{
110 /* those all static, do move them */ 110 /* those static or target node is -1, do not move them */
111 if (desc->irq < NR_IRQS_LEGACY) 111 if (desc->irq < NR_IRQS_LEGACY || node == -1)
112 return desc; 112 return desc;
113 113
114 if (desc->node != node) 114 if (desc->node != node)
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d7135aa2d2c4..e94caa666dba 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -758,7 +758,8 @@ static int __init lockdep_proc_init(void)
758 &proc_lockdep_stats_operations); 758 &proc_lockdep_stats_operations);
759 759
760#ifdef CONFIG_LOCK_STAT 760#ifdef CONFIG_LOCK_STAT
761 proc_create("lock_stat", S_IRUSR, NULL, &proc_lock_stat_operations); 761 proc_create("lock_stat", S_IRUSR | S_IWUSR, NULL,
762 &proc_lock_stat_operations);
762#endif 763#endif
763 764
764 return 0; 765 return 0;
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index bf8110b35c51..534e20d14d63 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -307,6 +307,10 @@ counter_sched_out(struct perf_counter *counter,
307 return; 307 return;
308 308
309 counter->state = PERF_COUNTER_STATE_INACTIVE; 309 counter->state = PERF_COUNTER_STATE_INACTIVE;
310 if (counter->pending_disable) {
311 counter->pending_disable = 0;
312 counter->state = PERF_COUNTER_STATE_OFF;
313 }
310 counter->tstamp_stopped = ctx->time; 314 counter->tstamp_stopped = ctx->time;
311 counter->pmu->disable(counter); 315 counter->pmu->disable(counter);
312 counter->oncpu = -1; 316 counter->oncpu = -1;
@@ -1692,7 +1696,32 @@ static int perf_release(struct inode *inode, struct file *file)
1692 return 0; 1696 return 0;
1693} 1697}
1694 1698
1695static u64 perf_counter_read_tree(struct perf_counter *counter) 1699static int perf_counter_read_size(struct perf_counter *counter)
1700{
1701 int entry = sizeof(u64); /* value */
1702 int size = 0;
1703 int nr = 1;
1704
1705 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
1706 size += sizeof(u64);
1707
1708 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1709 size += sizeof(u64);
1710
1711 if (counter->attr.read_format & PERF_FORMAT_ID)
1712 entry += sizeof(u64);
1713
1714 if (counter->attr.read_format & PERF_FORMAT_GROUP) {
1715 nr += counter->group_leader->nr_siblings;
1716 size += sizeof(u64);
1717 }
1718
1719 size += entry * nr;
1720
1721 return size;
1722}
1723
1724static u64 perf_counter_read_value(struct perf_counter *counter)
1696{ 1725{
1697 struct perf_counter *child; 1726 struct perf_counter *child;
1698 u64 total = 0; 1727 u64 total = 0;
@@ -1704,14 +1733,96 @@ static u64 perf_counter_read_tree(struct perf_counter *counter)
1704 return total; 1733 return total;
1705} 1734}
1706 1735
1736static int perf_counter_read_entry(struct perf_counter *counter,
1737 u64 read_format, char __user *buf)
1738{
1739 int n = 0, count = 0;
1740 u64 values[2];
1741
1742 values[n++] = perf_counter_read_value(counter);
1743 if (read_format & PERF_FORMAT_ID)
1744 values[n++] = primary_counter_id(counter);
1745
1746 count = n * sizeof(u64);
1747
1748 if (copy_to_user(buf, values, count))
1749 return -EFAULT;
1750
1751 return count;
1752}
1753
1754static int perf_counter_read_group(struct perf_counter *counter,
1755 u64 read_format, char __user *buf)
1756{
1757 struct perf_counter *leader = counter->group_leader, *sub;
1758 int n = 0, size = 0, err = -EFAULT;
1759 u64 values[3];
1760
1761 values[n++] = 1 + leader->nr_siblings;
1762 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1763 values[n++] = leader->total_time_enabled +
1764 atomic64_read(&leader->child_total_time_enabled);
1765 }
1766 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1767 values[n++] = leader->total_time_running +
1768 atomic64_read(&leader->child_total_time_running);
1769 }
1770
1771 size = n * sizeof(u64);
1772
1773 if (copy_to_user(buf, values, size))
1774 return -EFAULT;
1775
1776 err = perf_counter_read_entry(leader, read_format, buf + size);
1777 if (err < 0)
1778 return err;
1779
1780 size += err;
1781
1782 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
1783 err = perf_counter_read_entry(counter, read_format,
1784 buf + size);
1785 if (err < 0)
1786 return err;
1787
1788 size += err;
1789 }
1790
1791 return size;
1792}
1793
1794static int perf_counter_read_one(struct perf_counter *counter,
1795 u64 read_format, char __user *buf)
1796{
1797 u64 values[4];
1798 int n = 0;
1799
1800 values[n++] = perf_counter_read_value(counter);
1801 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
1802 values[n++] = counter->total_time_enabled +
1803 atomic64_read(&counter->child_total_time_enabled);
1804 }
1805 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
1806 values[n++] = counter->total_time_running +
1807 atomic64_read(&counter->child_total_time_running);
1808 }
1809 if (read_format & PERF_FORMAT_ID)
1810 values[n++] = primary_counter_id(counter);
1811
1812 if (copy_to_user(buf, values, n * sizeof(u64)))
1813 return -EFAULT;
1814
1815 return n * sizeof(u64);
1816}
1817
1707/* 1818/*
1708 * Read the performance counter - simple non blocking version for now 1819 * Read the performance counter - simple non blocking version for now
1709 */ 1820 */
1710static ssize_t 1821static ssize_t
1711perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) 1822perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1712{ 1823{
1713 u64 values[4]; 1824 u64 read_format = counter->attr.read_format;
1714 int n; 1825 int ret;
1715 1826
1716 /* 1827 /*
1717 * Return end-of-file for a read on a counter that is in 1828 * Return end-of-file for a read on a counter that is in
@@ -1721,28 +1832,18 @@ perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1721 if (counter->state == PERF_COUNTER_STATE_ERROR) 1832 if (counter->state == PERF_COUNTER_STATE_ERROR)
1722 return 0; 1833 return 0;
1723 1834
1835 if (count < perf_counter_read_size(counter))
1836 return -ENOSPC;
1837
1724 WARN_ON_ONCE(counter->ctx->parent_ctx); 1838 WARN_ON_ONCE(counter->ctx->parent_ctx);
1725 mutex_lock(&counter->child_mutex); 1839 mutex_lock(&counter->child_mutex);
1726 values[0] = perf_counter_read_tree(counter); 1840 if (read_format & PERF_FORMAT_GROUP)
1727 n = 1; 1841 ret = perf_counter_read_group(counter, read_format, buf);
1728 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) 1842 else
1729 values[n++] = counter->total_time_enabled + 1843 ret = perf_counter_read_one(counter, read_format, buf);
1730 atomic64_read(&counter->child_total_time_enabled);
1731 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
1732 values[n++] = counter->total_time_running +
1733 atomic64_read(&counter->child_total_time_running);
1734 if (counter->attr.read_format & PERF_FORMAT_ID)
1735 values[n++] = primary_counter_id(counter);
1736 mutex_unlock(&counter->child_mutex); 1844 mutex_unlock(&counter->child_mutex);
1737 1845
1738 if (count < n * sizeof(u64)) 1846 return ret;
1739 return -EINVAL;
1740 count = n * sizeof(u64);
1741
1742 if (copy_to_user(buf, values, count))
1743 return -EFAULT;
1744
1745 return count;
1746} 1847}
1747 1848
1748static ssize_t 1849static ssize_t
@@ -2246,7 +2347,7 @@ static void perf_pending_counter(struct perf_pending_entry *entry)
2246 2347
2247 if (counter->pending_disable) { 2348 if (counter->pending_disable) {
2248 counter->pending_disable = 0; 2349 counter->pending_disable = 0;
2249 perf_counter_disable(counter); 2350 __perf_counter_disable(counter);
2250 } 2351 }
2251 2352
2252 if (counter->pending_wakeup) { 2353 if (counter->pending_wakeup) {
@@ -2631,6 +2732,79 @@ static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p)
2631 return task_pid_nr_ns(p, counter->ns); 2732 return task_pid_nr_ns(p, counter->ns);
2632} 2733}
2633 2734
2735static void perf_output_read_one(struct perf_output_handle *handle,
2736 struct perf_counter *counter)
2737{
2738 u64 read_format = counter->attr.read_format;
2739 u64 values[4];
2740 int n = 0;
2741
2742 values[n++] = atomic64_read(&counter->count);
2743 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2744 values[n++] = counter->total_time_enabled +
2745 atomic64_read(&counter->child_total_time_enabled);
2746 }
2747 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2748 values[n++] = counter->total_time_running +
2749 atomic64_read(&counter->child_total_time_running);
2750 }
2751 if (read_format & PERF_FORMAT_ID)
2752 values[n++] = primary_counter_id(counter);
2753
2754 perf_output_copy(handle, values, n * sizeof(u64));
2755}
2756
2757/*
2758 * XXX PERF_FORMAT_GROUP vs inherited counters seems difficult.
2759 */
2760static void perf_output_read_group(struct perf_output_handle *handle,
2761 struct perf_counter *counter)
2762{
2763 struct perf_counter *leader = counter->group_leader, *sub;
2764 u64 read_format = counter->attr.read_format;
2765 u64 values[5];
2766 int n = 0;
2767
2768 values[n++] = 1 + leader->nr_siblings;
2769
2770 if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
2771 values[n++] = leader->total_time_enabled;
2772
2773 if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
2774 values[n++] = leader->total_time_running;
2775
2776 if (leader != counter)
2777 leader->pmu->read(leader);
2778
2779 values[n++] = atomic64_read(&leader->count);
2780 if (read_format & PERF_FORMAT_ID)
2781 values[n++] = primary_counter_id(leader);
2782
2783 perf_output_copy(handle, values, n * sizeof(u64));
2784
2785 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2786 n = 0;
2787
2788 if (sub != counter)
2789 sub->pmu->read(sub);
2790
2791 values[n++] = atomic64_read(&sub->count);
2792 if (read_format & PERF_FORMAT_ID)
2793 values[n++] = primary_counter_id(sub);
2794
2795 perf_output_copy(handle, values, n * sizeof(u64));
2796 }
2797}
2798
2799static void perf_output_read(struct perf_output_handle *handle,
2800 struct perf_counter *counter)
2801{
2802 if (counter->attr.read_format & PERF_FORMAT_GROUP)
2803 perf_output_read_group(handle, counter);
2804 else
2805 perf_output_read_one(handle, counter);
2806}
2807
2634void perf_counter_output(struct perf_counter *counter, int nmi, 2808void perf_counter_output(struct perf_counter *counter, int nmi,
2635 struct perf_sample_data *data) 2809 struct perf_sample_data *data)
2636{ 2810{
@@ -2642,12 +2816,7 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
2642 struct { 2816 struct {
2643 u32 pid, tid; 2817 u32 pid, tid;
2644 } tid_entry; 2818 } tid_entry;
2645 struct {
2646 u64 id;
2647 u64 counter;
2648 } group_entry;
2649 struct perf_callchain_entry *callchain = NULL; 2819 struct perf_callchain_entry *callchain = NULL;
2650 struct perf_raw_record *raw = NULL;
2651 int callchain_size = 0; 2820 int callchain_size = 0;
2652 u64 time; 2821 u64 time;
2653 struct { 2822 struct {
@@ -2701,10 +2870,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
2701 if (sample_type & PERF_SAMPLE_PERIOD) 2870 if (sample_type & PERF_SAMPLE_PERIOD)
2702 header.size += sizeof(u64); 2871 header.size += sizeof(u64);
2703 2872
2704 if (sample_type & PERF_SAMPLE_GROUP) { 2873 if (sample_type & PERF_SAMPLE_READ)
2705 header.size += sizeof(u64) + 2874 header.size += perf_counter_read_size(counter);
2706 counter->nr_siblings * sizeof(group_entry);
2707 }
2708 2875
2709 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2876 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2710 callchain = perf_callchain(data->regs); 2877 callchain = perf_callchain(data->regs);
@@ -2717,9 +2884,15 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
2717 } 2884 }
2718 2885
2719 if (sample_type & PERF_SAMPLE_RAW) { 2886 if (sample_type & PERF_SAMPLE_RAW) {
2720 raw = data->raw; 2887 int size = sizeof(u32);
2721 if (raw) 2888
2722 header.size += raw->size; 2889 if (data->raw)
2890 size += data->raw->size;
2891 else
2892 size += sizeof(u32);
2893
2894 WARN_ON_ONCE(size & (sizeof(u64)-1));
2895 header.size += size;
2723 } 2896 }
2724 2897
2725 ret = perf_output_begin(&handle, counter, header.size, nmi, 1); 2898 ret = perf_output_begin(&handle, counter, header.size, nmi, 1);
@@ -2755,26 +2928,8 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
2755 if (sample_type & PERF_SAMPLE_PERIOD) 2928 if (sample_type & PERF_SAMPLE_PERIOD)
2756 perf_output_put(&handle, data->period); 2929 perf_output_put(&handle, data->period);
2757 2930
2758 /* 2931 if (sample_type & PERF_SAMPLE_READ)
2759 * XXX PERF_SAMPLE_GROUP vs inherited counters seems difficult. 2932 perf_output_read(&handle, counter);
2760 */
2761 if (sample_type & PERF_SAMPLE_GROUP) {
2762 struct perf_counter *leader, *sub;
2763 u64 nr = counter->nr_siblings;
2764
2765 perf_output_put(&handle, nr);
2766
2767 leader = counter->group_leader;
2768 list_for_each_entry(sub, &leader->sibling_list, list_entry) {
2769 if (sub != counter)
2770 sub->pmu->read(sub);
2771
2772 group_entry.id = primary_counter_id(sub);
2773 group_entry.counter = atomic64_read(&sub->count);
2774
2775 perf_output_put(&handle, group_entry);
2776 }
2777 }
2778 2933
2779 if (sample_type & PERF_SAMPLE_CALLCHAIN) { 2934 if (sample_type & PERF_SAMPLE_CALLCHAIN) {
2780 if (callchain) 2935 if (callchain)
@@ -2785,8 +2940,21 @@ void perf_counter_output(struct perf_counter *counter, int nmi,
2785 } 2940 }
2786 } 2941 }
2787 2942
2788 if ((sample_type & PERF_SAMPLE_RAW) && raw) 2943 if (sample_type & PERF_SAMPLE_RAW) {
2789 perf_output_copy(&handle, raw->data, raw->size); 2944 if (data->raw) {
2945 perf_output_put(&handle, data->raw->size);
2946 perf_output_copy(&handle, data->raw->data, data->raw->size);
2947 } else {
2948 struct {
2949 u32 size;
2950 u32 data;
2951 } raw = {
2952 .size = sizeof(u32),
2953 .data = 0,
2954 };
2955 perf_output_put(&handle, raw);
2956 }
2957 }
2790 2958
2791 perf_output_end(&handle); 2959 perf_output_end(&handle);
2792} 2960}
@@ -2800,8 +2968,6 @@ struct perf_read_event {
2800 2968
2801 u32 pid; 2969 u32 pid;
2802 u32 tid; 2970 u32 tid;
2803 u64 value;
2804 u64 format[3];
2805}; 2971};
2806 2972
2807static void 2973static void
@@ -2813,34 +2979,20 @@ perf_counter_read_event(struct perf_counter *counter,
2813 .header = { 2979 .header = {
2814 .type = PERF_EVENT_READ, 2980 .type = PERF_EVENT_READ,
2815 .misc = 0, 2981 .misc = 0,
2816 .size = sizeof(event) - sizeof(event.format), 2982 .size = sizeof(event) + perf_counter_read_size(counter),
2817 }, 2983 },
2818 .pid = perf_counter_pid(counter, task), 2984 .pid = perf_counter_pid(counter, task),
2819 .tid = perf_counter_tid(counter, task), 2985 .tid = perf_counter_tid(counter, task),
2820 .value = atomic64_read(&counter->count),
2821 }; 2986 };
2822 int ret, i = 0; 2987 int ret;
2823
2824 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
2825 event.header.size += sizeof(u64);
2826 event.format[i++] = counter->total_time_enabled;
2827 }
2828
2829 if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
2830 event.header.size += sizeof(u64);
2831 event.format[i++] = counter->total_time_running;
2832 }
2833
2834 if (counter->attr.read_format & PERF_FORMAT_ID) {
2835 event.header.size += sizeof(u64);
2836 event.format[i++] = primary_counter_id(counter);
2837 }
2838 2988
2839 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0); 2989 ret = perf_output_begin(&handle, counter, event.header.size, 0, 0);
2840 if (ret) 2990 if (ret)
2841 return; 2991 return;
2842 2992
2843 perf_output_copy(&handle, &event, event.header.size); 2993 perf_output_put(&handle, event);
2994 perf_output_read(&handle, counter);
2995
2844 perf_output_end(&handle); 2996 perf_output_end(&handle);
2845} 2997}
2846 2998
@@ -2876,10 +3028,10 @@ static void perf_counter_task_output(struct perf_counter *counter,
2876 return; 3028 return;
2877 3029
2878 task_event->event.pid = perf_counter_pid(counter, task); 3030 task_event->event.pid = perf_counter_pid(counter, task);
2879 task_event->event.ppid = perf_counter_pid(counter, task->real_parent); 3031 task_event->event.ppid = perf_counter_pid(counter, current);
2880 3032
2881 task_event->event.tid = perf_counter_tid(counter, task); 3033 task_event->event.tid = perf_counter_tid(counter, task);
2882 task_event->event.ptid = perf_counter_tid(counter, task->real_parent); 3034 task_event->event.ptid = perf_counter_tid(counter, current);
2883 3035
2884 perf_output_put(&handle, task_event->event); 3036 perf_output_put(&handle, task_event->event);
2885 perf_output_end(&handle); 3037 perf_output_end(&handle);
@@ -3426,40 +3578,32 @@ static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
3426 3578
3427static int perf_swcounter_is_counting(struct perf_counter *counter) 3579static int perf_swcounter_is_counting(struct perf_counter *counter)
3428{ 3580{
3429 struct perf_counter_context *ctx; 3581 /*
3430 unsigned long flags; 3582 * The counter is active, we're good!
3431 int count; 3583 */
3432
3433 if (counter->state == PERF_COUNTER_STATE_ACTIVE) 3584 if (counter->state == PERF_COUNTER_STATE_ACTIVE)
3434 return 1; 3585 return 1;
3435 3586
3587 /*
3588 * The counter is off/error, not counting.
3589 */
3436 if (counter->state != PERF_COUNTER_STATE_INACTIVE) 3590 if (counter->state != PERF_COUNTER_STATE_INACTIVE)
3437 return 0; 3591 return 0;
3438 3592
3439 /* 3593 /*
3440 * If the counter is inactive, it could be just because 3594 * The counter is inactive, if the context is active
3441 * its task is scheduled out, or because it's in a group 3595 * we're part of a group that didn't make it on the 'pmu',
3442 * which could not go on the PMU. We want to count in 3596 * not counting.
3443 * the first case but not the second. If the context is
3444 * currently active then an inactive software counter must
3445 * be the second case. If it's not currently active then
3446 * we need to know whether the counter was active when the
3447 * context was last active, which we can determine by
3448 * comparing counter->tstamp_stopped with ctx->time.
3449 *
3450 * We are within an RCU read-side critical section,
3451 * which protects the existence of *ctx.
3452 */ 3597 */
3453 ctx = counter->ctx; 3598 if (counter->ctx->is_active)
3454 spin_lock_irqsave(&ctx->lock, flags); 3599 return 0;
3455 count = 1; 3600
3456 /* Re-check state now we have the lock */ 3601 /*
3457 if (counter->state < PERF_COUNTER_STATE_INACTIVE || 3602 * We're inactive and the context is too, this means the
3458 counter->ctx->is_active || 3603 * task is scheduled out, we're counting events that happen
3459 counter->tstamp_stopped < ctx->time) 3604 * to us, like migration events.
3460 count = 0; 3605 */
3461 spin_unlock_irqrestore(&ctx->lock, flags); 3606 return 1;
3462 return count;
3463} 3607}
3464 3608
3465static int perf_swcounter_match(struct perf_counter *counter, 3609static int perf_swcounter_match(struct perf_counter *counter,
@@ -3770,6 +3914,14 @@ static void tp_perf_counter_destroy(struct perf_counter *counter)
3770 3914
3771static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) 3915static const struct pmu *tp_perf_counter_init(struct perf_counter *counter)
3772{ 3916{
3917 /*
3918 * Raw tracepoint data is a severe data leak, only allow root to
3919 * have these.
3920 */
3921 if ((counter->attr.sample_type & PERF_SAMPLE_RAW) &&
3922 !capable(CAP_SYS_ADMIN))
3923 return ERR_PTR(-EPERM);
3924
3773 if (ftrace_profile_enable(counter->attr.config)) 3925 if (ftrace_profile_enable(counter->attr.config))
3774 return NULL; 3926 return NULL;
3775 3927
@@ -3903,9 +4055,9 @@ perf_counter_alloc(struct perf_counter_attr *attr,
3903 atomic64_set(&hwc->period_left, hwc->sample_period); 4055 atomic64_set(&hwc->period_left, hwc->sample_period);
3904 4056
3905 /* 4057 /*
3906 * we currently do not support PERF_SAMPLE_GROUP on inherited counters 4058 * we currently do not support PERF_FORMAT_GROUP on inherited counters
3907 */ 4059 */
3908 if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) 4060 if (attr->inherit && (attr->read_format & PERF_FORMAT_GROUP))
3909 goto done; 4061 goto done;
3910 4062
3911 switch (attr->type) { 4063 switch (attr->type) {
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index bece7c0b67b2..e33a21cb9407 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -521,11 +521,12 @@ void posix_cpu_timers_exit(struct task_struct *tsk)
521} 521}
522void posix_cpu_timers_exit_group(struct task_struct *tsk) 522void posix_cpu_timers_exit_group(struct task_struct *tsk)
523{ 523{
524 struct task_cputime cputime; 524 struct signal_struct *const sig = tsk->signal;
525 525
526 thread_group_cputimer(tsk, &cputime);
527 cleanup_timers(tsk->signal->cpu_timers, 526 cleanup_timers(tsk->signal->cpu_timers,
528 cputime.utime, cputime.stime, cputime.sum_exec_runtime); 527 cputime_add(tsk->utime, sig->utime),
528 cputime_add(tsk->stime, sig->stime),
529 tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
529} 530}
530 531
531static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now) 532static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index fcd107a78c5a..29bd4baf9e75 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -1039,16 +1039,14 @@ int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) { 1039 if (!rt_mutex_owner(lock) || try_to_steal_lock(lock, task)) {
1040 /* We got the lock for task. */ 1040 /* We got the lock for task. */
1041 debug_rt_mutex_lock(lock); 1041 debug_rt_mutex_lock(lock);
1042
1043 rt_mutex_set_owner(lock, task, 0); 1042 rt_mutex_set_owner(lock, task, 0);
1044 1043 spin_unlock(&lock->wait_lock);
1045 rt_mutex_deadlock_account_lock(lock, task); 1044 rt_mutex_deadlock_account_lock(lock, task);
1046 return 1; 1045 return 1;
1047 } 1046 }
1048 1047
1049 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock); 1048 ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock);
1050 1049
1051
1052 if (ret && !waiter->task) { 1050 if (ret && !waiter->task) {
1053 /* 1051 /*
1054 * Reset the return value. We might have 1052 * Reset the return value. We might have
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 1090b0aed9ba..7a34cb563fec 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -267,8 +267,8 @@ static void blk_trace_free(struct blk_trace *bt)
267{ 267{
268 debugfs_remove(bt->msg_file); 268 debugfs_remove(bt->msg_file);
269 debugfs_remove(bt->dropped_file); 269 debugfs_remove(bt->dropped_file);
270 debugfs_remove(bt->dir);
271 relay_close(bt->rchan); 270 relay_close(bt->rchan);
271 debugfs_remove(bt->dir);
272 free_percpu(bt->sequence); 272 free_percpu(bt->sequence);
273 free_percpu(bt->msg_data); 273 free_percpu(bt->msg_data);
274 kfree(bt); 274 kfree(bt);
@@ -378,18 +378,8 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
378 378
379static int blk_remove_buf_file_callback(struct dentry *dentry) 379static int blk_remove_buf_file_callback(struct dentry *dentry)
380{ 380{
381 struct dentry *parent = dentry->d_parent;
382 debugfs_remove(dentry); 381 debugfs_remove(dentry);
383 382
384 /*
385 * this will fail for all but the last file, but that is ok. what we
386 * care about is the top level buts->name directory going away, when
387 * the last trace file is gone. Then we don't have to rmdir() that
388 * manually on trace stop, so it nicely solves the issue with
389 * force killing of running traces.
390 */
391
392 debugfs_remove(parent);
393 return 0; 383 return 0;
394} 384}
395 385
diff --git a/kernel/wait.c b/kernel/wait.c
index ea7c3b4275cf..c4bd3d825f35 100644
--- a/kernel/wait.c
+++ b/kernel/wait.c
@@ -10,13 +10,14 @@
10#include <linux/wait.h> 10#include <linux/wait.h>
11#include <linux/hash.h> 11#include <linux/hash.h>
12 12
13void init_waitqueue_head(wait_queue_head_t *q) 13void __init_waitqueue_head(wait_queue_head_t *q, struct lock_class_key *key)
14{ 14{
15 spin_lock_init(&q->lock); 15 spin_lock_init(&q->lock);
16 lockdep_set_class(&q->lock, key);
16 INIT_LIST_HEAD(&q->task_list); 17 INIT_LIST_HEAD(&q->task_list);
17} 18}
18 19
19EXPORT_SYMBOL(init_waitqueue_head); 20EXPORT_SYMBOL(__init_waitqueue_head);
20 21
21void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait) 22void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
22{ 23{
diff --git a/mm/mempool.c b/mm/mempool.c
index a46eb1b4bb66..32e75d400503 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -303,14 +303,14 @@ EXPORT_SYMBOL(mempool_free_slab);
303 */ 303 */
304void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data) 304void *mempool_kmalloc(gfp_t gfp_mask, void *pool_data)
305{ 305{
306 size_t size = (size_t)(long)pool_data; 306 size_t size = (size_t)pool_data;
307 return kmalloc(size, gfp_mask); 307 return kmalloc(size, gfp_mask);
308} 308}
309EXPORT_SYMBOL(mempool_kmalloc); 309EXPORT_SYMBOL(mempool_kmalloc);
310 310
311void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data) 311void *mempool_kzalloc(gfp_t gfp_mask, void *pool_data)
312{ 312{
313 size_t size = (size_t) pool_data; 313 size_t size = (size_t)pool_data;
314 return kzalloc(size, gfp_mask); 314 return kzalloc(size, gfp_mask);
315} 315}
316EXPORT_SYMBOL(mempool_kzalloc); 316EXPORT_SYMBOL(mempool_kzalloc);
diff --git a/net/socket.c b/net/socket.c
index 791d71a36a93..6d4716559047 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -736,7 +736,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
736 if (more) 736 if (more)
737 flags |= MSG_MORE; 737 flags |= MSG_MORE;
738 738
739 return sock->ops->sendpage(sock, page, offset, size, flags); 739 return kernel_sendpage(sock, page, offset, size, flags);
740} 740}
741 741
742static ssize_t sock_splice_read(struct file *file, loff_t *ppos, 742static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 15c2a08a66f1..1e8cfc4c2ed6 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1285,6 +1285,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1285 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 1285 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
1286 context, len); 1286 context, len);
1287 if (rc == -ERANGE) { 1287 if (rc == -ERANGE) {
1288 kfree(context);
1289
1288 /* Need a larger buffer. Query for the right size. */ 1290 /* Need a larger buffer. Query for the right size. */
1289 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, 1291 rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX,
1290 NULL, 0); 1292 NULL, 0);
@@ -1292,7 +1294,6 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
1292 dput(dentry); 1294 dput(dentry);
1293 goto out_unlock; 1295 goto out_unlock;
1294 } 1296 }
1295 kfree(context);
1296 len = rc; 1297 len = rc;
1297 context = kmalloc(len+1, GFP_NOFS); 1298 context = kmalloc(len+1, GFP_NOFS);
1298 if (!context) { 1299 if (!context) {
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 51c44fdbc0f0..fea976793ae5 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -13563,6 +13563,8 @@ static int patch_alc269(struct hda_codec *codec)
13563 set_capture_mixer(spec); 13563 set_capture_mixer(spec);
13564 set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT); 13564 set_beep_amp(spec, 0x0b, 0x04, HDA_INPUT);
13565 13565
13566 spec->vmaster_nid = 0x02;
13567
13566 codec->patch_ops = alc_patch_ops; 13568 codec->patch_ops = alc_patch_ops;
13567 if (board_config == ALC269_AUTO) 13569 if (board_config == ALC269_AUTO)
13568 spec->init_hook = alc269_auto_init; 13570 spec->init_hook = alc269_auto_init;
@@ -15577,9 +15579,12 @@ static int patch_alc861vd(struct hda_codec *codec)
15577 spec->stream_digital_playback = &alc861vd_pcm_digital_playback; 15579 spec->stream_digital_playback = &alc861vd_pcm_digital_playback;
15578 spec->stream_digital_capture = &alc861vd_pcm_digital_capture; 15580 spec->stream_digital_capture = &alc861vd_pcm_digital_capture;
15579 15581
15580 spec->adc_nids = alc861vd_adc_nids; 15582 if (!spec->adc_nids) {
15581 spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids); 15583 spec->adc_nids = alc861vd_adc_nids;
15582 spec->capsrc_nids = alc861vd_capsrc_nids; 15584 spec->num_adc_nids = ARRAY_SIZE(alc861vd_adc_nids);
15585 }
15586 if (!spec->capsrc_nids)
15587 spec->capsrc_nids = alc861vd_capsrc_nids;
15583 15588
15584 set_capture_mixer(spec); 15589 set_capture_mixer(spec);
15585 set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT); 15590 set_beep_amp(spec, 0x0b, 0x05, HDA_INPUT);
@@ -17496,9 +17501,12 @@ static int patch_alc662(struct hda_codec *codec)
17496 spec->stream_digital_playback = &alc662_pcm_digital_playback; 17501 spec->stream_digital_playback = &alc662_pcm_digital_playback;
17497 spec->stream_digital_capture = &alc662_pcm_digital_capture; 17502 spec->stream_digital_capture = &alc662_pcm_digital_capture;
17498 17503
17499 spec->adc_nids = alc662_adc_nids; 17504 if (!spec->adc_nids) {
17500 spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids); 17505 spec->adc_nids = alc662_adc_nids;
17501 spec->capsrc_nids = alc662_capsrc_nids; 17506 spec->num_adc_nids = ARRAY_SIZE(alc662_adc_nids);
17507 }
17508 if (!spec->capsrc_nids)
17509 spec->capsrc_nids = alc662_capsrc_nids;
17502 17510
17503 if (!spec->cap_mixer) 17511 if (!spec->cap_mixer)
17504 set_capture_mixer(spec); 17512 set_capture_mixer(spec);
diff --git a/sound/soc/fsl/efika-audio-fabric.c b/sound/soc/fsl/efika-audio-fabric.c
index 85b0e7569504..3326e2a1e863 100644
--- a/sound/soc/fsl/efika-audio-fabric.c
+++ b/sound/soc/fsl/efika-audio-fabric.c
@@ -30,6 +30,8 @@
30#include "mpc5200_psc_ac97.h" 30#include "mpc5200_psc_ac97.h"
31#include "../codecs/stac9766.h" 31#include "../codecs/stac9766.h"
32 32
33#define DRV_NAME "efika-audio-fabric"
34
33static struct snd_soc_device device; 35static struct snd_soc_device device;
34static struct snd_soc_card card; 36static struct snd_soc_card card;
35 37
diff --git a/sound/soc/fsl/pcm030-audio-fabric.c b/sound/soc/fsl/pcm030-audio-fabric.c
index 8766f7a3893d..b928ef7d28eb 100644
--- a/sound/soc/fsl/pcm030-audio-fabric.c
+++ b/sound/soc/fsl/pcm030-audio-fabric.c
@@ -30,6 +30,8 @@
30#include "mpc5200_psc_ac97.h" 30#include "mpc5200_psc_ac97.h"
31#include "../codecs/wm9712.h" 31#include "../codecs/wm9712.h"
32 32
33#define DRV_NAME "pcm030-audio-fabric"
34
33static struct snd_soc_device device; 35static struct snd_soc_device device;
34static struct snd_soc_card card; 36static struct snd_soc_card card;
35 37
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index cb9033d3f72f..68218cfd38b3 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -386,22 +386,29 @@ endif
386ifdef NO_DEMANGLE 386ifdef NO_DEMANGLE
387 BASIC_CFLAGS += -DNO_DEMANGLE 387 BASIC_CFLAGS += -DNO_DEMANGLE
388else 388else
389
390 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y") 389 has_bfd := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd > /dev/null 2>&1 && echo y")
391 390
392 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
393
394 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
395
396 ifeq ($(has_bfd),y) 391 ifeq ($(has_bfd),y)
397 EXTLIBS += -lbfd 392 EXTLIBS += -lbfd
398 else ifeq ($(has_bfd_iberty),y)
399 EXTLIBS += -lbfd -liberty
400 else ifeq ($(has_bfd_iberty_z),y)
401 EXTLIBS += -lbfd -liberty -lz
402 else 393 else
403 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) 394 has_bfd_iberty := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty > /dev/null 2>&1 && echo y")
404 BASIC_CFLAGS += -DNO_DEMANGLE 395 ifeq ($(has_bfd_iberty),y)
396 EXTLIBS += -lbfd -liberty
397 else
398 has_bfd_iberty_z := $(shell sh -c "(echo '\#include <bfd.h>'; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y")
399 ifeq ($(has_bfd_iberty_z),y)
400 EXTLIBS += -lbfd -liberty -lz
401 else
402 has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) -liberty > /dev/null 2>&1 && echo y")
403 ifeq ($(has_cplus_demangle),y)
404 EXTLIBS += -liberty
405 BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
406 else
407 msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling)
408 BASIC_CFLAGS += -DNO_DEMANGLE
409 endif
410 endif
411 endif
405 endif 412 endif
406endif 413endif
407 414
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f990fa8a35c9..d88c6961274c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -10,11 +10,12 @@
10 10
11#include "perf.h" 11#include "perf.h"
12 12
13#include "util/parse-options.h"
14#include "util/parse-events.h" 13#include "util/parse-events.h"
14#include "util/cache.h"
15 15
16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used) 16int cmd_list(int argc __used, const char **argv __used, const char *prefix __used)
17{ 17{
18 setup_pager();
18 print_events(); 19 print_events();
19 return 0; 20 return 0;
20} 21}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 106c6abd1c38..65b4115e417d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -35,7 +35,9 @@ static int output;
35static const char *output_name = "perf.data"; 35static const char *output_name = "perf.data";
36static int group = 0; 36static int group = 0;
37static unsigned int realtime_prio = 0; 37static unsigned int realtime_prio = 0;
38static int raw_samples = 0;
38static int system_wide = 0; 39static int system_wide = 0;
40static int profile_cpu = -1;
39static pid_t target_pid = -1; 41static pid_t target_pid = -1;
40static int inherit = 1; 42static int inherit = 1;
41static int force = 0; 43static int force = 0;
@@ -185,46 +187,48 @@ static void sig_atexit(void)
185 kill(getpid(), signr); 187 kill(getpid(), signr);
186} 188}
187 189
188static void pid_synthesize_comm_event(pid_t pid, int full) 190static pid_t pid_synthesize_comm_event(pid_t pid, int full)
189{ 191{
190 struct comm_event comm_ev; 192 struct comm_event comm_ev;
191 char filename[PATH_MAX]; 193 char filename[PATH_MAX];
192 char bf[BUFSIZ]; 194 char bf[BUFSIZ];
193 int fd; 195 FILE *fp;
194 size_t size; 196 size_t size = 0;
195 char *field, *sep;
196 DIR *tasks; 197 DIR *tasks;
197 struct dirent dirent, *next; 198 struct dirent dirent, *next;
199 pid_t tgid = 0;
198 200
199 snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); 201 snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
200 202
201 fd = open(filename, O_RDONLY); 203 fp = fopen(filename, "r");
202 if (fd < 0) { 204 if (fp == NULL) {
203 /* 205 /*
204 * We raced with a task exiting - just return: 206 * We raced with a task exiting - just return:
205 */ 207 */
206 if (verbose) 208 if (verbose)
207 fprintf(stderr, "couldn't open %s\n", filename); 209 fprintf(stderr, "couldn't open %s\n", filename);
208 return; 210 return 0;
209 } 211 }
210 if (read(fd, bf, sizeof(bf)) < 0) {
211 fprintf(stderr, "couldn't read %s\n", filename);
212 exit(EXIT_FAILURE);
213 }
214 close(fd);
215 212
216 /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */
217 memset(&comm_ev, 0, sizeof(comm_ev)); 213 memset(&comm_ev, 0, sizeof(comm_ev));
218 field = strchr(bf, '('); 214 while (!comm_ev.comm[0] || !comm_ev.pid) {
219 if (field == NULL) 215 if (fgets(bf, sizeof(bf), fp) == NULL)
220 goto out_failure; 216 goto out_failure;
221 sep = strchr(++field, ')'); 217
222 if (sep == NULL) 218 if (memcmp(bf, "Name:", 5) == 0) {
223 goto out_failure; 219 char *name = bf + 5;
224 size = sep - field; 220 while (*name && isspace(*name))
225 memcpy(comm_ev.comm, field, size++); 221 ++name;
226 222 size = strlen(name) - 1;
227 comm_ev.pid = pid; 223 memcpy(comm_ev.comm, name, size++);
224 } else if (memcmp(bf, "Tgid:", 5) == 0) {
225 char *tgids = bf + 5;
226 while (*tgids && isspace(*tgids))
227 ++tgids;
228 tgid = comm_ev.pid = atoi(tgids);
229 }
230 }
231
228 comm_ev.header.type = PERF_EVENT_COMM; 232 comm_ev.header.type = PERF_EVENT_COMM;
229 size = ALIGN(size, sizeof(u64)); 233 size = ALIGN(size, sizeof(u64));
230 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); 234 comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size);
@@ -233,7 +237,7 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
233 comm_ev.tid = pid; 237 comm_ev.tid = pid;
234 238
235 write_output(&comm_ev, comm_ev.header.size); 239 write_output(&comm_ev, comm_ev.header.size);
236 return; 240 goto out_fclose;
237 } 241 }
238 242
239 snprintf(filename, sizeof(filename), "/proc/%d/task", pid); 243 snprintf(filename, sizeof(filename), "/proc/%d/task", pid);
@@ -250,7 +254,10 @@ static void pid_synthesize_comm_event(pid_t pid, int full)
250 write_output(&comm_ev, comm_ev.header.size); 254 write_output(&comm_ev, comm_ev.header.size);
251 } 255 }
252 closedir(tasks); 256 closedir(tasks);
253 return; 257
258out_fclose:
259 fclose(fp);
260 return tgid;
254 261
255out_failure: 262out_failure:
256 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", 263 fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n",
@@ -258,7 +265,7 @@ out_failure:
258 exit(EXIT_FAILURE); 265 exit(EXIT_FAILURE);
259} 266}
260 267
261static void pid_synthesize_mmap_samples(pid_t pid) 268static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid)
262{ 269{
263 char filename[PATH_MAX]; 270 char filename[PATH_MAX];
264 FILE *fp; 271 FILE *fp;
@@ -310,7 +317,7 @@ static void pid_synthesize_mmap_samples(pid_t pid)
310 mmap_ev.len -= mmap_ev.start; 317 mmap_ev.len -= mmap_ev.start;
311 mmap_ev.header.size = (sizeof(mmap_ev) - 318 mmap_ev.header.size = (sizeof(mmap_ev) -
312 (sizeof(mmap_ev.filename) - size)); 319 (sizeof(mmap_ev.filename) - size));
313 mmap_ev.pid = pid; 320 mmap_ev.pid = tgid;
314 mmap_ev.tid = pid; 321 mmap_ev.tid = pid;
315 322
316 write_output(&mmap_ev, mmap_ev.header.size); 323 write_output(&mmap_ev, mmap_ev.header.size);
@@ -329,14 +336,14 @@ static void synthesize_all(void)
329 336
330 while (!readdir_r(proc, &dirent, &next) && next) { 337 while (!readdir_r(proc, &dirent, &next) && next) {
331 char *end; 338 char *end;
332 pid_t pid; 339 pid_t pid, tgid;
333 340
334 pid = strtol(dirent.d_name, &end, 10); 341 pid = strtol(dirent.d_name, &end, 10);
335 if (*end) /* only interested in proper numerical dirents */ 342 if (*end) /* only interested in proper numerical dirents */
336 continue; 343 continue;
337 344
338 pid_synthesize_comm_event(pid, 1); 345 tgid = pid_synthesize_comm_event(pid, 1);
339 pid_synthesize_mmap_samples(pid); 346 pid_synthesize_mmap_samples(pid, tgid);
340 } 347 }
341 348
342 closedir(proc); 349 closedir(proc);
@@ -374,7 +381,7 @@ static void create_counter(int counter, int cpu, pid_t pid)
374 PERF_FORMAT_TOTAL_TIME_RUNNING | 381 PERF_FORMAT_TOTAL_TIME_RUNNING |
375 PERF_FORMAT_ID; 382 PERF_FORMAT_ID;
376 383
377 attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; 384 attr->sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID;
378 385
379 if (freq) { 386 if (freq) {
380 attr->sample_type |= PERF_SAMPLE_PERIOD; 387 attr->sample_type |= PERF_SAMPLE_PERIOD;
@@ -394,6 +401,8 @@ static void create_counter(int counter, int cpu, pid_t pid)
394 if (call_graph) 401 if (call_graph)
395 attr->sample_type |= PERF_SAMPLE_CALLCHAIN; 402 attr->sample_type |= PERF_SAMPLE_CALLCHAIN;
396 403
404 if (raw_samples)
405 attr->sample_type |= PERF_SAMPLE_RAW;
397 406
398 attr->mmap = track; 407 attr->mmap = track;
399 attr->comm = track; 408 attr->comm = track;
@@ -408,6 +417,8 @@ try_again:
408 417
409 if (err == EPERM) 418 if (err == EPERM)
410 die("Permission error - are you root?\n"); 419 die("Permission error - are you root?\n");
420 else if (err == ENODEV && profile_cpu != -1)
421 die("No such device - did you specify an out-of-range profile CPU?\n");
411 422
412 /* 423 /*
413 * If it's cycles then fall back to hrtimer 424 * If it's cycles then fall back to hrtimer
@@ -541,16 +552,22 @@ static int __cmd_record(int argc, const char **argv)
541 if (pid == -1) 552 if (pid == -1)
542 pid = getpid(); 553 pid = getpid();
543 554
544 open_counters(-1, pid); 555 open_counters(profile_cpu, pid);
545 } else for (i = 0; i < nr_cpus; i++) 556 } else {
546 open_counters(i, target_pid); 557 if (profile_cpu != -1) {
558 open_counters(profile_cpu, target_pid);
559 } else {
560 for (i = 0; i < nr_cpus; i++)
561 open_counters(i, target_pid);
562 }
563 }
547 564
548 if (file_new) 565 if (file_new)
549 perf_header__write(header, output); 566 perf_header__write(header, output);
550 567
551 if (!system_wide) { 568 if (!system_wide) {
552 pid_synthesize_comm_event(pid, 0); 569 pid_t tgid = pid_synthesize_comm_event(pid, 0);
553 pid_synthesize_mmap_samples(pid); 570 pid_synthesize_mmap_samples(pid, tgid);
554 } else 571 } else
555 synthesize_all(); 572 synthesize_all();
556 573
@@ -618,10 +635,14 @@ static const struct option options[] = {
618 "record events on existing pid"), 635 "record events on existing pid"),
619 OPT_INTEGER('r', "realtime", &realtime_prio, 636 OPT_INTEGER('r', "realtime", &realtime_prio,
620 "collect data with this RT SCHED_FIFO priority"), 637 "collect data with this RT SCHED_FIFO priority"),
638 OPT_BOOLEAN('R', "raw-samples", &raw_samples,
639 "collect raw sample records from all opened counters"),
621 OPT_BOOLEAN('a', "all-cpus", &system_wide, 640 OPT_BOOLEAN('a', "all-cpus", &system_wide,
622 "system-wide collection from all CPUs"), 641 "system-wide collection from all CPUs"),
623 OPT_BOOLEAN('A', "append", &append_file, 642 OPT_BOOLEAN('A', "append", &append_file,
624 "append to the output file to do incremental profiling"), 643 "append to the output file to do incremental profiling"),
644 OPT_INTEGER('C', "profile_cpu", &profile_cpu,
645 "CPU to profile on"),
625 OPT_BOOLEAN('f', "force", &force, 646 OPT_BOOLEAN('f', "force", &force,
626 "overwrite existing data file"), 647 "overwrite existing data file"),
627 OPT_LONG('c', "count", &default_interval, 648 OPT_LONG('c', "count", &default_interval,
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 93945ecdac86..6321951fe1bf 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1276,11 +1276,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1276 more_data += sizeof(u64); 1276 more_data += sizeof(u64);
1277 } 1277 }
1278 1278
1279 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d: %p period: %Ld\n", 1279 dprintf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n",
1280 (void *)(offset + head), 1280 (void *)(offset + head),
1281 (void *)(long)(event->header.size), 1281 (void *)(long)(event->header.size),
1282 event->header.misc, 1282 event->header.misc,
1283 event->ip.pid, 1283 event->ip.pid, event->ip.tid,
1284 (void *)(long)ip, 1284 (void *)(long)ip,
1285 (long long)period); 1285 (long long)period);
1286 1286
@@ -1340,10 +1340,11 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
1340 if (show & show_mask) { 1340 if (show & show_mask) {
1341 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); 1341 struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip);
1342 1342
1343 if (dso_list && dso && dso->name && !strlist__has_entry(dso_list, dso->name)) 1343 if (dso_list && (!dso || !dso->name ||
1344 !strlist__has_entry(dso_list, dso->name)))
1344 return 0; 1345 return 0;
1345 1346
1346 if (sym_list && sym && !strlist__has_entry(sym_list, sym->name)) 1347 if (sym_list && (!sym || !strlist__has_entry(sym_list, sym->name)))
1347 return 0; 1348 return 0;
1348 1349
1349 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { 1350 if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) {
@@ -1362,10 +1363,11 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
1362 struct thread *thread = threads__findnew(event->mmap.pid); 1363 struct thread *thread = threads__findnew(event->mmap.pid);
1363 struct map *map = map__new(&event->mmap, cwd, cwdlen); 1364 struct map *map = map__new(&event->mmap, cwd, cwdlen);
1364 1365
1365 dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", 1366 dprintf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n",
1366 (void *)(offset + head), 1367 (void *)(offset + head),
1367 (void *)(long)(event->header.size), 1368 (void *)(long)(event->header.size),
1368 event->mmap.pid, 1369 event->mmap.pid,
1370 event->mmap.tid,
1369 (void *)(long)event->mmap.start, 1371 (void *)(long)event->mmap.start,
1370 (void *)(long)event->mmap.len, 1372 (void *)(long)event->mmap.len,
1371 (void *)(long)event->mmap.pgoff, 1373 (void *)(long)event->mmap.pgoff,
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index a8e67aa9ef49..011473411642 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -13,6 +13,7 @@
13#include <stdio.h> 13#include <stdio.h>
14#include <stdbool.h> 14#include <stdbool.h>
15#include <errno.h> 15#include <errno.h>
16#include <math.h>
16 17
17#include "callchain.h" 18#include "callchain.h"
18 19
@@ -112,7 +113,7 @@ static void __sort_chain_graph_rel(struct callchain_node *node,
112 u64 min_hit; 113 u64 min_hit;
113 114
114 node->rb_root = RB_ROOT; 115 node->rb_root = RB_ROOT;
115 min_hit = node->children_hit * min_percent / 100.0; 116 min_hit = ceil(node->children_hit * min_percent);
116 117
117 chain_for_each_child(child, node) { 118 chain_for_each_child(child, node) {
118 __sort_chain_graph_rel(child, min_percent); 119 __sort_chain_graph_rel(child, min_percent);
@@ -126,7 +127,7 @@ static void
126sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root, 127sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_node *chain_root,
127 u64 min_hit __used, struct callchain_param *param) 128 u64 min_hit __used, struct callchain_param *param)
128{ 129{
129 __sort_chain_graph_rel(chain_root, param->min_percent); 130 __sort_chain_graph_rel(chain_root, param->min_percent / 100.0);
130 rb_root->rb_node = chain_root->rb_root.rb_node; 131 rb_root->rb_node = chain_root->rb_root.rb_node;
131} 132}
132 133
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4858d83b3b67..044178408783 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -379,6 +379,7 @@ static int parse_tracepoint_event(const char **strp,
379 struct perf_counter_attr *attr) 379 struct perf_counter_attr *attr)
380{ 380{
381 const char *evt_name; 381 const char *evt_name;
382 char *flags;
382 char sys_name[MAX_EVENT_LENGTH]; 383 char sys_name[MAX_EVENT_LENGTH];
383 char id_buf[4]; 384 char id_buf[4];
384 int fd; 385 int fd;
@@ -400,6 +401,15 @@ static int parse_tracepoint_event(const char **strp,
400 strncpy(sys_name, *strp, sys_length); 401 strncpy(sys_name, *strp, sys_length);
401 sys_name[sys_length] = '\0'; 402 sys_name[sys_length] = '\0';
402 evt_name = evt_name + 1; 403 evt_name = evt_name + 1;
404
405 flags = strchr(evt_name, ':');
406 if (flags) {
407 *flags = '\0';
408 flags++;
409 if (!strncmp(flags, "record", strlen(flags)))
410 attr->sample_type |= PERF_SAMPLE_RAW;
411 }
412
403 evt_length = strlen(evt_name); 413 evt_length = strlen(evt_name);
404 if (evt_length >= MAX_EVENT_LENGTH) 414 if (evt_length >= MAX_EVENT_LENGTH)
405 return 0; 415 return 0;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index e9b13b414955..0b9862351260 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -7,23 +7,8 @@
7#include <gelf.h> 7#include <gelf.h>
8#include <elf.h> 8#include <elf.h>
9 9
10#ifndef NO_DEMANGLE
11#include <bfd.h>
12#else
13static inline
14char *bfd_demangle(void __used *v, const char __used *c, int __used i)
15{
16 return NULL;
17}
18#endif
19
20const char *sym_hist_filter; 10const char *sym_hist_filter;
21 11
22#ifndef DMGL_PARAMS
23#define DMGL_PARAMS (1 << 0) /* Include function args */
24#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
25#endif
26
27enum dso_origin { 12enum dso_origin {
28 DSO__ORIG_KERNEL = 0, 13 DSO__ORIG_KERNEL = 0,
29 DSO__ORIG_JAVA_JIT, 14 DSO__ORIG_JAVA_JIT,
@@ -816,6 +801,8 @@ more:
816 } 801 }
817out: 802out:
818 free(name); 803 free(name);
804 if (ret < 0 && strstr(self->name, " (deleted)") != NULL)
805 return 0;
819 return ret; 806 return ret;
820} 807}
821 808
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 50f723571241..48b8e5759af9 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -8,6 +8,30 @@
8#include "module.h" 8#include "module.h"
9#include "event.h" 9#include "event.h"
10 10
11#ifdef HAVE_CPLUS_DEMANGLE
12extern char *cplus_demangle(const char *, int);
13
14static inline char *bfd_demangle(void __used *v, const char *c, int i)
15{
16 return cplus_demangle(c, i);
17}
18#else
19#ifdef NO_DEMANGLE
20static inline char *bfd_demangle(void __used *v, const char __used *c,
21 int __used i)
22{
23 return NULL;
24}
25#else
26#include <bfd.h>
27#endif
28#endif
29
30#ifndef DMGL_PARAMS
31#define DMGL_PARAMS (1 << 0) /* Include function args */
32#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
33#endif
34
11struct symbol { 35struct symbol {
12 struct rb_node rb_node; 36 struct rb_node rb_node;
13 u64 start; 37 u64 start;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 1eddae94bab3..1150c6d5c7b8 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -95,8 +95,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx)
95 if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG) 95 if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
96 pent->fields.remote_irr = 1; 96 pent->fields.remote_irr = 1;
97 } 97 }
98 if (!pent->fields.trig_mode)
99 ioapic->irr &= ~(1 << idx);
100 98
101 return injected; 99 return injected;
102} 100}
@@ -136,7 +134,8 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
136 mask_after = ioapic->redirtbl[index].fields.mask; 134 mask_after = ioapic->redirtbl[index].fields.mask;
137 if (mask_before != mask_after) 135 if (mask_before != mask_after)
138 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after); 136 kvm_fire_mask_notifiers(ioapic->kvm, index, mask_after);
139 if (ioapic->irr & (1 << index)) 137 if (ioapic->redirtbl[index].fields.trig_mode == IOAPIC_LEVEL_TRIG
138 && ioapic->irr & (1 << index))
140 ioapic_service(ioapic, index); 139 ioapic_service(ioapic, index);
141 break; 140 break;
142 } 141 }
@@ -184,9 +183,10 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
184 if (!level) 183 if (!level)
185 ioapic->irr &= ~mask; 184 ioapic->irr &= ~mask;
186 else { 185 else {
186 int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
187 ioapic->irr |= mask; 187 ioapic->irr |= mask;
188 if ((!entry.fields.trig_mode && old_irr != ioapic->irr) 188 if ((edge && old_irr != ioapic->irr) ||
189 || !entry.fields.remote_irr) 189 (!edge && !entry.fields.remote_irr))
190 ret = ioapic_service(ioapic, irq); 190 ret = ioapic_service(ioapic, irq);
191 } 191 }
192 } 192 }
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a8bd466d00cc..ddc17f0e2f35 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -160,7 +160,8 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
160 unsigned gsi = pin; 160 unsigned gsi = pin;
161 161
162 list_for_each_entry(e, &kvm->irq_routing, link) 162 list_for_each_entry(e, &kvm->irq_routing, link)
163 if (e->irqchip.irqchip == irqchip && 163 if (e->type == KVM_IRQ_ROUTING_IRQCHIP &&
164 e->irqchip.irqchip == irqchip &&
164 e->irqchip.pin == pin) { 165 e->irqchip.pin == pin) {
165 gsi = e->gsi; 166 gsi = e->gsi;
166 break; 167 break;
@@ -259,6 +260,7 @@ static int setup_routing_entry(struct kvm_kernel_irq_routing_entry *e,
259 int delta; 260 int delta;
260 261
261 e->gsi = ue->gsi; 262 e->gsi = ue->gsi;
263 e->type = ue->type;
262 switch (ue->type) { 264 switch (ue->type) {
263 case KVM_IRQ_ROUTING_IRQCHIP: 265 case KVM_IRQ_ROUTING_IRQCHIP:
264 delta = 0; 266 delta = 0;