 Documentation/devicetree/bindings/interrupt-controller/interrupts.txt | 12
 Documentation/devicetree/bindings/pci/designware-pcie.txt | 4
 Documentation/devicetree/bindings/pci/ti-pci.txt | 59
 Documentation/filesystems/Locking | 6
 MAINTAINERS | 8
 Makefile | 4
 arch/frv/include/asm/processor.h | 3
 arch/powerpc/kvm/book3s_hv_builtin.c | 6
 arch/x86/include/asm/kvm_host.h | 2
 arch/x86/include/uapi/asm/msr-index.h | 3
 arch/x86/kvm/emulate.c | 11
 drivers/char/hw_random/virtio-rng.c | 1
 drivers/idle/intel_idle.c | 77
 drivers/md/raid10.c | 7
 drivers/md/raid5.c | 4
 drivers/of/fdt.c | 2
 drivers/of/irq.c | 17
 drivers/of/selftest.c | 40
 drivers/pci/host/Kconfig | 11
 drivers/pci/host/Makefile | 1
 drivers/pci/host/pci-dra7xx.c | 458
 drivers/pci/host/pci-tegra.c | 118
 drivers/pci/host/pcie-designware.c | 134
 drivers/pci/host/pcie-designware.h | 11
 drivers/scsi/scsi.c | 12
 drivers/scsi/scsi_lib.c | 2
 fs/aio.c | 86
 fs/btrfs/backref.c | 14
 fs/btrfs/btrfs_inode.h | 6
 fs/btrfs/ctree.c | 20
 fs/btrfs/ctree.h | 4
 fs/btrfs/disk-io.c | 32
 fs/btrfs/extent-tree.c | 285
 fs/btrfs/file-item.c | 2
 fs/btrfs/file.c | 26
 fs/btrfs/inode.c | 59
 fs/btrfs/ordered-data.c | 123
 fs/btrfs/ordered-data.h | 5
 fs/btrfs/qgroup.c | 169
 fs/btrfs/qgroup.h | 1
 fs/btrfs/super.c | 51
 fs/btrfs/transaction.c | 33
 fs/btrfs/transaction.h | 1
 fs/btrfs/ulist.h | 15
 fs/locks.c | 86
 kernel/module.c | 5
 tools/power/x86/turbostat/turbostat.c | 80
 virt/kvm/assigned-dev.c | 4
 virt/kvm/iommu.c | 19
 49 files changed, 1622 insertions(+), 517 deletions(-)
diff --git a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
index 1486497a24c1..ce6a1a072028 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
@@ -4,11 +4,13 @@ Specifying interrupt information for devices
 1) Interrupt client nodes
 -------------------------
 
-Nodes that describe devices which generate interrupts must contain an either an
-"interrupts" property or an "interrupts-extended" property. These properties
-contain a list of interrupt specifiers, one per output interrupt. The format of
-the interrupt specifier is determined by the interrupt controller to which the
-interrupts are routed; see section 2 below for details.
+Nodes that describe devices which generate interrupts must contain an
+"interrupts" property, an "interrupts-extended" property, or both. If both are
+present, the latter should take precedence; the former may be provided simply
+for compatibility with software that does not recognize the latter. These
+properties contain a list of interrupt specifiers, one per output interrupt. The
+format of the interrupt specifier is determined by the interrupt controller to
+which the interrupts are routed; see section 2 below for details.
 
   Example:
 	interrupt-parent = <&intc1>;
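
Note: the lookup order this binding text describes is what the drivers/of/irq.c
hunk later in this patch implements. A condensed C sketch of that precedence
(simplified from of_irq_parse_one(); error handling trimmed):

	/* Prefer the new-style "interrupts-extended" property. */
	res = of_parse_phandle_with_args(device, "interrupts-extended",
					 "#interrupt-cells", index, out_irq);
	if (!res)
		return of_irq_parse_raw(addr, out_irq);

	/* Only consult the legacy "interrupts" property when it is absent. */
	intspec = of_get_property(device, "interrupts", &intlen);
	if (intspec == NULL)
		return -EINVAL;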
diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt
index d0d15ee42834..ed0d9b9fff2b 100644
--- a/Documentation/devicetree/bindings/pci/designware-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -2,6 +2,10 @@
 
 Required properties:
 - compatible: should contain "snps,dw-pcie" to identify the core.
+- reg: Should contain the configuration address space.
+- reg-names: Must be "config" for the PCIe configuration space.
+  (The old way of getting the configuration address space from "ranges"
+  is deprecated and should be avoided.)
 - #address-cells: set to <3>
 - #size-cells: set to <2>
 - device_type: set to "pci"
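
The driver-side counterpart of this binding change appears in the
pcie-designware.c hunk below: host init now fetches the "config" region by
name instead of deriving it from "ranges". A condensed sketch taken from that
hunk (declarations omitted):

	/* Look the configuration space up via reg/reg-names. */
	cfg_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
	if (cfg_res) {
		pp->config.cfg0_size = resource_size(cfg_res) / 2;
		pp->config.cfg1_size = resource_size(cfg_res) / 2;
		pp->cfg0_base = cfg_res->start;
		pp->cfg1_base = cfg_res->start + pp->config.cfg0_size;
	}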
diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt b/Documentation/devicetree/bindings/pci/ti-pci.txt
new file mode 100644
index 000000000000..3d217911b313
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/ti-pci.txt
@@ -0,0 +1,59 @@
+TI PCI Controllers
+
+PCIe Designware Controller
+ - compatible: Should be "ti,dra7-pcie"
+ - reg : Three register ranges as listed in the reg-names property
+ - reg-names : The first entry must be "ti_conf" for the TI specific registers
+	       The second entry must be "rc_dbics" for the designware pcie
+	       registers
+	       The third entry must be "config" for the PCIe configuration space
+ - phys : list of PHY specifiers (used by generic PHY framework)
+ - phy-names : must be "pcie-phy0", "pcie-phy1", "pcie-phyN".. based on the
+	       number of PHYs as specified in *phys* property.
+ - ti,hwmods : Name of the hwmod associated to the pcie, "pcie<X>",
+	       where <X> is the instance number of the pcie from the HW spec.
+ - interrupts : Two interrupt entries must be specified. The first one is for
+		main interrupt line and the second for MSI interrupt line.
+ - #address-cells,
+   #size-cells,
+   #interrupt-cells,
+   device_type,
+   ranges,
+   num-lanes,
+   interrupt-map-mask,
+   interrupt-map : as specified in ../designware-pcie.txt
+
+Example:
+axi {
+	compatible = "simple-bus";
+	#size-cells = <1>;
+	#address-cells = <1>;
+	ranges = <0x51000000 0x51000000 0x3000
+		  0x0	     0x20000000 0x10000000>;
+	pcie@51000000 {
+		compatible = "ti,dra7-pcie";
+		reg = <0x51000000 0x2000>, <0x51002000 0x14c>, <0x1000 0x2000>;
+		reg-names = "rc_dbics", "ti_conf", "config";
+		interrupts = <0 232 0x4>, <0 233 0x4>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x81000000 0 0          0x03000 0 0x00010000
+			  0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+		#interrupt-cells = <1>;
+		num-lanes = <1>;
+		ti,hwmods = "pcie1";
+		phys = <&pcie1_phy>;
+		phy-names = "pcie-phy0";
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pcie_intc 1>,
+				<0 0 0 2 &pcie_intc 2>,
+				<0 0 0 3 &pcie_intc 3>,
+				<0 0 0 4 &pcie_intc 4>;
+		pcie_intc: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+		};
+	};
+};
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index b18dd1779029..f1997e9da61f 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -349,7 +349,11 @@ prototypes:
 locking rules:
 			inode->i_lock	may block
 fl_copy_lock:		yes		no
-fl_release_private:	maybe		no
+fl_release_private:	maybe		maybe[1]
+
+[1]:	->fl_release_private for flock or POSIX locks is currently allowed
+to block. Leases however can still be freed while the i_lock is held and
+so fl_release_private called on a lease should not block.
 
 ----------------------- lock_manager_operations ---------------------------
 prototypes:
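
Illustrating note [1]: because a lease's ->fl_release_private can be invoked
with inode->i_lock held, it must not sleep; one pattern that satisfies this is
deferring the real cleanup to process context. A hypothetical sketch
(my_lease_data and its free_work are illustrative, not from any in-tree
filesystem):

	static void my_lease_release_private(struct file_lock *fl)
	{
		struct my_lease_data *priv = fl->fl_owner;

		/* May run under i_lock: no sleeping, punt to a workqueue. */
		schedule_work(&priv->free_work);
	}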
diff --git a/MAINTAINERS b/MAINTAINERS
index aefa94841ff3..05dc39d66920 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6858,6 +6858,14 @@ S: Supported
 F:	Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
 F:	drivers/pci/host/pci-tegra.c
 
+PCI DRIVER FOR TI DRA7XX
+M:	Kishon Vijay Abraham I <kishon@ti.com>
+L:	linux-omap@vger.kernel.org
+L:	linux-pci@vger.kernel.org
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/ti-pci.txt
+F:	drivers/pci/host/pci-dra7xx.c
+
 PCI DRIVER FOR RENESAS R-CAR
 M:	Simon Horman <horms@verge.net.au>
 L:	linux-pci@vger.kernel.org
diff --git a/Makefile b/Makefile
index 6aace6750567..e43244263306 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
-PATCHLEVEL = 16
+PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Shuffling Zombie Juror
 
 # *DOCUMENTATION*
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index a34f309e5801..6554e78893f2 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -129,7 +129,8 @@ unsigned long get_wchan(struct task_struct *p);
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
 #define	KSTK_ESP(tsk)	((tsk)->thread.frame0->sp)
 
 #define cpu_relax()	barrier()
+#define cpu_relax_lowlatency() cpu_relax()
 
 /* data cache prefetch */
 #define ARCH_HAS_PREFETCH
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 329d7fdd0a6a..b9615ba5b083 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -101,7 +101,7 @@ struct kvm_rma_info *kvm_alloc_rma()
 	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
 	if (!ri)
 		return NULL;
-	page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
+	page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
 	if (!page)
 		goto err_out;
 	atomic_set(&ri->use_count, 1);
@@ -135,12 +135,12 @@ struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
 	unsigned long align_pages = HPT_ALIGN_PAGES;
 
-	VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	/* Old CPUs require HPT aligned on a multiple of its size */
 	if (!cpu_has_feature(CPU_FTR_ARCH_206))
 		align_pages = nr_pages;
-	return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
+	return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
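
The helper swap above matters because the two functions answer different
questions: order_base_2(n) is ceil(log2(n)) of a plain count, while
get_order(size) converts a byte size to a page-allocation order, so passing it
a page count understates the alignment order. A hedged arithmetic sketch
(PAGE_SHIFT assumed to be 12):

	/* With kvm_rma_pages = 96, say:
	 * order_base_2(96) = 7, since 2^6 = 64 < 96 <= 128 = 2^7;
	 * get_order(96) treats 96 as *bytes*, and 96 <= PAGE_SIZE, so it
	 * returns 0, asking cma_alloc() for far weaker alignment than the
	 * HPT actually requires.
	 */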
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 572460175ba5..7c492ed9087b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -95,7 +95,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 80
 #define KVM_NR_FIXED_MTRR_REGION 88
-#define KVM_NR_VAR_MTRR 10
+#define KVM_NR_VAR_MTRR 8
 
 #define ASYNC_PF_PER_VCPU 64
 
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index eac9e92fe181..e21331ce368f 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -149,6 +149,9 @@
 
 #define MSR_CORE_C1_RES			0x00000660
 
+#define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG	0x00000669
+
 #define MSR_AMD64_MC0_MASK		0xc0010044
 
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56657b0bb3bb..03954f7900f5 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1491,9 +1491,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 			goto exception;
 		break;
 	case VCPU_SREG_CS:
-		if (in_task_switch && rpl != dpl)
-			goto exception;
-
 		if (!(seg_desc.type & 8))
 			goto exception;
 
@@ -4394,8 +4391,11 @@ done_prefixes:
 
 	ctxt->execute = opcode.u.execute;
 
+	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+		return EMULATION_FAILED;
+
 	if (unlikely(ctxt->d &
-		     (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+		     (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
 		/*
 		 * These are copied unconditionally here, and checked unconditionally
 		 * in x86_emulate_insn.
@@ -4406,9 +4406,6 @@ done_prefixes:
 		if (ctxt->d & NotImpl)
 			return EMULATION_FAILED;
 
-		if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
-			return EMULATION_FAILED;
-
 		if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
 			ctxt->op_bytes = 8;
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 0027137daa56..2e3139eda93b 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -116,6 +116,7 @@ static int probe_common(struct virtio_device *vdev)
 		.cleanup = virtio_cleanup,
 		.priv = (unsigned long)vi,
 		.name = vi->name,
+		.quality = 1000,
 	};
 	vdev->priv = vi;
 
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 4d140bbbe100..9b7ee7e427df 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -89,6 +89,7 @@ struct idle_cpu {
 	 * Indicate which enable bits to clear here.
 	 */
 	unsigned long auto_demotion_disable_flags;
+	bool byt_auto_demotion_disable_flag;
 	bool disable_promotion_to_c1e;
 };
 
@@ -442,6 +443,66 @@ static struct cpuidle_state hsw_cstates[] = {
 	{
 		.enter = NULL }
 };
+static struct cpuidle_state bdw_cstates[] = {
+	{
+		.name = "C1-BDW",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle },
+	{
+		.name = "C1E-BDW",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
+		.name = "C3-BDW",
+		.desc = "MWAIT 0x10",
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 40,
+		.target_residency = 100,
+		.enter = &intel_idle },
+	{
+		.name = "C6-BDW",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 133,
+		.target_residency = 400,
+		.enter = &intel_idle },
+	{
+		.name = "C7s-BDW",
+		.desc = "MWAIT 0x32",
+		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 166,
+		.target_residency = 500,
+		.enter = &intel_idle },
+	{
+		.name = "C8-BDW",
+		.desc = "MWAIT 0x40",
+		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 300,
+		.target_residency = 900,
+		.enter = &intel_idle },
+	{
+		.name = "C9-BDW",
+		.desc = "MWAIT 0x50",
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 600,
+		.target_residency = 1800,
+		.enter = &intel_idle },
+	{
+		.name = "C10-BDW",
+		.desc = "MWAIT 0x60",
+		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 2600,
+		.target_residency = 7700,
+		.enter = &intel_idle },
+	{
+		.enter = NULL }
+};
 
 static struct cpuidle_state atom_cstates[] = {
 	{
@@ -613,6 +674,7 @@ static const struct idle_cpu idle_cpu_snb = {
 static const struct idle_cpu idle_cpu_byt = {
 	.state_table = byt_cstates,
 	.disable_promotion_to_c1e = true,
+	.byt_auto_demotion_disable_flag = true,
 };
 
 static const struct idle_cpu idle_cpu_ivb = {
@@ -630,6 +692,11 @@ static const struct idle_cpu idle_cpu_hsw = {
 	.disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_bdw = {
+	.state_table = bdw_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
 static const struct idle_cpu idle_cpu_avn = {
 	.state_table = avn_cstates,
 	.disable_promotion_to_c1e = true,
@@ -658,7 +725,10 @@ static const struct x86_cpu_id intel_idle_ids[] = {
 	ICPU(0x3f, idle_cpu_hsw),
 	ICPU(0x45, idle_cpu_hsw),
 	ICPU(0x46, idle_cpu_hsw),
-	ICPU(0x4D, idle_cpu_avn),
+	ICPU(0x4d, idle_cpu_avn),
+	ICPU(0x3d, idle_cpu_bdw),
+	ICPU(0x4f, idle_cpu_bdw),
+	ICPU(0x56, idle_cpu_bdw),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -814,6 +884,11 @@ static int __init intel_idle_cpuidle_driver_init(void)
 	if (icpu->auto_demotion_disable_flags)
 		on_each_cpu(auto_demotion_disable, NULL, 1);
 
+	if (icpu->byt_auto_demotion_disable_flag) {
+		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
+		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
+	}
+
 	if (icpu->disable_promotion_to_c1e)	/* each-cpu is redundant */
 		on_each_cpu(c1e_promotion_disable, NULL, 1);
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b08c18871323..6703751d87d7 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2953,6 +2953,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 	 */
 	if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
 		end_reshape(conf);
+		close_sync(conf);
 		return 0;
 	}
 
@@ -3081,6 +3082,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 	}
 
 	r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+	r10_bio->state = 0;
 	raise_barrier(conf, rb2 != NULL);
 	atomic_set(&r10_bio->remaining, 0);
 
@@ -3269,6 +3271,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 		if (sync_blocks < max_sync)
 			max_sync = sync_blocks;
 		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+		r10_bio->state = 0;
 
 		r10_bio->mddev = mddev;
 		atomic_set(&r10_bio->remaining, 0);
@@ -4384,6 +4387,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 read_more:
 	/* Now schedule reads for blocks from sector_nr to last */
 	r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+	r10_bio->state = 0;
 	raise_barrier(conf, sectors_done != 0);
 	atomic_set(&r10_bio->remaining, 0);
 	r10_bio->mddev = mddev;
@@ -4398,6 +4402,7 @@ read_more:
 		 * on all the target devices.
 		 */
 		// FIXME
+		mempool_free(r10_bio, conf->r10buf_pool);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		return sectors_done;
 	}
@@ -4410,7 +4415,7 @@ read_more:
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_sync_read;
 	read_bio->bi_rw = READ;
-	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+	read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
 	read_bio->bi_flags |= 1 << BIO_UPTODATE;
 	read_bio->bi_vcnt = 0;
 	read_bio->bi_iter.bi_size = 0;
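
The new mask is exact about what a recycled bio keeps: ~0UL << BIO_RESET_BITS
preserves every bit at or above BIO_RESET_BITS (where the bvec pool index
lives) and clears the per-I/O flag bits below it. A hedged worked example
(the BIO_RESET_BITS value here is illustrative only):

	/* Assume BIO_RESET_BITS == 13 and bi_flags == 0x00062fff:
	 *   0x00062fff & (~0UL << 13) == 0x00062000 -> pool bits kept,
	 *   stale per-I/O flags in bits 0..12 cleared.
	 * The caller then re-sets only the flag it needs:
	 */
	read_bio->bi_flags |= 1 << BIO_UPTODATE;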
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6234b2e84587..183588b11fc1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2922,7 +2922,7 @@ static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s,
 	     (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
 	     !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
 	    (sh->raid_conf->level == 6 && s->failed && s->to_write &&
-	     s->to_write < sh->raid_conf->raid_disks - 2 &&
+	     s->to_write - s->non_overwrite < sh->raid_conf->raid_disks - 2 &&
 	     (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
 		/* we would like to get this block, possibly by computing it,
 		 * otherwise read it if the backing disk is insync
@@ -3817,6 +3817,8 @@ static void handle_stripe(struct stripe_head *sh)
 				set_bit(R5_Wantwrite, &dev->flags);
 				if (prexor)
 					continue;
+				if (s.failed > 1)
+					continue;
 				if (!test_bit(R5_Insync, &dev->flags) ||
 				    ((i == sh->pd_idx || i == sh->qd_idx) &&
 				      s.failed == 0))
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index f46a24ffa3fe..79cb8313c7d8 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -453,7 +453,7 @@ static int __init __reserved_mem_reserve_reg(unsigned long node,
 		base = dt_mem_next_cell(dt_root_addr_cells, &prop);
 		size = dt_mem_next_cell(dt_root_size_cells, &prop);
 
-		if (base && size &&
+		if (size &&
 		    early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
 			pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %ld MiB\n",
 				uname, &base, (unsigned long)size / SZ_1M);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 3e06a699352d..1471e0a223a5 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -301,16 +301,17 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar
 	/* Get the reg property (if any) */
 	addr = of_get_property(device, "reg", NULL);
 
+	/* Try the new-style interrupts-extended first */
+	res = of_parse_phandle_with_args(device, "interrupts-extended",
+					"#interrupt-cells", index, out_irq);
+	if (!res)
+		return of_irq_parse_raw(addr, out_irq);
+
 	/* Get the interrupts property */
 	intspec = of_get_property(device, "interrupts", &intlen);
-	if (intspec == NULL) {
-		/* Try the new-style interrupts-extended */
-		res = of_parse_phandle_with_args(device, "interrupts-extended",
-						"#interrupt-cells", index, out_irq);
-		if (res)
-			return -EINVAL;
-		return of_irq_parse_raw(addr, out_irq);
-	}
+	if (intspec == NULL)
+		return -EINVAL;
+
 	intlen /= sizeof(*intspec);
 
 	pr_debug(" intspec=%d intlen=%d\n", be32_to_cpup(intspec), intlen);
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index d41002667833..a737cb5974de 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -27,6 +27,7 @@ static struct selftest_results {
 #define NO_OF_NODES	2
 static struct device_node *nodes[NO_OF_NODES];
 static int last_node_index;
+static bool selftest_live_tree;
 
 #define selftest(result, fmt, ...) { \
 	if (!(result)) { \
@@ -630,13 +631,6 @@ static int attach_node_and_children(struct device_node *np)
 {
 	struct device_node *next, *root = np, *dup;
 
-	if (!np) {
-		pr_warn("%s: No tree to attach; not running tests\n",
-			__func__);
-		return -ENODATA;
-	}
-
-
 	/* skip root node */
 	np = np->child;
 	/* storing a copy in temporary node */
@@ -672,12 +666,12 @@ static int attach_node_and_children(struct device_node *np)
 static int __init selftest_data_add(void)
 {
 	void *selftest_data;
-	struct device_node *selftest_data_node;
+	struct device_node *selftest_data_node, *np;
 	extern uint8_t __dtb_testcases_begin[];
 	extern uint8_t __dtb_testcases_end[];
 	const int size = __dtb_testcases_end - __dtb_testcases_begin;
 
-	if (!size || !of_allnodes) {
+	if (!size) {
 		pr_warn("%s: No testcase data to attach; not running tests\n",
 			__func__);
 		return -ENODATA;
@@ -692,6 +686,22 @@ static int __init selftest_data_add(void)
 		return -ENOMEM;
 	}
 	of_fdt_unflatten_tree(selftest_data, &selftest_data_node);
+	if (!selftest_data_node) {
+		pr_warn("%s: No tree to attach; not running tests\n", __func__);
+		return -ENODATA;
+	}
+
+	if (!of_allnodes) {
+		/* enabling flag for removing nodes */
+		selftest_live_tree = true;
+		of_allnodes = selftest_data_node;
+
+		for_each_of_allnodes(np)
+			__of_attach_node_sysfs(np);
+		of_aliases = of_find_node_by_path("/aliases");
+		of_chosen = of_find_node_by_path("/chosen");
+		return 0;
+	}
 
 	/* attach the sub-tree to live tree */
 	return attach_node_and_children(selftest_data_node);
@@ -723,6 +733,18 @@ static void selftest_data_remove(void)
 	struct device_node *np;
 	struct property *prop;
 
+	if (selftest_live_tree) {
+		of_node_put(of_aliases);
+		of_node_put(of_chosen);
+		of_aliases = NULL;
+		of_chosen = NULL;
+		for_each_child_of_node(of_allnodes, np)
+			detach_node_and_children(np);
+		__of_detach_node_sysfs(of_allnodes);
+		of_allnodes = NULL;
+		return;
+	}
+
 	while (last_node_index >= 0) {
 		if (nodes[last_node_index]) {
 			np = of_find_node_by_path(nodes[last_node_index]->full_name);
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 2d8a4d05d78f..8922c376456a 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -1,9 +1,18 @@
 menu "PCI host controller drivers"
 	depends on PCI
 
+config PCI_DRA7XX
+	bool "TI DRA7xx PCIe controller"
+	select PCIE_DW
+	depends on OF && HAS_IOMEM && TI_PIPE3
+	help
+	  Enables support for the PCIe controller in the DRA7xx SoC. There
+	  are two instances of PCIe controller in DRA7xx. This controller can
+	  act both as EP and RC. This reuses the Designware core.
+
 config PCI_MVEBU
 	bool "Marvell EBU PCIe controller"
-	depends on ARCH_MVEBU || ARCH_DOVE || ARCH_KIRKWOOD
+	depends on ARCH_MVEBU || ARCH_DOVE
 	depends on OF
 
 config PCIE_DW
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 0daec7941aba..d0e88f114ff9 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_PCIE_DW) += pcie-designware.o
+obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
 obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
 obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c
new file mode 100644
index 000000000000..52b34fee07fd
--- /dev/null
+++ b/drivers/pci/host/pci-dra7xx.c
@@ -0,0 +1,458 @@
+/*
+ * pcie-dra7xx - PCIe controller driver for TI DRA7xx SoCs
+ *
+ * Copyright (C) 2013-2014 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+
+/* PCIe controller wrapper DRA7XX configuration registers */
+
+#define PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN		0x0024
+#define PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MAIN		0x0028
+#define ERR_SYS						BIT(0)
+#define ERR_FATAL					BIT(1)
+#define ERR_NONFATAL					BIT(2)
+#define ERR_COR						BIT(3)
+#define ERR_AXI						BIT(4)
+#define ERR_ECRC					BIT(5)
+#define PME_TURN_OFF					BIT(8)
+#define PME_TO_ACK					BIT(9)
+#define PM_PME						BIT(10)
+#define LINK_REQ_RST					BIT(11)
+#define LINK_UP_EVT					BIT(12)
+#define CFG_BME_EVT					BIT(13)
+#define CFG_MSE_EVT					BIT(14)
+#define INTERRUPTS (ERR_SYS | ERR_FATAL | ERR_NONFATAL | ERR_COR | ERR_AXI | \
+			ERR_ECRC | PME_TURN_OFF | PME_TO_ACK | PM_PME | \
+			LINK_REQ_RST | LINK_UP_EVT | CFG_BME_EVT | CFG_MSE_EVT)
+
+#define PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI		0x0034
+#define PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI		0x0038
+#define INTA						BIT(0)
+#define INTB						BIT(1)
+#define INTC						BIT(2)
+#define INTD						BIT(3)
+#define MSI						BIT(4)
+#define LEG_EP_INTERRUPTS (INTA | INTB | INTC | INTD)
+
+#define PCIECTRL_DRA7XX_CONF_DEVICE_CMD			0x0104
+#define LTSSM_EN					0x1
+
+#define PCIECTRL_DRA7XX_CONF_PHY_CS			0x010C
+#define LINK_UP						BIT(16)
+
+struct dra7xx_pcie {
+	void __iomem		*base;
+	struct phy		**phy;
+	int			phy_count;
+	struct device		*dev;
+	struct pcie_port	pp;
+};
+
+#define to_dra7xx_pcie(x)	container_of((x), struct dra7xx_pcie, pp)
+
+static inline u32 dra7xx_pcie_readl(struct dra7xx_pcie *pcie, u32 offset)
+{
+	return readl(pcie->base + offset);
+}
+
+static inline void dra7xx_pcie_writel(struct dra7xx_pcie *pcie, u32 offset,
+				      u32 value)
+{
+	writel(value, pcie->base + offset);
+}
+
+static int dra7xx_pcie_link_up(struct pcie_port *pp)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+	u32 reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_PHY_CS);
+
+	return !!(reg & LINK_UP);
+}
+
+static int dra7xx_pcie_establish_link(struct pcie_port *pp)
+{
+	u32 reg;
+	unsigned int retries = 1000;
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+
+	if (dw_pcie_link_up(pp)) {
+		dev_err(pp->dev, "link is already up\n");
+		return 0;
+	}
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg |= LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+
+	while (retries--) {
+		reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_PHY_CS);
+		if (reg & LINK_UP)
+			break;
+		usleep_range(10, 20);
+	}
+
+	if (retries == 0) {
+		dev_err(pp->dev, "link is not up\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void dra7xx_pcie_enable_interrupts(struct pcie_port *pp)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN,
+			   ~INTERRUPTS);
+	dra7xx_pcie_writel(dra7xx,
+			   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MAIN, INTERRUPTS);
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI,
+			   ~LEG_EP_INTERRUPTS & ~MSI);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		dra7xx_pcie_writel(dra7xx,
+				   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI, MSI);
+	else
+		dra7xx_pcie_writel(dra7xx,
+				   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI,
+				   LEG_EP_INTERRUPTS);
+}
+
+static void dra7xx_pcie_host_init(struct pcie_port *pp)
+{
+	dw_pcie_setup_rc(pp);
+	dra7xx_pcie_establish_link(pp);
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		dw_pcie_msi_init(pp);
+	dra7xx_pcie_enable_interrupts(pp);
+}
+
+static struct pcie_host_ops dra7xx_pcie_host_ops = {
+	.link_up = dra7xx_pcie_link_up,
+	.host_init = dra7xx_pcie_host_init,
+};
+
+static int dra7xx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+	set_irq_flags(irq, IRQF_VALID);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = dra7xx_pcie_intx_map,
+};
+
+static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
+{
+	struct device *dev = pp->dev;
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node = of_get_next_child(node, NULL);
+
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return PTR_ERR(pcie_intc_node);
+	}
+
+	pp->irq_domain = irq_domain_add_linear(pcie_intc_node, 4,
+					       &intx_domain_ops, pp);
+	if (!pp->irq_domain) {
+		dev_err(dev, "Failed to get a INTx IRQ domain\n");
+		return PTR_ERR(pp->irq_domain);
+	}
+
+	return 0;
+}
+
+static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
+{
+	struct pcie_port *pp = arg;
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+	u32 reg;
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI);
+
+	switch (reg) {
+	case MSI:
+		dw_handle_msi_irq(pp);
+		break;
+	case INTA:
+	case INTB:
+	case INTC:
+	case INTD:
+		generic_handle_irq(irq_find_mapping(pp->irq_domain, ffs(reg)));
+		break;
+	}
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
+{
+	struct dra7xx_pcie *dra7xx = arg;
+	u32 reg;
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN);
+
+	if (reg & ERR_SYS)
+		dev_dbg(dra7xx->dev, "System Error\n");
+
+	if (reg & ERR_FATAL)
+		dev_dbg(dra7xx->dev, "Fatal Error\n");
+
+	if (reg & ERR_NONFATAL)
+		dev_dbg(dra7xx->dev, "Non Fatal Error\n");
+
+	if (reg & ERR_COR)
+		dev_dbg(dra7xx->dev, "Correctable Error\n");
+
+	if (reg & ERR_AXI)
+		dev_dbg(dra7xx->dev, "AXI tag lookup fatal Error\n");
+
+	if (reg & ERR_ECRC)
+		dev_dbg(dra7xx->dev, "ECRC Error\n");
+
+	if (reg & PME_TURN_OFF)
+		dev_dbg(dra7xx->dev,
+			"Power Management Event Turn-Off message received\n");
+
+	if (reg & PME_TO_ACK)
+		dev_dbg(dra7xx->dev,
+			"Power Management Turn-Off Ack message received\n");
+
+	if (reg & PM_PME)
+		dev_dbg(dra7xx->dev,
+			"PM Power Management Event message received\n");
+
+	if (reg & LINK_REQ_RST)
+		dev_dbg(dra7xx->dev, "Link Request Reset\n");
+
+	if (reg & LINK_UP_EVT)
+		dev_dbg(dra7xx->dev, "Link-up state change\n");
+
+	if (reg & CFG_BME_EVT)
+		dev_dbg(dra7xx->dev, "CFG 'Bus Master Enable' change\n");
+
+	if (reg & CFG_MSE_EVT)
+		dev_dbg(dra7xx->dev, "CFG 'Memory Space Enable' change\n");
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN, reg);
+
+	return IRQ_HANDLED;
+}
+
+static int add_pcie_port(struct dra7xx_pcie *dra7xx,
+			 struct platform_device *pdev)
+{
+	int ret;
+	struct pcie_port *pp;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+
+	pp = &dra7xx->pp;
+	pp->dev = dev;
+	pp->ops = &dra7xx_pcie_host_ops;
+
+	pp->irq = platform_get_irq(pdev, 1);
+	if (pp->irq < 0) {
+		dev_err(dev, "missing IRQ resource\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(&pdev->dev, pp->irq,
+			       dra7xx_pcie_msi_irq_handler, IRQF_SHARED,
+			       "dra7-pcie-msi", pp);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request irq\n");
+		return ret;
+	}
+
+	if (!IS_ENABLED(CONFIG_PCI_MSI)) {
+		ret = dra7xx_pcie_init_irq_domain(pp);
+		if (ret < 0)
+			return ret;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rc_dbics");
+	pp->dbi_base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!pp->dbi_base)
+		return -ENOMEM;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dra7xx->dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init dra7xx_pcie_probe(struct platform_device *pdev)
+{
+	u32 reg;
+	int ret;
+	int irq;
+	int i;
+	int phy_count;
+	struct phy **phy;
+	void __iomem *base;
+	struct resource *res;
+	struct dra7xx_pcie *dra7xx;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	char name[10];
+
+	dra7xx = devm_kzalloc(dev, sizeof(*dra7xx), GFP_KERNEL);
+	if (!dra7xx)
+		return -ENOMEM;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "missing IRQ resource\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(dev, irq, dra7xx_pcie_irq_handler,
+			       IRQF_SHARED, "dra7xx-pcie-main", dra7xx);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ti_conf");
+	base = devm_ioremap_nocache(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	phy_count = of_property_count_strings(np, "phy-names");
+	if (phy_count < 0) {
+		dev_err(dev, "unable to find the strings\n");
+		return phy_count;
+	}
+
+	phy = devm_kzalloc(dev, sizeof(*phy) * phy_count, GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	for (i = 0; i < phy_count; i++) {
+		snprintf(name, sizeof(name), "pcie-phy%d", i);
+		phy[i] = devm_phy_get(dev, name);
+		if (IS_ERR(phy[i]))
+			return PTR_ERR(phy[i]);
+
+		ret = phy_init(phy[i]);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_power_on(phy[i]);
+		if (ret < 0) {
+			phy_exit(phy[i]);
+			goto err_phy;
+		}
+	}
+
+	dra7xx->base = base;
+	dra7xx->phy = phy;
+	dra7xx->dev = dev;
+	dra7xx->phy_count = phy_count;
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (IS_ERR_VALUE(ret)) {
+		dev_err(dev, "pm_runtime_get_sync failed\n");
+		goto err_phy;
+	}
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg &= ~LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+
+	platform_set_drvdata(pdev, dra7xx);
+
+	ret = add_pcie_port(dra7xx, pdev);
+	if (ret < 0)
+		goto err_add_port;
+
+	return 0;
+
+err_add_port:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+
+err_phy:
+	while (--i >= 0) {
+		phy_power_off(phy[i]);
+		phy_exit(phy[i]);
+	}
+
+	return ret;
+}
+
+static int __exit dra7xx_pcie_remove(struct platform_device *pdev)
+{
+	struct dra7xx_pcie *dra7xx = platform_get_drvdata(pdev);
+	struct pcie_port *pp = &dra7xx->pp;
+	struct device *dev = &pdev->dev;
+	int count = dra7xx->phy_count;
+
+	if (pp->irq_domain)
+		irq_domain_remove(pp->irq_domain);
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+	while (count--) {
+		phy_power_off(dra7xx->phy[count]);
+		phy_exit(dra7xx->phy[count]);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id of_dra7xx_pcie_match[] = {
+	{ .compatible = "ti,dra7-pcie", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_dra7xx_pcie_match);
+
+static struct platform_driver dra7xx_pcie_driver = {
+	.remove		= __exit_p(dra7xx_pcie_remove),
+	.driver = {
+		.name	= "dra7-pcie",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_dra7xx_pcie_match,
+	},
+};
+
+module_platform_driver_probe(dra7xx_pcie_driver, dra7xx_pcie_probe);
+
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_DESCRIPTION("TI PCIe controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index abd65784618d..0fb0fdb223d5 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
@@ -276,6 +277,7 @@ struct tegra_pcie {
 	unsigned int num_supplies;
 
 	const struct tegra_pcie_soc_data *soc_data;
+	struct dentry *debugfs;
 };
 
 struct tegra_pcie_port {
@@ -1739,6 +1741,115 @@ static const struct of_device_id tegra_pcie_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_pcie_of_match);
 
+static void *tegra_pcie_ports_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct tegra_pcie *pcie = s->private;
+
+	if (list_empty(&pcie->ports))
+		return NULL;
+
+	seq_printf(s, "Index  Status\n");
+
+	return seq_list_start(&pcie->ports, *pos);
+}
+
+static void *tegra_pcie_ports_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct tegra_pcie *pcie = s->private;
+
+	return seq_list_next(v, &pcie->ports, pos);
+}
+
+static void tegra_pcie_ports_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int tegra_pcie_ports_seq_show(struct seq_file *s, void *v)
+{
+	bool up = false, active = false;
+	struct tegra_pcie_port *port;
+	unsigned int value;
+
+	port = list_entry(v, struct tegra_pcie_port, list);
+
+	value = readl(port->base + RP_VEND_XP);
+
+	if (value & RP_VEND_XP_DL_UP)
+		up = true;
+
+	value = readl(port->base + RP_LINK_CONTROL_STATUS);
+
+	if (value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE)
+		active = true;
+
+	seq_printf(s, "%2u     ", port->index);
+
+	if (up)
+		seq_printf(s, "up");
+
+	if (active) {
+		if (up)
+			seq_printf(s, ", ");
+
+		seq_printf(s, "active");
+	}
+
+	seq_printf(s, "\n");
+	return 0;
+}
+
+static const struct seq_operations tegra_pcie_ports_seq_ops = {
+	.start = tegra_pcie_ports_seq_start,
+	.next = tegra_pcie_ports_seq_next,
+	.stop = tegra_pcie_ports_seq_stop,
+	.show = tegra_pcie_ports_seq_show,
+};
+
+static int tegra_pcie_ports_open(struct inode *inode, struct file *file)
+{
+	struct tegra_pcie *pcie = inode->i_private;
+	struct seq_file *s;
+	int err;
+
+	err = seq_open(file, &tegra_pcie_ports_seq_ops);
+	if (err)
+		return err;
+
+	s = file->private_data;
+	s->private = pcie;
+
+	return 0;
+}
+
+static const struct file_operations tegra_pcie_ports_ops = {
+	.owner = THIS_MODULE,
+	.open = tegra_pcie_ports_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static int tegra_pcie_debugfs_init(struct tegra_pcie *pcie)
+{
+	struct dentry *file;
+
+	pcie->debugfs = debugfs_create_dir("pcie", NULL);
+	if (!pcie->debugfs)
+		return -ENOMEM;
+
+	file = debugfs_create_file("ports", S_IFREG | S_IRUGO, pcie->debugfs,
+				   pcie, &tegra_pcie_ports_ops);
+	if (!file)
+		goto remove;
+
+	return 0;
+
+remove:
+	debugfs_remove_recursive(pcie->debugfs);
+	pcie->debugfs = NULL;
+	return -ENOMEM;
+}
+
 static int tegra_pcie_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
@@ -1793,6 +1904,13 @@ static int tegra_pcie_probe(struct platform_device *pdev)
 		goto disable_msi;
 	}
 
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_pcie_debugfs_init(pcie);
+		if (err < 0)
+			dev_err(&pdev->dev, "failed to setup debugfs: %d\n",
+				err);
+	}
+
 	platform_set_drvdata(pdev, pcie);
 	return 0;
 
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 1eaf4df3618a..52bd3a143563 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -20,6 +20,7 @@
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #include "pcie-designware.h"
@@ -217,27 +218,47 @@ static int find_valid_pos0(struct pcie_port *pp, int msgvec, int pos, int *pos0)
 	return 0;
 }
 
+static void dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq)
+{
+	unsigned int res, bit, val;
+
+	res = (irq / 32) * 12;
+	bit = irq % 32;
+	dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
+	val &= ~(1 << bit);
+	dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+}
+
 static void clear_irq_range(struct pcie_port *pp, unsigned int irq_base,
 			    unsigned int nvec, unsigned int pos)
 {
-	unsigned int i, res, bit, val;
+	unsigned int i;
 
 	for (i = 0; i < nvec; i++) {
 		irq_set_msi_desc_off(irq_base, i, NULL);
 		clear_bit(pos + i, pp->msi_irq_in_use);
 		/* Disable corresponding interrupt on MSI controller */
-		res = ((pos + i) / 32) * 12;
-		bit = (pos + i) % 32;
-		dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
-		val &= ~(1 << bit);
-		dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+		if (pp->ops->msi_clear_irq)
+			pp->ops->msi_clear_irq(pp, pos + i);
+		else
+			dw_pcie_msi_clear_irq(pp, pos + i);
 	}
 }
 
+static void dw_pcie_msi_set_irq(struct pcie_port *pp, int irq)
+{
+	unsigned int res, bit, val;
+
+	res = (irq / 32) * 12;
+	bit = irq % 32;
+	dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
+	val |= 1 << bit;
+	dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+}
+
 static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
 {
-	int res, bit, irq, pos0, pos1, i;
-	u32 val;
+	int irq, pos0, pos1, i;
 	struct pcie_port *pp = sys_to_pcie(desc->dev->bus->sysdata);
 
 	if (!pp) {
@@ -281,11 +302,10 @@ static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
 		}
 		set_bit(pos0 + i, pp->msi_irq_in_use);
 		/*Enable corresponding interrupt in MSI interrupt controller */
-		res = ((pos0 + i) / 32) * 12;
-		bit = (pos0 + i) % 32;
-		dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
-		val |= 1 << bit;
-		dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+		if (pp->ops->msi_set_irq)
+			pp->ops->msi_set_irq(pp, pos0 + i);
+		else
+			dw_pcie_msi_set_irq(pp, pos0 + i);
 	}
 
 	*pos = pos0;
@@ -353,7 +373,10 @@ static int dw_msi_setup_irq(struct msi_chip *chip, struct pci_dev *pdev,
 	 */
 	desc->msi_attrib.multiple = msgvec;
 
-	msg.address_lo = virt_to_phys((void *)pp->msi_data);
+	if (pp->ops->get_msi_data)
+		msg.address_lo = pp->ops->get_msi_data(pp);
+	else
+		msg.address_lo = virt_to_phys((void *)pp->msi_data);
 	msg.address_hi = 0x0;
 	msg.data = pos;
 	write_msi_msg(irq, &msg);
@@ -396,10 +419,35 @@ static const struct irq_domain_ops msi_domain_ops = {
 int __init dw_pcie_host_init(struct pcie_port *pp)
 {
 	struct device_node *np = pp->dev->of_node;
+	struct platform_device *pdev = to_platform_device(pp->dev);
 	struct of_pci_range range;
 	struct of_pci_range_parser parser;
-	u32 val;
-	int i;
+	struct resource *cfg_res;
+	u32 val, na, ns;
+	const __be32 *addrp;
+	int i, index;
+
+	/* Find the address cell size and the number of cells in order to get
+	 * the untranslated address.
+	 */
+	of_property_read_u32(np, "#address-cells", &na);
+	ns = of_n_size_cells(np);
+
+	cfg_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
+	if (cfg_res) {
+		pp->config.cfg0_size = resource_size(cfg_res)/2;
+		pp->config.cfg1_size = resource_size(cfg_res)/2;
+		pp->cfg0_base = cfg_res->start;
+		pp->cfg1_base = cfg_res->start + pp->config.cfg0_size;
+
+		/* Find the untranslated configuration space address */
+		index = of_property_match_string(np, "reg-names", "config");
+		addrp = of_get_address(np, index, false, false);
+		pp->cfg0_mod_base = of_read_number(addrp, ns);
+		pp->cfg1_mod_base = pp->cfg0_mod_base + pp->config.cfg0_size;
+	} else {
+		dev_err(pp->dev, "missing *config* reg space\n");
+	}
 
 	if (of_pci_range_parser_init(&parser, np)) {
 		dev_err(pp->dev, "missing ranges property\n");
@@ -422,17 +470,33 @@ int __init dw_pcie_host_init(struct pcie_port *pp)
 			pp->config.io_size = resource_size(&pp->io);
 			pp->config.io_bus_addr = range.pci_addr;
 			pp->io_base = range.cpu_addr;
+
+			/* Find the untranslated IO space address */
+			pp->io_mod_base = of_read_number(parser.range -
+							 parser.np + na, ns);
 		}
 		if (restype == IORESOURCE_MEM) {
 			of_pci_range_to_resource(&range, np, &pp->mem);
 			pp->mem.name = "MEM";
 			pp->config.mem_size = resource_size(&pp->mem);
 			pp->config.mem_bus_addr = range.pci_addr;
+
+			/* Find the untranslated MEM space address */
+			pp->mem_mod_base = of_read_number(parser.range -
+							  parser.np + na, ns);
 		}
 		if (restype == 0) {
 			of_pci_range_to_resource(&range, np, &pp->cfg);
 			pp->config.cfg0_size = resource_size(&pp->cfg)/2;
 			pp->config.cfg1_size = resource_size(&pp->cfg)/2;
+			pp->cfg0_base = pp->cfg.start;
+			pp->cfg1_base = pp->cfg.start + pp->config.cfg0_size;
+
+			/* Find the untranslated configuration space address */
+			pp->cfg0_mod_base = of_read_number(parser.range -
+							   parser.np + na, ns);
+			pp->cfg1_mod_base = pp->cfg0_mod_base +
+					    pp->config.cfg0_size;
 		}
 	}
 
@@ -445,8 +509,6 @@ int __init dw_pcie_host_init(struct pcie_port *pp)
 		}
 	}
 
-	pp->cfg0_base = pp->cfg.start;
-	pp->cfg1_base = pp->cfg.start + pp->config.cfg0_size;
 	pp->mem_base = pp->mem.start;
 
 	pp->va_cfg0_base = devm_ioremap(pp->dev, pp->cfg0_base,
@@ -509,9 +571,9 @@ static void dw_pcie_prog_viewport_cfg0(struct pcie_port *pp, u32 busdev)
 	/* Program viewport 0 : OUTBOUND : CFG0 */
 	dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0,
 			  PCIE_ATU_VIEWPORT);
-	dw_pcie_writel_rc(pp, pp->cfg0_base, PCIE_ATU_LOWER_BASE);
-	dw_pcie_writel_rc(pp, (pp->cfg0_base >> 32), PCIE_ATU_UPPER_BASE);
-	dw_pcie_writel_rc(pp, pp->cfg0_base + pp->config.cfg0_size - 1,
+	dw_pcie_writel_rc(pp, pp->cfg0_mod_base, PCIE_ATU_LOWER_BASE);
+	dw_pcie_writel_rc(pp, (pp->cfg0_mod_base >> 32), PCIE_ATU_UPPER_BASE);
+	dw_pcie_writel_rc(pp, pp->cfg0_mod_base + pp->config.cfg0_size - 1,
 			  PCIE_ATU_LIMIT);
 	dw_pcie_writel_rc(pp, busdev, PCIE_ATU_LOWER_TARGET);
 	dw_pcie_writel_rc(pp, 0, PCIE_ATU_UPPER_TARGET);
@@ -525,9 +587,9 @@ static void dw_pcie_prog_viewport_cfg1(struct pcie_port *pp, u32 busdev)
 	dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
 			  PCIE_ATU_VIEWPORT);
 	dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_CFG1, PCIE_ATU_CR1);
-	dw_pcie_writel_rc(pp, pp->cfg1_base, PCIE_ATU_LOWER_BASE);
-	dw_pcie_writel_rc(pp, (pp->cfg1_base >> 32), PCIE_ATU_UPPER_BASE);
-	dw_pcie_writel_rc(pp, pp->cfg1_base + pp->config.cfg1_size - 1,
+	dw_pcie_writel_rc(pp, pp->cfg1_mod_base, PCIE_ATU_LOWER_BASE);
+	dw_pcie_writel_rc(pp, (pp->cfg1_mod_base >> 32), PCIE_ATU_UPPER_BASE);
+	dw_pcie_writel_rc(pp, pp->cfg1_mod_base + pp->config.cfg1_size - 1,
 			  PCIE_ATU_LIMIT);
 	dw_pcie_writel_rc(pp, busdev, PCIE_ATU_LOWER_TARGET);
 	dw_pcie_writel_rc(pp, 0, PCIE_ATU_UPPER_TARGET);
@@ -540,9 +602,9 @@ static void dw_pcie_prog_viewport_mem_outbound(struct pcie_port *pp)
540 dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0, 602 dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0,
541 PCIE_ATU_VIEWPORT); 603 PCIE_ATU_VIEWPORT);
542 dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_MEM, PCIE_ATU_CR1); 604 dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_MEM, PCIE_ATU_CR1);
543 dw_pcie_writel_rc(pp, pp->mem_base, PCIE_ATU_LOWER_BASE); 605 dw_pcie_writel_rc(pp, pp->mem_mod_base, PCIE_ATU_LOWER_BASE);
544 dw_pcie_writel_rc(pp, (pp->mem_base >> 32), PCIE_ATU_UPPER_BASE); 606 dw_pcie_writel_rc(pp, (pp->mem_mod_base >> 32), PCIE_ATU_UPPER_BASE);
545 dw_pcie_writel_rc(pp, pp->mem_base + pp->config.mem_size - 1, 607 dw_pcie_writel_rc(pp, pp->mem_mod_base + pp->config.mem_size - 1,
546 PCIE_ATU_LIMIT); 608 PCIE_ATU_LIMIT);
547 dw_pcie_writel_rc(pp, pp->config.mem_bus_addr, PCIE_ATU_LOWER_TARGET); 609 dw_pcie_writel_rc(pp, pp->config.mem_bus_addr, PCIE_ATU_LOWER_TARGET);
548 dw_pcie_writel_rc(pp, upper_32_bits(pp->config.mem_bus_addr), 610 dw_pcie_writel_rc(pp, upper_32_bits(pp->config.mem_bus_addr),
@@ -556,9 +618,9 @@ static void dw_pcie_prog_viewport_io_outbound(struct pcie_port *pp)
556 dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1, 618 dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
557 PCIE_ATU_VIEWPORT); 619 PCIE_ATU_VIEWPORT);
558 dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_IO, PCIE_ATU_CR1); 620 dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_IO, PCIE_ATU_CR1);
559 dw_pcie_writel_rc(pp, pp->io_base, PCIE_ATU_LOWER_BASE); 621 dw_pcie_writel_rc(pp, pp->io_mod_base, PCIE_ATU_LOWER_BASE);
560 dw_pcie_writel_rc(pp, (pp->io_base >> 32), PCIE_ATU_UPPER_BASE); 622 dw_pcie_writel_rc(pp, (pp->io_mod_base >> 32), PCIE_ATU_UPPER_BASE);
561 dw_pcie_writel_rc(pp, pp->io_base + pp->config.io_size - 1, 623 dw_pcie_writel_rc(pp, pp->io_mod_base + pp->config.io_size - 1,
562 PCIE_ATU_LIMIT); 624 PCIE_ATU_LIMIT);
563 dw_pcie_writel_rc(pp, pp->config.io_bus_addr, PCIE_ATU_LOWER_TARGET); 625 dw_pcie_writel_rc(pp, pp->config.io_bus_addr, PCIE_ATU_LOWER_TARGET);
564 dw_pcie_writel_rc(pp, upper_32_bits(pp->config.io_bus_addr), 626 dw_pcie_writel_rc(pp, upper_32_bits(pp->config.io_bus_addr),
@@ -656,7 +718,11 @@ static int dw_pcie_rd_conf(struct pci_bus *bus, u32 devfn, int where,
656 } 718 }
657 719
658 if (bus->number != pp->root_bus_nr) 720 if (bus->number != pp->root_bus_nr)
659 ret = dw_pcie_rd_other_conf(pp, bus, devfn, 721 if (pp->ops->rd_other_conf)
722 ret = pp->ops->rd_other_conf(pp, bus, devfn,
723 where, size, val);
724 else
725 ret = dw_pcie_rd_other_conf(pp, bus, devfn,
660 where, size, val); 726 where, size, val);
661 else 727 else
662 ret = dw_pcie_rd_own_conf(pp, where, size, val); 728 ret = dw_pcie_rd_own_conf(pp, where, size, val);
@@ -679,7 +745,11 @@ static int dw_pcie_wr_conf(struct pci_bus *bus, u32 devfn,
679 return PCIBIOS_DEVICE_NOT_FOUND; 745 return PCIBIOS_DEVICE_NOT_FOUND;
680 746
681 if (bus->number != pp->root_bus_nr) 747 if (bus->number != pp->root_bus_nr)
682 ret = dw_pcie_wr_other_conf(pp, bus, devfn, 748 if (pp->ops->wr_other_conf)
749 ret = pp->ops->wr_other_conf(pp, bus, devfn,
750 where, size, val);
751 else
752 ret = dw_pcie_wr_other_conf(pp, bus, devfn,
683 where, size, val); 753 where, size, val);
684 else 754 else
685 ret = dw_pcie_wr_own_conf(pp, where, size, val); 755 ret = dw_pcie_wr_own_conf(pp, where, size, val);
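The designware hunks above stop feeding the CPU-translated window addresses (pp->cfg0_base and friends) into the outbound iATU and use the untranslated values read straight out of the ranges property instead (pp->*_mod_base). On SoCs such as DRA7xx the PCIe controller decodes a different address than the CPU sees, so the iATU match registers must hold the controller-local base. A minimal userspace sketch of the idea; atu_write() and struct port are simplified stand-ins for dw_pcie_writel_rc() and struct pcie_port, and the example addresses are made up:

#include <stdint.h>
#include <stdio.h>

struct port {
	uint64_t cfg0_base;      /* window as the CPU sees it        */
	uint64_t cfg0_mod_base;  /* same window as the RC decodes it */
	uint32_t cfg0_size;
};

static void atu_write(uint32_t val, const char *reg)
{
	printf("%-16s <- 0x%08x\n", reg, (unsigned)val);
}

static void prog_viewport_cfg0(const struct port *pp, uint32_t busdev)
{
	/* the iATU matches controller-local addresses, so the
	 * untranslated *_mod_base values must be programmed here */
	atu_write((uint32_t)pp->cfg0_mod_base, "ATU_LOWER_BASE");
	atu_write((uint32_t)(pp->cfg0_mod_base >> 32), "ATU_UPPER_BASE");
	atu_write((uint32_t)(pp->cfg0_mod_base + pp->cfg0_size - 1),
		  "ATU_LIMIT");
	atu_write(busdev, "ATU_LOWER_TARGET");
}

int main(void)
{
	struct port pp = {
		.cfg0_base     = 0x20013000,  /* CPU view (unused here) */
		.cfg0_mod_base = 0x00013000,  /* controller-local view  */
		.cfg0_size     = 0x1000,
	};

	prog_viewport_cfg0(&pp, 1 << 24);  /* bus 1, devfn 0 */
	return 0;
}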
diff --git a/drivers/pci/host/pcie-designware.h b/drivers/pci/host/pcie-designware.h
index 77f592faa7bf..daf81f922cda 100644
--- a/drivers/pci/host/pcie-designware.h
+++ b/drivers/pci/host/pcie-designware.h
@@ -36,11 +36,15 @@ struct pcie_port {
36 u8 root_bus_nr; 36 u8 root_bus_nr;
37 void __iomem *dbi_base; 37 void __iomem *dbi_base;
38 u64 cfg0_base; 38 u64 cfg0_base;
39 u64 cfg0_mod_base;
39 void __iomem *va_cfg0_base; 40 void __iomem *va_cfg0_base;
40 u64 cfg1_base; 41 u64 cfg1_base;
42 u64 cfg1_mod_base;
41 void __iomem *va_cfg1_base; 43 void __iomem *va_cfg1_base;
42 u64 io_base; 44 u64 io_base;
45 u64 io_mod_base;
43 u64 mem_base; 46 u64 mem_base;
47 u64 mem_mod_base;
44 struct resource cfg; 48 struct resource cfg;
45 struct resource io; 49 struct resource io;
46 struct resource mem; 50 struct resource mem;
@@ -61,8 +65,15 @@ struct pcie_host_ops {
61 u32 val, void __iomem *dbi_base); 65 u32 val, void __iomem *dbi_base);
62 int (*rd_own_conf)(struct pcie_port *pp, int where, int size, u32 *val); 66 int (*rd_own_conf)(struct pcie_port *pp, int where, int size, u32 *val);
63 int (*wr_own_conf)(struct pcie_port *pp, int where, int size, u32 val); 67 int (*wr_own_conf)(struct pcie_port *pp, int where, int size, u32 val);
68 int (*rd_other_conf)(struct pcie_port *pp, struct pci_bus *bus,
69 unsigned int devfn, int where, int size, u32 *val);
70 int (*wr_other_conf)(struct pcie_port *pp, struct pci_bus *bus,
71 unsigned int devfn, int where, int size, u32 val);
64 int (*link_up)(struct pcie_port *pp); 72 int (*link_up)(struct pcie_port *pp);
65 void (*host_init)(struct pcie_port *pp); 73 void (*host_init)(struct pcie_port *pp);
74 void (*msi_set_irq)(struct pcie_port *pp, int irq);
75 void (*msi_clear_irq)(struct pcie_port *pp, int irq);
76 u32 (*get_msi_data)(struct pcie_port *pp);
66}; 77};
67 78
68int dw_pcie_cfg_read(void __iomem *addr, int where, int size, u32 *val); 79int dw_pcie_cfg_read(void __iomem *addr, int where, int size, u32 *val);
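The new rd_other_conf/wr_other_conf members let a platform driver take over config-space accesses to buses behind the root port, while NULL hooks fall back to the generic dw_pcie_rd_other_conf()/dw_pcie_wr_other_conf() paths, as the dw_pcie_rd_conf()/dw_pcie_wr_conf() hunks above show. A kernel-style fragment of how a driver might wire one up; the function name and the window arithmetic are hypothetical, only dw_pcie_cfg_read() and the ops layout come from this header:

/* Hypothetical override: config reads for downstream buses go through
 * a driver-specific window instead of the generic iATU path. */
static int my_rd_other_conf(struct pcie_port *pp, struct pci_bus *bus,
			    unsigned int devfn, int where, int size, u32 *val)
{
	/* made-up window math: one 4K slice per device/function */
	void __iomem *addr = pp->va_cfg1_base + (devfn << 12) + (where & ~0x3);

	return dw_pcie_cfg_read(addr, where, size, val);
}

static struct pcie_host_ops my_pcie_host_ops = {
	.rd_other_conf = my_rd_other_conf,
	/* .wr_other_conf left NULL: writes use dw_pcie_wr_other_conf() */
};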
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index df3306019a7e..d81f3cc43ff1 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -377,6 +377,10 @@ scsi_alloc_host_cmd_pool(struct Scsi_Host *shost)
377 pool->slab_flags |= SLAB_CACHE_DMA; 377 pool->slab_flags |= SLAB_CACHE_DMA;
378 pool->gfp_mask = __GFP_DMA; 378 pool->gfp_mask = __GFP_DMA;
379 } 379 }
380
381 if (hostt->cmd_size)
382 hostt->cmd_pool = pool;
383
380 return pool; 384 return pool;
381} 385}
382 386
@@ -421,8 +425,10 @@ out:
421out_free_slab: 425out_free_slab:
422 kmem_cache_destroy(pool->cmd_slab); 426 kmem_cache_destroy(pool->cmd_slab);
423out_free_pool: 427out_free_pool:
424 if (hostt->cmd_size) 428 if (hostt->cmd_size) {
425 scsi_free_host_cmd_pool(pool); 429 scsi_free_host_cmd_pool(pool);
430 hostt->cmd_pool = NULL;
431 }
426 goto out; 432 goto out;
427} 433}
428 434
@@ -444,8 +450,10 @@ static void scsi_put_host_cmd_pool(struct Scsi_Host *shost)
444 if (!--pool->users) { 450 if (!--pool->users) {
445 kmem_cache_destroy(pool->cmd_slab); 451 kmem_cache_destroy(pool->cmd_slab);
446 kmem_cache_destroy(pool->sense_slab); 452 kmem_cache_destroy(pool->sense_slab);
447 if (hostt->cmd_size) 453 if (hostt->cmd_size) {
448 scsi_free_host_cmd_pool(pool); 454 scsi_free_host_cmd_pool(pool);
455 hostt->cmd_pool = NULL;
456 }
449 } 457 }
450 mutex_unlock(&host_cmd_pool_mutex); 458 mutex_unlock(&host_cmd_pool_mutex);
451} 459}
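The scsi.c hunks cache a driver-private command pool in hostt->cmd_pool once it is created and, symmetrically, clear that cached pointer on both free paths, so a later pool lookup can never pick up a stale pointer to freed memory. A small userspace sketch of the pattern, with plain stand-ins for the SCSI host and pool types:

#include <stdlib.h>

struct cmd_pool { int users; };
struct host_type { int cmd_size; struct cmd_pool *cmd_pool; };

static struct cmd_pool *get_pool(struct host_type *hostt)
{
	struct cmd_pool *pool = hostt->cmd_pool;

	if (!pool) {
		pool = calloc(1, sizeof(*pool));
		if (!pool)
			return NULL;
		if (hostt->cmd_size)    /* driver-private pool: cache it */
			hostt->cmd_pool = pool;
	}
	pool->users++;
	return pool;
}

static void put_pool(struct host_type *hostt, struct cmd_pool *pool)
{
	if (--pool->users)
		return;
	free(pool);
	if (hostt->cmd_size)
		hostt->cmd_pool = NULL;  /* no stale pointer for next get */
}

int main(void)
{
	struct host_type hostt = { .cmd_size = 64 };
	struct cmd_pool *p = get_pool(&hostt);

	put_pool(&hostt, p);
	/* a second get_pool() now allocates fresh instead of
	 * dereferencing the freed pool */
	p = get_pool(&hostt);
	put_pool(&hostt, p);
	return 0;
}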
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9c44392b748f..ce62e8798cc8 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1774,7 +1774,7 @@ static void scsi_request_fn(struct request_queue *q)
1774 blk_requeue_request(q, req); 1774 blk_requeue_request(q, req);
1775 atomic_dec(&sdev->device_busy); 1775 atomic_dec(&sdev->device_busy);
1776out_delay: 1776out_delay:
1777 if (atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev)) 1777 if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
1778 blk_delay_queue(q, SCSI_QUEUE_DELAY); 1778 blk_delay_queue(q, SCSI_QUEUE_DELAY);
1779} 1779}
1780 1780
diff --git a/fs/aio.c b/fs/aio.c
index bd7ec2cc2674..ae635872affb 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -192,7 +192,6 @@ static struct file *aio_private_file(struct kioctx *ctx, loff_t nr_pages)
192 } 192 }
193 193
194 file->f_flags = O_RDWR; 194 file->f_flags = O_RDWR;
195 file->private_data = ctx;
196 return file; 195 return file;
197} 196}
198 197
@@ -202,7 +201,7 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
202 static const struct dentry_operations ops = { 201 static const struct dentry_operations ops = {
203 .d_dname = simple_dname, 202 .d_dname = simple_dname,
204 }; 203 };
205 return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1); 204 return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
206} 205}
207 206
208/* aio_setup 207/* aio_setup
@@ -556,8 +555,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
556 struct aio_ring *ring; 555 struct aio_ring *ring;
557 556
558 spin_lock(&mm->ioctx_lock); 557 spin_lock(&mm->ioctx_lock);
559 rcu_read_lock(); 558 table = rcu_dereference_raw(mm->ioctx_table);
560 table = rcu_dereference(mm->ioctx_table);
561 559
562 while (1) { 560 while (1) {
563 if (table) 561 if (table)
@@ -565,7 +563,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
565 if (!table->table[i]) { 563 if (!table->table[i]) {
566 ctx->id = i; 564 ctx->id = i;
567 table->table[i] = ctx; 565 table->table[i] = ctx;
568 rcu_read_unlock();
569 spin_unlock(&mm->ioctx_lock); 566 spin_unlock(&mm->ioctx_lock);
570 567
571 /* While kioctx setup is in progress, 568 /* While kioctx setup is in progress,
@@ -579,8 +576,6 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
579 } 576 }
580 577
581 new_nr = (table ? table->nr : 1) * 4; 578 new_nr = (table ? table->nr : 1) * 4;
582
583 rcu_read_unlock();
584 spin_unlock(&mm->ioctx_lock); 579 spin_unlock(&mm->ioctx_lock);
585 580
586 table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) * 581 table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
@@ -591,8 +586,7 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
591 table->nr = new_nr; 586 table->nr = new_nr;
592 587
593 spin_lock(&mm->ioctx_lock); 588 spin_lock(&mm->ioctx_lock);
594 rcu_read_lock(); 589 old = rcu_dereference_raw(mm->ioctx_table);
595 old = rcu_dereference(mm->ioctx_table);
596 590
597 if (!old) { 591 if (!old) {
598 rcu_assign_pointer(mm->ioctx_table, table); 592 rcu_assign_pointer(mm->ioctx_table, table);
@@ -739,12 +733,9 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
739 733
740 734
741 spin_lock(&mm->ioctx_lock); 735 spin_lock(&mm->ioctx_lock);
742 rcu_read_lock(); 736 table = rcu_dereference_raw(mm->ioctx_table);
743 table = rcu_dereference(mm->ioctx_table);
744
745 WARN_ON(ctx != table->table[ctx->id]); 737 WARN_ON(ctx != table->table[ctx->id]);
746 table->table[ctx->id] = NULL; 738 table->table[ctx->id] = NULL;
747 rcu_read_unlock();
748 spin_unlock(&mm->ioctx_lock); 739 spin_unlock(&mm->ioctx_lock);
749 740
750 /* percpu_ref_kill() will do the necessary call_rcu() */ 741 /* percpu_ref_kill() will do the necessary call_rcu() */
@@ -793,40 +784,30 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
793 */ 784 */
794void exit_aio(struct mm_struct *mm) 785void exit_aio(struct mm_struct *mm)
795{ 786{
796 struct kioctx_table *table; 787 struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
797 struct kioctx *ctx; 788 int i;
798 unsigned i = 0;
799
800 while (1) {
801 rcu_read_lock();
802 table = rcu_dereference(mm->ioctx_table);
803
804 do {
805 if (!table || i >= table->nr) {
806 rcu_read_unlock();
807 rcu_assign_pointer(mm->ioctx_table, NULL);
808 if (table)
809 kfree(table);
810 return;
811 }
812 789
813 ctx = table->table[i++]; 790 if (!table)
814 } while (!ctx); 791 return;
815 792
816 rcu_read_unlock(); 793 for (i = 0; i < table->nr; ++i) {
794 struct kioctx *ctx = table->table[i];
817 795
796 if (!ctx)
797 continue;
818 /* 798 /*
819 * We don't need to bother with munmap() here - 799 * We don't need to bother with munmap() here - exit_mmap(mm)
820 * exit_mmap(mm) is coming and it'll unmap everything. 800 * is coming and it'll unmap everything. And we simply can't,
821 * Since aio_free_ring() uses non-zero ->mmap_size 801 * this is not necessarily our ->mm.
822 * as indicator that it needs to unmap the area, 802 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
823 * just set it to 0; aio_free_ring() is the only 803 * that it needs to unmap the area, just set it to 0.
824 * place that uses ->mmap_size, so it's safe.
825 */ 804 */
826 ctx->mmap_size = 0; 805 ctx->mmap_size = 0;
827
828 kill_ioctx(mm, ctx, NULL); 806 kill_ioctx(mm, ctx, NULL);
829 } 807 }
808
809 RCU_INIT_POINTER(mm->ioctx_table, NULL);
810 kfree(table);
830} 811}
831 812
832static void put_reqs_available(struct kioctx *ctx, unsigned nr) 813static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -834,10 +815,8 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
834 struct kioctx_cpu *kcpu; 815 struct kioctx_cpu *kcpu;
835 unsigned long flags; 816 unsigned long flags;
836 817
837 preempt_disable();
838 kcpu = this_cpu_ptr(ctx->cpu);
839
840 local_irq_save(flags); 818 local_irq_save(flags);
819 kcpu = this_cpu_ptr(ctx->cpu);
841 kcpu->reqs_available += nr; 820 kcpu->reqs_available += nr;
842 821
843 while (kcpu->reqs_available >= ctx->req_batch * 2) { 822 while (kcpu->reqs_available >= ctx->req_batch * 2) {
@@ -846,7 +825,6 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
846 } 825 }
847 826
848 local_irq_restore(flags); 827 local_irq_restore(flags);
849 preempt_enable();
850} 828}
851 829
852static bool get_reqs_available(struct kioctx *ctx) 830static bool get_reqs_available(struct kioctx *ctx)
@@ -855,10 +833,8 @@ static bool get_reqs_available(struct kioctx *ctx)
855 bool ret = false; 833 bool ret = false;
856 unsigned long flags; 834 unsigned long flags;
857 835
858 preempt_disable();
859 kcpu = this_cpu_ptr(ctx->cpu);
860
861 local_irq_save(flags); 836 local_irq_save(flags);
837 kcpu = this_cpu_ptr(ctx->cpu);
862 if (!kcpu->reqs_available) { 838 if (!kcpu->reqs_available) {
863 int old, avail = atomic_read(&ctx->reqs_available); 839 int old, avail = atomic_read(&ctx->reqs_available);
864 840
@@ -878,7 +854,6 @@ static bool get_reqs_available(struct kioctx *ctx)
878 kcpu->reqs_available--; 854 kcpu->reqs_available--;
879out: 855out:
880 local_irq_restore(flags); 856 local_irq_restore(flags);
881 preempt_enable();
882 return ret; 857 return ret;
883} 858}
884 859
@@ -1047,7 +1022,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
1047} 1022}
1048EXPORT_SYMBOL(aio_complete); 1023EXPORT_SYMBOL(aio_complete);
1049 1024
1050/* aio_read_events 1025/* aio_read_events_ring
1051 * Pull an event off of the ioctx's event ring. Returns the number of 1026 * Pull an event off of the ioctx's event ring. Returns the number of
1052 * events fetched 1027 * events fetched
1053 */ 1028 */
@@ -1270,12 +1245,12 @@ static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
1270 if (compat) 1245 if (compat)
1271 ret = compat_rw_copy_check_uvector(rw, 1246 ret = compat_rw_copy_check_uvector(rw,
1272 (struct compat_iovec __user *)buf, 1247 (struct compat_iovec __user *)buf,
1273 *nr_segs, 1, *iovec, iovec); 1248 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1274 else 1249 else
1275#endif 1250#endif
1276 ret = rw_copy_check_uvector(rw, 1251 ret = rw_copy_check_uvector(rw,
1277 (struct iovec __user *)buf, 1252 (struct iovec __user *)buf,
1278 *nr_segs, 1, *iovec, iovec); 1253 *nr_segs, UIO_FASTIOV, *iovec, iovec);
1279 if (ret < 0) 1254 if (ret < 0)
1280 return ret; 1255 return ret;
1281 1256
@@ -1299,9 +1274,8 @@ static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
1299} 1274}
1300 1275
1301/* 1276/*
1302 * aio_setup_iocb: 1277 * aio_run_iocb:
1303 * Performs the initial checks and aio retry method 1278 * Performs the initial checks and io submission.
1304 * setup for the kiocb at the time of io submission.
1305 */ 1279 */
1306static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, 1280static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1307 char __user *buf, bool compat) 1281 char __user *buf, bool compat)
@@ -1313,7 +1287,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
1313 fmode_t mode; 1287 fmode_t mode;
1314 aio_rw_op *rw_op; 1288 aio_rw_op *rw_op;
1315 rw_iter_op *iter_op; 1289 rw_iter_op *iter_op;
1316 struct iovec inline_vec, *iovec = &inline_vec; 1290 struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
1317 struct iov_iter iter; 1291 struct iov_iter iter;
1318 1292
1319 switch (opcode) { 1293 switch (opcode) {
@@ -1348,7 +1322,7 @@ rw_common:
1348 if (!ret) 1322 if (!ret)
1349 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes); 1323 ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
1350 if (ret < 0) { 1324 if (ret < 0) {
1351 if (iovec != &inline_vec) 1325 if (iovec != inline_vecs)
1352 kfree(iovec); 1326 kfree(iovec);
1353 return ret; 1327 return ret;
1354 } 1328 }
@@ -1395,7 +1369,7 @@ rw_common:
1395 return -EINVAL; 1369 return -EINVAL;
1396 } 1370 }
1397 1371
1398 if (iovec != &inline_vec) 1372 if (iovec != inline_vecs)
1399 kfree(iovec); 1373 kfree(iovec);
1400 1374
1401 if (ret != -EIOCBQUEUED) { 1375 if (ret != -EIOCBQUEUED) {
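Besides the RCU simplification (the ioctx table is only ever changed under mm->ioctx_lock, so rcu_dereference_raw() under that spinlock suffices), the aio hunks replace the single inline iovec with an inline_vecs[UIO_FASTIOV] array, matching the fast-path capacity rw_copy_check_uvector() is now told about. A self-contained userspace sketch of that stack-fast-path/heap-fallback pattern; everything except the kernel's UIO_FASTIOV value of 8 is a stand-in:

#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

#define UIO_FASTIOV 8   /* kernel value; the rest is a stand-in */

static struct iovec *copy_iovec(const struct iovec *uvec, int nr,
				struct iovec *fast)
{
	struct iovec *vec = fast;

	if (nr > UIO_FASTIOV) {
		vec = malloc(nr * sizeof(*vec));   /* slow path */
		if (!vec)
			return NULL;
	}
	memcpy(vec, uvec, nr * sizeof(*vec));
	return vec;
}

static void submit(const struct iovec *uvec, int nr)
{
	struct iovec inline_vecs[UIO_FASTIOV], *iovec;

	iovec = copy_iovec(uvec, nr, inline_vecs);
	if (!iovec)
		return;
	/* ... hand iovec off to the I/O path ... */
	if (iovec != inline_vecs)   /* only the heap copy is freed */
		free(iovec);
}

int main(void)
{
	struct iovec v[2] = { { 0, 0 }, { 0, 0 } };

	submit(v, 2);   /* nr <= UIO_FASTIOV: no allocation at all */
	return 0;
}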
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e25564bfcb46..54a201dac7f9 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -276,9 +276,8 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
276 } 276 }
277 if (ret > 0) 277 if (ret > 0)
278 goto next; 278 goto next;
279 ret = ulist_add_merge(parents, eb->start, 279 ret = ulist_add_merge_ptr(parents, eb->start,
280 (uintptr_t)eie, 280 eie, (void **)&old, GFP_NOFS);
281 (u64 *)&old, GFP_NOFS);
282 if (ret < 0) 281 if (ret < 0)
283 break; 282 break;
284 if (!ret && extent_item_pos) { 283 if (!ret && extent_item_pos) {
@@ -1001,16 +1000,19 @@ again:
1001 ret = -EIO; 1000 ret = -EIO;
1002 goto out; 1001 goto out;
1003 } 1002 }
1003 btrfs_tree_read_lock(eb);
1004 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
1004 ret = find_extent_in_eb(eb, bytenr, 1005 ret = find_extent_in_eb(eb, bytenr,
1005 *extent_item_pos, &eie); 1006 *extent_item_pos, &eie);
1007 btrfs_tree_read_unlock_blocking(eb);
1006 free_extent_buffer(eb); 1008 free_extent_buffer(eb);
1007 if (ret < 0) 1009 if (ret < 0)
1008 goto out; 1010 goto out;
1009 ref->inode_list = eie; 1011 ref->inode_list = eie;
1010 } 1012 }
1011 ret = ulist_add_merge(refs, ref->parent, 1013 ret = ulist_add_merge_ptr(refs, ref->parent,
1012 (uintptr_t)ref->inode_list, 1014 ref->inode_list,
1013 (u64 *)&eie, GFP_NOFS); 1015 (void **)&eie, GFP_NOFS);
1014 if (ret < 0) 1016 if (ret < 0)
1015 goto out; 1017 goto out;
1016 if (!ret && extent_item_pos) { 1018 if (!ret && extent_item_pos) {
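The backref.c hunks switch to ulist_add_merge_ptr() because the old calls smuggled pointers through a u64 aux value: reading the old aux back through (u64 *)&old stores 8 bytes into a pointer-sized object, which corrupts the stack on 32-bit builds. A userspace sketch of the difference; the two helpers only model the shapes of the old and new ulist signatures:

#include <stdint.h>
#include <stdio.h>

/* old shape: aux travels as a u64 */
static void add_merge_u64(uint64_t aux_in, uint64_t *aux_out)
{
	*aux_out = aux_in;            /* always an 8-byte store */
}

/* new shape: aux stays pointer-sized end to end */
static void add_merge_ptr(void *aux_in, void **aux_out)
{
	*aux_out = aux_in;            /* sizeof(void *) store */
}

int main(void)
{
	int x = 42;
	uint64_t aux64;
	void *old;

	/* safe only into a real u64 ... */
	add_merge_u64((uintptr_t)&x, &aux64);
	/* ... the removed call sites instead did the equivalent of
	 *   add_merge_u64((uintptr_t)&x, (uint64_t *)&old);
	 * which on 32-bit writes 8 bytes into a 4-byte pointer. */

	add_merge_ptr(&x, &old);
	printf("%d\n", *(int *)old);
	return 0;
}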
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923c410c..43527fd78825 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -84,12 +84,6 @@ struct btrfs_inode {
84 */ 84 */
85 struct list_head delalloc_inodes; 85 struct list_head delalloc_inodes;
86 86
87 /*
88 * list for tracking inodes that must be sent to disk before a
89 * rename or truncate commit
90 */
91 struct list_head ordered_operations;
92
93 /* node for the red-black tree that links inodes in subvolume root */ 87 /* node for the red-black tree that links inodes in subvolume root */
94 struct rb_node rb_node; 88 struct rb_node rb_node;
95 89
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453b8e24..44ee5d2e52a4 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,9 +280,9 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
280 280
281 WARN_ON(btrfs_header_generation(buf) > trans->transid); 281 WARN_ON(btrfs_header_generation(buf) > trans->transid);
282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) 282 if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
283 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 283 ret = btrfs_inc_ref(trans, root, cow, 1);
284 else 284 else
285 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 285 ret = btrfs_inc_ref(trans, root, cow, 0);
286 286
287 if (ret) 287 if (ret)
288 return ret; 288 return ret;
@@ -1035,14 +1035,14 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1035 if ((owner == root->root_key.objectid || 1035 if ((owner == root->root_key.objectid ||
1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 1036 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1037 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1038 ret = btrfs_inc_ref(trans, root, buf, 1, 1); 1038 ret = btrfs_inc_ref(trans, root, buf, 1);
1039 BUG_ON(ret); /* -ENOMEM */ 1039 BUG_ON(ret); /* -ENOMEM */
1040 1040
1041 if (root->root_key.objectid == 1041 if (root->root_key.objectid ==
1042 BTRFS_TREE_RELOC_OBJECTID) { 1042 BTRFS_TREE_RELOC_OBJECTID) {
1043 ret = btrfs_dec_ref(trans, root, buf, 0, 1); 1043 ret = btrfs_dec_ref(trans, root, buf, 0);
1044 BUG_ON(ret); /* -ENOMEM */ 1044 BUG_ON(ret); /* -ENOMEM */
1045 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1045 ret = btrfs_inc_ref(trans, root, cow, 1);
1046 BUG_ON(ret); /* -ENOMEM */ 1046 BUG_ON(ret); /* -ENOMEM */
1047 } 1047 }
1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 1048 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -1050,9 +1050,9 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1050 1050
1051 if (root->root_key.objectid == 1051 if (root->root_key.objectid ==
1052 BTRFS_TREE_RELOC_OBJECTID) 1052 BTRFS_TREE_RELOC_OBJECTID)
1053 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1053 ret = btrfs_inc_ref(trans, root, cow, 1);
1054 else 1054 else
1055 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1055 ret = btrfs_inc_ref(trans, root, cow, 0);
1056 BUG_ON(ret); /* -ENOMEM */ 1056 BUG_ON(ret); /* -ENOMEM */
1057 } 1057 }
1058 if (new_flags != 0) { 1058 if (new_flags != 0) {
@@ -1069,11 +1069,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 1069 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
1070 if (root->root_key.objectid == 1070 if (root->root_key.objectid ==
1071 BTRFS_TREE_RELOC_OBJECTID) 1071 BTRFS_TREE_RELOC_OBJECTID)
1072 ret = btrfs_inc_ref(trans, root, cow, 1, 1); 1072 ret = btrfs_inc_ref(trans, root, cow, 1);
1073 else 1073 else
1074 ret = btrfs_inc_ref(trans, root, cow, 0, 1); 1074 ret = btrfs_inc_ref(trans, root, cow, 0);
1075 BUG_ON(ret); /* -ENOMEM */ 1075 BUG_ON(ret); /* -ENOMEM */
1076 ret = btrfs_dec_ref(trans, root, buf, 1, 1); 1076 ret = btrfs_dec_ref(trans, root, buf, 1);
1077 BUG_ON(ret); /* -ENOMEM */ 1077 BUG_ON(ret); /* -ENOMEM */
1078 } 1078 }
1079 clean_tree_block(trans, root, buf); 1079 clean_tree_block(trans, root, buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397f4e92..8e29b614fe93 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3326,9 +3326,9 @@ int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte, 3326 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
3327 struct btrfs_key *ins, int is_data, int delalloc); 3327 struct btrfs_key *ins, int is_data, int delalloc);
3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3328int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3329 struct extent_buffer *buf, int full_backref, int no_quota); 3329 struct extent_buffer *buf, int full_backref);
3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3330int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3331 struct extent_buffer *buf, int full_backref, int no_quota); 3331 struct extent_buffer *buf, int full_backref);
3332int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 3332int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
3333 struct btrfs_root *root, 3333 struct btrfs_root *root,
3334 u64 bytenr, u64 num_bytes, u64 flags, 3334 u64 bytenr, u64 num_bytes, u64 flags,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9cf2aa..d0ed9e664f7d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,8 +60,6 @@ static void end_workqueue_fn(struct btrfs_work *work);
60static void free_fs_root(struct btrfs_root *root); 60static void free_fs_root(struct btrfs_root *root);
61static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, 61static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
62 int read_only); 62 int read_only);
63static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
64 struct btrfs_root *root);
65static void btrfs_destroy_ordered_extents(struct btrfs_root *root); 63static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
66static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, 64static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
67 struct btrfs_root *root); 65 struct btrfs_root *root);
@@ -3829,34 +3827,6 @@ static void btrfs_error_commit_super(struct btrfs_root *root)
3829 btrfs_cleanup_transaction(root); 3827 btrfs_cleanup_transaction(root);
3830} 3828}
3831 3829
3832static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3833 struct btrfs_root *root)
3834{
3835 struct btrfs_inode *btrfs_inode;
3836 struct list_head splice;
3837
3838 INIT_LIST_HEAD(&splice);
3839
3840 mutex_lock(&root->fs_info->ordered_operations_mutex);
3841 spin_lock(&root->fs_info->ordered_root_lock);
3842
3843 list_splice_init(&t->ordered_operations, &splice);
3844 while (!list_empty(&splice)) {
3845 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3846 ordered_operations);
3847
3848 list_del_init(&btrfs_inode->ordered_operations);
3849 spin_unlock(&root->fs_info->ordered_root_lock);
3850
3851 btrfs_invalidate_inodes(btrfs_inode->root);
3852
3853 spin_lock(&root->fs_info->ordered_root_lock);
3854 }
3855
3856 spin_unlock(&root->fs_info->ordered_root_lock);
3857 mutex_unlock(&root->fs_info->ordered_operations_mutex);
3858}
3859
3860static void btrfs_destroy_ordered_extents(struct btrfs_root *root) 3830static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3861{ 3831{
3862 struct btrfs_ordered_extent *ordered; 3832 struct btrfs_ordered_extent *ordered;
@@ -4093,8 +4063,6 @@ again:
4093void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4063void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4094 struct btrfs_root *root) 4064 struct btrfs_root *root)
4095{ 4065{
4096 btrfs_destroy_ordered_operations(cur_trans, root);
4097
4098 btrfs_destroy_delayed_refs(cur_trans, root); 4066 btrfs_destroy_delayed_refs(cur_trans, root);
4099 4067
4100 cur_trans->state = TRANS_STATE_COMMIT_START; 4068 cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f362f9..102ed3143976 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3057,7 +3057,7 @@ out:
3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 3057static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3058 struct btrfs_root *root, 3058 struct btrfs_root *root,
3059 struct extent_buffer *buf, 3059 struct extent_buffer *buf,
3060 int full_backref, int inc, int no_quota) 3060 int full_backref, int inc)
3061{ 3061{
3062 u64 bytenr; 3062 u64 bytenr;
3063 u64 num_bytes; 3063 u64 num_bytes;
@@ -3111,7 +3111,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3111 key.offset -= btrfs_file_extent_offset(buf, fi); 3111 key.offset -= btrfs_file_extent_offset(buf, fi);
3112 ret = process_func(trans, root, bytenr, num_bytes, 3112 ret = process_func(trans, root, bytenr, num_bytes,
3113 parent, ref_root, key.objectid, 3113 parent, ref_root, key.objectid,
3114 key.offset, no_quota); 3114 key.offset, 1);
3115 if (ret) 3115 if (ret)
3116 goto fail; 3116 goto fail;
3117 } else { 3117 } else {
@@ -3119,7 +3119,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
3119 num_bytes = btrfs_level_size(root, level - 1); 3119 num_bytes = btrfs_level_size(root, level - 1);
3120 ret = process_func(trans, root, bytenr, num_bytes, 3120 ret = process_func(trans, root, bytenr, num_bytes,
3121 parent, ref_root, level - 1, 0, 3121 parent, ref_root, level - 1, 0,
3122 no_quota); 3122 1);
3123 if (ret) 3123 if (ret)
3124 goto fail; 3124 goto fail;
3125 } 3125 }
@@ -3130,15 +3130,15 @@ fail:
3130} 3130}
3131 3131
3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3132int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3133 struct extent_buffer *buf, int full_backref, int no_quota) 3133 struct extent_buffer *buf, int full_backref)
3134{ 3134{
3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota); 3135 return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
3136} 3136}
3137 3137
3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 3138int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3139 struct extent_buffer *buf, int full_backref, int no_quota) 3139 struct extent_buffer *buf, int full_backref)
3140{ 3140{
3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota); 3141 return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
3142} 3142}
3143 3143
3144static int write_one_cache_group(struct btrfs_trans_handle *trans, 3144static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -7478,6 +7478,220 @@ reada:
7478 wc->reada_slot = slot; 7478 wc->reada_slot = slot;
7479} 7479}
7480 7480
7481static int account_leaf_items(struct btrfs_trans_handle *trans,
7482 struct btrfs_root *root,
7483 struct extent_buffer *eb)
7484{
7485 int nr = btrfs_header_nritems(eb);
7486 int i, extent_type, ret;
7487 struct btrfs_key key;
7488 struct btrfs_file_extent_item *fi;
7489 u64 bytenr, num_bytes;
7490
7491 for (i = 0; i < nr; i++) {
7492 btrfs_item_key_to_cpu(eb, &key, i);
7493
7494 if (key.type != BTRFS_EXTENT_DATA_KEY)
7495 continue;
7496
7497 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
7498 /* filter out non qgroup-accountable extents */
7499 extent_type = btrfs_file_extent_type(eb, fi);
7500
7501 if (extent_type == BTRFS_FILE_EXTENT_INLINE)
7502 continue;
7503
7504 bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
7505 if (!bytenr)
7506 continue;
7507
7508 num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
7509
7510 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7511 root->objectid,
7512 bytenr, num_bytes,
7513 BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
7514 if (ret)
7515 return ret;
7516 }
7517 return 0;
7518}
7519
7520/*
7521 * Walk up the tree from the bottom, freeing leaves and any interior
7522 * nodes which have had all slots visited. If a node (leaf or
7523 * interior) is freed, the node above it will have its slot
7524 * incremented. The root node will never be freed.
7525 *
7526 * At the end of this function, we should have a path which has all
7527 * slots incremented to the next position for a search. If we need to
7528 * read a new node it will be NULL and the node above it will have the
7529 * correct slot selected for a later read.
7530 *
7531 * If we increment the root node's slot counter past the number of
7532 * elements, 1 is returned to signal completion of the search.
7533 */
7534static int adjust_slots_upwards(struct btrfs_root *root,
7535 struct btrfs_path *path, int root_level)
7536{
7537 int level = 0;
7538 int nr, slot;
7539 struct extent_buffer *eb;
7540
7541 if (root_level == 0)
7542 return 1;
7543
7544 while (level <= root_level) {
7545 eb = path->nodes[level];
7546 nr = btrfs_header_nritems(eb);
7547 path->slots[level]++;
7548 slot = path->slots[level];
7549 if (slot >= nr || level == 0) {
7550 /*
7551 * Don't free the root - we will detect this
7552 * condition after our loop and return a
7553 * positive value for the caller to stop walking the tree.
7554 */
7555 if (level != root_level) {
7556 btrfs_tree_unlock_rw(eb, path->locks[level]);
7557 path->locks[level] = 0;
7558
7559 free_extent_buffer(eb);
7560 path->nodes[level] = NULL;
7561 path->slots[level] = 0;
7562 }
7563 } else {
7564 /*
7565 * We have a valid slot to walk back down
7566 * from. Stop here so caller can process these
7567 * new nodes.
7568 */
7569 break;
7570 }
7571
7572 level++;
7573 }
7574
7575 eb = path->nodes[root_level];
7576 if (path->slots[root_level] >= btrfs_header_nritems(eb))
7577 return 1;
7578
7579 return 0;
7580}
7581
7582/*
7583 * root_eb is the subtree root and is locked before this function is called.
7584 */
7585static int account_shared_subtree(struct btrfs_trans_handle *trans,
7586 struct btrfs_root *root,
7587 struct extent_buffer *root_eb,
7588 u64 root_gen,
7589 int root_level)
7590{
7591 int ret = 0;
7592 int level;
7593 struct extent_buffer *eb = root_eb;
7594 struct btrfs_path *path = NULL;
7595
7596 BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
7597 BUG_ON(root_eb == NULL);
7598
7599 if (!root->fs_info->quota_enabled)
7600 return 0;
7601
7602 if (!extent_buffer_uptodate(root_eb)) {
7603 ret = btrfs_read_buffer(root_eb, root_gen);
7604 if (ret)
7605 goto out;
7606 }
7607
7608 if (root_level == 0) {
7609 ret = account_leaf_items(trans, root, root_eb);
7610 goto out;
7611 }
7612
7613 path = btrfs_alloc_path();
7614 if (!path)
7615 return -ENOMEM;
7616
7617 /*
7618 * Walk down the tree. Missing extent blocks are filled in as
7619 * we go. Metadata is accounted every time we read a new
7620 * extent block.
7621 *
7622 * When we reach a leaf, we account for file extent items in it,
7623 * walk back up the tree (adjusting slot pointers as we go)
7624 * and restart the search process.
7625 */
7626 extent_buffer_get(root_eb); /* For path */
7627 path->nodes[root_level] = root_eb;
7628 path->slots[root_level] = 0;
7629 path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
7630walk_down:
7631 level = root_level;
7632 while (level >= 0) {
7633 if (path->nodes[level] == NULL) {
7634 int child_bsize = root->nodesize;
7635 int parent_slot;
7636 u64 child_gen;
7637 u64 child_bytenr;
7638
7639 /* We need to get child blockptr/gen from
7640 * parent before we can read it. */
7641 eb = path->nodes[level + 1];
7642 parent_slot = path->slots[level + 1];
7643 child_bytenr = btrfs_node_blockptr(eb, parent_slot);
7644 child_gen = btrfs_node_ptr_generation(eb, parent_slot);
7645
7646 eb = read_tree_block(root, child_bytenr, child_bsize,
7647 child_gen);
7648 if (!eb || !extent_buffer_uptodate(eb)) {
7649 ret = -EIO;
7650 goto out;
7651 }
7652
7653 path->nodes[level] = eb;
7654 path->slots[level] = 0;
7655
7656 btrfs_tree_read_lock(eb);
7657 btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
7658 path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
7659
7660 ret = btrfs_qgroup_record_ref(trans, root->fs_info,
7661 root->objectid,
7662 child_bytenr,
7663 child_bsize,
7664 BTRFS_QGROUP_OPER_SUB_SUBTREE,
7665 0);
7666 if (ret)
7667 goto out;
7668
7669 }
7670
7671 if (level == 0) {
7672 ret = account_leaf_items(trans, root, path->nodes[level]);
7673 if (ret)
7674 goto out;
7675
7676 /* Nonzero return here means we completed our search */
7677 ret = adjust_slots_upwards(root, path, root_level);
7678 if (ret)
7679 break;
7680
7681 /* Restart search with new slots */
7682 goto walk_down;
7683 }
7684
7685 level--;
7686 }
7687
7688 ret = 0;
7689out:
7690 btrfs_free_path(path);
7691
7692 return ret;
7693}
7694
7481/* 7695/*
7482 * helper to process tree block while walking down the tree. 7696 * helper to process tree block while walking down the tree.
7483 * 7697 *
@@ -7532,9 +7746,9 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
7532 /* wc->stage == UPDATE_BACKREF */ 7746 /* wc->stage == UPDATE_BACKREF */
7533 if (!(wc->flags[level] & flag)) { 7747 if (!(wc->flags[level] & flag)) {
7534 BUG_ON(!path->locks[level]); 7748 BUG_ON(!path->locks[level]);
7535 ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc); 7749 ret = btrfs_inc_ref(trans, root, eb, 1);
7536 BUG_ON(ret); /* -ENOMEM */ 7750 BUG_ON(ret); /* -ENOMEM */
7537 ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc); 7751 ret = btrfs_dec_ref(trans, root, eb, 0);
7538 BUG_ON(ret); /* -ENOMEM */ 7752 BUG_ON(ret); /* -ENOMEM */
7539 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 7753 ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
7540 eb->len, flag, 7754 eb->len, flag,
@@ -7581,6 +7795,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7581 int level = wc->level; 7795 int level = wc->level;
7582 int reada = 0; 7796 int reada = 0;
7583 int ret = 0; 7797 int ret = 0;
7798 bool need_account = false;
7584 7799
7585 generation = btrfs_node_ptr_generation(path->nodes[level], 7800 generation = btrfs_node_ptr_generation(path->nodes[level],
7586 path->slots[level]); 7801 path->slots[level]);
@@ -7626,6 +7841,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
7626 7841
7627 if (wc->stage == DROP_REFERENCE) { 7842 if (wc->stage == DROP_REFERENCE) {
7628 if (wc->refs[level - 1] > 1) { 7843 if (wc->refs[level - 1] > 1) {
7844 need_account = true;
7629 if (level == 1 && 7845 if (level == 1 &&
7630 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 7846 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
7631 goto skip; 7847 goto skip;
@@ -7689,6 +7905,16 @@ skip:
7689 parent = 0; 7905 parent = 0;
7690 } 7906 }
7691 7907
7908 if (need_account) {
7909 ret = account_shared_subtree(trans, root, next,
7910 generation, level - 1);
7911 if (ret) {
7912 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
7913 "%d accounting shared subtree. Quota "
7914 "is out of sync, rescan required.\n",
7915 root->fs_info->sb->s_id, ret);
7916 }
7917 }
7692 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 7918 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
7693 root->root_key.objectid, level - 1, 0, 0); 7919 root->root_key.objectid, level - 1, 0, 0);
7694 BUG_ON(ret); /* -ENOMEM */ 7920 BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7995,17 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
7769 if (wc->refs[level] == 1) { 7995 if (wc->refs[level] == 1) {
7770 if (level == 0) { 7996 if (level == 0) {
7771 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 7997 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
7772 ret = btrfs_dec_ref(trans, root, eb, 1, 7998 ret = btrfs_dec_ref(trans, root, eb, 1);
7773 wc->for_reloc);
7774 else 7999 else
7775 ret = btrfs_dec_ref(trans, root, eb, 0, 8000 ret = btrfs_dec_ref(trans, root, eb, 0);
7776 wc->for_reloc);
7777 BUG_ON(ret); /* -ENOMEM */ 8001 BUG_ON(ret); /* -ENOMEM */
8002 ret = account_leaf_items(trans, root, eb);
8003 if (ret) {
8004 printk_ratelimited(KERN_ERR "BTRFS: %s Error "
8005 "%d accounting leaf items. Quota "
8006 "is out of sync, rescan required.\n",
8007 root->fs_info->sb->s_id, ret);
8008 }
7778 } 8009 }
7779 /* make block locked assertion in clean_tree_block happy */ 8010 /* make block locked assertion in clean_tree_block happy */
7780 if (!path->locks[level] && 8011 if (!path->locks[level] &&
@@ -7900,6 +8131,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
7900 int level; 8131 int level;
7901 bool root_dropped = false; 8132 bool root_dropped = false;
7902 8133
8134 btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
8135
7903 path = btrfs_alloc_path(); 8136 path = btrfs_alloc_path();
7904 if (!path) { 8137 if (!path) {
7905 err = -ENOMEM; 8138 err = -ENOMEM;
@@ -8025,6 +8258,24 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8025 goto out_end_trans; 8258 goto out_end_trans;
8026 } 8259 }
8027 8260
8261 /*
8262 * Qgroup update accounting is run from
8263 * delayed ref handling. This usually works
8264 * out because delayed refs are normally the
8265 * only way qgroup updates are added. However,
8266 * we may have added updates during our tree
8267 * walk so run qgroups here to make sure we
8268 * don't lose any updates.
8269 */
8270 ret = btrfs_delayed_qgroup_accounting(trans,
8271 root->fs_info);
8272 if (ret)
8273 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8274 "running qgroup updates "
8275 "during snapshot delete. "
8276 "Quota is out of sync, "
8277 "rescan required.\n", ret);
8278
8028 btrfs_end_transaction_throttle(trans, tree_root); 8279 btrfs_end_transaction_throttle(trans, tree_root);
8029 if (!for_reloc && btrfs_need_cleaner_sleep(root)) { 8280 if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
8030 pr_debug("BTRFS: drop snapshot early exit\n"); 8281 pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8329,14 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
8078 } 8329 }
8079 root_dropped = true; 8330 root_dropped = true;
8080out_end_trans: 8331out_end_trans:
8332 ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
8333 if (ret)
8334 printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
8335 "running qgroup updates "
8336 "during snapshot delete. "
8337 "Quota is out of sync, "
8338 "rescan required.\n", ret);
8339
8081 btrfs_end_transaction_throttle(trans, tree_root); 8340 btrfs_end_transaction_throttle(trans, tree_root);
8082out_free: 8341out_free:
8083 kfree(wc); 8342 kfree(wc);
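The new qgroup accounting in extent-tree.c walks the dropped subtree iteratively: descend, reading any missing child node via the parent's current slot, account each leaf with account_leaf_items(), then let adjust_slots_upwards() bump the per-level slot cursors until some level still has an unvisited child (returning nonzero once the root's slots are exhausted). A compact userspace sketch of that slot-cursor walk over a generic array-backed tree; none of the btrfs locking, read, or accounting paths are modeled:

#include <stdio.h>

#define MAX_LEVEL 8

struct node {
	int nr;                  /* children (interior) or items (leaf) */
	struct node *child[4];   /* unused at leaf level                */
};

/* Bump per-level slot cursors; returns nonzero once the root's
 * slots are exhausted and the walk is complete. */
static int adjust_slots_upwards(struct node **nodes, int *slots,
				int root_level)
{
	int level;

	if (root_level == 0)
		return 1;

	for (level = 0; level <= root_level; level++) {
		slots[level]++;
		if (slots[level] < nodes[level]->nr && level != 0)
			return 0;          /* unvisited child found */
		if (level != root_level) { /* never drop the root   */
			nodes[level] = NULL;
			slots[level] = 0;
		}
	}
	return slots[root_level] >= nodes[root_level]->nr;
}

static void process_leaf(const struct node *leaf)
{
	printf("leaf with %d items\n", leaf->nr);
}

static void walk(struct node *root, int root_level)
{
	struct node *nodes[MAX_LEVEL] = { 0 };
	int slots[MAX_LEVEL] = { 0 };
	int level;

	nodes[root_level] = root;
walk_down:
	level = root_level;
	while (level >= 0) {
		if (!nodes[level])   /* re-read child via parent slot */
			nodes[level] =
				nodes[level + 1]->child[slots[level + 1]];
		if (level == 0) {
			process_leaf(nodes[0]);
			if (adjust_slots_upwards(nodes, slots, root_level))
				return;      /* search complete */
			goto walk_down;      /* restart with new slots */
		}
		level--;
	}
}

int main(void)
{
	struct node leaves[3] = { { .nr = 2 }, { .nr = 1 }, { .nr = 3 } };
	struct node root = {
		.nr = 3,
		.child = { &leaves[0], &leaves[1], &leaves[2] },
	};

	walk(&root, 1);
	return 0;
}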
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe45d686..54c84daec9b5 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -756,7 +756,7 @@ again:
756 found_next = 1; 756 found_next = 1;
757 if (ret != 0) 757 if (ret != 0)
758 goto insert; 758 goto insert;
759 slot = 0; 759 slot = path->slots[0];
760 } 760 }
761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 761 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID || 762 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1f2b99cb55ea..d3afac292d67 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1838,33 +1838,9 @@ out:
1838 1838
1839int btrfs_release_file(struct inode *inode, struct file *filp) 1839int btrfs_release_file(struct inode *inode, struct file *filp)
1840{ 1840{
1841 /*
1842 * ordered_data_close is set by settattr when we are about to truncate
1843 * a file from a non-zero size to a zero size. This tries to
1844 * flush down new bytes that may have been written if the
1845 * application were using truncate to replace a file in place.
1846 */
1847 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1848 &BTRFS_I(inode)->runtime_flags)) {
1849 struct btrfs_trans_handle *trans;
1850 struct btrfs_root *root = BTRFS_I(inode)->root;
1851
1852 /*
1853 * We need to block on a committing transaction to keep us from
1854 * throwing a ordered operation on to the list and causing
1855 * something like sync to deadlock trying to flush out this
1856 * inode.
1857 */
1858 trans = btrfs_start_transaction(root, 0);
1859 if (IS_ERR(trans))
1860 return PTR_ERR(trans);
1861 btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
1862 btrfs_end_transaction(trans, root);
1863 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1864 filemap_flush(inode->i_mapping);
1865 }
1866 if (filp->private_data) 1841 if (filp->private_data)
1867 btrfs_ioctl_trans_end(filp); 1842 btrfs_ioctl_trans_end(filp);
1843 filemap_flush(inode->i_mapping);
1868 return 0; 1844 return 0;
1869} 1845}
1870 1846
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3183742d6f0d..03708ef3deef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -709,6 +709,18 @@ retry:
709 unlock_extent(io_tree, async_extent->start, 709 unlock_extent(io_tree, async_extent->start,
710 async_extent->start + 710 async_extent->start +
711 async_extent->ram_size - 1); 711 async_extent->ram_size - 1);
712
713 /*
714 * we need to redirty the pages if we decide to
715 * fall back to uncompressed IO, otherwise we
716 * will not submit these pages down to lower
717 * layers.
718 */
719 extent_range_redirty_for_io(inode,
720 async_extent->start,
721 async_extent->start +
722 async_extent->ram_size - 1);
723
712 goto retry; 724 goto retry;
713 } 725 }
714 goto out_free; 726 goto out_free;
@@ -7939,27 +7951,6 @@ static int btrfs_truncate(struct inode *inode)
7939 BUG_ON(ret); 7951 BUG_ON(ret);
7940 7952
7941 /* 7953 /*
7942 * setattr is responsible for setting the ordered_data_close flag,
7943 * but that is only tested during the last file release. That
7944 * could happen well after the next commit, leaving a great big
7945 * window where new writes may get lost if someone chooses to write
7946 * to this file after truncating to zero
7947 *
7948 * The inode doesn't have any dirty data here, and so if we commit
7949 * this is a noop. If someone immediately starts writing to the inode
7950 * it is very likely we'll catch some of their writes in this
7951 * transaction, and the commit will find this file on the ordered
7952 * data list with good things to send down.
7953 *
7954 * This is a best effort solution, there is still a window where
7955 * using truncate to replace the contents of the file will
7956 * end up with a zero length file after a crash.
7957 */
7958 if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
7959 &BTRFS_I(inode)->runtime_flags))
7960 btrfs_add_ordered_operation(trans, root, inode);
7961
7962 /*
7963 * So if we truncate and then write and fsync we normally would just 7954 * So if we truncate and then write and fsync we normally would just
7964 * write the extents that changed, which is a problem if we need to 7955 * write the extents that changed, which is a problem if we need to
7965 * first truncate that entire inode. So set this flag so we write out 7956 * first truncate that entire inode. So set this flag so we write out
@@ -8106,7 +8097,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
8106 mutex_init(&ei->delalloc_mutex); 8097 mutex_init(&ei->delalloc_mutex);
8107 btrfs_ordered_inode_tree_init(&ei->ordered_tree); 8098 btrfs_ordered_inode_tree_init(&ei->ordered_tree);
8108 INIT_LIST_HEAD(&ei->delalloc_inodes); 8099 INIT_LIST_HEAD(&ei->delalloc_inodes);
8109 INIT_LIST_HEAD(&ei->ordered_operations);
8110 RB_CLEAR_NODE(&ei->rb_node); 8100 RB_CLEAR_NODE(&ei->rb_node);
8111 8101
8112 return inode; 8102 return inode;
@@ -8146,17 +8136,6 @@ void btrfs_destroy_inode(struct inode *inode)
8146 if (!root) 8136 if (!root)
8147 goto free; 8137 goto free;
8148 8138
8149 /*
8150 * Make sure we're properly removed from the ordered operation
8151 * lists.
8152 */
8153 smp_mb();
8154 if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
8155 spin_lock(&root->fs_info->ordered_root_lock);
8156 list_del_init(&BTRFS_I(inode)->ordered_operations);
8157 spin_unlock(&root->fs_info->ordered_root_lock);
8158 }
8159
8160 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 8139 if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
8161 &BTRFS_I(inode)->runtime_flags)) { 8140 &BTRFS_I(inode)->runtime_flags)) {
8162 btrfs_info(root->fs_info, "inode %llu still on the orphan list", 8141 btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8338,12 +8317,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8338 ret = 0; 8317 ret = 0;
8339 8318
8340 /* 8319 /*
8341 * we're using rename to replace one file with another. 8320 * we're using rename to replace one file with another. Start IO on it
8342 * and the replacement file is large. Start IO on it now so 8321 * now so we don't add too much work to the end of the transaction
8343 * we don't add too much work to the end of the transaction
8344 */ 8322 */
8345 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size && 8323 if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
8346 old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
8347 filemap_flush(old_inode->i_mapping); 8324 filemap_flush(old_inode->i_mapping);
8348 8325
8349 /* close the racy window with snapshot create/destroy ioctl */ 8326 /* close the racy window with snapshot create/destroy ioctl */
@@ -8391,12 +8368,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
8391 */ 8368 */
8392 btrfs_pin_log_trans(root); 8369 btrfs_pin_log_trans(root);
8393 } 8370 }
8394 /*
8395 * make sure the inode gets flushed if it is replacing
8396 * something.
8397 */
8398 if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
8399 btrfs_add_ordered_operation(trans, root, old_inode);
8400 8371
8401 inode_inc_iversion(old_dir); 8372 inode_inc_iversion(old_dir);
8402 inode_inc_iversion(new_dir); 8373 inode_inc_iversion(new_dir);
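The compression fallback fix in inode.c exists because pages handed to the async compressor are marked clean; if compression is abandoned and the range is resubmitted as plain buffered I/O, writeback skips clean pages and the data is silently dropped, hence the extent_range_redirty_for_io() call before the retry. A toy model of that rule; the single-flag page and the function names below are stand-ins, not btrfs code:

#include <stdbool.h>
#include <stdio.h>

struct page { bool dirty; };

static void writeback(struct page *p)
{
	if (!p->dirty) {
		puts("skipped (clean) - data lost without redirty");
		return;
	}
	p->dirty = false;
	puts("written");
}

static void compress_range(struct page *p, bool compressor_ok)
{
	p->dirty = false;            /* claimed by the compressor      */
	if (!compressor_ok) {
		p->dirty = true;     /* extent_range_redirty_for_io() */
		writeback(p);        /* retry as uncompressed I/O     */
	}
}

int main(void)
{
	struct page p = { .dirty = true };

	compress_range(&p, false);   /* fallback path: page is written */
	return 0;
}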
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7187b14faa6c..963895c1f801 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -571,18 +571,6 @@ void btrfs_remove_ordered_extent(struct inode *inode,
571 571
572 trace_btrfs_ordered_extent_remove(inode, entry); 572 trace_btrfs_ordered_extent_remove(inode, entry);
573 573
574 /*
575 * we have no more ordered extents for this inode and
576 * no dirty pages. We can safely remove it from the
577 * list of ordered extents
578 */
579 if (RB_EMPTY_ROOT(&tree->tree) &&
580 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
581 spin_lock(&root->fs_info->ordered_root_lock);
582 list_del_init(&BTRFS_I(inode)->ordered_operations);
583 spin_unlock(&root->fs_info->ordered_root_lock);
584 }
585
586 if (!root->nr_ordered_extents) { 574 if (!root->nr_ordered_extents) {
587 spin_lock(&root->fs_info->ordered_root_lock); 575 spin_lock(&root->fs_info->ordered_root_lock);
588 BUG_ON(list_empty(&root->ordered_root)); 576 BUG_ON(list_empty(&root->ordered_root));
@@ -687,81 +675,6 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr)
687} 675}
688 676
689/* 677/*
690 * this is used during transaction commit to write all the inodes
691 * added to the ordered operation list. These files must be fully on
692 * disk before the transaction commits.
693 *
694 * we have two modes here, one is to just start the IO via filemap_flush
695 * and the other is to wait for all the io. When we wait, we have an
696 * extra check to make sure the ordered operation list really is empty
697 * before we return
698 */
699int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
700 struct btrfs_root *root, int wait)
701{
702 struct btrfs_inode *btrfs_inode;
703 struct inode *inode;
704 struct btrfs_transaction *cur_trans = trans->transaction;
705 struct list_head splice;
706 struct list_head works;
707 struct btrfs_delalloc_work *work, *next;
708 int ret = 0;
709
710 INIT_LIST_HEAD(&splice);
711 INIT_LIST_HEAD(&works);
712
713 mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
714 spin_lock(&root->fs_info->ordered_root_lock);
715 list_splice_init(&cur_trans->ordered_operations, &splice);
716 while (!list_empty(&splice)) {
717 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
718 ordered_operations);
719 inode = &btrfs_inode->vfs_inode;
720
721 list_del_init(&btrfs_inode->ordered_operations);
722
723 /*
724 * the inode may be getting freed (in sys_unlink path).
725 */
726 inode = igrab(inode);
727 if (!inode)
728 continue;
729
730 if (!wait)
731 list_add_tail(&BTRFS_I(inode)->ordered_operations,
732 &cur_trans->ordered_operations);
733 spin_unlock(&root->fs_info->ordered_root_lock);
734
735 work = btrfs_alloc_delalloc_work(inode, wait, 1);
736 if (!work) {
737 spin_lock(&root->fs_info->ordered_root_lock);
738 if (list_empty(&BTRFS_I(inode)->ordered_operations))
739 list_add_tail(&btrfs_inode->ordered_operations,
740 &splice);
741 list_splice_tail(&splice,
742 &cur_trans->ordered_operations);
743 spin_unlock(&root->fs_info->ordered_root_lock);
744 ret = -ENOMEM;
745 goto out;
746 }
747 list_add_tail(&work->list, &works);
748 btrfs_queue_work(root->fs_info->flush_workers,
749 &work->work);
750
751 cond_resched();
752 spin_lock(&root->fs_info->ordered_root_lock);
753 }
754 spin_unlock(&root->fs_info->ordered_root_lock);
755out:
756 list_for_each_entry_safe(work, next, &works, list) {
757 list_del_init(&work->list);
758 btrfs_wait_and_free_delalloc_work(work);
759 }
760 mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
761 return ret;
762}
763
764/*
765 * Used to start IO or wait for a given ordered extent to finish. 678 * Used to start IO or wait for a given ordered extent to finish.
766 * 679 *
767 * If wait is one, this effectively waits on page writeback for all the pages 680 * If wait is one, this effectively waits on page writeback for all the pages
@@ -1120,42 +1033,6 @@ out:
1120 return index; 1033 return index;
1121} 1034}
1122 1035
1123
1124/*
1125 * add a given inode to the list of inodes that must be fully on
1126 * disk before a transaction commit finishes.
1127 *
1128 * This basically gives us the ext3 style data=ordered mode, and it is mostly
1129 * used to make sure renamed files are fully on disk.
1130 *
1131 * It is a noop if the inode is already fully on disk.
1132 *
1133 * If trans is not null, we'll do a friendly check for a transaction that
1134 * is already flushing things and force the IO down ourselves.
1135 */
1136void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
1137 struct btrfs_root *root, struct inode *inode)
1138{
1139 struct btrfs_transaction *cur_trans = trans->transaction;
1140 u64 last_mod;
1141
1142 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
1143
1144 /*
1145 * if this file hasn't been changed since the last transaction
1146 * commit, we can safely return without doing anything
1147 */
1148 if (last_mod <= root->fs_info->last_trans_committed)
1149 return;
1150
1151 spin_lock(&root->fs_info->ordered_root_lock);
1152 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
1153 list_add_tail(&BTRFS_I(inode)->ordered_operations,
1154 &cur_trans->ordered_operations);
1155 }
1156 spin_unlock(&root->fs_info->ordered_root_lock);
1157}
1158
1159int __init ordered_data_init(void) 1036int __init ordered_data_init(void)
1160{ 1037{
1161 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent", 1038 btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 246897058efb..d81a274d621e 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -190,11 +190,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
190 struct btrfs_ordered_extent *ordered); 190 struct btrfs_ordered_extent *ordered);
191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, 191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
192 u32 *sum, int len); 192 u32 *sum, int len);
193int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
194 struct btrfs_root *root, int wait);
195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root,
197 struct inode *inode);
198int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); 193int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
199void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); 194void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
200void btrfs_get_logged_extents(struct inode *inode, 195void btrfs_get_logged_extents(struct inode *inode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2630f9..b497498484be 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@ out:
1201 mutex_unlock(&fs_info->qgroup_ioctl_lock); 1201 mutex_unlock(&fs_info->qgroup_ioctl_lock);
1202 return ret; 1202 return ret;
1203} 1203}
1204
1205static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
1206 struct btrfs_qgroup_operation *oper2)
1207{
1208 /*
1209 * Ignore seq and type here, we're looking for any operation
1210 * at all related to this extent on that root.
1211 */
1212 if (oper1->bytenr < oper2->bytenr)
1213 return -1;
1214 if (oper1->bytenr > oper2->bytenr)
1215 return 1;
1216 if (oper1->ref_root < oper2->ref_root)
1217 return -1;
1218 if (oper1->ref_root > oper2->ref_root)
1219 return 1;
1220 return 0;
1221}
1222
1223static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
1224 struct btrfs_qgroup_operation *oper)
1225{
1226 struct rb_node *n;
1227 struct btrfs_qgroup_operation *cur;
1228 int cmp;
1229
1230 spin_lock(&fs_info->qgroup_op_lock);
1231 n = fs_info->qgroup_op_tree.rb_node;
1232 while (n) {
1233 cur = rb_entry(n, struct btrfs_qgroup_operation, n);
1234 cmp = comp_oper_exist(cur, oper);
1235 if (cmp < 0) {
1236 n = n->rb_right;
1237 } else if (cmp) {
1238 n = n->rb_left;
1239 } else {
1240 spin_unlock(&fs_info->qgroup_op_lock);
1241 return -EEXIST;
1242 }
1243 }
1244 spin_unlock(&fs_info->qgroup_op_lock);
1245 return 0;
1246}
1247
1204static int comp_oper(struct btrfs_qgroup_operation *oper1, 1248static int comp_oper(struct btrfs_qgroup_operation *oper1,
1205 struct btrfs_qgroup_operation *oper2) 1249 struct btrfs_qgroup_operation *oper2)
1206{ 1250{
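The pair comp_oper_exist()/qgroup_oper_exists() added above is a straight ordered-tree lookup keyed on (bytenr, ref_root) that deliberately ignores seq and type, so any queued operation on the same extent and root counts as a match. A sketch of the same lookup, using a plain binary search tree for brevity where the kernel walks an rb_root (the types are invented):

    #include <stdint.h>
    #include <stdio.h>

    struct oper {
            uint64_t bytenr;
            uint64_t ref_root;
            struct oper *left, *right;
    };

    static int comp_oper_exist(const struct oper *a, const struct oper *b)
    {
            if (a->bytenr != b->bytenr)
                    return a->bytenr < b->bytenr ? -1 : 1;
            if (a->ref_root != b->ref_root)
                    return a->ref_root < b->ref_root ? -1 : 1;
            return 0;
    }

    /* 1 if any operation with the same (bytenr, ref_root) is queued */
    static int oper_exists(const struct oper *root, const struct oper *key)
    {
            while (root) {
                    int cmp = comp_oper_exist(root, key);

                    if (cmp < 0)
                            root = root->right;
                    else if (cmp > 0)
                            root = root->left;
                    else
                            return 1;
            }
            return 0;
    }

    int main(void)
    {
            struct oper queued = { 4096, 5, NULL, NULL };
            struct oper key = { 4096, 5, NULL, NULL };

            printf("exists: %d\n", oper_exists(&queued, &key)); /* 1 */
            return 0;
    }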
@@ -1290,6 +1334,23 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans,
1290 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq); 1334 oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
1291 INIT_LIST_HEAD(&oper->elem.list); 1335 INIT_LIST_HEAD(&oper->elem.list);
1292 oper->elem.seq = 0; 1336 oper->elem.seq = 0;
1337
1338 if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
1339 /*
1340 * If any operation for this bytenr/ref_root combo
1341 * exists, then we know it's not exclusively owned and
1342 * shouldn't be queued up.
1343 *
1344 * This also catches the case where we have a cloned
1345 * extent that gets queued up multiple times during
1346 * drop snapshot.
1347 */
1348 if (qgroup_oper_exists(fs_info, oper)) {
1349 kfree(oper);
1350 return 0;
1351 }
1352 }
1353
1293 ret = insert_qgroup_oper(fs_info, oper); 1354 ret = insert_qgroup_oper(fs_info, oper);
1294 if (ret) { 1355 if (ret) {
1295 /* Shouldn't happen so have an assert for developers */ 1356 /* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,111 @@ out:
1884} 1945}
1885 1946
1886/* 1947/*
1948 * Process a reference to a shared subtree. This type of operation is
1949 * queued during snapshot removal when we encounter extents which are
1950 * shared between more than one root.
1951 */
1952static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
1953 struct btrfs_fs_info *fs_info,
1954 struct btrfs_qgroup_operation *oper)
1955{
1956 struct ulist *roots = NULL;
1957 struct ulist_node *unode;
1958 struct ulist_iterator uiter;
1959 struct btrfs_qgroup_list *glist;
1960 struct ulist *parents;
1961 int ret = 0;
1962 int err;
1963 struct btrfs_qgroup *qg;
1964 u64 root_obj = 0;
1965 struct seq_list elem = {};
1966
1967 parents = ulist_alloc(GFP_NOFS);
1968 if (!parents)
1969 return -ENOMEM;
1970
1971 btrfs_get_tree_mod_seq(fs_info, &elem);
1972 ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
1973 elem.seq, &roots);
1974 btrfs_put_tree_mod_seq(fs_info, &elem);
1975 if (ret < 0)
1976 return ret;
1977
1978 if (roots->nnodes != 1)
1979 goto out;
1980
1981 ULIST_ITER_INIT(&uiter);
1982 unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
1983 /*
1984 * If we find our ref root then that means all refs
1985 * this extent has to the root have not yet been
1986 * deleted. In that case, we do nothing and let the
1987 * last ref for this bytenr drive our update.
1988 *
1989 * This can happen for example if an extent is
1990 * referenced multiple times in a snapshot (clone,
1991 * etc). If we are in the middle of snapshot removal,
1992 * queued updates for such an extent will find the
1993 * root if we have not yet finished removing the
1994 * snapshot.
1995 */
1996 if (unode->val == oper->ref_root)
1997 goto out;
1998
1999 root_obj = unode->val;
2000 BUG_ON(!root_obj);
2001
2002 spin_lock(&fs_info->qgroup_lock);
2003 qg = find_qgroup_rb(fs_info, root_obj);
2004 if (!qg)
2005 goto out_unlock;
2006
2007 qg->excl += oper->num_bytes;
2008 qg->excl_cmpr += oper->num_bytes;
2009 qgroup_dirty(fs_info, qg);
2010
2011 /*
2012 * Adjust counts for parent groups. First we find all
2013 * parents, then in the 2nd loop we do the adjustment
2014 * while adding parents of the parents to our ulist.
2015 */
2016 list_for_each_entry(glist, &qg->groups, next_group) {
2017 err = ulist_add(parents, glist->group->qgroupid,
2018 ptr_to_u64(glist->group), GFP_ATOMIC);
2019 if (err < 0) {
2020 ret = err;
2021 goto out_unlock;
2022 }
2023 }
2024
2025 ULIST_ITER_INIT(&uiter);
2026 while ((unode = ulist_next(parents, &uiter))) {
2027 qg = u64_to_ptr(unode->aux);
2028 qg->excl += oper->num_bytes;
2029 qg->excl_cmpr += oper->num_bytes;
2030 qgroup_dirty(fs_info, qg);
2031
2032 /* Add any parents of the parents */
2033 list_for_each_entry(glist, &qg->groups, next_group) {
2034 err = ulist_add(parents, glist->group->qgroupid,
2035 ptr_to_u64(glist->group), GFP_ATOMIC);
2036 if (err < 0) {
2037 ret = err;
2038 goto out_unlock;
2039 }
2040 }
2041 }
2042
2043out_unlock:
2044 spin_unlock(&fs_info->qgroup_lock);
2045
2046out:
2047 ulist_free(roots);
2048 ulist_free(parents);
2049 return ret;
2050}
2051
2052/*
1887 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted 2053 * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
1888 * from the fs. First, all roots referencing the extent are searched, and 2054 * from the fs. First, all roots referencing the extent are searched, and
1889 * then the space is accounted accordingly to the different roots. The 2055 * then the space is accounted accordingly to the different roots. The
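The second half of qgroup_subtree_accounting() above is a breadth-first walk: credit the group that now owns the subtree exclusively, then visit every transitive parent exactly once and credit it too. A userspace sketch of that propagation, where a flat visited array and a fixed worklist stand in for the kernel's ulist and the group layout is illustrative only:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_GROUPS 16

    struct qgroup {
            uint64_t excl;
            int nparents;
            int parents[MAX_GROUPS]; /* indices of parent groups */
    };

    static void account_subtree(struct qgroup *g, int start, uint64_t bytes)
    {
            int work[MAX_GROUPS], visited[MAX_GROUPS] = { 0 };
            int head = 0, tail = 0, i;

            g[start].excl += bytes; /* the owning group itself */
            visited[start] = 1;

            /* seed the worklist with the owner's direct parents */
            for (i = 0; i < g[start].nparents; i++) {
                    int p = g[start].parents[i];

                    if (!visited[p]) {
                            visited[p] = 1;
                            work[tail++] = p;
                    }
            }
            /* drain it, queueing parents of parents exactly once */
            while (head < tail) {
                    int cur = work[head++];

                    g[cur].excl += bytes;
                    for (i = 0; i < g[cur].nparents; i++) {
                            int p = g[cur].parents[i];

                            if (!visited[p]) {
                                    visited[p] = 1;
                                    work[tail++] = p;
                            }
                    }
            }
    }

    int main(void)
    {
            /* group 0 belongs to group 1, which belongs to group 2 */
            struct qgroup g[3] = {
                    { 0, 1, { 1 } }, { 0, 1, { 2 } }, { 0, 0, { 0 } },
            };

            account_subtree(g, 0, 4096);
            printf("excl: %llu %llu %llu\n",        /* 4096 4096 4096 */
                   (unsigned long long)g[0].excl,
                   (unsigned long long)g[1].excl,
                   (unsigned long long)g[2].excl);
            return 0;
    }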
@@ -1920,6 +2086,9 @@ static int btrfs_qgroup_account(struct btrfs_trans_handle *trans,
1920 case BTRFS_QGROUP_OPER_SUB_SHARED: 2086 case BTRFS_QGROUP_OPER_SUB_SHARED:
1921 ret = qgroup_shared_accounting(trans, fs_info, oper); 2087 ret = qgroup_shared_accounting(trans, fs_info, oper);
1922 break; 2088 break;
2089 case BTRFS_QGROUP_OPER_SUB_SUBTREE:
2090 ret = qgroup_subtree_accounting(trans, fs_info, oper);
2091 break;
1923 default: 2092 default:
1924 ASSERT(0); 2093 ASSERT(0);
1925 } 2094 }
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1fbd7a..18cc68ca3090 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@ enum btrfs_qgroup_operation_type {
44 BTRFS_QGROUP_OPER_ADD_SHARED, 44 BTRFS_QGROUP_OPER_ADD_SHARED,
45 BTRFS_QGROUP_OPER_SUB_EXCL, 45 BTRFS_QGROUP_OPER_SUB_EXCL,
46 BTRFS_QGROUP_OPER_SUB_SHARED, 46 BTRFS_QGROUP_OPER_SUB_SHARED,
47 BTRFS_QGROUP_OPER_SUB_SUBTREE,
47}; 48};
48 49
49struct btrfs_qgroup_operation { 50struct btrfs_qgroup_operation {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67b48b9a03e0..c4124de4435b 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1665,6 +1665,21 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
1665 return 0; 1665 return 0;
1666} 1666}
1667 1667
1668/*
1669 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
1670 *
1671 * If there's a redundant raid level in DATA block groups, use the respective
1672 * multiplier to scale the sizes.
1673 *
1674 * Unused device space usage is based on simulating the chunk allocator
1675 * algorithm that respects the device sizes, order of allocations and the
1676 * 'alloc_start' value; this is a close approximation of the actual use but
1677 * there are other factors that may change the result (like a new metadata
1678 * chunk).
1679 *
1680 * FIXME: not accurate for mixed block groups, total and free/used are ok,
1681 * available appears slightly larger.
1682 */
1668static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 1683static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1669{ 1684{
1670 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); 1685 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
@@ -1675,6 +1690,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1675 u64 total_free_data = 0; 1690 u64 total_free_data = 0;
1676 int bits = dentry->d_sb->s_blocksize_bits; 1691 int bits = dentry->d_sb->s_blocksize_bits;
1677 __be32 *fsid = (__be32 *)fs_info->fsid; 1692 __be32 *fsid = (__be32 *)fs_info->fsid;
1693 unsigned factor = 1;
1694 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
1678 int ret; 1695 int ret;
1679 1696
1680 /* holding chunk_mutex to avoid allocating new chunks */ 1697
@@ -1682,30 +1699,52 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
1682 rcu_read_lock(); 1699 rcu_read_lock();
1683 list_for_each_entry_rcu(found, head, list) { 1700 list_for_each_entry_rcu(found, head, list) {
1684 if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 1701 if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
1702 int i;
1703
1685 total_free_data += found->disk_total - found->disk_used; 1704 total_free_data += found->disk_total - found->disk_used;
1686 total_free_data -= 1705 total_free_data -=
1687 btrfs_account_ro_block_groups_free_space(found); 1706 btrfs_account_ro_block_groups_free_space(found);
1707
1708 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1709 if (!list_empty(&found->block_groups[i])) {
1710 switch (i) {
1711 case BTRFS_RAID_DUP:
1712 case BTRFS_RAID_RAID1:
1713 case BTRFS_RAID_RAID10:
1714 factor = 2;
1715 }
1716 }
1717 }
1688 } 1718 }
1689 1719
1690 total_used += found->disk_used; 1720 total_used += found->disk_used;
1691 } 1721 }
1722
1692 rcu_read_unlock(); 1723 rcu_read_unlock();
1693 1724
1694 buf->f_namelen = BTRFS_NAME_LEN; 1725 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
1695 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 1726 buf->f_blocks >>= bits;
1696 buf->f_bfree = buf->f_blocks - (total_used >> bits); 1727 buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
1697 buf->f_bsize = dentry->d_sb->s_blocksize; 1728
1698 buf->f_type = BTRFS_SUPER_MAGIC; 1729 /* Account global block reserve as used, it's in logical size already */
1730 spin_lock(&block_rsv->lock);
1731 buf->f_bfree -= block_rsv->size >> bits;
1732 spin_unlock(&block_rsv->lock);
1733
1699 buf->f_bavail = total_free_data; 1734 buf->f_bavail = total_free_data;
1700 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data); 1735 ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
1701 if (ret) { 1736 if (ret) {
1702 mutex_unlock(&fs_info->chunk_mutex); 1737 mutex_unlock(&fs_info->chunk_mutex);
1703 return ret; 1738 return ret;
1704 } 1739 }
1705 buf->f_bavail += total_free_data; 1740 buf->f_bavail += div_u64(total_free_data, factor);
1706 buf->f_bavail = buf->f_bavail >> bits; 1741 buf->f_bavail = buf->f_bavail >> bits;
1707 mutex_unlock(&fs_info->chunk_mutex); 1742 mutex_unlock(&fs_info->chunk_mutex);
1708 1743
1744 buf->f_type = BTRFS_SUPER_MAGIC;
1745 buf->f_bsize = dentry->d_sb->s_blocksize;
1746 buf->f_namelen = BTRFS_NAME_LEN;
1747
1709 /* We treat it as constant endianness (it doesn't matter _which_) 1748 /* We treat it as constant endianness (it doesn't matter _which_)
1710 because we want the fsid to come out the same whether mounted 1749 because we want the fsid to come out the same whether mounted
1711 on a big-endian or little-endian host */ 1750 on a big-endian or little-endian host */
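A worked example of the new statfs math above: with a redundant raid profile, the raw on-disk byte counts are divided by the profile's factor (2 for DUP/RAID1/RAID10) before being shifted down to block units, and the global reserve, which is already in logical bytes, is subtracted without scaling. div_u64() is modelled by plain 64-bit division and the sample sizes are made up:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t total_bytes = 4ULL << 30; /* 4 GiB raw device space */
            uint64_t used_bytes = 1ULL << 30;  /* 1 GiB used on disk */
            uint64_t rsv_bytes = 32ULL << 20;  /* global reserve, logical */
            unsigned int factor = 2;           /* RAID1: stored twice */
            int bits = 12;                     /* 4 KiB blocks */

            uint64_t f_blocks = (total_bytes / factor) >> bits;
            uint64_t f_bfree = f_blocks - ((used_bytes / factor) >> bits);

            /* the reserve is logical already, so no factor here */
            f_bfree -= rsv_bytes >> bits;

            printf("blocks=%llu free=%llu\n", /* blocks=524288 free=385024 */
                   (unsigned long long)f_blocks,
                   (unsigned long long)f_bfree);
            return 0;
    }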
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5f379affdf23..d89c6d3542ca 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -218,7 +218,6 @@ loop:
218 spin_lock_init(&cur_trans->delayed_refs.lock); 218 spin_lock_init(&cur_trans->delayed_refs.lock);
219 219
220 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 220 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
221 INIT_LIST_HEAD(&cur_trans->ordered_operations);
222 INIT_LIST_HEAD(&cur_trans->pending_chunks); 221 INIT_LIST_HEAD(&cur_trans->pending_chunks);
223 INIT_LIST_HEAD(&cur_trans->switch_commits); 222 INIT_LIST_HEAD(&cur_trans->switch_commits);
224 list_add_tail(&cur_trans->list, &fs_info->trans_list); 223 list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -1612,27 +1611,6 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
1612 kmem_cache_free(btrfs_trans_handle_cachep, trans); 1611 kmem_cache_free(btrfs_trans_handle_cachep, trans);
1613} 1612}
1614 1613
1615static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1616 struct btrfs_root *root)
1617{
1618 int ret;
1619
1620 ret = btrfs_run_delayed_items(trans, root);
1621 if (ret)
1622 return ret;
1623
1624 /*
1625 * rename doesn't use btrfs_join_transaction, so once we
1626 * set the transaction to blocked above, we aren't going
1627 * to get any new ordered operations. We can safely run
1628 * it here and know for sure that nothing new will be added
1629 * to the list
1630 */
1631 ret = btrfs_run_ordered_operations(trans, root, 1);
1632
1633 return ret;
1634}
1635
1636static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) 1614static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1637{ 1615{
1638 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1616 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
@@ -1653,13 +1631,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1653 struct btrfs_transaction *prev_trans = NULL; 1631 struct btrfs_transaction *prev_trans = NULL;
1654 int ret; 1632 int ret;
1655 1633
1656 ret = btrfs_run_ordered_operations(trans, root, 0);
1657 if (ret) {
1658 btrfs_abort_transaction(trans, root, ret);
1659 btrfs_end_transaction(trans, root);
1660 return ret;
1661 }
1662
1663 /* Stop the commit early if ->aborted is set */ 1634 /* Stop the commit early if ->aborted is set */
1664 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { 1635 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1665 ret = cur_trans->aborted; 1636 ret = cur_trans->aborted;
@@ -1740,7 +1711,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1740 if (ret) 1711 if (ret)
1741 goto cleanup_transaction; 1712 goto cleanup_transaction;
1742 1713
1743 ret = btrfs_flush_all_pending_stuffs(trans, root); 1714 ret = btrfs_run_delayed_items(trans, root);
1744 if (ret) 1715 if (ret)
1745 goto cleanup_transaction; 1716 goto cleanup_transaction;
1746 1717
@@ -1748,7 +1719,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1748 extwriter_counter_read(cur_trans) == 0); 1719 extwriter_counter_read(cur_trans) == 0);
1749 1720
1750 /* some pending stuff might be added after the previous flush. */ 1721
1751 ret = btrfs_flush_all_pending_stuffs(trans, root); 1722 ret = btrfs_run_delayed_items(trans, root);
1752 if (ret) 1723 if (ret)
1753 goto cleanup_transaction; 1724 goto cleanup_transaction;
1754 1725
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7dd558ed0716..579be51b27e5 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -55,7 +55,6 @@ struct btrfs_transaction {
55 wait_queue_head_t writer_wait; 55 wait_queue_head_t writer_wait;
56 wait_queue_head_t commit_wait; 56 wait_queue_head_t commit_wait;
57 struct list_head pending_snapshots; 57 struct list_head pending_snapshots;
58 struct list_head ordered_operations;
59 struct list_head pending_chunks; 58 struct list_head pending_chunks;
60 struct list_head switch_commits; 59 struct list_head switch_commits;
61 struct btrfs_delayed_ref_root delayed_refs; 60 struct btrfs_delayed_ref_root delayed_refs;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 7f78cbf5cf41..4c29db604bbe 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,21 @@ void ulist_free(struct ulist *ulist);
57int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask); 57int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
58int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, 58int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
59 u64 *old_aux, gfp_t gfp_mask); 59 u64 *old_aux, gfp_t gfp_mask);
60
61/* just like ulist_add_merge() but take a pointer for the aux data */
62static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
63 void **old_aux, gfp_t gfp_mask)
64{
65#if BITS_PER_LONG == 32
66 u64 old64 = (uintptr_t)*old_aux;
67 int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
68 *old_aux = (void *)((uintptr_t)old64);
69 return ret;
70#else
71 return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
72#endif
73}
74
60struct ulist_node *ulist_next(struct ulist *ulist, 75struct ulist_node *ulist_next(struct ulist *ulist,
61 struct ulist_iterator *uiter); 76 struct ulist_iterator *uiter);
62 77
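The ulist_add_merge_ptr() helper above exists because on 32-bit builds a void * is narrower than the u64 aux field, so the pointer has to round-trip through uintptr_t instead of being cast straight to u64 *. A standalone demonstration of that round trip:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t ptr_to_u64(void *p)
    {
            return (uint64_t)(uintptr_t)p; /* widen via uintptr_t */
    }

    static void *u64_to_ptr(uint64_t v)
    {
            return (void *)(uintptr_t)v; /* narrow the same way back */
    }

    int main(void)
    {
            int x = 42;
            uint64_t aux = ptr_to_u64(&x);
            int *back = u64_to_ptr(aux);

            printf("%d\n", *back); /* 42 on 32- and 64-bit alike */
            return 0;
    }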
diff --git a/fs/locks.c b/fs/locks.c
index a6f54802d277..cb66fb05ad4a 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -247,6 +247,18 @@ void locks_free_lock(struct file_lock *fl)
247} 247}
248EXPORT_SYMBOL(locks_free_lock); 248EXPORT_SYMBOL(locks_free_lock);
249 249
250static void
251locks_dispose_list(struct list_head *dispose)
252{
253 struct file_lock *fl;
254
255 while (!list_empty(dispose)) {
256 fl = list_first_entry(dispose, struct file_lock, fl_block);
257 list_del_init(&fl->fl_block);
258 locks_free_lock(fl);
259 }
260}
261
250void locks_init_lock(struct file_lock *fl) 262void locks_init_lock(struct file_lock *fl)
251{ 263{
252 memset(fl, 0, sizeof(struct file_lock)); 264 memset(fl, 0, sizeof(struct file_lock));
@@ -285,7 +297,8 @@ EXPORT_SYMBOL(__locks_copy_lock);
285 297
286void locks_copy_lock(struct file_lock *new, struct file_lock *fl) 298void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
287{ 299{
288 locks_release_private(new); 300 /* "new" must be a freshly-initialized lock */
301 WARN_ON_ONCE(new->fl_ops);
289 302
290 __locks_copy_lock(new, fl); 303 __locks_copy_lock(new, fl);
291 new->fl_file = fl->fl_file; 304 new->fl_file = fl->fl_file;
@@ -650,12 +663,16 @@ static void locks_unlink_lock(struct file_lock **thisfl_p)
650 * 663 *
651 * Must be called with i_lock held! 664 * Must be called with i_lock held!
652 */ 665 */
653static void locks_delete_lock(struct file_lock **thisfl_p) 666static void locks_delete_lock(struct file_lock **thisfl_p,
667 struct list_head *dispose)
654{ 668{
655 struct file_lock *fl = *thisfl_p; 669 struct file_lock *fl = *thisfl_p;
656 670
657 locks_unlink_lock(thisfl_p); 671 locks_unlink_lock(thisfl_p);
658 locks_free_lock(fl); 672 if (dispose)
673 list_add(&fl->fl_block, dispose);
674 else
675 locks_free_lock(fl);
659} 676}
660 677
661/* Determine if lock sys_fl blocks lock caller_fl. Common functionality 678/* Determine if lock sys_fl blocks lock caller_fl. Common functionality
@@ -811,6 +828,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
811 struct inode * inode = file_inode(filp); 828 struct inode * inode = file_inode(filp);
812 int error = 0; 829 int error = 0;
813 int found = 0; 830 int found = 0;
831 LIST_HEAD(dispose);
814 832
815 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { 833 if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
816 new_fl = locks_alloc_lock(); 834 new_fl = locks_alloc_lock();
@@ -833,7 +851,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
833 if (request->fl_type == fl->fl_type) 851 if (request->fl_type == fl->fl_type)
834 goto out; 852 goto out;
835 found = 1; 853 found = 1;
836 locks_delete_lock(before); 854 locks_delete_lock(before, &dispose);
837 break; 855 break;
838 } 856 }
839 857
@@ -880,6 +898,7 @@ out:
880 spin_unlock(&inode->i_lock); 898 spin_unlock(&inode->i_lock);
881 if (new_fl) 899 if (new_fl)
882 locks_free_lock(new_fl); 900 locks_free_lock(new_fl);
901 locks_dispose_list(&dispose);
883 return error; 902 return error;
884} 903}
885 904
@@ -893,6 +912,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
893 struct file_lock **before; 912 struct file_lock **before;
894 int error; 913 int error;
895 bool added = false; 914 bool added = false;
915 LIST_HEAD(dispose);
896 916
897 /* 917 /*
898 * We may need two file_lock structures for this operation, 918 * We may need two file_lock structures for this operation,
@@ -988,7 +1008,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
988 else 1008 else
989 request->fl_end = fl->fl_end; 1009 request->fl_end = fl->fl_end;
990 if (added) { 1010 if (added) {
991 locks_delete_lock(before); 1011 locks_delete_lock(before, &dispose);
992 continue; 1012 continue;
993 } 1013 }
994 request = fl; 1014 request = fl;
@@ -1018,21 +1038,24 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1018 * one (This may happen several times). 1038 * one (This may happen several times).
1019 */ 1039 */
1020 if (added) { 1040 if (added) {
1021 locks_delete_lock(before); 1041 locks_delete_lock(before, &dispose);
1022 continue; 1042 continue;
1023 } 1043 }
1024 /* Replace the old lock with the new one. 1044 /*
1025 * Wake up anybody waiting for the old one, 1045 * Replace the old lock with new_fl, and
1026 * as the change in lock type might satisfy 1046 * remove the old one. It's safe to do the
1027 * their needs. 1047 * insert here since we know that we won't be
1048 * using new_fl later, and that the lock is
1049 * just replacing an existing lock.
1028 */ 1050 */
1029 locks_wake_up_blocks(fl); 1051 error = -ENOLCK;
1030 fl->fl_start = request->fl_start; 1052 if (!new_fl)
1031 fl->fl_end = request->fl_end; 1053 goto out;
1032 fl->fl_type = request->fl_type; 1054 locks_copy_lock(new_fl, request);
1033 locks_release_private(fl); 1055 request = new_fl;
1034 locks_copy_private(fl, request); 1056 new_fl = NULL;
1035 request = fl; 1057 locks_delete_lock(before, &dispose);
1058 locks_insert_lock(before, request);
1036 added = true; 1059 added = true;
1037 } 1060 }
1038 } 1061 }
@@ -1093,6 +1116,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str
1093 locks_free_lock(new_fl); 1116 locks_free_lock(new_fl);
1094 if (new_fl2) 1117 if (new_fl2)
1095 locks_free_lock(new_fl2); 1118 locks_free_lock(new_fl2);
1119 locks_dispose_list(&dispose);
1096 return error; 1120 return error;
1097} 1121}
1098 1122
@@ -1268,7 +1292,7 @@ int lease_modify(struct file_lock **before, int arg)
1268 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); 1292 printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1269 fl->fl_fasync = NULL; 1293 fl->fl_fasync = NULL;
1270 } 1294 }
1271 locks_delete_lock(before); 1295 locks_delete_lock(before, NULL);
1272 } 1296 }
1273 return 0; 1297 return 0;
1274} 1298}
@@ -1737,13 +1761,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1737 ret = fl; 1761 ret = fl;
1738 spin_lock(&inode->i_lock); 1762 spin_lock(&inode->i_lock);
1739 error = __vfs_setlease(filp, arg, &ret); 1763 error = __vfs_setlease(filp, arg, &ret);
1740 if (error) { 1764 if (error)
1741 spin_unlock(&inode->i_lock); 1765 goto out_unlock;
1742 locks_free_lock(fl); 1766 if (ret == fl)
1743 goto out_free_fasync; 1767 fl = NULL;
1744 }
1745 if (ret != fl)
1746 locks_free_lock(fl);
1747 1768
1748 /* 1769 /*
1749 * fasync_insert_entry() returns the old entry if any. 1770 * fasync_insert_entry() returns the old entry if any.
@@ -1755,9 +1776,10 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1755 new = NULL; 1776 new = NULL;
1756 1777
1757 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); 1778 error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
1779out_unlock:
1758 spin_unlock(&inode->i_lock); 1780 spin_unlock(&inode->i_lock);
1759 1781 if (fl)
1760out_free_fasync: 1782 locks_free_lock(fl);
1761 if (new) 1783 if (new)
1762 fasync_free(new); 1784 fasync_free(new);
1763 return error; 1785 return error;
@@ -2320,6 +2342,7 @@ void locks_remove_file(struct file *filp)
2320 struct inode * inode = file_inode(filp); 2342 struct inode * inode = file_inode(filp);
2321 struct file_lock *fl; 2343 struct file_lock *fl;
2322 struct file_lock **before; 2344 struct file_lock **before;
2345 LIST_HEAD(dispose);
2323 2346
2324 if (!inode->i_flock) 2347 if (!inode->i_flock)
2325 return; 2348 return;
@@ -2365,12 +2388,13 @@ void locks_remove_file(struct file *filp)
2365 fl->fl_type, fl->fl_flags, 2388 fl->fl_type, fl->fl_flags,
2366 fl->fl_start, fl->fl_end); 2389 fl->fl_start, fl->fl_end);
2367 2390
2368 locks_delete_lock(before); 2391 locks_delete_lock(before, &dispose);
2369 continue; 2392 continue;
2370 } 2393 }
2371 before = &fl->fl_next; 2394 before = &fl->fl_next;
2372 } 2395 }
2373 spin_unlock(&inode->i_lock); 2396 spin_unlock(&inode->i_lock);
2397 locks_dispose_list(&dispose);
2374} 2398}
2375 2399
2376/** 2400/**
@@ -2452,7 +2476,11 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2452 seq_puts(f, "FLOCK ADVISORY "); 2476 seq_puts(f, "FLOCK ADVISORY ");
2453 } 2477 }
2454 } else if (IS_LEASE(fl)) { 2478 } else if (IS_LEASE(fl)) {
2455 seq_puts(f, "LEASE "); 2479 if (fl->fl_flags & FL_DELEG)
2480 seq_puts(f, "DELEG ");
2481 else
2482 seq_puts(f, "LEASE ");
2483
2456 if (lease_breaking(fl)) 2484 if (lease_breaking(fl))
2457 seq_puts(f, "BREAKING "); 2485 seq_puts(f, "BREAKING ");
2458 else if (fl->fl_file) 2486 else if (fl->fl_file)
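The locks_dispose_list() changes above follow one pattern throughout: while i_lock is held, a deleted lock is only unlinked onto a private dispose list, and the actual freeing runs after the lock is dropped. A userspace sketch of that deferred-teardown idea, with invented types and a pthread mutex in place of the spinlock:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct fake_lock {
            int id;
            struct fake_lock *next; /* stands in for fl_block linkage */
    };

    static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;

    static void dispose_list(struct fake_lock *head)
    {
            while (head) {
                    struct fake_lock *fl = head;

                    head = head->next;
                    printf("freeing lock %d outside the lock\n", fl->id);
                    free(fl);
            }
    }

    int main(void)
    {
            struct fake_lock *dispose = NULL;
            int i;

            pthread_mutex_lock(&i_lock);
            /* "delete" three locks: unlink onto the dispose list only */
            for (i = 0; i < 3; i++) {
                    struct fake_lock *fl = malloc(sizeof(*fl));

                    if (!fl)
                            abort();
                    fl->id = i;
                    fl->next = dispose;
                    dispose = fl;
            }
            pthread_mutex_unlock(&i_lock);

            dispose_list(dispose); /* deferred teardown, lock not held */
            return 0;
    }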
diff --git a/kernel/module.c b/kernel/module.c
index 6f69463f0066..03214bd288e9 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3304,6 +3304,11 @@ static int load_module(struct load_info *info, const char __user *uargs,
3304 mutex_lock(&module_mutex); 3304 mutex_lock(&module_mutex);
3305 module_bug_cleanup(mod); 3305 module_bug_cleanup(mod);
3306 mutex_unlock(&module_mutex); 3306 mutex_unlock(&module_mutex);
3307
3308 /* we can't deallocate the module until we clear memory protection */
3309 unset_module_init_ro_nx(mod);
3310 unset_module_core_ro_nx(mod);
3311
3307 ddebug_cleanup: 3312 ddebug_cleanup:
3308 dynamic_debug_remove(info->debug); 3313 dynamic_debug_remove(info->debug);
3309 synchronize_sched(); 3314 synchronize_sched();
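The module.c hunk above restores normal page protections before the module's memory goes back to the allocator. A loose userspace analogue of that ordering, assuming POSIX mmap/mprotect; munmap itself would tolerate read-only pages, but the sequence mirrors the kernel rule that protections are cleared before teardown:

    #include <sys/mman.h>
    #include <stdio.h>

    int main(void)
    {
            size_t sz = 4096;
            char *p = mmap(NULL, sz, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            p[0] = 'x';
            mprotect(p, sz, PROT_READ);              /* lock down (RO/NX) */
            mprotect(p, sz, PROT_READ | PROT_WRITE); /* undo before free */
            munmap(p, sz);
            puts("teardown after clearing protections");
            return 0;
    }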
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d0396af99fa0..5b1b807265a1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -267,90 +267,90 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
267/* 267/*
268 * Example Format w/ field column widths: 268 * Example Format w/ field column widths:
269 * 269 *
270 * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt 270 * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
271 * 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 271 * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
272 */ 272 */
273 273
274void print_header(void) 274void print_header(void)
275{ 275{
276 if (show_pkg) 276 if (show_pkg)
277 outp += sprintf(outp, "Package "); 277 outp += sprintf(outp, " Package");
278 if (show_core) 278 if (show_core)
279 outp += sprintf(outp, " Core "); 279 outp += sprintf(outp, " Core");
280 if (show_cpu) 280 if (show_cpu)
281 outp += sprintf(outp, " CPU "); 281 outp += sprintf(outp, " CPU");
282 if (has_aperf) 282 if (has_aperf)
283 outp += sprintf(outp, "Avg_MHz "); 283 outp += sprintf(outp, " Avg_MHz");
284 if (do_nhm_cstates) 284 if (do_nhm_cstates)
285 outp += sprintf(outp, " %%Busy "); 285 outp += sprintf(outp, " %%Busy");
286 if (has_aperf) 286 if (has_aperf)
287 outp += sprintf(outp, "Bzy_MHz "); 287 outp += sprintf(outp, " Bzy_MHz");
288 outp += sprintf(outp, "TSC_MHz "); 288 outp += sprintf(outp, " TSC_MHz");
289 if (do_smi) 289 if (do_smi)
290 outp += sprintf(outp, " SMI "); 290 outp += sprintf(outp, " SMI");
291 if (extra_delta_offset32) 291 if (extra_delta_offset32)
292 outp += sprintf(outp, " count 0x%03X ", extra_delta_offset32); 292 outp += sprintf(outp, " count 0x%03X", extra_delta_offset32);
293 if (extra_delta_offset64) 293 if (extra_delta_offset64)
294 outp += sprintf(outp, " COUNT 0x%03X ", extra_delta_offset64); 294 outp += sprintf(outp, " COUNT 0x%03X", extra_delta_offset64);
295 if (extra_msr_offset32) 295 if (extra_msr_offset32)
296 outp += sprintf(outp, " MSR 0x%03X ", extra_msr_offset32); 296 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32);
297 if (extra_msr_offset64) 297 if (extra_msr_offset64)
298 outp += sprintf(outp, " MSR 0x%03X ", extra_msr_offset64); 298 outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64);
299 if (do_nhm_cstates) 299 if (do_nhm_cstates)
300 outp += sprintf(outp, " CPU%%c1 "); 300 outp += sprintf(outp, " CPU%%c1");
301 if (do_nhm_cstates && !do_slm_cstates) 301 if (do_nhm_cstates && !do_slm_cstates)
302 outp += sprintf(outp, " CPU%%c3 "); 302 outp += sprintf(outp, " CPU%%c3");
303 if (do_nhm_cstates) 303 if (do_nhm_cstates)
304 outp += sprintf(outp, " CPU%%c6 "); 304 outp += sprintf(outp, " CPU%%c6");
305 if (do_snb_cstates) 305 if (do_snb_cstates)
306 outp += sprintf(outp, " CPU%%c7 "); 306 outp += sprintf(outp, " CPU%%c7");
307 307
308 if (do_dts) 308 if (do_dts)
309 outp += sprintf(outp, "CoreTmp "); 309 outp += sprintf(outp, " CoreTmp");
310 if (do_ptm) 310 if (do_ptm)
311 outp += sprintf(outp, " PkgTmp "); 311 outp += sprintf(outp, " PkgTmp");
312 312
313 if (do_snb_cstates) 313 if (do_snb_cstates)
314 outp += sprintf(outp, "Pkg%%pc2 "); 314 outp += sprintf(outp, " Pkg%%pc2");
315 if (do_nhm_cstates && !do_slm_cstates) 315 if (do_nhm_cstates && !do_slm_cstates)
316 outp += sprintf(outp, "Pkg%%pc3 "); 316 outp += sprintf(outp, " Pkg%%pc3");
317 if (do_nhm_cstates && !do_slm_cstates) 317 if (do_nhm_cstates && !do_slm_cstates)
318 outp += sprintf(outp, "Pkg%%pc6 "); 318 outp += sprintf(outp, " Pkg%%pc6");
319 if (do_snb_cstates) 319 if (do_snb_cstates)
320 outp += sprintf(outp, "Pkg%%pc7 "); 320 outp += sprintf(outp, " Pkg%%pc7");
321 if (do_c8_c9_c10) { 321 if (do_c8_c9_c10) {
322 outp += sprintf(outp, "Pkg%%pc8 "); 322 outp += sprintf(outp, " Pkg%%pc8");
323 outp += sprintf(outp, "Pkg%%pc9 "); 323 outp += sprintf(outp, " Pkg%%pc9");
324 outp += sprintf(outp, "Pk%%pc10 "); 324 outp += sprintf(outp, " Pk%%pc10");
325 } 325 }
326 326
327 if (do_rapl && !rapl_joules) { 327 if (do_rapl && !rapl_joules) {
328 if (do_rapl & RAPL_PKG) 328 if (do_rapl & RAPL_PKG)
329 outp += sprintf(outp, "PkgWatt "); 329 outp += sprintf(outp, " PkgWatt");
330 if (do_rapl & RAPL_CORES) 330 if (do_rapl & RAPL_CORES)
331 outp += sprintf(outp, "CorWatt "); 331 outp += sprintf(outp, " CorWatt");
332 if (do_rapl & RAPL_GFX) 332 if (do_rapl & RAPL_GFX)
333 outp += sprintf(outp, "GFXWatt "); 333 outp += sprintf(outp, " GFXWatt");
334 if (do_rapl & RAPL_DRAM) 334 if (do_rapl & RAPL_DRAM)
335 outp += sprintf(outp, "RAMWatt "); 335 outp += sprintf(outp, " RAMWatt");
336 if (do_rapl & RAPL_PKG_PERF_STATUS) 336 if (do_rapl & RAPL_PKG_PERF_STATUS)
337 outp += sprintf(outp, " PKG_%% "); 337 outp += sprintf(outp, " PKG_%%");
338 if (do_rapl & RAPL_DRAM_PERF_STATUS) 338 if (do_rapl & RAPL_DRAM_PERF_STATUS)
339 outp += sprintf(outp, " RAM_%% "); 339 outp += sprintf(outp, " RAM_%%");
340 } else { 340 } else {
341 if (do_rapl & RAPL_PKG) 341 if (do_rapl & RAPL_PKG)
342 outp += sprintf(outp, " Pkg_J "); 342 outp += sprintf(outp, " Pkg_J");
343 if (do_rapl & RAPL_CORES) 343 if (do_rapl & RAPL_CORES)
344 outp += sprintf(outp, " Cor_J "); 344 outp += sprintf(outp, " Cor_J");
345 if (do_rapl & RAPL_GFX) 345 if (do_rapl & RAPL_GFX)
346 outp += sprintf(outp, " GFX_J "); 346 outp += sprintf(outp, " GFX_J");
347 if (do_rapl & RAPL_DRAM) 347 if (do_rapl & RAPL_DRAM)
348 outp += sprintf(outp, " RAM_W "); 348 outp += sprintf(outp, " RAM_W");
349 if (do_rapl & RAPL_PKG_PERF_STATUS) 349 if (do_rapl & RAPL_PKG_PERF_STATUS)
350 outp += sprintf(outp, " PKG_%% "); 350 outp += sprintf(outp, " PKG_%%");
351 if (do_rapl & RAPL_DRAM_PERF_STATUS) 351 if (do_rapl & RAPL_DRAM_PERF_STATUS)
352 outp += sprintf(outp, " RAM_%% "); 352 outp += sprintf(outp, " RAM_%%");
353 outp += sprintf(outp, " time "); 353 outp += sprintf(outp, " time");
354 354
355 } 355 }
356 outp += sprintf(outp, "\n"); 356 outp += sprintf(outp, "\n");
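Every turbostat hunk above makes the same change: the padding moves from after the column name to before it, so a fixed-width header right-aligns over the fixed-width numeric rows. A small illustration of the before/after difference (the values are made up):

    #include <stdio.h>

    int main(void)
    {
            /* old style: name first, trailing pad -- header drifts left */
            printf("%s%s\n", "Avg_MHz ", "Bzy_MHz ");
            printf("%7d %7d\n", 1896, 2400);

            /* new style: leading pad, fixed width -- header sits over data */
            printf(" %7s %7s\n", "Avg_MHz", "Bzy_MHz");
            printf(" %7d %7d\n", 1896, 2400);
            return 0;
    }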
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index bf06577fea51..5819a2708d7e 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -526,8 +526,10 @@ static int assign_guest_irq(struct kvm *kvm,
526 dev->irq_requested_type |= guest_irq_type; 526 dev->irq_requested_type |= guest_irq_type;
527 if (dev->ack_notifier.gsi != -1) 527 if (dev->ack_notifier.gsi != -1)
528 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); 528 kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
529 } else 529 } else {
530 kvm_free_irq_source_id(kvm, dev->irq_source_id); 530 kvm_free_irq_source_id(kvm, dev->irq_source_id);
531 dev->irq_source_id = -1;
532 }
531 533
532 return r; 534 return r;
533} 535}
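The assigned-dev.c fix above pairs the free of the IRQ source id with resetting the cached id to -1, so a later teardown path cannot free it a second time. A generic sketch of that free-then-poison pattern, with an invented device struct:

    #include <stdio.h>

    struct fake_dev { int irq_source_id; };

    static void free_irq_source_id(int id) { printf("freed id %d\n", id); }

    static void release_irq(struct fake_dev *dev)
    {
            if (dev->irq_source_id < 0) /* already released: no-op */
                    return;
            free_irq_source_id(dev->irq_source_id);
            dev->irq_source_id = -1;    /* sentinel blocks a second free */
    }

    int main(void)
    {
            struct fake_dev dev = { .irq_source_id = 3 };

            release_irq(&dev);
            release_irq(&dev); /* safe: prints nothing */
            return 0;
    }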
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 0df7d4b34dfe..714b94932312 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -61,6 +61,14 @@ static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
61 return pfn; 61 return pfn;
62} 62}
63 63
64static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
65{
66 unsigned long i;
67
68 for (i = 0; i < npages; ++i)
69 kvm_release_pfn_clean(pfn + i);
70}
71
64int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) 72int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
65{ 73{
66 gfn_t gfn, end_gfn; 74 gfn_t gfn, end_gfn;
@@ -123,6 +131,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
123 if (r) { 131 if (r) {
124 printk(KERN_ERR "kvm_iommu_map_address:" 132 printk(KERN_ERR "kvm_iommu_map_address:"
125 "iommu failed to map pfn=%llx\n", pfn); 133 "iommu failed to map pfn=%llx\n", pfn);
134 kvm_unpin_pages(kvm, pfn, page_size);
126 goto unmap_pages; 135 goto unmap_pages;
127 } 136 }
128 137
@@ -134,7 +143,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
134 return 0; 143 return 0;
135 144
136unmap_pages: 145unmap_pages:
137 kvm_iommu_put_pages(kvm, slot->base_gfn, gfn); 146 kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
138 return r; 147 return r;
139} 148}
140 149
@@ -266,14 +275,6 @@ out_unlock:
266 return r; 275 return r;
267} 276}
268 277
269static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
270{
271 unsigned long i;
272
273 for (i = 0; i < npages; ++i)
274 kvm_release_pfn_clean(pfn + i);
275}
276
277static void kvm_iommu_put_pages(struct kvm *kvm, 278static void kvm_iommu_put_pages(struct kvm *kvm,
278 gfn_t base_gfn, unsigned long npages) 279 gfn_t base_gfn, unsigned long npages)
279{ 280{
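The iommu.c hunks above fix two halves of the same unwind path: pages pinned for a chunk whose mapping fails are released on the spot, and the unmap fallback is handed a page count (gfn - slot->base_gfn) rather than an absolute gfn. A sketch of that partial-failure unwinding, with pin/map/unpin reduced to stubs:

    #include <stdio.h>

    #define NPAGES 8

    static int map_one(int i) { return i == 5 ? -1 : 0; } /* fails at 5 */
    static void unpin_one(int i) { printf("unpin %d\n", i); }

    static int map_range(void)
    {
            int i;

            for (i = 0; i < NPAGES; i++) {
                    /* "pin" page i here, then try to map it */
                    if (map_one(i) < 0) {
                            unpin_one(i); /* drop the just-pinned page */
                            goto unmap_pages;
                    }
            }
            return 0;

    unmap_pages:
            /* tear down only the i pages that were actually mapped */
            while (i-- > 0)
                    unpin_one(i);
            return -1;
    }

    int main(void)
    {
            return map_range() ? 1 : 0;
    }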