-rw-r--r--.gitignore1
-rw-r--r--Documentation/device-mapper/switch.txt12
-rw-r--r--Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt17
-rw-r--r--Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt12
-rw-r--r--Documentation/devicetree/bindings/mmc/mmc.txt4
-rw-r--r--Documentation/devicetree/bindings/mmc/renesas,mmcif.txt32
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-msm.txt8
-rw-r--r--Documentation/devicetree/bindings/mmc/sdhci-st.txt33
-rw-r--r--Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt12
-rw-r--r--Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt54
-rw-r--r--Documentation/devicetree/bindings/mmc/tmio_mmc.txt1
-rw-r--r--Documentation/devicetree/changesets.txt40
-rw-r--r--Documentation/devicetree/todo.txt11
-rw-r--r--Documentation/infiniband/user_mad.txt13
-rw-r--r--Documentation/kbuild/00-INDEX2
-rw-r--r--Documentation/kbuild/headers_install.txt (renamed from Documentation/make/headers_install.txt)0
-rw-r--r--Documentation/kbuild/makefiles.txt39
-rw-r--r--Makefile30
-rw-r--r--arch/arm/boot/dts/versatile-ab.dts4
-rw-r--r--arch/arm/boot/dts/versatile-pb.dts2
-rw-r--r--arch/arm/xen/grant-table.c5
-rw-r--r--arch/ia64/Makefile2
-rw-r--r--arch/powerpc/boot/gunzip_util.c4
-rw-r--r--arch/powerpc/include/asm/cputable.h6
-rw-r--r--arch/powerpc/include/asm/machdep.h6
-rw-r--r--arch/powerpc/include/asm/opal.h11
-rw-r--r--arch/powerpc/include/asm/pgtable-ppc64.h2
-rw-r--r--arch/powerpc/include/asm/pte-hash64-64k.h30
-rw-r--r--arch/powerpc/include/asm/reg.h3
-rw-r--r--arch/powerpc/include/asm/spinlock.h1
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S110
-rw-r--r--arch/powerpc/kernel/head_44x.S4
-rw-r--r--arch/powerpc/kernel/iommu.c38
-rw-r--r--arch/powerpc/kernel/prom.c70
-rw-r--r--arch/powerpc/kernel/smp.c11
-rw-r--r--arch/powerpc/lib/locks.c4
-rw-r--r--arch/powerpc/mm/hash_native_64.c40
-rw-r--r--arch/powerpc/mm/hugepage-hash64.c88
-rw-r--r--arch/powerpc/mm/numa.c13
-rw-r--r--arch/powerpc/mm/pgtable_64.c44
-rw-r--r--arch/powerpc/mm/tlb_hash64.c6
-rw-r--r--arch/powerpc/mm/tlb_nohash.c111
-rw-r--r--arch/powerpc/perf/hv-24x7.c2
-rw-r--r--arch/powerpc/platforms/powermac/feature.c20
-rw-r--r--arch/powerpc/platforms/powermac/pci.c2
-rw-r--r--arch/powerpc/platforms/powermac/smp.c2
-rw-r--r--arch/powerpc/platforms/powermac/udbg_adb.c2
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S2
-rw-r--r--arch/powerpc/platforms/powernv/opal.c23
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c4
-rw-r--r--arch/powerpc/platforms/pseries/hvcserver.c4
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c20
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c20
-rw-r--r--arch/powerpc/platforms/pseries/setup.c3
-rw-r--r--arch/powerpc/xmon/xmon.c3
-rw-r--r--arch/x86/xen/grant-table.c10
-rw-r--r--arch/x86/xen/time.c2
-rw-r--r--block/bio-integrity.c12
-rw-r--r--block/bio.c3
-rw-r--r--block/blk-core.c13
-rw-r--r--block/blk-mq.c81
-rw-r--r--block/blk-mq.h2
-rw-r--r--block/blk-sysfs.c2
-rw-r--r--block/compat_ioctl.c6
-rw-r--r--block/ioctl.c5
-rw-r--r--block/partitions/aix.c4
-rw-r--r--block/partitions/amiga.c12
-rw-r--r--block/partitions/efi.c46
-rw-r--r--block/partitions/msdos.c13
-rw-r--r--block/scsi_ioctl.c20
-rw-r--r--drivers/block/drbd/Makefile1
-rw-r--r--drivers/block/drbd/drbd_actlog.c518
-rw-r--r--drivers/block/drbd/drbd_bitmap.c150
-rw-r--r--drivers/block/drbd/drbd_debugfs.c958
-rw-r--r--drivers/block/drbd/drbd_debugfs.h39
-rw-r--r--drivers/block/drbd/drbd_int.h383
-rw-r--r--drivers/block/drbd/drbd_interval.h4
-rw-r--r--drivers/block/drbd/drbd_main.c302
-rw-r--r--drivers/block/drbd/drbd_nl.c110
-rw-r--r--drivers/block/drbd/drbd_proc.c125
-rw-r--r--drivers/block/drbd/drbd_receiver.c316
-rw-r--r--drivers/block/drbd/drbd_req.c527
-rw-r--r--drivers/block/drbd/drbd_req.h1
-rw-r--r--drivers/block/drbd/drbd_state.c90
-rw-r--r--drivers/block/drbd/drbd_worker.c348
-rw-r--r--drivers/block/virtio_blk.c104
-rw-r--r--drivers/cpufreq/pmac64-cpufreq.c3
-rw-r--r--drivers/crypto/nx/nx-842.c30
-rw-r--r--drivers/edac/cell_edac.c3
-rw-r--r--drivers/hwmon/adm1025.c3
-rw-r--r--drivers/hwmon/adm1026.c3
-rw-r--r--drivers/hwmon/ads1015.c2
-rw-r--r--drivers/hwmon/asb100.c4
-rw-r--r--drivers/hwmon/dme1737.c33
-rw-r--r--drivers/hwmon/emc6w201.c4
-rw-r--r--drivers/hwmon/hih6130.c3
-rw-r--r--drivers/hwmon/lm87.c4
-rw-r--r--drivers/hwmon/lm92.c13
-rw-r--r--drivers/hwmon/pc87360.c3
-rw-r--r--drivers/hwmon/tmp103.c7
-rw-r--r--drivers/hwmon/vt1211.c3
-rw-r--r--drivers/hwmon/w83627hf.c3
-rw-r--r--drivers/hwmon/w83791d.c3
-rw-r--r--drivers/hwmon/w83793.c3
-rw-r--r--drivers/hwspinlock/Kconfig2
-rw-r--r--drivers/hwspinlock/omap_hwspinlock.c27
-rw-r--r--drivers/infiniband/core/agent.c16
-rw-r--r--drivers/infiniband/core/cm.c5
-rw-r--r--drivers/infiniband/core/iwcm.c27
-rw-r--r--drivers/infiniband/core/mad.c283
-rw-r--r--drivers/infiniband/core/mad_priv.h3
-rw-r--r--drivers/infiniband/core/sa_query.c2
-rw-r--r--drivers/infiniband/core/user_mad.c188
-rw-r--r--drivers/infiniband/core/uverbs.h1
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c93
-rw-r--r--drivers/infiniband/core/uverbs_main.c1
-rw-r--r--drivers/infiniband/hw/amso1100/c2_cq.c7
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c1
-rw-r--r--drivers/infiniband/hw/cxgb4/qp.c37
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h11
-rw-r--r--drivers/infiniband/hw/ipath/ipath_mad.c14
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c2
-rw-r--r--drivers/infiniband/hw/mlx4/main.c8
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h4
-rw-r--r--drivers/infiniband/hw/mlx4/mr.c88
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c2
-rw-r--r--drivers/infiniband/hw/mthca/mthca_mad.c2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma.h26
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c6
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c227
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.h2
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_main.c83
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_sli.h295
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c36
-rw-r--r--drivers/infiniband/hw/qib/qib_mad.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c133
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c9
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c47
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.h8
-rw-r--r--drivers/infiniband/ulp/iser/iser_verbs.c128
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c48
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c3
-rw-r--r--drivers/md/bcache/alloc.c2
-rw-r--r--drivers/md/bcache/bcache.h4
-rw-r--r--drivers/md/bcache/bset.c2
-rw-r--r--drivers/md/bcache/bset.h2
-rw-r--r--drivers/md/bcache/btree.c50
-rw-r--r--drivers/md/bcache/btree.h5
-rw-r--r--drivers/md/bcache/extents.c13
-rw-r--r--drivers/md/bcache/extents.h1
-rw-r--r--drivers/md/bcache/journal.c24
-rw-r--r--drivers/md/bcache/request.c3
-rw-r--r--drivers/md/bcache/super.c57
-rw-r--r--drivers/md/bcache/util.h4
-rw-r--r--drivers/md/bcache/writeback.c14
-rw-r--r--drivers/md/bcache/writeback.h3
-rw-r--r--drivers/md/dm-cache-metadata.c4
-rw-r--r--drivers/md/dm-cache-metadata.h8
-rw-r--r--drivers/md/dm-cache-target.c128
-rw-r--r--drivers/md/dm-crypt.c41
-rw-r--r--drivers/md/dm-io.c77
-rw-r--r--drivers/md/dm-mpath.c6
-rw-r--r--drivers/md/dm-switch.c67
-rw-r--r--drivers/md/dm-table.c86
-rw-r--r--drivers/md/dm-thin.c181
-rw-r--r--drivers/md/dm.h1
-rw-r--r--drivers/mfd/rtsx_usb.c1
-rw-r--r--drivers/mmc/card/block.c6
-rw-r--r--drivers/mmc/core/bus.c10
-rw-r--r--drivers/mmc/core/core.c3
-rw-r--r--drivers/mmc/core/mmc.c11
-rw-r--r--drivers/mmc/core/quirks.c2
-rw-r--r--drivers/mmc/core/sd_ops.c3
-rw-r--r--drivers/mmc/host/Kconfig28
-rw-r--r--drivers/mmc/host/Makefile1
-rw-r--r--drivers/mmc/host/dw_mmc.c98
-rw-r--r--drivers/mmc/host/dw_mmc.h5
-rw-r--r--drivers/mmc/host/mmci.c168
-rw-r--r--drivers/mmc/host/mmci.h20
-rw-r--r--drivers/mmc/host/moxart-mmc.c1
-rw-r--r--drivers/mmc/host/mxs-mmc.c3
-rw-r--r--drivers/mmc/host/omap_hsmmc.c283
-rw-r--r--drivers/mmc/host/s3cmci.c186
-rw-r--r--drivers/mmc/host/s3cmci.h4
-rw-r--r--drivers/mmc/host/sdhci-acpi.c4
-rw-r--r--drivers/mmc/host/sdhci-msm.c1
-rw-r--r--drivers/mmc/host/sdhci-pci.c38
-rw-r--r--drivers/mmc/host/sdhci-pci.h1
-rw-r--r--drivers/mmc/host/sdhci-pxav3.c13
-rw-r--r--drivers/mmc/host/sdhci-st.c176
-rw-r--r--drivers/mmc/host/sdhci-tegra.c2
-rw-r--r--drivers/mmc/host/sdhci.c144
-rw-r--r--drivers/mmc/host/sh_mmcif.c96
-rw-r--r--drivers/mmc/host/tmio_mmc_dma.c2
-rw-r--r--drivers/mmc/host/wmt-sdmmc.c33
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c7
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c3
-rw-r--r--drivers/net/ethernet/emulex/benet/be.h1
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c1
-rw-r--r--drivers/net/ethernet/emulex/benet/be_roce.c18
-rw-r--r--drivers/net/ethernet/emulex/benet/be_roce.h3
-rw-r--r--drivers/net/ethernet/ibm/ehea/Makefile2
-rw-r--r--drivers/net/ethernet/intel/e1000e/manage.c4
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_fcoe.c1
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c18
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/cmd.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/fw.c91
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/main.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mlx4.h3
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/mr.c160
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/resource_tracker.c26
-rw-r--r--drivers/net/ethernet/myricom/myri10ge/myri10ge.c88
-rw-r--r--drivers/net/ethernet/sun/sunvnet.c38
-rw-r--r--drivers/net/ethernet/sun/sunvnet.h4
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c1
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_axienet_main.c1
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c1
-rw-r--r--drivers/net/irda/donauboe.c15
-rw-r--r--drivers/net/macvlan.c12
-rw-r--r--drivers/net/wireless/ath/carl9170/carl9170.h1
-rw-r--r--drivers/net/wireless/ath/carl9170/usb.c31
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c4
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/pcie.c3
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2200.c1
-rw-r--r--drivers/net/wireless/iwlwifi/mvm/mac80211.c3
-rw-r--r--drivers/net/xen-netback/common.h5
-rw-r--r--drivers/net/xen-netback/interface.c56
-rw-r--r--drivers/net/xen-netback/netback.c26
-rw-r--r--drivers/net/xen-netback/xenbus.c17
-rw-r--r--drivers/of/Kconfig3
-rw-r--r--drivers/of/Makefile4
-rw-r--r--drivers/of/base.c451
-rw-r--r--drivers/of/device.c4
-rw-r--r--drivers/of/dynamic.c660
-rw-r--r--drivers/of/fdt.c22
-rw-r--r--drivers/of/of_private.h59
-rw-r--r--drivers/of/of_reserved_mem.c70
-rw-r--r--drivers/of/platform.c32
-rw-r--r--drivers/of/selftest.c235
-rw-r--r--drivers/of/testcase-data/testcases.dts15
-rw-r--r--drivers/of/testcase-data/testcases.dtsi4
-rw-r--r--drivers/pci/hotplug/rpaphp_core.c4
-rw-r--r--drivers/scsi/cxgbi/cxgb3i/Kconfig2
-rw-r--r--drivers/scsi/cxgbi/cxgb4i/Kconfig2
-rw-r--r--drivers/scsi/scsi_transport_srp.c3
-rw-r--r--drivers/tty/ehv_bytechan.c43
-rw-r--r--drivers/tty/hvc/hvc_opal.c15
-rw-r--r--drivers/tty/hvc/hvc_vio.c29
-rw-r--r--drivers/tty/serial/pmac_zilog.c9
-rw-r--r--drivers/tty/serial/serial_core.c3
-rw-r--r--drivers/vfio/Kconfig6
-rw-r--r--drivers/vfio/Makefile2
-rw-r--r--drivers/vfio/pci/vfio_pci.c161
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h3
-rw-r--r--drivers/vfio/vfio_spapr_eeh.c17
-rw-r--r--include/linux/bio.h1
-rw-r--r--include/linux/blkdev.h4
-rw-r--r--include/linux/drbd.h4
-rw-r--r--include/linux/drbd_genl.h4
-rw-r--r--include/linux/drbd_limits.h6
-rw-r--r--include/linux/mlx4/cmd.h7
-rw-r--r--include/linux/mlx4/device.h17
-rw-r--r--include/linux/mmc/dw_mmc.h2
-rw-r--r--include/linux/mmc/sdhci.h3
-rw-r--r--include/linux/of.h87
-rw-r--r--include/linux/of_platform.h7
-rw-r--r--include/linux/of_reserved_mem.h7
-rw-r--r--include/linux/platform_data/mmc-omap.h1
-rw-r--r--include/linux/printk.h3
-rw-r--r--include/linux/rhashtable.h17
-rw-r--r--include/linux/vfio.h6
-rw-r--r--include/net/inet_connection_sock.h1
-rw-r--r--include/net/sock.h1
-rw-r--r--include/net/tcp.h7
-rw-r--r--include/rdma/ib_mad.h18
-rw-r--r--include/rdma/ib_verbs.h10
-rw-r--r--include/scsi/sg.h4
-rw-r--r--include/trace/events/bcache.h21
-rw-r--r--include/trace/events/thp.h88
-rw-r--r--include/uapi/linux/bsg.h11
-rw-r--r--include/uapi/linux/virtio_blk.h5
-rw-r--r--include/uapi/rdma/ib_user_mad.h42
-rw-r--r--include/uapi/rdma/ib_user_verbs.h16
-rw-r--r--include/uapi/rdma/rdma_user_cm.h1
-rw-r--r--init/Kconfig2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/printk/printk.c12
-rw-r--r--kernel/seccomp.c10
-rw-r--r--kernel/time/timekeeping.c5
-rw-r--r--lib/Kconfig.debug24
-rw-r--r--lib/lru_cache.c23
-rw-r--r--lib/rhashtable.c10
-rw-r--r--mm/hugetlb_cgroup.c1
-rw-r--r--net/atm/lec.c5
-rw-r--r--net/atm/svc.c60
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_input.c27
-rw-r--r--net/ipv4/tcp_ipv4.c5
-rw-r--r--net/ipv4/tcp_metrics.c6
-rw-r--r--net/ipv4/tcp_output.c24
-rw-r--r--net/ipv6/sit.c6
-rw-r--r--net/ipv6/tcp_ipv6.c3
-rw-r--r--net/irda/irlap_frame.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/openvswitch/vport.c4
-rw-r--r--scripts/Kbuild.include14
-rw-r--r--scripts/Makefile.clean4
-rw-r--r--scripts/Makefile.extrawarn21
-rw-r--r--scripts/Makefile.host61
-rw-r--r--scripts/coccinelle/api/alloc/alloc_cast.cocci72
-rw-r--r--scripts/coccinelle/misc/array_size.cocci87
-rw-r--r--scripts/coccinelle/misc/badty.cocci76
-rw-r--r--scripts/coccinelle/misc/bugon.cocci (renamed from scripts/coccinelle/api/alloc/drop_kmalloc_cast.cocci)47
-rw-r--r--scripts/coccinelle/null/badzero.cocci3
-rw-r--r--sound/ppc/pmac.c6
319 files changed, 9170 insertions, 3788 deletions
diff --git a/.gitignore b/.gitignore
index f4c0b091dcf4..e213b27f3921 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@
34*.gcno 34*.gcno
35modules.builtin 35modules.builtin
36Module.symvers 36Module.symvers
37*.dwo
37 38
38# 39#
39# Top-level generic files 40# Top-level generic files
diff --git a/Documentation/device-mapper/switch.txt b/Documentation/device-mapper/switch.txt
index 2fa749387be8..8897d0494838 100644
--- a/Documentation/device-mapper/switch.txt
+++ b/Documentation/device-mapper/switch.txt
@@ -106,6 +106,11 @@ which paths.
106 The path number in the range 0 ... (<num_paths> - 1). 106 The path number in the range 0 ... (<num_paths> - 1).
107 Expressed in hexadecimal (WITHOUT any prefix like 0x). 107 Expressed in hexadecimal (WITHOUT any prefix like 0x).
108 108
109R<n>,<m>
110 This parameter allows repetitive patterns to be loaded quickly. <n> and <m>
111 are hexadecimal numbers. The last <n> mappings are repeated in the next <m>
112 slots.
113
109Status 114Status
110====== 115======
111 116
@@ -124,3 +129,10 @@ Create a switch device with 64kB region size:
124Set mappings for the first 7 entries to point to devices switch0, switch1, 129Set mappings for the first 7 entries to point to devices switch0, switch1,
125switch2, switch0, switch1, switch2, switch1: 130switch2, switch0, switch1, switch2, switch1:
126 dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1 131 dmsetup message switch 0 set_region_mappings 0:0 :1 :2 :0 :1 :2 :1
132
133Set repetitive mapping. This command:
134 dmsetup message switch 0 set_region_mappings 1000:1 :2 R2,10
135is equivalent to:
136 dmsetup message switch 0 set_region_mappings 1000:1 :2 :1 :2 :1 :2 :1 :2 \
137 :1 :2 :1 :2 :1 :2 :1 :2 :1 :2
138
diff --git a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
index 532b1d440abc..6cd3525d0e09 100644
--- a/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/exynos-dw-mshc.txt
@@ -46,13 +46,14 @@ Required Properties:
46 - if CIU clock divider value is 0 (that is divide by 1), both tx and rx 46 - if CIU clock divider value is 0 (that is divide by 1), both tx and rx
47 phase shift clocks should be 0. 47 phase shift clocks should be 0.
48 48
49Required properties for a slot: 49Required properties for a slot (Deprecated - recommend using one slot per host):
50 50
51* gpios: specifies a list of gpios used for command, clock and data bus. The 51* gpios: specifies a list of gpios used for command, clock and data bus. The
52 first gpio is the command line and the second gpio is the clock line. The 52 first gpio is the command line and the second gpio is the clock line. The
53 rest of the gpios (depending on the bus-width property) are the data lines in 53 rest of the gpios (depending on the bus-width property) are the data lines in
54 no particular order. The format of the gpio specifier depends on the gpio 54 no particular order. The format of the gpio specifier depends on the gpio
55 controller. 55 controller.
56(Deprecated - Refer to Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt)
56 57
57Example: 58Example:
58 59
@@ -69,21 +70,13 @@ Example:
69 70
70 dwmmc0@12200000 { 71 dwmmc0@12200000 {
71 num-slots = <1>; 72 num-slots = <1>;
72 supports-highspeed; 73 cap-mmc-highspeed;
74 cap-sd-highspeed;
73 broken-cd; 75 broken-cd;
74 fifo-depth = <0x80>; 76 fifo-depth = <0x80>;
75 card-detect-delay = <200>; 77 card-detect-delay = <200>;
76 samsung,dw-mshc-ciu-div = <3>; 78 samsung,dw-mshc-ciu-div = <3>;
77 samsung,dw-mshc-sdr-timing = <2 3>; 79 samsung,dw-mshc-sdr-timing = <2 3>;
78 samsung,dw-mshc-ddr-timing = <1 2>; 80 samsung,dw-mshc-ddr-timing = <1 2>;
79 81 bus-width = <8>;
80 slot@0 {
81 reg = <0>;
82 bus-width = <8>;
83 gpios = <&gpc0 0 2 0 3>, <&gpc0 1 2 0 3>,
84 <&gpc1 0 2 3 3>, <&gpc1 1 2 3 3>,
85 <&gpc1 2 2 3 3>, <&gpc1 3 2 3 3>,
86 <&gpc0 3 2 3 3>, <&gpc0 4 2 3 3>,
87 <&gpc0 5 2 3 3>, <&gpc0 6 2 3 3>;
88 };
89 }; 82 };
diff --git a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
index e5bc49f764d1..3b3544931437 100644
--- a/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/k3-dw-mshc.txt
@@ -34,13 +34,11 @@ Example:
34 num-slots = <1>; 34 num-slots = <1>;
35 vmmc-supply = <&ldo12>; 35 vmmc-supply = <&ldo12>;
36 fifo-depth = <0x100>; 36 fifo-depth = <0x100>;
37 supports-highspeed;
38 pinctrl-names = "default"; 37 pinctrl-names = "default";
39 pinctrl-0 = <&sd_pmx_pins &sd_cfg_func1 &sd_cfg_func2>; 38 pinctrl-0 = <&sd_pmx_pins &sd_cfg_func1 &sd_cfg_func2>;
40 slot@0 { 39 bus-width = <4>;
41 reg = <0>; 40 disable-wp;
42 bus-width = <4>; 41 cd-gpios = <&gpio10 3 0>;
43 disable-wp; 42 cap-mmc-highspeed;
44 cd-gpios = <&gpio10 3 0>; 43 cap-sd-highspeed;
45 };
46 }; 44 };
diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index 3c18001dfd5d..431716e37a39 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -34,8 +34,8 @@ Optional properties:
34- cap-power-off-card: powering off the card is safe 34- cap-power-off-card: powering off the card is safe
35- cap-sdio-irq: enable SDIO IRQ signalling on this interface 35- cap-sdio-irq: enable SDIO IRQ signalling on this interface
36- full-pwr-cycle: full power cycle of the card is supported 36- full-pwr-cycle: full power cycle of the card is supported
37- mmc-highspeed-ddr-1_8v: eMMC high-speed DDR mode(1.8V I/O) is supported 37- mmc-ddr-1_8v: eMMC high-speed DDR mode(1.8V I/O) is supported
38- mmc-highspeed-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported 38- mmc-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
39- mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported 39- mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported
40- mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported 40- mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
41- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported 41- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
diff --git a/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
new file mode 100644
index 000000000000..299081f94abd
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/renesas,mmcif.txt
@@ -0,0 +1,32 @@
1* Renesas Multi Media Card Interface (MMCIF) Controller
2
3This file documents differences between the core properties in mmc.txt
4and the properties used by the MMCIF device.
5
6
7Required properties:
8
9- compatible: must contain one of the following
10 - "renesas,mmcif-r8a7740" for the MMCIF found in r8a7740 SoCs
11 - "renesas,mmcif-r8a7790" for the MMCIF found in r8a7790 SoCs
12 - "renesas,mmcif-r8a7791" for the MMCIF found in r8a7791 SoCs
13 - "renesas,sh-mmcif" for the generic MMCIF
14
15- clocks: reference to the functional clock
16
17- dmas: reference to the DMA channels, one per channel name listed in the
18 dma-names property.
19- dma-names: must contain "tx" for the transmit DMA channel and "rx" for the
20 receive DMA channel.
21
22
23Example: R8A7790 (R-Car H2) MMCIF0
24
25 mmcif0: mmc@ee200000 {
26 compatible = "renesas,mmcif-r8a7790", "renesas,sh-mmcif";
27 reg = <0 0xee200000 0 0x80>;
28 interrupts = <0 169 IRQ_TYPE_LEVEL_HIGH>;
29 clocks = <&mstp3_clks R8A7790_CLK_MMCIF0>;
30 dmas = <&dmac0 0xd1>, <&dmac0 0xd2>;
31 dma-names = "tx", "rx";
32 };
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index 81b33b5b20fc..485483a63d8c 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -27,8 +27,8 @@ Example:
27 bus-width = <8>; 27 bus-width = <8>;
28 non-removable; 28 non-removable;
29 29
30 vmmc = <&pm8941_l20>; 30 vmmc-supply = <&pm8941_l20>;
31 vqmmc = <&pm8941_s3>; 31 vqmmc-supply = <&pm8941_s3>;
32 32
33 pinctrl-names = "default"; 33 pinctrl-names = "default";
34 pinctrl-0 = <&sdc1_clk &sdc1_cmd &sdc1_data>; 34 pinctrl-0 = <&sdc1_clk &sdc1_cmd &sdc1_data>;
@@ -44,8 +44,8 @@ Example:
44 bus-width = <4>; 44 bus-width = <4>;
45 cd-gpios = <&msmgpio 62 0x1>; 45 cd-gpios = <&msmgpio 62 0x1>;
46 46
47 vmmc = <&pm8941_l21>; 47 vmmc-supply = <&pm8941_l21>;
48 vqmmc = <&pm8941_l13>; 48 vqmmc-supply = <&pm8941_l13>;
49 49
50 pinctrl-names = "default"; 50 pinctrl-names = "default";
51 pinctrl-0 = <&sdc2_clk &sdc2_cmd &sdc2_data>; 51 pinctrl-0 = <&sdc2_clk &sdc2_cmd &sdc2_data>;
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-st.txt b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
new file mode 100644
index 000000000000..7527db447a35
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/sdhci-st.txt
@@ -0,0 +1,33 @@
1* STMicroelectronics sdhci-st MMC/SD controller
2
3This file documents the differences between the core properties in
4Documentation/devicetree/bindings/mmc/mmc.txt and the properties
5used by the sdhci-st driver.
6
7Required properties:
8- compatible : Must be "st,sdhci"
9- clock-names : Should be "mmc"
10 See: Documentation/devicetree/bindings/resource-names.txt
11- clocks : Phandle of the clock used by the sdhci controller
12 See: Documentation/devicetree/bindings/clock/clock-bindings.txt
13
14Optional properties:
15- non-removable: non-removable slot
16 See: Documentation/devicetree/bindings/mmc/mmc.txt
17- bus-width: Number of data lines
18 See: Documentation/devicetree/bindings/mmc/mmc.txt
19
20Example:
21
22mmc0: sdhci@fe81e000 {
23 compatible = "st,sdhci";
24 status = "disabled";
25 reg = <0xfe81e000 0x1000>;
26 interrupts = <GIC_SPI 127 IRQ_TYPE_NONE>;
27 interrupt-names = "mmcirq";
28 pinctrl-names = "default";
29 pinctrl-0 = <&pinctrl_mmc0>;
30 clock-names = "mmc";
31 clocks = <&clk_s_a1_ls 1>;
32	bus-width = <8>;
33};
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
index 2d4a7258a10d..346c6095a615 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
@@ -67,7 +67,8 @@ Optional properties:
67* card-detect-delay: Delay in milli-seconds before detecting card after card 67* card-detect-delay: Delay in milli-seconds before detecting card after card
68 insert event. The default value is 0. 68 insert event. The default value is 0.
69 69
70* supports-highspeed: Enables support for high speed cards (up to 50MHz) 70* supports-highspeed (DEPRECATED): Enables support for high speed cards (up to 50MHz)
71 (use "cap-mmc-highspeed" or "cap-sd-highspeed" instead)
71 72
72* broken-cd: as documented in mmc core bindings. 73* broken-cd: as documented in mmc core bindings.
73 74
@@ -98,14 +99,11 @@ board specific portions as listed below.
98 clock-frequency = <400000000>; 99 clock-frequency = <400000000>;
99 clock-freq-min-max = <400000 200000000>; 100 clock-freq-min-max = <400000 200000000>;
100 num-slots = <1>; 101 num-slots = <1>;
101 supports-highspeed;
102 broken-cd; 102 broken-cd;
103 fifo-depth = <0x80>; 103 fifo-depth = <0x80>;
104 card-detect-delay = <200>; 104 card-detect-delay = <200>;
105 vmmc-supply = <&buck8>; 105 vmmc-supply = <&buck8>;
106 106 bus-width = <8>;
107 slot@0 { 107 cap-mmc-highspeed;
108 reg = <0>; 108 cap-sd-highspeed;
109 bus-width = <8>;
110 };
111 }; 109 };
diff --git a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
index ce8056116fb0..76bf087bc889 100644
--- a/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
+++ b/Documentation/devicetree/bindings/mmc/ti-omap-hsmmc.txt
@@ -12,6 +12,7 @@ Required properties:
12 Should be "ti,omap3-hsmmc", for OMAP3 controllers 12 Should be "ti,omap3-hsmmc", for OMAP3 controllers
13 Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0 13 Should be "ti,omap3-pre-es3-hsmmc" for OMAP3 controllers pre ES3.0
14 Should be "ti,omap4-hsmmc", for OMAP4 controllers 14 Should be "ti,omap4-hsmmc", for OMAP4 controllers
15 Should be "ti,am33xx-hsmmc", for AM335x controllers
15- ti,hwmods: Must be "mmc<n>", n is controller instance starting 1 16- ti,hwmods: Must be "mmc<n>", n is controller instance starting 1
16 17
17Optional properties: 18Optional properties:
@@ -56,3 +57,56 @@ Examples:
56 &edma 25>; 57 &edma 25>;
57 dma-names = "tx", "rx"; 58 dma-names = "tx", "rx";
58 }; 59 };
60
61[workaround for missing swakeup on am33xx]
62
63This SoC is missing the swakeup line, so it will not detect SDIO IRQs
64while in suspend.
65
66 ------
67 | PRCM |
68 ------
69 ^ |
70 swakeup | | fclk
71 | v
72 ------ ------- -----
73 | card | -- CIRQ --> | hsmmc | -- IRQ --> | CPU |
74 ------ ------- -----
75
76In suspend the fclk is off and the module is dysfunctional; even register reads
77will fail. A small logic block in the host will request an fclk restore when an
78external event is detected. Once the clock is restored, the host detects the
79event normally. Since the am33xx doesn't have this line, it never wakes from
80suspend.
81
82The workaround is to reconfigure the dat1 line as a GPIO upon suspend. To make
83this work, we need to set the named pinctrl states "default" and "idle".
84Prepare idle to remux dat1 as a gpio, and default to remux it back as sdio
85dat1. The MMC driver will then toggle between idle and default state during
86runtime.
87
88In summary:
891. select matching 'compatible' section, see example below.
902. specify pinctrl states "default" and "idle", "sleep" is optional.
913. specify the gpio irq used for detecting sdio irq in suspend
92
93If the configuration is incomplete, a warning message is emitted: "falling back to
94polling". Also check the "sdio irq mode" in /sys/kernel/debug/mmc0/regs. Note that
95not every application needs SDIO IRQs, e.g. MMC cards.
96
97 mmc1: mmc@48060100 {
98 compatible = "ti,am33xx-hsmmc";
99 ...
100	pinctrl-names = "default", "idle", "sleep";
101 pinctrl-0 = <&mmc1_pins>;
102 pinctrl-1 = <&mmc1_idle>;
103 pinctrl-2 = <&mmc1_sleep>;
104 ...
105 interrupts-extended = <&intc 64 &gpio2 28 0>;
106 };
107
108 mmc1_idle : pinmux_cirq_pin {
109 pinctrl-single,pins = <
110 0x0f8 0x3f /* GPIO2_28 */
111 >;
112 };
diff --git a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
index 6a2a1160a70d..fa0f327cde01 100644
--- a/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/tmio_mmc.txt
@@ -18,6 +18,7 @@ Required properties:
18 "renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC 18 "renesas,sdhi-r8a7778" - SDHI IP on R8A7778 SoC
19 "renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC 19 "renesas,sdhi-r8a7779" - SDHI IP on R8A7779 SoC
20 "renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC 20 "renesas,sdhi-r8a7790" - SDHI IP on R8A7790 SoC
21 "renesas,sdhi-r8a7791" - SDHI IP on R8A7791 SoC
21 22
22Optional properties: 23Optional properties:
23- toshiba,mmc-wrprotect-disable: write-protect detection is unavailable 24- toshiba,mmc-wrprotect-disable: write-protect detection is unavailable
diff --git a/Documentation/devicetree/changesets.txt b/Documentation/devicetree/changesets.txt
new file mode 100644
index 000000000000..935ba5acc34e
--- /dev/null
+++ b/Documentation/devicetree/changesets.txt
@@ -0,0 +1,40 @@
1A DT changeset is a method which allows one to apply changes
2in the live tree in such a way that either the full set of changes
3will be applied, or none of them will be. If an error occurs partway
4through applying the changeset, then the tree will be rolled back to the
5previous state. A changeset can also be removed after it has been
6applied.
7
8When a changeset is applied, all of the changes get applied to the tree
9at once before emitting OF_RECONFIG notifiers. This is so that the
10receiver sees a complete and consistent state of the tree when it
11receives the notifier.
12
13The sequence of a changeset is as follows.
14
151. of_changeset_init() - initializes a changeset
16
172. A number of DT tree change calls, of_changeset_attach_node(),
18of_changeset_detach_node(), of_changeset_add_property(),
19of_changeset_remove_property, of_changeset_update_property() to prepare
20a set of changes. No changes to the active tree are made at this point.
21All the change operations are recorded in the of_changeset 'entries'
22list.
23
243. mutex_lock(of_mutex) - starts a changeset; the global of_mutex
25ensures there can only be one editor at a time.
26
274. of_changeset_apply() - Apply the changes to the tree. Either the
28entire changeset will get applied, or if there is an error the tree will
29be restored to the previous state
30
315. mutex_unlock(of_mutex) - All operations complete, release the mutex
32
33If a successfully applied changeset needs to be removed, it can be done
34with the following sequence.
35
361. mutex_lock(of_mutex)
37
382. of_changeset_revert()
39
403. mutex_unlock(of_mutex)
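
A rough C sketch of the apply/revert sequence described above (not part of the patch itself; "np" and "prop" are placeholder objects prepared by the caller, of_mutex is only reachable from code that can include drivers/of/of_private.h, and error handling is trimmed):

    /* needs <linux/of.h>; of_mutex is declared in drivers/of/of_private.h */
    struct of_changeset ocs;
    int ret;

    of_changeset_init(&ocs);

    /* record the intended edits; the live tree is not touched yet */
    ret = of_changeset_attach_node(&ocs, np);
    if (!ret)
            ret = of_changeset_add_property(&ocs, np, prop);

    if (!ret) {
            mutex_lock(&of_mutex);
            ret = of_changeset_apply(&ocs);         /* all or nothing */
            mutex_unlock(&of_mutex);
    }

    /* ... later, if the whole set needs to be undone ... */
    mutex_lock(&of_mutex);
    of_changeset_revert(&ocs);
    mutex_unlock(&of_mutex);
    of_changeset_destroy(&ocs);                     /* free the recorded entries */
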
diff --git a/Documentation/devicetree/todo.txt b/Documentation/devicetree/todo.txt
new file mode 100644
index 000000000000..c3cf0659bd19
--- /dev/null
+++ b/Documentation/devicetree/todo.txt
@@ -0,0 +1,11 @@
1Todo list for devicetree:
2
3=== General structure ===
4- Switch from custom lists to (h)list_head for nodes and properties structure
5- Remove of_allnodes list and iterate using list of child nodes alone
6
7=== CONFIG_OF_DYNAMIC ===
8- Switch to RCU for tree updates and get rid of global spinlock
9- Document node lifecycle for CONFIG_OF_DYNAMIC
10- Always set ->full_name at of_attach_node() time
11- pseries: Get rid of open-coded tree modification from arch/powerpc/platforms/pseries/dlpar.c
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 8a366959f5cc..7aca13a54a3a 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -26,6 +26,11 @@ Creating MAD agents
26 ioctl. Also, all agents registered through a file descriptor will 26 ioctl. Also, all agents registered through a file descriptor will
27 be unregistered when the descriptor is closed. 27 be unregistered when the descriptor is closed.
28 28
29 2014 -- a new registration ioctl is now provided which allows additional
30  fields to be specified during registration.
31  Users of this registration call implicitly enable the use of
32 pkey_index (see below).
33
29Receiving MADs 34Receiving MADs
30 35
31 MADs are received using read(). The receive side now supports 36 MADs are received using read(). The receive side now supports
@@ -104,10 +109,10 @@ P_Key Index Handling
104 The old ib_umad interface did not allow setting the P_Key index for 109 The old ib_umad interface did not allow setting the P_Key index for
105 MADs that are sent and did not provide a way for obtaining the P_Key 110 MADs that are sent and did not provide a way for obtaining the P_Key
106 index of received MADs. A new layout for struct ib_user_mad_hdr 111 index of received MADs. A new layout for struct ib_user_mad_hdr
107 with a pkey_index member has been defined; however, to preserve 112 with a pkey_index member has been defined; however, to preserve binary
108 binary compatibility with older applications, this new layout will 113 compatibility with older applications, this new layout will not be used
109  not be used unless the IB_USER_MAD_ENABLE_PKEY ioctl is called 114  unless either the IB_USER_MAD_ENABLE_PKEY or IB_USER_MAD_REGISTER_AGENT2 ioctl
110  before a file descriptor is used for anything else. 115  is called before the file descriptor is used for anything else.
111 116
112 In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented 117 In September 2008, the IB_USER_MAD_ABI_VERSION will be incremented
113 to 6, the new layout of struct ib_user_mad_hdr will be used by 118 to 6, the new layout of struct ib_user_mad_hdr will be used by
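
For illustration only, a minimal userspace sketch of opting in to the pkey_index-aware layout (the device path and the bare-bones error handling are placeholders, not part of this patch):

    #include <fcntl.h>
    #include <sys/ioctl.h>
    #include <rdma/ib_user_mad.h>

    int main(void)
    {
            /* device path is illustrative; the first umad port is usually umad0 */
            int fd = open("/dev/infiniband/umad0", O_RDWR);

            if (fd < 0)
                    return 1;

            /* opt in before the descriptor is used for anything else */
            if (ioctl(fd, IB_USER_MAD_ENABLE_PKEY) < 0)
                    return 1;

            /* read()/write() on fd now use the struct ib_user_mad_hdr
             * layout that carries pkey_index */
            return 0;
    }
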
diff --git a/Documentation/kbuild/00-INDEX b/Documentation/kbuild/00-INDEX
index e8d2b6d83a3d..8c5e6aa78004 100644
--- a/Documentation/kbuild/00-INDEX
+++ b/Documentation/kbuild/00-INDEX
@@ -1,5 +1,7 @@
100-INDEX 100-INDEX
2 - this file: info on the kernel build process 2 - this file: info on the kernel build process
3headers_install.txt
4 - how to export Linux headers for use by userspace
3kbuild.txt 5kbuild.txt
4 - developer information on kbuild 6 - developer information on kbuild
5kconfig.txt 7kconfig.txt
diff --git a/Documentation/make/headers_install.txt b/Documentation/kbuild/headers_install.txt
index 951eb9f1e040..951eb9f1e040 100644
--- a/Documentation/make/headers_install.txt
+++ b/Documentation/kbuild/headers_install.txt
diff --git a/Documentation/kbuild/makefiles.txt b/Documentation/kbuild/makefiles.txt
index c600e2f44a62..764f5991a3fc 100644
--- a/Documentation/kbuild/makefiles.txt
+++ b/Documentation/kbuild/makefiles.txt
@@ -23,11 +23,10 @@ This document describes the Linux kernel Makefiles.
23 === 4 Host Program support 23 === 4 Host Program support
24 --- 4.1 Simple Host Program 24 --- 4.1 Simple Host Program
25 --- 4.2 Composite Host Programs 25 --- 4.2 Composite Host Programs
26 --- 4.3 Defining shared libraries 26 --- 4.3 Using C++ for host programs
27 --- 4.4 Using C++ for host programs 27 --- 4.4 Controlling compiler options for host programs
28 --- 4.5 Controlling compiler options for host programs 28 --- 4.5 When host programs are actually built
29 --- 4.6 When host programs are actually built 29 --- 4.6 Using hostprogs-$(CONFIG_FOO)
30 --- 4.7 Using hostprogs-$(CONFIG_FOO)
31 30
32 === 5 Kbuild clean infrastructure 31 === 5 Kbuild clean infrastructure
33 32
@@ -643,29 +642,7 @@ Both possibilities are described in the following.
643 Finally, the two .o files are linked to the executable, lxdialog. 642 Finally, the two .o files are linked to the executable, lxdialog.
644 Note: The syntax <executable>-y is not permitted for host-programs. 643 Note: The syntax <executable>-y is not permitted for host-programs.
645 644
646--- 4.3 Defining shared libraries 645--- 4.3 Using C++ for host programs
647
648 Objects with extension .so are considered shared libraries, and
649 will be compiled as position independent objects.
650 Kbuild provides support for shared libraries, but the usage
651 shall be restricted.
652 In the following example the libkconfig.so shared library is used
653 to link the executable conf.
654
655 Example:
656 #scripts/kconfig/Makefile
657 hostprogs-y := conf
658 conf-objs := conf.o libkconfig.so
659 libkconfig-objs := expr.o type.o
660
661 Shared libraries always require a corresponding -objs line, and
662 in the example above the shared library libkconfig is composed by
663 the two objects expr.o and type.o.
664 expr.o and type.o will be built as position independent code and
665 linked as a shared library libkconfig.so. C++ is not supported for
666 shared libraries.
667
668--- 4.4 Using C++ for host programs
669 646
670 kbuild offers support for host programs written in C++. This was 647 kbuild offers support for host programs written in C++. This was
671 introduced solely to support kconfig, and is not recommended 648 introduced solely to support kconfig, and is not recommended
@@ -688,7 +665,7 @@ Both possibilities are described in the following.
688 qconf-cxxobjs := qconf.o 665 qconf-cxxobjs := qconf.o
689 qconf-objs := check.o 666 qconf-objs := check.o
690 667
691--- 4.5 Controlling compiler options for host programs 668--- 4.4 Controlling compiler options for host programs
692 669
693 When compiling host programs, it is possible to set specific flags. 670 When compiling host programs, it is possible to set specific flags.
694 The programs will always be compiled utilising $(HOSTCC) passed 671 The programs will always be compiled utilising $(HOSTCC) passed
@@ -716,7 +693,7 @@ Both possibilities are described in the following.
716 When linking qconf, it will be passed the extra option 693 When linking qconf, it will be passed the extra option
717 "-L$(QTDIR)/lib". 694 "-L$(QTDIR)/lib".
718 695
719--- 4.6 When host programs are actually built 696--- 4.5 When host programs are actually built
720 697
721 Kbuild will only build host-programs when they are referenced 698 Kbuild will only build host-programs when they are referenced
722 as a prerequisite. 699 as a prerequisite.
@@ -747,7 +724,7 @@ Both possibilities are described in the following.
747 This will tell kbuild to build lxdialog even if not referenced in 724 This will tell kbuild to build lxdialog even if not referenced in
748 any rule. 725 any rule.
749 726
750--- 4.7 Using hostprogs-$(CONFIG_FOO) 727--- 4.6 Using hostprogs-$(CONFIG_FOO)
751 728
752 A typical pattern in a Kbuild file looks like this: 729 A typical pattern in a Kbuild file looks like this:
753 730
diff --git a/Makefile b/Makefile
index a897c50db515..6aace6750567 100644
--- a/Makefile
+++ b/Makefile
@@ -372,6 +372,7 @@ GENKSYMS = scripts/genksyms/genksyms
372INSTALLKERNEL := installkernel 372INSTALLKERNEL := installkernel
373DEPMOD = /sbin/depmod 373DEPMOD = /sbin/depmod
374PERL = perl 374PERL = perl
375PYTHON = python
375CHECK = sparse 376CHECK = sparse
376 377
377CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \ 378CHECKFLAGS := -D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ \
@@ -422,7 +423,7 @@ KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(S
422export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION 423export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
423export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC 424export ARCH SRCARCH CONFIG_SHELL HOSTCC HOSTCFLAGS CROSS_COMPILE AS LD CC
424export CPP AR NM STRIP OBJCOPY OBJDUMP 425export CPP AR NM STRIP OBJCOPY OBJDUMP
425export MAKE AWK GENKSYMS INSTALLKERNEL PERL UTS_MACHINE 426export MAKE AWK GENKSYMS INSTALLKERNEL PERL PYTHON UTS_MACHINE
426export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS 427export HOSTCXX HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
427 428
428export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS 429export KBUILD_CPPFLAGS NOSTDINC_FLAGS LINUXINCLUDE OBJCOPYFLAGS LDFLAGS
@@ -687,6 +688,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
687# source of a reference will be _MergedGlobals and not on of the whitelisted names. 688# source of a reference will be _MergedGlobals and not on of the whitelisted names.
688# See modpost pattern 2 689# See modpost pattern 2
689KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,) 690KBUILD_CFLAGS += $(call cc-option, -mno-global-merge,)
691KBUILD_CFLAGS += $(call cc-option, -fcatch-undefined-behavior)
690else 692else
691 693
692# This warning generated too much noise in a regular build. 694# This warning generated too much noise in a regular build.
@@ -710,9 +712,16 @@ endif
710KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments) 712KBUILD_CFLAGS += $(call cc-option, -fno-var-tracking-assignments)
711 713
712ifdef CONFIG_DEBUG_INFO 714ifdef CONFIG_DEBUG_INFO
715ifdef CONFIG_DEBUG_INFO_SPLIT
716KBUILD_CFLAGS += $(call cc-option, -gsplit-dwarf, -g)
717else
713KBUILD_CFLAGS += -g 718KBUILD_CFLAGS += -g
719endif
714KBUILD_AFLAGS += -Wa,-gdwarf-2 720KBUILD_AFLAGS += -Wa,-gdwarf-2
715endif 721endif
722ifdef CONFIG_DEBUG_INFO_DWARF4
723KBUILD_CFLAGS += $(call cc-option, -gdwarf-4,)
724endif
716 725
717ifdef CONFIG_DEBUG_INFO_REDUCED 726ifdef CONFIG_DEBUG_INFO_REDUCED
718KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) \ 727KBUILD_CFLAGS += $(call cc-option, -femit-struct-debug-baseonly) \
@@ -1055,6 +1064,13 @@ headers_check: headers_install
1055 $(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1 1064 $(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1
1056 1065
1057# --------------------------------------------------------------------------- 1066# ---------------------------------------------------------------------------
1067# Kernel selftest
1068
1069PHONY += kselftest
1070kselftest:
1071 $(Q)$(MAKE) -C tools/testing/selftests run_tests
1072
1073# ---------------------------------------------------------------------------
1058# Modules 1074# Modules
1059 1075
1060ifdef CONFIG_MODULES 1076ifdef CONFIG_MODULES
@@ -1241,9 +1257,9 @@ help:
1241 @echo ' tags/TAGS - Generate tags file for editors' 1257 @echo ' tags/TAGS - Generate tags file for editors'
1242 @echo ' cscope - Generate cscope index' 1258 @echo ' cscope - Generate cscope index'
1243 @echo ' gtags - Generate GNU GLOBAL index' 1259 @echo ' gtags - Generate GNU GLOBAL index'
1244 @echo ' kernelrelease - Output the release version string' 1260 @echo ' kernelrelease - Output the release version string (use with make -s)'
1245 @echo ' kernelversion - Output the version stored in Makefile' 1261 @echo ' kernelversion - Output the version stored in Makefile (use with make -s)'
1246 @echo ' image_name - Output the image name' 1262 @echo ' image_name - Output the image name (use with make -s)'
1247 @echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \ 1263 @echo ' headers_install - Install sanitised kernel headers to INSTALL_HDR_PATH'; \
1248 echo ' (default: $(INSTALL_HDR_PATH))'; \ 1264 echo ' (default: $(INSTALL_HDR_PATH))'; \
1249 echo '' 1265 echo ''
@@ -1257,6 +1273,11 @@ help:
1257 @echo ' headerdep - Detect inclusion cycles in headers' 1273 @echo ' headerdep - Detect inclusion cycles in headers'
1258 @$(MAKE) -f $(srctree)/scripts/Makefile.help checker-help 1274 @$(MAKE) -f $(srctree)/scripts/Makefile.help checker-help
1259 @echo '' 1275 @echo ''
1276 @echo 'Kernel selftest'
1277 @echo ' kselftest - Build and run kernel selftest (run as root)'
1278 @echo ' Build, install, and boot kernel before'
1279 @echo ' running kselftest on it'
1280 @echo ''
1260 @echo 'Kernel packaging:' 1281 @echo 'Kernel packaging:'
1261 @$(MAKE) $(build)=$(package-dir) help 1282 @$(MAKE) $(build)=$(package-dir) help
1262 @echo '' 1283 @echo ''
@@ -1398,6 +1419,7 @@ clean: $(clean-dirs)
1398 @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \ 1419 @find $(if $(KBUILD_EXTMOD), $(KBUILD_EXTMOD), .) $(RCS_FIND_IGNORE) \
1399 \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \ 1420 \( -name '*.[oas]' -o -name '*.ko' -o -name '.*.cmd' \
1400 -o -name '*.ko.*' \ 1421 -o -name '*.ko.*' \
1422 -o -name '*.dwo' \
1401 -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \ 1423 -o -name '.*.d' -o -name '.*.tmp' -o -name '*.mod.c' \
1402 -o -name '*.symtypes' -o -name 'modules.order' \ 1424 -o -name '*.symtypes' -o -name 'modules.order' \
1403 -o -name modules.builtin -o -name '.tmp_*.o.*' \ 1425 -o -name modules.builtin -o -name '.tmp_*.o.*' \
diff --git a/arch/arm/boot/dts/versatile-ab.dts b/arch/arm/boot/dts/versatile-ab.dts
index 36c771a2d765..27d0d9c8adf3 100644
--- a/arch/arm/boot/dts/versatile-ab.dts
+++ b/arch/arm/boot/dts/versatile-ab.dts
@@ -15,6 +15,10 @@
15 i2c0 = &i2c0; 15 i2c0 = &i2c0;
16 }; 16 };
17 17
18 chosen {
19 stdout-path = &uart0;
20 };
21
18 memory { 22 memory {
19 reg = <0x0 0x08000000>; 23 reg = <0x0 0x08000000>;
20 }; 24 };
diff --git a/arch/arm/boot/dts/versatile-pb.dts b/arch/arm/boot/dts/versatile-pb.dts
index d025048119d3..e36c1e82fea7 100644
--- a/arch/arm/boot/dts/versatile-pb.dts
+++ b/arch/arm/boot/dts/versatile-pb.dts
@@ -56,5 +56,3 @@
56 }; 56 };
57 }; 57 };
58}; 58};
59
60#include <testcases.dtsi>
diff --git a/arch/arm/xen/grant-table.c b/arch/arm/xen/grant-table.c
index 2c4041c9bac5..e43791829ace 100644
--- a/arch/arm/xen/grant-table.c
+++ b/arch/arm/xen/grant-table.c
@@ -49,8 +49,3 @@ int arch_gnttab_init(unsigned long nr_shared)
49{ 49{
50 return 0; 50 return 0;
51} 51}
52
53int arch_gnttab_init(unsigned long nr_shared, unsigned long nr_status)
54{
55 return 0;
56}
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index f37238f45bcd..5441b14994fc 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -76,7 +76,7 @@ vmlinux.gz: vmlinux
76 $(Q)$(MAKE) $(build)=$(boot) $@ 76 $(Q)$(MAKE) $(build)=$(boot) $@
77 77
78unwcheck: vmlinux 78unwcheck: vmlinux
79 -$(Q)READELF=$(READELF) python $(srctree)/arch/ia64/scripts/unwcheck.py $< 79 -$(Q)READELF=$(READELF) $(PYTHON) $(srctree)/arch/ia64/scripts/unwcheck.py $<
80 80
81archclean: 81archclean:
82 $(Q)$(MAKE) $(clean)=$(boot) 82 $(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c
index ef2aed0f63ca..9dc52501de83 100644
--- a/arch/powerpc/boot/gunzip_util.c
+++ b/arch/powerpc/boot/gunzip_util.c
@@ -112,10 +112,10 @@ int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen)
112 r = zlib_inflate(&state->s, Z_FULL_FLUSH); 112 r = zlib_inflate(&state->s, Z_FULL_FLUSH);
113 if (r != Z_OK && r != Z_STREAM_END) 113 if (r != Z_OK && r != Z_STREAM_END)
114 fatal("inflate returned %d msg: %s\n\r", r, state->s.msg); 114 fatal("inflate returned %d msg: %s\n\r", r, state->s.msg);
115 len = state->s.next_out - (unsigned char *)dst; 115 len = state->s.next_out - (Byte *)dst;
116 } else { 116 } else {
117 /* uncompressed image */ 117 /* uncompressed image */
118 len = min(state->s.avail_in, (unsigned)dstlen); 118 len = min(state->s.avail_in, (uLong)dstlen);
119 memcpy(dst, state->s.next_in, len); 119 memcpy(dst, state->s.next_in, len);
120 state->s.next_in += len; 120 state->s.next_in += len;
121 state->s.avail_in -= len; 121 state->s.avail_in -= len;
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 642e436d4595..daa5af91163c 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -459,7 +459,8 @@ extern const char *powerpc_base_platform;
459#define CPU_FTRS_POSSIBLE \ 459#define CPU_FTRS_POSSIBLE \
460 (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ 460 (CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
461 CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \ 461 CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
462 CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX) 462 CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
463 CPU_FTRS_PA6T | CPU_FTR_VSX)
463#endif 464#endif
464#else 465#else
465enum { 466enum {
@@ -509,7 +510,8 @@ enum {
509#define CPU_FTRS_ALWAYS \ 510#define CPU_FTRS_ALWAYS \
510 (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ 511 (CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
511 CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \ 512 CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
512 CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE) 513 CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
514 CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE)
513#endif 515#endif
514#else 516#else
515enum { 517enum {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 44e90516519b..b125ceab149c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -57,10 +57,10 @@ struct machdep_calls {
57 void (*hpte_removebolted)(unsigned long ea, 57 void (*hpte_removebolted)(unsigned long ea,
58 int psize, int ssize); 58 int psize, int ssize);
59 void (*flush_hash_range)(unsigned long number, int local); 59 void (*flush_hash_range)(unsigned long number, int local);
60 void (*hugepage_invalidate)(struct mm_struct *mm, 60 void (*hugepage_invalidate)(unsigned long vsid,
61 unsigned long addr,
61 unsigned char *hpte_slot_array, 62 unsigned char *hpte_slot_array,
62 unsigned long addr, int psize); 63 int psize, int ssize);
63
64 /* special for kexec, to be called in real mode, linear mapping is 64 /* special for kexec, to be called in real mode, linear mapping is
65 * destroyed as well */ 65 * destroyed as well */
66 void (*hpte_clear_all)(void); 66 void (*hpte_clear_all)(void);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index b2f8ce1fd0d7..86055e598269 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -149,6 +149,8 @@ struct opal_sg_list {
149#define OPAL_DUMP_INFO2 94 149#define OPAL_DUMP_INFO2 94
150#define OPAL_PCI_EEH_FREEZE_SET 97 150#define OPAL_PCI_EEH_FREEZE_SET 97
151#define OPAL_HANDLE_HMI 98 151#define OPAL_HANDLE_HMI 98
152#define OPAL_REGISTER_DUMP_REGION 101
153#define OPAL_UNREGISTER_DUMP_REGION 102
152 154
153#ifndef __ASSEMBLY__ 155#ifndef __ASSEMBLY__
154 156
@@ -920,6 +922,8 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
920 uint64_t length); 922 uint64_t length);
921int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); 923int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
922int64_t opal_handle_hmi(void); 924int64_t opal_handle_hmi(void);
925int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
926int64_t opal_unregister_dump_region(uint32_t id);
923 927
924/* Internal functions */ 928/* Internal functions */
925extern int early_init_dt_scan_opal(unsigned long node, const char *uname, 929extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -974,6 +978,13 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
974 unsigned long vmalloc_size); 978 unsigned long vmalloc_size);
975void opal_free_sg_list(struct opal_sg_list *sg); 979void opal_free_sg_list(struct opal_sg_list *sg);
976 980
981/*
982 * Dump region ID range usable by the OS
983 */
984#define OPAL_DUMP_REGION_HOST_START 0x80
985#define OPAL_DUMP_REGION_LOG_BUF 0x80
986#define OPAL_DUMP_REGION_HOST_END 0xFF
987
977#endif /* __ASSEMBLY__ */ 988#endif /* __ASSEMBLY__ */
978 989
979#endif /* __OPAL_H */ 990#endif /* __OPAL_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index eb9261024f51..7b3d54fae46f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -413,7 +413,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
413} 413}
414 414
415extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, 415extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
416 pmd_t *pmdp); 416 pmd_t *pmdp, unsigned long old_pmd);
417#ifdef CONFIG_TRANSPARENT_HUGEPAGE 417#ifdef CONFIG_TRANSPARENT_HUGEPAGE
418extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot); 418extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
419extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot); 419extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index b6d2d42f84b5..4f4ec2ab45c9 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -46,11 +46,31 @@
46 * in order to deal with 64K made of 4K HW pages. Thus we override the 46 * in order to deal with 64K made of 4K HW pages. Thus we override the
47 * generic accessors and iterators here 47 * generic accessors and iterators here
48 */ 48 */
49#define __real_pte(e,p) ((real_pte_t) { \ 49#define __real_pte __real_pte
50 (e), (pte_val(e) & _PAGE_COMBO) ? \ 50static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
51 (pte_val(*((p) + PTRS_PER_PTE))) : 0 }) 51{
52#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \ 52 real_pte_t rpte;
53 (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf)) 53
54 rpte.pte = pte;
55 rpte.hidx = 0;
56 if (pte_val(pte) & _PAGE_COMBO) {
57 /*
58 * Make sure we order the hidx load against the _PAGE_COMBO
59 * check. The store side ordering is done in __hash_page_4K
60 */
61 smp_rmb();
62 rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
63 }
64 return rpte;
65}
66
67static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
68{
69 if ((pte_val(rpte.pte) & _PAGE_COMBO))
70 return (rpte.hidx >> (index<<2)) & 0xf;
71 return (pte_val(rpte.pte) >> 12) & 0xf;
72}
73
54#define __rpte_to_pte(r) ((r).pte) 74#define __rpte_to_pte(r) ((r).pte)
55#define __rpte_sub_valid(rpte, index) \ 75#define __rpte_sub_valid(rpte, index) \
56 (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index))) 76 (pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c987bf794ef..0c0505956a29 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -213,9 +213,8 @@
213#define SPRN_ACOP 0x1F /* Available Coprocessor Register */ 213#define SPRN_ACOP 0x1F /* Available Coprocessor Register */
214#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ 214#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
215#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */ 215#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
216#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */
217#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */ 216#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
218#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */ 217#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
219#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ 218#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
220#define SPRN_CTRLF 0x088 219#define SPRN_CTRLF 0x088
221#define SPRN_CTRLT 0x098 220#define SPRN_CTRLT 0x098
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 35aa339410bd..4dbe072eecbe 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -61,6 +61,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
61 61
62static inline int arch_spin_is_locked(arch_spinlock_t *lock) 62static inline int arch_spin_is_locked(arch_spinlock_t *lock)
63{ 63{
64 smp_mb();
64 return !arch_spin_value_unlocked(*lock); 65 return !arch_spin_value_unlocked(*lock);
65} 66}
66 67
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6144d5a6bfe7..050f79a4a168 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -592,61 +592,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
592 MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception) 592 MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
593 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62) 593 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
594 594
595 .globl hmi_exception_early
596hmi_exception_early:
597 EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
598 mr r10,r1 /* Save r1 */
599 ld r1,PACAEMERGSP(r13) /* Use emergency stack */
600 subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
601 std r9,_CCR(r1) /* save CR in stackframe */
602 mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
603 std r11,_NIP(r1) /* save HSRR0 in stackframe */
604 mfspr r12,SPRN_HSRR1 /* Save SRR1 */
605 std r12,_MSR(r1) /* save SRR1 in stackframe */
606 std r10,0(r1) /* make stack chain pointer */
607 std r0,GPR0(r1) /* save r0 in stackframe */
608 std r10,GPR1(r1) /* save r1 in stackframe */
609 EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
610 EXCEPTION_PROLOG_COMMON_3(0xe60)
611 addi r3,r1,STACK_FRAME_OVERHEAD
612 bl hmi_exception_realmode
613 /* Windup the stack. */
614 /* Clear MSR_RI before setting SRR0 and SRR1. */
615 li r0,MSR_RI
616 mfmsr r9 /* get MSR value */
617 andc r9,r9,r0
618 mtmsrd r9,1 /* Clear MSR_RI */
619 /* Move original HSRR0 and HSRR1 into the respective regs */
620 ld r9,_MSR(r1)
621 mtspr SPRN_HSRR1,r9
622 ld r3,_NIP(r1)
623 mtspr SPRN_HSRR0,r3
624 ld r9,_CTR(r1)
625 mtctr r9
626 ld r9,_XER(r1)
627 mtxer r9
628 ld r9,_LINK(r1)
629 mtlr r9
630 REST_GPR(0, r1)
631 REST_8GPRS(2, r1)
632 REST_GPR(10, r1)
633 ld r11,_CCR(r1)
634 mtcr r11
635 REST_GPR(11, r1)
636 REST_2GPRS(12, r1)
637 /* restore original r1. */
638 ld r1,GPR1(r1)
639
640 /*
641 * Go to virtual mode and pull the HMI event information from
642 * firmware.
643 */
644 .globl hmi_exception_after_realmode
645hmi_exception_after_realmode:
646 SET_SCRATCH0(r13)
647 EXCEPTION_PROLOG_0(PACA_EXGEN)
648 b hmi_exception_hv
649
650 MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell) 595 MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
651 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82) 596 KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
652 597
@@ -1306,6 +1251,61 @@ fwnmi_data_area:
1306 . = 0x8000 1251 . = 0x8000
1307#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */ 1252#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
1308 1253
1254 .globl hmi_exception_early
1255hmi_exception_early:
1256 EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
1257 mr r10,r1 /* Save r1 */
1258 ld r1,PACAEMERGSP(r13) /* Use emergency stack */
1259 subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
1260 std r9,_CCR(r1) /* save CR in stackframe */
1261 mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
1262 std r11,_NIP(r1) /* save HSRR0 in stackframe */
1263 mfspr r12,SPRN_HSRR1 /* Save SRR1 */
1264 std r12,_MSR(r1) /* save SRR1 in stackframe */
1265 std r10,0(r1) /* make stack chain pointer */
1266 std r0,GPR0(r1) /* save r0 in stackframe */
1267 std r10,GPR1(r1) /* save r1 in stackframe */
1268 EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
1269 EXCEPTION_PROLOG_COMMON_3(0xe60)
1270 addi r3,r1,STACK_FRAME_OVERHEAD
1271 bl hmi_exception_realmode
1272 /* Windup the stack. */
1273 /* Clear MSR_RI before setting SRR0 and SRR1. */
1274 li r0,MSR_RI
1275 mfmsr r9 /* get MSR value */
1276 andc r9,r9,r0
1277 mtmsrd r9,1 /* Clear MSR_RI */
1278 /* Move original HSRR0 and HSRR1 into the respective regs */
1279 ld r9,_MSR(r1)
1280 mtspr SPRN_HSRR1,r9
1281 ld r3,_NIP(r1)
1282 mtspr SPRN_HSRR0,r3
1283 ld r9,_CTR(r1)
1284 mtctr r9
1285 ld r9,_XER(r1)
1286 mtxer r9
1287 ld r9,_LINK(r1)
1288 mtlr r9
1289 REST_GPR(0, r1)
1290 REST_8GPRS(2, r1)
1291 REST_GPR(10, r1)
1292 ld r11,_CCR(r1)
1293 mtcr r11
1294 REST_GPR(11, r1)
1295 REST_2GPRS(12, r1)
1296 /* restore original r1. */
1297 ld r1,GPR1(r1)
1298
1299 /*
1300 * Go to virtual mode and pull the HMI event information from
1301 * firmware.
1302 */
1303 .globl hmi_exception_after_realmode
1304hmi_exception_after_realmode:
1305 SET_SCRATCH0(r13)
1306 EXCEPTION_PROLOG_0(PACA_EXGEN)
1307 b hmi_exception_hv
1308
1309#ifdef CONFIG_PPC_POWERNV 1309#ifdef CONFIG_PPC_POWERNV
1310_GLOBAL(opal_mc_secondary_handler) 1310_GLOBAL(opal_mc_secondary_handler)
1311 HMT_MEDIUM_PPR_DISCARD 1311 HMT_MEDIUM_PPR_DISCARD
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index c334f53453f7..b5061abbd2e0 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1210,10 +1210,12 @@ clear_utlb_entry:
1210 1210
1211 /* We configure icbi to invalidate 128 bytes at a time since the 1211 /* We configure icbi to invalidate 128 bytes at a time since the
1212 * current 32-bit kernel code isn't too happy with icache != dcache 1212 * current 32-bit kernel code isn't too happy with icache != dcache
1213 * block size 1213 * block size. We also disable the BTAC as this can cause errors
1214 * in some circumstances (see IBM Erratum 47).
1214 */ 1215 */
1215 mfspr r3,SPRN_CCR0 1216 mfspr r3,SPRN_CCR0
1216 oris r3,r3,0x0020 1217 oris r3,r3,0x0020
1218 ori r3,r3,0x0040
1217 mtspr SPRN_CCR0,r3 1219 mtspr SPRN_CCR0,r3
1218 isync 1220 isync
1219 1221
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f84f799babb1..a10642a0d861 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1120,37 +1120,41 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership);
1120int iommu_add_device(struct device *dev) 1120int iommu_add_device(struct device *dev)
1121{ 1121{
1122 struct iommu_table *tbl; 1122 struct iommu_table *tbl;
1123 int ret = 0;
1124 1123
1125 if (WARN_ON(dev->iommu_group)) { 1124 /*
1126 pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n", 1125 * The sysfs entries should be populated before
1127 dev_name(dev), 1126 * binding the IOMMU group. If the sysfs entries aren't
1128 iommu_group_id(dev->iommu_group)); 1127 * ready, we simply bail.
1128 */
1129 if (!device_is_registered(dev))
1130 return -ENOENT;
1131
1132 if (dev->iommu_group) {
1133 pr_debug("%s: Skipping device %s with iommu group %d\n",
1134 __func__, dev_name(dev),
1135 iommu_group_id(dev->iommu_group));
1129 return -EBUSY; 1136 return -EBUSY;
1130 } 1137 }
1131 1138
1132 tbl = get_iommu_table_base(dev); 1139 tbl = get_iommu_table_base(dev);
1133 if (!tbl || !tbl->it_group) { 1140 if (!tbl || !tbl->it_group) {
1134 pr_debug("iommu_tce: skipping device %s with no tbl\n", 1141 pr_debug("%s: Skipping device %s with no tbl\n",
1135 dev_name(dev)); 1142 __func__, dev_name(dev));
1136 return 0; 1143 return 0;
1137 } 1144 }
1138 1145
1139 pr_debug("iommu_tce: adding %s to iommu group %d\n", 1146 pr_debug("%s: Adding %s to iommu group %d\n",
1140 dev_name(dev), iommu_group_id(tbl->it_group)); 1147 __func__, dev_name(dev),
1148 iommu_group_id(tbl->it_group));
1141 1149
1142 if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) { 1150 if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
1143 pr_err("iommu_tce: unsupported iommu page size."); 1151 pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
1144 pr_err("%s has not been added\n", dev_name(dev)); 1152 __func__, IOMMU_PAGE_SIZE(tbl),
1153 PAGE_SIZE, dev_name(dev));
1145 return -EINVAL; 1154 return -EINVAL;
1146 } 1155 }
1147 1156
1148 ret = iommu_group_add_device(tbl->it_group, dev); 1157 return iommu_group_add_device(tbl->it_group, dev);
1149 if (ret < 0)
1150 pr_err("iommu_tce: %s has not been added, ret=%d\n",
1151 dev_name(dev), ret);
1152
1153 return ret;
1154} 1158}
1155EXPORT_SYMBOL_GPL(iommu_add_device); 1159EXPORT_SYMBOL_GPL(iommu_add_device);
1156 1160
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1a3b1055f5eb..4e139f8a69ef 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -818,76 +818,6 @@ int cpu_to_chip_id(int cpu)
818} 818}
819EXPORT_SYMBOL(cpu_to_chip_id); 819EXPORT_SYMBOL(cpu_to_chip_id);
820 820
821#ifdef CONFIG_PPC_PSERIES
822/*
823 * Fix up the uninitialized fields in a new device node:
824 * name, type and pci-specific fields
825 */
826
827static int of_finish_dynamic_node(struct device_node *node)
828{
829 struct device_node *parent = of_get_parent(node);
830 int err = 0;
831 const phandle *ibm_phandle;
832
833 node->name = of_get_property(node, "name", NULL);
834 node->type = of_get_property(node, "device_type", NULL);
835
836 if (!node->name)
837 node->name = "<NULL>";
838 if (!node->type)
839 node->type = "<NULL>";
840
841 if (!parent) {
842 err = -ENODEV;
843 goto out;
844 }
845
846 /* We don't support that function on PowerMac, at least
847 * not yet
848 */
849 if (machine_is(powermac))
850 return -ENODEV;
851
852 /* fix up new node's phandle field */
853 if ((ibm_phandle = of_get_property(node, "ibm,phandle", NULL)))
854 node->phandle = *ibm_phandle;
855
856out:
857 of_node_put(parent);
858 return err;
859}
860
861static int prom_reconfig_notifier(struct notifier_block *nb,
862 unsigned long action, void *node)
863{
864 int err;
865
866 switch (action) {
867 case OF_RECONFIG_ATTACH_NODE:
868 err = of_finish_dynamic_node(node);
869 if (err < 0)
870 printk(KERN_ERR "finish_node returned %d\n", err);
871 break;
872 default:
873 err = 0;
874 break;
875 }
876 return notifier_from_errno(err);
877}
878
879static struct notifier_block prom_reconfig_nb = {
880 .notifier_call = prom_reconfig_notifier,
881 .priority = 10, /* This one needs to run first */
882};
883
884static int __init prom_reconfig_setup(void)
885{
886 return of_reconfig_notifier_register(&prom_reconfig_nb);
887}
888__initcall(prom_reconfig_setup);
889#endif
890
891bool arch_match_cpu_phys_id(int cpu, u64 phys_id) 821bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
892{ 822{
893 return (int)phys_id == get_hard_smp_processor_id(cpu); 823 return (int)phys_id == get_hard_smp_processor_id(cpu);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1007fb802e6b..a0738af4aba6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -376,6 +376,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
376 GFP_KERNEL, cpu_to_node(cpu)); 376 GFP_KERNEL, cpu_to_node(cpu));
377 zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu), 377 zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
378 GFP_KERNEL, cpu_to_node(cpu)); 378 GFP_KERNEL, cpu_to_node(cpu));
379 /*
380 * numa_node_id() works after this.
381 */
382 set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
383 set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
379 } 384 }
380 385
381 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid)); 386 cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@@ -723,12 +728,6 @@ void start_secondary(void *unused)
723 } 728 }
724 traverse_core_siblings(cpu, true); 729 traverse_core_siblings(cpu, true);
725 730
726 /*
727 * numa_node_id() works after this.
728 */
729 set_numa_node(numa_cpu_lookup_table[cpu]);
730 set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
731
732 smp_wmb(); 731 smp_wmb();
733 notify_cpu_starting(cpu); 732 notify_cpu_starting(cpu);
734 set_cpu_online(cpu, true); 733 set_cpu_online(cpu, true);
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 0c9c8d7d0734..170a0346f756 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw)
70 70
71void arch_spin_unlock_wait(arch_spinlock_t *lock) 71void arch_spin_unlock_wait(arch_spinlock_t *lock)
72{ 72{
73 smp_mb();
74
73 while (lock->slock) { 75 while (lock->slock) {
74 HMT_low(); 76 HMT_low();
75 if (SHARED_PROCESSOR) 77 if (SHARED_PROCESSOR)
76 __spin_yield(lock); 78 __spin_yield(lock);
77 } 79 }
78 HMT_medium(); 80 HMT_medium();
81
82 smp_mb();
79} 83}
80 84
81EXPORT_SYMBOL(arch_spin_unlock_wait); 85EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cf1d325eae8b..afc0a8295f84 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -412,18 +412,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
412 local_irq_restore(flags); 412 local_irq_restore(flags);
413} 413}
414 414
415static void native_hugepage_invalidate(struct mm_struct *mm, 415static void native_hugepage_invalidate(unsigned long vsid,
416 unsigned long addr,
416 unsigned char *hpte_slot_array, 417 unsigned char *hpte_slot_array,
417 unsigned long addr, int psize) 418 int psize, int ssize)
418{ 419{
419 int ssize = 0, i; 420 int i;
420 int lock_tlbie;
421 struct hash_pte *hptep; 421 struct hash_pte *hptep;
422 int actual_psize = MMU_PAGE_16M; 422 int actual_psize = MMU_PAGE_16M;
423 unsigned int max_hpte_count, valid; 423 unsigned int max_hpte_count, valid;
424 unsigned long flags, s_addr = addr; 424 unsigned long flags, s_addr = addr;
425 unsigned long hpte_v, want_v, shift; 425 unsigned long hpte_v, want_v, shift;
426 unsigned long hidx, vpn = 0, vsid, hash, slot; 426 unsigned long hidx, vpn = 0, hash, slot;
427 427
428 shift = mmu_psize_defs[psize].shift; 428 shift = mmu_psize_defs[psize].shift;
429 max_hpte_count = 1U << (PMD_SHIFT - shift); 429 max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -437,15 +437,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
437 437
438 /* get the vpn */ 438 /* get the vpn */
439 addr = s_addr + (i * (1ul << shift)); 439 addr = s_addr + (i * (1ul << shift));
440 if (!is_kernel_addr(addr)) {
441 ssize = user_segment_size(addr);
442 vsid = get_vsid(mm->context.id, addr, ssize);
443 WARN_ON(vsid == 0);
444 } else {
445 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
446 ssize = mmu_kernel_ssize;
447 }
448
449 vpn = hpt_vpn(addr, vsid, ssize); 440 vpn = hpt_vpn(addr, vsid, ssize);
450 hash = hpt_hash(vpn, shift, ssize); 441 hash = hpt_hash(vpn, shift, ssize);
451 if (hidx & _PTEIDX_SECONDARY) 442 if (hidx & _PTEIDX_SECONDARY)
@@ -465,22 +456,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
465 else 456 else
466 /* Invalidate the hpte. NOTE: this also unlocks it */ 457 /* Invalidate the hpte. NOTE: this also unlocks it */
467 hptep->v = 0; 458 hptep->v = 0;
459 /*
460 * We need to do a tlb invalidate for each address; the tlbie
461 * instruction compares entry_VA in the tlb with the VA specified
462 * here
463 */
464 tlbie(vpn, psize, actual_psize, ssize, 0);
468 } 465 }
469 /*
470 * Since this is a hugepage, we just need a single tlbie.
471 * use the last vpn.
472 */
473 lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
474 if (lock_tlbie)
475 raw_spin_lock(&native_tlbie_lock);
476
477 asm volatile("ptesync":::"memory");
478 __tlbie(vpn, psize, actual_psize, ssize);
479 asm volatile("eieio; tlbsync; ptesync":::"memory");
480
481 if (lock_tlbie)
482 raw_spin_unlock(&native_tlbie_lock);
483
484 local_irq_restore(flags); 466 local_irq_restore(flags);
485} 467}
486 468
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 826893fcb3a7..5f5e6328c21c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,6 +18,57 @@
18#include <linux/mm.h> 18#include <linux/mm.h>
19#include <asm/machdep.h> 19#include <asm/machdep.h>
20 20
21static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
22 pmd_t *pmdp, unsigned int psize, int ssize)
23{
24 int i, max_hpte_count, valid;
25 unsigned long s_addr;
26 unsigned char *hpte_slot_array;
27 unsigned long hidx, shift, vpn, hash, slot;
28
29 s_addr = addr & HPAGE_PMD_MASK;
30 hpte_slot_array = get_hpte_slot_array(pmdp);
31 /*
32 * If we try to do a HUGE PTE update after a withdraw is done,
33 * we will find the below NULL. This happens when we do
34 * split_huge_page_pmd
35 */
36 if (!hpte_slot_array)
37 return;
38
39 if (ppc_md.hugepage_invalidate)
40 return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
41 psize, ssize);
42 /*
43 * No bulk hpte removal support, invalidate each entry
44 */
45 shift = mmu_psize_defs[psize].shift;
46 max_hpte_count = HPAGE_PMD_SIZE >> shift;
47 for (i = 0; i < max_hpte_count; i++) {
48 /*
49 * 8 bits per hpte entry
50 * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
51 */
52 valid = hpte_valid(hpte_slot_array, i);
53 if (!valid)
54 continue;
55 hidx = hpte_hash_index(hpte_slot_array, i);
56
57 /* get the vpn */
58 addr = s_addr + (i * (1ul << shift));
59 vpn = hpt_vpn(addr, vsid, ssize);
60 hash = hpt_hash(vpn, shift, ssize);
61 if (hidx & _PTEIDX_SECONDARY)
62 hash = ~hash;
63
64 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
65 slot += hidx & _PTEIDX_GROUP_IX;
66 ppc_md.hpte_invalidate(slot, vpn, psize,
67 MMU_PAGE_16M, ssize, 0);
68 }
69}
70
71
21int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, 72int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
22 pmd_t *pmdp, unsigned long trap, int local, int ssize, 73 pmd_t *pmdp, unsigned long trap, int local, int ssize,
23 unsigned int psize) 74 unsigned int psize)
@@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
33 * atomically mark the linux large page PMD busy and dirty 84 * atomically mark the linux large page PMD busy and dirty
34 */ 85 */
35 do { 86 do {
36 old_pmd = pmd_val(*pmdp); 87 pmd_t pmd = ACCESS_ONCE(*pmdp);
88
89 old_pmd = pmd_val(pmd);
37 /* If PMD busy, retry the access */ 90 /* If PMD busy, retry the access */
38 if (unlikely(old_pmd & _PAGE_BUSY)) 91 if (unlikely(old_pmd & _PAGE_BUSY))
39 return 0; 92 return 0;
@@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
85 vpn = hpt_vpn(ea, vsid, ssize); 138 vpn = hpt_vpn(ea, vsid, ssize);
86 hash = hpt_hash(vpn, shift, ssize); 139 hash = hpt_hash(vpn, shift, ssize);
87 hpte_slot_array = get_hpte_slot_array(pmdp); 140 hpte_slot_array = get_hpte_slot_array(pmdp);
141 if (psize == MMU_PAGE_4K) {
142 /*
143 * invalidate the old hpte entry if we have that mapped via 64K
144 * base page size. This is because demote_segment won't flush
145 * hash page table entries.
146 */
147 if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
148 invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
149 }
88 150
89 valid = hpte_valid(hpte_slot_array, index); 151 valid = hpte_valid(hpte_slot_array, index);
90 if (valid) { 152 if (valid) {
@@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
107 * safely update this here. 169 * safely update this here.
108 */ 170 */
109 valid = 0; 171 valid = 0;
110 new_pmd &= ~_PAGE_HPTEFLAGS;
111 hpte_slot_array[index] = 0; 172 hpte_slot_array[index] = 0;
112 } else 173 }
113 /* clear the busy bits and set the hash pte bits */
114 new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
115 } 174 }
116 175
117 if (!valid) { 176 if (!valid) {
@@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
119 178
120 /* insert new entry */ 179 /* insert new entry */
121 pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; 180 pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
122repeat: 181 new_pmd |= _PAGE_HASHPTE;
123 hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
124
125 /* clear the busy bits and set the hash pte bits */
126 new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
127 182
128 /* Add in WIMG bits */ 183 /* Add in WIMG bits */
129 rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE | 184 rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
@@ -132,6 +187,8 @@ repeat:
132 * enable the memory coherence always 187 * enable the memory coherence always
133 */ 188 */
134 rflags |= HPTE_R_M; 189 rflags |= HPTE_R_M;
190repeat:
191 hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
135 192
136 /* Insert into the hash table, primary slot */ 193 /* Insert into the hash table, primary slot */
137 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0, 194 slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -172,8 +229,17 @@ repeat:
172 mark_hpte_slot_valid(hpte_slot_array, index, slot); 229 mark_hpte_slot_valid(hpte_slot_array, index, slot);
173 } 230 }
174 /* 231 /*
175 * No need to use ldarx/stdcx here 232 * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
233 * base page size 4k.
234 */
235 if (psize == MMU_PAGE_4K)
236 new_pmd |= _PAGE_COMBO;
237 /*
238 * The hpte valid is stored in the pgtable whose address is in the
239 * second half of the PMD. Order this against clearing of the busy bit in
240 * huge pmd.
176 */ 241 */
242 smp_wmb();
177 *pmdp = __pmd(new_pmd & ~_PAGE_BUSY); 243 *pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
178 return 0; 244 return 0;
179} 245}
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d3e9a78eaed3..d7737a542fd7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1049,7 +1049,7 @@ static void __init mark_reserved_regions_for_nid(int nid)
1049 1049
1050void __init do_init_bootmem(void) 1050void __init do_init_bootmem(void)
1051{ 1051{
1052 int nid; 1052 int nid, cpu;
1053 1053
1054 min_low_pfn = 0; 1054 min_low_pfn = 0;
1055 max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT; 1055 max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -1122,8 +1122,15 @@ void __init do_init_bootmem(void)
1122 1122
1123 reset_numa_cpu_lookup_table(); 1123 reset_numa_cpu_lookup_table();
1124 register_cpu_notifier(&ppc64_numa_nb); 1124 register_cpu_notifier(&ppc64_numa_nb);
1125 cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE, 1125 /*
1126 (void *)(unsigned long)boot_cpuid); 1126 * We need the numa_cpu_lookup_table to be accurate for all CPUs,
1127 * even before we online them, so that we can use cpu_to_{node,mem}
1128 * early in boot, cf. smp_prepare_cpus().
1129 */
1130 for_each_possible_cpu(cpu) {
1131 cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
1132 (void *)(unsigned long)cpu);
1133 }
1127} 1134}
1128 1135
1129void __init paging_init(void) 1136void __init paging_init(void)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3b3c4d34c7a0..c8d709ab489d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -54,6 +54,9 @@
54 54
55#include "mmu_decl.h" 55#include "mmu_decl.h"
56 56
57#define CREATE_TRACE_POINTS
58#include <trace/events/thp.h>
59
57/* Some sanity checking */ 60/* Some sanity checking */
58#if TASK_SIZE_USER64 > PGTABLE_RANGE 61#if TASK_SIZE_USER64 > PGTABLE_RANGE
59#error TASK_SIZE_USER64 exceeds pagetable range 62#error TASK_SIZE_USER64 exceeds pagetable range
@@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
537 old = pmd_val(*pmdp); 540 old = pmd_val(*pmdp);
538 *pmdp = __pmd((old & ~clr) | set); 541 *pmdp = __pmd((old & ~clr) | set);
539#endif 542#endif
543 trace_hugepage_update(addr, old, clr, set);
540 if (old & _PAGE_HASHPTE) 544 if (old & _PAGE_HASHPTE)
541 hpte_do_hugepage_flush(mm, addr, pmdp); 545 hpte_do_hugepage_flush(mm, addr, pmdp, old);
542 return old; 546 return old;
543} 547}
544 548
@@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
642 * If we didn't have the splitting flag set, go and flush the 646 * If we didn't have the splitting flag set, go and flush the
643 * HPTE entries. 647 * HPTE entries.
644 */ 648 */
649 trace_hugepage_splitting(address, old);
645 if (!(old & _PAGE_SPLITTING)) { 650 if (!(old & _PAGE_SPLITTING)) {
646 /* We need to flush the hpte */ 651 /* We need to flush the hpte */
647 if (old & _PAGE_HASHPTE) 652 if (old & _PAGE_HASHPTE)
648 hpte_do_hugepage_flush(vma->vm_mm, address, pmdp); 653 hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
649 } 654 }
650 /* 655 /*
651 * This ensures that generic code that rely on IRQ disabling 656 * This ensures that generic code that rely on IRQ disabling
@@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
709 assert_spin_locked(&mm->page_table_lock); 714 assert_spin_locked(&mm->page_table_lock);
710 WARN_ON(!pmd_trans_huge(pmd)); 715 WARN_ON(!pmd_trans_huge(pmd));
711#endif 716#endif
717 trace_hugepage_set_pmd(addr, pmd);
712 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); 718 return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
713} 719}
714 720
@@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
723 * needs to be flushed. 729 * needs to be flushed.
724 */ 730 */
725void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, 731void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
726 pmd_t *pmdp) 732 pmd_t *pmdp, unsigned long old_pmd)
727{ 733{
728 int ssize, i; 734 int ssize, i;
729 unsigned long s_addr; 735 unsigned long s_addr;
@@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
745 if (!hpte_slot_array) 751 if (!hpte_slot_array)
746 return; 752 return;
747 753
748 /* get the base page size */ 754 /* get the base page size, vsid and segment size */
755#ifdef CONFIG_DEBUG_VM
749 psize = get_slice_psize(mm, s_addr); 756 psize = get_slice_psize(mm, s_addr);
757 BUG_ON(psize == MMU_PAGE_16M);
758#endif
759 if (old_pmd & _PAGE_COMBO)
760 psize = MMU_PAGE_4K;
761 else
762 psize = MMU_PAGE_64K;
763
764 if (!is_kernel_addr(s_addr)) {
765 ssize = user_segment_size(s_addr);
766 vsid = get_vsid(mm->context.id, s_addr, ssize);
767 WARN_ON(vsid == 0);
768 } else {
769 vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
770 ssize = mmu_kernel_ssize;
771 }
750 772
751 if (ppc_md.hugepage_invalidate) 773 if (ppc_md.hugepage_invalidate)
752 return ppc_md.hugepage_invalidate(mm, hpte_slot_array, 774 return ppc_md.hugepage_invalidate(vsid, s_addr,
753 s_addr, psize); 775 hpte_slot_array,
776 psize, ssize);
754 /* 777 /*
755 * No bulk hpte removal support, invalidate each entry 778 * No bulk hpte removal support, invalidate each entry
756 */ 779 */
@@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
768 791
769 /* get the vpn */ 792 /* get the vpn */
770 addr = s_addr + (i * (1ul << shift)); 793 addr = s_addr + (i * (1ul << shift));
771 if (!is_kernel_addr(addr)) {
772 ssize = user_segment_size(addr);
773 vsid = get_vsid(mm->context.id, addr, ssize);
774 WARN_ON(vsid == 0);
775 } else {
776 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
777 ssize = mmu_kernel_ssize;
778 }
779
780 vpn = hpt_vpn(addr, vsid, ssize); 794 vpn = hpt_vpn(addr, vsid, ssize);
781 hash = hpt_hash(vpn, shift, ssize); 795 hash = hpt_hash(vpn, shift, ssize);
782 if (hidx & _PTEIDX_SECONDARY) 796 if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c99f6510a0b2..d2a94b85dbc2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -30,6 +30,8 @@
30#include <asm/tlb.h> 30#include <asm/tlb.h>
31#include <asm/bug.h> 31#include <asm/bug.h>
32 32
33#include <trace/events/thp.h>
34
33DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch); 35DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
34 36
35/* 37/*
@@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
213 if (ptep == NULL) 215 if (ptep == NULL)
214 continue; 216 continue;
215 pte = pte_val(*ptep); 217 pte = pte_val(*ptep);
218 if (hugepage_shift)
219 trace_hugepage_invalidate(start, pte_val(pte));
216 if (!(pte & _PAGE_HASHPTE)) 220 if (!(pte & _PAGE_HASHPTE))
217 continue; 221 continue;
218 if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte))) 222 if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
219 hpte_do_hugepage_flush(mm, start, (pmd_t *)pte); 223 hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
220 else 224 else
221 hpte_need_flush(mm, start, ptep, pte, 0); 225 hpte_need_flush(mm, start, ptep, pte, 0);
222 } 226 }
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 92cb18d52ea8..f38ea4df6a85 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -581,42 +581,10 @@ static void setup_mmu_htw(void)
581/* 581/*
582 * Early initialization of the MMU TLB code 582 * Early initialization of the MMU TLB code
583 */ 583 */
584static void __early_init_mmu(int boot_cpu) 584static void early_init_this_mmu(void)
585{ 585{
586 unsigned int mas4; 586 unsigned int mas4;
587 587
588 /* XXX This will have to be decided at runtime, but right
589 * now our boot and TLB miss code hard wires it. Ideally
590 * we should find out a suitable page size and patch the
591 * TLB miss code (either that or use the PACA to store
592 * the value we want)
593 */
594 mmu_linear_psize = MMU_PAGE_1G;
595
596 /* XXX This should be decided at runtime based on supported
597 * page sizes in the TLB, but for now let's assume 16M is
598 * always there and a good fit (which it probably is)
599 *
600 * Freescale booke only supports 4K pages in TLB0, so use that.
601 */
602 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
603 mmu_vmemmap_psize = MMU_PAGE_4K;
604 else
605 mmu_vmemmap_psize = MMU_PAGE_16M;
606
607 /* XXX This code only checks for TLB 0 capabilities and doesn't
608 * check what page size combos are supported by the HW. It
609 * also doesn't handle the case where a separate array holds
610 * the IND entries from the array loaded by the PT.
611 */
612 if (boot_cpu) {
613 /* Look for supported page sizes */
614 setup_page_sizes();
615
616 /* Look for HW tablewalk support */
617 setup_mmu_htw();
618 }
619
620 /* Set MAS4 based on page table setting */ 588 /* Set MAS4 based on page table setting */
621 589
622 mas4 = 0x4 << MAS4_WIMGED_SHIFT; 590 mas4 = 0x4 << MAS4_WIMGED_SHIFT;
@@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu)
650 } 618 }
651 mtspr(SPRN_MAS4, mas4); 619 mtspr(SPRN_MAS4, mas4);
652 620
653 /* Set the global containing the top of the linear mapping
654 * for use by the TLB miss code
655 */
656 linear_map_top = memblock_end_of_DRAM();
657
658#ifdef CONFIG_PPC_FSL_BOOK3E 621#ifdef CONFIG_PPC_FSL_BOOK3E
659 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) { 622 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
660 unsigned int num_cams; 623 unsigned int num_cams;
@@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu)
662 /* use a quarter of the TLBCAM for bolted linear map */ 625 /* use a quarter of the TLBCAM for bolted linear map */
663 num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4; 626 num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
664 linear_map_top = map_mem_in_cams(linear_map_top, num_cams); 627 linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
628 }
629#endif
665 630
666 /* limit memory so we dont have linear faults */ 631 /* A sync won't hurt us after mucking around with
667 memblock_enforce_memory_limit(linear_map_top); 632 * the MMU configuration
633 */
634 mb();
635}
668 636
637static void __init early_init_mmu_global(void)
638{
639 /* XXX This will have to be decided at runtime, but right
640 * now our boot and TLB miss code hard wires it. Ideally
641 * we should find out a suitable page size and patch the
642 * TLB miss code (either that or use the PACA to store
643 * the value we want)
644 */
645 mmu_linear_psize = MMU_PAGE_1G;
646
647 /* XXX This should be decided at runtime based on supported
648 * page sizes in the TLB, but for now let's assume 16M is
649 * always there and a good fit (which it probably is)
650 *
651 * Freescale booke only supports 4K pages in TLB0, so use that.
652 */
653 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
654 mmu_vmemmap_psize = MMU_PAGE_4K;
655 else
656 mmu_vmemmap_psize = MMU_PAGE_16M;
657
658 /* XXX This code only checks for TLB 0 capabilities and doesn't
659 * check what page size combos are supported by the HW. It
660 * also doesn't handle the case where a separate array holds
661 * the IND entries from the array loaded by the PT.
662 */
663 /* Look for supported page sizes */
664 setup_page_sizes();
665
666 /* Look for HW tablewalk support */
667 setup_mmu_htw();
668
669#ifdef CONFIG_PPC_FSL_BOOK3E
670 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
669 if (book3e_htw_mode == PPC_HTW_NONE) { 671 if (book3e_htw_mode == PPC_HTW_NONE) {
670 extlb_level_exc = EX_TLB_SIZE; 672 extlb_level_exc = EX_TLB_SIZE;
671 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); 673 patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
@@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu)
675 } 677 }
676#endif 678#endif
677 679
678 /* A sync won't hurt us after mucking around with 680 /* Set the global containing the top of the linear mapping
679 * the MMU configuration 681 * for use by the TLB miss code
680 */ 682 */
681 mb(); 683 linear_map_top = memblock_end_of_DRAM();
684}
685
686static void __init early_mmu_set_memory_limit(void)
687{
688#ifdef CONFIG_PPC_FSL_BOOK3E
689 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
690 /*
691 * Limit memory so we don't have linear faults.
692 * Unlike memblock_set_current_limit, which limits
693 * memory available during early boot, this permanently
694 * reduces the memory available to Linux. We need to
695 * do this because highmem is not supported on 64-bit.
696 */
697 memblock_enforce_memory_limit(linear_map_top);
698 }
699#endif
682 700
683 memblock_set_current_limit(linear_map_top); 701 memblock_set_current_limit(linear_map_top);
684} 702}
685 703
704/* boot cpu only */
686void __init early_init_mmu(void) 705void __init early_init_mmu(void)
687{ 706{
688 __early_init_mmu(1); 707 early_init_mmu_global();
708 early_init_this_mmu();
709 early_mmu_set_memory_limit();
689} 710}
690 711
691void early_init_mmu_secondary(void) 712void early_init_mmu_secondary(void)
692{ 713{
693 __early_init_mmu(0); 714 early_init_this_mmu();
694} 715}
695 716
696void setup_initial_memory_limit(phys_addr_t first_memblock_base, 717void setup_initial_memory_limit(phys_addr_t first_memblock_base,
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 66d0f179650f..70d4f748b54b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -223,7 +223,7 @@ e_free:
223 pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:" 223 pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
224 " rc=%ld\n", 224 " rc=%ld\n",
225 catalog_version_num, page_offset, hret); 225 catalog_version_num, page_offset, hret);
226 kfree(page); 226 kmem_cache_free(hv_page_cache, page);
227 227
228 pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n", 228 pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
229 offset, page_offset, count, page_count, catalog_len, 229 offset, page_offset, count, page_count, catalog_len,
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 1413e72bc2e1..4882bfd90e27 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2805,25 +2805,20 @@ set_initial_features(void)
2805 /* Enable GMAC for now for PCI probing. It will be disabled 2805 /* Enable GMAC for now for PCI probing. It will be disabled
2806 * later on after PCI probe 2806 * later on after PCI probe
2807 */ 2807 */
2808 np = of_find_node_by_name(NULL, "ethernet"); 2808 for_each_node_by_name(np, "ethernet")
2809 while(np) {
2810 if (of_device_is_compatible(np, "K2-GMAC")) 2809 if (of_device_is_compatible(np, "K2-GMAC"))
2811 g5_gmac_enable(np, 0, 1); 2810 g5_gmac_enable(np, 0, 1);
2812 np = of_find_node_by_name(np, "ethernet");
2813 }
2814 2811
2815 /* Enable FW before PCI probe. Will be disabled later on 2812 /* Enable FW before PCI probe. Will be disabled later on
2816 * Note: We should have a better way to check that we are 2813 * Note: We should have a better way to check that we are
2817 * dealing with uninorth internal cell and not a PCI cell 2814 * dealing with uninorth internal cell and not a PCI cell
2818 * on the external PCI. The code below works though. 2815 * on the external PCI. The code below works though.
2819 */ 2816 */
2820 np = of_find_node_by_name(NULL, "firewire"); 2817 for_each_node_by_name(np, "firewire") {
2821 while(np) {
2822 if (of_device_is_compatible(np, "pci106b,5811")) { 2818 if (of_device_is_compatible(np, "pci106b,5811")) {
2823 macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; 2819 macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
2824 g5_fw_enable(np, 0, 1); 2820 g5_fw_enable(np, 0, 1);
2825 } 2821 }
2826 np = of_find_node_by_name(np, "firewire");
2827 } 2822 }
2828 } 2823 }
2829#else /* CONFIG_PPC64 */ 2824#else /* CONFIG_PPC64 */
@@ -2834,13 +2829,11 @@ set_initial_features(void)
2834 /* Enable GMAC for now for PCI probing. It will be disabled 2829 /* Enable GMAC for now for PCI probing. It will be disabled
2835 * later on after PCI probe 2830 * later on after PCI probe
2836 */ 2831 */
2837 np = of_find_node_by_name(NULL, "ethernet"); 2832 for_each_node_by_name(np, "ethernet") {
2838 while(np) {
2839 if (np->parent 2833 if (np->parent
2840 && of_device_is_compatible(np->parent, "uni-north") 2834 && of_device_is_compatible(np->parent, "uni-north")
2841 && of_device_is_compatible(np, "gmac")) 2835 && of_device_is_compatible(np, "gmac"))
2842 core99_gmac_enable(np, 0, 1); 2836 core99_gmac_enable(np, 0, 1);
2843 np = of_find_node_by_name(np, "ethernet");
2844 } 2837 }
2845 2838
2846 /* Enable FW before PCI probe. Will be disabled later on 2839 /* Enable FW before PCI probe. Will be disabled later on
@@ -2848,8 +2841,7 @@ set_initial_features(void)
2848 * dealing with uninorth internal cell and not a PCI cell 2841 * dealing with uninorth internal cell and not a PCI cell
2849 * on the external PCI. The code below works though. 2842 * on the external PCI. The code below works though.
2850 */ 2843 */
2851 np = of_find_node_by_name(NULL, "firewire"); 2844 for_each_node_by_name(np, "firewire") {
2852 while(np) {
2853 if (np->parent 2845 if (np->parent
2854 && of_device_is_compatible(np->parent, "uni-north") 2846 && of_device_is_compatible(np->parent, "uni-north")
2855 && (of_device_is_compatible(np, "pci106b,18") || 2847 && (of_device_is_compatible(np, "pci106b,18") ||
@@ -2858,18 +2850,16 @@ set_initial_features(void)
2858 macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; 2850 macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED;
2859 core99_firewire_enable(np, 0, 1); 2851 core99_firewire_enable(np, 0, 1);
2860 } 2852 }
2861 np = of_find_node_by_name(np, "firewire");
2862 } 2853 }
2863 2854
2864 /* Enable ATA-100 before PCI probe. */ 2855 /* Enable ATA-100 before PCI probe. */
2865 np = of_find_node_by_name(NULL, "ata-6"); 2856 np = of_find_node_by_name(NULL, "ata-6");
2866 while(np) { 2857 for_each_node_by_name(np, "ata-6") {
2867 if (np->parent 2858 if (np->parent
2868 && of_device_is_compatible(np->parent, "uni-north") 2859 && of_device_is_compatible(np->parent, "uni-north")
2869 && of_device_is_compatible(np, "kauai-ata")) { 2860 && of_device_is_compatible(np, "kauai-ata")) {
2870 core99_ata100_enable(np, 1); 2861 core99_ata100_enable(np, 1);
2871 } 2862 }
2872 np = of_find_node_by_name(np, "ata-6");
2873 } 2863 }
2874 2864
2875 /* Switch airport off */ 2865 /* Switch airport off */
diff --git a/arch/powerpc/platforms/powermac/pci.c b/arch/powerpc/platforms/powermac/pci.c
index cf7009b8c7b6..7e868ccf3b0d 100644
--- a/arch/powerpc/platforms/powermac/pci.c
+++ b/arch/powerpc/platforms/powermac/pci.c
@@ -698,7 +698,7 @@ static void __init fixup_nec_usb2(void)
698{ 698{
699 struct device_node *nec; 699 struct device_node *nec;
700 700
701 for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) { 701 for_each_node_by_name(nec, "usb") {
702 struct pci_controller *hose; 702 struct pci_controller *hose;
703 u32 data; 703 u32 data;
704 const u32 *prop; 704 const u32 *prop;
diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c
index 5cbd4d67d5c4..af094ae03dbb 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -577,7 +577,7 @@ static void __init smp_core99_setup_i2c_hwsync(int ncpus)
577 int ok; 577 int ok;
578 578
579 /* Look for the clock chip */ 579 /* Look for the clock chip */
580 while ((cc = of_find_node_by_name(cc, "i2c-hwclock")) != NULL) { 580 for_each_node_by_name(cc, "i2c-hwclock") {
581 p = of_get_parent(cc); 581 p = of_get_parent(cc);
582 ok = p && of_device_is_compatible(p, "uni-n-i2c"); 582 ok = p && of_device_is_compatible(p, "uni-n-i2c");
583 of_node_put(p); 583 of_node_put(p);
diff --git a/arch/powerpc/platforms/powermac/udbg_adb.c b/arch/powerpc/platforms/powermac/udbg_adb.c
index 44e0b55a2a02..366bd221edec 100644
--- a/arch/powerpc/platforms/powermac/udbg_adb.c
+++ b/arch/powerpc/platforms/powermac/udbg_adb.c
@@ -191,7 +191,7 @@ int __init udbg_adb_init(int force_btext)
191 * of type "adb". If not, we return a failure, but we keep the 191 * of type "adb". If not, we return a failure, but we keep the
192 * btext output set for now 192 * btext output set for now
193 */ 193 */
194 for (np = NULL; (np = of_find_node_by_name(np, "keyboard")) != NULL;) { 194 for_each_node_by_name(np, "keyboard") {
195 struct device_node *parent = of_get_parent(np); 195 struct device_node *parent = of_get_parent(np);
196 int found = (parent && strcmp(parent->type, "adb") == 0); 196 int found = (parent && strcmp(parent->type, "adb") == 0);
197 of_node_put(parent); 197 of_node_put(parent);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a328be44880f..2e6ce1b8dc8f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -245,3 +245,5 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
245OPAL_CALL(opal_get_param, OPAL_GET_PARAM); 245OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
246OPAL_CALL(opal_set_param, OPAL_SET_PARAM); 246OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
247OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI); 247OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
248OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
249OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f0a01a46a57d..b44eec3e8dbd 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -605,6 +605,24 @@ static int opal_sysfs_init(void)
605 return 0; 605 return 0;
606} 606}
607 607
608static void __init opal_dump_region_init(void)
609{
610 void *addr;
611 uint64_t size;
612 int rc;
613
614 /* Register kernel log buffer */
615 addr = log_buf_addr_get();
616 size = log_buf_len_get();
617 rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
618 __pa(addr), size);
619 /* Don't warn if this is just an older OPAL that doesn't
620 * know about that call
621 */
622 if (rc && rc != OPAL_UNSUPPORTED)
623 pr_warn("DUMP: Failed to register kernel log buffer. "
624 "rc = %d\n", rc);
625}
608static int __init opal_init(void) 626static int __init opal_init(void)
609{ 627{
610 struct device_node *np, *consoles; 628 struct device_node *np, *consoles;
@@ -654,6 +672,8 @@ static int __init opal_init(void)
654 /* Create "opal" kobject under /sys/firmware */ 672 /* Create "opal" kobject under /sys/firmware */
655 rc = opal_sysfs_init(); 673 rc = opal_sysfs_init();
656 if (rc == 0) { 674 if (rc == 0) {
675 /* Setup dump region interface */
676 opal_dump_region_init();
657 /* Setup error log interface */ 677 /* Setup error log interface */
658 rc = opal_elog_init(); 678 rc = opal_elog_init();
659 /* Setup code update interface */ 679 /* Setup code update interface */
@@ -694,6 +714,9 @@ void opal_shutdown(void)
694 else 714 else
695 mdelay(10); 715 mdelay(10);
696 } 716 }
717
718 /* Unregister memory dump region */
719 opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
697} 720}
698 721
699/* Export this so that test modules can use it */ 722/* Export this so that test modules can use it */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b136108ddc99..df241b11d4f7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -857,7 +857,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
857 857
858 pe = &phb->ioda.pe_array[pdn->pe_number]; 858 pe = &phb->ioda.pe_array[pdn->pe_number];
859 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops); 859 WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
860 set_iommu_table_base(&pdev->dev, &pe->tce32_table); 860 set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
861} 861}
862 862
863static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb, 863static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 7995135170a3..c904583baf4b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -146,7 +146,7 @@ static inline int pseries_remove_memblock(unsigned long base,
146} 146}
147static inline int pseries_remove_mem_node(struct device_node *np) 147static inline int pseries_remove_mem_node(struct device_node *np)
148{ 148{
149 return -EOPNOTSUPP; 149 return 0;
150} 150}
151#endif /* CONFIG_MEMORY_HOTREMOVE */ 151#endif /* CONFIG_MEMORY_HOTREMOVE */
152 152
@@ -194,7 +194,7 @@ static int pseries_update_drconf_memory(struct of_prop_reconfig *pr)
194 if (!memblock_size) 194 if (!memblock_size)
195 return -EINVAL; 195 return -EINVAL;
196 196
197 p = (u32 *)of_get_property(pr->dn, "ibm,dynamic-memory", NULL); 197 p = (u32 *) pr->old_prop->value;
198 if (!p) 198 if (!p)
199 return -EINVAL; 199 return -EINVAL;
200 200
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 4557e91626c4..eedb64594dc5 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -163,8 +163,8 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
163 return retval; 163 return retval;
164 } 164 }
165 165
166 last_p_partition_ID = pi_buff[0]; 166 last_p_partition_ID = be64_to_cpu(pi_buff[0]);
167 last_p_unit_address = pi_buff[1]; 167 last_p_unit_address = be64_to_cpu(pi_buff[1]);
168 168
169 /* This indicates that there are no further partners */ 169 /* This indicates that there are no further partners */
170 if (last_p_partition_ID == ~0UL 170 if (last_p_partition_ID == ~0UL
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 33b552ffbe57..4642d6a4d356 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -721,13 +721,13 @@ static int __init disable_ddw_setup(char *str)
721 721
722early_param("disable_ddw", disable_ddw_setup); 722early_param("disable_ddw", disable_ddw_setup);
723 723
724static void remove_ddw(struct device_node *np) 724static void remove_ddw(struct device_node *np, bool remove_prop)
725{ 725{
726 struct dynamic_dma_window_prop *dwp; 726 struct dynamic_dma_window_prop *dwp;
727 struct property *win64; 727 struct property *win64;
728 const u32 *ddw_avail; 728 const u32 *ddw_avail;
729 u64 liobn; 729 u64 liobn;
730 int len, ret; 730 int len, ret = 0;
731 731
732 ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len); 732 ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
733 win64 = of_find_property(np, DIRECT64_PROPNAME, NULL); 733 win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
@@ -761,7 +761,8 @@ static void remove_ddw(struct device_node *np)
761 np->full_name, ret, ddw_avail[2], liobn); 761 np->full_name, ret, ddw_avail[2], liobn);
762 762
763delprop: 763delprop:
764 ret = of_remove_property(np, win64); 764 if (remove_prop)
765 ret = of_remove_property(np, win64);
765 if (ret) 766 if (ret)
766 pr_warning("%s: failed to remove direct window property: %d\n", 767 pr_warning("%s: failed to remove direct window property: %d\n",
767 np->full_name, ret); 768 np->full_name, ret);
@@ -805,7 +806,7 @@ static int find_existing_ddw_windows(void)
805 window = kzalloc(sizeof(*window), GFP_KERNEL); 806 window = kzalloc(sizeof(*window), GFP_KERNEL);
806 if (!window || len < sizeof(struct dynamic_dma_window_prop)) { 807 if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
807 kfree(window); 808 kfree(window);
808 remove_ddw(pdn); 809 remove_ddw(pdn, true);
809 continue; 810 continue;
810 } 811 }
811 812
@@ -1045,7 +1046,7 @@ out_free_window:
1045 kfree(window); 1046 kfree(window);
1046 1047
1047out_clear_window: 1048out_clear_window:
1048 remove_ddw(pdn); 1049 remove_ddw(pdn, true);
1049 1050
1050out_free_prop: 1051out_free_prop:
1051 kfree(win64->name); 1052 kfree(win64->name);
@@ -1255,7 +1256,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
1255 1256
1256 switch (action) { 1257 switch (action) {
1257 case OF_RECONFIG_DETACH_NODE: 1258 case OF_RECONFIG_DETACH_NODE:
1258 remove_ddw(np); 1259 /*
1260 * Removing the property will invoke the reconfig
1261 * notifier again, which causes dead-lock on the
1262 * read-write semaphore of the notifier chain. So
1263 * we have to remove the property when releasing
1264 * the device node.
1265 */
1266 remove_ddw(np, false);
1259 if (pci && pci->iommu_table) 1267 if (pci && pci->iommu_table)
1260 iommu_free_table(pci->iommu_table, np->full_name); 1268 iommu_free_table(pci->iommu_table, np->full_name);
1261 1269
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index fbfcef514aa7..34e64237fff9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -431,16 +431,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
431 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags); 431 spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
432} 432}
433 433
434static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm, 434static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
435 unsigned char *hpte_slot_array, 435 unsigned long addr,
436 unsigned long addr, int psize) 436 unsigned char *hpte_slot_array,
437 int psize, int ssize)
437{ 438{
438 int ssize = 0, i, index = 0; 439 int i, index = 0;
439 unsigned long s_addr = addr; 440 unsigned long s_addr = addr;
440 unsigned int max_hpte_count, valid; 441 unsigned int max_hpte_count, valid;
441 unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH]; 442 unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
442 unsigned long slot_array[PPC64_HUGE_HPTE_BATCH]; 443 unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
443 unsigned long shift, hidx, vpn = 0, vsid, hash, slot; 444 unsigned long shift, hidx, vpn = 0, hash, slot;
444 445
445 shift = mmu_psize_defs[psize].shift; 446 shift = mmu_psize_defs[psize].shift;
446 max_hpte_count = 1U << (PMD_SHIFT - shift); 447 max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -453,15 +454,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
453 454
454 /* get the vpn */ 455 /* get the vpn */
455 addr = s_addr + (i * (1ul << shift)); 456 addr = s_addr + (i * (1ul << shift));
456 if (!is_kernel_addr(addr)) {
457 ssize = user_segment_size(addr);
458 vsid = get_vsid(mm->context.id, addr, ssize);
459 WARN_ON(vsid == 0);
460 } else {
461 vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
462 ssize = mmu_kernel_ssize;
463 }
464
465 vpn = hpt_vpn(addr, vsid, ssize); 457 vpn = hpt_vpn(addr, vsid, ssize);
466 hash = hpt_hash(vpn, shift, ssize); 458 hash = hpt_hash(vpn, shift, ssize);
467 if (hidx & _PTEIDX_SECONDARY) 459 if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index cfe8a6389a51..e724d3186e73 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -232,8 +232,7 @@ static void __init pseries_discover_pic(void)
232 struct device_node *np; 232 struct device_node *np;
233 const char *typep; 233 const char *typep;
234 234
235 for (np = NULL; (np = of_find_node_by_name(np, 235 for_each_node_by_name(np, "interrupt-controller") {
236 "interrupt-controller"));) {
237 typep = of_get_property(np, "compatible", NULL); 236 typep = of_get_property(np, "compatible", NULL);
238 if (strstr(typep, "open-pic")) { 237 if (strstr(typep, "open-pic")) {
239 pSeries_mpic_node = of_node_get(np); 238 pSeries_mpic_node = of_node_get(np);
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 8d198b5e9e0a..b988b5addf86 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -24,6 +24,7 @@
24#include <linux/interrupt.h> 24#include <linux/interrupt.h>
25#include <linux/irq.h> 25#include <linux/irq.h>
26#include <linux/bug.h> 26#include <linux/bug.h>
27#include <linux/nmi.h>
27 28
28#include <asm/ptrace.h> 29#include <asm/ptrace.h>
29#include <asm/string.h> 30#include <asm/string.h>
@@ -374,6 +375,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
374#endif 375#endif
375 376
376 local_irq_save(flags); 377 local_irq_save(flags);
378 hard_irq_disable();
377 379
378 bp = in_breakpoint_table(regs->nip, &offset); 380 bp = in_breakpoint_table(regs->nip, &offset);
379 if (bp != NULL) { 381 if (bp != NULL) {
@@ -558,6 +560,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
558#endif 560#endif
559 insert_cpu_bpts(); 561 insert_cpu_bpts();
560 562
563 touch_nmi_watchdog();
561 local_irq_restore(flags); 564 local_irq_restore(flags);
562 565
563 return cmd != 'X' && cmd != EOF; 566 return cmd != 'X' && cmd != EOF;
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c
index c0413046483a..1580e7a5a4cf 100644
--- a/arch/x86/xen/grant-table.c
+++ b/arch/x86/xen/grant-table.c
@@ -118,6 +118,7 @@ static int __init xlated_setup_gnttab_pages(void)
118{ 118{
119 struct page **pages; 119 struct page **pages;
120 xen_pfn_t *pfns; 120 xen_pfn_t *pfns;
121 void *vaddr;
121 int rc; 122 int rc;
122 unsigned int i; 123 unsigned int i;
123 unsigned long nr_grant_frames = gnttab_max_grant_frames(); 124 unsigned long nr_grant_frames = gnttab_max_grant_frames();
@@ -143,21 +144,20 @@ static int __init xlated_setup_gnttab_pages(void)
143 for (i = 0; i < nr_grant_frames; i++) 144 for (i = 0; i < nr_grant_frames; i++)
144 pfns[i] = page_to_pfn(pages[i]); 145 pfns[i] = page_to_pfn(pages[i]);
145 146
146 rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, 147 vaddr = vmap(pages, nr_grant_frames, 0, PAGE_KERNEL);
147 &xen_auto_xlat_grant_frames.vaddr); 148 if (!vaddr) {
148
149 if (rc) {
150 pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, 149 pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__,
151 nr_grant_frames, rc); 150 nr_grant_frames, rc);
152 free_xenballooned_pages(nr_grant_frames, pages); 151 free_xenballooned_pages(nr_grant_frames, pages);
153 kfree(pages); 152 kfree(pages);
154 kfree(pfns); 153 kfree(pfns);
155 return rc; 154 return -ENOMEM;
156 } 155 }
157 kfree(pages); 156 kfree(pages);
158 157
159 xen_auto_xlat_grant_frames.pfn = pfns; 158 xen_auto_xlat_grant_frames.pfn = pfns;
160 xen_auto_xlat_grant_frames.count = nr_grant_frames; 159 xen_auto_xlat_grant_frames.count = nr_grant_frames;
160 xen_auto_xlat_grant_frames.vaddr = vaddr;
161 161
162 return 0; 162 return 0;
163} 163}
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 7b78f88c1707..5718b0b58b60 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -444,7 +444,7 @@ void xen_setup_timer(int cpu)
444 444
445 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt, 445 irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| 446 IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
447 IRQF_FORCE_RESUME, 447 IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
448 name, NULL); 448 name, NULL);
449 (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); 449 (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
450 450
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 9e241063a616..bc423f7b02da 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
70 bs->bvec_integrity_pool); 70 bs->bvec_integrity_pool);
71 if (!bip->bip_vec) 71 if (!bip->bip_vec)
72 goto err; 72 goto err;
73 bip->bip_max_vcnt = bvec_nr_vecs(idx);
73 } else { 74 } else {
74 bip->bip_vec = bip->bip_inline_vecs; 75 bip->bip_vec = bip->bip_inline_vecs;
76 bip->bip_max_vcnt = inline_vecs;
75 } 77 }
76 78
77 bip->bip_slab = idx; 79 bip->bip_slab = idx;
@@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio)
114} 116}
115EXPORT_SYMBOL(bio_integrity_free); 117EXPORT_SYMBOL(bio_integrity_free);
116 118
117static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
118{
119 if (bip->bip_slab == BIO_POOL_NONE)
120 return BIP_INLINE_VECS;
121
122 return bvec_nr_vecs(bip->bip_slab);
123}
124
125/** 119/**
126 * bio_integrity_add_page - Attach integrity metadata 120 * bio_integrity_add_page - Attach integrity metadata
127 * @bio: bio to update 121 * @bio: bio to update
@@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
137 struct bio_integrity_payload *bip = bio->bi_integrity; 131 struct bio_integrity_payload *bip = bio->bi_integrity;
138 struct bio_vec *iv; 132 struct bio_vec *iv;
139 133
140 if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { 134 if (bip->bip_vcnt >= bip->bip_max_vcnt) {
141 printk(KERN_ERR "%s: bip_vec full\n", __func__); 135 printk(KERN_ERR "%s: bip_vec full\n", __func__);
142 return 0; 136 return 0;
143 } 137 }
diff --git a/block/bio.c b/block/bio.c
index 0ec61c9e536c..3e6331d25d90 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
112 bslab = &bio_slabs[entry]; 112 bslab = &bio_slabs[entry];
113 113
114 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); 114 snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry);
115 slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); 115 slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN,
116 SLAB_HWCACHE_ALIGN, NULL);
116 if (!slab) 117 if (!slab)
117 goto out_unlock; 118 goto out_unlock;
118 119
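
The bio.c hunk passes ARCH_KMALLOC_MINALIGN as the alignment argument to kmem_cache_create() instead of 0, so bio slabs are at least as aligned as kmalloc() memory. A hedged sketch of the call pattern in a generic kernel-module context; the object type and cache name are made up, not the bio slab itself.

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/errno.h>

/* Illustrative object; the real code sizes the cache as sizeof(struct bio)
 * plus the per-driver front pad. */
struct example_obj {
	unsigned long flags;
	void *private;
};

static struct kmem_cache *example_cache;

static int __init example_cache_init(void)
{
	/* Passing ARCH_KMALLOC_MINALIGN rather than 0 guarantees objects are
	 * at least as aligned as kmalloc() memory, which matters for callers
	 * that DMA into memory carved out of these objects. */
	example_cache = kmem_cache_create("example-objs",
					  sizeof(struct example_obj),
					  ARCH_KMALLOC_MINALIGN,
					  SLAB_HWCACHE_ALIGN, NULL);
	return example_cache ? 0 : -ENOMEM;
}
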
diff --git a/block/blk-core.c b/block/blk-core.c
index 6f8dba161bfe..c359d72e9d76 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -438,14 +438,17 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all)
438 */ 438 */
439void blk_queue_bypass_start(struct request_queue *q) 439void blk_queue_bypass_start(struct request_queue *q)
440{ 440{
441 bool drain;
442
443 spin_lock_irq(q->queue_lock); 441 spin_lock_irq(q->queue_lock);
444 drain = !q->bypass_depth++; 442 q->bypass_depth++;
445 queue_flag_set(QUEUE_FLAG_BYPASS, q); 443 queue_flag_set(QUEUE_FLAG_BYPASS, q);
446 spin_unlock_irq(q->queue_lock); 444 spin_unlock_irq(q->queue_lock);
447 445
448 if (drain) { 446 /*
447 * Queues start drained. Skip actual draining till init is
 448 * complete. This avoids lengthy delays during queue init which
449 * can happen many times during boot.
450 */
451 if (blk_queue_init_done(q)) {
449 spin_lock_irq(q->queue_lock); 452 spin_lock_irq(q->queue_lock);
450 __blk_drain_queue(q, false); 453 __blk_drain_queue(q, false);
451 spin_unlock_irq(q->queue_lock); 454 spin_unlock_irq(q->queue_lock);
@@ -511,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q)
511 * prevent that q->request_fn() gets invoked after draining finished. 514 * prevent that q->request_fn() gets invoked after draining finished.
512 */ 515 */
513 if (q->mq_ops) { 516 if (q->mq_ops) {
514 blk_mq_drain_queue(q); 517 blk_mq_freeze_queue(q);
515 spin_lock_irq(lock); 518 spin_lock_irq(lock);
516 } else { 519 } else {
517 spin_lock_irq(lock); 520 spin_lock_irq(lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ad69ef657e85..5189cb1e478a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -78,68 +78,47 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
78 78
79static int blk_mq_queue_enter(struct request_queue *q) 79static int blk_mq_queue_enter(struct request_queue *q)
80{ 80{
81 int ret; 81 while (true) {
82 82 int ret;
83 __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
84 smp_wmb();
85
86 /* we have problems freezing the queue if it's initializing */
87 if (!blk_queue_dying(q) &&
88 (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
89 return 0;
90
91 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
92 83
93 spin_lock_irq(q->queue_lock); 84 if (percpu_ref_tryget_live(&q->mq_usage_counter))
94 ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, 85 return 0;
95 !blk_queue_bypass(q) || blk_queue_dying(q),
96 *q->queue_lock);
97 /* inc usage with lock hold to avoid freeze_queue runs here */
98 if (!ret && !blk_queue_dying(q))
99 __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
100 else if (blk_queue_dying(q))
101 ret = -ENODEV;
102 spin_unlock_irq(q->queue_lock);
103 86
104 return ret; 87 ret = wait_event_interruptible(q->mq_freeze_wq,
88 !q->mq_freeze_depth || blk_queue_dying(q));
89 if (blk_queue_dying(q))
90 return -ENODEV;
91 if (ret)
92 return ret;
93 }
105} 94}
106 95
107static void blk_mq_queue_exit(struct request_queue *q) 96static void blk_mq_queue_exit(struct request_queue *q)
108{ 97{
109 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); 98 percpu_ref_put(&q->mq_usage_counter);
110} 99}
111 100
112void blk_mq_drain_queue(struct request_queue *q) 101static void blk_mq_usage_counter_release(struct percpu_ref *ref)
113{ 102{
114 while (true) { 103 struct request_queue *q =
115 s64 count; 104 container_of(ref, struct request_queue, mq_usage_counter);
116
117 spin_lock_irq(q->queue_lock);
118 count = percpu_counter_sum(&q->mq_usage_counter);
119 spin_unlock_irq(q->queue_lock);
120 105
121 if (count == 0) 106 wake_up_all(&q->mq_freeze_wq);
122 break;
123 blk_mq_start_hw_queues(q);
124 msleep(10);
125 }
126} 107}
127 108
128/* 109/*
129 * Guarantee no request is in use, so we can change any data structure of 110 * Guarantee no request is in use, so we can change any data structure of
130 * the queue afterward. 111 * the queue afterward.
131 */ 112 */
132static void blk_mq_freeze_queue(struct request_queue *q) 113void blk_mq_freeze_queue(struct request_queue *q)
133{ 114{
134 bool drain;
135
136 spin_lock_irq(q->queue_lock); 115 spin_lock_irq(q->queue_lock);
137 drain = !q->bypass_depth++; 116 q->mq_freeze_depth++;
138 queue_flag_set(QUEUE_FLAG_BYPASS, q);
139 spin_unlock_irq(q->queue_lock); 117 spin_unlock_irq(q->queue_lock);
140 118
141 if (drain) 119 percpu_ref_kill(&q->mq_usage_counter);
142 blk_mq_drain_queue(q); 120 blk_mq_run_queues(q, false);
121 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
143} 122}
144 123
145static void blk_mq_unfreeze_queue(struct request_queue *q) 124static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -147,14 +126,13 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
147 bool wake = false; 126 bool wake = false;
148 127
149 spin_lock_irq(q->queue_lock); 128 spin_lock_irq(q->queue_lock);
150 if (!--q->bypass_depth) { 129 wake = !--q->mq_freeze_depth;
151 queue_flag_clear(QUEUE_FLAG_BYPASS, q); 130 WARN_ON_ONCE(q->mq_freeze_depth < 0);
152 wake = true;
153 }
154 WARN_ON_ONCE(q->bypass_depth < 0);
155 spin_unlock_irq(q->queue_lock); 131 spin_unlock_irq(q->queue_lock);
156 if (wake) 132 if (wake) {
133 percpu_ref_reinit(&q->mq_usage_counter);
157 wake_up_all(&q->mq_freeze_wq); 134 wake_up_all(&q->mq_freeze_wq);
135 }
158} 136}
159 137
160bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) 138bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
@@ -1798,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1798 if (!q) 1776 if (!q)
1799 goto err_hctxs; 1777 goto err_hctxs;
1800 1778
1801 if (percpu_counter_init(&q->mq_usage_counter, 0)) 1779 if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
1802 goto err_map; 1780 goto err_map;
1803 1781
1804 setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); 1782 setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
@@ -1891,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q)
1891 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); 1869 blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
1892 blk_mq_free_hw_queues(q, set); 1870 blk_mq_free_hw_queues(q, set);
1893 1871
1894 percpu_counter_destroy(&q->mq_usage_counter); 1872 percpu_ref_exit(&q->mq_usage_counter);
1895 1873
1896 free_percpu(q->queue_ctx); 1874 free_percpu(q->queue_ctx);
1897 kfree(q->queue_hw_ctx); 1875 kfree(q->queue_hw_ctx);
@@ -2050,8 +2028,7 @@ static int __init blk_mq_init(void)
2050{ 2028{
2051 blk_mq_cpu_init(); 2029 blk_mq_cpu_init();
2052 2030
2053 /* Must be called after percpu_counter_hotcpu_callback() */ 2031 hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
2054 hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
2055 2032
2056 return 0; 2033 return 0;
2057} 2034}
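
The blk-mq changes above replace the open-coded percpu_counter plus wait-queue scheme with a percpu_ref whose release callback wakes the freeze waiters: fast-path entry uses percpu_ref_tryget_live(), freezing kills the ref and waits for it to drain, and unfreezing reinitializes it. A condensed sketch of that pattern, using only the percpu_ref calls that appear in the hunk; the surrounding structure is illustrative, not the request_queue layout.

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/wait.h>
#include <linux/percpu-refcount.h>

struct guarded_queue {
	struct percpu_ref usage;	/* cheap per-cpu get/put on the hot path */
	wait_queue_head_t freeze_wq;
};

/* Release callback, invoked once the last reference goes away after
 * percpu_ref_kill(); percpu_ref_init(&q->usage, guarded_queue_release)
 * is assumed to have been called at setup time (its exact signature
 * varies between kernel versions). */
static void guarded_queue_release(struct percpu_ref *ref)
{
	struct guarded_queue *q = container_of(ref, struct guarded_queue, usage);

	wake_up_all(&q->freeze_wq);
}

static int guarded_queue_enter(struct guarded_queue *q)
{
	/* Fails once the ref has been killed, i.e. while the queue is frozen */
	return percpu_ref_tryget_live(&q->usage) ? 0 : -EBUSY;
}

static void guarded_queue_exit(struct guarded_queue *q)
{
	percpu_ref_put(&q->usage);
}

static void guarded_queue_freeze(struct guarded_queue *q)
{
	percpu_ref_kill(&q->usage);
	wait_event(q->freeze_wq, percpu_ref_is_zero(&q->usage));
}

static void guarded_queue_unfreeze(struct guarded_queue *q)
{
	percpu_ref_reinit(&q->usage);
	wake_up_all(&q->freeze_wq);
}
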
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 26460884c6cd..ca4964a6295d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -28,7 +28,7 @@ struct blk_mq_ctx {
28void __blk_mq_complete_request(struct request *rq); 28void __blk_mq_complete_request(struct request *rq);
29void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 29void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
30void blk_mq_init_flush(struct request_queue *q); 30void blk_mq_init_flush(struct request_queue *q);
31void blk_mq_drain_queue(struct request_queue *q); 31void blk_mq_freeze_queue(struct request_queue *q);
32void blk_mq_free_queue(struct request_queue *q); 32void blk_mq_free_queue(struct request_queue *q);
33void blk_mq_clone_flush_request(struct request *flush_rq, 33void blk_mq_clone_flush_request(struct request *flush_rq,
34 struct request *orig_rq); 34 struct request *orig_rq);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 23321fbab293..4db5abf96b9e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -554,8 +554,8 @@ int blk_register_queue(struct gendisk *disk)
554 * Initialization must be complete by now. Finish the initial 554 * Initialization must be complete by now. Finish the initial
555 * bypass from queue allocation. 555 * bypass from queue allocation.
556 */ 556 */
557 blk_queue_bypass_end(q);
558 queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); 557 queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q);
558 blk_queue_bypass_end(q);
559 559
560 ret = blk_trace_init_sysfs(dev); 560 ret = blk_trace_init_sysfs(dev);
561 if (ret) 561 if (ret)
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index a0926a6094b2..18b282ce361e 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -663,6 +663,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
663 fmode_t mode = file->f_mode; 663 fmode_t mode = file->f_mode;
664 struct backing_dev_info *bdi; 664 struct backing_dev_info *bdi;
665 loff_t size; 665 loff_t size;
666 unsigned int max_sectors;
666 667
667 /* 668 /*
668 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have 669 * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
@@ -719,8 +720,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
719 case BLKSSZGET: /* get block device hardware sector size */ 720 case BLKSSZGET: /* get block device hardware sector size */
720 return compat_put_int(arg, bdev_logical_block_size(bdev)); 721 return compat_put_int(arg, bdev_logical_block_size(bdev));
721 case BLKSECTGET: 722 case BLKSECTGET:
722 return compat_put_ushort(arg, 723 max_sectors = min_t(unsigned int, USHRT_MAX,
723 queue_max_sectors(bdev_get_queue(bdev))); 724 queue_max_sectors(bdev_get_queue(bdev)));
725 return compat_put_ushort(arg, max_sectors);
724 case BLKROTATIONAL: 726 case BLKROTATIONAL:
725 return compat_put_ushort(arg, 727 return compat_put_ushort(arg,
726 !blk_queue_nonrot(bdev_get_queue(bdev))); 728 !blk_queue_nonrot(bdev_get_queue(bdev)));
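
BLKSECTGET copies the queue's max_sectors into a user-supplied unsigned short, so the hunk clamps the value to USHRT_MAX with min_t() rather than letting large limits truncate silently. A small standalone sketch of the failure mode and the fix (plain C, illustrative names); the same clamp is applied to the native ioctl path in block/ioctl.c below.

#include <stdio.h>
#include <limits.h>

static unsigned short put_sectors_truncating(unsigned int max_sectors)
{
	return (unsigned short)max_sectors;        /* 65536 would become 0 */
}

static unsigned short put_sectors_clamped(unsigned int max_sectors)
{
	if (max_sectors > USHRT_MAX)
		max_sectors = USHRT_MAX;           /* what min_t(unsigned int, ...) does */
	return (unsigned short)max_sectors;
}

int main(void)
{
	unsigned int huge = 1u << 16;              /* 65536 sectors, i.e. a 32 MiB limit */
	printf("truncated: %u  clamped: %u\n",
	       put_sectors_truncating(huge), put_sectors_clamped(huge));
	return 0;
}
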
diff --git a/block/ioctl.c b/block/ioctl.c
index 7d5c3b20af45..d6cda8147c91 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -278,6 +278,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
278 struct backing_dev_info *bdi; 278 struct backing_dev_info *bdi;
279 loff_t size; 279 loff_t size;
280 int ret, n; 280 int ret, n;
281 unsigned int max_sectors;
281 282
282 switch(cmd) { 283 switch(cmd) {
283 case BLKFLSBUF: 284 case BLKFLSBUF:
@@ -375,7 +376,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
375 case BLKDISCARDZEROES: 376 case BLKDISCARDZEROES:
376 return put_uint(arg, bdev_discard_zeroes_data(bdev)); 377 return put_uint(arg, bdev_discard_zeroes_data(bdev));
377 case BLKSECTGET: 378 case BLKSECTGET:
378 return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); 379 max_sectors = min_t(unsigned int, USHRT_MAX,
380 queue_max_sectors(bdev_get_queue(bdev)));
381 return put_ushort(arg, max_sectors);
379 case BLKROTATIONAL: 382 case BLKROTATIONAL:
380 return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); 383 return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev)));
381 case BLKRASET: 384 case BLKRASET:
diff --git a/block/partitions/aix.c b/block/partitions/aix.c
index 43be471d9b1d..f3ed7b2d89bf 100644
--- a/block/partitions/aix.c
+++ b/block/partitions/aix.c
@@ -215,7 +215,7 @@ int aix_partition(struct parsed_partitions *state)
215 numlvs = be16_to_cpu(p->numlvs); 215 numlvs = be16_to_cpu(p->numlvs);
216 put_dev_sector(sect); 216 put_dev_sector(sect);
217 } 217 }
218 lvip = kzalloc(sizeof(struct lv_info) * state->limit, GFP_KERNEL); 218 lvip = kcalloc(state->limit, sizeof(struct lv_info), GFP_KERNEL);
219 if (!lvip) 219 if (!lvip)
220 return 0; 220 return 0;
221 if (numlvs && (d = read_part_sector(state, vgda_sector + 1, &sect))) { 221 if (numlvs && (d = read_part_sector(state, vgda_sector + 1, &sect))) {
@@ -253,7 +253,7 @@ int aix_partition(struct parsed_partitions *state)
253 continue; 253 continue;
254 } 254 }
255 lv_ix = be16_to_cpu(p->lv_ix) - 1; 255 lv_ix = be16_to_cpu(p->lv_ix) - 1;
256 if (lv_ix > state->limit) { 256 if (lv_ix >= state->limit) {
257 cur_lv_ix = -1; 257 cur_lv_ix = -1;
258 continue; 258 continue;
259 } 259 }
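
The aix.c hunk makes two small hardening changes: kzalloc(n * size) becomes kcalloc(n, size), which rejects a count/size product that would overflow, and the logical-volume index check is tightened from '>' to '>=' so lvip[lv_ix] can never reference one element past the array. A userspace sketch of both, under the assumption of a plain calloc-backed allocation.

#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

struct lv_info { unsigned long num_lvs; };

/* calloc-style helper: refuse a count/size product that would overflow,
 * which is what kcalloc() adds over kzalloc(n * size). */
static void *xcalloc(size_t n, size_t size)
{
	if (size && n > SIZE_MAX / size)
		return NULL;
	return calloc(n, size);
}

int main(void)
{
	size_t limit = 16;
	struct lv_info *lvip = xcalloc(limit, sizeof(*lvip));
	size_t lv_ix = 16;

	/* '>' would have accepted lv_ix == limit and read past the array */
	if (lvip && lv_ix >= limit)
		printf("index %zu out of range\n", lv_ix);
	free(lvip);
	return 0;
}
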
diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c
index 70cbf44a1560..2b13533d60a2 100644
--- a/block/partitions/amiga.c
+++ b/block/partitions/amiga.c
@@ -7,6 +7,8 @@
7 * Re-organised Feb 1998 Russell King 7 * Re-organised Feb 1998 Russell King
8 */ 8 */
9 9
10#define pr_fmt(fmt) fmt
11
10#include <linux/types.h> 12#include <linux/types.h>
11#include <linux/affs_hardblocks.h> 13#include <linux/affs_hardblocks.h>
12 14
@@ -40,7 +42,7 @@ int amiga_partition(struct parsed_partitions *state)
40 data = read_part_sector(state, blk, &sect); 42 data = read_part_sector(state, blk, &sect);
41 if (!data) { 43 if (!data) {
42 if (warn_no_part) 44 if (warn_no_part)
43 printk("Dev %s: unable to read RDB block %d\n", 45 pr_err("Dev %s: unable to read RDB block %d\n",
44 bdevname(state->bdev, b), blk); 46 bdevname(state->bdev, b), blk);
45 res = -1; 47 res = -1;
46 goto rdb_done; 48 goto rdb_done;
@@ -57,12 +59,12 @@ int amiga_partition(struct parsed_partitions *state)
57 *(__be32 *)(data+0xdc) = 0; 59 *(__be32 *)(data+0xdc) = 0;
58 if (checksum_block((__be32 *)data, 60 if (checksum_block((__be32 *)data,
59 be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) { 61 be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) {
60 printk("Warning: Trashed word at 0xd0 in block %d " 62 pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n",
61 "ignored in checksum calculation\n",blk); 63 blk);
62 break; 64 break;
63 } 65 }
64 66
65 printk("Dev %s: RDB in block %d has bad checksum\n", 67 pr_err("Dev %s: RDB in block %d has bad checksum\n",
66 bdevname(state->bdev, b), blk); 68 bdevname(state->bdev, b), blk);
67 } 69 }
68 70
@@ -83,7 +85,7 @@ int amiga_partition(struct parsed_partitions *state)
83 data = read_part_sector(state, blk, &sect); 85 data = read_part_sector(state, blk, &sect);
84 if (!data) { 86 if (!data) {
85 if (warn_no_part) 87 if (warn_no_part)
86 printk("Dev %s: unable to read partition block %d\n", 88 pr_err("Dev %s: unable to read partition block %d\n",
87 bdevname(state->bdev, b), blk); 89 bdevname(state->bdev, b), blk);
88 res = -1; 90 res = -1;
89 goto rdb_done; 91 goto rdb_done;
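
The amiga.c hunk converts bare printk() calls to pr_err() and defines pr_fmt() up front (here as the identity, to keep the existing message text unchanged). For reference, a short kernel-context sketch of how a non-trivial pr_fmt() prefixes every pr_*() call in a file; the function and message are made up.

/* must come before any #include that pulls in printk.h */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/printk.h>

static void report_bad_block(int blk)
{
	/* expands to roughly
	 * printk(KERN_ERR "<modname>: RDB block %d has a bad checksum\n", blk) */
	pr_err("RDB block %d has a bad checksum\n", blk);
}
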
diff --git a/block/partitions/efi.c b/block/partitions/efi.c
index dc51f467a560..56d08fd75b1a 100644
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -121,7 +121,7 @@ __setup("gpt", force_gpt_fn);
121/** 121/**
122 * efi_crc32() - EFI version of crc32 function 122 * efi_crc32() - EFI version of crc32 function
123 * @buf: buffer to calculate crc32 of 123 * @buf: buffer to calculate crc32 of
124 * @len - length of buf 124 * @len: length of buf
125 * 125 *
126 * Description: Returns EFI-style CRC32 value for @buf 126 * Description: Returns EFI-style CRC32 value for @buf
127 * 127 *
@@ -240,10 +240,10 @@ done:
240 240
241/** 241/**
242 * read_lba(): Read bytes from disk, starting at given LBA 242 * read_lba(): Read bytes from disk, starting at given LBA
243 * @state 243 * @state: disk parsed partitions
244 * @lba 244 * @lba: the Logical Block Address of the partition table
245 * @buffer 245 * @buffer: destination buffer
246 * @size_t 246 * @count: bytes to read
247 * 247 *
248 * Description: Reads @count bytes from @state->bdev into @buffer. 248 * Description: Reads @count bytes from @state->bdev into @buffer.
249 * Returns number of bytes read on success, 0 on error. 249 * Returns number of bytes read on success, 0 on error.
@@ -277,8 +277,8 @@ static size_t read_lba(struct parsed_partitions *state,
277 277
278/** 278/**
279 * alloc_read_gpt_entries(): reads partition entries from disk 279 * alloc_read_gpt_entries(): reads partition entries from disk
280 * @state 280 * @state: disk parsed partitions
281 * @gpt - GPT header 281 * @gpt: GPT header
282 * 282 *
283 * Description: Returns ptes on success, NULL on error. 283 * Description: Returns ptes on success, NULL on error.
284 * Allocates space for PTEs based on information found in @gpt. 284 * Allocates space for PTEs based on information found in @gpt.
@@ -312,8 +312,8 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
312 312
313/** 313/**
314 * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk 314 * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk
315 * @state 315 * @state: disk parsed partitions
316 * @lba is the Logical Block Address of the partition table 316 * @lba: the Logical Block Address of the partition table
317 * 317 *
318 * Description: returns GPT header on success, NULL on error. Allocates 318 * Description: returns GPT header on success, NULL on error. Allocates
319 * and fills a GPT header starting at @ from @state->bdev. 319 * and fills a GPT header starting at @ from @state->bdev.
@@ -340,10 +340,10 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state,
340 340
341/** 341/**
342 * is_gpt_valid() - tests one GPT header and PTEs for validity 342 * is_gpt_valid() - tests one GPT header and PTEs for validity
343 * @state 343 * @state: disk parsed partitions
344 * @lba is the logical block address of the GPT header to test 344 * @lba: logical block address of the GPT header to test
345 * @gpt is a GPT header ptr, filled on return. 345 * @gpt: GPT header ptr, filled on return.
346 * @ptes is a PTEs ptr, filled on return. 346 * @ptes: PTEs ptr, filled on return.
347 * 347 *
348 * Description: returns 1 if valid, 0 on error. 348 * Description: returns 1 if valid, 0 on error.
349 * If valid, returns pointers to newly allocated GPT header and PTEs. 349 * If valid, returns pointers to newly allocated GPT header and PTEs.
@@ -461,8 +461,8 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
461 461
462/** 462/**
463 * is_pte_valid() - tests one PTE for validity 463 * is_pte_valid() - tests one PTE for validity
464 * @pte is the pte to check 464 * @pte:pte to check
465 * @lastlba is last lba of the disk 465 * @lastlba: last lba of the disk
466 * 466 *
467 * Description: returns 1 if valid, 0 on error. 467 * Description: returns 1 if valid, 0 on error.
468 */ 468 */
@@ -478,9 +478,10 @@ is_pte_valid(const gpt_entry *pte, const u64 lastlba)
478 478
479/** 479/**
480 * compare_gpts() - Search disk for valid GPT headers and PTEs 480 * compare_gpts() - Search disk for valid GPT headers and PTEs
481 * @pgpt is the primary GPT header 481 * @pgpt: primary GPT header
482 * @agpt is the alternate GPT header 482 * @agpt: alternate GPT header
483 * @lastlba is the last LBA number 483 * @lastlba: last LBA number
484 *
484 * Description: Returns nothing. Sanity checks pgpt and agpt fields 485 * Description: Returns nothing. Sanity checks pgpt and agpt fields
485 * and prints warnings on discrepancies. 486 * and prints warnings on discrepancies.
486 * 487 *
@@ -572,9 +573,10 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
572 573
573/** 574/**
574 * find_valid_gpt() - Search disk for valid GPT headers and PTEs 575 * find_valid_gpt() - Search disk for valid GPT headers and PTEs
575 * @state 576 * @state: disk parsed partitions
576 * @gpt is a GPT header ptr, filled on return. 577 * @gpt: GPT header ptr, filled on return.
577 * @ptes is a PTEs ptr, filled on return. 578 * @ptes: PTEs ptr, filled on return.
579 *
578 * Description: Returns 1 if valid, 0 on error. 580 * Description: Returns 1 if valid, 0 on error.
579 * If valid, returns pointers to newly allocated GPT header and PTEs. 581 * If valid, returns pointers to newly allocated GPT header and PTEs.
580 * Validity depends on PMBR being valid (or being overridden by the 582 * Validity depends on PMBR being valid (or being overridden by the
@@ -663,7 +665,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
663 665
664/** 666/**
665 * efi_partition(struct parsed_partitions *state) 667 * efi_partition(struct parsed_partitions *state)
666 * @state 668 * @state: disk parsed partitions
667 * 669 *
668 * Description: called from check.c, if the disk contains GPT 670 * Description: called from check.c, if the disk contains GPT
669 * partitions, sets up partition entries in the kernel. 671 * partitions, sets up partition entries in the kernel.
diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c
index 9123f250b425..93e7c1b32edd 100644
--- a/block/partitions/msdos.c
+++ b/block/partitions/msdos.c
@@ -159,8 +159,9 @@ static void parse_extended(struct parsed_partitions *state,
159 /* 159 /*
160 * First process the data partition(s) 160 * First process the data partition(s)
161 */ 161 */
162 for (i=0; i<4; i++, p++) { 162 for (i = 0; i < 4; i++, p++) {
163 sector_t offs, size, next; 163 sector_t offs, size, next;
164
164 if (!nr_sects(p) || is_extended_partition(p)) 165 if (!nr_sects(p) || is_extended_partition(p))
165 continue; 166 continue;
166 167
@@ -194,7 +195,7 @@ static void parse_extended(struct parsed_partitions *state,
194 * It should be a link to the next logical partition. 195 * It should be a link to the next logical partition.
195 */ 196 */
196 p -= 4; 197 p -= 4;
197 for (i=0; i<4; i++, p++) 198 for (i = 0; i < 4; i++, p++)
198 if (nr_sects(p) && is_extended_partition(p)) 199 if (nr_sects(p) && is_extended_partition(p))
199 break; 200 break;
200 if (i == 4) 201 if (i == 4)
@@ -243,8 +244,8 @@ static void parse_solaris_x86(struct parsed_partitions *state,
243 return; 244 return;
244 } 245 }
245 /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ 246 /* Ensure we can handle previous case of VTOC with 8 entries gracefully */
246 max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; 247 max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8;
247 for (i=0; i<max_nparts && state->next<state->limit; i++) { 248 for (i = 0; i < max_nparts && state->next < state->limit; i++) {
248 struct solaris_x86_slice *s = &v->v_slice[i]; 249 struct solaris_x86_slice *s = &v->v_slice[i];
249 char tmp[3 + 10 + 1 + 1]; 250 char tmp[3 + 10 + 1 + 1];
250 251
@@ -409,7 +410,7 @@ static void parse_minix(struct parsed_partitions *state,
409 /* The first sector of a Minix partition can have either 410 /* The first sector of a Minix partition can have either
410 * a secondary MBR describing its subpartitions, or 411 * a secondary MBR describing its subpartitions, or
411 * the normal boot sector. */ 412 * the normal boot sector. */
412 if (msdos_magic_present (data + 510) && 413 if (msdos_magic_present(data + 510) &&
413 SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ 414 SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */
414 char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; 415 char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1];
415 416
@@ -527,6 +528,7 @@ int msdos_partition(struct parsed_partitions *state)
527 for (slot = 1 ; slot <= 4 ; slot++, p++) { 528 for (slot = 1 ; slot <= 4 ; slot++, p++) {
528 sector_t start = start_sect(p)*sector_size; 529 sector_t start = start_sect(p)*sector_size;
529 sector_t size = nr_sects(p)*sector_size; 530 sector_t size = nr_sects(p)*sector_size;
531
530 if (!size) 532 if (!size)
531 continue; 533 continue;
532 if (is_extended_partition(p)) { 534 if (is_extended_partition(p)) {
@@ -537,6 +539,7 @@ int msdos_partition(struct parsed_partitions *state)
537 * sector, although it may not be enough/proper. 539 * sector, although it may not be enough/proper.
538 */ 540 */
539 sector_t n = 2; 541 sector_t n = 2;
542
540 n = min(size, max(sector_size, n)); 543 n = min(size, max(sector_size, n));
541 put_partition(state, slot, start, n); 544 put_partition(state, slot, start, n);
542 545
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 14695c6221c8..51bf5155ee75 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -82,9 +82,18 @@ static int sg_set_timeout(struct request_queue *q, int __user *p)
82 return err; 82 return err;
83} 83}
84 84
85static int max_sectors_bytes(struct request_queue *q)
86{
87 unsigned int max_sectors = queue_max_sectors(q);
88
89 max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9);
90
91 return max_sectors << 9;
92}
93
85static int sg_get_reserved_size(struct request_queue *q, int __user *p) 94static int sg_get_reserved_size(struct request_queue *q, int __user *p)
86{ 95{
87 unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9); 96 int val = min_t(int, q->sg_reserved_size, max_sectors_bytes(q));
88 97
89 return put_user(val, p); 98 return put_user(val, p);
90} 99}
@@ -98,10 +107,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p)
98 107
99 if (size < 0) 108 if (size < 0)
100 return -EINVAL; 109 return -EINVAL;
101 if (size > (queue_max_sectors(q) << 9))
102 size = queue_max_sectors(q) << 9;
103 110
104 q->sg_reserved_size = size; 111 q->sg_reserved_size = min(size, max_sectors_bytes(q));
105 return 0; 112 return 0;
106} 113}
107 114
@@ -283,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
283 unsigned long start_time; 290 unsigned long start_time;
284 ssize_t ret = 0; 291 ssize_t ret = 0;
285 int writing = 0; 292 int writing = 0;
293 int at_head = 0;
286 struct request *rq; 294 struct request *rq;
287 char sense[SCSI_SENSE_BUFFERSIZE]; 295 char sense[SCSI_SENSE_BUFFERSIZE];
288 struct bio *bio; 296 struct bio *bio;
@@ -306,6 +314,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
306 case SG_DXFER_FROM_DEV: 314 case SG_DXFER_FROM_DEV:
307 break; 315 break;
308 } 316 }
317 if (hdr->flags & SG_FLAG_Q_AT_HEAD)
318 at_head = 1;
309 319
310 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); 320 rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL);
311 if (!rq) 321 if (!rq)
@@ -362,7 +372,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
362 * (if he doesn't check that is his problem). 372 * (if he doesn't check that is his problem).
363 * N.B. a non-zero SCSI status is _not_ necessarily an error. 373 * N.B. a non-zero SCSI status is _not_ necessarily an error.
364 */ 374 */
365 blk_execute_rq(q, bd_disk, rq, 0); 375 blk_execute_rq(q, bd_disk, rq, at_head);
366 376
367 hdr->duration = jiffies_to_msecs(jiffies - start_time); 377 hdr->duration = jiffies_to_msecs(jiffies - start_time);
368 378
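
The new max_sectors_bytes() helper caps the sector count at INT_MAX >> 9 before shifting it into bytes, so the byte value always fits the signed int used for sg_reserved_size. A standalone sketch of that arithmetic; the function name mirrors the helper, everything else is illustrative.

#include <stdio.h>
#include <limits.h>

/* Convert a max_sectors limit (512-byte units) to bytes without
 * overflowing a signed int. */
static int max_sectors_to_bytes(unsigned int max_sectors)
{
	if (max_sectors > (unsigned int)(INT_MAX >> 9))
		max_sectors = INT_MAX >> 9;
	return (int)(max_sectors << 9);
}

int main(void)
{
	/* 8 Mi sectors would be 4 GiB: shifting first would overflow. */
	printf("%d\n", max_sectors_to_bytes(8u << 20));
	return 0;
}
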
diff --git a/drivers/block/drbd/Makefile b/drivers/block/drbd/Makefile
index 8b450338075e..4464e353c1e8 100644
--- a/drivers/block/drbd/Makefile
+++ b/drivers/block/drbd/Makefile
@@ -3,5 +3,6 @@ drbd-y += drbd_worker.o drbd_receiver.o drbd_req.o drbd_actlog.o
3drbd-y += drbd_main.o drbd_strings.o drbd_nl.o 3drbd-y += drbd_main.o drbd_strings.o drbd_nl.o
4drbd-y += drbd_interval.o drbd_state.o 4drbd-y += drbd_interval.o drbd_state.o
5drbd-y += drbd_nla.o 5drbd-y += drbd_nla.o
6drbd-$(CONFIG_DEBUG_FS) += drbd_debugfs.o
6 7
7obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o 8obj-$(CONFIG_BLK_DEV_DRBD) += drbd.o
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c
index 05a1780ffa85..d26a3fa63688 100644
--- a/drivers/block/drbd/drbd_actlog.c
+++ b/drivers/block/drbd/drbd_actlog.c
@@ -92,34 +92,26 @@ struct __packed al_transaction_on_disk {
92 __be32 context[AL_CONTEXT_PER_TRANSACTION]; 92 __be32 context[AL_CONTEXT_PER_TRANSACTION];
93}; 93};
94 94
95struct update_odbm_work { 95void *drbd_md_get_buffer(struct drbd_device *device, const char *intent)
96 struct drbd_work w;
97 struct drbd_device *device;
98 unsigned int enr;
99};
100
101struct update_al_work {
102 struct drbd_work w;
103 struct drbd_device *device;
104 struct completion event;
105 int err;
106};
107
108
109void *drbd_md_get_buffer(struct drbd_device *device)
110{ 96{
111 int r; 97 int r;
112 98
113 wait_event(device->misc_wait, 99 wait_event(device->misc_wait,
114 (r = atomic_cmpxchg(&device->md_io_in_use, 0, 1)) == 0 || 100 (r = atomic_cmpxchg(&device->md_io.in_use, 0, 1)) == 0 ||
115 device->state.disk <= D_FAILED); 101 device->state.disk <= D_FAILED);
116 102
117 return r ? NULL : page_address(device->md_io_page); 103 if (r)
104 return NULL;
105
106 device->md_io.current_use = intent;
107 device->md_io.start_jif = jiffies;
108 device->md_io.submit_jif = device->md_io.start_jif - 1;
109 return page_address(device->md_io.page);
118} 110}
119 111
120void drbd_md_put_buffer(struct drbd_device *device) 112void drbd_md_put_buffer(struct drbd_device *device)
121{ 113{
122 if (atomic_dec_and_test(&device->md_io_in_use)) 114 if (atomic_dec_and_test(&device->md_io.in_use))
123 wake_up(&device->misc_wait); 115 wake_up(&device->misc_wait);
124} 116}
125 117
@@ -145,10 +137,11 @@ void wait_until_done_or_force_detached(struct drbd_device *device, struct drbd_b
145 137
146static int _drbd_md_sync_page_io(struct drbd_device *device, 138static int _drbd_md_sync_page_io(struct drbd_device *device,
147 struct drbd_backing_dev *bdev, 139 struct drbd_backing_dev *bdev,
148 struct page *page, sector_t sector, 140 sector_t sector, int rw)
149 int rw, int size)
150{ 141{
151 struct bio *bio; 142 struct bio *bio;
143 /* we do all our meta data IO in aligned 4k blocks. */
144 const int size = 4096;
152 int err; 145 int err;
153 146
154 device->md_io.done = 0; 147 device->md_io.done = 0;
@@ -156,15 +149,15 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
156 149
157 if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags)) 150 if ((rw & WRITE) && !test_bit(MD_NO_FUA, &device->flags))
158 rw |= REQ_FUA | REQ_FLUSH; 151 rw |= REQ_FUA | REQ_FLUSH;
159 rw |= REQ_SYNC; 152 rw |= REQ_SYNC | REQ_NOIDLE;
160 153
161 bio = bio_alloc_drbd(GFP_NOIO); 154 bio = bio_alloc_drbd(GFP_NOIO);
162 bio->bi_bdev = bdev->md_bdev; 155 bio->bi_bdev = bdev->md_bdev;
163 bio->bi_iter.bi_sector = sector; 156 bio->bi_iter.bi_sector = sector;
164 err = -EIO; 157 err = -EIO;
165 if (bio_add_page(bio, page, size, 0) != size) 158 if (bio_add_page(bio, device->md_io.page, size, 0) != size)
166 goto out; 159 goto out;
167 bio->bi_private = &device->md_io; 160 bio->bi_private = device;
168 bio->bi_end_io = drbd_md_io_complete; 161 bio->bi_end_io = drbd_md_io_complete;
169 bio->bi_rw = rw; 162 bio->bi_rw = rw;
170 163
@@ -179,7 +172,8 @@ static int _drbd_md_sync_page_io(struct drbd_device *device,
179 } 172 }
180 173
181 bio_get(bio); /* one bio_put() is in the completion handler */ 174 bio_get(bio); /* one bio_put() is in the completion handler */
182 atomic_inc(&device->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */ 175 atomic_inc(&device->md_io.in_use); /* drbd_md_put_buffer() is in the completion handler */
176 device->md_io.submit_jif = jiffies;
183 if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD)) 177 if (drbd_insert_fault(device, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
184 bio_endio(bio, -EIO); 178 bio_endio(bio, -EIO);
185 else 179 else
@@ -197,9 +191,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
197 sector_t sector, int rw) 191 sector_t sector, int rw)
198{ 192{
199 int err; 193 int err;
200 struct page *iop = device->md_io_page; 194 D_ASSERT(device, atomic_read(&device->md_io.in_use) == 1);
201
202 D_ASSERT(device, atomic_read(&device->md_io_in_use) == 1);
203 195
204 BUG_ON(!bdev->md_bdev); 196 BUG_ON(!bdev->md_bdev);
205 197
@@ -214,8 +206,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd
214 current->comm, current->pid, __func__, 206 current->comm, current->pid, __func__,
215 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ"); 207 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ");
216 208
217 /* we do all our meta data IO in aligned 4k blocks. */ 209 err = _drbd_md_sync_page_io(device, bdev, sector, rw);
218 err = _drbd_md_sync_page_io(device, bdev, iop, sector, rw, 4096);
219 if (err) { 210 if (err) {
220 drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n", 211 drbd_err(device, "drbd_md_sync_page_io(,%llus,%s) failed with error %d\n",
221 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err); 212 (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", err);
@@ -297,26 +288,12 @@ bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *
297 return need_transaction; 288 return need_transaction;
298} 289}
299 290
300static int al_write_transaction(struct drbd_device *device, bool delegate); 291static int al_write_transaction(struct drbd_device *device);
301
302/* When called through generic_make_request(), we must delegate
303 * activity log I/O to the worker thread: a further request
304 * submitted via generic_make_request() within the same task
305 * would be queued on current->bio_list, and would only start
306 * after this function returns (see generic_make_request()).
307 *
308 * However, if we *are* the worker, we must not delegate to ourselves.
309 */
310 292
311/* 293void drbd_al_begin_io_commit(struct drbd_device *device)
312 * @delegate: delegate activity log I/O to the worker thread
313 */
314void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
315{ 294{
316 bool locked = false; 295 bool locked = false;
317 296
318 BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
319
320 /* Serialize multiple transactions. 297 /* Serialize multiple transactions.
321 * This uses test_and_set_bit, memory barrier is implicit. 298 * This uses test_and_set_bit, memory barrier is implicit.
322 */ 299 */
@@ -335,7 +312,7 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
335 rcu_read_unlock(); 312 rcu_read_unlock();
336 313
337 if (write_al_updates) 314 if (write_al_updates)
338 al_write_transaction(device, delegate); 315 al_write_transaction(device);
339 spin_lock_irq(&device->al_lock); 316 spin_lock_irq(&device->al_lock);
340 /* FIXME 317 /* FIXME
341 if (err) 318 if (err)
@@ -352,12 +329,10 @@ void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate)
352/* 329/*
353 * @delegate: delegate activity log I/O to the worker thread 330 * @delegate: delegate activity log I/O to the worker thread
354 */ 331 */
355void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate) 332void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i)
356{ 333{
357 BUG_ON(delegate && current == first_peer_device(device)->connection->worker.task);
358
359 if (drbd_al_begin_io_prepare(device, i)) 334 if (drbd_al_begin_io_prepare(device, i))
360 drbd_al_begin_io_commit(device, delegate); 335 drbd_al_begin_io_commit(device);
361} 336}
362 337
363int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i) 338int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i)
@@ -380,8 +355,19 @@ int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *
380 /* We want all necessary updates for a given request within the same transaction 355 /* We want all necessary updates for a given request within the same transaction
381 * We could first check how many updates are *actually* needed, 356 * We could first check how many updates are *actually* needed,
382 * and use that instead of the worst-case nr_al_extents */ 357 * and use that instead of the worst-case nr_al_extents */
383 if (available_update_slots < nr_al_extents) 358 if (available_update_slots < nr_al_extents) {
384 return -EWOULDBLOCK; 359 /* Too many activity log extents are currently "hot".
360 *
361 * If we have accumulated pending changes already,
362 * we made progress.
363 *
364 * If we cannot get even a single pending change through,
365 * stop the fast path until we made some progress,
366 * or requests to "cold" extents could be starved. */
367 if (!al->pending_changes)
368 __set_bit(__LC_STARVING, &device->act_log->flags);
369 return -ENOBUFS;
370 }
385 371
386 /* Is resync active in this area? */ 372 /* Is resync active in this area? */
387 for (enr = first; enr <= last; enr++) { 373 for (enr = first; enr <= last; enr++) {
@@ -452,15 +438,6 @@ static unsigned int al_extent_to_bm_page(unsigned int al_enr)
452 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT)); 438 (AL_EXTENT_SHIFT - BM_BLOCK_SHIFT));
453} 439}
454 440
455static unsigned int rs_extent_to_bm_page(unsigned int rs_enr)
456{
457 return rs_enr >>
458 /* bit to page */
459 ((PAGE_SHIFT + 3) -
460 /* resync extent number to bit */
461 (BM_EXT_SHIFT - BM_BLOCK_SHIFT));
462}
463
464static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device) 441static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
465{ 442{
466 const unsigned int stripes = device->ldev->md.al_stripes; 443 const unsigned int stripes = device->ldev->md.al_stripes;
@@ -479,8 +456,7 @@ static sector_t al_tr_number_to_on_disk_sector(struct drbd_device *device)
479 return device->ldev->md.md_offset + device->ldev->md.al_offset + t; 456 return device->ldev->md.md_offset + device->ldev->md.al_offset + t;
480} 457}
481 458
482static int 459int al_write_transaction(struct drbd_device *device)
483_al_write_transaction(struct drbd_device *device)
484{ 460{
485 struct al_transaction_on_disk *buffer; 461 struct al_transaction_on_disk *buffer;
486 struct lc_element *e; 462 struct lc_element *e;
@@ -505,7 +481,8 @@ _al_write_transaction(struct drbd_device *device)
505 return -EIO; 481 return -EIO;
506 } 482 }
507 483
508 buffer = drbd_md_get_buffer(device); /* protects md_io_buffer, al_tr_cycle, ... */ 484 /* protects md_io_buffer, al_tr_cycle, ... */
485 buffer = drbd_md_get_buffer(device, __func__);
509 if (!buffer) { 486 if (!buffer) {
510 drbd_err(device, "disk failed while waiting for md_io buffer\n"); 487 drbd_err(device, "disk failed while waiting for md_io buffer\n");
511 put_ldev(device); 488 put_ldev(device);
@@ -590,38 +567,6 @@ _al_write_transaction(struct drbd_device *device)
590 return err; 567 return err;
591} 568}
592 569
593
594static int w_al_write_transaction(struct drbd_work *w, int unused)
595{
596 struct update_al_work *aw = container_of(w, struct update_al_work, w);
597 struct drbd_device *device = aw->device;
598 int err;
599
600 err = _al_write_transaction(device);
601 aw->err = err;
602 complete(&aw->event);
603
604 return err != -EIO ? err : 0;
605}
606
607/* Calls from worker context (see w_restart_disk_io()) need to write the
608 transaction directly. Others came through generic_make_request(),
609 those need to delegate it to the worker. */
610static int al_write_transaction(struct drbd_device *device, bool delegate)
611{
612 if (delegate) {
613 struct update_al_work al_work;
614 init_completion(&al_work.event);
615 al_work.w.cb = w_al_write_transaction;
616 al_work.device = device;
617 drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
618 &al_work.w);
619 wait_for_completion(&al_work.event);
620 return al_work.err;
621 } else
622 return _al_write_transaction(device);
623}
624
625static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext) 570static int _try_lc_del(struct drbd_device *device, struct lc_element *al_ext)
626{ 571{
627 int rv; 572 int rv;
@@ -682,72 +627,56 @@ int drbd_initialize_al(struct drbd_device *device, void *buffer)
682 return 0; 627 return 0;
683} 628}
684 629
685static int w_update_odbm(struct drbd_work *w, int unused) 630static const char *drbd_change_sync_fname[] = {
686{ 631 [RECORD_RS_FAILED] = "drbd_rs_failed_io",
687 struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w); 632 [SET_IN_SYNC] = "drbd_set_in_sync",
688 struct drbd_device *device = udw->device; 633 [SET_OUT_OF_SYNC] = "drbd_set_out_of_sync"
689 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, }; 634};
690
691 if (!get_ldev(device)) {
692 if (__ratelimit(&drbd_ratelimit_state))
693 drbd_warn(device, "Can not update on disk bitmap, local IO disabled.\n");
694 kfree(udw);
695 return 0;
696 }
697
698 drbd_bm_write_page(device, rs_extent_to_bm_page(udw->enr));
699 put_ldev(device);
700
701 kfree(udw);
702
703 if (drbd_bm_total_weight(device) <= device->rs_failed) {
704 switch (device->state.conn) {
705 case C_SYNC_SOURCE: case C_SYNC_TARGET:
706 case C_PAUSED_SYNC_S: case C_PAUSED_SYNC_T:
707 drbd_resync_finished(device);
708 default:
709 /* nothing to do */
710 break;
711 }
712 }
713 drbd_bcast_event(device, &sib);
714
715 return 0;
716}
717
718 635
719/* ATTENTION. The AL's extents are 4MB each, while the extents in the 636/* ATTENTION. The AL's extents are 4MB each, while the extents in the
720 * resync LRU-cache are 16MB each. 637 * resync LRU-cache are 16MB each.
 721 * The caller of this function has to hold a get_ldev() reference.	 638 * The caller of this function has to hold a get_ldev() reference.
722 * 639 *
640 * Adjusts the caching members ->rs_left (success) or ->rs_failed (!success),
641 * potentially pulling in (and recounting the corresponding bits)
642 * this resync extent into the resync extent lru cache.
643 *
644 * Returns whether all bits have been cleared for this resync extent,
645 * precisely: (rs_left <= rs_failed)
646 *
723 * TODO will be obsoleted once we have a caching lru of the on disk bitmap 647 * TODO will be obsoleted once we have a caching lru of the on disk bitmap
724 */ 648 */
725static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t sector, 649static bool update_rs_extent(struct drbd_device *device,
726 int count, int success) 650 unsigned int enr, int count,
651 enum update_sync_bits_mode mode)
727{ 652{
728 struct lc_element *e; 653 struct lc_element *e;
729 struct update_odbm_work *udw;
730
731 unsigned int enr;
732 654
733 D_ASSERT(device, atomic_read(&device->local_cnt)); 655 D_ASSERT(device, atomic_read(&device->local_cnt));
734 656
735 /* I simply assume that a sector/size pair never crosses 657 /* When setting out-of-sync bits,
736 * a 16 MB extent border. (Currently this is true...) */ 658 * we don't need it cached (lc_find).
737 enr = BM_SECT_TO_EXT(sector); 659 * But if it is present in the cache,
738 660 * we should update the cached bit count.
739 e = lc_get(device->resync, enr); 661 * Otherwise, that extent should be in the resync extent lru cache
662 * already -- or we want to pull it in if necessary -- (lc_get),
663 * then update and check rs_left and rs_failed. */
664 if (mode == SET_OUT_OF_SYNC)
665 e = lc_find(device->resync, enr);
666 else
667 e = lc_get(device->resync, enr);
740 if (e) { 668 if (e) {
741 struct bm_extent *ext = lc_entry(e, struct bm_extent, lce); 669 struct bm_extent *ext = lc_entry(e, struct bm_extent, lce);
742 if (ext->lce.lc_number == enr) { 670 if (ext->lce.lc_number == enr) {
743 if (success) 671 if (mode == SET_IN_SYNC)
744 ext->rs_left -= count; 672 ext->rs_left -= count;
673 else if (mode == SET_OUT_OF_SYNC)
674 ext->rs_left += count;
745 else 675 else
746 ext->rs_failed += count; 676 ext->rs_failed += count;
747 if (ext->rs_left < ext->rs_failed) { 677 if (ext->rs_left < ext->rs_failed) {
748 drbd_warn(device, "BAD! sector=%llus enr=%u rs_left=%d " 678 drbd_warn(device, "BAD! enr=%u rs_left=%d "
749 "rs_failed=%d count=%d cstate=%s\n", 679 "rs_failed=%d count=%d cstate=%s\n",
750 (unsigned long long)sector,
751 ext->lce.lc_number, ext->rs_left, 680 ext->lce.lc_number, ext->rs_left,
752 ext->rs_failed, count, 681 ext->rs_failed, count,
753 drbd_conn_str(device->state.conn)); 682 drbd_conn_str(device->state.conn));
@@ -781,34 +710,27 @@ static void drbd_try_clear_on_disk_bm(struct drbd_device *device, sector_t secto
781 ext->lce.lc_number, ext->rs_failed); 710 ext->lce.lc_number, ext->rs_failed);
782 } 711 }
783 ext->rs_left = rs_left; 712 ext->rs_left = rs_left;
784 ext->rs_failed = success ? 0 : count; 713 ext->rs_failed = (mode == RECORD_RS_FAILED) ? count : 0;
785 /* we don't keep a persistent log of the resync lru, 714 /* we don't keep a persistent log of the resync lru,
786 * we can commit any change right away. */ 715 * we can commit any change right away. */
787 lc_committed(device->resync); 716 lc_committed(device->resync);
788 } 717 }
789 lc_put(device->resync, &ext->lce); 718 if (mode != SET_OUT_OF_SYNC)
719 lc_put(device->resync, &ext->lce);
790 /* no race, we are within the al_lock! */ 720 /* no race, we are within the al_lock! */
791 721
792 if (ext->rs_left == ext->rs_failed) { 722 if (ext->rs_left <= ext->rs_failed) {
793 ext->rs_failed = 0; 723 ext->rs_failed = 0;
794 724 return true;
795 udw = kmalloc(sizeof(*udw), GFP_ATOMIC);
796 if (udw) {
797 udw->enr = ext->lce.lc_number;
798 udw->w.cb = w_update_odbm;
799 udw->device = device;
800 drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
801 &udw->w);
802 } else {
803 drbd_warn(device, "Could not kmalloc an udw\n");
804 }
805 } 725 }
806 } else { 726 } else if (mode != SET_OUT_OF_SYNC) {
727 /* be quiet if lc_find() did not find it. */
807 drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n", 728 drbd_err(device, "lc_get() failed! locked=%d/%d flags=%lu\n",
808 device->resync_locked, 729 device->resync_locked,
809 device->resync->nr_elements, 730 device->resync->nr_elements,
810 device->resync->flags); 731 device->resync->flags);
811 } 732 }
733 return false;
812} 734}
813 735
814void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go) 736void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go)
@@ -827,105 +749,105 @@ void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go
827 } 749 }
828} 750}
829 751
830/* clear the bit corresponding to the piece of storage in question: 752/* It is called lazy update, so don't do write-out too often. */
831 * size byte of data starting from sector. Only clear a bits of the affected 753static bool lazy_bitmap_update_due(struct drbd_device *device)
832 * one ore more _aligned_ BM_BLOCK_SIZE blocks.
833 *
834 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
835 *
836 */
837void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size,
838 const char *file, const unsigned int line)
839{ 754{
840 /* Is called from worker and receiver context _only_ */ 755 return time_after(jiffies, device->rs_last_bcast + 2*HZ);
841 unsigned long sbnr, ebnr, lbnr; 756}
842 unsigned long count = 0;
843 sector_t esector, nr_sectors;
844 int wake_up = 0;
845 unsigned long flags;
846 757
847 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { 758static void maybe_schedule_on_disk_bitmap_update(struct drbd_device *device, bool rs_done)
848 drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", 759{
849 (unsigned long long)sector, size); 760 if (rs_done)
761 set_bit(RS_DONE, &device->flags);
762 /* and also set RS_PROGRESS below */
763 else if (!lazy_bitmap_update_due(device))
850 return; 764 return;
851 }
852
853 if (!get_ldev(device))
854 return; /* no disk, no metadata, no bitmap to clear bits in */
855
856 nr_sectors = drbd_get_capacity(device->this_bdev);
857 esector = sector + (size >> 9) - 1;
858
859 if (!expect(sector < nr_sectors))
860 goto out;
861 if (!expect(esector < nr_sectors))
862 esector = nr_sectors - 1;
863
864 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
865
866 /* we clear it (in sync).
867 * round up start sector, round down end sector. we make sure we only
868 * clear full, aligned, BM_BLOCK_SIZE (4K) blocks */
869 if (unlikely(esector < BM_SECT_PER_BIT-1))
870 goto out;
871 if (unlikely(esector == (nr_sectors-1)))
872 ebnr = lbnr;
873 else
874 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
875 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
876 765
877 if (sbnr > ebnr) 766 drbd_device_post_work(device, RS_PROGRESS);
878 goto out; 767}
879 768
769static int update_sync_bits(struct drbd_device *device,
770 unsigned long sbnr, unsigned long ebnr,
771 enum update_sync_bits_mode mode)
772{
880 /* 773 /*
881 * ok, (capacity & 7) != 0 sometimes, but who cares... 774 * We keep a count of set bits per resync-extent in the ->rs_left
882 * we count rs_{total,left} in bits, not sectors. 775 * caching member, so we need to loop and work within the resync extent
776 * alignment. Typically this loop will execute exactly once.
883 */ 777 */
884 count = drbd_bm_clear_bits(device, sbnr, ebnr); 778 unsigned long flags;
885 if (count) { 779 unsigned long count = 0;
886 drbd_advance_rs_marks(device, drbd_bm_total_weight(device)); 780 unsigned int cleared = 0;
887 spin_lock_irqsave(&device->al_lock, flags); 781 while (sbnr <= ebnr) {
888 drbd_try_clear_on_disk_bm(device, sector, count, true); 782 /* set temporary boundary bit number to last bit number within
889 spin_unlock_irqrestore(&device->al_lock, flags); 783 * the resync extent of the current start bit number,
890 784 * but cap at provided end bit number */
891 /* just wake_up unconditional now, various lc_chaged(), 785 unsigned long tbnr = min(ebnr, sbnr | BM_BLOCKS_PER_BM_EXT_MASK);
892 * lc_put() in drbd_try_clear_on_disk_bm(). */ 786 unsigned long c;
893 wake_up = 1; 787
788 if (mode == RECORD_RS_FAILED)
789 /* Only called from drbd_rs_failed_io(), bits
790 * supposedly still set. Recount, maybe some
791 * of the bits have been successfully cleared
792 * by application IO meanwhile.
793 */
794 c = drbd_bm_count_bits(device, sbnr, tbnr);
795 else if (mode == SET_IN_SYNC)
796 c = drbd_bm_clear_bits(device, sbnr, tbnr);
797 else /* if (mode == SET_OUT_OF_SYNC) */
798 c = drbd_bm_set_bits(device, sbnr, tbnr);
799
800 if (c) {
801 spin_lock_irqsave(&device->al_lock, flags);
802 cleared += update_rs_extent(device, BM_BIT_TO_EXT(sbnr), c, mode);
803 spin_unlock_irqrestore(&device->al_lock, flags);
804 count += c;
805 }
806 sbnr = tbnr + 1;
894 } 807 }
895out: 808 if (count) {
896 put_ldev(device); 809 if (mode == SET_IN_SYNC) {
897 if (wake_up) 810 unsigned long still_to_go = drbd_bm_total_weight(device);
811 bool rs_is_done = (still_to_go <= device->rs_failed);
812 drbd_advance_rs_marks(device, still_to_go);
813 if (cleared || rs_is_done)
814 maybe_schedule_on_disk_bitmap_update(device, rs_is_done);
815 } else if (mode == RECORD_RS_FAILED)
816 device->rs_failed += count;
898 wake_up(&device->al_wait); 817 wake_up(&device->al_wait);
818 }
819 return count;
899} 820}
900 821
901/* 822/* clear the bit corresponding to the piece of storage in question:
902 * this is intended to set one request worth of data out of sync. 823 * size byte of data starting from sector. Only clear a bits of the affected
903 * affects at least 1 bit, 824 * one ore more _aligned_ BM_BLOCK_SIZE blocks.
904 * and at most 1+DRBD_MAX_BIO_SIZE/BM_BLOCK_SIZE bits. 825 *
826 * called by worker on C_SYNC_TARGET and receiver on SyncSource.
905 * 827 *
906 * called by tl_clear and drbd_send_dblock (==drbd_make_request).
907 * so this can be _any_ process.
908 */ 828 */
909int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size, 829int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
910 const char *file, const unsigned int line) 830 enum update_sync_bits_mode mode,
831 const char *file, const unsigned int line)
911{ 832{
912 unsigned long sbnr, ebnr, flags; 833 /* Is called from worker and receiver context _only_ */
834 unsigned long sbnr, ebnr, lbnr;
835 unsigned long count = 0;
913 sector_t esector, nr_sectors; 836 sector_t esector, nr_sectors;
914 unsigned int enr, count = 0;
915 struct lc_element *e;
916 837
917 /* this should be an empty REQ_FLUSH */ 838 /* This would be an empty REQ_FLUSH, be silent. */
918 if (size == 0) 839 if ((mode == SET_OUT_OF_SYNC) && size == 0)
919 return 0; 840 return 0;
920 841
921 if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { 842 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
922 drbd_err(device, "sector: %llus, size: %d\n", 843 drbd_err(device, "%s: sector=%llus size=%d nonsense!\n",
923 (unsigned long long)sector, size); 844 drbd_change_sync_fname[mode],
845 (unsigned long long)sector, size);
924 return 0; 846 return 0;
925 } 847 }
926 848
927 if (!get_ldev(device)) 849 if (!get_ldev(device))
928 return 0; /* no disk, no metadata, no bitmap to set bits in */ 850 return 0; /* no disk, no metadata, no bitmap to manipulate bits in */
929 851
930 nr_sectors = drbd_get_capacity(device->this_bdev); 852 nr_sectors = drbd_get_capacity(device->this_bdev);
931 esector = sector + (size >> 9) - 1; 853 esector = sector + (size >> 9) - 1;
@@ -935,25 +857,28 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size
935 if (!expect(esector < nr_sectors)) 857 if (!expect(esector < nr_sectors))
936 esector = nr_sectors - 1; 858 esector = nr_sectors - 1;
937 859
938 /* we set it out of sync, 860 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
939 * we do not need to round anything here */
940 sbnr = BM_SECT_TO_BIT(sector);
941 ebnr = BM_SECT_TO_BIT(esector);
942
943 /* ok, (capacity & 7) != 0 sometimes, but who cares...
944 * we count rs_{total,left} in bits, not sectors. */
945 spin_lock_irqsave(&device->al_lock, flags);
946 count = drbd_bm_set_bits(device, sbnr, ebnr);
947 861
948 enr = BM_SECT_TO_EXT(sector); 862 if (mode == SET_IN_SYNC) {
949 e = lc_find(device->resync, enr); 863 /* Round up start sector, round down end sector. We make sure
950 if (e) 864 * we only clear full, aligned, BM_BLOCK_SIZE blocks. */
951 lc_entry(e, struct bm_extent, lce)->rs_left += count; 865 if (unlikely(esector < BM_SECT_PER_BIT-1))
952 spin_unlock_irqrestore(&device->al_lock, flags); 866 goto out;
867 if (unlikely(esector == (nr_sectors-1)))
868 ebnr = lbnr;
869 else
870 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
871 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
872 } else {
873 /* We set it out of sync, or record resync failure.
874 * Should not round anything here. */
875 sbnr = BM_SECT_TO_BIT(sector);
876 ebnr = BM_SECT_TO_BIT(esector);
877 }
953 878
879 count = update_sync_bits(device, sbnr, ebnr, mode);
954out: 880out:
955 put_ldev(device); 881 put_ldev(device);
956
957 return count; 882 return count;
958} 883}
959 884
@@ -1075,6 +1000,15 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
1075 struct lc_element *e; 1000 struct lc_element *e;
1076 struct bm_extent *bm_ext; 1001 struct bm_extent *bm_ext;
1077 int i; 1002 int i;
1003 bool throttle = drbd_rs_should_slow_down(device, sector, true);
1004
1005 /* If we need to throttle, a half-locked (only marked BME_NO_WRITES,
1006 * not yet BME_LOCKED) extent needs to be kicked out explicitly if we
1007 * need to throttle. There is at most one such half-locked extent,
1008 * which is remembered in resync_wenr. */
1009
1010 if (throttle && device->resync_wenr != enr)
1011 return -EAGAIN;
1078 1012
1079 spin_lock_irq(&device->al_lock); 1013 spin_lock_irq(&device->al_lock);
1080 if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) { 1014 if (device->resync_wenr != LC_FREE && device->resync_wenr != enr) {
@@ -1098,8 +1032,10 @@ int drbd_try_rs_begin_io(struct drbd_device *device, sector_t sector)
1098 D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags)); 1032 D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
1099 clear_bit(BME_NO_WRITES, &bm_ext->flags); 1033 clear_bit(BME_NO_WRITES, &bm_ext->flags);
1100 device->resync_wenr = LC_FREE; 1034 device->resync_wenr = LC_FREE;
1101 if (lc_put(device->resync, &bm_ext->lce) == 0) 1035 if (lc_put(device->resync, &bm_ext->lce) == 0) {
1036 bm_ext->flags = 0;
1102 device->resync_locked--; 1037 device->resync_locked--;
1038 }
1103 wake_up(&device->al_wait); 1039 wake_up(&device->al_wait);
1104 } else { 1040 } else {
1105 drbd_alert(device, "LOGIC BUG\n"); 1041 drbd_alert(device, "LOGIC BUG\n");
@@ -1161,8 +1097,20 @@ proceed:
1161 return 0; 1097 return 0;
1162 1098
1163try_again: 1099try_again:
1164 if (bm_ext) 1100 if (bm_ext) {
1165 device->resync_wenr = enr; 1101 if (throttle) {
1102 D_ASSERT(device, !test_bit(BME_LOCKED, &bm_ext->flags));
1103 D_ASSERT(device, test_bit(BME_NO_WRITES, &bm_ext->flags));
1104 clear_bit(BME_NO_WRITES, &bm_ext->flags);
1105 device->resync_wenr = LC_FREE;
1106 if (lc_put(device->resync, &bm_ext->lce) == 0) {
1107 bm_ext->flags = 0;
1108 device->resync_locked--;
1109 }
1110 wake_up(&device->al_wait);
1111 } else
1112 device->resync_wenr = enr;
1113 }
1166 spin_unlock_irq(&device->al_lock); 1114 spin_unlock_irq(&device->al_lock);
1167 return -EAGAIN; 1115 return -EAGAIN;
1168} 1116}
@@ -1270,69 +1218,3 @@ int drbd_rs_del_all(struct drbd_device *device)
1270 1218
1271 return 0; 1219 return 0;
1272} 1220}
1273
1274/**
1275 * drbd_rs_failed_io() - Record information on a failure to resync the specified blocks
1276 * @device: DRBD device.
1277 * @sector: The sector number.
1278 * @size: Size of failed IO operation, in byte.
1279 */
1280void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size)
1281{
1282 /* Is called from worker and receiver context _only_ */
1283 unsigned long sbnr, ebnr, lbnr;
1284 unsigned long count;
1285 sector_t esector, nr_sectors;
1286 int wake_up = 0;
1287
1288 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) {
1289 drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n",
1290 (unsigned long long)sector, size);
1291 return;
1292 }
1293 nr_sectors = drbd_get_capacity(device->this_bdev);
1294 esector = sector + (size >> 9) - 1;
1295
1296 if (!expect(sector < nr_sectors))
1297 return;
1298 if (!expect(esector < nr_sectors))
1299 esector = nr_sectors - 1;
1300
1301 lbnr = BM_SECT_TO_BIT(nr_sectors-1);
1302
1303 /*
1304 * round up start sector, round down end sector. we make sure we only
1305 * handle full, aligned, BM_BLOCK_SIZE (4K) blocks */
1306 if (unlikely(esector < BM_SECT_PER_BIT-1))
1307 return;
1308 if (unlikely(esector == (nr_sectors-1)))
1309 ebnr = lbnr;
1310 else
1311 ebnr = BM_SECT_TO_BIT(esector - (BM_SECT_PER_BIT-1));
1312 sbnr = BM_SECT_TO_BIT(sector + BM_SECT_PER_BIT-1);
1313
1314 if (sbnr > ebnr)
1315 return;
1316
1317 /*
1318 * ok, (capacity & 7) != 0 sometimes, but who cares...
1319 * we count rs_{total,left} in bits, not sectors.
1320 */
1321 spin_lock_irq(&device->al_lock);
1322 count = drbd_bm_count_bits(device, sbnr, ebnr);
1323 if (count) {
1324 device->rs_failed += count;
1325
1326 if (get_ldev(device)) {
1327 drbd_try_clear_on_disk_bm(device, sector, count, false);
1328 put_ldev(device);
1329 }
1330
1331 /* just set wake_up unconditionally now; various lc_changed() and
1332 * lc_put() calls happen inside drbd_try_clear_on_disk_bm(). */
1333 wake_up = 1;
1334 }
1335 spin_unlock_irq(&device->al_lock);
1336 if (wake_up)
1337 wake_up(&device->al_wait);
1338}
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 1aa29f8fdfe1..426c97aef900 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -22,6 +22,8 @@
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
25#include <linux/bitops.h> 27#include <linux/bitops.h>
26#include <linux/vmalloc.h> 28#include <linux/vmalloc.h>
27#include <linux/string.h> 29#include <linux/string.h>
@@ -353,9 +355,8 @@ static void bm_free_pages(struct page **pages, unsigned long number)
353 355
354 for (i = 0; i < number; i++) { 356 for (i = 0; i < number; i++) {
355 if (!pages[i]) { 357 if (!pages[i]) {
356 printk(KERN_ALERT "drbd: bm_free_pages tried to free " 358 pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n",
357 "a NULL pointer; i=%lu n=%lu\n", 359 i, number);
358 i, number);
359 continue; 360 continue;
360 } 361 }
361 __free_page(pages[i]); 362 __free_page(pages[i]);
@@ -592,7 +593,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
592 end = offset + len; 593 end = offset + len;
593 594
594 if (end > b->bm_words) { 595 if (end > b->bm_words) {
595 printk(KERN_ALERT "drbd: bm_memset end > bm_words\n"); 596 pr_alert("bm_memset end > bm_words\n");
596 return; 597 return;
597 } 598 }
598 599
@@ -602,7 +603,7 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
602 p_addr = bm_map_pidx(b, idx); 603 p_addr = bm_map_pidx(b, idx);
603 bm = p_addr + MLPP(offset); 604 bm = p_addr + MLPP(offset);
604 if (bm+do_now > p_addr + LWPP) { 605 if (bm+do_now > p_addr + LWPP) {
605 printk(KERN_ALERT "drbd: BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n", 606 pr_alert("BUG BUG BUG! p_addr:%p bm:%p do_now:%d\n",
606 p_addr, bm, (int)do_now); 607 p_addr, bm, (int)do_now);
607 } else 608 } else
608 memset(bm, c, do_now * sizeof(long)); 609 memset(bm, c, do_now * sizeof(long));
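The conversions above lean on the pr_fmt() convention: because the macro is defined before any include, every pr_alert()/pr_info() in the file automatically carries the module prefix. A standalone approximation of the mechanism (userspace, with a hypothetical pr_alert wrapper; the kernel's own definitions differ in detail):

#include <stdio.h>

#define MODNAME "drbd"
#define pr_fmt(fmt) MODNAME ": " fmt
#define pr_alert(fmt, ...) fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	unsigned long i = 3, n = 8;

	/* prints: "drbd: bm_free_pages tried to free a NULL pointer; i=3 n=8" */
	pr_alert("bm_free_pages tried to free a NULL pointer; i=%lu n=%lu\n", i, n);
	return 0;
}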
@@ -927,22 +928,14 @@ void drbd_bm_clear_all(struct drbd_device *device)
927 spin_unlock_irq(&b->bm_lock); 928 spin_unlock_irq(&b->bm_lock);
928} 929}
929 930
930struct bm_aio_ctx { 931static void drbd_bm_aio_ctx_destroy(struct kref *kref)
931 struct drbd_device *device;
932 atomic_t in_flight;
933 unsigned int done;
934 unsigned flags;
935#define BM_AIO_COPY_PAGES 1
936#define BM_AIO_WRITE_HINTED 2
937#define BM_WRITE_ALL_PAGES 4
938 int error;
939 struct kref kref;
940};
941
942static void bm_aio_ctx_destroy(struct kref *kref)
943{ 932{
944 struct bm_aio_ctx *ctx = container_of(kref, struct bm_aio_ctx, kref); 933 struct drbd_bm_aio_ctx *ctx = container_of(kref, struct drbd_bm_aio_ctx, kref);
934 unsigned long flags;
945 935
936 spin_lock_irqsave(&ctx->device->resource->req_lock, flags);
937 list_del(&ctx->list);
938 spin_unlock_irqrestore(&ctx->device->resource->req_lock, flags);
946 put_ldev(ctx->device); 939 put_ldev(ctx->device);
947 kfree(ctx); 940 kfree(ctx);
948} 941}
@@ -950,7 +943,7 @@ static void bm_aio_ctx_destroy(struct kref *kref)
950/* bv_page may be a copy, or may be the original */ 943/* bv_page may be a copy, or may be the original */
951static void bm_async_io_complete(struct bio *bio, int error) 944static void bm_async_io_complete(struct bio *bio, int error)
952{ 945{
953 struct bm_aio_ctx *ctx = bio->bi_private; 946 struct drbd_bm_aio_ctx *ctx = bio->bi_private;
954 struct drbd_device *device = ctx->device; 947 struct drbd_device *device = ctx->device;
955 struct drbd_bitmap *b = device->bitmap; 948 struct drbd_bitmap *b = device->bitmap;
956 unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page); 949 unsigned int idx = bm_page_to_idx(bio->bi_io_vec[0].bv_page);
@@ -993,17 +986,18 @@ static void bm_async_io_complete(struct bio *bio, int error)
993 if (atomic_dec_and_test(&ctx->in_flight)) { 986 if (atomic_dec_and_test(&ctx->in_flight)) {
994 ctx->done = 1; 987 ctx->done = 1;
995 wake_up(&device->misc_wait); 988 wake_up(&device->misc_wait);
996 kref_put(&ctx->kref, &bm_aio_ctx_destroy); 989 kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
997 } 990 }
998} 991}
999 992
1000static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must_hold(local) 993static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
1001{ 994{
1002 struct bio *bio = bio_alloc_drbd(GFP_NOIO); 995 struct bio *bio = bio_alloc_drbd(GFP_NOIO);
1003 struct drbd_device *device = ctx->device; 996 struct drbd_device *device = ctx->device;
1004 struct drbd_bitmap *b = device->bitmap; 997 struct drbd_bitmap *b = device->bitmap;
1005 struct page *page; 998 struct page *page;
1006 unsigned int len; 999 unsigned int len;
1000 unsigned int rw = (ctx->flags & BM_AIO_READ) ? READ : WRITE;
1007 1001
1008 sector_t on_disk_sector = 1002 sector_t on_disk_sector =
1009 device->ldev->md.md_offset + device->ldev->md.bm_offset; 1003 device->ldev->md.md_offset + device->ldev->md.bm_offset;
@@ -1049,9 +1043,9 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
1049/* 1043/*
1050 * bm_rw: read/write the whole bitmap from/to its on disk location. 1044 * bm_rw: read/write the whole bitmap from/to its on disk location.
1051 */ 1045 */
1052static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned lazy_writeout_upper_idx) __must_hold(local) 1046static int bm_rw(struct drbd_device *device, const unsigned int flags, unsigned lazy_writeout_upper_idx) __must_hold(local)
1053{ 1047{
1054 struct bm_aio_ctx *ctx; 1048 struct drbd_bm_aio_ctx *ctx;
1055 struct drbd_bitmap *b = device->bitmap; 1049 struct drbd_bitmap *b = device->bitmap;
1056 int num_pages, i, count = 0; 1050 int num_pages, i, count = 0;
1057 unsigned long now; 1051 unsigned long now;
@@ -1067,12 +1061,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1067 * as we submit copies of pages anyways. 1061 * as we submit copies of pages anyways.
1068 */ 1062 */
1069 1063
1070 ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO); 1064 ctx = kmalloc(sizeof(struct drbd_bm_aio_ctx), GFP_NOIO);
1071 if (!ctx) 1065 if (!ctx)
1072 return -ENOMEM; 1066 return -ENOMEM;
1073 1067
1074 *ctx = (struct bm_aio_ctx) { 1068 *ctx = (struct drbd_bm_aio_ctx) {
1075 .device = device, 1069 .device = device,
1070 .start_jif = jiffies,
1076 .in_flight = ATOMIC_INIT(1), 1071 .in_flight = ATOMIC_INIT(1),
1077 .done = 0, 1072 .done = 0,
1078 .flags = flags, 1073 .flags = flags,
@@ -1080,15 +1075,21 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1080 .kref = { ATOMIC_INIT(2) }, 1075 .kref = { ATOMIC_INIT(2) },
1081 }; 1076 };
1082 1077
1083 if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */ 1078 if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in drbd_bm_aio_ctx_destroy() */
1084 drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n"); 1079 drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in bm_rw()\n");
1085 kfree(ctx); 1080 kfree(ctx);
1086 return -ENODEV; 1081 return -ENODEV;
1087 } 1082 }
1083 /* Here D_ATTACHING is sufficient since drbd_bm_read() is called only from
1084 drbd_adm_attach(), after device->ldev was assigned. */
1088 1085
1089 if (!ctx->flags) 1086 if (0 == (ctx->flags & ~BM_AIO_READ))
1090 WARN_ON(!(BM_LOCKED_MASK & b->bm_flags)); 1087 WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
1091 1088
1089 spin_lock_irq(&device->resource->req_lock);
1090 list_add_tail(&ctx->list, &device->pending_bitmap_io);
1091 spin_unlock_irq(&device->resource->req_lock);
1092
1092 num_pages = b->bm_number_of_pages; 1093 num_pages = b->bm_number_of_pages;
1093 1094
1094 now = jiffies; 1095 now = jiffies;
@@ -1098,13 +1099,13 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1098 /* ignore completely unchanged pages */ 1099 /* ignore completely unchanged pages */
1099 if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx) 1100 if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
1100 break; 1101 break;
1101 if (rw & WRITE) { 1102 if (!(flags & BM_AIO_READ)) {
1102 if ((flags & BM_AIO_WRITE_HINTED) && 1103 if ((flags & BM_AIO_WRITE_HINTED) &&
1103 !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT, 1104 !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
1104 &page_private(b->bm_pages[i]))) 1105 &page_private(b->bm_pages[i])))
1105 continue; 1106 continue;
1106 1107
1107 if (!(flags & BM_WRITE_ALL_PAGES) && 1108 if (!(flags & BM_AIO_WRITE_ALL_PAGES) &&
1108 bm_test_page_unchanged(b->bm_pages[i])) { 1109 bm_test_page_unchanged(b->bm_pages[i])) {
1109 dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i); 1110 dynamic_drbd_dbg(device, "skipped bm write for idx %u\n", i);
1110 continue; 1111 continue;
@@ -1118,7 +1119,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1118 } 1119 }
1119 } 1120 }
1120 atomic_inc(&ctx->in_flight); 1121 atomic_inc(&ctx->in_flight);
1121 bm_page_io_async(ctx, i, rw); 1122 bm_page_io_async(ctx, i);
1122 ++count; 1123 ++count;
1123 cond_resched(); 1124 cond_resched();
1124 } 1125 }
@@ -1134,12 +1135,12 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1134 if (!atomic_dec_and_test(&ctx->in_flight)) 1135 if (!atomic_dec_and_test(&ctx->in_flight))
1135 wait_until_done_or_force_detached(device, device->ldev, &ctx->done); 1136 wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
1136 else 1137 else
1137 kref_put(&ctx->kref, &bm_aio_ctx_destroy); 1138 kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
1138 1139
1139 /* summary for global bitmap IO */ 1140 /* summary for global bitmap IO */
1140 if (flags == 0) 1141 if (flags == 0)
1141 drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n", 1142 drbd_info(device, "bitmap %s of %u pages took %lu jiffies\n",
1142 rw == WRITE ? "WRITE" : "READ", 1143 (flags & BM_AIO_READ) ? "READ" : "WRITE",
1143 count, jiffies - now); 1144 count, jiffies - now);
1144 1145
1145 if (ctx->error) { 1146 if (ctx->error) {
@@ -1152,20 +1153,18 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1152 err = -EIO; /* Disk timeout/force-detach during IO... */ 1153 err = -EIO; /* Disk timeout/force-detach during IO... */
1153 1154
1154 now = jiffies; 1155 now = jiffies;
1155 if (rw == WRITE) { 1156 if (flags & BM_AIO_READ) {
1156 drbd_md_flush(device);
1157 } else /* rw == READ */ {
1158 b->bm_set = bm_count_bits(b); 1157 b->bm_set = bm_count_bits(b);
1159 drbd_info(device, "recounting of set bits took additional %lu jiffies\n", 1158 drbd_info(device, "recounting of set bits took additional %lu jiffies\n",
1160 jiffies - now); 1159 jiffies - now);
1161 } 1160 }
1162 now = b->bm_set; 1161 now = b->bm_set;
1163 1162
1164 if (flags == 0) 1163 if ((flags & ~BM_AIO_READ) == 0)
1165 drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n", 1164 drbd_info(device, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
1166 ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now); 1165 ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
1167 1166
1168 kref_put(&ctx->kref, &bm_aio_ctx_destroy); 1167 kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
1169 return err; 1168 return err;
1170} 1169}
1171 1170
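With this change the read/write direction is carried in the flags word itself instead of a separate rw argument: BM_AIO_READ selects a read, a flags value of 0 is a plain write, and whether the summary lines are logged depends on no other flag bits being set. A userspace sketch of that dispatch (flag names and values are illustrative, not DRBD's):

#include <stdio.h>

#define AIO_COPY_PAGES      (1u << 0)
#define AIO_WRITE_HINTED    (1u << 1)
#define AIO_WRITE_ALL_PAGES (1u << 2)
#define AIO_READ            (1u << 3)

static void bm_rw_sketch(unsigned int flags)
{
	printf("%-5s logs summary: %s\n",
	       (flags & AIO_READ) ? "READ" : "WRITE",
	       (flags & ~AIO_READ) == 0 ? "yes" : "no");
}

int main(void)
{
	bm_rw_sketch(0);                                 /* like drbd_bm_write()        */
	bm_rw_sketch(AIO_READ);                          /* like drbd_bm_read()         */
	bm_rw_sketch(AIO_WRITE_HINTED | AIO_COPY_PAGES); /* like drbd_bm_write_hinted() */
	return 0;
}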
@@ -1175,7 +1174,7 @@ static int bm_rw(struct drbd_device *device, int rw, unsigned flags, unsigned la
1175 */ 1174 */
1176int drbd_bm_read(struct drbd_device *device) __must_hold(local) 1175int drbd_bm_read(struct drbd_device *device) __must_hold(local)
1177{ 1176{
1178 return bm_rw(device, READ, 0, 0); 1177 return bm_rw(device, BM_AIO_READ, 0);
1179} 1178}
1180 1179
1181/** 1180/**
@@ -1186,7 +1185,7 @@ int drbd_bm_read(struct drbd_device *device) __must_hold(local)
1186 */ 1185 */
1187int drbd_bm_write(struct drbd_device *device) __must_hold(local) 1186int drbd_bm_write(struct drbd_device *device) __must_hold(local)
1188{ 1187{
1189 return bm_rw(device, WRITE, 0, 0); 1188 return bm_rw(device, 0, 0);
1190} 1189}
1191 1190
1192/** 1191/**
@@ -1197,7 +1196,17 @@ int drbd_bm_write(struct drbd_device *device) __must_hold(local)
1197 */ 1196 */
1198int drbd_bm_write_all(struct drbd_device *device) __must_hold(local) 1197int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
1199{ 1198{
1200 return bm_rw(device, WRITE, BM_WRITE_ALL_PAGES, 0); 1199 return bm_rw(device, BM_AIO_WRITE_ALL_PAGES, 0);
1200}
1201
1202/**
1203 * drbd_bm_write_lazy() - Write bitmap pages 0 to @upper_idx-1, if they have changed.
1204 * @device: DRBD device.
1205 * @upper_idx: 0: write all changed pages; +ve: page index to stop scanning for changed pages
1206 */
1207int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local)
1208{
1209 return bm_rw(device, BM_AIO_COPY_PAGES, upper_idx);
1201} 1210}
1202 1211
1203/** 1212/**
@@ -1213,7 +1222,7 @@ int drbd_bm_write_all(struct drbd_device *device) __must_hold(local)
1213 */ 1222 */
1214int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local) 1223int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
1215{ 1224{
1216 return bm_rw(device, WRITE, BM_AIO_COPY_PAGES, 0); 1225 return bm_rw(device, BM_AIO_COPY_PAGES, 0);
1217} 1226}
1218 1227
1219/** 1228/**
@@ -1222,62 +1231,7 @@ int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local)
1222 */ 1231 */
1223int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local) 1232int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local)
1224{ 1233{
1225 return bm_rw(device, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0); 1234 return bm_rw(device, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
1226}
1227
1228/**
1229 * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
1230 * @device: DRBD device.
1231 * @idx: bitmap page index
1232 *
1233 * We don't want to special case on logical_block_size of the backend device,
1234 * so we submit PAGE_SIZE aligned pieces.
1235 * Note that on "most" systems, PAGE_SIZE is 4k.
1236 *
1237 * In case this becomes an issue on systems with larger PAGE_SIZE,
1238 * we may want to change this again to write 4k aligned 4k pieces.
1239 */
1240int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local)
1241{
1242 struct bm_aio_ctx *ctx;
1243 int err;
1244
1245 if (bm_test_page_unchanged(device->bitmap->bm_pages[idx])) {
1246 dynamic_drbd_dbg(device, "skipped bm page write for idx %u\n", idx);
1247 return 0;
1248 }
1249
1250 ctx = kmalloc(sizeof(struct bm_aio_ctx), GFP_NOIO);
1251 if (!ctx)
1252 return -ENOMEM;
1253
1254 *ctx = (struct bm_aio_ctx) {
1255 .device = device,
1256 .in_flight = ATOMIC_INIT(1),
1257 .done = 0,
1258 .flags = BM_AIO_COPY_PAGES,
1259 .error = 0,
1260 .kref = { ATOMIC_INIT(2) },
1261 };
1262
1263 if (!get_ldev_if_state(device, D_ATTACHING)) { /* put is in bm_aio_ctx_destroy() */
1264 drbd_err(device, "ASSERT FAILED: get_ldev_if_state() == 1 in drbd_bm_write_page()\n");
1265 kfree(ctx);
1266 return -ENODEV;
1267 }
1268
1269 bm_page_io_async(ctx, idx, WRITE_SYNC);
1270 wait_until_done_or_force_detached(device, device->ldev, &ctx->done);
1271
1272 if (ctx->error)
1273 drbd_chk_io_error(device, 1, DRBD_META_IO_ERROR);
1274 /* that causes us to detach, so the in memory bitmap will be
1275 * gone in a moment as well. */
1276
1277 device->bm_writ_cnt++;
1278 err = atomic_read(&ctx->in_flight) ? -EIO : ctx->error;
1279 kref_put(&ctx->kref, &bm_aio_ctx_destroy);
1280 return err;
1281} 1235}
1282 1236
1283/* NOTE 1237/* NOTE
diff --git a/drivers/block/drbd/drbd_debugfs.c b/drivers/block/drbd/drbd_debugfs.c
new file mode 100644
index 000000000000..5c20b18540b8
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.c
@@ -0,0 +1,958 @@
1#define pr_fmt(fmt) "drbd debugfs: " fmt
2#include <linux/kernel.h>
3#include <linux/module.h>
4#include <linux/debugfs.h>
5#include <linux/seq_file.h>
6#include <linux/stat.h>
7#include <linux/jiffies.h>
8#include <linux/list.h>
9
10#include "drbd_int.h"
11#include "drbd_req.h"
12#include "drbd_debugfs.h"
13
14
15/**********************************************************************
16 * Whenever you change the file format, remember to bump the version. *
17 **********************************************************************/
18
19static struct dentry *drbd_debugfs_root;
20static struct dentry *drbd_debugfs_version;
21static struct dentry *drbd_debugfs_resources;
22static struct dentry *drbd_debugfs_minors;
23
24static void seq_print_age_or_dash(struct seq_file *m, bool valid, unsigned long dt)
25{
26 if (valid)
27 seq_printf(m, "\t%d", jiffies_to_msecs(dt));
28 else
29 seq_printf(m, "\t-");
30}
31
32static void __seq_print_rq_state_bit(struct seq_file *m,
33 bool is_set, char *sep, const char *set_name, const char *unset_name)
34{
35 if (is_set && set_name) {
36 seq_putc(m, *sep);
37 seq_puts(m, set_name);
38 *sep = '|';
39 } else if (!is_set && unset_name) {
40 seq_putc(m, *sep);
41 seq_puts(m, unset_name);
42 *sep = '|';
43 }
44}
45
46static void seq_print_rq_state_bit(struct seq_file *m,
47 bool is_set, char *sep, const char *set_name)
48{
49 __seq_print_rq_state_bit(m, is_set, sep, set_name, NULL);
50}
51
52/* pretty print enum drbd_req_state_bits req->rq_state */
53static void seq_print_request_state(struct seq_file *m, struct drbd_request *req)
54{
55 unsigned int s = req->rq_state;
56 char sep = ' ';
57 seq_printf(m, "\t0x%08x", s);
58 seq_printf(m, "\tmaster: %s", req->master_bio ? "pending" : "completed");
59
60 /* RQ_WRITE ignored, already reported */
61 seq_puts(m, "\tlocal:");
62 seq_print_rq_state_bit(m, s & RQ_IN_ACT_LOG, &sep, "in-AL");
63 seq_print_rq_state_bit(m, s & RQ_POSTPONED, &sep, "postponed");
64 seq_print_rq_state_bit(m, s & RQ_COMPLETION_SUSP, &sep, "suspended");
65 sep = ' ';
66 seq_print_rq_state_bit(m, s & RQ_LOCAL_PENDING, &sep, "pending");
67 seq_print_rq_state_bit(m, s & RQ_LOCAL_COMPLETED, &sep, "completed");
68 seq_print_rq_state_bit(m, s & RQ_LOCAL_ABORTED, &sep, "aborted");
69 seq_print_rq_state_bit(m, s & RQ_LOCAL_OK, &sep, "ok");
70 if (sep == ' ')
71 seq_puts(m, " -");
72
73 /* for_each_connection ... */
74 seq_printf(m, "\tnet:");
75 sep = ' ';
76 seq_print_rq_state_bit(m, s & RQ_NET_PENDING, &sep, "pending");
77 seq_print_rq_state_bit(m, s & RQ_NET_QUEUED, &sep, "queued");
78 seq_print_rq_state_bit(m, s & RQ_NET_SENT, &sep, "sent");
79 seq_print_rq_state_bit(m, s & RQ_NET_DONE, &sep, "done");
80 seq_print_rq_state_bit(m, s & RQ_NET_SIS, &sep, "sis");
81 seq_print_rq_state_bit(m, s & RQ_NET_OK, &sep, "ok");
82 if (sep == ' ')
83 seq_puts(m, " -");
84
85 seq_printf(m, " :");
86 sep = ' ';
87 seq_print_rq_state_bit(m, s & RQ_EXP_RECEIVE_ACK, &sep, "B");
88 seq_print_rq_state_bit(m, s & RQ_EXP_WRITE_ACK, &sep, "C");
89 seq_print_rq_state_bit(m, s & RQ_EXP_BARR_ACK, &sep, "barr");
90 if (sep == ' ')
91 seq_puts(m, " -");
92 seq_printf(m, "\n");
93}
94
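The pretty-printer above builds a '|'-separated flag list by threading a separator character through the helpers: the first printed name is preceded by a space, later ones by '|', and a lone '-' marks the empty set. A standalone sketch of the same trick:

#include <stdio.h>

static void print_bit(int is_set, char *sep, const char *name)
{
	if (!is_set)
		return;
	printf("%c%s", *sep, name);
	*sep = '|';   /* every later flag gets the '|' separator */
}

int main(void)
{
	unsigned s = 0x5;   /* pretend: bit0 = pending, bit2 = ok */
	char sep = ' ';

	printf("\tlocal:");
	print_bit(s & 1, &sep, "pending");
	print_bit(s & 2, &sep, "completed");
	print_bit(s & 4, &sep, "ok");
	if (sep == ' ')
		printf(" -");   /* nothing was set */
	printf("\n");           /* -> "\tlocal: pending|ok" */
	return 0;
}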
95static void seq_print_one_request(struct seq_file *m, struct drbd_request *req, unsigned long now)
96{
97 /* change anything here, fixup header below! */
98 unsigned int s = req->rq_state;
99
100#define RQ_HDR_1 "epoch\tsector\tsize\trw"
101 seq_printf(m, "0x%x\t%llu\t%u\t%s",
102 req->epoch,
103 (unsigned long long)req->i.sector, req->i.size >> 9,
104 (s & RQ_WRITE) ? "W" : "R");
105
106#define RQ_HDR_2 "\tstart\tin AL\tsubmit"
107 seq_printf(m, "\t%d", jiffies_to_msecs(now - req->start_jif));
108 seq_print_age_or_dash(m, s & RQ_IN_ACT_LOG, now - req->in_actlog_jif);
109 seq_print_age_or_dash(m, s & RQ_LOCAL_PENDING, now - req->pre_submit_jif);
110
111#define RQ_HDR_3 "\tsent\tacked\tdone"
112 seq_print_age_or_dash(m, s & RQ_NET_SENT, now - req->pre_send_jif);
113 seq_print_age_or_dash(m, (s & RQ_NET_SENT) && !(s & RQ_NET_PENDING), now - req->acked_jif);
114 seq_print_age_or_dash(m, s & RQ_NET_DONE, now - req->net_done_jif);
115
116#define RQ_HDR_4 "\tstate\n"
117 seq_print_request_state(m, req);
118}
119#define RQ_HDR RQ_HDR_1 RQ_HDR_2 RQ_HDR_3 RQ_HDR_4
120
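Splitting the header into RQ_HDR_1..RQ_HDR_4 right next to the seq_printf() calls, and pasting them back together into RQ_HDR, keeps the column headers and the printed columns from drifting apart. The same preprocessor behaviour (adjacent string literals concatenate) in a standalone sketch:

#include <stdio.h>

#define HDR_1 "epoch\tsector\tsize"
#define HDR_2 "\tstate\n"
#define HDR   HDR_1 HDR_2   /* adjacent string literals are concatenated */

int main(void)
{
	fputs(HDR, stdout);
	printf("0x%x\t%llu\t%u\tok\n", 1, 4096ULL, 8u);
	return 0;
}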
121static void seq_print_minor_vnr_req(struct seq_file *m, struct drbd_request *req, unsigned long now)
122{
123 seq_printf(m, "%u\t%u\t", req->device->minor, req->device->vnr);
124 seq_print_one_request(m, req, now);
125}
126
127static void seq_print_resource_pending_meta_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
128{
129 struct drbd_device *device;
130 unsigned int i;
131
132 seq_puts(m, "minor\tvnr\tstart\tsubmit\tintent\n");
133 rcu_read_lock();
134 idr_for_each_entry(&resource->devices, device, i) {
135 struct drbd_md_io tmp;
136 /* In theory this is racy,
137 * in the sense that a drbd_md_put_buffer();
138 * drbd_md_get_buffer(); pair could have happened
139 * between the accesses of these members here. */
140 tmp = device->md_io;
141 if (atomic_read(&tmp.in_use)) {
142 seq_printf(m, "%u\t%u\t%d\t",
143 device->minor, device->vnr,
144 jiffies_to_msecs(now - tmp.start_jif));
145 if (time_before(tmp.submit_jif, tmp.start_jif))
146 seq_puts(m, "-\t");
147 else
148 seq_printf(m, "%d\t", jiffies_to_msecs(now - tmp.submit_jif));
149 seq_printf(m, "%s\n", tmp.current_use);
150 }
151 }
152 rcu_read_unlock();
153}
154
155static void seq_print_waiting_for_AL(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
156{
157 struct drbd_device *device;
158 unsigned int i;
159
160 seq_puts(m, "minor\tvnr\tage\t#waiting\n");
161 rcu_read_lock();
162 idr_for_each_entry(&resource->devices, device, i) {
163 unsigned long jif;
164 struct drbd_request *req;
165 int n = atomic_read(&device->ap_actlog_cnt);
166 if (n) {
167 spin_lock_irq(&device->resource->req_lock);
168 req = list_first_entry_or_null(&device->pending_master_completion[1],
169 struct drbd_request, req_pending_master_completion);
170 /* if the oldest request does not wait for the activity log
171 * it is not interesting for us here */
172 if (req && !(req->rq_state & RQ_IN_ACT_LOG))
173 jif = req->start_jif;
174 else
175 req = NULL;
176 spin_unlock_irq(&device->resource->req_lock);
177 }
178 if (n) {
179 seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
180 if (req)
181 seq_printf(m, "%u\t", jiffies_to_msecs(now - jif));
182 else
183 seq_puts(m, "-\t");
184 seq_printf(m, "%u\n", n);
185 }
186 }
187 rcu_read_unlock();
188}
189
190static void seq_print_device_bitmap_io(struct seq_file *m, struct drbd_device *device, unsigned long now)
191{
192 struct drbd_bm_aio_ctx *ctx;
193 unsigned long start_jif;
194 unsigned int in_flight;
195 unsigned int flags;
196 spin_lock_irq(&device->resource->req_lock);
197 ctx = list_first_entry_or_null(&device->pending_bitmap_io, struct drbd_bm_aio_ctx, list);
198 if (ctx && ctx->done)
199 ctx = NULL;
200 if (ctx) {
201 start_jif = ctx->start_jif;
202 in_flight = atomic_read(&ctx->in_flight);
203 flags = ctx->flags;
204 }
205 spin_unlock_irq(&device->resource->req_lock);
206 if (ctx) {
207 seq_printf(m, "%u\t%u\t%c\t%u\t%u\n",
208 device->minor, device->vnr,
209 (flags & BM_AIO_READ) ? 'R' : 'W',
210 jiffies_to_msecs(now - start_jif),
211 in_flight);
212 }
213}
214
215static void seq_print_resource_pending_bitmap_io(struct seq_file *m, struct drbd_resource *resource, unsigned long now)
216{
217 struct drbd_device *device;
218 unsigned int i;
219
220 seq_puts(m, "minor\tvnr\trw\tage\t#in-flight\n");
221 rcu_read_lock();
222 idr_for_each_entry(&resource->devices, device, i) {
223 seq_print_device_bitmap_io(m, device, now);
224 }
225 rcu_read_unlock();
226}
227
228/* pretty print enum peer_req->flags */
229static void seq_print_peer_request_flags(struct seq_file *m, struct drbd_peer_request *peer_req)
230{
231 unsigned long f = peer_req->flags;
232 char sep = ' ';
233
234 __seq_print_rq_state_bit(m, f & EE_SUBMITTED, &sep, "submitted", "preparing");
235 __seq_print_rq_state_bit(m, f & EE_APPLICATION, &sep, "application", "internal");
236 seq_print_rq_state_bit(m, f & EE_CALL_AL_COMPLETE_IO, &sep, "in-AL");
237 seq_print_rq_state_bit(m, f & EE_SEND_WRITE_ACK, &sep, "C");
238 seq_print_rq_state_bit(m, f & EE_MAY_SET_IN_SYNC, &sep, "set-in-sync");
239
240 if (f & EE_IS_TRIM) {
241 seq_putc(m, sep);
242 sep = '|';
243 if (f & EE_IS_TRIM_USE_ZEROOUT)
244 seq_puts(m, "zero-out");
245 else
246 seq_puts(m, "trim");
247 }
248 seq_putc(m, '\n');
249}
250
251static void seq_print_peer_request(struct seq_file *m,
252 struct drbd_device *device, struct list_head *lh,
253 unsigned long now)
254{
255 bool reported_preparing = false;
256 struct drbd_peer_request *peer_req;
257 list_for_each_entry(peer_req, lh, w.list) {
258 if (reported_preparing && !(peer_req->flags & EE_SUBMITTED))
259 continue;
260
261 if (device)
262 seq_printf(m, "%u\t%u\t", device->minor, device->vnr);
263
264 seq_printf(m, "%llu\t%u\t%c\t%u\t",
265 (unsigned long long)peer_req->i.sector, peer_req->i.size >> 9,
266 (peer_req->flags & EE_WRITE) ? 'W' : 'R',
267 jiffies_to_msecs(now - peer_req->submit_jif));
268 seq_print_peer_request_flags(m, peer_req);
269 if (peer_req->flags & EE_SUBMITTED)
270 break;
271 else
272 reported_preparing = true;
273 }
274}
275
276static void seq_print_device_peer_requests(struct seq_file *m,
277 struct drbd_device *device, unsigned long now)
278{
279 seq_puts(m, "minor\tvnr\tsector\tsize\trw\tage\tflags\n");
280 spin_lock_irq(&device->resource->req_lock);
281 seq_print_peer_request(m, device, &device->active_ee, now);
282 seq_print_peer_request(m, device, &device->read_ee, now);
283 seq_print_peer_request(m, device, &device->sync_ee, now);
284 spin_unlock_irq(&device->resource->req_lock);
285 if (test_bit(FLUSH_PENDING, &device->flags)) {
286 seq_printf(m, "%u\t%u\t-\t-\tF\t%u\tflush\n",
287 device->minor, device->vnr,
288 jiffies_to_msecs(now - device->flush_jif));
289 }
290}
291
292static void seq_print_resource_pending_peer_requests(struct seq_file *m,
293 struct drbd_resource *resource, unsigned long now)
294{
295 struct drbd_device *device;
296 unsigned int i;
297
298 rcu_read_lock();
299 idr_for_each_entry(&resource->devices, device, i) {
300 seq_print_device_peer_requests(m, device, now);
301 }
302 rcu_read_unlock();
303}
304
305static void seq_print_resource_transfer_log_summary(struct seq_file *m,
306 struct drbd_resource *resource,
307 struct drbd_connection *connection,
308 unsigned long now)
309{
310 struct drbd_request *req;
311 unsigned int count = 0;
312 unsigned int show_state = 0;
313
314 seq_puts(m, "n\tdevice\tvnr\t" RQ_HDR);
315 spin_lock_irq(&resource->req_lock);
316 list_for_each_entry(req, &connection->transfer_log, tl_requests) {
317 unsigned int tmp = 0;
318 unsigned int s;
319 ++count;
320
321 /* don't disable irq "forever" */
322 if (!(count & 0x1ff)) {
323 struct drbd_request *req_next;
324 kref_get(&req->kref);
325 spin_unlock_irq(&resource->req_lock);
326 cond_resched();
327 spin_lock_irq(&resource->req_lock);
328 req_next = list_next_entry(req, tl_requests);
329 if (kref_put(&req->kref, drbd_req_destroy))
330 req = req_next;
331 if (&req->tl_requests == &connection->transfer_log)
332 break;
333 }
334
335 s = req->rq_state;
336
337 /* This is meant to summarize timing issues, to be able to tell
338 * local disk problems from network problems.
339 * Skip requests, if we have shown an even older request with
340 * similar aspects already. */
341 if (req->master_bio == NULL)
342 tmp |= 1;
343 if ((s & RQ_LOCAL_MASK) && (s & RQ_LOCAL_PENDING))
344 tmp |= 2;
345 if (s & RQ_NET_MASK) {
346 if (!(s & RQ_NET_SENT))
347 tmp |= 4;
348 if (s & RQ_NET_PENDING)
349 tmp |= 8;
350 if (!(s & RQ_NET_DONE))
351 tmp |= 16;
352 }
353 if ((tmp & show_state) == tmp)
354 continue;
355 show_state |= tmp;
356 seq_printf(m, "%u\t", count);
357 seq_print_minor_vnr_req(m, req, now);
358 if (show_state == 0x1f)
359 break;
360 }
361 spin_unlock_irq(&resource->req_lock);
362}
363
364/* TODO: transfer_log and friends should be moved to resource */
365static int in_flight_summary_show(struct seq_file *m, void *pos)
366{
367 struct drbd_resource *resource = m->private;
368 struct drbd_connection *connection;
369 unsigned long jif = jiffies;
370
371 connection = first_connection(resource);
372 /* This does not happen, actually.
373 * But be robust and prepare for future code changes. */
374 if (!connection || !kref_get_unless_zero(&connection->kref))
375 return -ESTALE;
376
377 /* BUMP me if you change the file format/content/presentation */
378 seq_printf(m, "v: %u\n\n", 0);
379
380 seq_puts(m, "oldest bitmap IO\n");
381 seq_print_resource_pending_bitmap_io(m, resource, jif);
382 seq_putc(m, '\n');
383
384 seq_puts(m, "meta data IO\n");
385 seq_print_resource_pending_meta_io(m, resource, jif);
386 seq_putc(m, '\n');
387
388 seq_puts(m, "socket buffer stats\n");
389 /* for each connection ... once we have more than one */
390 rcu_read_lock();
391 if (connection->data.socket) {
392 /* open coded SIOCINQ, the "relevant" part */
393 struct tcp_sock *tp = tcp_sk(connection->data.socket->sk);
394 int answ = tp->rcv_nxt - tp->copied_seq;
395 seq_printf(m, "unread receive buffer: %u Byte\n", answ);
396 /* open coded SIOCOUTQ, the "relevant" part */
397 answ = tp->write_seq - tp->snd_una;
398 seq_printf(m, "unacked send buffer: %u Byte\n", answ);
399 }
400 rcu_read_unlock();
401 seq_putc(m, '\n');
402
403 seq_puts(m, "oldest peer requests\n");
404 seq_print_resource_pending_peer_requests(m, resource, jif);
405 seq_putc(m, '\n');
406
407 seq_puts(m, "application requests waiting for activity log\n");
408 seq_print_waiting_for_AL(m, resource, jif);
409 seq_putc(m, '\n');
410
411 seq_puts(m, "oldest application requests\n");
412 seq_print_resource_transfer_log_summary(m, resource, connection, jif);
413 seq_putc(m, '\n');
414
415 jif = jiffies - jif;
416 if (jif)
417 seq_printf(m, "generated in %d ms\n", jiffies_to_msecs(jif));
418 kref_put(&connection->kref, drbd_destroy_connection);
419 return 0;
420}
421
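The socket-buffer block in in_flight_summary_show() open-codes what SIOCINQ and SIOCOUTQ report for a TCP socket. From userspace the same two numbers can be obtained with the ioctls directly, as in this sketch (on an unconnected socket both values are simply 0):

#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/sockios.h>   /* SIOCINQ, SIOCOUTQ */

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	int unread = 0, unacked = 0;

	if (fd < 0)
		return 1;
	ioctl(fd, SIOCINQ, &unread);    /* bytes received but not yet read by the application */
	ioctl(fd, SIOCOUTQ, &unacked);  /* bytes sent but not yet acknowledged by the peer    */
	printf("unread=%d unacked=%d\n", unread, unacked);
	close(fd);
	return 0;
}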
422/* simple_positive(file->f_dentry) or debugfs_positive() would do,
423 * but neither is "reachable" from here.
424 * So we have our own inline version of it below. :-( */
425static inline int debugfs_positive(struct dentry *dentry)
426{
427 return dentry->d_inode && !d_unhashed(dentry);
428}
429
430/* make sure at *open* time that the respective object won't go away. */
431static int drbd_single_open(struct file *file, int (*show)(struct seq_file *, void *),
432 void *data, struct kref *kref,
433 void (*release)(struct kref *))
434{
435 struct dentry *parent;
436 int ret = -ESTALE;
437
438 /* Are we still linked,
439 * or has debugfs_remove() already been called? */
440 parent = file->f_dentry->d_parent;
441 /* not sure if this can happen: */
442 if (!parent || !parent->d_inode)
443 goto out;
444 /* serialize with d_delete() */
445 mutex_lock(&parent->d_inode->i_mutex);
446 /* Make sure the object is still alive */
447 if (debugfs_positive(file->f_dentry)
448 && kref_get_unless_zero(kref))
449 ret = 0;
450 mutex_unlock(&parent->d_inode->i_mutex);
451 if (!ret) {
452 ret = single_open(file, show, data);
453 if (ret)
454 kref_put(kref, release);
455 }
456out:
457 return ret;
458}
459
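drbd_single_open() only succeeds if the debugfs entry is still linked and a reference on the backing object can still be taken; kref_get_unless_zero() refuses once the object is already on its way out. A userspace approximation of that "open only while alive" check (plain counter instead of a struct kref, no locking):

#include <stdio.h>

struct obj { int refcount; };

static int get_unless_zero(struct obj *o)
{
	if (o->refcount == 0)
		return 0;       /* already being destroyed: refuse */
	o->refcount++;
	return 1;
}

static int open_sketch(struct obj *o)
{
	if (!get_unless_zero(o))
		return -1;      /* -ESTALE in the driver */
	/* ... single_open() would run here; on failure the ref would be dropped ... */
	return 0;
}

int main(void)
{
	struct obj live = { .refcount = 1 };
	struct obj dying = { .refcount = 0 };

	printf("live:  %d\n", open_sketch(&live));   /* 0  */
	printf("dying: %d\n", open_sketch(&dying));  /* -1 */
	return 0;
}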
460static int in_flight_summary_open(struct inode *inode, struct file *file)
461{
462 struct drbd_resource *resource = inode->i_private;
463 return drbd_single_open(file, in_flight_summary_show, resource,
464 &resource->kref, drbd_destroy_resource);
465}
466
467static int in_flight_summary_release(struct inode *inode, struct file *file)
468{
469 struct drbd_resource *resource = inode->i_private;
470 kref_put(&resource->kref, drbd_destroy_resource);
471 return single_release(inode, file);
472}
473
474static const struct file_operations in_flight_summary_fops = {
475 .owner = THIS_MODULE,
476 .open = in_flight_summary_open,
477 .read = seq_read,
478 .llseek = seq_lseek,
479 .release = in_flight_summary_release,
480};
481
482void drbd_debugfs_resource_add(struct drbd_resource *resource)
483{
484 struct dentry *dentry;
485 if (!drbd_debugfs_resources)
486 return;
487
488 dentry = debugfs_create_dir(resource->name, drbd_debugfs_resources);
489 if (IS_ERR_OR_NULL(dentry))
490 goto fail;
491 resource->debugfs_res = dentry;
492
493 dentry = debugfs_create_dir("volumes", resource->debugfs_res);
494 if (IS_ERR_OR_NULL(dentry))
495 goto fail;
496 resource->debugfs_res_volumes = dentry;
497
498 dentry = debugfs_create_dir("connections", resource->debugfs_res);
499 if (IS_ERR_OR_NULL(dentry))
500 goto fail;
501 resource->debugfs_res_connections = dentry;
502
503 dentry = debugfs_create_file("in_flight_summary", S_IRUSR|S_IRGRP,
504 resource->debugfs_res, resource,
505 &in_flight_summary_fops);
506 if (IS_ERR_OR_NULL(dentry))
507 goto fail;
508 resource->debugfs_res_in_flight_summary = dentry;
509 return;
510
511fail:
512 drbd_debugfs_resource_cleanup(resource);
513 drbd_err(resource, "failed to create debugfs dentry\n");
514}
515
516static void drbd_debugfs_remove(struct dentry **dp)
517{
518 debugfs_remove(*dp);
519 *dp = NULL;
520}
521
522void drbd_debugfs_resource_cleanup(struct drbd_resource *resource)
523{
524 /* it is ok to call debugfs_remove(NULL) */
525 drbd_debugfs_remove(&resource->debugfs_res_in_flight_summary);
526 drbd_debugfs_remove(&resource->debugfs_res_connections);
527 drbd_debugfs_remove(&resource->debugfs_res_volumes);
528 drbd_debugfs_remove(&resource->debugfs_res);
529}
530
531static void seq_print_one_timing_detail(struct seq_file *m,
532 const struct drbd_thread_timing_details *tdp,
533 unsigned long now)
534{
535 struct drbd_thread_timing_details td;
536 /* No locking...
537 * use a temporary copy to get a consistent snapshot. */
538 do {
539 td = *tdp;
540 } while (td.cb_nr != tdp->cb_nr);
541 if (!td.cb_addr)
542 return;
543 seq_printf(m, "%u\t%d\t%s:%u\t%ps\n",
544 td.cb_nr,
545 jiffies_to_msecs(now - td.start_jif),
546 td.caller_fn, td.line,
547 td.cb_addr);
548}
549
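The snapshot loop above copies the timing slot and retries until the sequence number read afterwards still matches, so a reader never has to take a lock against the worker updating the slot. A single-threaded userspace sketch of the retry idea (the real code additionally relies on the writer bumping cb_nr last):

#include <stdio.h>

struct detail { unsigned int cb_nr; const char *fn; };

static struct detail snapshot(const struct detail *p)
{
	struct detail d;

	do {
		d = *p;
	} while (d.cb_nr != p->cb_nr);   /* writer raced us: copy again */
	return d;
}

int main(void)
{
	struct detail shared = { .cb_nr = 42, .fn = "w_send_barrier" };
	struct detail d = snapshot(&shared);

	printf("%u %s\n", d.cb_nr, d.fn);
	return 0;
}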
550static void seq_print_timing_details(struct seq_file *m,
551 const char *title,
552 unsigned int cb_nr, struct drbd_thread_timing_details *tdp, unsigned long now)
553{
554 unsigned int start_idx;
555 unsigned int i;
556
557 seq_printf(m, "%s\n", title);
558 /* If not much is going on, this will result in natural ordering.
559 * If it is very busy, we may skip events, or even see
560 * wrap-arounds, which could only be avoided with locking.
561 */
562 start_idx = cb_nr % DRBD_THREAD_DETAILS_HIST;
563 for (i = start_idx; i < DRBD_THREAD_DETAILS_HIST; i++)
564 seq_print_one_timing_detail(m, tdp+i, now);
565 for (i = 0; i < start_idx; i++)
566 seq_print_one_timing_detail(m, tdp+i, now);
567}
568
569static int callback_history_show(struct seq_file *m, void *ignored)
570{
571 struct drbd_connection *connection = m->private;
572 unsigned long jif = jiffies;
573
574 /* BUMP me if you change the file format/content/presentation */
575 seq_printf(m, "v: %u\n\n", 0);
576
577 seq_puts(m, "n\tage\tcallsite\tfn\n");
578 seq_print_timing_details(m, "worker", connection->w_cb_nr, connection->w_timing_details, jif);
579 seq_print_timing_details(m, "receiver", connection->r_cb_nr, connection->r_timing_details, jif);
580 return 0;
581}
582
583static int callback_history_open(struct inode *inode, struct file *file)
584{
585 struct drbd_connection *connection = inode->i_private;
586 return drbd_single_open(file, callback_history_show, connection,
587 &connection->kref, drbd_destroy_connection);
588}
589
590static int callback_history_release(struct inode *inode, struct file *file)
591{
592 struct drbd_connection *connection = inode->i_private;
593 kref_put(&connection->kref, drbd_destroy_connection);
594 return single_release(inode, file);
595}
596
597static const struct file_operations connection_callback_history_fops = {
598 .owner = THIS_MODULE,
599 .open = callback_history_open,
600 .read = seq_read,
601 .llseek = seq_lseek,
602 .release = callback_history_release,
603};
604
605static int connection_oldest_requests_show(struct seq_file *m, void *ignored)
606{
607 struct drbd_connection *connection = m->private;
608 unsigned long now = jiffies;
609 struct drbd_request *r1, *r2;
610
611 /* BUMP me if you change the file format/content/presentation */
612 seq_printf(m, "v: %u\n\n", 0);
613
614 spin_lock_irq(&connection->resource->req_lock);
615 r1 = connection->req_next;
616 if (r1)
617 seq_print_minor_vnr_req(m, r1, now);
618 r2 = connection->req_ack_pending;
619 if (r2 && r2 != r1) {
620 r1 = r2;
621 seq_print_minor_vnr_req(m, r1, now);
622 }
623 r2 = connection->req_not_net_done;
624 if (r2 && r2 != r1)
625 seq_print_minor_vnr_req(m, r2, now);
626 spin_unlock_irq(&connection->resource->req_lock);
627 return 0;
628}
629
630static int connection_oldest_requests_open(struct inode *inode, struct file *file)
631{
632 struct drbd_connection *connection = inode->i_private;
633 return drbd_single_open(file, connection_oldest_requests_show, connection,
634 &connection->kref, drbd_destroy_connection);
635}
636
637static int connection_oldest_requests_release(struct inode *inode, struct file *file)
638{
639 struct drbd_connection *connection = inode->i_private;
640 kref_put(&connection->kref, drbd_destroy_connection);
641 return single_release(inode, file);
642}
643
644static const struct file_operations connection_oldest_requests_fops = {
645 .owner = THIS_MODULE,
646 .open = connection_oldest_requests_open,
647 .read = seq_read,
648 .llseek = seq_lseek,
649 .release = connection_oldest_requests_release,
650};
651
652void drbd_debugfs_connection_add(struct drbd_connection *connection)
653{
654 struct dentry *conns_dir = connection->resource->debugfs_res_connections;
655 struct dentry *dentry;
656 if (!conns_dir)
657 return;
658
659 /* Once we enable multiple peers,
660 * these connections will have descriptive names.
661 * For now, it is just the one connection to the (only) "peer". */
662 dentry = debugfs_create_dir("peer", conns_dir);
663 if (IS_ERR_OR_NULL(dentry))
664 goto fail;
665 connection->debugfs_conn = dentry;
666
667 dentry = debugfs_create_file("callback_history", S_IRUSR|S_IRGRP,
668 connection->debugfs_conn, connection,
669 &connection_callback_history_fops);
670 if (IS_ERR_OR_NULL(dentry))
671 goto fail;
672 connection->debugfs_conn_callback_history = dentry;
673
674 dentry = debugfs_create_file("oldest_requests", S_IRUSR|S_IRGRP,
675 connection->debugfs_conn, connection,
676 &connection_oldest_requests_fops);
677 if (IS_ERR_OR_NULL(dentry))
678 goto fail;
679 connection->debugfs_conn_oldest_requests = dentry;
680 return;
681
682fail:
683 drbd_debugfs_connection_cleanup(connection);
684 drbd_err(connection, "failed to create debugfs dentry\n");
685}
686
687void drbd_debugfs_connection_cleanup(struct drbd_connection *connection)
688{
689 drbd_debugfs_remove(&connection->debugfs_conn_callback_history);
690 drbd_debugfs_remove(&connection->debugfs_conn_oldest_requests);
691 drbd_debugfs_remove(&connection->debugfs_conn);
692}
693
694static void resync_dump_detail(struct seq_file *m, struct lc_element *e)
695{
696 struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
697
698 seq_printf(m, "%5d %s %s %s\n", bme->rs_left,
699 test_bit(BME_NO_WRITES, &bme->flags) ? "NO_WRITES" : "---------",
700 test_bit(BME_LOCKED, &bme->flags) ? "LOCKED" : "------",
701 test_bit(BME_PRIORITY, &bme->flags) ? "PRIORITY" : "--------"
702 );
703}
704
705static int device_resync_extents_show(struct seq_file *m, void *ignored)
706{
707 struct drbd_device *device = m->private;
708
709 /* BUMP me if you change the file format/content/presentation */
710 seq_printf(m, "v: %u\n\n", 0);
711
712 if (get_ldev_if_state(device, D_FAILED)) {
713 lc_seq_printf_stats(m, device->resync);
714 lc_seq_dump_details(m, device->resync, "rs_left flags", resync_dump_detail);
715 put_ldev(device);
716 }
717 return 0;
718}
719
720static int device_act_log_extents_show(struct seq_file *m, void *ignored)
721{
722 struct drbd_device *device = m->private;
723
724 /* BUMP me if you change the file format/content/presentation */
725 seq_printf(m, "v: %u\n\n", 0);
726
727 if (get_ldev_if_state(device, D_FAILED)) {
728 lc_seq_printf_stats(m, device->act_log);
729 lc_seq_dump_details(m, device->act_log, "", NULL);
730 put_ldev(device);
731 }
732 return 0;
733}
734
735static int device_oldest_requests_show(struct seq_file *m, void *ignored)
736{
737 struct drbd_device *device = m->private;
738 struct drbd_resource *resource = device->resource;
739 unsigned long now = jiffies;
740 struct drbd_request *r1, *r2;
741 int i;
742
743 /* BUMP me if you change the file format/content/presentation */
744 seq_printf(m, "v: %u\n\n", 0);
745
746 seq_puts(m, RQ_HDR);
747 spin_lock_irq(&resource->req_lock);
748 /* WRITE, then READ */
749 for (i = 1; i >= 0; --i) {
750 r1 = list_first_entry_or_null(&device->pending_master_completion[i],
751 struct drbd_request, req_pending_master_completion);
752 r2 = list_first_entry_or_null(&device->pending_completion[i],
753 struct drbd_request, req_pending_local);
754 if (r1)
755 seq_print_one_request(m, r1, now);
756 if (r2 && r2 != r1)
757 seq_print_one_request(m, r2, now);
758 }
759 spin_unlock_irq(&resource->req_lock);
760 return 0;
761}
762
763static int device_data_gen_id_show(struct seq_file *m, void *ignored)
764{
765 struct drbd_device *device = m->private;
766 struct drbd_md *md;
767 enum drbd_uuid_index idx;
768
769 if (!get_ldev_if_state(device, D_FAILED))
770 return -ENODEV;
771
772 md = &device->ldev->md;
773 spin_lock_irq(&md->uuid_lock);
774 for (idx = UI_CURRENT; idx <= UI_HISTORY_END; idx++) {
775 seq_printf(m, "0x%016llX\n", md->uuid[idx]);
776 }
777 spin_unlock_irq(&md->uuid_lock);
778 put_ldev(device);
779 return 0;
780}
781
782#define drbd_debugfs_device_attr(name) \
783static int device_ ## name ## _open(struct inode *inode, struct file *file) \
784{ \
785 struct drbd_device *device = inode->i_private; \
786 return drbd_single_open(file, device_ ## name ## _show, device, \
787 &device->kref, drbd_destroy_device); \
788} \
789static int device_ ## name ## _release(struct inode *inode, struct file *file) \
790{ \
791 struct drbd_device *device = inode->i_private; \
792 kref_put(&device->kref, drbd_destroy_device); \
793 return single_release(inode, file); \
794} \
795static const struct file_operations device_ ## name ## _fops = { \
796 .owner = THIS_MODULE, \
797 .open = device_ ## name ## _open, \
798 .read = seq_read, \
799 .llseek = seq_lseek, \
800 .release = device_ ## name ## _release, \
801};
802
803drbd_debugfs_device_attr(oldest_requests)
804drbd_debugfs_device_attr(act_log_extents)
805drbd_debugfs_device_attr(resync_extents)
806drbd_debugfs_device_attr(data_gen_id)
807
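drbd_debugfs_device_attr() stamps out an _open, a _release and a file_operations instance per attribute via token pasting. The core preprocessor mechanism (## to build identifiers, # to stringify) in a self-contained sketch:

#include <stdio.h>

#define DEFINE_ATTR(name)                       \
static void name ## _show(void)                 \
{                                               \
	printf("showing " #name "\n");          \
}

DEFINE_ATTR(oldest_requests)
DEFINE_ATTR(data_gen_id)

int main(void)
{
	oldest_requests_show();   /* generated by DEFINE_ATTR(oldest_requests) */
	data_gen_id_show();       /* generated by DEFINE_ATTR(data_gen_id)     */
	return 0;
}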
808void drbd_debugfs_device_add(struct drbd_device *device)
809{
810 struct dentry *vols_dir = device->resource->debugfs_res_volumes;
811 char minor_buf[8]; /* MINORMASK, MINORBITS == 20; */
812 char vnr_buf[8]; /* volume number vnr is only 16 bit anyway; */
813 char *slink_name = NULL;
814
815 struct dentry *dentry;
816 if (!vols_dir || !drbd_debugfs_minors)
817 return;
818
819 snprintf(vnr_buf, sizeof(vnr_buf), "%u", device->vnr);
820 dentry = debugfs_create_dir(vnr_buf, vols_dir);
821 if (IS_ERR_OR_NULL(dentry))
822 goto fail;
823 device->debugfs_vol = dentry;
824
825 snprintf(minor_buf, sizeof(minor_buf), "%u", device->minor);
826 slink_name = kasprintf(GFP_KERNEL, "../resources/%s/volumes/%u",
827 device->resource->name, device->vnr);
828 if (!slink_name)
829 goto fail;
830 dentry = debugfs_create_symlink(minor_buf, drbd_debugfs_minors, slink_name);
831 kfree(slink_name);
832 slink_name = NULL;
833 if (IS_ERR_OR_NULL(dentry))
834 goto fail;
835 device->debugfs_minor = dentry;
836
837#define DCF(name) do { \
838 dentry = debugfs_create_file(#name, S_IRUSR|S_IRGRP, \
839 device->debugfs_vol, device, \
840 &device_ ## name ## _fops); \
841 if (IS_ERR_OR_NULL(dentry)) \
842 goto fail; \
843 device->debugfs_vol_ ## name = dentry; \
844 } while (0)
845
846 DCF(oldest_requests);
847 DCF(act_log_extents);
848 DCF(resync_extents);
849 DCF(data_gen_id);
850#undef DCF
851 return;
852
853fail:
854 drbd_debugfs_device_cleanup(device);
855 drbd_err(device, "failed to create debugfs entries\n");
856}
857
858void drbd_debugfs_device_cleanup(struct drbd_device *device)
859{
860 drbd_debugfs_remove(&device->debugfs_minor);
861 drbd_debugfs_remove(&device->debugfs_vol_oldest_requests);
862 drbd_debugfs_remove(&device->debugfs_vol_act_log_extents);
863 drbd_debugfs_remove(&device->debugfs_vol_resync_extents);
864 drbd_debugfs_remove(&device->debugfs_vol_data_gen_id);
865 drbd_debugfs_remove(&device->debugfs_vol);
866}
867
868void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device)
869{
870 struct dentry *conn_dir = peer_device->connection->debugfs_conn;
871 struct dentry *dentry;
872 char vnr_buf[8];
873
874 if (!conn_dir)
875 return;
876
877 snprintf(vnr_buf, sizeof(vnr_buf), "%u", peer_device->device->vnr);
878 dentry = debugfs_create_dir(vnr_buf, conn_dir);
879 if (IS_ERR_OR_NULL(dentry))
880 goto fail;
881 peer_device->debugfs_peer_dev = dentry;
882 return;
883
884fail:
885 drbd_debugfs_peer_device_cleanup(peer_device);
886 drbd_err(peer_device, "failed to create debugfs entries\n");
887}
888
889void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device)
890{
891 drbd_debugfs_remove(&peer_device->debugfs_peer_dev);
892}
893
894static int drbd_version_show(struct seq_file *m, void *ignored)
895{
896 seq_printf(m, "# %s\n", drbd_buildtag());
897 seq_printf(m, "VERSION=%s\n", REL_VERSION);
898 seq_printf(m, "API_VERSION=%u\n", API_VERSION);
899 seq_printf(m, "PRO_VERSION_MIN=%u\n", PRO_VERSION_MIN);
900 seq_printf(m, "PRO_VERSION_MAX=%u\n", PRO_VERSION_MAX);
901 return 0;
902}
903
904static int drbd_version_open(struct inode *inode, struct file *file)
905{
906 return single_open(file, drbd_version_show, NULL);
907}
908
909static struct file_operations drbd_version_fops = {
910 .owner = THIS_MODULE,
911 .open = drbd_version_open,
912 .llseek = seq_lseek,
913 .read = seq_read,
914 .release = single_release,
915};
916
917/* not __exit, may be indirectly called
918 * from the module-load-failure path as well. */
919void drbd_debugfs_cleanup(void)
920{
921 drbd_debugfs_remove(&drbd_debugfs_resources);
922 drbd_debugfs_remove(&drbd_debugfs_minors);
923 drbd_debugfs_remove(&drbd_debugfs_version);
924 drbd_debugfs_remove(&drbd_debugfs_root);
925}
926
927int __init drbd_debugfs_init(void)
928{
929 struct dentry *dentry;
930
931 dentry = debugfs_create_dir("drbd", NULL);
932 if (IS_ERR_OR_NULL(dentry))
933 goto fail;
934 drbd_debugfs_root = dentry;
935
936 dentry = debugfs_create_file("version", 0444, drbd_debugfs_root, NULL, &drbd_version_fops);
937 if (IS_ERR_OR_NULL(dentry))
938 goto fail;
939 drbd_debugfs_version = dentry;
940
941 dentry = debugfs_create_dir("resources", drbd_debugfs_root);
942 if (IS_ERR_OR_NULL(dentry))
943 goto fail;
944 drbd_debugfs_resources = dentry;
945
946 dentry = debugfs_create_dir("minors", drbd_debugfs_root);
947 if (IS_ERR_OR_NULL(dentry))
948 goto fail;
949 drbd_debugfs_minors = dentry;
950 return 0;
951
952fail:
953 drbd_debugfs_cleanup();
954 if (dentry)
955 return PTR_ERR(dentry);
956 else
957 return -EINVAL;
958}
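drbd_debugfs_init() and drbd_debugfs_cleanup() follow the usual pattern: every created dentry is stored in a pointer that starts out NULL, and the cleanup routine can therefore be called from any partially completed init state, just as debugfs_remove(NULL) is a no-op. A userspace analogue of that pattern (malloc/free standing in for debugfs objects):

#include <stdio.h>
#include <stdlib.h>

static char *res_dir, *minors_dir;

static void cleanup(void)
{
	free(res_dir);    res_dir = NULL;     /* free(NULL) is a no-op,      */
	free(minors_dir); minors_dir = NULL;  /* like debugfs_remove(NULL)   */
}

static int init_dirs(void)
{
	res_dir = malloc(16);
	if (!res_dir)
		goto fail;
	minors_dir = malloc(16);
	if (!minors_dir)
		goto fail;
	return 0;
fail:
	cleanup();   /* safe no matter how far we got */
	return -1;
}

int main(void)
{
	printf("init: %d\n", init_dirs());
	cleanup();
	return 0;
}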
diff --git a/drivers/block/drbd/drbd_debugfs.h b/drivers/block/drbd/drbd_debugfs.h
new file mode 100644
index 000000000000..8bee21340dce
--- /dev/null
+++ b/drivers/block/drbd/drbd_debugfs.h
@@ -0,0 +1,39 @@
1#include <linux/kernel.h>
2#include <linux/module.h>
3#include <linux/debugfs.h>
4
5#include "drbd_int.h"
6
7#ifdef CONFIG_DEBUG_FS
8int __init drbd_debugfs_init(void);
9void drbd_debugfs_cleanup(void);
10
11void drbd_debugfs_resource_add(struct drbd_resource *resource);
12void drbd_debugfs_resource_cleanup(struct drbd_resource *resource);
13
14void drbd_debugfs_connection_add(struct drbd_connection *connection);
15void drbd_debugfs_connection_cleanup(struct drbd_connection *connection);
16
17void drbd_debugfs_device_add(struct drbd_device *device);
18void drbd_debugfs_device_cleanup(struct drbd_device *device);
19
20void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device);
21void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device);
22#else
23
24static inline int __init drbd_debugfs_init(void) { return -ENODEV; }
25static inline void drbd_debugfs_cleanup(void) { }
26
27static inline void drbd_debugfs_resource_add(struct drbd_resource *resource) { }
28static inline void drbd_debugfs_resource_cleanup(struct drbd_resource *resource) { }
29
30static inline void drbd_debugfs_connection_add(struct drbd_connection *connection) { }
31static inline void drbd_debugfs_connection_cleanup(struct drbd_connection *connection) { }
32
33static inline void drbd_debugfs_device_add(struct drbd_device *device) { }
34static inline void drbd_debugfs_device_cleanup(struct drbd_device *device) { }
35
36static inline void drbd_debugfs_peer_device_add(struct drbd_peer_device *peer_device) { }
37static inline void drbd_debugfs_peer_device_cleanup(struct drbd_peer_device *peer_device) { }
38
39#endif
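The header compiles the whole debugfs layer out when CONFIG_DEBUG_FS is not set by providing empty static inline stubs, so callers never need #ifdefs. The same compile-out idiom in a tiny standalone example (FEATURE_ENABLED is a stand-in for the Kconfig symbol):

#include <stdio.h>

#define FEATURE_ENABLED 0

#if FEATURE_ENABLED
static inline void feature_add(int id) { printf("added %d\n", id); }
#else
static inline void feature_add(int id) { (void)id; /* no-op stub */ }
#endif

int main(void)
{
	feature_add(7);   /* compiles either way; does nothing when disabled */
	return 0;
}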
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index a76ceb344d64..1a000016ccdf 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -317,7 +317,63 @@ struct drbd_request {
317 317
318 struct list_head tl_requests; /* ring list in the transfer log */ 318 struct list_head tl_requests; /* ring list in the transfer log */
319 struct bio *master_bio; /* master bio pointer */ 319 struct bio *master_bio; /* master bio pointer */
320 unsigned long start_time; 320
321 /* see struct drbd_device */
322 struct list_head req_pending_master_completion;
323 struct list_head req_pending_local;
324
325 /* for generic IO accounting */
326 unsigned long start_jif;
327
328 /* for DRBD internal statistics */
329
330 /* Minimal set of time stamps to determine if we wait for activity log
331 * transactions, the local disk, or the peer. 32 bit "jiffies" are good enough;
332 * we don't expect a DRBD request to be stalled for several months.
333 */
334
335 /* before actual request processing */
336 unsigned long in_actlog_jif;
337
338 /* local disk */
339 unsigned long pre_submit_jif;
340
341 /* per connection */
342 unsigned long pre_send_jif;
343 unsigned long acked_jif;
344 unsigned long net_done_jif;
345
346 /* Possibly even more detail to track each phase:
347 * master_completion_jif
348 * how long did it take to complete the master bio
349 * (application visible latency)
350 * allocated_jif
351 * how long the master bio was blocked until we finally allocated
352 * a tracking struct
353 * in_actlog_jif
354 * how long did we wait for activity log transactions
355 *
356 * net_queued_jif
357 * when did we finally queue it for sending
358 * pre_send_jif
359 * when did we start sending it
360 * post_send_jif
361 * how long did we block in the network stack trying to send it
362 * acked_jif
363 * when did we receive (or fake, in protocol A) a remote ACK
364 * net_done_jif
365 * when did we receive final acknowledgement (P_BARRIER_ACK),
366 * or decide, e.g. on connection loss, that we no longer expect
367 * anything from this peer for this request.
368 *
369 * pre_submit_jif
370 * post_sub_jif
371 * when did we start submitting to the lower level device,
372 * and how long did we block in that submit function
373 * local_completion_jif
374 * how long did it take the lower level device to complete this request
375 */
376
321 377
322 /* once it hits 0, we may complete the master_bio */ 378 /* once it hits 0, we may complete the master_bio */
323 atomic_t completion_ref; 379 atomic_t completion_ref;
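These per-phase jiffies stamps are what the debugfs code earlier in this patch turns into ages: each latency is just the difference of two tick counters converted to milliseconds. A userspace sketch of that consumption, with an assumed tick rate (the kernel uses jiffies_to_msecs() and its real HZ):

#include <stdio.h>

#define HZ_SKETCH 250u   /* assumed tick rate, for illustration only */

static unsigned int ticks_to_msecs(unsigned long t)
{
	return (unsigned int)(t * 1000u / HZ_SKETCH);
}

int main(void)
{
	unsigned long start_jif = 100000, pre_send_jif = 100010, acked_jif = 100060;

	printf("queueing + local prep: %u ms\n", ticks_to_msecs(pre_send_jif - start_jif));
	printf("network round trip   : %u ms\n", ticks_to_msecs(acked_jif - pre_send_jif));
	return 0;
}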
@@ -366,6 +422,7 @@ struct drbd_peer_request {
366 struct drbd_interval i; 422 struct drbd_interval i;
367 /* see comments on ee flag bits below */ 423 /* see comments on ee flag bits below */
368 unsigned long flags; 424 unsigned long flags;
425 unsigned long submit_jif;
369 union { 426 union {
370 u64 block_id; 427 u64 block_id;
371 struct digest_info *digest; 428 struct digest_info *digest;
@@ -408,6 +465,17 @@ enum {
408 465
409 /* Is set when net_conf had two_primaries set while creating this peer_req */ 466 /* Is set when net_conf had two_primaries set while creating this peer_req */
410 __EE_IN_INTERVAL_TREE, 467 __EE_IN_INTERVAL_TREE,
468
469 /* for debugfs: */
470 /* has this been submitted, or does it still wait for something else? */
471 __EE_SUBMITTED,
472
473 /* this is/was a write request */
474 __EE_WRITE,
475
476 /* this originates from application on peer
477 * (not some resync or verify or other DRBD internal request) */
478 __EE_APPLICATION,
411}; 479};
412#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) 480#define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO)
413#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) 481#define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC)
@@ -419,6 +487,9 @@ enum {
419#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) 487#define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS)
420#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK) 488#define EE_SEND_WRITE_ACK (1<<__EE_SEND_WRITE_ACK)
421#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE) 489#define EE_IN_INTERVAL_TREE (1<<__EE_IN_INTERVAL_TREE)
490#define EE_SUBMITTED (1<<__EE_SUBMITTED)
491#define EE_WRITE (1<<__EE_WRITE)
492#define EE_APPLICATION (1<<__EE_APPLICATION)
422 493
423/* flag bits per device */ 494/* flag bits per device */
424enum { 495enum {
@@ -433,11 +504,11 @@ enum {
433 CONSIDER_RESYNC, 504 CONSIDER_RESYNC,
434 505
435 MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */ 506 MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
507
436 SUSPEND_IO, /* suspend application io */ 508 SUSPEND_IO, /* suspend application io */
437 BITMAP_IO, /* suspend application io; 509 BITMAP_IO, /* suspend application io;
438 once no more io in flight, start bitmap io */ 510 once no more io in flight, start bitmap io */
439 BITMAP_IO_QUEUED, /* Started bitmap IO */ 511 BITMAP_IO_QUEUED, /* Started bitmap IO */
440 GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
441 WAS_IO_ERROR, /* Local disk failed, returned IO error */ 512 WAS_IO_ERROR, /* Local disk failed, returned IO error */
442 WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */ 513 WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */
443 FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */ 514 FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
@@ -450,6 +521,20 @@ enum {
450 B_RS_H_DONE, /* Before resync handler done (already executed) */ 521 B_RS_H_DONE, /* Before resync handler done (already executed) */
451 DISCARD_MY_DATA, /* discard_my_data flag per volume */ 522 DISCARD_MY_DATA, /* discard_my_data flag per volume */
452 READ_BALANCE_RR, 523 READ_BALANCE_RR,
524
525 FLUSH_PENDING, /* if set, device->flush_jif is when we submitted that flush
526 * from drbd_flush_after_epoch() */
527
528 /* cleared only after backing device related structures have been destroyed. */
529 GOING_DISKLESS, /* Disk is being detached, because of io-error, or admin request. */
530
531 /* to be used in drbd_device_post_work() */
532 GO_DISKLESS, /* tell worker to schedule cleanup before detach */
533 DESTROY_DISK, /* tell worker to close backing devices and destroy related structures. */
534 MD_SYNC, /* tell worker to call drbd_md_sync() */
535 RS_START, /* tell worker to start resync/OV */
536 RS_PROGRESS, /* tell worker that resync made significant progress */
537 RS_DONE, /* tell worker that resync is done */
453}; 538};
454 539
455struct drbd_bitmap; /* opaque for drbd_device */ 540struct drbd_bitmap; /* opaque for drbd_device */
@@ -531,6 +616,11 @@ struct drbd_backing_dev {
531}; 616};
532 617
533struct drbd_md_io { 618struct drbd_md_io {
619 struct page *page;
620 unsigned long start_jif; /* last call to drbd_md_get_buffer */
621 unsigned long submit_jif; /* last _drbd_md_sync_page_io() submit */
622 const char *current_use;
623 atomic_t in_use;
534 unsigned int done; 624 unsigned int done;
535 int error; 625 int error;
536}; 626};
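
Editor's note: struct drbd_md_io absorbs the meta-data IO page and the in_use counter that used to live directly in struct drbd_device, and adds jiffies timestamps plus a current_use string so debugfs can report who holds the single meta-data buffer and for how long; this pairs with the drbd_md_get_buffer(device, intent) signature change further down. A userspace sketch of such an ownership protocol follows; the field names mirror the diff, but the function bodies are an assumption, not the driver implementation:

#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

struct md_io {
        atomic_int in_use;        /* 0 = free, 1 = owned */
        const char *current_use;  /* who owns it, for diagnostics */
        time_t start;             /* when it was taken */
};

/* Try to take exclusive ownership of the one meta-data buffer. */
static void *md_get_buffer(struct md_io *m, const char *intent)
{
        int expected = 0;
        if (!atomic_compare_exchange_strong(&m->in_use, &expected, 1))
                return NULL;              /* somebody else holds it */
        m->current_use = intent;
        m->start = time(NULL);
        return m;                         /* stands in for md_io.page */
}

static void md_put_buffer(struct md_io *m)
{
        m->current_use = NULL;
        atomic_store(&m->in_use, 0);
}

int main(void)
{
        struct md_io m = { .in_use = 0 };
        void *buf = md_get_buffer(&m, "superblock update");

        printf("first get: %s\n", buf ? "ok" : "busy");
        printf("second get: %s\n", md_get_buffer(&m, "bitmap") ? "ok" : "busy");
        md_put_buffer(&m);
        return 0;
}
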
@@ -577,10 +667,18 @@ enum {
577 * and potentially deadlock on, this drbd worker. 667 * and potentially deadlock on, this drbd worker.
578 */ 668 */
579 DISCONNECT_SENT, 669 DISCONNECT_SENT,
670
671 DEVICE_WORK_PENDING, /* tell worker that some device has pending work */
580}; 672};
581 673
582struct drbd_resource { 674struct drbd_resource {
583 char *name; 675 char *name;
676#ifdef CONFIG_DEBUG_FS
677 struct dentry *debugfs_res;
678 struct dentry *debugfs_res_volumes;
679 struct dentry *debugfs_res_connections;
680 struct dentry *debugfs_res_in_flight_summary;
681#endif
584 struct kref kref; 682 struct kref kref;
585 struct idr devices; /* volume number to device mapping */ 683 struct idr devices; /* volume number to device mapping */
586 struct list_head connections; 684 struct list_head connections;
@@ -594,12 +692,28 @@ struct drbd_resource {
594 unsigned susp_nod:1; /* IO suspended because no data */ 692 unsigned susp_nod:1; /* IO suspended because no data */
595 unsigned susp_fen:1; /* IO suspended because fence peer handler runs */ 693 unsigned susp_fen:1; /* IO suspended because fence peer handler runs */
596 694
695 enum write_ordering_e write_ordering;
696
597 cpumask_var_t cpu_mask; 697 cpumask_var_t cpu_mask;
598}; 698};
599 699
700struct drbd_thread_timing_details
701{
702 unsigned long start_jif;
703 void *cb_addr;
704 const char *caller_fn;
705 unsigned int line;
706 unsigned int cb_nr;
707};
708
600struct drbd_connection { 709struct drbd_connection {
601 struct list_head connections; 710 struct list_head connections;
602 struct drbd_resource *resource; 711 struct drbd_resource *resource;
712#ifdef CONFIG_DEBUG_FS
713 struct dentry *debugfs_conn;
714 struct dentry *debugfs_conn_callback_history;
715 struct dentry *debugfs_conn_oldest_requests;
716#endif
603 struct kref kref; 717 struct kref kref;
604 struct idr peer_devices; /* volume number to peer device mapping */ 718 struct idr peer_devices; /* volume number to peer device mapping */
605 enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */ 719 enum drbd_conns cstate; /* Only C_STANDALONE to C_WF_REPORT_PARAMS */
@@ -636,7 +750,6 @@ struct drbd_connection {
636 struct drbd_epoch *current_epoch; 750 struct drbd_epoch *current_epoch;
637 spinlock_t epoch_lock; 751 spinlock_t epoch_lock;
638 unsigned int epochs; 752 unsigned int epochs;
639 enum write_ordering_e write_ordering;
640 atomic_t current_tle_nr; /* transfer log epoch number */ 753 atomic_t current_tle_nr; /* transfer log epoch number */
641 unsigned current_tle_writes; /* writes seen within this tl epoch */ 754 unsigned current_tle_writes; /* writes seen within this tl epoch */
642 755
@@ -645,9 +758,22 @@ struct drbd_connection {
645 struct drbd_thread worker; 758 struct drbd_thread worker;
646 struct drbd_thread asender; 759 struct drbd_thread asender;
647 760
761 /* cached pointers,
762 * so we can look up the oldest pending requests more quickly.
763 * protected by resource->req_lock */
764 struct drbd_request *req_next; /* DRBD 9: todo.req_next */
765 struct drbd_request *req_ack_pending;
766 struct drbd_request *req_not_net_done;
767
648 /* sender side */ 768 /* sender side */
649 struct drbd_work_queue sender_work; 769 struct drbd_work_queue sender_work;
650 770
771#define DRBD_THREAD_DETAILS_HIST 16
772 unsigned int w_cb_nr; /* keeps counting up */
773 unsigned int r_cb_nr; /* keeps counting up */
774 struct drbd_thread_timing_details w_timing_details[DRBD_THREAD_DETAILS_HIST];
775 struct drbd_thread_timing_details r_timing_details[DRBD_THREAD_DETAILS_HIST];
776
651 struct { 777 struct {
652 /* whether this sender thread 778 /* whether this sender thread
653 * has processed a single write yet. */ 779 * has processed a single write yet. */
@@ -663,11 +789,22 @@ struct drbd_connection {
663 } send; 789 } send;
664}; 790};
665 791
792void __update_timing_details(
793 struct drbd_thread_timing_details *tdp,
794 unsigned int *cb_nr,
795 void *cb,
796 const char *fn, const unsigned int line);
797
798#define update_worker_timing_details(c, cb) \
799 __update_timing_details(c->w_timing_details, &c->w_cb_nr, cb, __func__ , __LINE__ )
800#define update_receiver_timing_details(c, cb) \
801 __update_timing_details(c->r_timing_details, &c->r_cb_nr, cb, __func__ , __LINE__ )
802
666struct submit_worker { 803struct submit_worker {
667 struct workqueue_struct *wq; 804 struct workqueue_struct *wq;
668 struct work_struct worker; 805 struct work_struct worker;
669 806
670 spinlock_t lock; 807 /* protected by ..->resource->req_lock */
671 struct list_head writes; 808 struct list_head writes;
672}; 809};
673 810
@@ -675,12 +812,29 @@ struct drbd_peer_device {
675 struct list_head peer_devices; 812 struct list_head peer_devices;
676 struct drbd_device *device; 813 struct drbd_device *device;
677 struct drbd_connection *connection; 814 struct drbd_connection *connection;
815#ifdef CONFIG_DEBUG_FS
816 struct dentry *debugfs_peer_dev;
817#endif
678}; 818};
679 819
680struct drbd_device { 820struct drbd_device {
681 struct drbd_resource *resource; 821 struct drbd_resource *resource;
682 struct list_head peer_devices; 822 struct list_head peer_devices;
683 int vnr; /* volume number within the connection */ 823 struct list_head pending_bitmap_io;
824
825 unsigned long flush_jif;
826#ifdef CONFIG_DEBUG_FS
827 struct dentry *debugfs_minor;
828 struct dentry *debugfs_vol;
829 struct dentry *debugfs_vol_oldest_requests;
830 struct dentry *debugfs_vol_act_log_extents;
831 struct dentry *debugfs_vol_resync_extents;
832 struct dentry *debugfs_vol_data_gen_id;
833#endif
834
835 unsigned int vnr; /* volume number within the connection */
836 unsigned int minor; /* device minor number */
837
684 struct kref kref; 838 struct kref kref;
685 839
686 /* things that are stored as / read from meta data on disk */ 840 /* things that are stored as / read from meta data on disk */
@@ -697,19 +851,10 @@ struct drbd_device {
697 unsigned long last_reattach_jif; 851 unsigned long last_reattach_jif;
698 struct drbd_work resync_work; 852 struct drbd_work resync_work;
699 struct drbd_work unplug_work; 853 struct drbd_work unplug_work;
700 struct drbd_work go_diskless;
701 struct drbd_work md_sync_work;
702 struct drbd_work start_resync_work;
703 struct timer_list resync_timer; 854 struct timer_list resync_timer;
704 struct timer_list md_sync_timer; 855 struct timer_list md_sync_timer;
705 struct timer_list start_resync_timer; 856 struct timer_list start_resync_timer;
706 struct timer_list request_timer; 857 struct timer_list request_timer;
707#ifdef DRBD_DEBUG_MD_SYNC
708 struct {
709 unsigned int line;
710 const char* func;
711 } last_md_mark_dirty;
712#endif
713 858
714 /* Used after attach while negotiating new disk state. */ 859 /* Used after attach while negotiating new disk state. */
715 union drbd_state new_state_tmp; 860 union drbd_state new_state_tmp;
@@ -724,6 +869,7 @@ struct drbd_device {
724 unsigned int al_writ_cnt; 869 unsigned int al_writ_cnt;
725 unsigned int bm_writ_cnt; 870 unsigned int bm_writ_cnt;
726 atomic_t ap_bio_cnt; /* Requests we need to complete */ 871 atomic_t ap_bio_cnt; /* Requests we need to complete */
872 atomic_t ap_actlog_cnt; /* Requests waiting for activity log */
727 atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */ 873 atomic_t ap_pending_cnt; /* AP data packets on the wire, ack expected */
728 atomic_t rs_pending_cnt; /* RS request/data packets on the wire */ 874 atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
729 atomic_t unacked_cnt; /* Need to send replies for */ 875 atomic_t unacked_cnt; /* Need to send replies for */
@@ -733,6 +879,13 @@ struct drbd_device {
733 struct rb_root read_requests; 879 struct rb_root read_requests;
734 struct rb_root write_requests; 880 struct rb_root write_requests;
735 881
882 /* for statistics and timeouts */
883 /* [0] read, [1] write */
884 struct list_head pending_master_completion[2];
885 struct list_head pending_completion[2];
886
887 /* use checksums for *this* resync */
888 bool use_csums;
736 /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ 889 /* blocks to resync in this run [unit BM_BLOCK_SIZE] */
737 unsigned long rs_total; 890 unsigned long rs_total;
738 /* number of resync blocks that failed in this run */ 891 /* number of resync blocks that failed in this run */
@@ -788,9 +941,7 @@ struct drbd_device {
788 atomic_t pp_in_use; /* allocated from page pool */ 941 atomic_t pp_in_use; /* allocated from page pool */
789 atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */ 942 atomic_t pp_in_use_by_net; /* sendpage()d, still referenced by tcp */
790 wait_queue_head_t ee_wait; 943 wait_queue_head_t ee_wait;
791 struct page *md_io_page; /* one page buffer for md_io */
792 struct drbd_md_io md_io; 944 struct drbd_md_io md_io;
793 atomic_t md_io_in_use; /* protects the md_io, md_io_page and md_io_tmpp */
794 spinlock_t al_lock; 945 spinlock_t al_lock;
795 wait_queue_head_t al_wait; 946 wait_queue_head_t al_wait;
796 struct lru_cache *act_log; /* activity log */ 947 struct lru_cache *act_log; /* activity log */
@@ -800,7 +951,6 @@ struct drbd_device {
800 atomic_t packet_seq; 951 atomic_t packet_seq;
801 unsigned int peer_seq; 952 unsigned int peer_seq;
802 spinlock_t peer_seq_lock; 953 spinlock_t peer_seq_lock;
803 unsigned int minor;
804 unsigned long comm_bm_set; /* communicated number of set bits. */ 954 unsigned long comm_bm_set; /* communicated number of set bits. */
805 struct bm_io_work bm_io_work; 955 struct bm_io_work bm_io_work;
806 u64 ed_uuid; /* UUID of the exposed data */ 956 u64 ed_uuid; /* UUID of the exposed data */
@@ -824,6 +974,21 @@ struct drbd_device {
824 struct submit_worker submit; 974 struct submit_worker submit;
825}; 975};
826 976
977struct drbd_bm_aio_ctx {
978 struct drbd_device *device;
979 struct list_head list; /* on device->pending_bitmap_io */;
980 unsigned long start_jif;
981 atomic_t in_flight;
982 unsigned int done;
983 unsigned flags;
984#define BM_AIO_COPY_PAGES 1
985#define BM_AIO_WRITE_HINTED 2
986#define BM_AIO_WRITE_ALL_PAGES 4
987#define BM_AIO_READ 8
988 int error;
989 struct kref kref;
990};
991
827struct drbd_config_context { 992struct drbd_config_context {
828 /* assigned from drbd_genlmsghdr */ 993 /* assigned from drbd_genlmsghdr */
829 unsigned int minor; 994 unsigned int minor;
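
Editor's note: struct drbd_bm_aio_ctx above describes one bitmap IO operation: it is kref-counted because the submitter and the in-flight page completions each hold a reference, it counts outstanding pages in an atomic, and it sits on device->pending_bitmap_io so debugfs can list bitmap IO that has not finished. The flags are a plain bitmask (BM_AIO_COPY_PAGES, _WRITE_HINTED, _WRITE_ALL_PAGES, _READ). A reduced, single-threaded sketch of the lifetime (an assumption of how the references are balanced, not the driver code):

#include <assert.h>
#include <stdio.h>

struct bm_aio_ctx {
        int kref;        /* submitter + one reference per in-flight page */
        int in_flight;   /* pages submitted but not yet completed */
        unsigned flags;
        int error;
};
#define BM_AIO_COPY_PAGES 1
#define BM_AIO_READ       8

static void ctx_get(struct bm_aio_ctx *c) { c->kref++; }
static int  ctx_put(struct bm_aio_ctx *c) { return --c->kref == 0; }

/* completion handler for one bitmap page */
static void one_page_done(struct bm_aio_ctx *c, int err)
{
        if (err)
                c->error = err;
        if (--c->in_flight == 0)
                printf("all pages done, error=%d\n", c->error);
        if (ctx_put(c))
                printf("last reference gone: unlink from pending_bitmap_io, free ctx\n");
}

int main(void)
{
        struct bm_aio_ctx ctx = { .kref = 1, .flags = BM_AIO_READ };
        int page;

        for (page = 0; page < 3; page++) {      /* submit phase */
                ctx_get(&ctx);
                ctx.in_flight++;
        }
        /* submitter drops its own reference; completions keep the ctx alive */
        ctx_put(&ctx);

        for (page = 0; page < 3; page++)        /* asynchronous completions */
                one_page_done(&ctx, 0);
        assert(ctx.in_flight == 0);
        return 0;
}
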
@@ -949,7 +1114,7 @@ extern int drbd_send_ov_request(struct drbd_peer_device *, sector_t sector, int
949extern int drbd_send_bitmap(struct drbd_device *device); 1114extern int drbd_send_bitmap(struct drbd_device *device);
950extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode); 1115extern void drbd_send_sr_reply(struct drbd_peer_device *, enum drbd_state_rv retcode);
951extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode); 1116extern void conn_send_sr_reply(struct drbd_connection *connection, enum drbd_state_rv retcode);
952extern void drbd_free_bc(struct drbd_backing_dev *ldev); 1117extern void drbd_free_ldev(struct drbd_backing_dev *ldev);
953extern void drbd_device_cleanup(struct drbd_device *device); 1118extern void drbd_device_cleanup(struct drbd_device *device);
954void drbd_print_uuids(struct drbd_device *device, const char *text); 1119void drbd_print_uuids(struct drbd_device *device, const char *text);
955 1120
@@ -966,13 +1131,7 @@ extern void __drbd_uuid_set(struct drbd_device *device, int idx, u64 val) __must
966extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local); 1131extern void drbd_md_set_flag(struct drbd_device *device, int flags) __must_hold(local);
967extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local); 1132extern void drbd_md_clear_flag(struct drbd_device *device, int flags)__must_hold(local);
968extern int drbd_md_test_flag(struct drbd_backing_dev *, int); 1133extern int drbd_md_test_flag(struct drbd_backing_dev *, int);
969#ifndef DRBD_DEBUG_MD_SYNC
970extern void drbd_md_mark_dirty(struct drbd_device *device); 1134extern void drbd_md_mark_dirty(struct drbd_device *device);
971#else
972#define drbd_md_mark_dirty(m) drbd_md_mark_dirty_(m, __LINE__ , __func__ )
973extern void drbd_md_mark_dirty_(struct drbd_device *device,
974 unsigned int line, const char *func);
975#endif
976extern void drbd_queue_bitmap_io(struct drbd_device *device, 1135extern void drbd_queue_bitmap_io(struct drbd_device *device,
977 int (*io_fn)(struct drbd_device *), 1136 int (*io_fn)(struct drbd_device *),
978 void (*done)(struct drbd_device *, int), 1137 void (*done)(struct drbd_device *, int),
@@ -983,9 +1142,8 @@ extern int drbd_bitmap_io(struct drbd_device *device,
983extern int drbd_bitmap_io_from_worker(struct drbd_device *device, 1142extern int drbd_bitmap_io_from_worker(struct drbd_device *device,
984 int (*io_fn)(struct drbd_device *), 1143 int (*io_fn)(struct drbd_device *),
985 char *why, enum bm_flag flags); 1144 char *why, enum bm_flag flags);
986extern int drbd_bmio_set_n_write(struct drbd_device *device); 1145extern int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local);
987extern int drbd_bmio_clear_n_write(struct drbd_device *device); 1146extern int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local);
988extern void drbd_ldev_destroy(struct drbd_device *device);
989 1147
990/* Meta data layout 1148/* Meta data layout
991 * 1149 *
@@ -1105,17 +1263,21 @@ struct bm_extent {
1105/* in which _bitmap_ extent (resp. sector) the bit for a certain 1263/* in which _bitmap_ extent (resp. sector) the bit for a certain
1106 * _storage_ sector is located in */ 1264 * _storage_ sector is located in */
1107#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9)) 1265#define BM_SECT_TO_EXT(x) ((x)>>(BM_EXT_SHIFT-9))
1266#define BM_BIT_TO_EXT(x) ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
1108 1267
1109/* how much _storage_ sectors we have per bitmap sector */ 1268/* first storage sector a bitmap extent corresponds to */
1110#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9)) 1269#define BM_EXT_TO_SECT(x) ((sector_t)(x) << (BM_EXT_SHIFT-9))
1270/* how much _storage_ sectors we have per bitmap extent */
1111#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1) 1271#define BM_SECT_PER_EXT BM_EXT_TO_SECT(1)
1272/* how many bits are covered by one bitmap extent (resync extent) */
1273#define BM_BITS_PER_EXT (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
1274
1275#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1)
1276
1112 1277
1113/* in one sector of the bitmap, we have this many activity_log extents. */ 1278/* in one sector of the bitmap, we have this many activity_log extents. */
1114#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT)) 1279#define AL_EXT_PER_BM_SECT (1 << (BM_EXT_SHIFT - AL_EXTENT_SHIFT))
1115 1280
1116#define BM_BLOCKS_PER_BM_EXT_B (BM_EXT_SHIFT - BM_BLOCK_SHIFT)
1117#define BM_BLOCKS_PER_BM_EXT_MASK ((1<<BM_BLOCKS_PER_BM_EXT_B) - 1)
1118
1119/* the extent in "PER_EXTENT" below is an activity log extent 1281/* the extent in "PER_EXTENT" below is an activity log extent
1120 * we need that many (long words/bytes) to store the bitmap 1282 * we need that many (long words/bytes) to store the bitmap
1121 * of one AL_EXTENT_SIZE chunk of storage. 1283 * of one AL_EXTENT_SIZE chunk of storage.
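
Editor's note: BM_BIT_TO_EXT() and BM_BITS_PER_EXT replace the old BM_BLOCKS_PER_BM_EXT_B helper, and BM_BLOCKS_PER_BM_EXT_MASK is now derived directly from BM_BITS_PER_EXT. With DRBD's usual constants (BM_BLOCK_SHIFT = 12, one bitmap bit per 4 KiB, and BM_EXT_SHIFT = 24, 16 MiB resync extents; treat these values as an assumption of this sketch), the shift arithmetic works out as follows:

#include <assert.h>
#include <stdio.h>

/* assumed values: 4 KiB per bitmap bit, 16 MiB per resync extent */
#define BM_BLOCK_SHIFT 12
#define BM_EXT_SHIFT   24

#define BM_SECT_TO_EXT(x)  ((x) >> (BM_EXT_SHIFT - 9))               /* 512 B sectors */
#define BM_BIT_TO_EXT(x)   ((x) >> (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
#define BM_BITS_PER_EXT    (1UL << (BM_EXT_SHIFT - BM_BLOCK_SHIFT))
#define BM_BLOCKS_PER_BM_EXT_MASK (BM_BITS_PER_EXT - 1)

int main(void)
{
        /* one resync extent covers 4096 bits of 4 KiB each = 16 MiB */
        assert(BM_BITS_PER_EXT == 4096);
        /* bitmap bit 5000 belongs to resync extent 1 */
        assert(BM_BIT_TO_EXT(5000UL) == 1);
        /* storage sector 40000 (512-byte units) lands in extent 1 as well */
        assert(BM_SECT_TO_EXT(40000UL) == 1);
        /* offset of a bit inside its extent */
        printf("bit 5000 is bit %lu within its extent\n",
               5000UL & BM_BLOCKS_PER_BM_EXT_MASK);
        return 0;
}
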
@@ -1195,11 +1357,11 @@ extern void _drbd_bm_set_bits(struct drbd_device *device,
1195 const unsigned long s, const unsigned long e); 1357 const unsigned long s, const unsigned long e);
1196extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr); 1358extern int drbd_bm_test_bit(struct drbd_device *device, unsigned long bitnr);
1197extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr); 1359extern int drbd_bm_e_weight(struct drbd_device *device, unsigned long enr);
1198extern int drbd_bm_write_page(struct drbd_device *device, unsigned int idx) __must_hold(local);
1199extern int drbd_bm_read(struct drbd_device *device) __must_hold(local); 1360extern int drbd_bm_read(struct drbd_device *device) __must_hold(local);
1200extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr); 1361extern void drbd_bm_mark_for_writeout(struct drbd_device *device, int page_nr);
1201extern int drbd_bm_write(struct drbd_device *device) __must_hold(local); 1362extern int drbd_bm_write(struct drbd_device *device) __must_hold(local);
1202extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local); 1363extern int drbd_bm_write_hinted(struct drbd_device *device) __must_hold(local);
1364extern int drbd_bm_write_lazy(struct drbd_device *device, unsigned upper_idx) __must_hold(local);
1203extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local); 1365extern int drbd_bm_write_all(struct drbd_device *device) __must_hold(local);
1204extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local); 1366extern int drbd_bm_write_copy_pages(struct drbd_device *device) __must_hold(local);
1205extern size_t drbd_bm_words(struct drbd_device *device); 1367extern size_t drbd_bm_words(struct drbd_device *device);
@@ -1213,7 +1375,6 @@ extern unsigned long _drbd_bm_find_next(struct drbd_device *device, unsigned lon
1213extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo); 1375extern unsigned long _drbd_bm_find_next_zero(struct drbd_device *device, unsigned long bm_fo);
1214extern unsigned long _drbd_bm_total_weight(struct drbd_device *device); 1376extern unsigned long _drbd_bm_total_weight(struct drbd_device *device);
1215extern unsigned long drbd_bm_total_weight(struct drbd_device *device); 1377extern unsigned long drbd_bm_total_weight(struct drbd_device *device);
1216extern int drbd_bm_rs_done(struct drbd_device *device);
1217/* for receive_bitmap */ 1378/* for receive_bitmap */
1218extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset, 1379extern void drbd_bm_merge_lel(struct drbd_device *device, size_t offset,
1219 size_t number, unsigned long *buffer); 1380 size_t number, unsigned long *buffer);
@@ -1312,7 +1473,7 @@ enum determine_dev_size {
1312extern enum determine_dev_size 1473extern enum determine_dev_size
1313drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local); 1474drbd_determine_dev_size(struct drbd_device *, enum dds_flags, struct resize_parms *) __must_hold(local);
1314extern void resync_after_online_grow(struct drbd_device *); 1475extern void resync_after_online_grow(struct drbd_device *);
1315extern void drbd_reconsider_max_bio_size(struct drbd_device *device); 1476extern void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev);
1316extern enum drbd_state_rv drbd_set_role(struct drbd_device *device, 1477extern enum drbd_state_rv drbd_set_role(struct drbd_device *device,
1317 enum drbd_role new_role, 1478 enum drbd_role new_role,
1318 int force); 1479 int force);
@@ -1333,7 +1494,7 @@ extern void resume_next_sg(struct drbd_device *device);
1333extern void suspend_other_sg(struct drbd_device *device); 1494extern void suspend_other_sg(struct drbd_device *device);
1334extern int drbd_resync_finished(struct drbd_device *device); 1495extern int drbd_resync_finished(struct drbd_device *device);
1335/* maybe rather drbd_main.c ? */ 1496/* maybe rather drbd_main.c ? */
1336extern void *drbd_md_get_buffer(struct drbd_device *device); 1497extern void *drbd_md_get_buffer(struct drbd_device *device, const char *intent);
1337extern void drbd_md_put_buffer(struct drbd_device *device); 1498extern void drbd_md_put_buffer(struct drbd_device *device);
1338extern int drbd_md_sync_page_io(struct drbd_device *device, 1499extern int drbd_md_sync_page_io(struct drbd_device *device,
1339 struct drbd_backing_dev *bdev, sector_t sector, int rw); 1500 struct drbd_backing_dev *bdev, sector_t sector, int rw);
@@ -1380,7 +1541,8 @@ extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req);
1380extern int drbd_receiver(struct drbd_thread *thi); 1541extern int drbd_receiver(struct drbd_thread *thi);
1381extern int drbd_asender(struct drbd_thread *thi); 1542extern int drbd_asender(struct drbd_thread *thi);
1382extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); 1543extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device);
1383extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); 1544extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
1545 bool throttle_if_app_is_waiting);
1384extern int drbd_submit_peer_request(struct drbd_device *, 1546extern int drbd_submit_peer_request(struct drbd_device *,
1385 struct drbd_peer_request *, const unsigned, 1547 struct drbd_peer_request *, const unsigned,
1386 const int); 1548 const int);
@@ -1464,10 +1626,7 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
1464{ 1626{
1465 __release(local); 1627 __release(local);
1466 if (!bio->bi_bdev) { 1628 if (!bio->bi_bdev) {
1467 printk(KERN_ERR "drbd%d: drbd_generic_make_request: " 1629 drbd_err(device, "drbd_generic_make_request: bio->bi_bdev == NULL\n");
1468 "bio->bi_bdev == NULL\n",
1469 device_to_minor(device));
1470 dump_stack();
1471 bio_endio(bio, -ENODEV); 1630 bio_endio(bio, -ENODEV);
1472 return; 1631 return;
1473 } 1632 }
@@ -1478,7 +1637,8 @@ static inline void drbd_generic_make_request(struct drbd_device *device,
1478 generic_make_request(bio); 1637 generic_make_request(bio);
1479} 1638}
1480 1639
1481void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo); 1640void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1641 enum write_ordering_e wo);
1482 1642
1483/* drbd_proc.c */ 1643/* drbd_proc.c */
1484extern struct proc_dir_entry *drbd_proc; 1644extern struct proc_dir_entry *drbd_proc;
@@ -1489,9 +1649,9 @@ extern const char *drbd_role_str(enum drbd_role s);
1489/* drbd_actlog.c */ 1649/* drbd_actlog.c */
1490extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); 1650extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i);
1491extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); 1651extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i);
1492extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); 1652extern void drbd_al_begin_io_commit(struct drbd_device *device);
1493extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); 1653extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i);
1494extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i, bool delegate); 1654extern void drbd_al_begin_io(struct drbd_device *device, struct drbd_interval *i);
1495extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i); 1655extern void drbd_al_complete_io(struct drbd_device *device, struct drbd_interval *i);
1496extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector); 1656extern void drbd_rs_complete_io(struct drbd_device *device, sector_t sector);
1497extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector); 1657extern int drbd_rs_begin_io(struct drbd_device *device, sector_t sector);
@@ -1501,14 +1661,17 @@ extern int drbd_rs_del_all(struct drbd_device *device);
1501extern void drbd_rs_failed_io(struct drbd_device *device, 1661extern void drbd_rs_failed_io(struct drbd_device *device,
1502 sector_t sector, int size); 1662 sector_t sector, int size);
1503extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go); 1663extern void drbd_advance_rs_marks(struct drbd_device *device, unsigned long still_to_go);
1504extern void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, 1664
1505 int size, const char *file, const unsigned int line); 1665enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };
1666extern int __drbd_change_sync(struct drbd_device *device, sector_t sector, int size,
1667 enum update_sync_bits_mode mode,
1668 const char *file, const unsigned int line);
1506#define drbd_set_in_sync(device, sector, size) \ 1669#define drbd_set_in_sync(device, sector, size) \
1507 __drbd_set_in_sync(device, sector, size, __FILE__, __LINE__) 1670 __drbd_change_sync(device, sector, size, SET_IN_SYNC, __FILE__, __LINE__)
1508extern int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector,
1509 int size, const char *file, const unsigned int line);
1510#define drbd_set_out_of_sync(device, sector, size) \ 1671#define drbd_set_out_of_sync(device, sector, size) \
1511 __drbd_set_out_of_sync(device, sector, size, __FILE__, __LINE__) 1672 __drbd_change_sync(device, sector, size, SET_OUT_OF_SYNC, __FILE__, __LINE__)
1673#define drbd_rs_failed_io(device, sector, size) \
1674 __drbd_change_sync(device, sector, size, RECORD_RS_FAILED, __FILE__, __LINE__)
1512extern void drbd_al_shrink(struct drbd_device *device); 1675extern void drbd_al_shrink(struct drbd_device *device);
1513extern int drbd_initialize_al(struct drbd_device *, void *); 1676extern int drbd_initialize_al(struct drbd_device *, void *);
1514 1677
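
Editor's note: drbd_set_in_sync(), drbd_set_out_of_sync() and drbd_rs_failed_io() collapse into a single backend, __drbd_change_sync(), selected by an update_sync_bits_mode value; the thin macros keep passing __FILE__/__LINE__ so diagnostics still point at the real caller. The dispatch pattern reduced to a standalone sketch (the real function walks the bitmap and the resync LRU):

#include <stdio.h>

enum update_sync_bits_mode { RECORD_RS_FAILED, SET_OUT_OF_SYNC, SET_IN_SYNC };

/* One backend, three entry points that only differ in the mode. */
static int change_sync(unsigned long sector, int size,
                       enum update_sync_bits_mode mode,
                       const char *file, unsigned int line)
{
        static const char *what[] = {
                [RECORD_RS_FAILED] = "record resync failure",
                [SET_OUT_OF_SYNC]  = "set out of sync",
                [SET_IN_SYNC]      = "set in sync",
        };
        printf("%s: sector %lu, %d bytes (called from %s:%u)\n",
               what[mode], sector, size, file, line);
        return 0;   /* the kernel version reports changed bits */
}

#define set_in_sync(s, sz) \
        change_sync(s, sz, SET_IN_SYNC, __FILE__, __LINE__)
#define set_out_of_sync(s, sz) \
        change_sync(s, sz, SET_OUT_OF_SYNC, __FILE__, __LINE__)

int main(void)
{
        set_out_of_sync(8, 4096);
        set_in_sync(8, 4096);
        return 0;
}
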
@@ -1764,25 +1927,38 @@ static inline sector_t drbd_md_ss(struct drbd_backing_dev *bdev)
1764} 1927}
1765 1928
1766static inline void 1929static inline void
1767drbd_queue_work_front(struct drbd_work_queue *q, struct drbd_work *w) 1930drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w)
1768{ 1931{
1769 unsigned long flags; 1932 unsigned long flags;
1770 spin_lock_irqsave(&q->q_lock, flags); 1933 spin_lock_irqsave(&q->q_lock, flags);
1771 list_add(&w->list, &q->q); 1934 list_add_tail(&w->list, &q->q);
1772 spin_unlock_irqrestore(&q->q_lock, flags); 1935 spin_unlock_irqrestore(&q->q_lock, flags);
1773 wake_up(&q->q_wait); 1936 wake_up(&q->q_wait);
1774} 1937}
1775 1938
1776static inline void 1939static inline void
1777drbd_queue_work(struct drbd_work_queue *q, struct drbd_work *w) 1940drbd_queue_work_if_unqueued(struct drbd_work_queue *q, struct drbd_work *w)
1778{ 1941{
1779 unsigned long flags; 1942 unsigned long flags;
1780 spin_lock_irqsave(&q->q_lock, flags); 1943 spin_lock_irqsave(&q->q_lock, flags);
1781 list_add_tail(&w->list, &q->q); 1944 if (list_empty_careful(&w->list))
1945 list_add_tail(&w->list, &q->q);
1782 spin_unlock_irqrestore(&q->q_lock, flags); 1946 spin_unlock_irqrestore(&q->q_lock, flags);
1783 wake_up(&q->q_wait); 1947 wake_up(&q->q_wait);
1784} 1948}
1785 1949
1950static inline void
1951drbd_device_post_work(struct drbd_device *device, int work_bit)
1952{
1953 if (!test_and_set_bit(work_bit, &device->flags)) {
1954 struct drbd_connection *connection =
1955 first_peer_device(device)->connection;
1956 struct drbd_work_queue *q = &connection->sender_work;
1957 if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection->flags))
1958 wake_up(&q->q_wait);
1959 }
1960}
1961
1786extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue); 1962extern void drbd_flush_workqueue(struct drbd_work_queue *work_queue);
1787 1963
1788static inline void wake_asender(struct drbd_connection *connection) 1964static inline void wake_asender(struct drbd_connection *connection)
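
Editor's note: instead of one struct drbd_work per event (go_diskless, md_sync, start_resync, ...), a device now posts work by setting a flag bit: drbd_device_post_work() sets the per-device bit, and only the first poster also sets DEVICE_WORK_PENDING on the connection and wakes the sender work queue; the worker later scans the device flags. drbd_queue_work_if_unqueued() plays a similar de-duplication role for real work items via list_empty_careful(). A single-threaded sketch of the post/consume handshake (names follow the diff, the consumer side is an assumption):

#include <stdio.h>

enum { GO_DISKLESS, MD_SYNC, RS_DONE, DEVICE_WORK_PENDING };

static int test_and_set_bit(int bit, unsigned long *flags)
{
        int was_set = (*flags >> bit) & 1;
        *flags |= 1UL << bit;
        return was_set;
}
static int test_and_clear_bit(int bit, unsigned long *flags)
{
        int was_set = (*flags >> bit) & 1;
        *flags &= ~(1UL << bit);
        return was_set;
}

static unsigned long device_flags, connection_flags;
static int wakeups;

static void device_post_work(int work_bit)
{
        if (!test_and_set_bit(work_bit, &device_flags)) {
                /* only the first poster wakes the worker */
                if (!test_and_set_bit(DEVICE_WORK_PENDING, &connection_flags))
                        wakeups++;
        }
}

static void worker_scan(void)
{
        if (!test_and_clear_bit(DEVICE_WORK_PENDING, &connection_flags))
                return;
        if (test_and_clear_bit(MD_SYNC, &device_flags))
                printf("worker: drbd_md_sync()\n");
        if (test_and_clear_bit(GO_DISKLESS, &device_flags))
                printf("worker: schedule cleanup before detach\n");
        if (test_and_clear_bit(RS_DONE, &device_flags))
                printf("worker: finish resync\n");
}

int main(void)
{
        device_post_work(MD_SYNC);
        device_post_work(MD_SYNC);         /* duplicate post is absorbed */
        device_post_work(GO_DISKLESS);
        printf("wakeups: %d\n", wakeups);  /* 1, not 3 */
        worker_scan();
        return 0;
}
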
@@ -1859,7 +2035,7 @@ static inline void inc_ap_pending(struct drbd_device *device)
1859 func, line, \ 2035 func, line, \
1860 atomic_read(&device->which)) 2036 atomic_read(&device->which))
1861 2037
1862#define dec_ap_pending(device) _dec_ap_pending(device, __FUNCTION__, __LINE__) 2038#define dec_ap_pending(device) _dec_ap_pending(device, __func__, __LINE__)
1863static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line) 2039static inline void _dec_ap_pending(struct drbd_device *device, const char *func, int line)
1864{ 2040{
1865 if (atomic_dec_and_test(&device->ap_pending_cnt)) 2041 if (atomic_dec_and_test(&device->ap_pending_cnt))
@@ -1878,7 +2054,7 @@ static inline void inc_rs_pending(struct drbd_device *device)
1878 atomic_inc(&device->rs_pending_cnt); 2054 atomic_inc(&device->rs_pending_cnt);
1879} 2055}
1880 2056
1881#define dec_rs_pending(device) _dec_rs_pending(device, __FUNCTION__, __LINE__) 2057#define dec_rs_pending(device) _dec_rs_pending(device, __func__, __LINE__)
1882static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line) 2058static inline void _dec_rs_pending(struct drbd_device *device, const char *func, int line)
1883{ 2059{
1884 atomic_dec(&device->rs_pending_cnt); 2060 atomic_dec(&device->rs_pending_cnt);
@@ -1899,20 +2075,29 @@ static inline void inc_unacked(struct drbd_device *device)
1899 atomic_inc(&device->unacked_cnt); 2075 atomic_inc(&device->unacked_cnt);
1900} 2076}
1901 2077
1902#define dec_unacked(device) _dec_unacked(device, __FUNCTION__, __LINE__) 2078#define dec_unacked(device) _dec_unacked(device, __func__, __LINE__)
1903static inline void _dec_unacked(struct drbd_device *device, const char *func, int line) 2079static inline void _dec_unacked(struct drbd_device *device, const char *func, int line)
1904{ 2080{
1905 atomic_dec(&device->unacked_cnt); 2081 atomic_dec(&device->unacked_cnt);
1906 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); 2082 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
1907} 2083}
1908 2084
1909#define sub_unacked(device, n) _sub_unacked(device, n, __FUNCTION__, __LINE__) 2085#define sub_unacked(device, n) _sub_unacked(device, n, __func__, __LINE__)
1910static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line) 2086static inline void _sub_unacked(struct drbd_device *device, int n, const char *func, int line)
1911{ 2087{
1912 atomic_sub(n, &device->unacked_cnt); 2088 atomic_sub(n, &device->unacked_cnt);
1913 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line); 2089 ERR_IF_CNT_IS_NEGATIVE(unacked_cnt, func, line);
1914} 2090}
1915 2091
2092static inline bool is_sync_state(enum drbd_conns connection_state)
2093{
2094 return
2095 (connection_state == C_SYNC_SOURCE
2096 || connection_state == C_SYNC_TARGET
2097 || connection_state == C_PAUSED_SYNC_S
2098 || connection_state == C_PAUSED_SYNC_T);
2099}
2100
1916/** 2101/**
1917 * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev 2102 * get_ldev() - Increase the ref count on device->ldev. Returns 0 if there is no ldev
1918 * @M: DRBD device. 2103 * @M: DRBD device.
@@ -1924,6 +2109,11 @@ static inline void _sub_unacked(struct drbd_device *device, int n, const char *f
1924 2109
1925static inline void put_ldev(struct drbd_device *device) 2110static inline void put_ldev(struct drbd_device *device)
1926{ 2111{
2112 enum drbd_disk_state ds = device->state.disk;
2113 /* We must check the state *before* the atomic_dec becomes visible,
2114 * or we have a theoretical race where someone hitting zero,
2115 * while state still D_FAILED, will then see D_DISKLESS in the
2116 * condition below and calling into destroy, where he must not, yet. */
1927 int i = atomic_dec_return(&device->local_cnt); 2117 int i = atomic_dec_return(&device->local_cnt);
1928 2118
1929 /* This may be called from some endio handler, 2119 /* This may be called from some endio handler,
@@ -1932,15 +2122,13 @@ static inline void put_ldev(struct drbd_device *device)
1932 __release(local); 2122 __release(local);
1933 D_ASSERT(device, i >= 0); 2123 D_ASSERT(device, i >= 0);
1934 if (i == 0) { 2124 if (i == 0) {
1935 if (device->state.disk == D_DISKLESS) 2125 if (ds == D_DISKLESS)
1936 /* even internal references gone, safe to destroy */ 2126 /* even internal references gone, safe to destroy */
1937 drbd_ldev_destroy(device); 2127 drbd_device_post_work(device, DESTROY_DISK);
1938 if (device->state.disk == D_FAILED) { 2128 if (ds == D_FAILED)
1939 /* all application IO references gone. */ 2129 /* all application IO references gone. */
1940 if (!test_and_set_bit(GO_DISKLESS, &device->flags)) 2130 if (!test_and_set_bit(GOING_DISKLESS, &device->flags))
1941 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 2131 drbd_device_post_work(device, GO_DISKLESS);
1942 &device->go_diskless);
1943 }
1944 wake_up(&device->misc_wait); 2132 wake_up(&device->misc_wait);
1945 } 2133 }
1946} 2134}
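
Editor's note: the put_ldev() change is about ordering, as the added comment explains: the disk state is sampled into a local variable before atomic_dec_return() makes the dropped reference visible. Otherwise a racing transition from D_FAILED to D_DISKLESS between the decrement and the state read could make the thread that dropped the last reference see D_DISKLESS and post destroy work while the D_FAILED cleanup has not run yet. The same "snapshot shared state, then publish the decrement" shape in portable C11 (illustrative only):

#include <stdatomic.h>
#include <stdio.h>

enum disk_state { D_DISKLESS, D_FAILED, D_UP_TO_DATE };

static _Atomic int disk_state = D_FAILED;
static atomic_int local_cnt = 1;

static void put_ldev(void)
{
        /* sample the state BEFORE the decrement becomes visible */
        int ds = atomic_load(&disk_state);
        int i = atomic_fetch_sub(&local_cnt, 1) - 1;

        if (i == 0) {
                if (ds == D_DISKLESS)
                        printf("post DESTROY_DISK work\n");
                else if (ds == D_FAILED)
                        printf("post GO_DISKLESS work (cleanup first)\n");
        }
}

int main(void)
{
        put_ldev();     /* last reference dropped while still D_FAILED */
        return 0;
}
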
@@ -1964,54 +2152,6 @@ static inline int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_
1964extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins); 2152extern int _get_ldev_if_state(struct drbd_device *device, enum drbd_disk_state mins);
1965#endif 2153#endif
1966 2154
1967/* you must have an "get_ldev" reference */
1968static inline void drbd_get_syncer_progress(struct drbd_device *device,
1969 unsigned long *bits_left, unsigned int *per_mil_done)
1970{
1971 /* this is to break it at compile time when we change that, in case we
1972 * want to support more than (1<<32) bits on a 32bit arch. */
1973 typecheck(unsigned long, device->rs_total);
1974
1975 /* note: both rs_total and rs_left are in bits, i.e. in
1976 * units of BM_BLOCK_SIZE.
1977 * for the percentage, we don't care. */
1978
1979 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
1980 *bits_left = device->ov_left;
1981 else
1982 *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
1983 /* >> 10 to prevent overflow,
1984 * +1 to prevent division by zero */
1985 if (*bits_left > device->rs_total) {
1986 /* doh. maybe a logic bug somewhere.
1987 * may also be just a race condition
1988 * between this and a disconnect during sync.
1989 * for now, just prevent in-kernel buffer overflow.
1990 */
1991 smp_rmb();
1992 drbd_warn(device, "cs:%s rs_left=%lu > rs_total=%lu (rs_failed %lu)\n",
1993 drbd_conn_str(device->state.conn),
1994 *bits_left, device->rs_total, device->rs_failed);
1995 *per_mil_done = 0;
1996 } else {
1997 /* Make sure the division happens in long context.
1998 * We allow up to one petabyte storage right now,
1999 * at a granularity of 4k per bit that is 2**38 bits.
2000 * After shift right and multiplication by 1000,
2001 * this should still fit easily into a 32bit long,
2002 * so we don't need a 64bit division on 32bit arch.
2003 * Note: currently we don't support such large bitmaps on 32bit
2004 * arch anyways, but no harm done to be prepared for it here.
2005 */
2006 unsigned int shift = device->rs_total > UINT_MAX ? 16 : 10;
2007 unsigned long left = *bits_left >> shift;
2008 unsigned long total = 1UL + (device->rs_total >> shift);
2009 unsigned long tmp = 1000UL - left * 1000UL/total;
2010 *per_mil_done = tmp;
2011 }
2012}
2013
2014
2015/* this throttles on-the-fly application requests 2155/* this throttles on-the-fly application requests
2016 * according to max_buffers settings; 2156 * according to max_buffers settings;
2017 * maybe re-implement using semaphores? */ 2157 * maybe re-implement using semaphores? */
@@ -2201,25 +2341,6 @@ static inline int drbd_queue_order_type(struct drbd_device *device)
2201 return QUEUE_ORDERED_NONE; 2341 return QUEUE_ORDERED_NONE;
2202} 2342}
2203 2343
2204static inline void drbd_md_flush(struct drbd_device *device)
2205{
2206 int r;
2207
2208 if (device->ldev == NULL) {
2209 drbd_warn(device, "device->ldev == NULL in drbd_md_flush\n");
2210 return;
2211 }
2212
2213 if (test_bit(MD_NO_FUA, &device->flags))
2214 return;
2215
2216 r = blkdev_issue_flush(device->ldev->md_bdev, GFP_NOIO, NULL);
2217 if (r) {
2218 set_bit(MD_NO_FUA, &device->flags);
2219 drbd_err(device, "meta data flush failed with status %d, disabling md-flushes\n", r);
2220 }
2221}
2222
2223static inline struct drbd_connection *first_connection(struct drbd_resource *resource) 2344static inline struct drbd_connection *first_connection(struct drbd_resource *resource)
2224{ 2345{
2225 return list_first_entry_or_null(&resource->connections, 2346 return list_first_entry_or_null(&resource->connections,
diff --git a/drivers/block/drbd/drbd_interval.h b/drivers/block/drbd/drbd_interval.h
index f38fcb00c10d..f210543f05f4 100644
--- a/drivers/block/drbd/drbd_interval.h
+++ b/drivers/block/drbd/drbd_interval.h
@@ -10,7 +10,9 @@ struct drbd_interval {
10 unsigned int size; /* size in bytes */ 10 unsigned int size; /* size in bytes */
11 sector_t end; /* highest interval end in subtree */ 11 sector_t end; /* highest interval end in subtree */
12 int local:1 /* local or remote request? */; 12 int local:1 /* local or remote request? */;
13 int waiting:1; 13 int waiting:1; /* someone is waiting for this to complete */
14 int completed:1; /* this has been completed already;
15 * ignore for conflict detection */
14}; 16};
15 17
16static inline void drbd_clear_interval(struct drbd_interval *i) 18static inline void drbd_clear_interval(struct drbd_interval *i)
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 960645c26e6f..9b465bb68487 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -26,7 +26,10 @@
26 26
27 */ 27 */
28 28
29#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
30
29#include <linux/module.h> 31#include <linux/module.h>
32#include <linux/jiffies.h>
30#include <linux/drbd.h> 33#include <linux/drbd.h>
31#include <asm/uaccess.h> 34#include <asm/uaccess.h>
32#include <asm/types.h> 35#include <asm/types.h>
@@ -54,16 +57,14 @@
54#include "drbd_int.h" 57#include "drbd_int.h"
55#include "drbd_protocol.h" 58#include "drbd_protocol.h"
56#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */ 59#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */
57
58#include "drbd_vli.h" 60#include "drbd_vli.h"
61#include "drbd_debugfs.h"
59 62
60static DEFINE_MUTEX(drbd_main_mutex); 63static DEFINE_MUTEX(drbd_main_mutex);
61static int drbd_open(struct block_device *bdev, fmode_t mode); 64static int drbd_open(struct block_device *bdev, fmode_t mode);
62static void drbd_release(struct gendisk *gd, fmode_t mode); 65static void drbd_release(struct gendisk *gd, fmode_t mode);
63static int w_md_sync(struct drbd_work *w, int unused);
64static void md_sync_timer_fn(unsigned long data); 66static void md_sync_timer_fn(unsigned long data);
65static int w_bitmap_io(struct drbd_work *w, int unused); 67static int w_bitmap_io(struct drbd_work *w, int unused);
66static int w_go_diskless(struct drbd_work *w, int unused);
67 68
68MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 69MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
69 "Lars Ellenberg <lars@linbit.com>"); 70 "Lars Ellenberg <lars@linbit.com>");
@@ -264,7 +265,7 @@ bail:
264 265
265/** 266/**
266 * _tl_restart() - Walks the transfer log, and applies an action to all requests 267 * _tl_restart() - Walks the transfer log, and applies an action to all requests
267 * @device: DRBD device. 268 * @connection: DRBD connection to operate on.
268 * @what: The action/event to perform with all request objects 269 * @what: The action/event to perform with all request objects
269 * 270 *
270 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO, 271 * @what might be one of CONNECTION_LOST_WHILE_PENDING, RESEND, FAIL_FROZEN_DISK_IO,
@@ -662,6 +663,11 @@ static int __send_command(struct drbd_connection *connection, int vnr,
662 msg_flags); 663 msg_flags);
663 if (data && !err) 664 if (data && !err)
664 err = drbd_send_all(connection, sock->socket, data, size, 0); 665 err = drbd_send_all(connection, sock->socket, data, size, 0);
666 /* DRBD protocol "pings" are latency critical.
667 * This is supposed to trigger tcp_push_pending_frames() */
668 if (!err && (cmd == P_PING || cmd == P_PING_ACK))
669 drbd_tcp_nodelay(sock->socket);
670
665 return err; 671 return err;
666} 672}
667 673
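
Editor's note: P_PING / P_PING_ACK probe peer liveness and round-trip time, so after sending one the socket is poked with drbd_tcp_nodelay() to trigger tcp_push_pending_frames() rather than letting Nagle batch the tiny packet with later data. The userspace equivalent is simply toggling TCP_NODELAY (a sketch with error handling trimmed):

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>
#include <stdio.h>
#include <unistd.h>

/* Force small, latency-critical packets out immediately. */
static void tcp_nodelay(int fd)
{
        int one = 1;

        if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0)
                perror("setsockopt(TCP_NODELAY)");
}

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (fd < 0) {
                perror("socket");
                return 1;
        }
        tcp_nodelay(fd);   /* in DRBD: done right after sending a ping */
        close(fd);
        return 0;
}
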
@@ -1636,7 +1642,10 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *
1636 if (peer_device->connection->agreed_pro_version >= 100) { 1642 if (peer_device->connection->agreed_pro_version >= 100) {
1637 if (req->rq_state & RQ_EXP_RECEIVE_ACK) 1643 if (req->rq_state & RQ_EXP_RECEIVE_ACK)
1638 dp_flags |= DP_SEND_RECEIVE_ACK; 1644 dp_flags |= DP_SEND_RECEIVE_ACK;
1639 if (req->rq_state & RQ_EXP_WRITE_ACK) 1645 /* During resync, request an explicit write ack,
1646 * even in protocol != C */
1647 if (req->rq_state & RQ_EXP_WRITE_ACK
1648 || (dp_flags & DP_MAY_SET_IN_SYNC))
1640 dp_flags |= DP_SEND_WRITE_ACK; 1649 dp_flags |= DP_SEND_WRITE_ACK;
1641 } 1650 }
1642 p->dp_flags = cpu_to_be32(dp_flags); 1651 p->dp_flags = cpu_to_be32(dp_flags);
@@ -1900,6 +1909,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
1900 drbd_set_defaults(device); 1909 drbd_set_defaults(device);
1901 1910
1902 atomic_set(&device->ap_bio_cnt, 0); 1911 atomic_set(&device->ap_bio_cnt, 0);
1912 atomic_set(&device->ap_actlog_cnt, 0);
1903 atomic_set(&device->ap_pending_cnt, 0); 1913 atomic_set(&device->ap_pending_cnt, 0);
1904 atomic_set(&device->rs_pending_cnt, 0); 1914 atomic_set(&device->rs_pending_cnt, 0);
1905 atomic_set(&device->unacked_cnt, 0); 1915 atomic_set(&device->unacked_cnt, 0);
@@ -1908,7 +1918,7 @@ void drbd_init_set_defaults(struct drbd_device *device)
1908 atomic_set(&device->rs_sect_in, 0); 1918 atomic_set(&device->rs_sect_in, 0);
1909 atomic_set(&device->rs_sect_ev, 0); 1919 atomic_set(&device->rs_sect_ev, 0);
1910 atomic_set(&device->ap_in_flight, 0); 1920 atomic_set(&device->ap_in_flight, 0);
1911 atomic_set(&device->md_io_in_use, 0); 1921 atomic_set(&device->md_io.in_use, 0);
1912 1922
1913 mutex_init(&device->own_state_mutex); 1923 mutex_init(&device->own_state_mutex);
1914 device->state_mutex = &device->own_state_mutex; 1924 device->state_mutex = &device->own_state_mutex;
@@ -1924,17 +1934,15 @@ void drbd_init_set_defaults(struct drbd_device *device)
1924 INIT_LIST_HEAD(&device->resync_reads); 1934 INIT_LIST_HEAD(&device->resync_reads);
1925 INIT_LIST_HEAD(&device->resync_work.list); 1935 INIT_LIST_HEAD(&device->resync_work.list);
1926 INIT_LIST_HEAD(&device->unplug_work.list); 1936 INIT_LIST_HEAD(&device->unplug_work.list);
1927 INIT_LIST_HEAD(&device->go_diskless.list);
1928 INIT_LIST_HEAD(&device->md_sync_work.list);
1929 INIT_LIST_HEAD(&device->start_resync_work.list);
1930 INIT_LIST_HEAD(&device->bm_io_work.w.list); 1937 INIT_LIST_HEAD(&device->bm_io_work.w.list);
1938 INIT_LIST_HEAD(&device->pending_master_completion[0]);
1939 INIT_LIST_HEAD(&device->pending_master_completion[1]);
1940 INIT_LIST_HEAD(&device->pending_completion[0]);
1941 INIT_LIST_HEAD(&device->pending_completion[1]);
1931 1942
1932 device->resync_work.cb = w_resync_timer; 1943 device->resync_work.cb = w_resync_timer;
1933 device->unplug_work.cb = w_send_write_hint; 1944 device->unplug_work.cb = w_send_write_hint;
1934 device->go_diskless.cb = w_go_diskless;
1935 device->md_sync_work.cb = w_md_sync;
1936 device->bm_io_work.w.cb = w_bitmap_io; 1945 device->bm_io_work.w.cb = w_bitmap_io;
1937 device->start_resync_work.cb = w_start_resync;
1938 1946
1939 init_timer(&device->resync_timer); 1947 init_timer(&device->resync_timer);
1940 init_timer(&device->md_sync_timer); 1948 init_timer(&device->md_sync_timer);
@@ -1992,7 +2000,7 @@ void drbd_device_cleanup(struct drbd_device *device)
1992 drbd_bm_cleanup(device); 2000 drbd_bm_cleanup(device);
1993 } 2001 }
1994 2002
1995 drbd_free_bc(device->ldev); 2003 drbd_free_ldev(device->ldev);
1996 device->ldev = NULL; 2004 device->ldev = NULL;
1997 2005
1998 clear_bit(AL_SUSPENDED, &device->flags); 2006 clear_bit(AL_SUSPENDED, &device->flags);
@@ -2006,7 +2014,6 @@ void drbd_device_cleanup(struct drbd_device *device)
2006 D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q)); 2014 D_ASSERT(device, list_empty(&first_peer_device(device)->connection->sender_work.q));
2007 D_ASSERT(device, list_empty(&device->resync_work.list)); 2015 D_ASSERT(device, list_empty(&device->resync_work.list));
2008 D_ASSERT(device, list_empty(&device->unplug_work.list)); 2016 D_ASSERT(device, list_empty(&device->unplug_work.list));
2009 D_ASSERT(device, list_empty(&device->go_diskless.list));
2010 2017
2011 drbd_set_defaults(device); 2018 drbd_set_defaults(device);
2012} 2019}
@@ -2129,20 +2136,6 @@ Enomem:
2129 return -ENOMEM; 2136 return -ENOMEM;
2130} 2137}
2131 2138
2132static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
2133 void *unused)
2134{
2135 /* just so we have it. you never know what interesting things we
2136 * might want to do here some day...
2137 */
2138
2139 return NOTIFY_DONE;
2140}
2141
2142static struct notifier_block drbd_notifier = {
2143 .notifier_call = drbd_notify_sys,
2144};
2145
2146static void drbd_release_all_peer_reqs(struct drbd_device *device) 2139static void drbd_release_all_peer_reqs(struct drbd_device *device)
2147{ 2140{
2148 int rr; 2141 int rr;
@@ -2173,7 +2166,7 @@ void drbd_destroy_device(struct kref *kref)
2173{ 2166{
2174 struct drbd_device *device = container_of(kref, struct drbd_device, kref); 2167 struct drbd_device *device = container_of(kref, struct drbd_device, kref);
2175 struct drbd_resource *resource = device->resource; 2168 struct drbd_resource *resource = device->resource;
2176 struct drbd_connection *connection; 2169 struct drbd_peer_device *peer_device, *tmp_peer_device;
2177 2170
2178 del_timer_sync(&device->request_timer); 2171 del_timer_sync(&device->request_timer);
2179 2172
@@ -2187,7 +2180,7 @@ void drbd_destroy_device(struct kref *kref)
2187 if (device->this_bdev) 2180 if (device->this_bdev)
2188 bdput(device->this_bdev); 2181 bdput(device->this_bdev);
2189 2182
2190 drbd_free_bc(device->ldev); 2183 drbd_free_ldev(device->ldev);
2191 device->ldev = NULL; 2184 device->ldev = NULL;
2192 2185
2193 drbd_release_all_peer_reqs(device); 2186 drbd_release_all_peer_reqs(device);
@@ -2200,15 +2193,20 @@ void drbd_destroy_device(struct kref *kref)
2200 2193
2201 if (device->bitmap) /* should no longer be there. */ 2194 if (device->bitmap) /* should no longer be there. */
2202 drbd_bm_cleanup(device); 2195 drbd_bm_cleanup(device);
2203 __free_page(device->md_io_page); 2196 __free_page(device->md_io.page);
2204 put_disk(device->vdisk); 2197 put_disk(device->vdisk);
2205 blk_cleanup_queue(device->rq_queue); 2198 blk_cleanup_queue(device->rq_queue);
2206 kfree(device->rs_plan_s); 2199 kfree(device->rs_plan_s);
2207 kfree(first_peer_device(device));
2208 kfree(device);
2209 2200
2210 for_each_connection(connection, resource) 2201 /* not for_each_connection(connection, resource):
2211 kref_put(&connection->kref, drbd_destroy_connection); 2202 * those may have been cleaned up and disassociated already.
2203 */
2204 for_each_peer_device_safe(peer_device, tmp_peer_device, device) {
2205 kref_put(&peer_device->connection->kref, drbd_destroy_connection);
2206 kfree(peer_device);
2207 }
2208 memset(device, 0xfd, sizeof(*device));
2209 kfree(device);
2212 kref_put(&resource->kref, drbd_destroy_resource); 2210 kref_put(&resource->kref, drbd_destroy_resource);
2213} 2211}
2214 2212
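
Editor's note: the memset(..., 0xfd / 0xf2 / 0xfc, ...) calls added before kfree() poison the object with a recognizable pattern, so a use-after-free of a device, resource or connection shows up as obviously bogus pointers (0xfdfdfdfd...) instead of silently reading stale-but-plausible data; each object type gets its own byte so a crash dump identifies what was freed. The same trick in plain C (sketch):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct thing {
        struct thing *next;
        int value;
};

int main(void)
{
        struct thing *t = calloc(1, sizeof(*t));

        if (!t)
                return 1;
        t->value = 42;

        /* poison before free: any dangling user now sees 0xfd patterns */
        memset(t, 0xfd, sizeof(*t));
        printf("poisoned next pointer: %p\n", (void *)t->next);
        free(t);
        return 0;
}
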
@@ -2236,7 +2234,7 @@ static void do_retry(struct work_struct *ws)
2236 list_for_each_entry_safe(req, tmp, &writes, tl_requests) { 2234 list_for_each_entry_safe(req, tmp, &writes, tl_requests) {
2237 struct drbd_device *device = req->device; 2235 struct drbd_device *device = req->device;
2238 struct bio *bio = req->master_bio; 2236 struct bio *bio = req->master_bio;
2239 unsigned long start_time = req->start_time; 2237 unsigned long start_jif = req->start_jif;
2240 bool expected; 2238 bool expected;
2241 2239
2242 expected = 2240 expected =
@@ -2271,10 +2269,12 @@ static void do_retry(struct work_struct *ws)
2271 /* We are not just doing generic_make_request(), 2269 /* We are not just doing generic_make_request(),
2272 * as we want to keep the start_time information. */ 2270 * as we want to keep the start_time information. */
2273 inc_ap_bio(device); 2271 inc_ap_bio(device);
2274 __drbd_make_request(device, bio, start_time); 2272 __drbd_make_request(device, bio, start_jif);
2275 } 2273 }
2276} 2274}
2277 2275
2276/* called via drbd_req_put_completion_ref(),
2277 * holds resource->req_lock */
2278void drbd_restart_request(struct drbd_request *req) 2278void drbd_restart_request(struct drbd_request *req)
2279{ 2279{
2280 unsigned long flags; 2280 unsigned long flags;
@@ -2298,6 +2298,7 @@ void drbd_destroy_resource(struct kref *kref)
2298 idr_destroy(&resource->devices); 2298 idr_destroy(&resource->devices);
2299 free_cpumask_var(resource->cpu_mask); 2299 free_cpumask_var(resource->cpu_mask);
2300 kfree(resource->name); 2300 kfree(resource->name);
2301 memset(resource, 0xf2, sizeof(*resource));
2301 kfree(resource); 2302 kfree(resource);
2302} 2303}
2303 2304
@@ -2307,8 +2308,10 @@ void drbd_free_resource(struct drbd_resource *resource)
2307 2308
2308 for_each_connection_safe(connection, tmp, resource) { 2309 for_each_connection_safe(connection, tmp, resource) {
2309 list_del(&connection->connections); 2310 list_del(&connection->connections);
2311 drbd_debugfs_connection_cleanup(connection);
2310 kref_put(&connection->kref, drbd_destroy_connection); 2312 kref_put(&connection->kref, drbd_destroy_connection);
2311 } 2313 }
2314 drbd_debugfs_resource_cleanup(resource);
2312 kref_put(&resource->kref, drbd_destroy_resource); 2315 kref_put(&resource->kref, drbd_destroy_resource);
2313} 2316}
2314 2317
@@ -2318,8 +2321,6 @@ static void drbd_cleanup(void)
2318 struct drbd_device *device; 2321 struct drbd_device *device;
2319 struct drbd_resource *resource, *tmp; 2322 struct drbd_resource *resource, *tmp;
2320 2323
2321 unregister_reboot_notifier(&drbd_notifier);
2322
2323 /* first remove proc, 2324 /* first remove proc,
2324 * drbdsetup uses it's presence to detect 2325 * drbdsetup uses it's presence to detect
2325 * whether DRBD is loaded. 2326 * whether DRBD is loaded.
@@ -2335,6 +2336,7 @@ static void drbd_cleanup(void)
2335 destroy_workqueue(retry.wq); 2336 destroy_workqueue(retry.wq);
2336 2337
2337 drbd_genl_unregister(); 2338 drbd_genl_unregister();
2339 drbd_debugfs_cleanup();
2338 2340
2339 idr_for_each_entry(&drbd_devices, device, i) 2341 idr_for_each_entry(&drbd_devices, device, i)
2340 drbd_delete_device(device); 2342 drbd_delete_device(device);
@@ -2350,7 +2352,7 @@ static void drbd_cleanup(void)
2350 2352
2351 idr_destroy(&drbd_devices); 2353 idr_destroy(&drbd_devices);
2352 2354
2353 printk(KERN_INFO "drbd: module cleanup done.\n"); 2355 pr_info("module cleanup done.\n");
2354} 2356}
2355 2357
2356/** 2358/**
@@ -2539,6 +2541,20 @@ int set_resource_options(struct drbd_resource *resource, struct res_opts *res_op
2539 if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) { 2541 if (nr_cpu_ids > 1 && res_opts->cpu_mask[0] != 0) {
2540 err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE, 2542 err = bitmap_parse(res_opts->cpu_mask, DRBD_CPU_MASK_SIZE,
2541 cpumask_bits(new_cpu_mask), nr_cpu_ids); 2543 cpumask_bits(new_cpu_mask), nr_cpu_ids);
2544 if (err == -EOVERFLOW) {
2545 /* So what. mask it out. */
2546 cpumask_var_t tmp_cpu_mask;
2547 if (zalloc_cpumask_var(&tmp_cpu_mask, GFP_KERNEL)) {
2548 cpumask_setall(tmp_cpu_mask);
2549 cpumask_and(new_cpu_mask, new_cpu_mask, tmp_cpu_mask);
2550 drbd_warn(resource, "Overflow in bitmap_parse(%.12s%s), truncating to %u bits\n",
2551 res_opts->cpu_mask,
2552 strlen(res_opts->cpu_mask) > 12 ? "..." : "",
2553 nr_cpu_ids);
2554 free_cpumask_var(tmp_cpu_mask);
2555 err = 0;
2556 }
2557 }
2542 if (err) { 2558 if (err) {
2543 drbd_warn(resource, "bitmap_parse() failed with %d\n", err); 2559 drbd_warn(resource, "bitmap_parse() failed with %d\n", err);
2544 /* retcode = ERR_CPU_MASK_PARSE; */ 2560 /* retcode = ERR_CPU_MASK_PARSE; */
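
Editor's note: bitmap_parse() returns -EOVERFLOW when the configured cpu-mask names CPUs beyond nr_cpu_ids; rather than failing the whole resource configuration, the new code masks the parsed value down to the CPUs that actually exist and warns. A reduced model of that "truncate instead of reject" behaviour with plain bitmasks (nr_cpu_ids and the parsing step are stand-ins):

#include <stdio.h>

int main(void)
{
        unsigned int nr_cpu_ids = 4;
        /* the user asked for CPUs 0, 1 and 6 -> CPU 6 does not exist */
        unsigned long requested = (1UL << 0) | (1UL << 1) | (1UL << 6);

        unsigned long possible  = (1UL << nr_cpu_ids) - 1;  /* cpumask_setall(), capped */
        unsigned long effective = requested & possible;     /* cpumask_and() */

        if (effective != requested)
                printf("warning: truncating cpu mask %#lx to %u CPUs -> %#lx\n",
                       requested, nr_cpu_ids, effective);
        return 0;
}
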
@@ -2579,10 +2595,12 @@ struct drbd_resource *drbd_create_resource(const char *name)
2579 kref_init(&resource->kref); 2595 kref_init(&resource->kref);
2580 idr_init(&resource->devices); 2596 idr_init(&resource->devices);
2581 INIT_LIST_HEAD(&resource->connections); 2597 INIT_LIST_HEAD(&resource->connections);
2598 resource->write_ordering = WO_bdev_flush;
2582 list_add_tail_rcu(&resource->resources, &drbd_resources); 2599 list_add_tail_rcu(&resource->resources, &drbd_resources);
2583 mutex_init(&resource->conf_update); 2600 mutex_init(&resource->conf_update);
2584 mutex_init(&resource->adm_mutex); 2601 mutex_init(&resource->adm_mutex);
2585 spin_lock_init(&resource->req_lock); 2602 spin_lock_init(&resource->req_lock);
2603 drbd_debugfs_resource_add(resource);
2586 return resource; 2604 return resource;
2587 2605
2588fail_free_name: 2606fail_free_name:
@@ -2593,7 +2611,7 @@ fail:
2593 return NULL; 2611 return NULL;
2594} 2612}
2595 2613
2596/* caller must be under genl_lock() */ 2614/* caller must be under adm_mutex */
2597struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts) 2615struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
2598{ 2616{
2599 struct drbd_resource *resource; 2617 struct drbd_resource *resource;
@@ -2617,7 +2635,6 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
2617 INIT_LIST_HEAD(&connection->current_epoch->list); 2635 INIT_LIST_HEAD(&connection->current_epoch->list);
2618 connection->epochs = 1; 2636 connection->epochs = 1;
2619 spin_lock_init(&connection->epoch_lock); 2637 spin_lock_init(&connection->epoch_lock);
2620 connection->write_ordering = WO_bdev_flush;
2621 2638
2622 connection->send.seen_any_write_yet = false; 2639 connection->send.seen_any_write_yet = false;
2623 connection->send.current_epoch_nr = 0; 2640 connection->send.current_epoch_nr = 0;
@@ -2652,6 +2669,7 @@ struct drbd_connection *conn_create(const char *name, struct res_opts *res_opts)
2652 2669
2653 kref_get(&resource->kref); 2670 kref_get(&resource->kref);
2654 list_add_tail_rcu(&connection->connections, &resource->connections); 2671 list_add_tail_rcu(&connection->connections, &resource->connections);
2672 drbd_debugfs_connection_add(connection);
2655 return connection; 2673 return connection;
2656 2674
2657fail_resource: 2675fail_resource:
@@ -2680,6 +2698,7 @@ void drbd_destroy_connection(struct kref *kref)
2680 drbd_free_socket(&connection->data); 2698 drbd_free_socket(&connection->data);
2681 kfree(connection->int_dig_in); 2699 kfree(connection->int_dig_in);
2682 kfree(connection->int_dig_vv); 2700 kfree(connection->int_dig_vv);
2701 memset(connection, 0xfc, sizeof(*connection));
2683 kfree(connection); 2702 kfree(connection);
2684 kref_put(&resource->kref, drbd_destroy_resource); 2703 kref_put(&resource->kref, drbd_destroy_resource);
2685} 2704}
@@ -2694,7 +2713,6 @@ static int init_submitter(struct drbd_device *device)
2694 return -ENOMEM; 2713 return -ENOMEM;
2695 2714
2696 INIT_WORK(&device->submit.worker, do_submit); 2715 INIT_WORK(&device->submit.worker, do_submit);
2697 spin_lock_init(&device->submit.lock);
2698 INIT_LIST_HEAD(&device->submit.writes); 2716 INIT_LIST_HEAD(&device->submit.writes);
2699 return 0; 2717 return 0;
2700} 2718}
@@ -2764,8 +2782,8 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2764 blk_queue_merge_bvec(q, drbd_merge_bvec); 2782 blk_queue_merge_bvec(q, drbd_merge_bvec);
2765 q->queue_lock = &resource->req_lock; 2783 q->queue_lock = &resource->req_lock;
2766 2784
2767 device->md_io_page = alloc_page(GFP_KERNEL); 2785 device->md_io.page = alloc_page(GFP_KERNEL);
2768 if (!device->md_io_page) 2786 if (!device->md_io.page)
2769 goto out_no_io_page; 2787 goto out_no_io_page;
2770 2788
2771 if (drbd_bm_init(device)) 2789 if (drbd_bm_init(device))
@@ -2794,6 +2812,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2794 kref_get(&device->kref); 2812 kref_get(&device->kref);
2795 2813
2796 INIT_LIST_HEAD(&device->peer_devices); 2814 INIT_LIST_HEAD(&device->peer_devices);
2815 INIT_LIST_HEAD(&device->pending_bitmap_io);
2797 for_each_connection(connection, resource) { 2816 for_each_connection(connection, resource) {
2798 peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL); 2817 peer_device = kzalloc(sizeof(struct drbd_peer_device), GFP_KERNEL);
2799 if (!peer_device) 2818 if (!peer_device)
@@ -2829,7 +2848,10 @@ enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsig
2829 for_each_peer_device(peer_device, device) 2848 for_each_peer_device(peer_device, device)
2830 drbd_connected(peer_device); 2849 drbd_connected(peer_device);
2831 } 2850 }
2832 2851 /* move to create_peer_device() */
2852 for_each_peer_device(peer_device, device)
2853 drbd_debugfs_peer_device_add(peer_device);
2854 drbd_debugfs_device_add(device);
2833 return NO_ERROR; 2855 return NO_ERROR;
2834 2856
2835out_idr_remove_vol: 2857out_idr_remove_vol:
@@ -2853,7 +2875,7 @@ out_idr_remove_minor:
2853out_no_minor_idr: 2875out_no_minor_idr:
2854 drbd_bm_cleanup(device); 2876 drbd_bm_cleanup(device);
2855out_no_bitmap: 2877out_no_bitmap:
2856 __free_page(device->md_io_page); 2878 __free_page(device->md_io.page);
2857out_no_io_page: 2879out_no_io_page:
2858 put_disk(disk); 2880 put_disk(disk);
2859out_no_disk: 2881out_no_disk:
@@ -2868,8 +2890,13 @@ void drbd_delete_device(struct drbd_device *device)
2868{ 2890{
2869 struct drbd_resource *resource = device->resource; 2891 struct drbd_resource *resource = device->resource;
2870 struct drbd_connection *connection; 2892 struct drbd_connection *connection;
2893 struct drbd_peer_device *peer_device;
2871 int refs = 3; 2894 int refs = 3;
2872 2895
2896 /* move to free_peer_device() */
2897 for_each_peer_device(peer_device, device)
2898 drbd_debugfs_peer_device_cleanup(peer_device);
2899 drbd_debugfs_device_cleanup(device);
2873 for_each_connection(connection, resource) { 2900 for_each_connection(connection, resource) {
2874 idr_remove(&connection->peer_devices, device->vnr); 2901 idr_remove(&connection->peer_devices, device->vnr);
2875 refs++; 2902 refs++;
@@ -2881,13 +2908,12 @@ void drbd_delete_device(struct drbd_device *device)
2881 kref_sub(&device->kref, refs, drbd_destroy_device); 2908 kref_sub(&device->kref, refs, drbd_destroy_device);
2882} 2909}
2883 2910
2884int __init drbd_init(void) 2911static int __init drbd_init(void)
2885{ 2912{
2886 int err; 2913 int err;
2887 2914
2888 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) { 2915 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
2889 printk(KERN_ERR 2916 pr_err("invalid minor_count (%d)\n", minor_count);
2890 "drbd: invalid minor_count (%d)\n", minor_count);
2891#ifdef MODULE 2917#ifdef MODULE
2892 return -EINVAL; 2918 return -EINVAL;
2893#else 2919#else
@@ -2897,14 +2923,11 @@ int __init drbd_init(void)
2897 2923
2898 err = register_blkdev(DRBD_MAJOR, "drbd"); 2924 err = register_blkdev(DRBD_MAJOR, "drbd");
2899 if (err) { 2925 if (err) {
2900 printk(KERN_ERR 2926 pr_err("unable to register block device major %d\n",
2901 "drbd: unable to register block device major %d\n",
2902 DRBD_MAJOR); 2927 DRBD_MAJOR);
2903 return err; 2928 return err;
2904 } 2929 }
2905 2930
2906 register_reboot_notifier(&drbd_notifier);
2907
2908 /* 2931 /*
2909 * allocate all necessary structs 2932 * allocate all necessary structs
2910 */ 2933 */
@@ -2918,7 +2941,7 @@ int __init drbd_init(void)
2918 2941
2919 err = drbd_genl_register(); 2942 err = drbd_genl_register();
2920 if (err) { 2943 if (err) {
2921 printk(KERN_ERR "drbd: unable to register generic netlink family\n"); 2944 pr_err("unable to register generic netlink family\n");
2922 goto fail; 2945 goto fail;
2923 } 2946 }
2924 2947
@@ -2929,38 +2952,39 @@ int __init drbd_init(void)
2929 err = -ENOMEM; 2952 err = -ENOMEM;
2930 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL); 2953 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
2931 if (!drbd_proc) { 2954 if (!drbd_proc) {
2932 printk(KERN_ERR "drbd: unable to register proc file\n"); 2955 pr_err("unable to register proc file\n");
2933 goto fail; 2956 goto fail;
2934 } 2957 }
2935 2958
2936 retry.wq = create_singlethread_workqueue("drbd-reissue"); 2959 retry.wq = create_singlethread_workqueue("drbd-reissue");
2937 if (!retry.wq) { 2960 if (!retry.wq) {
2938 printk(KERN_ERR "drbd: unable to create retry workqueue\n"); 2961 pr_err("unable to create retry workqueue\n");
2939 goto fail; 2962 goto fail;
2940 } 2963 }
2941 INIT_WORK(&retry.worker, do_retry); 2964 INIT_WORK(&retry.worker, do_retry);
2942 spin_lock_init(&retry.lock); 2965 spin_lock_init(&retry.lock);
2943 INIT_LIST_HEAD(&retry.writes); 2966 INIT_LIST_HEAD(&retry.writes);
2944 2967
2945 printk(KERN_INFO "drbd: initialized. " 2968 if (drbd_debugfs_init())
2969 pr_notice("failed to initialize debugfs -- will not be available\n");
2970
2971 pr_info("initialized. "
2946 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n", 2972 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
2947 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX); 2973 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
2948 printk(KERN_INFO "drbd: %s\n", drbd_buildtag()); 2974 pr_info("%s\n", drbd_buildtag());
2949 printk(KERN_INFO "drbd: registered as block device major %d\n", 2975 pr_info("registered as block device major %d\n", DRBD_MAJOR);
2950 DRBD_MAJOR);
2951
2952 return 0; /* Success! */ 2976 return 0; /* Success! */
2953 2977
2954fail: 2978fail:
2955 drbd_cleanup(); 2979 drbd_cleanup();
2956 if (err == -ENOMEM) 2980 if (err == -ENOMEM)
2957 printk(KERN_ERR "drbd: ran out of memory\n"); 2981 pr_err("ran out of memory\n");
2958 else 2982 else
2959 printk(KERN_ERR "drbd: initialization failure\n"); 2983 pr_err("initialization failure\n");
2960 return err; 2984 return err;
2961} 2985}
2962 2986
2963void drbd_free_bc(struct drbd_backing_dev *ldev) 2987void drbd_free_ldev(struct drbd_backing_dev *ldev)
2964{ 2988{
2965 if (ldev == NULL) 2989 if (ldev == NULL)
2966 return; 2990 return;
@@ -2972,24 +2996,29 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
2972 kfree(ldev); 2996 kfree(ldev);
2973} 2997}
2974 2998
2975void drbd_free_sock(struct drbd_connection *connection) 2999static void drbd_free_one_sock(struct drbd_socket *ds)
2976{ 3000{
2977 if (connection->data.socket) { 3001 struct socket *s;
2978 mutex_lock(&connection->data.mutex); 3002 mutex_lock(&ds->mutex);
2979 kernel_sock_shutdown(connection->data.socket, SHUT_RDWR); 3003 s = ds->socket;
2980 sock_release(connection->data.socket); 3004 ds->socket = NULL;
2981 connection->data.socket = NULL; 3005 mutex_unlock(&ds->mutex);
2982 mutex_unlock(&connection->data.mutex); 3006 if (s) {
2983 } 3007 /* so debugfs does not need to mutex_lock() */
2984 if (connection->meta.socket) { 3008 synchronize_rcu();
2985 mutex_lock(&connection->meta.mutex); 3009 kernel_sock_shutdown(s, SHUT_RDWR);
2986 kernel_sock_shutdown(connection->meta.socket, SHUT_RDWR); 3010 sock_release(s);
2987 sock_release(connection->meta.socket);
2988 connection->meta.socket = NULL;
2989 mutex_unlock(&connection->meta.mutex);
2990 } 3011 }
2991} 3012}
2992 3013
3014void drbd_free_sock(struct drbd_connection *connection)
3015{
3016 if (connection->data.socket)
3017 drbd_free_one_sock(&connection->data);
3018 if (connection->meta.socket)
3019 drbd_free_one_sock(&connection->meta);
3020}
3021
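drbd_free_one_sock() now detaches the socket pointer while holding the mutex and does the slow shutdown outside it, with a synchronize_rcu() in between so lockless readers (the new debugfs code, per the comment) are done before teardown. A rough userspace model of the detach-then-teardown shape, using a FILE * and a pthread mutex as stand-ins for the socket and its mutex:

#include <pthread.h>
#include <stdio.h>

/* Hypothetical model of drbd_free_one_sock(): unpublish the pointer under
 * the lock, tear it down outside the lock. The kernel version additionally
 * calls synchronize_rcu() before the shutdown. */
struct conn_socket {
	pthread_mutex_t mutex;
	FILE *stream;            /* stand-in for struct socket * */
};

static void free_one_sock(struct conn_socket *cs)
{
	FILE *s;

	pthread_mutex_lock(&cs->mutex);
	s = cs->stream;          /* take ownership ... */
	cs->stream = NULL;       /* ... and unpublish the pointer */
	pthread_mutex_unlock(&cs->mutex);

	if (s) {
		fflush(s);       /* slow work happens without the lock held */
		fclose(s);
	}
}

int main(void)
{
	struct conn_socket cs = { PTHREAD_MUTEX_INITIALIZER, tmpfile() };
	free_one_sock(&cs);
	free_one_sock(&cs);      /* second call is a no-op: pointer already NULL */
	return 0;
}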
2993/* meta data management */ 3022/* meta data management */
2994 3023
2995void conn_md_sync(struct drbd_connection *connection) 3024void conn_md_sync(struct drbd_connection *connection)
@@ -3093,7 +3122,7 @@ void drbd_md_sync(struct drbd_device *device)
3093 if (!get_ldev_if_state(device, D_FAILED)) 3122 if (!get_ldev_if_state(device, D_FAILED))
3094 return; 3123 return;
3095 3124
3096 buffer = drbd_md_get_buffer(device); 3125 buffer = drbd_md_get_buffer(device, __func__);
3097 if (!buffer) 3126 if (!buffer)
3098 goto out; 3127 goto out;
3099 3128
@@ -3253,7 +3282,7 @@ int drbd_md_read(struct drbd_device *device, struct drbd_backing_dev *bdev)
3253 if (device->state.disk != D_DISKLESS) 3282 if (device->state.disk != D_DISKLESS)
3254 return ERR_DISK_CONFIGURED; 3283 return ERR_DISK_CONFIGURED;
3255 3284
3256 buffer = drbd_md_get_buffer(device); 3285 buffer = drbd_md_get_buffer(device, __func__);
3257 if (!buffer) 3286 if (!buffer)
3258 return ERR_NOMEM; 3287 return ERR_NOMEM;
3259 3288
@@ -3466,23 +3495,19 @@ void drbd_uuid_set_bm(struct drbd_device *device, u64 val) __must_hold(local)
3466 * 3495 *
3467 * Sets all bits in the bitmap and writes the whole bitmap to stable storage. 3496 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
3468 */ 3497 */
3469int drbd_bmio_set_n_write(struct drbd_device *device) 3498int drbd_bmio_set_n_write(struct drbd_device *device) __must_hold(local)
3470{ 3499{
3471 int rv = -EIO; 3500 int rv = -EIO;
3472 3501
3473 if (get_ldev_if_state(device, D_ATTACHING)) { 3502 drbd_md_set_flag(device, MDF_FULL_SYNC);
3474 drbd_md_set_flag(device, MDF_FULL_SYNC); 3503 drbd_md_sync(device);
3475 drbd_md_sync(device); 3504 drbd_bm_set_all(device);
3476 drbd_bm_set_all(device);
3477
3478 rv = drbd_bm_write(device);
3479 3505
3480 if (!rv) { 3506 rv = drbd_bm_write(device);
3481 drbd_md_clear_flag(device, MDF_FULL_SYNC);
3482 drbd_md_sync(device);
3483 }
3484 3507
3485 put_ldev(device); 3508 if (!rv) {
3509 drbd_md_clear_flag(device, MDF_FULL_SYNC);
3510 drbd_md_sync(device);
3486 } 3511 }
3487 3512
3488 return rv; 3513 return rv;
@@ -3494,18 +3519,11 @@ int drbd_bmio_set_n_write(struct drbd_device *device)
3494 * 3519 *
3495 * Clears all bits in the bitmap and writes the whole bitmap to stable storage. 3520 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
3496 */ 3521 */
3497int drbd_bmio_clear_n_write(struct drbd_device *device) 3522int drbd_bmio_clear_n_write(struct drbd_device *device) __must_hold(local)
3498{ 3523{
3499 int rv = -EIO;
3500
3501 drbd_resume_al(device); 3524 drbd_resume_al(device);
3502 if (get_ldev_if_state(device, D_ATTACHING)) { 3525 drbd_bm_clear_all(device);
3503 drbd_bm_clear_all(device); 3526 return drbd_bm_write(device);
3504 rv = drbd_bm_write(device);
3505 put_ldev(device);
3506 }
3507
3508 return rv;
3509} 3527}
3510 3528
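Both bitmap helpers lose their own get_ldev_if_state()/put_ldev() calls and are annotated __must_hold(local): the local-disk reference is now taken once by whoever invokes the callback (the bitmap-IO worker, per the kerneldoc addition further down). A small sketch of that contract, with simplified stand-in names and a plain counter modelling the reference count:

#include <stdio.h>

/* Assumed, simplified model: the queued worker encloses the callback with
 * one get/put pair, so the callback itself may rely on the reference. */
struct dev {
	int local_cnt;                       /* models get_ldev()/put_ldev() */
};

static int get_ldev(struct dev *d)  { d->local_cnt++; return 1; }
static void put_ldev(struct dev *d) { d->local_cnt--; }

static int bmio_set_n_write(struct dev *d)
{
	/* callback body: reference is already held by the caller */
	printf("writing bitmap, local_cnt=%d\n", d->local_cnt);
	return 0;
}

static int queue_bitmap_io(struct dev *d, int (*io_fn)(struct dev *))
{
	int rv = -5;                         /* -EIO stand-in */

	if (get_ldev(d)) {                   /* worker encloses io_fn() ... */
		rv = io_fn(d);
		put_ldev(d);                 /* ... with exactly one get/put pair */
	}
	return rv;
}

int main(void)
{
	struct dev d = { 0 };
	return queue_bitmap_io(&d, bmio_set_n_write);
}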
3511static int w_bitmap_io(struct drbd_work *w, int unused) 3529static int w_bitmap_io(struct drbd_work *w, int unused)
@@ -3537,61 +3555,6 @@ static int w_bitmap_io(struct drbd_work *w, int unused)
3537 return 0; 3555 return 0;
3538} 3556}
3539 3557
3540void drbd_ldev_destroy(struct drbd_device *device)
3541{
3542 lc_destroy(device->resync);
3543 device->resync = NULL;
3544 lc_destroy(device->act_log);
3545 device->act_log = NULL;
3546 __no_warn(local,
3547 drbd_free_bc(device->ldev);
3548 device->ldev = NULL;);
3549
3550 clear_bit(GO_DISKLESS, &device->flags);
3551}
3552
3553static int w_go_diskless(struct drbd_work *w, int unused)
3554{
3555 struct drbd_device *device =
3556 container_of(w, struct drbd_device, go_diskless);
3557
3558 D_ASSERT(device, device->state.disk == D_FAILED);
3559 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
3560 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
3561 * the protected members anymore, though, so once put_ldev reaches zero
3562 * again, it will be safe to free them. */
3563
3564 /* Try to write changed bitmap pages, read errors may have just
3565 * set some bits outside the area covered by the activity log.
3566 *
3567 * If we have an IO error during the bitmap writeout,
3568 * we will want a full sync next time, just in case.
3569 * (Do we want a specific meta data flag for this?)
3570 *
3571 * If that does not make it to stable storage either,
3572 * we cannot do anything about that anymore.
3573 *
3574 * We still need to check if both bitmap and ldev are present, we may
3575 * end up here after a failed attach, before ldev was even assigned.
3576 */
3577 if (device->bitmap && device->ldev) {
3578 /* An interrupted resync or similar is allowed to recounts bits
3579 * while we detach.
3580 * Any modifications would not be expected anymore, though.
3581 */
3582 if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
3583 "detach", BM_LOCKED_TEST_ALLOWED)) {
3584 if (test_bit(WAS_READ_ERROR, &device->flags)) {
3585 drbd_md_set_flag(device, MDF_FULL_SYNC);
3586 drbd_md_sync(device);
3587 }
3588 }
3589 }
3590
3591 drbd_force_state(device, NS(disk, D_DISKLESS));
3592 return 0;
3593}
3594
3595/** 3558/**
3596 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap 3559 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3597 * @device: DRBD device. 3560 * @device: DRBD device.
@@ -3603,6 +3566,9 @@ static int w_go_diskless(struct drbd_work *w, int unused)
3603 * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be 3566 * that drbd_set_out_of_sync() can not be called. This function MAY ONLY be
3604 * called from worker context. It MUST NOT be used while a previous such 3567 * called from worker context. It MUST NOT be used while a previous such
3605 * work is still pending! 3568 * work is still pending!
3569 *
3570 * Its worker function encloses the call of io_fn() by get_ldev() and
3571 * put_ldev().
3606 */ 3572 */
3607void drbd_queue_bitmap_io(struct drbd_device *device, 3573void drbd_queue_bitmap_io(struct drbd_device *device,
3608 int (*io_fn)(struct drbd_device *), 3574 int (*io_fn)(struct drbd_device *),
@@ -3685,25 +3651,7 @@ int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
3685static void md_sync_timer_fn(unsigned long data) 3651static void md_sync_timer_fn(unsigned long data)
3686{ 3652{
3687 struct drbd_device *device = (struct drbd_device *) data; 3653 struct drbd_device *device = (struct drbd_device *) data;
3688 3654 drbd_device_post_work(device, MD_SYNC);
3689 /* must not double-queue! */
3690 if (list_empty(&device->md_sync_work.list))
3691 drbd_queue_work_front(&first_peer_device(device)->connection->sender_work,
3692 &device->md_sync_work);
3693}
3694
3695static int w_md_sync(struct drbd_work *w, int unused)
3696{
3697 struct drbd_device *device =
3698 container_of(w, struct drbd_device, md_sync_work);
3699
3700 drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
3701#ifdef DEBUG
3702 drbd_warn(device, "last md_mark_dirty: %s:%u\n",
3703 device->last_md_mark_dirty.func, device->last_md_mark_dirty.line);
3704#endif
3705 drbd_md_sync(device);
3706 return 0;
3707} 3655}
3708 3656
3709const char *cmdname(enum drbd_packet cmd) 3657const char *cmdname(enum drbd_packet cmd)
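The md-sync timer no longer queues a dedicated work struct (and no longer needs the "must not double-queue" check); it simply posts an MD_SYNC item via drbd_device_post_work(). Assuming that helper behaves like a per-device pending-work bitmask drained by the worker thread, the shape is roughly:

#include <stdio.h>

/* Illustrative model only; the real drbd_device_post_work() uses atomic
 * bitops and wakes the per-resource worker. Names are stand-ins. */
enum { MD_SYNC = 0, GO_DISKLESS = 1 };

static unsigned long device_work_bits;

static void device_post_work(int bit)
{
	device_work_bits |= 1UL << bit;      /* idempotent: no double-queue issue */
	/* kernel code would also wake up the worker here */
}

static void do_device_work(void)
{
	if (device_work_bits & (1UL << MD_SYNC)) {
		device_work_bits &= ~(1UL << MD_SYNC);
		printf("worker: drbd_md_sync()\n");
	}
}

int main(void)
{
	device_post_work(MD_SYNC);
	device_post_work(MD_SYNC);           /* a second post is harmless */
	do_device_work();
	return 0;
}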
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 3f2e16738080..1cd47df44bda 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -23,6 +23,8 @@
23 23
24 */ 24 */
25 25
26#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
27
26#include <linux/module.h> 28#include <linux/module.h>
27#include <linux/drbd.h> 29#include <linux/drbd.h>
28#include <linux/in.h> 30#include <linux/in.h>
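With pr_fmt defined to prepend KBUILD_MODNAME, the pr_err()/pr_info() conversions throughout this series can drop their hand-written "drbd: " prefixes. A tiny userspace model of the mechanism, with fprintf standing in for printk and a hard-coded prefix where the kernel uses KBUILD_MODNAME:

#include <stdio.h>

/* Assumption: this only models the pr_fmt convention; the driver's real
 * macros live in printk.h and are not reproduced here. */
#define pr_fmt(fmt) "drbd: " fmt
#define pr_err(fmt, ...)  fprintf(stderr, pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) fprintf(stdout, pr_fmt(fmt), ##__VA_ARGS__)

int main(void)
{
	int minor_count = 0;
	/* call sites no longer repeat the module prefix by hand */
	pr_err("invalid minor_count (%d)\n", minor_count);
	pr_info("registered as block device major %d\n", 147);
	return 0;
}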
@@ -85,7 +87,7 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info)
85{ 87{
86 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); 88 genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb))));
87 if (genlmsg_reply(skb, info)) 89 if (genlmsg_reply(skb, info))
88 printk(KERN_ERR "drbd: error sending genl reply\n"); 90 pr_err("error sending genl reply\n");
89} 91}
90 92
91/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only 93/* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only
@@ -558,8 +560,10 @@ void conn_try_outdate_peer_async(struct drbd_connection *connection)
558} 560}
559 561
560enum drbd_state_rv 562enum drbd_state_rv
561drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) 563drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int force)
562{ 564{
565 struct drbd_peer_device *const peer_device = first_peer_device(device);
566 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
563 const int max_tries = 4; 567 const int max_tries = 4;
564 enum drbd_state_rv rv = SS_UNKNOWN_ERROR; 568 enum drbd_state_rv rv = SS_UNKNOWN_ERROR;
565 struct net_conf *nc; 569 struct net_conf *nc;
@@ -607,7 +611,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
607 device->state.disk == D_CONSISTENT && mask.pdsk == 0) { 611 device->state.disk == D_CONSISTENT && mask.pdsk == 0) {
608 D_ASSERT(device, device->state.pdsk == D_UNKNOWN); 612 D_ASSERT(device, device->state.pdsk == D_UNKNOWN);
609 613
610 if (conn_try_outdate_peer(first_peer_device(device)->connection)) { 614 if (conn_try_outdate_peer(connection)) {
611 val.disk = D_UP_TO_DATE; 615 val.disk = D_UP_TO_DATE;
612 mask.disk = D_MASK; 616 mask.disk = D_MASK;
613 } 617 }
@@ -617,7 +621,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
617 if (rv == SS_NOTHING_TO_DO) 621 if (rv == SS_NOTHING_TO_DO)
618 goto out; 622 goto out;
619 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) { 623 if (rv == SS_PRIMARY_NOP && mask.pdsk == 0) {
620 if (!conn_try_outdate_peer(first_peer_device(device)->connection) && force) { 624 if (!conn_try_outdate_peer(connection) && force) {
621 drbd_warn(device, "Forced into split brain situation!\n"); 625 drbd_warn(device, "Forced into split brain situation!\n");
622 mask.pdsk = D_MASK; 626 mask.pdsk = D_MASK;
623 val.pdsk = D_OUTDATED; 627 val.pdsk = D_OUTDATED;
@@ -630,7 +634,7 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
630 retry at most once more in this case. */ 634 retry at most once more in this case. */
631 int timeo; 635 int timeo;
632 rcu_read_lock(); 636 rcu_read_lock();
633 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 637 nc = rcu_dereference(connection->net_conf);
634 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; 638 timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1;
635 rcu_read_unlock(); 639 rcu_read_unlock();
636 schedule_timeout_interruptible(timeo); 640 schedule_timeout_interruptible(timeo);
@@ -659,19 +663,17 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
659 /* FIXME also wait for all pending P_BARRIER_ACK? */ 663 /* FIXME also wait for all pending P_BARRIER_ACK? */
660 664
661 if (new_role == R_SECONDARY) { 665 if (new_role == R_SECONDARY) {
662 set_disk_ro(device->vdisk, true);
663 if (get_ldev(device)) { 666 if (get_ldev(device)) {
664 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1; 667 device->ldev->md.uuid[UI_CURRENT] &= ~(u64)1;
665 put_ldev(device); 668 put_ldev(device);
666 } 669 }
667 } else { 670 } else {
668 /* Called from drbd_adm_set_role only. 671 mutex_lock(&device->resource->conf_update);
669 * We are still holding the conf_update mutex. */ 672 nc = connection->net_conf;
670 nc = first_peer_device(device)->connection->net_conf;
671 if (nc) 673 if (nc)
672 nc->discard_my_data = 0; /* without copy; single bit op is atomic */ 674 nc->discard_my_data = 0; /* without copy; single bit op is atomic */
675 mutex_unlock(&device->resource->conf_update);
673 676
674 set_disk_ro(device->vdisk, false);
675 if (get_ldev(device)) { 677 if (get_ldev(device)) {
676 if (((device->state.conn < C_CONNECTED || 678 if (((device->state.conn < C_CONNECTED ||
677 device->state.pdsk <= D_FAILED) 679 device->state.pdsk <= D_FAILED)
@@ -689,12 +691,12 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force)
689 if (device->state.conn >= C_WF_REPORT_PARAMS) { 691 if (device->state.conn >= C_WF_REPORT_PARAMS) {
690 /* if this was forced, we should consider sync */ 692 /* if this was forced, we should consider sync */
691 if (forced) 693 if (forced)
692 drbd_send_uuids(first_peer_device(device)); 694 drbd_send_uuids(peer_device);
693 drbd_send_current_state(first_peer_device(device)); 695 drbd_send_current_state(peer_device);
694 } 696 }
695 697
696 drbd_md_sync(device); 698 drbd_md_sync(device);
697 699 set_disk_ro(device->vdisk, new_role == R_SECONDARY);
698 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); 700 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
699out: 701out:
700 mutex_unlock(device->state_mutex); 702 mutex_unlock(device->state_mutex);
@@ -891,7 +893,7 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
891 * still lock the act_log to not trigger ASSERTs there. 893 * still lock the act_log to not trigger ASSERTs there.
892 */ 894 */
893 drbd_suspend_io(device); 895 drbd_suspend_io(device);
894 buffer = drbd_md_get_buffer(device); /* Lock meta-data IO */ 896 buffer = drbd_md_get_buffer(device, __func__); /* Lock meta-data IO */
895 if (!buffer) { 897 if (!buffer) {
896 drbd_resume_io(device); 898 drbd_resume_io(device);
897 return DS_ERROR; 899 return DS_ERROR;
@@ -971,6 +973,10 @@ drbd_determine_dev_size(struct drbd_device *device, enum dds_flags flags, struct
971 if (la_size_changed || md_moved || rs) { 973 if (la_size_changed || md_moved || rs) {
972 u32 prev_flags; 974 u32 prev_flags;
973 975
976 /* We do some synchronous IO below, which may take some time.
977 * Clear the timer, to avoid scary "timer expired!" messages,
978 * "Superblock" is written out at least twice below, anyways. */
979 del_timer(&device->md_sync_timer);
974 drbd_al_shrink(device); /* All extents inactive. */ 980 drbd_al_shrink(device); /* All extents inactive. */
975 981
976 prev_flags = md->flags; 982 prev_flags = md->flags;
@@ -1116,15 +1122,16 @@ static int drbd_check_al_size(struct drbd_device *device, struct disk_conf *dc)
1116 return 0; 1122 return 0;
1117} 1123}
1118 1124
1119static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_bio_size) 1125static void drbd_setup_queue_param(struct drbd_device *device, struct drbd_backing_dev *bdev,
1126 unsigned int max_bio_size)
1120{ 1127{
1121 struct request_queue * const q = device->rq_queue; 1128 struct request_queue * const q = device->rq_queue;
1122 unsigned int max_hw_sectors = max_bio_size >> 9; 1129 unsigned int max_hw_sectors = max_bio_size >> 9;
1123 unsigned int max_segments = 0; 1130 unsigned int max_segments = 0;
1124 struct request_queue *b = NULL; 1131 struct request_queue *b = NULL;
1125 1132
1126 if (get_ldev_if_state(device, D_ATTACHING)) { 1133 if (bdev) {
1127 b = device->ldev->backing_bdev->bd_disk->queue; 1134 b = bdev->backing_bdev->bd_disk->queue;
1128 1135
1129 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); 1136 max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9);
1130 rcu_read_lock(); 1137 rcu_read_lock();
@@ -1169,11 +1176,10 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_
1169 b->backing_dev_info.ra_pages); 1176 b->backing_dev_info.ra_pages);
1170 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages; 1177 q->backing_dev_info.ra_pages = b->backing_dev_info.ra_pages;
1171 } 1178 }
1172 put_ldev(device);
1173 } 1179 }
1174} 1180}
1175 1181
1176void drbd_reconsider_max_bio_size(struct drbd_device *device) 1182void drbd_reconsider_max_bio_size(struct drbd_device *device, struct drbd_backing_dev *bdev)
1177{ 1183{
1178 unsigned int now, new, local, peer; 1184 unsigned int now, new, local, peer;
1179 1185
@@ -1181,10 +1187,9 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
1181 local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */ 1187 local = device->local_max_bio_size; /* Eventually last known value, from volatile memory */
1182 peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */ 1188 peer = device->peer_max_bio_size; /* Eventually last known value, from meta data */
1183 1189
1184 if (get_ldev_if_state(device, D_ATTACHING)) { 1190 if (bdev) {
1185 local = queue_max_hw_sectors(device->ldev->backing_bdev->bd_disk->queue) << 9; 1191 local = queue_max_hw_sectors(bdev->backing_bdev->bd_disk->queue) << 9;
1186 device->local_max_bio_size = local; 1192 device->local_max_bio_size = local;
1187 put_ldev(device);
1188 } 1193 }
1189 local = min(local, DRBD_MAX_BIO_SIZE); 1194 local = min(local, DRBD_MAX_BIO_SIZE);
1190 1195
@@ -1217,7 +1222,7 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device)
1217 if (new != now) 1222 if (new != now)
1218 drbd_info(device, "max BIO size = %u\n", new); 1223 drbd_info(device, "max BIO size = %u\n", new);
1219 1224
1220 drbd_setup_queue_param(device, new); 1225 drbd_setup_queue_param(device, bdev, new);
1221} 1226}
1222 1227
1223/* Starts the worker thread */ 1228/* Starts the worker thread */
@@ -1299,6 +1304,13 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev)
1299 return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION; 1304 return (al_size_4k - 1) * AL_CONTEXT_PER_TRANSACTION;
1300} 1305}
1301 1306
1307static bool write_ordering_changed(struct disk_conf *a, struct disk_conf *b)
1308{
1309 return a->disk_barrier != b->disk_barrier ||
1310 a->disk_flushes != b->disk_flushes ||
1311 a->disk_drain != b->disk_drain;
1312}
1313
1302int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) 1314int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1303{ 1315{
1304 struct drbd_config_context adm_ctx; 1316 struct drbd_config_context adm_ctx;
@@ -1405,7 +1417,8 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info)
1405 else 1417 else
1406 set_bit(MD_NO_FUA, &device->flags); 1418 set_bit(MD_NO_FUA, &device->flags);
1407 1419
1408 drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); 1420 if (write_ordering_changed(old_disk_conf, new_disk_conf))
1421 drbd_bump_write_ordering(device->resource, NULL, WO_bdev_flush);
1409 1422
1410 drbd_md_sync(device); 1423 drbd_md_sync(device);
1411 1424
@@ -1440,6 +1453,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1440{ 1453{
1441 struct drbd_config_context adm_ctx; 1454 struct drbd_config_context adm_ctx;
1442 struct drbd_device *device; 1455 struct drbd_device *device;
1456 struct drbd_peer_device *peer_device;
1457 struct drbd_connection *connection;
1443 int err; 1458 int err;
1444 enum drbd_ret_code retcode; 1459 enum drbd_ret_code retcode;
1445 enum determine_dev_size dd; 1460 enum determine_dev_size dd;
@@ -1462,7 +1477,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1462 1477
1463 device = adm_ctx.device; 1478 device = adm_ctx.device;
1464 mutex_lock(&adm_ctx.resource->adm_mutex); 1479 mutex_lock(&adm_ctx.resource->adm_mutex);
1465 conn_reconfig_start(first_peer_device(device)->connection); 1480 peer_device = first_peer_device(device);
1481 connection = peer_device ? peer_device->connection : NULL;
1482 conn_reconfig_start(connection);
1466 1483
1467 /* if you want to reconfigure, please tear down first */ 1484 /* if you want to reconfigure, please tear down first */
1468 if (device->state.disk > D_DISKLESS) { 1485 if (device->state.disk > D_DISKLESS) {
@@ -1473,7 +1490,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1473 * drbd_ldev_destroy is done already, we may end up here very fast, 1490 * drbd_ldev_destroy is done already, we may end up here very fast,
1474 * e.g. if someone calls attach from the on-io-error handler, 1491 * e.g. if someone calls attach from the on-io-error handler,
1475 * to realize a "hot spare" feature (not that I'd recommend that) */ 1492 * to realize a "hot spare" feature (not that I'd recommend that) */
1476 wait_event(device->misc_wait, !atomic_read(&device->local_cnt)); 1493 wait_event(device->misc_wait, !test_bit(GOING_DISKLESS, &device->flags));
1477 1494
1478 /* make sure there is no leftover from previous force-detach attempts */ 1495 /* make sure there is no leftover from previous force-detach attempts */
1479 clear_bit(FORCE_DETACH, &device->flags); 1496 clear_bit(FORCE_DETACH, &device->flags);
@@ -1529,7 +1546,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1529 goto fail; 1546 goto fail;
1530 1547
1531 rcu_read_lock(); 1548 rcu_read_lock();
1532 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 1549 nc = rcu_dereference(connection->net_conf);
1533 if (nc) { 1550 if (nc) {
1534 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) { 1551 if (new_disk_conf->fencing == FP_STONITH && nc->wire_protocol == DRBD_PROT_A) {
1535 rcu_read_unlock(); 1552 rcu_read_unlock();
@@ -1649,7 +1666,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1649 */ 1666 */
1650 wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device)); 1667 wait_event(device->misc_wait, !atomic_read(&device->ap_pending_cnt) || drbd_suspended(device));
1651 /* and for any other previously queued work */ 1668 /* and for any other previously queued work */
1652 drbd_flush_workqueue(&first_peer_device(device)->connection->sender_work); 1669 drbd_flush_workqueue(&connection->sender_work);
1653 1670
1654 rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE); 1671 rv = _drbd_request_state(device, NS(disk, D_ATTACHING), CS_VERBOSE);
1655 retcode = rv; /* FIXME: Type mismatch. */ 1672 retcode = rv; /* FIXME: Type mismatch. */
@@ -1710,7 +1727,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1710 new_disk_conf = NULL; 1727 new_disk_conf = NULL;
1711 new_plan = NULL; 1728 new_plan = NULL;
1712 1729
1713 drbd_bump_write_ordering(first_peer_device(device)->connection, WO_bdev_flush); 1730 drbd_bump_write_ordering(device->resource, device->ldev, WO_bdev_flush);
1714 1731
1715 if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY)) 1732 if (drbd_md_test_flag(device->ldev, MDF_CRASHED_PRIMARY))
1716 set_bit(CRASHED_PRIMARY, &device->flags); 1733 set_bit(CRASHED_PRIMARY, &device->flags);
@@ -1726,7 +1743,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1726 device->read_cnt = 0; 1743 device->read_cnt = 0;
1727 device->writ_cnt = 0; 1744 device->writ_cnt = 0;
1728 1745
1729 drbd_reconsider_max_bio_size(device); 1746 drbd_reconsider_max_bio_size(device, device->ldev);
1730 1747
1731 /* If I am currently not R_PRIMARY, 1748 /* If I am currently not R_PRIMARY,
1732 * but meta data primary indicator is set, 1749 * but meta data primary indicator is set,
@@ -1845,7 +1862,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1845 1862
1846 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); 1863 kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE);
1847 put_ldev(device); 1864 put_ldev(device);
1848 conn_reconfig_done(first_peer_device(device)->connection); 1865 conn_reconfig_done(connection);
1849 mutex_unlock(&adm_ctx.resource->adm_mutex); 1866 mutex_unlock(&adm_ctx.resource->adm_mutex);
1850 drbd_adm_finish(&adm_ctx, info, retcode); 1867 drbd_adm_finish(&adm_ctx, info, retcode);
1851 return 0; 1868 return 0;
@@ -1856,7 +1873,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
1856 drbd_force_state(device, NS(disk, D_DISKLESS)); 1873 drbd_force_state(device, NS(disk, D_DISKLESS));
1857 drbd_md_sync(device); 1874 drbd_md_sync(device);
1858 fail: 1875 fail:
1859 conn_reconfig_done(first_peer_device(device)->connection); 1876 conn_reconfig_done(connection);
1860 if (nbc) { 1877 if (nbc) {
1861 if (nbc->backing_bdev) 1878 if (nbc->backing_bdev)
1862 blkdev_put(nbc->backing_bdev, 1879 blkdev_put(nbc->backing_bdev,
@@ -1888,7 +1905,7 @@ static int adm_detach(struct drbd_device *device, int force)
1888 } 1905 }
1889 1906
1890 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */ 1907 drbd_suspend_io(device); /* so no-one is stuck in drbd_al_begin_io */
1891 drbd_md_get_buffer(device); /* make sure there is no in-flight meta-data IO */ 1908 drbd_md_get_buffer(device, __func__); /* make sure there is no in-flight meta-data IO */
1892 retcode = drbd_request_state(device, NS(disk, D_FAILED)); 1909 retcode = drbd_request_state(device, NS(disk, D_FAILED));
1893 drbd_md_put_buffer(device); 1910 drbd_md_put_buffer(device);
1894 /* D_FAILED will transition to DISKLESS. */ 1911 /* D_FAILED will transition to DISKLESS. */
@@ -2654,8 +2671,13 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2654 if (retcode != NO_ERROR) 2671 if (retcode != NO_ERROR)
2655 goto out; 2672 goto out;
2656 2673
2657 mutex_lock(&adm_ctx.resource->adm_mutex);
2658 device = adm_ctx.device; 2674 device = adm_ctx.device;
2675 if (!get_ldev(device)) {
2676 retcode = ERR_NO_DISK;
2677 goto out;
2678 }
2679
2680 mutex_lock(&adm_ctx.resource->adm_mutex);
2659 2681
2660 /* If there is still bitmap IO pending, probably because of a previous 2682 /* If there is still bitmap IO pending, probably because of a previous
2661 * resync just being finished, wait for it before requesting a new resync. 2683 * resync just being finished, wait for it before requesting a new resync.
@@ -2679,6 +2701,7 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info)
2679 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); 2701 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T));
2680 drbd_resume_io(device); 2702 drbd_resume_io(device);
2681 mutex_unlock(&adm_ctx.resource->adm_mutex); 2703 mutex_unlock(&adm_ctx.resource->adm_mutex);
2704 put_ldev(device);
2682out: 2705out:
2683 drbd_adm_finish(&adm_ctx, info, retcode); 2706 drbd_adm_finish(&adm_ctx, info, retcode);
2684 return 0; 2707 return 0;
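The invalidate handler now pins the local disk with get_ldev() before taking adm_mutex, so a diskless device fails fast with ERR_NO_DISK, and the reference is dropped after the mutex on the way out. A sketch of that acquire/release ordering with stand-in types and error codes:

#include <stdio.h>
#include <pthread.h>

#define NO_ERROR     0
#define ERR_NO_DISK  1                   /* stand-in value */

struct dev {
	int disk_present;
	int local_cnt;
	pthread_mutex_t adm_mutex;
};

static int get_ldev(struct dev *d)
{
	if (!d->disk_present)
		return 0;
	d->local_cnt++;
	return 1;
}
static void put_ldev(struct dev *d) { d->local_cnt--; }

static int adm_invalidate(struct dev *d)
{
	if (!get_ldev(d))
		return ERR_NO_DISK;          /* cheap check first, no mutex held */

	pthread_mutex_lock(&d->adm_mutex);
	printf("starting resync, local_cnt=%d\n", d->local_cnt);
	pthread_mutex_unlock(&d->adm_mutex);

	put_ldev(d);                         /* release in reverse order */
	return NO_ERROR;
}

int main(void)
{
	struct dev d = { .disk_present = 1, .adm_mutex = PTHREAD_MUTEX_INITIALIZER };
	return adm_invalidate(&d);
}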
@@ -2704,7 +2727,7 @@ out:
2704 return 0; 2727 return 0;
2705} 2728}
2706 2729
2707static int drbd_bmio_set_susp_al(struct drbd_device *device) 2730static int drbd_bmio_set_susp_al(struct drbd_device *device) __must_hold(local)
2708{ 2731{
2709 int rv; 2732 int rv;
2710 2733
@@ -2725,8 +2748,13 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2725 if (retcode != NO_ERROR) 2748 if (retcode != NO_ERROR)
2726 goto out; 2749 goto out;
2727 2750
2728 mutex_lock(&adm_ctx.resource->adm_mutex);
2729 device = adm_ctx.device; 2751 device = adm_ctx.device;
2752 if (!get_ldev(device)) {
2753 retcode = ERR_NO_DISK;
2754 goto out;
2755 }
2756
2757 mutex_lock(&adm_ctx.resource->adm_mutex);
2730 2758
2731 /* If there is still bitmap IO pending, probably because of a previous 2759 /* If there is still bitmap IO pending, probably because of a previous
2732 * resync just being finished, wait for it before requesting a new resync. 2760 * resync just being finished, wait for it before requesting a new resync.
@@ -2753,6 +2781,7 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info)
2753 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); 2781 retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S));
2754 drbd_resume_io(device); 2782 drbd_resume_io(device);
2755 mutex_unlock(&adm_ctx.resource->adm_mutex); 2783 mutex_unlock(&adm_ctx.resource->adm_mutex);
2784 put_ldev(device);
2756out: 2785out:
2757 drbd_adm_finish(&adm_ctx, info, retcode); 2786 drbd_adm_finish(&adm_ctx, info, retcode);
2758 return 0; 2787 return 0;
@@ -2892,7 +2921,7 @@ static struct drbd_connection *the_only_connection(struct drbd_resource *resourc
2892 return list_first_entry(&resource->connections, struct drbd_connection, connections); 2921 return list_first_entry(&resource->connections, struct drbd_connection, connections);
2893} 2922}
2894 2923
2895int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device, 2924static int nla_put_status_info(struct sk_buff *skb, struct drbd_device *device,
2896 const struct sib_info *sib) 2925 const struct sib_info *sib)
2897{ 2926{
2898 struct drbd_resource *resource = device->resource; 2927 struct drbd_resource *resource = device->resource;
@@ -3622,13 +3651,6 @@ void drbd_bcast_event(struct drbd_device *device, const struct sib_info *sib)
3622 unsigned seq; 3651 unsigned seq;
3623 int err = -ENOMEM; 3652 int err = -ENOMEM;
3624 3653
3625 if (sib->sib_reason == SIB_SYNC_PROGRESS) {
3626 if (time_after(jiffies, device->rs_last_bcast + HZ))
3627 device->rs_last_bcast = jiffies;
3628 else
3629 return;
3630 }
3631
3632 seq = atomic_inc_return(&drbd_genl_seq); 3654 seq = atomic_inc_return(&drbd_genl_seq);
3633 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO); 3655 msg = genlmsg_new(NLMSG_GOODSIZE, GFP_NOIO);
3634 if (!msg) 3656 if (!msg)
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index 89736bdbbc70..06e6147c7601 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -60,20 +60,65 @@ static void seq_printf_with_thousands_grouping(struct seq_file *seq, long v)
60 seq_printf(seq, "%ld", v); 60 seq_printf(seq, "%ld", v);
61} 61}
62 62
63static void drbd_get_syncer_progress(struct drbd_device *device,
64 union drbd_dev_state state, unsigned long *rs_total,
65 unsigned long *bits_left, unsigned int *per_mil_done)
66{
67 /* this is to break it at compile time when we change that, in case we
68 * want to support more than (1<<32) bits on a 32bit arch. */
69 typecheck(unsigned long, device->rs_total);
70 *rs_total = device->rs_total;
71
72 /* note: both rs_total and rs_left are in bits, i.e. in
73 * units of BM_BLOCK_SIZE.
74 * for the percentage, we don't care. */
75
76 if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
77 *bits_left = device->ov_left;
78 else
79 *bits_left = drbd_bm_total_weight(device) - device->rs_failed;
80 /* >> 10 to prevent overflow,
81 * +1 to prevent division by zero */
82 if (*bits_left > *rs_total) {
83 /* D'oh. Maybe a logic bug somewhere. More likely just a race
84 * between state change and reset of rs_total.
85 */
86 *bits_left = *rs_total;
87 *per_mil_done = *rs_total ? 0 : 1000;
88 } else {
89 /* Make sure the division happens in long context.
90 * We allow up to one petabyte storage right now,
91 * at a granularity of 4k per bit that is 2**38 bits.
92 * After shift right and multiplication by 1000,
93 * this should still fit easily into a 32bit long,
94 * so we don't need a 64bit division on 32bit arch.
95 * Note: currently we don't support such large bitmaps on 32bit
96 * arch anyways, but no harm done to be prepared for it here.
97 */
98 unsigned int shift = *rs_total > UINT_MAX ? 16 : 10;
99 unsigned long left = *bits_left >> shift;
100 unsigned long total = 1UL + (*rs_total >> shift);
101 unsigned long tmp = 1000UL - left * 1000UL/total;
102 *per_mil_done = tmp;
103 }
104}
105
106
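The per-mille computation in drbd_get_syncer_progress() is written to stay within unsigned long on 32-bit: both bit counts are shifted down before the multiply by 1000, the divisor gets +1 so it can never be zero, and an inconsistent snapshot (bits_left > rs_total) is clamped rather than trusted. The same arithmetic as a standalone function, with example values chosen here for illustration:

#include <stdio.h>
#include <limits.h>

static unsigned int per_mil_done(unsigned long bits_left, unsigned long rs_total)
{
	unsigned int shift;
	unsigned long left, total;

	if (bits_left > rs_total)            /* racy snapshot: clamp like the driver */
		return rs_total ? 0 : 1000;

	shift = rs_total > UINT_MAX ? 16 : 10;
	left  = bits_left >> shift;
	total = 1UL + (rs_total >> shift);   /* +1 prevents division by zero */
	return 1000UL - left * 1000UL / total;
}

int main(void)
{
	/* at 4k per bit, 1UL<<28 bits is roughly 1 TB of storage */
	printf("%u\n", per_mil_done(0, 1UL << 28));          /* 1000 == done */
	printf("%u\n", per_mil_done(1UL << 26, 1UL << 28));  /* roughly 750 */
	return 0;
}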
63/*lge 107/*lge
64 * progress bars shamelessly adapted from driver/md/md.c 108 * progress bars shamelessly adapted from driver/md/md.c
65 * output looks like 109 * output looks like
66 * [=====>..............] 33.5% (23456/123456) 110 * [=====>..............] 33.5% (23456/123456)
67 * finish: 2:20:20 speed: 6,345 (6,456) K/sec 111 * finish: 2:20:20 speed: 6,345 (6,456) K/sec
68 */ 112 */
69static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq) 113static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *seq,
114 union drbd_dev_state state)
70{ 115{
71 unsigned long db, dt, dbdt, rt, rs_left; 116 unsigned long db, dt, dbdt, rt, rs_total, rs_left;
72 unsigned int res; 117 unsigned int res;
73 int i, x, y; 118 int i, x, y;
74 int stalled = 0; 119 int stalled = 0;
75 120
76 drbd_get_syncer_progress(device, &rs_left, &res); 121 drbd_get_syncer_progress(device, state, &rs_total, &rs_left, &res);
77 122
78 x = res/50; 123 x = res/50;
79 y = 20-x; 124 y = 20-x;
@@ -85,21 +130,21 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
85 seq_printf(seq, "."); 130 seq_printf(seq, ".");
86 seq_printf(seq, "] "); 131 seq_printf(seq, "] ");
87 132
88 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T) 133 if (state.conn == C_VERIFY_S || state.conn == C_VERIFY_T)
89 seq_printf(seq, "verified:"); 134 seq_printf(seq, "verified:");
90 else 135 else
91 seq_printf(seq, "sync'ed:"); 136 seq_printf(seq, "sync'ed:");
92 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); 137 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
93 138
94 /* if more than a few GB, display in MB */ 139 /* if more than a few GB, display in MB */
95 if (device->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) 140 if (rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
96 seq_printf(seq, "(%lu/%lu)M", 141 seq_printf(seq, "(%lu/%lu)M",
97 (unsigned long) Bit2KB(rs_left >> 10), 142 (unsigned long) Bit2KB(rs_left >> 10),
98 (unsigned long) Bit2KB(device->rs_total >> 10)); 143 (unsigned long) Bit2KB(rs_total >> 10));
99 else 144 else
100 seq_printf(seq, "(%lu/%lu)K\n\t", 145 seq_printf(seq, "(%lu/%lu)K\n\t",
101 (unsigned long) Bit2KB(rs_left), 146 (unsigned long) Bit2KB(rs_left),
102 (unsigned long) Bit2KB(device->rs_total)); 147 (unsigned long) Bit2KB(rs_total));
103 148
104 /* see drivers/md/md.c 149 /* see drivers/md/md.c
105 * We do not want to overflow, so the order of operands and 150 * We do not want to overflow, so the order of operands and
@@ -150,13 +195,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
150 dt = (jiffies - device->rs_start - device->rs_paused) / HZ; 195 dt = (jiffies - device->rs_start - device->rs_paused) / HZ;
151 if (dt == 0) 196 if (dt == 0)
152 dt = 1; 197 dt = 1;
153 db = device->rs_total - rs_left; 198 db = rs_total - rs_left;
154 dbdt = Bit2KB(db/dt); 199 dbdt = Bit2KB(db/dt);
155 seq_printf_with_thousands_grouping(seq, dbdt); 200 seq_printf_with_thousands_grouping(seq, dbdt);
156 seq_printf(seq, ")"); 201 seq_printf(seq, ")");
157 202
158 if (device->state.conn == C_SYNC_TARGET || 203 if (state.conn == C_SYNC_TARGET ||
159 device->state.conn == C_VERIFY_S) { 204 state.conn == C_VERIFY_S) {
160 seq_printf(seq, " want: "); 205 seq_printf(seq, " want: ");
161 seq_printf_with_thousands_grouping(seq, device->c_sync_rate); 206 seq_printf_with_thousands_grouping(seq, device->c_sync_rate);
162 } 207 }
@@ -168,8 +213,8 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
168 unsigned long bm_bits = drbd_bm_bits(device); 213 unsigned long bm_bits = drbd_bm_bits(device);
169 unsigned long bit_pos; 214 unsigned long bit_pos;
170 unsigned long long stop_sector = 0; 215 unsigned long long stop_sector = 0;
171 if (device->state.conn == C_VERIFY_S || 216 if (state.conn == C_VERIFY_S ||
172 device->state.conn == C_VERIFY_T) { 217 state.conn == C_VERIFY_T) {
173 bit_pos = bm_bits - device->ov_left; 218 bit_pos = bm_bits - device->ov_left;
174 if (verify_can_do_stop_sector(device)) 219 if (verify_can_do_stop_sector(device))
175 stop_sector = device->ov_stop_sector; 220 stop_sector = device->ov_stop_sector;
@@ -188,22 +233,13 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se
188 } 233 }
189} 234}
190 235
191static void resync_dump_detail(struct seq_file *seq, struct lc_element *e)
192{
193 struct bm_extent *bme = lc_entry(e, struct bm_extent, lce);
194
195 seq_printf(seq, "%5d %s %s\n", bme->rs_left,
196 bme->flags & BME_NO_WRITES ? "NO_WRITES" : "---------",
197 bme->flags & BME_LOCKED ? "LOCKED" : "------"
198 );
199}
200
201static int drbd_seq_show(struct seq_file *seq, void *v) 236static int drbd_seq_show(struct seq_file *seq, void *v)
202{ 237{
203 int i, prev_i = -1; 238 int i, prev_i = -1;
204 const char *sn; 239 const char *sn;
205 struct drbd_device *device; 240 struct drbd_device *device;
206 struct net_conf *nc; 241 struct net_conf *nc;
242 union drbd_dev_state state;
207 char wp; 243 char wp;
208 244
209 static char write_ordering_chars[] = { 245 static char write_ordering_chars[] = {
@@ -241,11 +277,12 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
241 seq_printf(seq, "\n"); 277 seq_printf(seq, "\n");
242 prev_i = i; 278 prev_i = i;
243 279
244 sn = drbd_conn_str(device->state.conn); 280 state = device->state;
281 sn = drbd_conn_str(state.conn);
245 282
246 if (device->state.conn == C_STANDALONE && 283 if (state.conn == C_STANDALONE &&
247 device->state.disk == D_DISKLESS && 284 state.disk == D_DISKLESS &&
248 device->state.role == R_SECONDARY) { 285 state.role == R_SECONDARY) {
249 seq_printf(seq, "%2d: cs:Unconfigured\n", i); 286 seq_printf(seq, "%2d: cs:Unconfigured\n", i);
250 } else { 287 } else {
251 /* reset device->congestion_reason */ 288 /* reset device->congestion_reason */
@@ -258,15 +295,15 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
258 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u " 295 " ns:%u nr:%u dw:%u dr:%u al:%u bm:%u "
259 "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c", 296 "lo:%d pe:%d ua:%d ap:%d ep:%d wo:%c",
260 i, sn, 297 i, sn,
261 drbd_role_str(device->state.role), 298 drbd_role_str(state.role),
262 drbd_role_str(device->state.peer), 299 drbd_role_str(state.peer),
263 drbd_disk_str(device->state.disk), 300 drbd_disk_str(state.disk),
264 drbd_disk_str(device->state.pdsk), 301 drbd_disk_str(state.pdsk),
265 wp, 302 wp,
266 drbd_suspended(device) ? 's' : 'r', 303 drbd_suspended(device) ? 's' : 'r',
267 device->state.aftr_isp ? 'a' : '-', 304 state.aftr_isp ? 'a' : '-',
268 device->state.peer_isp ? 'p' : '-', 305 state.peer_isp ? 'p' : '-',
269 device->state.user_isp ? 'u' : '-', 306 state.user_isp ? 'u' : '-',
270 device->congestion_reason ?: '-', 307 device->congestion_reason ?: '-',
271 test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-', 308 test_bit(AL_SUSPENDED, &device->flags) ? 's' : '-',
272 device->send_cnt/2, 309 device->send_cnt/2,
@@ -281,17 +318,17 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
281 atomic_read(&device->unacked_cnt), 318 atomic_read(&device->unacked_cnt),
282 atomic_read(&device->ap_bio_cnt), 319 atomic_read(&device->ap_bio_cnt),
283 first_peer_device(device)->connection->epochs, 320 first_peer_device(device)->connection->epochs,
284 write_ordering_chars[first_peer_device(device)->connection->write_ordering] 321 write_ordering_chars[device->resource->write_ordering]
285 ); 322 );
286 seq_printf(seq, " oos:%llu\n", 323 seq_printf(seq, " oos:%llu\n",
287 Bit2KB((unsigned long long) 324 Bit2KB((unsigned long long)
288 drbd_bm_total_weight(device))); 325 drbd_bm_total_weight(device)));
289 } 326 }
290 if (device->state.conn == C_SYNC_SOURCE || 327 if (state.conn == C_SYNC_SOURCE ||
291 device->state.conn == C_SYNC_TARGET || 328 state.conn == C_SYNC_TARGET ||
292 device->state.conn == C_VERIFY_S || 329 state.conn == C_VERIFY_S ||
293 device->state.conn == C_VERIFY_T) 330 state.conn == C_VERIFY_T)
294 drbd_syncer_progress(device, seq); 331 drbd_syncer_progress(device, seq, state);
295 332
296 if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) { 333 if (proc_details >= 1 && get_ldev_if_state(device, D_FAILED)) {
297 lc_seq_printf_stats(seq, device->resync); 334 lc_seq_printf_stats(seq, device->resync);
@@ -299,12 +336,8 @@ static int drbd_seq_show(struct seq_file *seq, void *v)
299 put_ldev(device); 336 put_ldev(device);
300 } 337 }
301 338
302 if (proc_details >= 2) { 339 if (proc_details >= 2)
303 if (device->resync) { 340 seq_printf(seq, "\tblocked on activity log: %d\n", atomic_read(&device->ap_actlog_cnt));
304 lc_seq_dump_details(seq, device->resync, "rs_left",
305 resync_dump_detail);
306 }
307 }
308 } 341 }
309 rcu_read_unlock(); 342 rcu_read_unlock();
310 343
@@ -316,7 +349,7 @@ static int drbd_proc_open(struct inode *inode, struct file *file)
316 int err; 349 int err;
317 350
318 if (try_module_get(THIS_MODULE)) { 351 if (try_module_get(THIS_MODULE)) {
319 err = single_open(file, drbd_seq_show, PDE_DATA(inode)); 352 err = single_open(file, drbd_seq_show, NULL);
320 if (err) 353 if (err)
321 module_put(THIS_MODULE); 354 module_put(THIS_MODULE);
322 return err; 355 return err;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 5b17ec88ea05..9342b8da73ab 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -362,17 +362,14 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
362 goto fail; 362 goto fail;
363 } 363 }
364 364
365 memset(peer_req, 0, sizeof(*peer_req));
366 INIT_LIST_HEAD(&peer_req->w.list);
365 drbd_clear_interval(&peer_req->i); 367 drbd_clear_interval(&peer_req->i);
366 peer_req->i.size = data_size; 368 peer_req->i.size = data_size;
367 peer_req->i.sector = sector; 369 peer_req->i.sector = sector;
368 peer_req->i.local = false; 370 peer_req->submit_jif = jiffies;
369 peer_req->i.waiting = false;
370
371 peer_req->epoch = NULL;
372 peer_req->peer_device = peer_device; 371 peer_req->peer_device = peer_device;
373 peer_req->pages = page; 372 peer_req->pages = page;
374 atomic_set(&peer_req->pending_bios, 0);
375 peer_req->flags = 0;
376 /* 373 /*
377 * The block_id is opaque to the receiver. It is not endianness 374 * The block_id is opaque to the receiver. It is not endianness
378 * converted, and sent back to the sender unchanged. 375 * converted, and sent back to the sender unchanged.
@@ -389,11 +386,16 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto
389void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req, 386void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
390 int is_net) 387 int is_net)
391{ 388{
389 might_sleep();
392 if (peer_req->flags & EE_HAS_DIGEST) 390 if (peer_req->flags & EE_HAS_DIGEST)
393 kfree(peer_req->digest); 391 kfree(peer_req->digest);
394 drbd_free_pages(device, peer_req->pages, is_net); 392 drbd_free_pages(device, peer_req->pages, is_net);
395 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0); 393 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
396 D_ASSERT(device, drbd_interval_empty(&peer_req->i)); 394 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
395 if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
396 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
397 drbd_al_complete_io(device, &peer_req->i);
398 }
397 mempool_free(peer_req, drbd_ee_mempool); 399 mempool_free(peer_req, drbd_ee_mempool);
398} 400}
399 401
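drbd_alloc_peer_req() now zeroes the whole object with one memset() and only fills in the non-default fields, instead of initializing members one by one (a style that silently goes stale as fields such as submit_jif are added). An illustrative sketch with simplified, made-up field names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Simplified stand-in for struct drbd_peer_request; not the real layout. */
struct peer_request {
	unsigned long submit_jif;
	unsigned long flags;
	void *pages;
	int pending_bios;
	struct { unsigned long sector, size; } i;
};

static struct peer_request *alloc_peer_req(unsigned long sector, unsigned long size)
{
	struct peer_request *req = malloc(sizeof(*req));

	if (!req)
		return NULL;
	memset(req, 0, sizeof(*req));        /* everything else starts at 0/NULL */
	req->i.sector = sector;
	req->i.size = size;
	req->submit_jif = (unsigned long)time(NULL);
	return req;
}

int main(void)
{
	struct peer_request *req = alloc_peer_req(2048, 4096);
	if (!req)
		return 1;
	printf("sector=%lu size=%lu flags=%lu\n", req->i.sector, req->i.size, req->flags);
	free(req);
	return 0;
}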
@@ -791,8 +793,18 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
791{ 793{
792 unsigned int header_size = drbd_header_size(connection); 794 unsigned int header_size = drbd_header_size(connection);
793 struct packet_info pi; 795 struct packet_info pi;
796 struct net_conf *nc;
794 int err; 797 int err;
795 798
799 rcu_read_lock();
800 nc = rcu_dereference(connection->net_conf);
801 if (!nc) {
802 rcu_read_unlock();
803 return -EIO;
804 }
805 sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
806 rcu_read_unlock();
807
796 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0); 808 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
797 if (err != header_size) { 809 if (err != header_size) {
798 if (err >= 0) 810 if (err >= 0)
@@ -809,7 +821,7 @@ static int receive_first_packet(struct drbd_connection *connection, struct socke
809 * drbd_socket_okay() - Free the socket if its connection is not okay 821 * drbd_socket_okay() - Free the socket if its connection is not okay
810 * @sock: pointer to the pointer to the socket. 822 * @sock: pointer to the pointer to the socket.
811 */ 823 */
812static int drbd_socket_okay(struct socket **sock) 824static bool drbd_socket_okay(struct socket **sock)
813{ 825{
814 int rr; 826 int rr;
815 char tb[4]; 827 char tb[4];
@@ -827,6 +839,30 @@ static int drbd_socket_okay(struct socket **sock)
827 return false; 839 return false;
828 } 840 }
829} 841}
842
843static bool connection_established(struct drbd_connection *connection,
844 struct socket **sock1,
845 struct socket **sock2)
846{
847 struct net_conf *nc;
848 int timeout;
849 bool ok;
850
851 if (!*sock1 || !*sock2)
852 return false;
853
854 rcu_read_lock();
855 nc = rcu_dereference(connection->net_conf);
856 timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
857 rcu_read_unlock();
858 schedule_timeout_interruptible(timeout);
859
860 ok = drbd_socket_okay(sock1);
861 ok = drbd_socket_okay(sock2) && ok;
862
863 return ok;
864}
865
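connection_established() centralizes the "are both sockets still up?" check: it sleeps for sock_check_timeo (falling back to ping_timeo when that is 0, both configured in tenths of a second) before re-validating both sockets. A small model of just the timeout selection, with an assumed HZ value:

#include <stdio.h>

#define HZ 250                            /* assumption: illustrative tick rate */

struct net_conf { unsigned int ping_timeo, sock_check_timeo; };

static long established_check_timeout(const struct net_conf *nc)
{
	unsigned int tenths = nc->sock_check_timeo ? nc->sock_check_timeo
						   : nc->ping_timeo;
	return (long)tenths * HZ / 10;       /* jiffies to sleep before re-checking */
}

int main(void)
{
	struct net_conf nc = { .ping_timeo = 5, .sock_check_timeo = 0 };
	printf("%ld jiffies\n", established_check_timeout(&nc));  /* 5 * HZ / 10 */
	nc.sock_check_timeo = 20;
	printf("%ld jiffies\n", established_check_timeout(&nc));  /* 2 * HZ */
	return 0;
}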
830/* Gets called if a connection is established, or if a new minor gets created 866/* Gets called if a connection is established, or if a new minor gets created
831 in a connection */ 867 in a connection */
832int drbd_connected(struct drbd_peer_device *peer_device) 868int drbd_connected(struct drbd_peer_device *peer_device)
@@ -868,8 +904,8 @@ static int conn_connect(struct drbd_connection *connection)
868 struct drbd_socket sock, msock; 904 struct drbd_socket sock, msock;
869 struct drbd_peer_device *peer_device; 905 struct drbd_peer_device *peer_device;
870 struct net_conf *nc; 906 struct net_conf *nc;
871 int vnr, timeout, h, ok; 907 int vnr, timeout, h;
872 bool discard_my_data; 908 bool discard_my_data, ok;
873 enum drbd_state_rv rv; 909 enum drbd_state_rv rv;
874 struct accept_wait_data ad = { 910 struct accept_wait_data ad = {
875 .connection = connection, 911 .connection = connection,
@@ -913,17 +949,8 @@ static int conn_connect(struct drbd_connection *connection)
913 } 949 }
914 } 950 }
915 951
916 if (sock.socket && msock.socket) { 952 if (connection_established(connection, &sock.socket, &msock.socket))
917 rcu_read_lock(); 953 break;
918 nc = rcu_dereference(connection->net_conf);
919 timeout = nc->ping_timeo * HZ / 10;
920 rcu_read_unlock();
921 schedule_timeout_interruptible(timeout);
922 ok = drbd_socket_okay(&sock.socket);
923 ok = drbd_socket_okay(&msock.socket) && ok;
924 if (ok)
925 break;
926 }
927 954
928retry: 955retry:
929 s = drbd_wait_for_connect(connection, &ad); 956 s = drbd_wait_for_connect(connection, &ad);
@@ -969,8 +996,7 @@ randomize:
969 goto out_release_sockets; 996 goto out_release_sockets;
970 } 997 }
971 998
972 ok = drbd_socket_okay(&sock.socket); 999 ok = connection_established(connection, &sock.socket, &msock.socket);
973 ok = drbd_socket_okay(&msock.socket) && ok;
974 } while (!ok); 1000 } while (!ok);
975 1001
976 if (ad.s_listen) 1002 if (ad.s_listen)
@@ -1151,7 +1177,7 @@ static void drbd_flush(struct drbd_connection *connection)
1151 struct drbd_peer_device *peer_device; 1177 struct drbd_peer_device *peer_device;
1152 int vnr; 1178 int vnr;
1153 1179
1154 if (connection->write_ordering >= WO_bdev_flush) { 1180 if (connection->resource->write_ordering >= WO_bdev_flush) {
1155 rcu_read_lock(); 1181 rcu_read_lock();
1156 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1182 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1157 struct drbd_device *device = peer_device->device; 1183 struct drbd_device *device = peer_device->device;
@@ -1161,14 +1187,22 @@ static void drbd_flush(struct drbd_connection *connection)
1161 kref_get(&device->kref); 1187 kref_get(&device->kref);
1162 rcu_read_unlock(); 1188 rcu_read_unlock();
1163 1189
1190 /* Right now, we have only this one synchronous code path
1191 * for flushes between request epochs.
1192 * We may want to make those asynchronous,
1193 * or at least parallelize the flushes to the volume devices.
1194 */
1195 device->flush_jif = jiffies;
1196 set_bit(FLUSH_PENDING, &device->flags);
1164 rv = blkdev_issue_flush(device->ldev->backing_bdev, 1197 rv = blkdev_issue_flush(device->ldev->backing_bdev,
1165 GFP_NOIO, NULL); 1198 GFP_NOIO, NULL);
1199 clear_bit(FLUSH_PENDING, &device->flags);
1166 if (rv) { 1200 if (rv) {
1167 drbd_info(device, "local disk flush failed with status %d\n", rv); 1201 drbd_info(device, "local disk flush failed with status %d\n", rv);
1168 /* would rather check on EOPNOTSUPP, but that is not reliable. 1202 /* would rather check on EOPNOTSUPP, but that is not reliable.
1169 * don't try again for ANY return value != 0 1203 * don't try again for ANY return value != 0
1170 * if (rv == -EOPNOTSUPP) */ 1204 * if (rv == -EOPNOTSUPP) */
1171 drbd_bump_write_ordering(connection, WO_drain_io); 1205 drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
1172 } 1206 }
1173 put_ldev(device); 1207 put_ldev(device);
1174 kref_put(&device->kref, drbd_destroy_device); 1208 kref_put(&device->kref, drbd_destroy_device);
@@ -1257,15 +1291,30 @@ static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connectio
1257 return rv; 1291 return rv;
1258} 1292}
1259 1293
1294static enum write_ordering_e
1295max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1296{
1297 struct disk_conf *dc;
1298
1299 dc = rcu_dereference(bdev->disk_conf);
1300
1301 if (wo == WO_bdev_flush && !dc->disk_flushes)
1302 wo = WO_drain_io;
1303 if (wo == WO_drain_io && !dc->disk_drain)
1304 wo = WO_none;
1305
1306 return wo;
1307}
1308
1260/** 1309/**
1261 * drbd_bump_write_ordering() - Fall back to an other write ordering method 1310 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1262 * @connection: DRBD connection. 1311 * @connection: DRBD connection.
1263 * @wo: Write ordering method to try. 1312 * @wo: Write ordering method to try.
1264 */ 1313 */
1265void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo) 1314void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1315 enum write_ordering_e wo)
1266{ 1316{
1267 struct disk_conf *dc; 1317 struct drbd_device *device;
1268 struct drbd_peer_device *peer_device;
1269 enum write_ordering_e pwo; 1318 enum write_ordering_e pwo;
1270 int vnr; 1319 int vnr;
1271 static char *write_ordering_str[] = { 1320 static char *write_ordering_str[] = {
@@ -1274,26 +1323,27 @@ void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ord
1274 [WO_bdev_flush] = "flush", 1323 [WO_bdev_flush] = "flush",
1275 }; 1324 };
1276 1325
1277 pwo = connection->write_ordering; 1326 pwo = resource->write_ordering;
1278 wo = min(pwo, wo); 1327 if (wo != WO_bdev_flush)
1328 wo = min(pwo, wo);
1279 rcu_read_lock(); 1329 rcu_read_lock();
1280 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 1330 idr_for_each_entry(&resource->devices, device, vnr) {
1281 struct drbd_device *device = peer_device->device; 1331 if (get_ldev(device)) {
1332 wo = max_allowed_wo(device->ldev, wo);
1333 if (device->ldev == bdev)
1334 bdev = NULL;
1335 put_ldev(device);
1336 }
1337 }
1282 1338
1283 if (!get_ldev_if_state(device, D_ATTACHING)) 1339 if (bdev)
1284 continue; 1340 wo = max_allowed_wo(bdev, wo);
1285 dc = rcu_dereference(device->ldev->disk_conf);
1286 1341
1287 if (wo == WO_bdev_flush && !dc->disk_flushes)
1288 wo = WO_drain_io;
1289 if (wo == WO_drain_io && !dc->disk_drain)
1290 wo = WO_none;
1291 put_ldev(device);
1292 }
1293 rcu_read_unlock(); 1342 rcu_read_unlock();
1294 connection->write_ordering = wo; 1343
1295 if (pwo != connection->write_ordering || wo == WO_bdev_flush) 1344 resource->write_ordering = wo;
1296 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]); 1345 if (pwo != resource->write_ordering || wo == WO_bdev_flush)
1346 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
1297} 1347}
1298 1348
1299/** 1349/**
@@ -1330,6 +1380,13 @@ int drbd_submit_peer_request(struct drbd_device *device,
1330 /* wait for all pending IO completions, before we start 1380 /* wait for all pending IO completions, before we start
1331 * zeroing things out. */ 1381 * zeroing things out. */
1332 conn_wait_active_ee_empty(first_peer_device(device)->connection); 1382 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1383 /* add it to the active list now,
1384 * so we can find it to present it in debugfs */
1385 peer_req->submit_jif = jiffies;
1386 peer_req->flags |= EE_SUBMITTED;
1387 spin_lock_irq(&device->resource->req_lock);
1388 list_add_tail(&peer_req->w.list, &device->active_ee);
1389 spin_unlock_irq(&device->resource->req_lock);
1333 if (blkdev_issue_zeroout(device->ldev->backing_bdev, 1390 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1334 sector, ds >> 9, GFP_NOIO)) 1391 sector, ds >> 9, GFP_NOIO))
1335 peer_req->flags |= EE_WAS_ERROR; 1392 peer_req->flags |= EE_WAS_ERROR;
@@ -1398,6 +1455,9 @@ submit:
1398 D_ASSERT(device, page == NULL); 1455 D_ASSERT(device, page == NULL);
1399 1456
1400 atomic_set(&peer_req->pending_bios, n_bios); 1457 atomic_set(&peer_req->pending_bios, n_bios);
1458 /* for debugfs: update timestamp, mark as submitted */
1459 peer_req->submit_jif = jiffies;
1460 peer_req->flags |= EE_SUBMITTED;
1401 do { 1461 do {
1402 bio = bios; 1462 bio = bios;
1403 bios = bios->bi_next; 1463 bios = bios->bi_next;
@@ -1471,7 +1531,7 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
1471 * R_PRIMARY crashes now. 1531 * R_PRIMARY crashes now.
1472 * Therefore we must send the barrier_ack after the barrier request was 1532 * Therefore we must send the barrier_ack after the barrier request was
1473 * completed. */ 1533 * completed. */
1474 switch (connection->write_ordering) { 1534 switch (connection->resource->write_ordering) {
1475 case WO_none: 1535 case WO_none:
1476 if (rv == FE_RECYCLED) 1536 if (rv == FE_RECYCLED)
1477 return 0; 1537 return 0;
@@ -1498,7 +1558,8 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf
1498 1558
1499 return 0; 1559 return 0;
1500 default: 1560 default:
1501 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering); 1561 drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1562 connection->resource->write_ordering);
1502 return -EIO; 1563 return -EIO;
1503 } 1564 }
1504 1565
@@ -1531,7 +1592,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1531 struct drbd_peer_request *peer_req; 1592 struct drbd_peer_request *peer_req;
1532 struct page *page; 1593 struct page *page;
1533 int dgs, ds, err; 1594 int dgs, ds, err;
1534 int data_size = pi->size; 1595 unsigned int data_size = pi->size;
1535 void *dig_in = peer_device->connection->int_dig_in; 1596 void *dig_in = peer_device->connection->int_dig_in;
1536 void *dig_vv = peer_device->connection->int_dig_vv; 1597 void *dig_vv = peer_device->connection->int_dig_vv;
1537 unsigned long *data; 1598 unsigned long *data;
@@ -1578,6 +1639,7 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
1578 if (!peer_req) 1639 if (!peer_req)
1579 return NULL; 1640 return NULL;
1580 1641
1642 peer_req->flags |= EE_WRITE;
1581 if (trim) 1643 if (trim)
1582 return peer_req; 1644 return peer_req;
1583 1645
@@ -1734,9 +1796,10 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto
1734 * respective _drbd_clear_done_ee */ 1796 * respective _drbd_clear_done_ee */
1735 1797
1736 peer_req->w.cb = e_end_resync_block; 1798 peer_req->w.cb = e_end_resync_block;
1799 peer_req->submit_jif = jiffies;
1737 1800
1738 spin_lock_irq(&device->resource->req_lock); 1801 spin_lock_irq(&device->resource->req_lock);
1739 list_add(&peer_req->w.list, &device->sync_ee); 1802 list_add_tail(&peer_req->w.list, &device->sync_ee);
1740 spin_unlock_irq(&device->resource->req_lock); 1803 spin_unlock_irq(&device->resource->req_lock);
1741 1804
1742 atomic_add(pi->size >> 9, &device->rs_sect_ev); 1805 atomic_add(pi->size >> 9, &device->rs_sect_ev);
@@ -1889,6 +1952,7 @@ static int e_end_block(struct drbd_work *w, int cancel)
1889 } 1952 }
1890 dec_unacked(device); 1953 dec_unacked(device);
1891 } 1954 }
1955
1892 /* we delete from the conflict detection hash _after_ we sent out the 1956 /* we delete from the conflict detection hash _after_ we sent out the
1893 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */ 1957 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
1894 if (peer_req->flags & EE_IN_INTERVAL_TREE) { 1958 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
@@ -2115,6 +2179,8 @@ static int handle_write_conflicts(struct drbd_device *device,
2115 drbd_for_each_overlap(i, &device->write_requests, sector, size) { 2179 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
2116 if (i == &peer_req->i) 2180 if (i == &peer_req->i)
2117 continue; 2181 continue;
2182 if (i->completed)
2183 continue;
2118 2184
2119 if (!i->local) { 2185 if (!i->local) {
2120 /* 2186 /*
@@ -2147,7 +2213,6 @@ static int handle_write_conflicts(struct drbd_device *device,
2147 (unsigned long long)sector, size, 2213 (unsigned long long)sector, size,
2148 superseded ? "local" : "remote"); 2214 superseded ? "local" : "remote");
2149 2215
2150 inc_unacked(device);
2151 peer_req->w.cb = superseded ? e_send_superseded : 2216 peer_req->w.cb = superseded ? e_send_superseded :
2152 e_send_retry_write; 2217 e_send_retry_write;
2153 list_add_tail(&peer_req->w.list, &device->done_ee); 2218 list_add_tail(&peer_req->w.list, &device->done_ee);
@@ -2206,6 +2271,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2206{ 2271{
2207 struct drbd_peer_device *peer_device; 2272 struct drbd_peer_device *peer_device;
2208 struct drbd_device *device; 2273 struct drbd_device *device;
2274 struct net_conf *nc;
2209 sector_t sector; 2275 sector_t sector;
2210 struct drbd_peer_request *peer_req; 2276 struct drbd_peer_request *peer_req;
2211 struct p_data *p = pi->data; 2277 struct p_data *p = pi->data;
@@ -2245,6 +2311,8 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2245 } 2311 }
2246 2312
2247 peer_req->w.cb = e_end_block; 2313 peer_req->w.cb = e_end_block;
2314 peer_req->submit_jif = jiffies;
2315 peer_req->flags |= EE_APPLICATION;
2248 2316
2249 dp_flags = be32_to_cpu(p->dp_flags); 2317 dp_flags = be32_to_cpu(p->dp_flags);
2250 rw |= wire_flags_to_bio(dp_flags); 2318 rw |= wire_flags_to_bio(dp_flags);
@@ -2271,9 +2339,36 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2271 spin_unlock(&connection->epoch_lock); 2339 spin_unlock(&connection->epoch_lock);
2272 2340
2273 rcu_read_lock(); 2341 rcu_read_lock();
2274 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries; 2342 nc = rcu_dereference(peer_device->connection->net_conf);
2343 tp = nc->two_primaries;
2344 if (peer_device->connection->agreed_pro_version < 100) {
2345 switch (nc->wire_protocol) {
2346 case DRBD_PROT_C:
2347 dp_flags |= DP_SEND_WRITE_ACK;
2348 break;
2349 case DRBD_PROT_B:
2350 dp_flags |= DP_SEND_RECEIVE_ACK;
2351 break;
2352 }
2353 }
2275 rcu_read_unlock(); 2354 rcu_read_unlock();
2355
2356 if (dp_flags & DP_SEND_WRITE_ACK) {
2357 peer_req->flags |= EE_SEND_WRITE_ACK;
2358 inc_unacked(device);
2359 /* corresponding dec_unacked() in e_end_block()
2360 * respective _drbd_clear_done_ee */
2361 }
2362
2363 if (dp_flags & DP_SEND_RECEIVE_ACK) {
2364 /* I really don't like it that the receiver thread
2365 * sends on the msock, but anyways */
2366 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
2367 }
2368
2276 if (tp) { 2369 if (tp) {
2370 /* two primaries implies protocol C */
2371 D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
2277 peer_req->flags |= EE_IN_INTERVAL_TREE; 2372 peer_req->flags |= EE_IN_INTERVAL_TREE;
2278 err = wait_for_and_update_peer_seq(peer_device, peer_seq); 2373 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2279 if (err) 2374 if (err)
@@ -2297,44 +2392,18 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2297 * active_ee to become empty in drbd_submit_peer_request(); 2392 * active_ee to become empty in drbd_submit_peer_request();
2298 * better not add ourselves here. */ 2393 * better not add ourselves here. */
2299 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) 2394 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2300 list_add(&peer_req->w.list, &device->active_ee); 2395 list_add_tail(&peer_req->w.list, &device->active_ee);
2301 spin_unlock_irq(&device->resource->req_lock); 2396 spin_unlock_irq(&device->resource->req_lock);
2302 2397
2303 if (device->state.conn == C_SYNC_TARGET) 2398 if (device->state.conn == C_SYNC_TARGET)
2304 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req)); 2399 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
2305 2400
2306 if (peer_device->connection->agreed_pro_version < 100) {
2307 rcu_read_lock();
2308 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
2309 case DRBD_PROT_C:
2310 dp_flags |= DP_SEND_WRITE_ACK;
2311 break;
2312 case DRBD_PROT_B:
2313 dp_flags |= DP_SEND_RECEIVE_ACK;
2314 break;
2315 }
2316 rcu_read_unlock();
2317 }
2318
2319 if (dp_flags & DP_SEND_WRITE_ACK) {
2320 peer_req->flags |= EE_SEND_WRITE_ACK;
2321 inc_unacked(device);
2322 /* corresponding dec_unacked() in e_end_block()
2323 * respective _drbd_clear_done_ee */
2324 }
2325
2326 if (dp_flags & DP_SEND_RECEIVE_ACK) {
2327 /* I really don't like it that the receiver thread
2328 * sends on the msock, but anyways */
2329 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
2330 }
2331
2332 if (device->state.pdsk < D_INCONSISTENT) { 2401 if (device->state.pdsk < D_INCONSISTENT) {
2333 /* In case we have the only disk of the cluster, */ 2402 /* In case we have the only disk of the cluster, */
2334 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size); 2403 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
2335 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2336 peer_req->flags &= ~EE_MAY_SET_IN_SYNC; 2404 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2337 drbd_al_begin_io(device, &peer_req->i, true); 2405 drbd_al_begin_io(device, &peer_req->i);
2406 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2338 } 2407 }
2339 2408
2340 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR); 2409 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
@@ -2347,8 +2416,10 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info *
2347 list_del(&peer_req->w.list); 2416 list_del(&peer_req->w.list);
2348 drbd_remove_epoch_entry_interval(device, peer_req); 2417 drbd_remove_epoch_entry_interval(device, peer_req);
2349 spin_unlock_irq(&device->resource->req_lock); 2418 spin_unlock_irq(&device->resource->req_lock);
2350 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) 2419 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2420 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
2351 drbd_al_complete_io(device, &peer_req->i); 2421 drbd_al_complete_io(device, &peer_req->i);
2422 }
2352 2423
2353out_interrupted: 2424out_interrupted:
2354 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP); 2425 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
@@ -2368,13 +2439,14 @@ out_interrupted:
2368 * The current sync rate used here uses only the most recent two step marks, 2439 * The current sync rate used here uses only the most recent two step marks,
2369 * to have a short time average so we can react faster. 2440 * to have a short time average so we can react faster.
2370 */ 2441 */
2371bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) 2442bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2443 bool throttle_if_app_is_waiting)
2372{ 2444{
2373 struct lc_element *tmp; 2445 struct lc_element *tmp;
2374 bool throttle = true; 2446 bool throttle = drbd_rs_c_min_rate_throttle(device);
2375 2447
2376 if (!drbd_rs_c_min_rate_throttle(device)) 2448 if (!throttle || throttle_if_app_is_waiting)
2377 return false; 2449 return throttle;
2378 2450
2379 spin_lock_irq(&device->al_lock); 2451 spin_lock_irq(&device->al_lock);
2380 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); 2452 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
@@ -2382,7 +2454,8 @@ bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2382 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); 2454 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2383 if (test_bit(BME_PRIORITY, &bm_ext->flags)) 2455 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2384 throttle = false; 2456 throttle = false;
2385 /* Do not slow down if app IO is already waiting for this extent */ 2457 /* Do not slow down if app IO is already waiting for this extent,
2458 * and our progress is necessary for application IO to complete. */
2386 } 2459 }
2387 spin_unlock_irq(&device->al_lock); 2460 spin_unlock_irq(&device->al_lock);
2388 2461
@@ -2407,7 +2480,9 @@ bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
2407 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + 2480 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2408 (int)part_stat_read(&disk->part0, sectors[1]) - 2481 (int)part_stat_read(&disk->part0, sectors[1]) -
2409 atomic_read(&device->rs_sect_ev); 2482 atomic_read(&device->rs_sect_ev);
2410 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { 2483
2484 if (atomic_read(&device->ap_actlog_cnt)
2485 || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
2411 unsigned long rs_left; 2486 unsigned long rs_left;
2412 int i; 2487 int i;
2413 2488
@@ -2508,6 +2583,7 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
2508 peer_req->w.cb = w_e_end_data_req; 2583 peer_req->w.cb = w_e_end_data_req;
2509 fault_type = DRBD_FAULT_DT_RD; 2584 fault_type = DRBD_FAULT_DT_RD;
2510 /* application IO, don't drbd_rs_begin_io */ 2585 /* application IO, don't drbd_rs_begin_io */
2586 peer_req->flags |= EE_APPLICATION;
2511 goto submit; 2587 goto submit;
2512 2588
2513 case P_RS_DATA_REQUEST: 2589 case P_RS_DATA_REQUEST:
@@ -2538,6 +2614,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
2538 peer_req->w.cb = w_e_end_csum_rs_req; 2614 peer_req->w.cb = w_e_end_csum_rs_req;
2539 /* used in the sector offset progress display */ 2615 /* used in the sector offset progress display */
2540 device->bm_resync_fo = BM_SECT_TO_BIT(sector); 2616 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
2617 /* remember to report stats in drbd_resync_finished */
2618 device->use_csums = true;
2541 } else if (pi->cmd == P_OV_REPLY) { 2619 } else if (pi->cmd == P_OV_REPLY) {
2542 /* track progress, we may need to throttle */ 2620 /* track progress, we may need to throttle */
2543 atomic_add(size >> 9, &device->rs_sect_in); 2621 atomic_add(size >> 9, &device->rs_sect_in);
@@ -2595,8 +2673,20 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet
2595 * we would also throttle its application reads. 2673 * we would also throttle its application reads.
2596 * In that case, throttling is done on the SyncTarget only. 2674 * In that case, throttling is done on the SyncTarget only.
2597 */ 2675 */
2598 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector)) 2676
2677 /* Even though this may be a resync request, we do add to "read_ee";
2678 * "sync_ee" is only used for resync WRITEs.
2679 * Add to list early, so debugfs can find this request
2680 * even if we have to sleep below. */
2681 spin_lock_irq(&device->resource->req_lock);
2682 list_add_tail(&peer_req->w.list, &device->read_ee);
2683 spin_unlock_irq(&device->resource->req_lock);
2684
2685 update_receiver_timing_details(connection, drbd_rs_should_slow_down);
2686 if (device->state.peer != R_PRIMARY
2687 && drbd_rs_should_slow_down(device, sector, false))
2599 schedule_timeout_uninterruptible(HZ/10); 2688 schedule_timeout_uninterruptible(HZ/10);
2689 update_receiver_timing_details(connection, drbd_rs_begin_io);
2600 if (drbd_rs_begin_io(device, sector)) 2690 if (drbd_rs_begin_io(device, sector))
2601 goto out_free_e; 2691 goto out_free_e;
2602 2692
@@ -2604,22 +2694,20 @@ submit_for_resync:
2604 atomic_add(size >> 9, &device->rs_sect_ev); 2694 atomic_add(size >> 9, &device->rs_sect_ev);
2605 2695
2606submit: 2696submit:
2697 update_receiver_timing_details(connection, drbd_submit_peer_request);
2607 inc_unacked(device); 2698 inc_unacked(device);
2608 spin_lock_irq(&device->resource->req_lock);
2609 list_add_tail(&peer_req->w.list, &device->read_ee);
2610 spin_unlock_irq(&device->resource->req_lock);
2611
2612 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0) 2699 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
2613 return 0; 2700 return 0;
2614 2701
2615 /* don't care for the reason here */ 2702 /* don't care for the reason here */
2616 drbd_err(device, "submit failed, triggering re-connect\n"); 2703 drbd_err(device, "submit failed, triggering re-connect\n");
2704
2705out_free_e:
2617 spin_lock_irq(&device->resource->req_lock); 2706 spin_lock_irq(&device->resource->req_lock);
2618 list_del(&peer_req->w.list); 2707 list_del(&peer_req->w.list);
2619 spin_unlock_irq(&device->resource->req_lock); 2708 spin_unlock_irq(&device->resource->req_lock);
2620 /* no drbd_rs_complete_io(), we are dropping the connection anyways */ 2709 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2621 2710
2622out_free_e:
2623 put_ldev(device); 2711 put_ldev(device);
2624 drbd_free_peer_req(device, peer_req); 2712 drbd_free_peer_req(device, peer_req);
2625 return -EIO; 2713 return -EIO;
@@ -2842,8 +2930,10 @@ static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
2842-1091 requires proto 91 2930-1091 requires proto 91
2843-1096 requires proto 96 2931-1096 requires proto 96
2844 */ 2932 */
2845static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local) 2933static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
2846{ 2934{
2935 struct drbd_peer_device *const peer_device = first_peer_device(device);
2936 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
2847 u64 self, peer; 2937 u64 self, peer;
2848 int i, j; 2938 int i, j;
2849 2939
@@ -2869,7 +2959,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
2869 2959
2870 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) { 2960 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2871 2961
2872 if (first_peer_device(device)->connection->agreed_pro_version < 91) 2962 if (connection->agreed_pro_version < 91)
2873 return -1091; 2963 return -1091;
2874 2964
2875 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) && 2965 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
@@ -2892,7 +2982,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
2892 2982
2893 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) { 2983 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
2894 2984
2895 if (first_peer_device(device)->connection->agreed_pro_version < 91) 2985 if (connection->agreed_pro_version < 91)
2896 return -1091; 2986 return -1091;
2897 2987
2898 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) && 2988 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
@@ -2925,7 +3015,7 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
2925 case 1: /* self_pri && !peer_pri */ return 1; 3015 case 1: /* self_pri && !peer_pri */ return 1;
2926 case 2: /* !self_pri && peer_pri */ return -1; 3016 case 2: /* !self_pri && peer_pri */ return -1;
2927 case 3: /* self_pri && peer_pri */ 3017 case 3: /* self_pri && peer_pri */
2928 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags); 3018 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
2929 return dc ? -1 : 1; 3019 return dc ? -1 : 1;
2930 } 3020 }
2931 } 3021 }
@@ -2938,14 +3028,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
2938 *rule_nr = 51; 3028 *rule_nr = 51;
2939 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1); 3029 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
2940 if (self == peer) { 3030 if (self == peer) {
2941 if (first_peer_device(device)->connection->agreed_pro_version < 96 ? 3031 if (connection->agreed_pro_version < 96 ?
2942 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == 3032 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2943 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) : 3033 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2944 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) { 3034 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
2945 /* The last P_SYNC_UUID did not get though. Undo the last start of 3035 /* The last P_SYNC_UUID did not get though. Undo the last start of
2946 resync as sync source modifications of the peer's UUIDs. */ 3036 resync as sync source modifications of the peer's UUIDs. */
2947 3037
2948 if (first_peer_device(device)->connection->agreed_pro_version < 91) 3038 if (connection->agreed_pro_version < 91)
2949 return -1091; 3039 return -1091;
2950 3040
2951 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START]; 3041 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
@@ -2975,14 +3065,14 @@ static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_ho
2975 *rule_nr = 71; 3065 *rule_nr = 71;
2976 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1); 3066 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2977 if (self == peer) { 3067 if (self == peer) {
2978 if (first_peer_device(device)->connection->agreed_pro_version < 96 ? 3068 if (connection->agreed_pro_version < 96 ?
2979 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == 3069 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2980 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) : 3070 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2981 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) { 3071 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
2982 /* The last P_SYNC_UUID did not get though. Undo the last start of 3072 /* The last P_SYNC_UUID did not get though. Undo the last start of
2983 resync as sync source modifications of our UUIDs. */ 3073 resync as sync source modifications of our UUIDs. */
2984 3074
2985 if (first_peer_device(device)->connection->agreed_pro_version < 91) 3075 if (connection->agreed_pro_version < 91)
2986 return -1091; 3076 return -1091;
2987 3077
2988 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]); 3078 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
@@ -3352,8 +3442,7 @@ disconnect:
3352 * return: NULL (alg name was "") 3442 * return: NULL (alg name was "")
3353 * ERR_PTR(error) if something goes wrong 3443 * ERR_PTR(error) if something goes wrong
3354 * or the crypto hash ptr, if it worked out ok. */ 3444 * or the crypto hash ptr, if it worked out ok. */
3355static 3445static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
3356struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
3357 const char *alg, const char *name) 3446 const char *alg, const char *name)
3358{ 3447{
3359 struct crypto_hash *tfm; 3448 struct crypto_hash *tfm;
@@ -3639,7 +3728,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3639 struct drbd_device *device; 3728 struct drbd_device *device;
3640 struct p_sizes *p = pi->data; 3729 struct p_sizes *p = pi->data;
3641 enum determine_dev_size dd = DS_UNCHANGED; 3730 enum determine_dev_size dd = DS_UNCHANGED;
3642 sector_t p_size, p_usize, my_usize; 3731 sector_t p_size, p_usize, p_csize, my_usize;
3643 int ldsc = 0; /* local disk size changed */ 3732 int ldsc = 0; /* local disk size changed */
3644 enum dds_flags ddsf; 3733 enum dds_flags ddsf;
3645 3734
@@ -3650,6 +3739,7 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3650 3739
3651 p_size = be64_to_cpu(p->d_size); 3740 p_size = be64_to_cpu(p->d_size);
3652 p_usize = be64_to_cpu(p->u_size); 3741 p_usize = be64_to_cpu(p->u_size);
3742 p_csize = be64_to_cpu(p->c_size);
3653 3743
3654 /* just store the peer's disk size for now. 3744 /* just store the peer's disk size for now.
3655 * we still need to figure out whether we accept that. */ 3745 * we still need to figure out whether we accept that. */
@@ -3710,7 +3800,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3710 } 3800 }
3711 3801
3712 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); 3802 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3713 drbd_reconsider_max_bio_size(device);
3714 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size(). 3803 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3715 In case we cleared the QUEUE_FLAG_DISCARD from our queue in 3804 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3716 drbd_reconsider_max_bio_size(), we can be sure that after 3805 drbd_reconsider_max_bio_size(), we can be sure that after
@@ -3718,14 +3807,28 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info
3718 3807
3719 ddsf = be16_to_cpu(p->dds_flags); 3808 ddsf = be16_to_cpu(p->dds_flags);
3720 if (get_ldev(device)) { 3809 if (get_ldev(device)) {
3810 drbd_reconsider_max_bio_size(device, device->ldev);
3721 dd = drbd_determine_dev_size(device, ddsf, NULL); 3811 dd = drbd_determine_dev_size(device, ddsf, NULL);
3722 put_ldev(device); 3812 put_ldev(device);
3723 if (dd == DS_ERROR) 3813 if (dd == DS_ERROR)
3724 return -EIO; 3814 return -EIO;
3725 drbd_md_sync(device); 3815 drbd_md_sync(device);
3726 } else { 3816 } else {
3727 /* I am diskless, need to accept the peer's size. */ 3817 /*
3728 drbd_set_my_capacity(device, p_size); 3818 * I am diskless, need to accept the peer's *current* size.
3819 * I must NOT accept the peers backing disk size,
3820 * it may have been larger than mine all along...
3821 *
3822 * At this point, the peer knows more about my disk, or at
3823 * least about what we last agreed upon, than myself.
3824 * So if his c_size is less than his d_size, the most likely
3825 * reason is that *my* d_size was smaller last time we checked.
3826 *
3827 * However, if he sends a zero current size,
3828 * take his (user-capped or) backing disk size anyways.
3829 */
3830 drbd_reconsider_max_bio_size(device, NULL);
3831 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
3729 } 3832 }
3730 3833
3731 if (get_ldev(device)) { 3834 if (get_ldev(device)) {
@@ -4501,6 +4604,7 @@ static void drbdd(struct drbd_connection *connection)
4501 struct data_cmd *cmd; 4604 struct data_cmd *cmd;
4502 4605
4503 drbd_thread_current_set_cpu(&connection->receiver); 4606 drbd_thread_current_set_cpu(&connection->receiver);
4607 update_receiver_timing_details(connection, drbd_recv_header);
4504 if (drbd_recv_header(connection, &pi)) 4608 if (drbd_recv_header(connection, &pi))
4505 goto err_out; 4609 goto err_out;
4506 4610
@@ -4519,12 +4623,14 @@ static void drbdd(struct drbd_connection *connection)
4519 } 4623 }
4520 4624
4521 if (shs) { 4625 if (shs) {
4626 update_receiver_timing_details(connection, drbd_recv_all_warn);
4522 err = drbd_recv_all_warn(connection, pi.data, shs); 4627 err = drbd_recv_all_warn(connection, pi.data, shs);
4523 if (err) 4628 if (err)
4524 goto err_out; 4629 goto err_out;
4525 pi.size -= shs; 4630 pi.size -= shs;
4526 } 4631 }
4527 4632
4633 update_receiver_timing_details(connection, cmd->fn);
4528 err = cmd->fn(connection, &pi); 4634 err = cmd->fn(connection, &pi);
4529 if (err) { 4635 if (err) {
4530 drbd_err(connection, "error receiving %s, e: %d l: %d!\n", 4636 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 09803d0d5207..c67717d572d1 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -52,7 +52,7 @@ static void _drbd_start_io_acct(struct drbd_device *device, struct drbd_request
52static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req) 52static void _drbd_end_io_acct(struct drbd_device *device, struct drbd_request *req)
53{ 53{
54 int rw = bio_data_dir(req->master_bio); 54 int rw = bio_data_dir(req->master_bio);
55 unsigned long duration = jiffies - req->start_time; 55 unsigned long duration = jiffies - req->start_jif;
56 int cpu; 56 int cpu;
57 cpu = part_stat_lock(); 57 cpu = part_stat_lock();
58 part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration); 58 part_stat_add(cpu, &device->vdisk->part0, ticks[rw], duration);
@@ -66,7 +66,7 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
66{ 66{
67 struct drbd_request *req; 67 struct drbd_request *req;
68 68
69 req = mempool_alloc(drbd_request_mempool, GFP_NOIO); 69 req = mempool_alloc(drbd_request_mempool, GFP_NOIO | __GFP_ZERO);
70 if (!req) 70 if (!req)
71 return NULL; 71 return NULL;
72 72
@@ -84,6 +84,8 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
84 84
85 INIT_LIST_HEAD(&req->tl_requests); 85 INIT_LIST_HEAD(&req->tl_requests);
86 INIT_LIST_HEAD(&req->w.list); 86 INIT_LIST_HEAD(&req->w.list);
87 INIT_LIST_HEAD(&req->req_pending_master_completion);
88 INIT_LIST_HEAD(&req->req_pending_local);
87 89
88 /* one reference to be put by __drbd_make_request */ 90 /* one reference to be put by __drbd_make_request */
89 atomic_set(&req->completion_ref, 1); 91 atomic_set(&req->completion_ref, 1);
@@ -92,6 +94,19 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device,
92 return req; 94 return req;
93} 95}
94 96
97static void drbd_remove_request_interval(struct rb_root *root,
98 struct drbd_request *req)
99{
100 struct drbd_device *device = req->device;
101 struct drbd_interval *i = &req->i;
102
103 drbd_remove_interval(root, i);
104
105 /* Wake up any processes waiting for this request to complete. */
106 if (i->waiting)
107 wake_up(&device->misc_wait);
108}
109
95void drbd_req_destroy(struct kref *kref) 110void drbd_req_destroy(struct kref *kref)
96{ 111{
97 struct drbd_request *req = container_of(kref, struct drbd_request, kref); 112 struct drbd_request *req = container_of(kref, struct drbd_request, kref);
@@ -107,14 +122,30 @@ void drbd_req_destroy(struct kref *kref)
107 return; 122 return;
108 } 123 }
109 124
110 /* remove it from the transfer log. 125 /* If called from mod_rq_state (expected normal case) or
111 * well, only if it had been there in the first 126 * drbd_send_and_submit (the less likely normal path), this holds the
112 * place... if it had not (local only or conflicting 127 * req_lock, and req->tl_requests will typicaly be on ->transfer_log,
113 * and never sent), it should still be "empty" as 128 * though it may be still empty (never added to the transfer log).
114 * initialized in drbd_req_new(), so we can list_del() it 129 *
115 * here unconditionally */ 130 * If called from do_retry(), we do NOT hold the req_lock, but we are
131 * still allowed to unconditionally list_del(&req->tl_requests),
132 * because it will be on a local on-stack list only. */
116 list_del_init(&req->tl_requests); 133 list_del_init(&req->tl_requests);
117 134
135 /* finally remove the request from the conflict detection
136 * respective block_id verification interval tree. */
137 if (!drbd_interval_empty(&req->i)) {
138 struct rb_root *root;
139
140 if (s & RQ_WRITE)
141 root = &device->write_requests;
142 else
143 root = &device->read_requests;
144 drbd_remove_request_interval(root, req);
145 } else if (s & (RQ_NET_MASK & ~RQ_NET_DONE) && req->i.size != 0)
146 drbd_err(device, "drbd_req_destroy: Logic BUG: interval empty, but: rq_state=0x%x, sect=%llu, size=%u\n",
147 s, (unsigned long long)req->i.sector, req->i.size);
148
118 /* if it was a write, we may have to set the corresponding 149 /* if it was a write, we may have to set the corresponding
119 * bit(s) out-of-sync first. If it had a local part, we need to 150 * bit(s) out-of-sync first. If it had a local part, we need to
120 * release the reference to the activity log. */ 151 * release the reference to the activity log. */
@@ -188,19 +219,6 @@ void complete_master_bio(struct drbd_device *device,
188} 219}
189 220
190 221
191static void drbd_remove_request_interval(struct rb_root *root,
192 struct drbd_request *req)
193{
194 struct drbd_device *device = req->device;
195 struct drbd_interval *i = &req->i;
196
197 drbd_remove_interval(root, i);
198
199 /* Wake up any processes waiting for this request to complete. */
200 if (i->waiting)
201 wake_up(&device->misc_wait);
202}
203
204/* Helper for __req_mod(). 222/* Helper for __req_mod().
205 * Set m->bio to the master bio, if it is fit to be completed, 223 * Set m->bio to the master bio, if it is fit to be completed,
206 * or leave it alone (it is initialized to NULL in __req_mod), 224 * or leave it alone (it is initialized to NULL in __req_mod),
@@ -254,18 +272,6 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
254 ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK); 272 ok = (s & RQ_LOCAL_OK) || (s & RQ_NET_OK);
255 error = PTR_ERR(req->private_bio); 273 error = PTR_ERR(req->private_bio);
256 274
257 /* remove the request from the conflict detection
258 * respective block_id verification hash */
259 if (!drbd_interval_empty(&req->i)) {
260 struct rb_root *root;
261
262 if (rw == WRITE)
263 root = &device->write_requests;
264 else
265 root = &device->read_requests;
266 drbd_remove_request_interval(root, req);
267 }
268
269 /* Before we can signal completion to the upper layers, 275 /* Before we can signal completion to the upper layers,
270 * we may need to close the current transfer log epoch. 276 * we may need to close the current transfer log epoch.
271 * We are within the request lock, so we can simply compare 277 * We are within the request lock, so we can simply compare
@@ -301,9 +307,24 @@ void drbd_req_complete(struct drbd_request *req, struct bio_and_error *m)
301 m->error = ok ? 0 : (error ?: -EIO); 307 m->error = ok ? 0 : (error ?: -EIO);
302 m->bio = req->master_bio; 308 m->bio = req->master_bio;
303 req->master_bio = NULL; 309 req->master_bio = NULL;
310 /* We leave it in the tree, to be able to verify later
311 * write-acks in protocol != C during resync.
312 * But we mark it as "complete", so it won't be counted as
313 * conflict in a multi-primary setup. */
314 req->i.completed = true;
304 } 315 }
316
317 if (req->i.waiting)
318 wake_up(&device->misc_wait);
319
320 /* Either we are about to complete to upper layers,
321 * or we will restart this request.
322 * In either case, the request object will be destroyed soon,
323 * so better remove it from all lists. */
324 list_del_init(&req->req_pending_master_completion);
305} 325}
306 326
327/* still holds resource->req_lock */
307static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put) 328static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_error *m, int put)
308{ 329{
309 struct drbd_device *device = req->device; 330 struct drbd_device *device = req->device;
@@ -324,12 +345,91 @@ static int drbd_req_put_completion_ref(struct drbd_request *req, struct bio_and_
324 return 1; 345 return 1;
325} 346}
326 347
348static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
349{
350 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
351 if (!connection)
352 return;
353 if (connection->req_next == NULL)
354 connection->req_next = req;
355}
356
357static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req)
358{
359 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
360 if (!connection)
361 return;
362 if (connection->req_next != req)
363 return;
364 list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
365 const unsigned s = req->rq_state;
366 if (s & RQ_NET_QUEUED)
367 break;
368 }
369 if (&req->tl_requests == &connection->transfer_log)
370 req = NULL;
371 connection->req_next = req;
372}
373
374static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
375{
376 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
377 if (!connection)
378 return;
379 if (connection->req_ack_pending == NULL)
380 connection->req_ack_pending = req;
381}
382
383static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req)
384{
385 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
386 if (!connection)
387 return;
388 if (connection->req_ack_pending != req)
389 return;
390 list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
391 const unsigned s = req->rq_state;
392 if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING))
393 break;
394 }
395 if (&req->tl_requests == &connection->transfer_log)
396 req = NULL;
397 connection->req_ack_pending = req;
398}
399
400static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
401{
402 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
403 if (!connection)
404 return;
405 if (connection->req_not_net_done == NULL)
406 connection->req_not_net_done = req;
407}
408
409static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req)
410{
411 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
412 if (!connection)
413 return;
414 if (connection->req_not_net_done != req)
415 return;
416 list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) {
417 const unsigned s = req->rq_state;
418 if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE))
419 break;
420 }
421 if (&req->tl_requests == &connection->transfer_log)
422 req = NULL;
423 connection->req_not_net_done = req;
424}
425
327/* I'd like this to be the only place that manipulates 426/* I'd like this to be the only place that manipulates
328 * req->completion_ref and req->kref. */ 427 * req->completion_ref and req->kref. */
329static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m, 428static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
330 int clear, int set) 429 int clear, int set)
331{ 430{
332 struct drbd_device *device = req->device; 431 struct drbd_device *device = req->device;
432 struct drbd_peer_device *peer_device = first_peer_device(device);
333 unsigned s = req->rq_state; 433 unsigned s = req->rq_state;
334 int c_put = 0; 434 int c_put = 0;
335 int k_put = 0; 435 int k_put = 0;
@@ -356,14 +456,23 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
356 atomic_inc(&req->completion_ref); 456 atomic_inc(&req->completion_ref);
357 } 457 }
358 458
359 if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) 459 if (!(s & RQ_NET_QUEUED) && (set & RQ_NET_QUEUED)) {
360 atomic_inc(&req->completion_ref); 460 atomic_inc(&req->completion_ref);
461 set_if_null_req_next(peer_device, req);
462 }
361 463
362 if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK)) 464 if (!(s & RQ_EXP_BARR_ACK) && (set & RQ_EXP_BARR_ACK))
363 kref_get(&req->kref); /* wait for the DONE */ 465 kref_get(&req->kref); /* wait for the DONE */
364 466
365 if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) 467 if (!(s & RQ_NET_SENT) && (set & RQ_NET_SENT)) {
366 atomic_add(req->i.size >> 9, &device->ap_in_flight); 468 /* potentially already completed in the asender thread */
469 if (!(s & RQ_NET_DONE)) {
470 atomic_add(req->i.size >> 9, &device->ap_in_flight);
471 set_if_null_req_not_net_done(peer_device, req);
472 }
473 if (s & RQ_NET_PENDING)
474 set_if_null_req_ack_pending(peer_device, req);
475 }
367 476
368 if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP)) 477 if (!(s & RQ_COMPLETION_SUSP) && (set & RQ_COMPLETION_SUSP))
369 atomic_inc(&req->completion_ref); 478 atomic_inc(&req->completion_ref);
@@ -386,20 +495,34 @@ static void mod_rq_state(struct drbd_request *req, struct bio_and_error *m,
386 ++k_put; 495 ++k_put;
387 else 496 else
388 ++c_put; 497 ++c_put;
498 list_del_init(&req->req_pending_local);
389 } 499 }
390 500
391 if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) { 501 if ((s & RQ_NET_PENDING) && (clear & RQ_NET_PENDING)) {
392 dec_ap_pending(device); 502 dec_ap_pending(device);
393 ++c_put; 503 ++c_put;
504 req->acked_jif = jiffies;
505 advance_conn_req_ack_pending(peer_device, req);
394 } 506 }
395 507
396 if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) 508 if ((s & RQ_NET_QUEUED) && (clear & RQ_NET_QUEUED)) {
397 ++c_put; 509 ++c_put;
510 advance_conn_req_next(peer_device, req);
511 }
398 512
399 if ((s & RQ_EXP_BARR_ACK) && !(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) { 513 if (!(s & RQ_NET_DONE) && (set & RQ_NET_DONE)) {
400 if (req->rq_state & RQ_NET_SENT) 514 if (s & RQ_NET_SENT)
401 atomic_sub(req->i.size >> 9, &device->ap_in_flight); 515 atomic_sub(req->i.size >> 9, &device->ap_in_flight);
402 ++k_put; 516 if (s & RQ_EXP_BARR_ACK)
517 ++k_put;
518 req->net_done_jif = jiffies;
519
520 /* in ahead/behind mode, or just in case,
521 * before we finally destroy this request,
522 * the caching pointers must not reference it anymore */
523 advance_conn_req_next(peer_device, req);
524 advance_conn_req_ack_pending(peer_device, req);
525 advance_conn_req_not_net_done(peer_device, req);
403 } 526 }
404 527
405 /* potentially complete and destroy */ 528 /* potentially complete and destroy */
@@ -439,6 +562,19 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
439 bdevname(device->ldev->backing_bdev, b)); 562 bdevname(device->ldev->backing_bdev, b));
440} 563}
441 564
565/* Helper for HANDED_OVER_TO_NETWORK.
566 * Is this a protocol A write (neither WRITE_ACK nor RECEIVE_ACK expected)?
567 * Is it also still "PENDING"?
568 * --> If so, clear PENDING and set NET_OK below.
569 * If it is a protocol A write, but not RQ_PENDING anymore, neg-ack was faster
570 * (and we must not set RQ_NET_OK) */
571static inline bool is_pending_write_protocol_A(struct drbd_request *req)
572{
573 return (req->rq_state &
574 (RQ_WRITE|RQ_NET_PENDING|RQ_EXP_WRITE_ACK|RQ_EXP_RECEIVE_ACK))
575 == (RQ_WRITE|RQ_NET_PENDING);
576}
577
442/* obviously this could be coded as many single functions 578/* obviously this could be coded as many single functions
443 * instead of one huge switch, 579 * instead of one huge switch,
444 * or by putting the code directly in the respective locations 580 * or by putting the code directly in the respective locations
@@ -454,7 +590,9 @@ static void drbd_report_io_error(struct drbd_device *device, struct drbd_request
454int __req_mod(struct drbd_request *req, enum drbd_req_event what, 590int __req_mod(struct drbd_request *req, enum drbd_req_event what,
455 struct bio_and_error *m) 591 struct bio_and_error *m)
456{ 592{
457 struct drbd_device *device = req->device; 593 struct drbd_device *const device = req->device;
594 struct drbd_peer_device *const peer_device = first_peer_device(device);
595 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
458 struct net_conf *nc; 596 struct net_conf *nc;
459 int p, rv = 0; 597 int p, rv = 0;
460 598
@@ -477,7 +615,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
477 * and from w_read_retry_remote */ 615 * and from w_read_retry_remote */
478 D_ASSERT(device, !(req->rq_state & RQ_NET_MASK)); 616 D_ASSERT(device, !(req->rq_state & RQ_NET_MASK));
479 rcu_read_lock(); 617 rcu_read_lock();
480 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 618 nc = rcu_dereference(connection->net_conf);
481 p = nc->wire_protocol; 619 p = nc->wire_protocol;
482 rcu_read_unlock(); 620 rcu_read_unlock();
483 req->rq_state |= 621 req->rq_state |=
@@ -549,7 +687,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
549 D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0); 687 D_ASSERT(device, (req->rq_state & RQ_LOCAL_MASK) == 0);
550 mod_rq_state(req, m, 0, RQ_NET_QUEUED); 688 mod_rq_state(req, m, 0, RQ_NET_QUEUED);
551 req->w.cb = w_send_read_req; 689 req->w.cb = w_send_read_req;
552 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 690 drbd_queue_work(&connection->sender_work,
553 &req->w); 691 &req->w);
554 break; 692 break;
555 693
@@ -585,23 +723,23 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
585 D_ASSERT(device, req->rq_state & RQ_NET_PENDING); 723 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
586 mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK); 724 mod_rq_state(req, m, 0, RQ_NET_QUEUED|RQ_EXP_BARR_ACK);
587 req->w.cb = w_send_dblock; 725 req->w.cb = w_send_dblock;
588 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 726 drbd_queue_work(&connection->sender_work,
589 &req->w); 727 &req->w);
590 728
591 /* close the epoch, in case it outgrew the limit */ 729 /* close the epoch, in case it outgrew the limit */
592 rcu_read_lock(); 730 rcu_read_lock();
593 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 731 nc = rcu_dereference(connection->net_conf);
594 p = nc->max_epoch_size; 732 p = nc->max_epoch_size;
595 rcu_read_unlock(); 733 rcu_read_unlock();
596 if (first_peer_device(device)->connection->current_tle_writes >= p) 734 if (connection->current_tle_writes >= p)
597 start_new_tl_epoch(first_peer_device(device)->connection); 735 start_new_tl_epoch(connection);
598 736
599 break; 737 break;
600 738
601 case QUEUE_FOR_SEND_OOS: 739 case QUEUE_FOR_SEND_OOS:
602 mod_rq_state(req, m, 0, RQ_NET_QUEUED); 740 mod_rq_state(req, m, 0, RQ_NET_QUEUED);
603 req->w.cb = w_send_out_of_sync; 741 req->w.cb = w_send_out_of_sync;
604 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 742 drbd_queue_work(&connection->sender_work,
605 &req->w); 743 &req->w);
606 break; 744 break;
607 745
@@ -615,18 +753,16 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
615 753
616 case HANDED_OVER_TO_NETWORK: 754 case HANDED_OVER_TO_NETWORK:
617 /* assert something? */ 755 /* assert something? */
618 if (bio_data_dir(req->master_bio) == WRITE && 756 if (is_pending_write_protocol_A(req))
619 !(req->rq_state & (RQ_EXP_RECEIVE_ACK | RQ_EXP_WRITE_ACK))) {
620 /* this is what is dangerous about protocol A: 757 /* this is what is dangerous about protocol A:
621 * pretend it was successfully written on the peer. */ 758 * pretend it was successfully written on the peer. */
622 if (req->rq_state & RQ_NET_PENDING) 759 mod_rq_state(req, m, RQ_NET_QUEUED|RQ_NET_PENDING,
623 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); 760 RQ_NET_SENT|RQ_NET_OK);
624 /* else: neg-ack was faster... */ 761 else
625 /* it is still not yet RQ_NET_DONE until the 762 mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
626 * corresponding epoch barrier got acked as well, 763 /* It is still not yet RQ_NET_DONE until the
627 * so we know what to dirty on connection loss */ 764 * corresponding epoch barrier got acked as well,
628 } 765 * so we know what to dirty on connection loss. */
629 mod_rq_state(req, m, RQ_NET_QUEUED, RQ_NET_SENT);
630 break; 766 break;
631 767
632 case OOS_HANDED_TO_NETWORK: 768 case OOS_HANDED_TO_NETWORK:
@@ -658,12 +794,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
658 case WRITE_ACKED_BY_PEER_AND_SIS: 794 case WRITE_ACKED_BY_PEER_AND_SIS:
659 req->rq_state |= RQ_NET_SIS; 795 req->rq_state |= RQ_NET_SIS;
660 case WRITE_ACKED_BY_PEER: 796 case WRITE_ACKED_BY_PEER:
661 D_ASSERT(device, req->rq_state & RQ_EXP_WRITE_ACK); 797 /* Normal operation protocol C: successfully written on peer.
662 /* protocol C; successfully written on peer. 798 * During resync, even in protocol != C,
799 * we requested an explicit write ack anyways.
800 * Which means we cannot even assert anything here.
663 * Nothing more to do here. 801 * Nothing more to do here.
664 * We want to keep the tl in place for all protocols, to cater 802 * We want to keep the tl in place for all protocols, to cater
665 * for volatile write-back caches on lower level devices. */ 803 * for volatile write-back caches on lower level devices. */
666
667 goto ack_common; 804 goto ack_common;
668 case RECV_ACKED_BY_PEER: 805 case RECV_ACKED_BY_PEER:
669 D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK); 806 D_ASSERT(device, req->rq_state & RQ_EXP_RECEIVE_ACK);
@@ -671,7 +808,6 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
671 * see also notes above in HANDED_OVER_TO_NETWORK about 808 * see also notes above in HANDED_OVER_TO_NETWORK about
672 * protocol != C */ 809 * protocol != C */
673 ack_common: 810 ack_common:
674 D_ASSERT(device, req->rq_state & RQ_NET_PENDING);
675 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK); 811 mod_rq_state(req, m, RQ_NET_PENDING, RQ_NET_OK);
676 break; 812 break;
677 813
@@ -714,7 +850,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
714 850
715 get_ldev(device); /* always succeeds in this call path */ 851 get_ldev(device); /* always succeeds in this call path */
716 req->w.cb = w_restart_disk_io; 852 req->w.cb = w_restart_disk_io;
717 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 853 drbd_queue_work(&connection->sender_work,
718 &req->w); 854 &req->w);
719 break; 855 break;
720 856
@@ -736,7 +872,8 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
736 872
737 mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING); 873 mod_rq_state(req, m, RQ_COMPLETION_SUSP, RQ_NET_QUEUED|RQ_NET_PENDING);
738 if (req->w.cb) { 874 if (req->w.cb) {
739 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 875 /* w.cb expected to be w_send_dblock, or w_send_read_req */
876 drbd_queue_work(&connection->sender_work,
740 &req->w); 877 &req->w);
741 rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ; 878 rv = req->rq_state & RQ_WRITE ? MR_WRITE : MR_READ;
742 } /* else: FIXME can this happen? */ 879 } /* else: FIXME can this happen? */
@@ -769,7 +906,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
769 break; 906 break;
770 907
771 case QUEUE_AS_DRBD_BARRIER: 908 case QUEUE_AS_DRBD_BARRIER:
772 start_new_tl_epoch(first_peer_device(device)->connection); 909 start_new_tl_epoch(connection);
773 mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE); 910 mod_rq_state(req, m, 0, RQ_NET_OK|RQ_NET_DONE);
774 break; 911 break;
775 }; 912 };
@@ -886,6 +1023,9 @@ static void maybe_pull_ahead(struct drbd_device *device)
886 connection->agreed_pro_version < 96) 1023 connection->agreed_pro_version < 96)
887 return; 1024 return;
888 1025
1026 if (on_congestion == OC_PULL_AHEAD && device->state.conn == C_AHEAD)
1027 return; /* nothing to do ... */
1028
889 /* If I don't even have good local storage, we can not reasonably try 1029 /* If I don't even have good local storage, we can not reasonably try
890 * to pull ahead of the peer. We also need the local reference to make 1030 * to pull ahead of the peer. We also need the local reference to make
891 * sure device->act_log is there. 1031 * sure device->act_log is there.
@@ -1021,6 +1161,7 @@ drbd_submit_req_private_bio(struct drbd_request *req)
1021 * stable storage, and this is a WRITE, we may not even submit 1161 * stable storage, and this is a WRITE, we may not even submit
1022 * this bio. */ 1162 * this bio. */
1023 if (get_ldev(device)) { 1163 if (get_ldev(device)) {
1164 req->pre_submit_jif = jiffies;
1024 if (drbd_insert_fault(device, 1165 if (drbd_insert_fault(device,
1025 rw == WRITE ? DRBD_FAULT_DT_WR 1166 rw == WRITE ? DRBD_FAULT_DT_WR
1026 : rw == READ ? DRBD_FAULT_DT_RD 1167 : rw == READ ? DRBD_FAULT_DT_RD
@@ -1035,10 +1176,14 @@ drbd_submit_req_private_bio(struct drbd_request *req)
1035 1176
1036static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req) 1177static void drbd_queue_write(struct drbd_device *device, struct drbd_request *req)
1037{ 1178{
1038 spin_lock(&device->submit.lock); 1179 spin_lock_irq(&device->resource->req_lock);
1039 list_add_tail(&req->tl_requests, &device->submit.writes); 1180 list_add_tail(&req->tl_requests, &device->submit.writes);
1040 spin_unlock(&device->submit.lock); 1181 list_add_tail(&req->req_pending_master_completion,
1182 &device->pending_master_completion[1 /* WRITE */]);
1183 spin_unlock_irq(&device->resource->req_lock);
1041 queue_work(device->submit.wq, &device->submit.worker); 1184 queue_work(device->submit.wq, &device->submit.worker);
1185 /* do_submit() may sleep internally on al_wait, too */
1186 wake_up(&device->al_wait);
1042} 1187}
1043 1188
1044/* returns the new drbd_request pointer, if the caller is expected to 1189/* returns the new drbd_request pointer, if the caller is expected to
@@ -1047,7 +1192,7 @@ static void drbd_queue_write(struct drbd_device *device, struct drbd_request *re
1047 * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request. 1192 * Returns ERR_PTR(-ENOMEM) if we cannot allocate a drbd_request.
1048 */ 1193 */
1049static struct drbd_request * 1194static struct drbd_request *
1050drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_time) 1195drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
1051{ 1196{
1052 const int rw = bio_data_dir(bio); 1197 const int rw = bio_data_dir(bio);
1053 struct drbd_request *req; 1198 struct drbd_request *req;
@@ -1062,7 +1207,7 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
1062 bio_endio(bio, -ENOMEM); 1207 bio_endio(bio, -ENOMEM);
1063 return ERR_PTR(-ENOMEM); 1208 return ERR_PTR(-ENOMEM);
1064 } 1209 }
1065 req->start_time = start_time; 1210 req->start_jif = start_jif;
1066 1211
1067 if (!get_ldev(device)) { 1212 if (!get_ldev(device)) {
1068 bio_put(req->private_bio); 1213 bio_put(req->private_bio);
@@ -1075,10 +1220,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
1075 if (rw == WRITE && req->private_bio && req->i.size 1220 if (rw == WRITE && req->private_bio && req->i.size
1076 && !test_bit(AL_SUSPENDED, &device->flags)) { 1221 && !test_bit(AL_SUSPENDED, &device->flags)) {
1077 if (!drbd_al_begin_io_fastpath(device, &req->i)) { 1222 if (!drbd_al_begin_io_fastpath(device, &req->i)) {
1223 atomic_inc(&device->ap_actlog_cnt);
1078 drbd_queue_write(device, req); 1224 drbd_queue_write(device, req);
1079 return NULL; 1225 return NULL;
1080 } 1226 }
1081 req->rq_state |= RQ_IN_ACT_LOG; 1227 req->rq_state |= RQ_IN_ACT_LOG;
1228 req->in_actlog_jif = jiffies;
1082 } 1229 }
1083 1230
1084 return req; 1231 return req;
@@ -1086,11 +1233,13 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio, unsigned long
1086 1233
1087static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req) 1234static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request *req)
1088{ 1235{
1236 struct drbd_resource *resource = device->resource;
1089 const int rw = bio_rw(req->master_bio); 1237 const int rw = bio_rw(req->master_bio);
1090 struct bio_and_error m = { NULL, }; 1238 struct bio_and_error m = { NULL, };
1091 bool no_remote = false; 1239 bool no_remote = false;
1240 bool submit_private_bio = false;
1092 1241
1093 spin_lock_irq(&device->resource->req_lock); 1242 spin_lock_irq(&resource->req_lock);
1094 if (rw == WRITE) { 1243 if (rw == WRITE) {
1095 /* This may temporarily give up the req_lock, 1244 /* This may temporarily give up the req_lock,
1096 * but will re-aquire it before it returns here. 1245 * but will re-aquire it before it returns here.
@@ -1148,13 +1297,18 @@ static void drbd_send_and_submit(struct drbd_device *device, struct drbd_request
1148 no_remote = true; 1297 no_remote = true;
1149 } 1298 }
1150 1299
1300 /* If it took the fast path in drbd_request_prepare, add it here.
1301 * The slow path has added it already. */
1302 if (list_empty(&req->req_pending_master_completion))
1303 list_add_tail(&req->req_pending_master_completion,
1304 &device->pending_master_completion[rw == WRITE]);
1151 if (req->private_bio) { 1305 if (req->private_bio) {
1152 /* needs to be marked within the same spinlock */ 1306 /* needs to be marked within the same spinlock */
1307 list_add_tail(&req->req_pending_local,
1308 &device->pending_completion[rw == WRITE]);
1153 _req_mod(req, TO_BE_SUBMITTED); 1309 _req_mod(req, TO_BE_SUBMITTED);
1154 /* but we need to give up the spinlock to submit */ 1310 /* but we need to give up the spinlock to submit */
1155 spin_unlock_irq(&device->resource->req_lock); 1311 submit_private_bio = true;
1156 drbd_submit_req_private_bio(req);
1157 spin_lock_irq(&device->resource->req_lock);
1158 } else if (no_remote) { 1312 } else if (no_remote) {
1159nodata: 1313nodata:
1160 if (__ratelimit(&drbd_ratelimit_state)) 1314 if (__ratelimit(&drbd_ratelimit_state))
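The list_empty(&req->req_pending_master_completion) test earlier in this hunk relies on a common list_head idiom: an initialized node that is always removed with list_del_init() is indistinguishable from one that was never added, so "is it empty" doubles as "is it on a list yet". A minimal sketch of that discipline, with invented names:

#include <linux/list.h>

struct item {
        struct list_head node;                  /* must be INIT_LIST_HEAD()ed at allocation */
};

static void track_once(struct item *it, struct list_head *tracked)
{
        if (list_empty(&it->node))              /* off-list: the fast path did not add it yet */
                list_add_tail(&it->node, tracked);
}

static void untrack(struct item *it)
{
        list_del_init(&it->node);               /* not plain list_del(): keep the node "empty" */
}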
@@ -1167,15 +1321,23 @@ nodata:
1167out: 1321out:
1168 if (drbd_req_put_completion_ref(req, &m, 1)) 1322 if (drbd_req_put_completion_ref(req, &m, 1))
1169 kref_put(&req->kref, drbd_req_destroy); 1323 kref_put(&req->kref, drbd_req_destroy);
1170 spin_unlock_irq(&device->resource->req_lock); 1324 spin_unlock_irq(&resource->req_lock);
1171 1325
1326 /* Even though above is a kref_put(), this is safe.
1327 * As long as we still need to submit our private bio,
1328 * we hold a completion ref, and the request cannot disappear.
1329 * If however this request did not even have a private bio to submit
1330 * (e.g. remote read), req may already be invalid now.
1331 * That's why we cannot check on req->private_bio. */
1332 if (submit_private_bio)
1333 drbd_submit_req_private_bio(req);
1172 if (m.bio) 1334 if (m.bio)
1173 complete_master_bio(device, &m); 1335 complete_master_bio(device, &m);
1174} 1336}
1175 1337
1176void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_time) 1338void __drbd_make_request(struct drbd_device *device, struct bio *bio, unsigned long start_jif)
1177{ 1339{
1178 struct drbd_request *req = drbd_request_prepare(device, bio, start_time); 1340 struct drbd_request *req = drbd_request_prepare(device, bio, start_jif);
1179 if (IS_ERR_OR_NULL(req)) 1341 if (IS_ERR_OR_NULL(req))
1180 return; 1342 return;
1181 drbd_send_and_submit(device, req); 1343 drbd_send_and_submit(device, req);
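Condensed, the control flow the new submit_private_bio flag gives drbd_send_and_submit() is roughly the excerpt below (same identifiers as in the hunk, heavily abbreviated): record the decision while req is still known to be valid, drop the lock, then act on the local flag rather than on req->private_bio, since the kref_put() may already have freed req on the paths that had nothing left to submit.

        spin_lock_irq(&resource->req_lock);
        /* ... */
        if (req->private_bio) {
                _req_mod(req, TO_BE_SUBMITTED);
                submit_private_bio = true;      /* recorded while req is certainly still valid */
        }
        /* ... */
        if (drbd_req_put_completion_ref(req, &m, 1))
                kref_put(&req->kref, drbd_req_destroy); /* may be the last reference */
        spin_unlock_irq(&resource->req_lock);

        if (submit_private_bio)                 /* test the local flag, never req->private_bio */
                drbd_submit_req_private_bio(req);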
@@ -1194,6 +1356,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
1194 continue; 1356 continue;
1195 1357
1196 req->rq_state |= RQ_IN_ACT_LOG; 1358 req->rq_state |= RQ_IN_ACT_LOG;
1359 req->in_actlog_jif = jiffies;
1360 atomic_dec(&device->ap_actlog_cnt);
1197 } 1361 }
1198 1362
1199 list_del_init(&req->tl_requests); 1363 list_del_init(&req->tl_requests);
@@ -1203,7 +1367,8 @@ static void submit_fast_path(struct drbd_device *device, struct list_head *incom
1203 1367
1204static bool prepare_al_transaction_nonblock(struct drbd_device *device, 1368static bool prepare_al_transaction_nonblock(struct drbd_device *device,
1205 struct list_head *incoming, 1369 struct list_head *incoming,
1206 struct list_head *pending) 1370 struct list_head *pending,
1371 struct list_head *later)
1207{ 1372{
1208 struct drbd_request *req, *tmp; 1373 struct drbd_request *req, *tmp;
1209 int wake = 0; 1374 int wake = 0;
@@ -1212,45 +1377,105 @@ static bool prepare_al_transaction_nonblock(struct drbd_device *device,
1212 spin_lock_irq(&device->al_lock); 1377 spin_lock_irq(&device->al_lock);
1213 list_for_each_entry_safe(req, tmp, incoming, tl_requests) { 1378 list_for_each_entry_safe(req, tmp, incoming, tl_requests) {
1214 err = drbd_al_begin_io_nonblock(device, &req->i); 1379 err = drbd_al_begin_io_nonblock(device, &req->i);
1380 if (err == -ENOBUFS)
1381 break;
1215 if (err == -EBUSY) 1382 if (err == -EBUSY)
1216 wake = 1; 1383 wake = 1;
1217 if (err) 1384 if (err)
1218 continue; 1385 list_move_tail(&req->tl_requests, later);
1219 req->rq_state |= RQ_IN_ACT_LOG; 1386 else
1220 list_move_tail(&req->tl_requests, pending); 1387 list_move_tail(&req->tl_requests, pending);
1221 } 1388 }
1222 spin_unlock_irq(&device->al_lock); 1389 spin_unlock_irq(&device->al_lock);
1223 if (wake) 1390 if (wake)
1224 wake_up(&device->al_wait); 1391 wake_up(&device->al_wait);
1225
1226 return !list_empty(pending); 1392 return !list_empty(pending);
1227} 1393}
1228 1394
1395void send_and_submit_pending(struct drbd_device *device, struct list_head *pending)
1396{
1397 struct drbd_request *req, *tmp;
1398
1399 list_for_each_entry_safe(req, tmp, pending, tl_requests) {
1400 req->rq_state |= RQ_IN_ACT_LOG;
1401 req->in_actlog_jif = jiffies;
1402 atomic_dec(&device->ap_actlog_cnt);
1403 list_del_init(&req->tl_requests);
1404 drbd_send_and_submit(device, req);
1405 }
1406}
1407
1229void do_submit(struct work_struct *ws) 1408void do_submit(struct work_struct *ws)
1230{ 1409{
1231 struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker); 1410 struct drbd_device *device = container_of(ws, struct drbd_device, submit.worker);
1232 LIST_HEAD(incoming); 1411 LIST_HEAD(incoming); /* from drbd_make_request() */
1233 LIST_HEAD(pending); 1412 LIST_HEAD(pending); /* to be submitted after next AL-transaction commit */
1234 struct drbd_request *req, *tmp; 1413 LIST_HEAD(busy); /* blocked by resync requests */
1414
1415 /* grab new incoming requests */
1416 spin_lock_irq(&device->resource->req_lock);
1417 list_splice_tail_init(&device->submit.writes, &incoming);
1418 spin_unlock_irq(&device->resource->req_lock);
1235 1419
1236 for (;;) { 1420 for (;;) {
1237 spin_lock(&device->submit.lock); 1421 DEFINE_WAIT(wait);
1238 list_splice_tail_init(&device->submit.writes, &incoming);
1239 spin_unlock(&device->submit.lock);
1240 1422
1423 /* move used-to-be-busy back to front of incoming */
1424 list_splice_init(&busy, &incoming);
1241 submit_fast_path(device, &incoming); 1425 submit_fast_path(device, &incoming);
1242 if (list_empty(&incoming)) 1426 if (list_empty(&incoming))
1243 break; 1427 break;
1244 1428
1245skip_fast_path:
1246 wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending));
1247 /* Maybe more was queued, while we prepared the transaction?
1248 * Try to stuff them into this transaction as well.
1249 * Be strictly non-blocking here, no wait_event, we already
1250 * have something to commit.
1251 * Stop if we don't make any more progres.
1252 */
1253 for (;;) { 1429 for (;;) {
1430 prepare_to_wait(&device->al_wait, &wait, TASK_UNINTERRUPTIBLE);
1431
1432 list_splice_init(&busy, &incoming);
1433 prepare_al_transaction_nonblock(device, &incoming, &pending, &busy);
1434 if (!list_empty(&pending))
1435 break;
1436
1437 schedule();
1438
1439 /* If all currently "hot" activity log extents are kept busy by
1440 * incoming requests, we still must not totally starve new
1441 * requests to "cold" extents.
1442 * Something left on &incoming means there had not been
1443 * enough update slots available, and the activity log
1444 * has been marked as "starving".
1445 *
1446 * Try again now, without looking for new requests,
1447 * effectively blocking all new requests until we have made
1448 * at least _some_ progress with what we currently have.
1449 */
1450 if (!list_empty(&incoming))
1451 continue;
1452
1453 /* Nothing moved to pending, but nothing left
1454 * on incoming: all moved to busy!
1455 * Grab new and iterate. */
1456 spin_lock_irq(&device->resource->req_lock);
1457 list_splice_tail_init(&device->submit.writes, &incoming);
1458 spin_unlock_irq(&device->resource->req_lock);
1459 }
1460 finish_wait(&device->al_wait, &wait);
1461
1462 /* If the transaction was full, before all incoming requests
1463 * had been processed, skip ahead to commit, and iterate
1464 * without splicing in more incoming requests from upper layers.
1465 *
1466 * Else, if all incoming have been processed,
1467 * they have become either "pending" (to be submitted after
1468 * next transaction commit) or "busy" (blocked by resync).
1469 *
1470 * Maybe more was queued, while we prepared the transaction?
1471 * Try to stuff those into this transaction as well.
1472 * Be strictly non-blocking here,
1473 * we already have something to commit.
1474 *
1475 * Commit if we don't make any more progress.
1476 */
1477
1478 while (list_empty(&incoming)) {
1254 LIST_HEAD(more_pending); 1479 LIST_HEAD(more_pending);
1255 LIST_HEAD(more_incoming); 1480 LIST_HEAD(more_incoming);
1256 bool made_progress; 1481 bool made_progress;
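The inner loop a few lines up open-codes what wait_event() would otherwise hide, so that list splicing and the non-blocking AL preparation can run between the condition check and the sleep without losing wake-ups from al_wait. A generic sketch of that idiom (the helper and its callback are illustrative, not DRBD code):

#include <linux/wait.h>
#include <linux/sched.h>

/* returns once try_to_make_progress() reports success; wake-ups on wq re-run the check */
static void wait_until_progress(wait_queue_head_t *wq,
                                bool (*try_to_make_progress)(void *), void *arg)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
                if (try_to_make_progress(arg))  /* real work happens between check and sleep */
                        break;
                schedule();                     /* does not sleep if a wake-up already arrived */
        }
        finish_wait(wq, &wait);
}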
@@ -1260,55 +1485,32 @@ skip_fast_path:
1260 if (list_empty(&device->submit.writes)) 1485 if (list_empty(&device->submit.writes))
1261 break; 1486 break;
1262 1487
1263 spin_lock(&device->submit.lock); 1488 spin_lock_irq(&device->resource->req_lock);
1264 list_splice_tail_init(&device->submit.writes, &more_incoming); 1489 list_splice_tail_init(&device->submit.writes, &more_incoming);
1265 spin_unlock(&device->submit.lock); 1490 spin_unlock_irq(&device->resource->req_lock);
1266 1491
1267 if (list_empty(&more_incoming)) 1492 if (list_empty(&more_incoming))
1268 break; 1493 break;
1269 1494
1270 made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending); 1495 made_progress = prepare_al_transaction_nonblock(device, &more_incoming, &more_pending, &busy);
1271 1496
1272 list_splice_tail_init(&more_pending, &pending); 1497 list_splice_tail_init(&more_pending, &pending);
1273 list_splice_tail_init(&more_incoming, &incoming); 1498 list_splice_tail_init(&more_incoming, &incoming);
1274
1275 if (!made_progress) 1499 if (!made_progress)
1276 break; 1500 break;
1277 } 1501 }
1278 drbd_al_begin_io_commit(device, false);
1279
1280 list_for_each_entry_safe(req, tmp, &pending, tl_requests) {
1281 list_del_init(&req->tl_requests);
1282 drbd_send_and_submit(device, req);
1283 }
1284 1502
1285 /* If all currently hot activity log extents are kept busy by 1503 drbd_al_begin_io_commit(device);
1286 * incoming requests, we still must not totally starve new 1504 send_and_submit_pending(device, &pending);
1287 * requests to cold extents. In that case, prepare one request
1288 * in blocking mode. */
1289 list_for_each_entry_safe(req, tmp, &incoming, tl_requests) {
1290 list_del_init(&req->tl_requests);
1291 req->rq_state |= RQ_IN_ACT_LOG;
1292 if (!drbd_al_begin_io_prepare(device, &req->i)) {
1293 /* Corresponding extent was hot after all? */
1294 drbd_send_and_submit(device, req);
1295 } else {
1296 /* Found a request to a cold extent.
1297 * Put on "pending" list,
1298 * and try to cumulate with more. */
1299 list_add(&req->tl_requests, &pending);
1300 goto skip_fast_path;
1301 }
1302 }
1303 } 1505 }
1304} 1506}
1305 1507
1306void drbd_make_request(struct request_queue *q, struct bio *bio) 1508void drbd_make_request(struct request_queue *q, struct bio *bio)
1307{ 1509{
1308 struct drbd_device *device = (struct drbd_device *) q->queuedata; 1510 struct drbd_device *device = (struct drbd_device *) q->queuedata;
1309 unsigned long start_time; 1511 unsigned long start_jif;
1310 1512
1311 start_time = jiffies; 1513 start_jif = jiffies;
1312 1514
1313 /* 1515 /*
1314 * what we "blindly" assume: 1516 * what we "blindly" assume:
@@ -1316,7 +1518,7 @@ void drbd_make_request(struct request_queue *q, struct bio *bio)
1316 D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512)); 1518 D_ASSERT(device, IS_ALIGNED(bio->bi_iter.bi_size, 512));
1317 1519
1318 inc_ap_bio(device); 1520 inc_ap_bio(device);
1319 __drbd_make_request(device, bio, start_time); 1521 __drbd_make_request(device, bio, start_jif);
1320} 1522}
1321 1523
1322/* This is called by bio_add_page(). 1524/* This is called by bio_add_page().
@@ -1353,36 +1555,13 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct
1353 return limit; 1555 return limit;
1354} 1556}
1355 1557
1356static void find_oldest_requests(
1357 struct drbd_connection *connection,
1358 struct drbd_device *device,
1359 struct drbd_request **oldest_req_waiting_for_peer,
1360 struct drbd_request **oldest_req_waiting_for_disk)
1361{
1362 struct drbd_request *r;
1363 *oldest_req_waiting_for_peer = NULL;
1364 *oldest_req_waiting_for_disk = NULL;
1365 list_for_each_entry(r, &connection->transfer_log, tl_requests) {
1366 const unsigned s = r->rq_state;
1367 if (!*oldest_req_waiting_for_peer
1368 && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE)))
1369 *oldest_req_waiting_for_peer = r;
1370
1371 if (!*oldest_req_waiting_for_disk
1372 && (s & RQ_LOCAL_PENDING) && r->device == device)
1373 *oldest_req_waiting_for_disk = r;
1374
1375 if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk)
1376 break;
1377 }
1378}
1379
1380void request_timer_fn(unsigned long data) 1558void request_timer_fn(unsigned long data)
1381{ 1559{
1382 struct drbd_device *device = (struct drbd_device *) data; 1560 struct drbd_device *device = (struct drbd_device *) data;
1383 struct drbd_connection *connection = first_peer_device(device)->connection; 1561 struct drbd_connection *connection = first_peer_device(device)->connection;
1384 struct drbd_request *req_disk, *req_peer; /* oldest request */ 1562 struct drbd_request *req_read, *req_write, *req_peer; /* oldest request */
1385 struct net_conf *nc; 1563 struct net_conf *nc;
1564 unsigned long oldest_submit_jif;
1386 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ 1565 unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
1387 unsigned long now; 1566 unsigned long now;
1388 1567
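With requests now kept on per-direction lists in submission order (req_pending_local, added earlier in this patch), "find the oldest" degenerates to "look at the list head". A helper like the one below, which is not in the patch, captures what the two list_first_entry_or_null() calls in the next hunk do:

/* caller holds device->resource->req_lock; helper name is illustrative only */
static struct drbd_request *oldest_pending_local(struct drbd_device *device, int is_write)
{
        return list_first_entry_or_null(&device->pending_completion[is_write],
                                        struct drbd_request, req_pending_local);
}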
@@ -1403,14 +1582,31 @@ void request_timer_fn(unsigned long data)
1403 return; /* Recurring timer stopped */ 1582 return; /* Recurring timer stopped */
1404 1583
1405 now = jiffies; 1584 now = jiffies;
1585 nt = now + et;
1406 1586
1407 spin_lock_irq(&device->resource->req_lock); 1587 spin_lock_irq(&device->resource->req_lock);
1408 find_oldest_requests(connection, device, &req_peer, &req_disk); 1588 req_read = list_first_entry_or_null(&device->pending_completion[0], struct drbd_request, req_pending_local);
1409 if (req_peer == NULL && req_disk == NULL) { 1589 req_write = list_first_entry_or_null(&device->pending_completion[1], struct drbd_request, req_pending_local);
1410 spin_unlock_irq(&device->resource->req_lock); 1590 req_peer = connection->req_not_net_done;
1411 mod_timer(&device->request_timer, now + et); 1591 /* maybe the oldest request waiting for the peer is in fact still
1412 return; 1592 * blocking in tcp sendmsg */
1413 } 1593 if (!req_peer && connection->req_next && connection->req_next->pre_send_jif)
1594 req_peer = connection->req_next;
1595
1596 /* evaluate the oldest peer request only in one timer! */
1597 if (req_peer && req_peer->device != device)
1598 req_peer = NULL;
1599
1600 /* do we have something to evaluate? */
1601 if (req_peer == NULL && req_write == NULL && req_read == NULL)
1602 goto out;
1603
1604 oldest_submit_jif =
1605 (req_write && req_read)
1606 ? ( time_before(req_write->pre_submit_jif, req_read->pre_submit_jif)
1607 ? req_write->pre_submit_jif : req_read->pre_submit_jif )
1608 : req_write ? req_write->pre_submit_jif
1609 : req_read ? req_read->pre_submit_jif : now;
1414 1610
1415 /* The request is considered timed out, if 1611 /* The request is considered timed out, if
1416 * - we have some effective timeout from the configuration, 1612 * - we have some effective timeout from the configuration,
@@ -1429,13 +1625,13 @@ void request_timer_fn(unsigned long data)
1429 * to expire twice (worst case) to become effective. Good enough. 1625 * to expire twice (worst case) to become effective. Good enough.
1430 */ 1626 */
1431 if (ent && req_peer && 1627 if (ent && req_peer &&
1432 time_after(now, req_peer->start_time + ent) && 1628 time_after(now, req_peer->pre_send_jif + ent) &&
1433 !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { 1629 !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) {
1434 drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); 1630 drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n");
1435 _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); 1631 _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
1436 } 1632 }
1437 if (dt && req_disk && 1633 if (dt && oldest_submit_jif != now &&
1438 time_after(now, req_disk->start_time + dt) && 1634 time_after(now, oldest_submit_jif + dt) &&
1439 !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { 1635 !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) {
1440 drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); 1636 drbd_warn(device, "Local backing device failed to meet the disk-timeout\n");
1441 __drbd_chk_io_error(device, DRBD_FORCE_DETACH); 1637 __drbd_chk_io_error(device, DRBD_FORCE_DETACH);
@@ -1443,11 +1639,12 @@ void request_timer_fn(unsigned long data)
1443 1639
1444 /* Reschedule timer for the nearest not already expired timeout. 1640 /* Reschedule timer for the nearest not already expired timeout.
1445 * Fallback to now + min(effective network timeout, disk timeout). */ 1641 * Fallback to now + min(effective network timeout, disk timeout). */
1446 ent = (ent && req_peer && time_before(now, req_peer->start_time + ent)) 1642 ent = (ent && req_peer && time_before(now, req_peer->pre_send_jif + ent))
1447 ? req_peer->start_time + ent : now + et; 1643 ? req_peer->pre_send_jif + ent : now + et;
1448 dt = (dt && req_disk && time_before(now, req_disk->start_time + dt)) 1644 dt = (dt && oldest_submit_jif != now && time_before(now, oldest_submit_jif + dt))
1449 ? req_disk->start_time + dt : now + et; 1645 ? oldest_submit_jif + dt : now + et;
1450 nt = time_before(ent, dt) ? ent : dt; 1646 nt = time_before(ent, dt) ? ent : dt;
1647out:
1451 spin_unlock_irq(&connection->resource->req_lock); 1648 spin_unlock_irq(&connection->resource->req_lock);
1452 mod_timer(&device->request_timer, nt); 1649 mod_timer(&device->request_timer, nt);
1453} 1650}
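The rescheduling at the end of request_timer_fn() follows the usual jiffies idiom: compare deadlines with time_before(), which tolerates jiffies wrap-around as long as the compared distances stay below half the counter range, and re-arm with mod_timer() for the nearest deadline that has not yet expired. A stand-alone sketch with hypothetical deadline arguments:

#include <linux/timer.h>
#include <linux/jiffies.h>

/* hypothetical helper, not part of the patch */
static void rearm_nearest(struct timer_list *t, unsigned long deadline_a,
                          unsigned long deadline_b, unsigned long period)
{
        unsigned long now = jiffies;
        unsigned long next = now + period;              /* fallback: plain periodic tick */

        if (time_before(now, deadline_a) && time_before(deadline_a, next))
                next = deadline_a;
        if (time_before(now, deadline_b) && time_before(deadline_b, next))
                next = deadline_b;
        mod_timer(t, next);
}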
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h
index 8566cd5866b4..9f6a04080e9f 100644
--- a/drivers/block/drbd/drbd_req.h
+++ b/drivers/block/drbd/drbd_req.h
@@ -288,6 +288,7 @@ extern void complete_master_bio(struct drbd_device *device,
288extern void request_timer_fn(unsigned long data); 288extern void request_timer_fn(unsigned long data);
289extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what); 289extern void tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
290extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what); 290extern void _tl_restart(struct drbd_connection *connection, enum drbd_req_event what);
291extern void tl_abort_disk_io(struct drbd_device *device);
291 292
292/* this is in drbd_main.c */ 293/* this is in drbd_main.c */
293extern void drbd_restart_request(struct drbd_request *req); 294extern void drbd_restart_request(struct drbd_request *req);
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c
index a5d8aae00e04..c35c0f001bb7 100644
--- a/drivers/block/drbd/drbd_state.c
+++ b/drivers/block/drbd/drbd_state.c
@@ -410,7 +410,7 @@ _drbd_request_state(struct drbd_device *device, union drbd_state mask,
410 return rv; 410 return rv;
411} 411}
412 412
413static void print_st(struct drbd_device *device, char *name, union drbd_state ns) 413static void print_st(struct drbd_device *device, const char *name, union drbd_state ns)
414{ 414{
415 drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n", 415 drbd_err(device, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c%c%c }\n",
416 name, 416 name,
@@ -952,11 +952,12 @@ enum drbd_state_rv
952__drbd_set_state(struct drbd_device *device, union drbd_state ns, 952__drbd_set_state(struct drbd_device *device, union drbd_state ns,
953 enum chg_state_flags flags, struct completion *done) 953 enum chg_state_flags flags, struct completion *done)
954{ 954{
955 struct drbd_peer_device *peer_device = first_peer_device(device);
956 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
955 union drbd_state os; 957 union drbd_state os;
956 enum drbd_state_rv rv = SS_SUCCESS; 958 enum drbd_state_rv rv = SS_SUCCESS;
957 enum sanitize_state_warnings ssw; 959 enum sanitize_state_warnings ssw;
958 struct after_state_chg_work *ascw; 960 struct after_state_chg_work *ascw;
959 bool did_remote, should_do_remote;
960 961
961 os = drbd_read_state(device); 962 os = drbd_read_state(device);
962 963
@@ -978,9 +979,9 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
978 this happen...*/ 979 this happen...*/
979 980
980 if (is_valid_state(device, os) == rv) 981 if (is_valid_state(device, os) == rv)
981 rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); 982 rv = is_valid_soft_transition(os, ns, connection);
982 } else 983 } else
983 rv = is_valid_soft_transition(os, ns, first_peer_device(device)->connection); 984 rv = is_valid_soft_transition(os, ns, connection);
984 } 985 }
985 986
986 if (rv < SS_SUCCESS) { 987 if (rv < SS_SUCCESS) {
@@ -997,7 +998,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
997 sanitize_state(). Only display it here if we where not called from 998 sanitize_state(). Only display it here if we where not called from
998 _conn_request_state() */ 999 _conn_request_state() */
999 if (!(flags & CS_DC_SUSP)) 1000 if (!(flags & CS_DC_SUSP))
1000 conn_pr_state_change(first_peer_device(device)->connection, os, ns, 1001 conn_pr_state_change(connection, os, ns,
1001 (flags & ~CS_DC_MASK) | CS_DC_SUSP); 1002 (flags & ~CS_DC_MASK) | CS_DC_SUSP);
1002 1003
1003 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference 1004 /* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
@@ -1008,28 +1009,35 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
1008 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) 1009 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
1009 atomic_inc(&device->local_cnt); 1010 atomic_inc(&device->local_cnt);
1010 1011
1011 did_remote = drbd_should_do_remote(device->state); 1012 if (!is_sync_state(os.conn) && is_sync_state(ns.conn))
1013 clear_bit(RS_DONE, &device->flags);
1014
1015 /* changes to local_cnt and device flags should be visible before
1016 * changes to state, which again should be visible before anything else
1017 * depending on that change happens. */
1018 smp_wmb();
1012 device->state.i = ns.i; 1019 device->state.i = ns.i;
1013 should_do_remote = drbd_should_do_remote(device->state);
1014 device->resource->susp = ns.susp; 1020 device->resource->susp = ns.susp;
1015 device->resource->susp_nod = ns.susp_nod; 1021 device->resource->susp_nod = ns.susp_nod;
1016 device->resource->susp_fen = ns.susp_fen; 1022 device->resource->susp_fen = ns.susp_fen;
1023 smp_wmb();
1017 1024
1018 /* put replicated vs not-replicated requests in seperate epochs */ 1025 /* put replicated vs not-replicated requests in seperate epochs */
1019 if (did_remote != should_do_remote) 1026 if (drbd_should_do_remote((union drbd_dev_state)os.i) !=
1020 start_new_tl_epoch(first_peer_device(device)->connection); 1027 drbd_should_do_remote((union drbd_dev_state)ns.i))
1028 start_new_tl_epoch(connection);
1021 1029
1022 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) 1030 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
1023 drbd_print_uuids(device, "attached to UUIDs"); 1031 drbd_print_uuids(device, "attached to UUIDs");
1024 1032
1025 /* Wake up role changes, that were delayed because of connection establishing */ 1033 /* Wake up role changes, that were delayed because of connection establishing */
1026 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS && 1034 if (os.conn == C_WF_REPORT_PARAMS && ns.conn != C_WF_REPORT_PARAMS &&
1027 no_peer_wf_report_params(first_peer_device(device)->connection)) 1035 no_peer_wf_report_params(connection))
1028 clear_bit(STATE_SENT, &first_peer_device(device)->connection->flags); 1036 clear_bit(STATE_SENT, &connection->flags);
1029 1037
1030 wake_up(&device->misc_wait); 1038 wake_up(&device->misc_wait);
1031 wake_up(&device->state_wait); 1039 wake_up(&device->state_wait);
1032 wake_up(&first_peer_device(device)->connection->ping_wait); 1040 wake_up(&connection->ping_wait);
1033 1041
1034 /* Aborted verify run, or we reached the stop sector. 1042 /* Aborted verify run, or we reached the stop sector.
1035 * Log the last position, unless end-of-device. */ 1043 * Log the last position, unless end-of-device. */
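The smp_wmb() pair added around the state assignment is the write half of the usual publish/consume pairing; it orders stores on this CPU only, and a reader needs a matching read-side barrier to benefit. A minimal sketch with plain int fields and invented names (ONCE annotations omitted for brevity):

int payload;                    /* written before the flag ... */
int published;                  /* ... which readers test first */

void writer(void)
{
        payload = 42;
        smp_wmb();              /* order payload before published on the write side */
        published = 1;
}

int reader(void)
{
        if (published) {
                smp_rmb();      /* pairs with the writer's smp_wmb() */
                return payload; /* now guaranteed to observe 42 */
        }
        return -1;
}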
@@ -1118,21 +1126,21 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
1118 1126
1119 /* Receiver should clean up itself */ 1127 /* Receiver should clean up itself */
1120 if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING) 1128 if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
1121 drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); 1129 drbd_thread_stop_nowait(&connection->receiver);
1122 1130
1123 /* Now the receiver finished cleaning up itself, it should die */ 1131 /* Now the receiver finished cleaning up itself, it should die */
1124 if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE) 1132 if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
1125 drbd_thread_stop_nowait(&first_peer_device(device)->connection->receiver); 1133 drbd_thread_stop_nowait(&connection->receiver);
1126 1134
1127 /* Upon network failure, we need to restart the receiver. */ 1135 /* Upon network failure, we need to restart the receiver. */
1128 if (os.conn > C_WF_CONNECTION && 1136 if (os.conn > C_WF_CONNECTION &&
1129 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT) 1137 ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
1130 drbd_thread_restart_nowait(&first_peer_device(device)->connection->receiver); 1138 drbd_thread_restart_nowait(&connection->receiver);
1131 1139
1132 /* Resume AL writing if we get a connection */ 1140 /* Resume AL writing if we get a connection */
1133 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) { 1141 if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
1134 drbd_resume_al(device); 1142 drbd_resume_al(device);
1135 first_peer_device(device)->connection->connect_cnt++; 1143 connection->connect_cnt++;
1136 } 1144 }
1137 1145
1138 /* remember last attach time so request_timer_fn() won't 1146 /* remember last attach time so request_timer_fn() won't
@@ -1150,7 +1158,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns,
1150 ascw->w.cb = w_after_state_ch; 1158 ascw->w.cb = w_after_state_ch;
1151 ascw->device = device; 1159 ascw->device = device;
1152 ascw->done = done; 1160 ascw->done = done;
1153 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 1161 drbd_queue_work(&connection->sender_work,
1154 &ascw->w); 1162 &ascw->w);
1155 } else { 1163 } else {
1156 drbd_err(device, "Could not kmalloc an ascw\n"); 1164 drbd_err(device, "Could not kmalloc an ascw\n");
@@ -1222,13 +1230,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1222 union drbd_state ns, enum chg_state_flags flags) 1230 union drbd_state ns, enum chg_state_flags flags)
1223{ 1231{
1224 struct drbd_resource *resource = device->resource; 1232 struct drbd_resource *resource = device->resource;
1233 struct drbd_peer_device *peer_device = first_peer_device(device);
1234 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1225 struct sib_info sib; 1235 struct sib_info sib;
1226 1236
1227 sib.sib_reason = SIB_STATE_CHANGE; 1237 sib.sib_reason = SIB_STATE_CHANGE;
1228 sib.os = os; 1238 sib.os = os;
1229 sib.ns = ns; 1239 sib.ns = ns;
1230 1240
1231 if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) { 1241 if ((os.disk != D_UP_TO_DATE || os.pdsk != D_UP_TO_DATE)
1242 && (ns.disk == D_UP_TO_DATE && ns.pdsk == D_UP_TO_DATE)) {
1232 clear_bit(CRASHED_PRIMARY, &device->flags); 1243 clear_bit(CRASHED_PRIMARY, &device->flags);
1233 if (device->p_uuid) 1244 if (device->p_uuid)
1234 device->p_uuid[UI_FLAGS] &= ~((u64)2); 1245 device->p_uuid[UI_FLAGS] &= ~((u64)2);
@@ -1245,7 +1256,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1245 state change. This function might sleep */ 1256 state change. This function might sleep */
1246 1257
1247 if (ns.susp_nod) { 1258 if (ns.susp_nod) {
1248 struct drbd_connection *connection = first_peer_device(device)->connection;
1249 enum drbd_req_event what = NOTHING; 1259 enum drbd_req_event what = NOTHING;
1250 1260
1251 spin_lock_irq(&device->resource->req_lock); 1261 spin_lock_irq(&device->resource->req_lock);
@@ -1267,8 +1277,6 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1267 } 1277 }
1268 1278
1269 if (ns.susp_fen) { 1279 if (ns.susp_fen) {
1270 struct drbd_connection *connection = first_peer_device(device)->connection;
1271
1272 spin_lock_irq(&device->resource->req_lock); 1280 spin_lock_irq(&device->resource->req_lock);
1273 if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) { 1281 if (resource->susp_fen && conn_lowest_conn(connection) >= C_CONNECTED) {
1274 /* case2: The connection was established again: */ 1282 /* case2: The connection was established again: */
@@ -1294,8 +1302,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1294 * which is unexpected. */ 1302 * which is unexpected. */
1295 if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) && 1303 if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
1296 (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) && 1304 (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
1297 first_peer_device(device)->connection->agreed_pro_version >= 96 && get_ldev(device)) { 1305 connection->agreed_pro_version >= 96 && get_ldev(device)) {
1298 drbd_gen_and_send_sync_uuid(first_peer_device(device)); 1306 drbd_gen_and_send_sync_uuid(peer_device);
1299 put_ldev(device); 1307 put_ldev(device);
1300 } 1308 }
1301 1309
@@ -1309,8 +1317,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1309 atomic_set(&device->rs_pending_cnt, 0); 1317 atomic_set(&device->rs_pending_cnt, 0);
1310 drbd_rs_cancel_all(device); 1318 drbd_rs_cancel_all(device);
1311 1319
1312 drbd_send_uuids(first_peer_device(device)); 1320 drbd_send_uuids(peer_device);
1313 drbd_send_state(first_peer_device(device), ns); 1321 drbd_send_state(peer_device, ns);
1314 } 1322 }
1315 /* No point in queuing send_bitmap if we don't have a connection 1323 /* No point in queuing send_bitmap if we don't have a connection
1316 * anymore, so check also the _current_ state, not only the new state 1324 * anymore, so check also the _current_ state, not only the new state
@@ -1335,7 +1343,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1335 set_bit(NEW_CUR_UUID, &device->flags); 1343 set_bit(NEW_CUR_UUID, &device->flags);
1336 } else { 1344 } else {
1337 drbd_uuid_new_current(device); 1345 drbd_uuid_new_current(device);
1338 drbd_send_uuids(first_peer_device(device)); 1346 drbd_send_uuids(peer_device);
1339 } 1347 }
1340 } 1348 }
1341 put_ldev(device); 1349 put_ldev(device);
@@ -1346,7 +1354,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1346 if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY && 1354 if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
1347 device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) { 1355 device->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
1348 drbd_uuid_new_current(device); 1356 drbd_uuid_new_current(device);
1349 drbd_send_uuids(first_peer_device(device)); 1357 drbd_send_uuids(peer_device);
1350 } 1358 }
1351 /* D_DISKLESS Peer becomes secondary */ 1359 /* D_DISKLESS Peer becomes secondary */
1352 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) 1360 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
@@ -1373,16 +1381,16 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1373 /* Last part of the attaching process ... */ 1381 /* Last part of the attaching process ... */
1374 if (ns.conn >= C_CONNECTED && 1382 if (ns.conn >= C_CONNECTED &&
1375 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) { 1383 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
1376 drbd_send_sizes(first_peer_device(device), 0, 0); /* to start sync... */ 1384 drbd_send_sizes(peer_device, 0, 0); /* to start sync... */
1377 drbd_send_uuids(first_peer_device(device)); 1385 drbd_send_uuids(peer_device);
1378 drbd_send_state(first_peer_device(device), ns); 1386 drbd_send_state(peer_device, ns);
1379 } 1387 }
1380 1388
1381 /* We want to pause/continue resync, tell peer. */ 1389 /* We want to pause/continue resync, tell peer. */
1382 if (ns.conn >= C_CONNECTED && 1390 if (ns.conn >= C_CONNECTED &&
1383 ((os.aftr_isp != ns.aftr_isp) || 1391 ((os.aftr_isp != ns.aftr_isp) ||
1384 (os.user_isp != ns.user_isp))) 1392 (os.user_isp != ns.user_isp)))
1385 drbd_send_state(first_peer_device(device), ns); 1393 drbd_send_state(peer_device, ns);
1386 1394
1387 /* In case one of the isp bits got set, suspend other devices. */ 1395 /* In case one of the isp bits got set, suspend other devices. */
1388 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) && 1396 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
@@ -1392,10 +1400,10 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1392 /* Make sure the peer gets informed about eventual state 1400 /* Make sure the peer gets informed about eventual state
1393 changes (ISP bits) while we were in WFReportParams. */ 1401 changes (ISP bits) while we were in WFReportParams. */
1394 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED) 1402 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
1395 drbd_send_state(first_peer_device(device), ns); 1403 drbd_send_state(peer_device, ns);
1396 1404
1397 if (os.conn != C_AHEAD && ns.conn == C_AHEAD) 1405 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
1398 drbd_send_state(first_peer_device(device), ns); 1406 drbd_send_state(peer_device, ns);
1399 1407
1400 /* We are in the progress to start a full sync... */ 1408 /* We are in the progress to start a full sync... */
1401 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || 1409 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
@@ -1449,7 +1457,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1449 drbd_disk_str(device->state.disk)); 1457 drbd_disk_str(device->state.disk));
1450 1458
1451 if (ns.conn >= C_CONNECTED) 1459 if (ns.conn >= C_CONNECTED)
1452 drbd_send_state(first_peer_device(device), ns); 1460 drbd_send_state(peer_device, ns);
1453 1461
1454 drbd_rs_cancel_all(device); 1462 drbd_rs_cancel_all(device);
1455 1463
@@ -1473,7 +1481,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1473 drbd_disk_str(device->state.disk)); 1481 drbd_disk_str(device->state.disk));
1474 1482
1475 if (ns.conn >= C_CONNECTED) 1483 if (ns.conn >= C_CONNECTED)
1476 drbd_send_state(first_peer_device(device), ns); 1484 drbd_send_state(peer_device, ns);
1477 /* corresponding get_ldev in __drbd_set_state 1485 /* corresponding get_ldev in __drbd_set_state
1478 * this may finally trigger drbd_ldev_destroy. */ 1486 * this may finally trigger drbd_ldev_destroy. */
1479 put_ldev(device); 1487 put_ldev(device);
@@ -1481,7 +1489,7 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1481 1489
1482 /* Notify peer that I had a local IO error, and did not detached.. */ 1490 /* Notify peer that I had a local IO error, and did not detached.. */
1483 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED) 1491 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT && ns.conn >= C_CONNECTED)
1484 drbd_send_state(first_peer_device(device), ns); 1492 drbd_send_state(peer_device, ns);
1485 1493
1486 /* Disks got bigger while they were detached */ 1494 /* Disks got bigger while they were detached */
1487 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING && 1495 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
@@ -1499,14 +1507,14 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os,
1499 /* sync target done with resync. Explicitly notify peer, even though 1507 /* sync target done with resync. Explicitly notify peer, even though
1500 * it should (at least for non-empty resyncs) already know itself. */ 1508 * it should (at least for non-empty resyncs) already know itself. */
1501 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED) 1509 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
1502 drbd_send_state(first_peer_device(device), ns); 1510 drbd_send_state(peer_device, ns);
1503 1511
1504 /* Verify finished, or reached stop sector. Peer did not know about 1512 /* Verify finished, or reached stop sector. Peer did not know about
1505 * the stop sector, and we may even have changed the stop sector during 1513 * the stop sector, and we may even have changed the stop sector during
1506 * verify to interrupt/stop early. Send the new state. */ 1514 * verify to interrupt/stop early. Send the new state. */
1507 if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED 1515 if (os.conn == C_VERIFY_S && ns.conn == C_CONNECTED
1508 && verify_can_do_stop_sector(device)) 1516 && verify_can_do_stop_sector(device))
1509 drbd_send_state(first_peer_device(device), ns); 1517 drbd_send_state(peer_device, ns);
1510 1518
1511 /* This triggers bitmap writeout of potentially still unwritten pages 1519 /* This triggers bitmap writeout of potentially still unwritten pages
1512 * if the resync finished cleanly, or aborted because of peer disk 1520 * if the resync finished cleanly, or aborted because of peer disk
@@ -1563,7 +1571,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
1563 old_conf = connection->net_conf; 1571 old_conf = connection->net_conf;
1564 connection->my_addr_len = 0; 1572 connection->my_addr_len = 0;
1565 connection->peer_addr_len = 0; 1573 connection->peer_addr_len = 0;
1566 rcu_assign_pointer(connection->net_conf, NULL); 1574 RCU_INIT_POINTER(connection->net_conf, NULL);
1567 conn_free_crypto(connection); 1575 conn_free_crypto(connection);
1568 mutex_unlock(&connection->resource->conf_update); 1576 mutex_unlock(&connection->resource->conf_update);
1569 1577
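Replacing rcu_assign_pointer(..., NULL) with RCU_INIT_POINTER() is safe because the barrier in rcu_assign_pointer() only matters when readers could dereference a freshly published object; storing NULL publishes nothing that needs ordering. A sketch with an invented struct cfg standing in for net_conf:

struct cfg;                                      /* invented stand-in for net_conf */
struct cfg __rcu *active_cfg;

void publish_cfg(struct cfg *new_cfg)
{
        rcu_assign_pointer(active_cfg, new_cfg); /* barrier: readers must see *new_cfg initialized */
}

void retract_cfg(void)
{
        RCU_INIT_POINTER(active_cfg, NULL);      /* NULL carries no data: no barrier needed */
        /* freeing the old object still requires synchronize_rcu()/kfree_rcu() elsewhere */
}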
@@ -1599,7 +1607,7 @@ static int w_after_conn_state_ch(struct drbd_work *w, int unused)
1599 return 0; 1607 return 0;
1600} 1608}
1601 1609
1602void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf) 1610static void conn_old_common_state(struct drbd_connection *connection, union drbd_state *pcs, enum chg_state_flags *pf)
1603{ 1611{
1604 enum chg_state_flags flags = ~0; 1612 enum chg_state_flags flags = ~0;
1605 struct drbd_peer_device *peer_device; 1613 struct drbd_peer_device *peer_device;
@@ -1688,7 +1696,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma
1688 return rv; 1696 return rv;
1689} 1697}
1690 1698
1691void 1699static void
1692conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val, 1700conn_set_state(struct drbd_connection *connection, union drbd_state mask, union drbd_state val,
1693 union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags) 1701 union drbd_state *pns_min, union drbd_state *pns_max, enum chg_state_flags flags)
1694{ 1702{
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d8f57b6305cd..50776b362828 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -67,13 +67,10 @@ rwlock_t global_state_lock;
67 */ 67 */
68void drbd_md_io_complete(struct bio *bio, int error) 68void drbd_md_io_complete(struct bio *bio, int error)
69{ 69{
70 struct drbd_md_io *md_io;
71 struct drbd_device *device; 70 struct drbd_device *device;
72 71
73 md_io = (struct drbd_md_io *)bio->bi_private; 72 device = bio->bi_private;
74 device = container_of(md_io, struct drbd_device, md_io); 73 device->md_io.error = error;
75
76 md_io->error = error;
77 74
78 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able 75 /* We grabbed an extra reference in _drbd_md_sync_page_io() to be able
79 * to timeout on the lower level device, and eventually detach from it. 76 * to timeout on the lower level device, and eventually detach from it.
@@ -87,7 +84,7 @@ void drbd_md_io_complete(struct bio *bio, int error)
87 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there. 84 * ASSERT(atomic_read(&device->md_io_in_use) == 1) there.
88 */ 85 */
89 drbd_md_put_buffer(device); 86 drbd_md_put_buffer(device);
90 md_io->done = 1; 87 device->md_io.done = 1;
91 wake_up(&device->misc_wait); 88 wake_up(&device->misc_wait);
92 bio_put(bio); 89 bio_put(bio);
93 if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */ 90 if (device->ldev) /* special case: drbd_md_read() during drbd_adm_attach() */
@@ -135,6 +132,7 @@ void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(l
135 i = peer_req->i; 132 i = peer_req->i;
136 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO; 133 do_al_complete_io = peer_req->flags & EE_CALL_AL_COMPLETE_IO;
137 block_id = peer_req->block_id; 134 block_id = peer_req->block_id;
135 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
138 136
139 spin_lock_irqsave(&device->resource->req_lock, flags); 137 spin_lock_irqsave(&device->resource->req_lock, flags);
140 device->writ_cnt += peer_req->i.size >> 9; 138 device->writ_cnt += peer_req->i.size >> 9;
@@ -398,9 +396,6 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
398 if (!get_ldev(device)) 396 if (!get_ldev(device))
399 return -EIO; 397 return -EIO;
400 398
401 if (drbd_rs_should_slow_down(device, sector))
402 goto defer;
403
404 /* GFP_TRY, because if there is no memory available right now, this may 399 /* GFP_TRY, because if there is no memory available right now, this may
405 * be rescheduled for later. It is "only" background resync, after all. */ 400 * be rescheduled for later. It is "only" background resync, after all. */
406 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, 401 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector,
@@ -410,7 +405,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector,
410 405
411 peer_req->w.cb = w_e_send_csum; 406 peer_req->w.cb = w_e_send_csum;
412 spin_lock_irq(&device->resource->req_lock); 407 spin_lock_irq(&device->resource->req_lock);
413 list_add(&peer_req->w.list, &device->read_ee); 408 list_add_tail(&peer_req->w.list, &device->read_ee);
414 spin_unlock_irq(&device->resource->req_lock); 409 spin_unlock_irq(&device->resource->req_lock);
415 410
416 atomic_add(size >> 9, &device->rs_sect_ev); 411 atomic_add(size >> 9, &device->rs_sect_ev);
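The one-line change from list_add() to list_add_tail() above only affects the order in which read_ee is drained: head insertion yields newest-first (LIFO), tail insertion keeps submission order (FIFO). A tiny illustration with invented nodes:

struct { struct list_head list; } a, b, c, d;
LIST_HEAD(ee);

list_add(&a.list, &ee);         /* ee: a */
list_add(&b.list, &ee);         /* ee: b, a          -- newest first (LIFO) */

list_add_tail(&c.list, &ee);    /* ee: b, a, c */
list_add_tail(&d.list, &ee);    /* ee: b, a, c, d    -- submission order preserved (FIFO) */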
@@ -452,9 +447,9 @@ void resync_timer_fn(unsigned long data)
452{ 447{
453 struct drbd_device *device = (struct drbd_device *) data; 448 struct drbd_device *device = (struct drbd_device *) data;
454 449
455 if (list_empty(&device->resync_work.list)) 450 drbd_queue_work_if_unqueued(
456 drbd_queue_work(&first_peer_device(device)->connection->sender_work, 451 &first_peer_device(device)->connection->sender_work,
457 &device->resync_work); 452 &device->resync_work);
458} 453}
459 454
460static void fifo_set(struct fifo_buffer *fb, int value) 455static void fifo_set(struct fifo_buffer *fb, int value)
@@ -504,9 +499,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size)
504static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) 499static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in)
505{ 500{
506 struct disk_conf *dc; 501 struct disk_conf *dc;
507 unsigned int want; /* The number of sectors we want in the proxy */ 502 unsigned int want; /* The number of sectors we want in-flight */
508 int req_sect; /* Number of sectors to request in this turn */ 503 int req_sect; /* Number of sectors to request in this turn */
509 int correction; /* Number of sectors more we need in the proxy*/ 504 int correction; /* Number of sectors more we need in-flight */
510 int cps; /* correction per invocation of drbd_rs_controller() */ 505 int cps; /* correction per invocation of drbd_rs_controller() */
511 int steps; /* Number of time steps to plan ahead */ 506 int steps; /* Number of time steps to plan ahead */
512 int curr_corr; 507 int curr_corr;
@@ -577,20 +572,27 @@ static int drbd_rs_number_requests(struct drbd_device *device)
577 * potentially causing a distributed deadlock on congestion during 572 * potentially causing a distributed deadlock on congestion during
578 * online-verify or (checksum-based) resync, if max-buffers, 573 * online-verify or (checksum-based) resync, if max-buffers,
579 * socket buffer sizes and resync rate settings are mis-configured. */ 574 * socket buffer sizes and resync rate settings are mis-configured. */
580 if (mxb - device->rs_in_flight < number) 575
581 number = mxb - device->rs_in_flight; 576 /* note that "number" is in units of "BM_BLOCK_SIZE" (which is 4k),
577 * mxb (as used here, and in drbd_alloc_pages on the peer) is
578 * "number of pages" (typically also 4k),
579 * but "rs_in_flight" is in "sectors" (512 Byte). */
580 if (mxb - device->rs_in_flight/8 < number)
581 number = mxb - device->rs_in_flight/8;
582 582
583 return number; 583 return number;
584} 584}
585 585
586static int make_resync_request(struct drbd_device *device, int cancel) 586static int make_resync_request(struct drbd_device *const device, int cancel)
587{ 587{
588 struct drbd_peer_device *const peer_device = first_peer_device(device);
589 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
588 unsigned long bit; 590 unsigned long bit;
589 sector_t sector; 591 sector_t sector;
590 const sector_t capacity = drbd_get_capacity(device->this_bdev); 592 const sector_t capacity = drbd_get_capacity(device->this_bdev);
591 int max_bio_size; 593 int max_bio_size;
592 int number, rollback_i, size; 594 int number, rollback_i, size;
593 int align, queued, sndbuf; 595 int align, requeue = 0;
594 int i = 0; 596 int i = 0;
595 597
596 if (unlikely(cancel)) 598 if (unlikely(cancel))
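The new comment in this hunk spells out the unit mismatch being fixed: "number" counts 4 KiB resync blocks, mxb counts (4 KiB) pages, but rs_in_flight counts 512-byte sectors, hence the division by 8 (4096 / 512). A hypothetical helper showing just the conversion and clamp:

/* hypothetical helper; 1 resync block = 4096 / 512 = 8 sectors */
static int resync_requests_allowed(int mxb_pages, int rs_in_flight_sectors, int want_blocks)
{
        int in_flight_blocks = rs_in_flight_sectors / 8;

        if (mxb_pages - in_flight_blocks < want_blocks)
                want_blocks = mxb_pages - in_flight_blocks;
        return want_blocks;
}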
@@ -617,17 +619,22 @@ static int make_resync_request(struct drbd_device *device, int cancel)
617 goto requeue; 619 goto requeue;
618 620
619 for (i = 0; i < number; i++) { 621 for (i = 0; i < number; i++) {
620 /* Stop generating RS requests, when half of the send buffer is filled */ 622 /* Stop generating RS requests when half of the send buffer is filled,
621 mutex_lock(&first_peer_device(device)->connection->data.mutex); 623 * but notify TCP that we'd like to have more space. */
622 if (first_peer_device(device)->connection->data.socket) { 624 mutex_lock(&connection->data.mutex);
623 queued = first_peer_device(device)->connection->data.socket->sk->sk_wmem_queued; 625 if (connection->data.socket) {
624 sndbuf = first_peer_device(device)->connection->data.socket->sk->sk_sndbuf; 626 struct sock *sk = connection->data.socket->sk;
625 } else { 627 int queued = sk->sk_wmem_queued;
626 queued = 1; 628 int sndbuf = sk->sk_sndbuf;
627 sndbuf = 0; 629 if (queued > sndbuf / 2) {
628 } 630 requeue = 1;
629 mutex_unlock(&first_peer_device(device)->connection->data.mutex); 631 if (sk->sk_socket)
630 if (queued > sndbuf / 2) 632 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
633 }
634 } else
635 requeue = 1;
636 mutex_unlock(&connection->data.mutex);
637 if (requeue)
631 goto requeue; 638 goto requeue;
632 639
633next_sector: 640next_sector:
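The rewritten send-buffer check keeps the old "back off when the TX queue is half full" rule but now also sets SOCK_NOSPACE so TCP reports when space frees up again. The same test, pulled out into an illustrative helper (the struct sock fields are the real ones used above; the helper itself is not part of the patch):

#include <net/sock.h>

static bool tx_queue_half_full(struct sock *sk)
{
        if (sk->sk_wmem_queued > sk->sk_sndbuf / 2) {
                if (sk->sk_socket)
                        set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                return true;    /* caller should requeue and retry later */
        }
        return false;
}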
@@ -642,8 +649,7 @@ next_sector:
642 649
643 sector = BM_BIT_TO_SECT(bit); 650 sector = BM_BIT_TO_SECT(bit);
644 651
645 if (drbd_rs_should_slow_down(device, sector) || 652 if (drbd_try_rs_begin_io(device, sector)) {
646 drbd_try_rs_begin_io(device, sector)) {
647 device->bm_resync_fo = bit; 653 device->bm_resync_fo = bit;
648 goto requeue; 654 goto requeue;
649 } 655 }
@@ -696,9 +702,9 @@ next_sector:
696 /* adjust very last sectors, in case we are oddly sized */ 702 /* adjust very last sectors, in case we are oddly sized */
697 if (sector + (size>>9) > capacity) 703 if (sector + (size>>9) > capacity)
698 size = (capacity-sector)<<9; 704 size = (capacity-sector)<<9;
699 if (first_peer_device(device)->connection->agreed_pro_version >= 89 && 705
700 first_peer_device(device)->connection->csums_tfm) { 706 if (device->use_csums) {
701 switch (read_for_csum(first_peer_device(device), sector, size)) { 707 switch (read_for_csum(peer_device, sector, size)) {
702 case -EIO: /* Disk failure */ 708 case -EIO: /* Disk failure */
703 put_ldev(device); 709 put_ldev(device);
704 return -EIO; 710 return -EIO;
@@ -717,7 +723,7 @@ next_sector:
717 int err; 723 int err;
718 724
719 inc_rs_pending(device); 725 inc_rs_pending(device);
720 err = drbd_send_drequest(first_peer_device(device), P_RS_DATA_REQUEST, 726 err = drbd_send_drequest(peer_device, P_RS_DATA_REQUEST,
721 sector, size, ID_SYNCER); 727 sector, size, ID_SYNCER);
722 if (err) { 728 if (err) {
723 drbd_err(device, "drbd_send_drequest() failed, aborting...\n"); 729 drbd_err(device, "drbd_send_drequest() failed, aborting...\n");
@@ -774,8 +780,7 @@ static int make_ov_request(struct drbd_device *device, int cancel)
774 780
775 size = BM_BLOCK_SIZE; 781 size = BM_BLOCK_SIZE;
776 782
777 if (drbd_rs_should_slow_down(device, sector) || 783 if (drbd_try_rs_begin_io(device, sector)) {
778 drbd_try_rs_begin_io(device, sector)) {
779 device->ov_position = sector; 784 device->ov_position = sector;
780 goto requeue; 785 goto requeue;
781 } 786 }
@@ -911,7 +916,7 @@ int drbd_resync_finished(struct drbd_device *device)
911 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T) 916 if (os.conn == C_SYNC_TARGET || os.conn == C_PAUSED_SYNC_T)
912 khelper_cmd = "after-resync-target"; 917 khelper_cmd = "after-resync-target";
913 918
914 if (first_peer_device(device)->connection->csums_tfm && device->rs_total) { 919 if (device->use_csums && device->rs_total) {
915 const unsigned long s = device->rs_same_csum; 920 const unsigned long s = device->rs_same_csum;
916 const unsigned long t = device->rs_total; 921 const unsigned long t = device->rs_total;
917 const int ratio = 922 const int ratio =
@@ -1351,13 +1356,15 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
1351{ 1356{
1352 struct drbd_request *req = container_of(w, struct drbd_request, w); 1357 struct drbd_request *req = container_of(w, struct drbd_request, w);
1353 struct drbd_device *device = req->device; 1358 struct drbd_device *device = req->device;
1354 struct drbd_connection *connection = first_peer_device(device)->connection; 1359 struct drbd_peer_device *const peer_device = first_peer_device(device);
1360 struct drbd_connection *const connection = peer_device->connection;
1355 int err; 1361 int err;
1356 1362
1357 if (unlikely(cancel)) { 1363 if (unlikely(cancel)) {
1358 req_mod(req, SEND_CANCELED); 1364 req_mod(req, SEND_CANCELED);
1359 return 0; 1365 return 0;
1360 } 1366 }
1367 req->pre_send_jif = jiffies;
1361 1368
1362 /* this time, no connection->send.current_epoch_writes++; 1369 /* this time, no connection->send.current_epoch_writes++;
1363 * If it was sent, it was the closing barrier for the last 1370 * If it was sent, it was the closing barrier for the last
@@ -1365,7 +1372,7 @@ int w_send_out_of_sync(struct drbd_work *w, int cancel)
1365 * No more barriers will be sent, until we leave AHEAD mode again. */ 1372 * No more barriers will be sent, until we leave AHEAD mode again. */
1366 maybe_send_barrier(connection, req->epoch); 1373 maybe_send_barrier(connection, req->epoch);
1367 1374
1368 err = drbd_send_out_of_sync(first_peer_device(device), req); 1375 err = drbd_send_out_of_sync(peer_device, req);
1369 req_mod(req, OOS_HANDED_TO_NETWORK); 1376 req_mod(req, OOS_HANDED_TO_NETWORK);
1370 1377
1371 return err; 1378 return err;
@@ -1380,19 +1387,21 @@ int w_send_dblock(struct drbd_work *w, int cancel)
1380{ 1387{
1381 struct drbd_request *req = container_of(w, struct drbd_request, w); 1388 struct drbd_request *req = container_of(w, struct drbd_request, w);
1382 struct drbd_device *device = req->device; 1389 struct drbd_device *device = req->device;
1383 struct drbd_connection *connection = first_peer_device(device)->connection; 1390 struct drbd_peer_device *const peer_device = first_peer_device(device);
1391 struct drbd_connection *connection = peer_device->connection;
1384 int err; 1392 int err;
1385 1393
1386 if (unlikely(cancel)) { 1394 if (unlikely(cancel)) {
1387 req_mod(req, SEND_CANCELED); 1395 req_mod(req, SEND_CANCELED);
1388 return 0; 1396 return 0;
1389 } 1397 }
1398 req->pre_send_jif = jiffies;
1390 1399
1391 re_init_if_first_write(connection, req->epoch); 1400 re_init_if_first_write(connection, req->epoch);
1392 maybe_send_barrier(connection, req->epoch); 1401 maybe_send_barrier(connection, req->epoch);
1393 connection->send.current_epoch_writes++; 1402 connection->send.current_epoch_writes++;
1394 1403
1395 err = drbd_send_dblock(first_peer_device(device), req); 1404 err = drbd_send_dblock(peer_device, req);
1396 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 1405 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
1397 1406
1398 return err; 1407 return err;
@@ -1407,19 +1416,21 @@ int w_send_read_req(struct drbd_work *w, int cancel)
1407{ 1416{
1408 struct drbd_request *req = container_of(w, struct drbd_request, w); 1417 struct drbd_request *req = container_of(w, struct drbd_request, w);
1409 struct drbd_device *device = req->device; 1418 struct drbd_device *device = req->device;
1410 struct drbd_connection *connection = first_peer_device(device)->connection; 1419 struct drbd_peer_device *const peer_device = first_peer_device(device);
1420 struct drbd_connection *connection = peer_device->connection;
1411 int err; 1421 int err;
1412 1422
1413 if (unlikely(cancel)) { 1423 if (unlikely(cancel)) {
1414 req_mod(req, SEND_CANCELED); 1424 req_mod(req, SEND_CANCELED);
1415 return 0; 1425 return 0;
1416 } 1426 }
1427 req->pre_send_jif = jiffies;
1417 1428
1418 /* Even read requests may close a write epoch, 1429 /* Even read requests may close a write epoch,
1419 * if there was any yet. */ 1430 * if there was any yet. */
1420 maybe_send_barrier(connection, req->epoch); 1431 maybe_send_barrier(connection, req->epoch);
1421 1432
1422 err = drbd_send_drequest(first_peer_device(device), P_DATA_REQUEST, req->i.sector, req->i.size, 1433 err = drbd_send_drequest(peer_device, P_DATA_REQUEST, req->i.sector, req->i.size,
1423 (unsigned long)req); 1434 (unsigned long)req);
1424 1435
1425 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK); 1436 req_mod(req, err ? SEND_FAILED : HANDED_OVER_TO_NETWORK);
@@ -1433,7 +1444,7 @@ int w_restart_disk_io(struct drbd_work *w, int cancel)
1433 struct drbd_device *device = req->device; 1444 struct drbd_device *device = req->device;
1434 1445
1435 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG) 1446 if (bio_data_dir(req->master_bio) == WRITE && req->rq_state & RQ_IN_ACT_LOG)
1436 drbd_al_begin_io(device, &req->i, false); 1447 drbd_al_begin_io(device, &req->i);
1437 1448
1438 drbd_req_make_private_bio(req, req->master_bio); 1449 drbd_req_make_private_bio(req, req->master_bio);
1439 req->private_bio->bi_bdev = device->ldev->backing_bdev; 1450 req->private_bio->bi_bdev = device->ldev->backing_bdev;
@@ -1601,26 +1612,32 @@ void drbd_rs_controller_reset(struct drbd_device *device)
1601void start_resync_timer_fn(unsigned long data) 1612void start_resync_timer_fn(unsigned long data)
1602{ 1613{
1603 struct drbd_device *device = (struct drbd_device *) data; 1614 struct drbd_device *device = (struct drbd_device *) data;
1604 1615 drbd_device_post_work(device, RS_START);
1605 drbd_queue_work(&first_peer_device(device)->connection->sender_work,
1606 &device->start_resync_work);
1607} 1616}
1608 1617
1609int w_start_resync(struct drbd_work *w, int cancel) 1618static void do_start_resync(struct drbd_device *device)
1610{ 1619{
1611 struct drbd_device *device =
1612 container_of(w, struct drbd_device, start_resync_work);
1613
1614 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) { 1620 if (atomic_read(&device->unacked_cnt) || atomic_read(&device->rs_pending_cnt)) {
1615 drbd_warn(device, "w_start_resync later...\n"); 1621 drbd_warn(device, "postponing start_resync ...\n");
1616 device->start_resync_timer.expires = jiffies + HZ/10; 1622 device->start_resync_timer.expires = jiffies + HZ/10;
1617 add_timer(&device->start_resync_timer); 1623 add_timer(&device->start_resync_timer);
1618 return 0; 1624 return;
1619 } 1625 }
1620 1626
1621 drbd_start_resync(device, C_SYNC_SOURCE); 1627 drbd_start_resync(device, C_SYNC_SOURCE);
1622 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags); 1628 clear_bit(AHEAD_TO_SYNC_SOURCE, &device->flags);
1623 return 0; 1629}
1630
1631static bool use_checksum_based_resync(struct drbd_connection *connection, struct drbd_device *device)
1632{
1633 bool csums_after_crash_only;
1634 rcu_read_lock();
1635 csums_after_crash_only = rcu_dereference(connection->net_conf)->csums_after_crash_only;
1636 rcu_read_unlock();
1637 return connection->agreed_pro_version >= 89 && /* supported? */
1638 connection->csums_tfm && /* configured? */
1639 (csums_after_crash_only == 0 /* use for each resync? */
1640 || test_bit(CRASHED_PRIMARY, &device->flags)); /* or only after Primary crash? */
1624} 1641}
1625 1642
1626/** 1643/**
@@ -1633,6 +1650,8 @@ int w_start_resync(struct drbd_work *w, int cancel)
1633 */ 1650 */
1634void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) 1651void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1635{ 1652{
1653 struct drbd_peer_device *peer_device = first_peer_device(device);
1654 struct drbd_connection *connection = peer_device ? peer_device->connection : NULL;
1636 union drbd_state ns; 1655 union drbd_state ns;
1637 int r; 1656 int r;
1638 1657
@@ -1651,7 +1670,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1651 if (r > 0) { 1670 if (r > 0) {
1652 drbd_info(device, "before-resync-target handler returned %d, " 1671 drbd_info(device, "before-resync-target handler returned %d, "
1653 "dropping connection.\n", r); 1672 "dropping connection.\n", r);
1654 conn_request_state(first_peer_device(device)->connection, NS(conn, C_DISCONNECTING), CS_HARD); 1673 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
1655 return; 1674 return;
1656 } 1675 }
1657 } else /* C_SYNC_SOURCE */ { 1676 } else /* C_SYNC_SOURCE */ {
@@ -1664,7 +1683,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1664 } else { 1683 } else {
1665 drbd_info(device, "before-resync-source handler returned %d, " 1684 drbd_info(device, "before-resync-source handler returned %d, "
1666 "dropping connection.\n", r); 1685 "dropping connection.\n", r);
1667 conn_request_state(first_peer_device(device)->connection, 1686 conn_request_state(connection,
1668 NS(conn, C_DISCONNECTING), CS_HARD); 1687 NS(conn, C_DISCONNECTING), CS_HARD);
1669 return; 1688 return;
1670 } 1689 }
@@ -1672,7 +1691,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1672 } 1691 }
1673 } 1692 }
1674 1693
1675 if (current == first_peer_device(device)->connection->worker.task) { 1694 if (current == connection->worker.task) {
1676 /* The worker should not sleep waiting for state_mutex, 1695 /* The worker should not sleep waiting for state_mutex,
1677 that can take long */ 1696 that can take long */
1678 if (!mutex_trylock(device->state_mutex)) { 1697 if (!mutex_trylock(device->state_mutex)) {
@@ -1733,11 +1752,20 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1733 device->rs_mark_time[i] = now; 1752 device->rs_mark_time[i] = now;
1734 } 1753 }
1735 _drbd_pause_after(device); 1754 _drbd_pause_after(device);
1755 /* Forget potentially stale cached per resync extent bit-counts.
1756 * Open coded drbd_rs_cancel_all(device), we already have IRQs
1757 * disabled, and know the disk state is ok. */
1758 spin_lock(&device->al_lock);
1759 lc_reset(device->resync);
1760 device->resync_locked = 0;
1761 device->resync_wenr = LC_FREE;
1762 spin_unlock(&device->al_lock);
1736 } 1763 }
1737 write_unlock(&global_state_lock); 1764 write_unlock(&global_state_lock);
1738 spin_unlock_irq(&device->resource->req_lock); 1765 spin_unlock_irq(&device->resource->req_lock);
1739 1766
1740 if (r == SS_SUCCESS) { 1767 if (r == SS_SUCCESS) {
1768 wake_up(&device->al_wait); /* for lc_reset() above */
1741 /* reset rs_last_bcast when a resync or verify is started, 1769 /* reset rs_last_bcast when a resync or verify is started,
1742 * to deal with potential jiffies wrap. */ 1770 * to deal with potential jiffies wrap. */
1743 device->rs_last_bcast = jiffies - HZ; 1771 device->rs_last_bcast = jiffies - HZ;
@@ -1746,8 +1774,12 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1746 drbd_conn_str(ns.conn), 1774 drbd_conn_str(ns.conn),
1747 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10), 1775 (unsigned long) device->rs_total << (BM_BLOCK_SHIFT-10),
1748 (unsigned long) device->rs_total); 1776 (unsigned long) device->rs_total);
1749 if (side == C_SYNC_TARGET) 1777 if (side == C_SYNC_TARGET) {
1750 device->bm_resync_fo = 0; 1778 device->bm_resync_fo = 0;
1779 device->use_csums = use_checksum_based_resync(connection, device);
1780 } else {
1781 device->use_csums = 0;
1782 }
1751 1783
1752 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid 1784 /* Since protocol 96, we must serialize drbd_gen_and_send_sync_uuid
1753 * with w_send_oos, or the sync target will get confused as to 1785 * with w_send_oos, or the sync target will get confused as to
@@ -1756,12 +1788,10 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1756 * drbd_resync_finished from here in that case. 1788 * drbd_resync_finished from here in that case.
1757 * We drbd_gen_and_send_sync_uuid here for protocol < 96, 1789 * We drbd_gen_and_send_sync_uuid here for protocol < 96,
1758 * and from after_state_ch otherwise. */ 1790 * and from after_state_ch otherwise. */
1759 if (side == C_SYNC_SOURCE && 1791 if (side == C_SYNC_SOURCE && connection->agreed_pro_version < 96)
1760 first_peer_device(device)->connection->agreed_pro_version < 96) 1792 drbd_gen_and_send_sync_uuid(peer_device);
1761 drbd_gen_and_send_sync_uuid(first_peer_device(device));
1762 1793
1763 if (first_peer_device(device)->connection->agreed_pro_version < 95 && 1794 if (connection->agreed_pro_version < 95 && device->rs_total == 0) {
1764 device->rs_total == 0) {
1765 /* This still has a race (about when exactly the peers 1795 /* This still has a race (about when exactly the peers
1766 * detect connection loss) that can lead to a full sync 1796 * detect connection loss) that can lead to a full sync
1767 * on next handshake. In 8.3.9 we fixed this with explicit 1797 * on next handshake. In 8.3.9 we fixed this with explicit
@@ -1777,7 +1807,7 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1777 int timeo; 1807 int timeo;
1778 1808
1779 rcu_read_lock(); 1809 rcu_read_lock();
1780 nc = rcu_dereference(first_peer_device(device)->connection->net_conf); 1810 nc = rcu_dereference(connection->net_conf);
1781 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9; 1811 timeo = nc->ping_int * HZ + nc->ping_timeo * HZ / 9;
1782 rcu_read_unlock(); 1812 rcu_read_unlock();
1783 schedule_timeout_interruptible(timeo); 1813 schedule_timeout_interruptible(timeo);
@@ -1799,10 +1829,165 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side)
1799 mutex_unlock(device->state_mutex); 1829 mutex_unlock(device->state_mutex);
1800} 1830}
1801 1831
1832static void update_on_disk_bitmap(struct drbd_device *device, bool resync_done)
1833{
1834 struct sib_info sib = { .sib_reason = SIB_SYNC_PROGRESS, };
1835 device->rs_last_bcast = jiffies;
1836
1837 if (!get_ldev(device))
1838 return;
1839
1840 drbd_bm_write_lazy(device, 0);
1841 if (resync_done && is_sync_state(device->state.conn))
1842 drbd_resync_finished(device);
1843
1844 drbd_bcast_event(device, &sib);
1845 /* update timestamp, in case it took a while to write out stuff */
1846 device->rs_last_bcast = jiffies;
1847 put_ldev(device);
1848}
1849
1850static void drbd_ldev_destroy(struct drbd_device *device)
1851{
1852 lc_destroy(device->resync);
1853 device->resync = NULL;
1854 lc_destroy(device->act_log);
1855 device->act_log = NULL;
1856 __no_warn(local,
1857 drbd_free_ldev(device->ldev);
1858 device->ldev = NULL;);
1859 clear_bit(GOING_DISKLESS, &device->flags);
1860 wake_up(&device->misc_wait);
1861}
1862
1863static void go_diskless(struct drbd_device *device)
1864{
1865 D_ASSERT(device, device->state.disk == D_FAILED);
1866 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
1867 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
1868 * the protected members anymore, though, so once put_ldev reaches zero
1869 * again, it will be safe to free them. */
1870
1871 /* Try to write changed bitmap pages, read errors may have just
1872 * set some bits outside the area covered by the activity log.
1873 *
1874 * If we have an IO error during the bitmap writeout,
1875 * we will want a full sync next time, just in case.
1876 * (Do we want a specific meta data flag for this?)
1877 *
1878 * If that does not make it to stable storage either,
1879 * we cannot do anything about that anymore.
1880 *
1881 * We still need to check if both bitmap and ldev are present, we may
1882 * end up here after a failed attach, before ldev was even assigned.
1883 */
1884 if (device->bitmap && device->ldev) {
1885 /* An interrupted resync or similar is allowed to recount bits
1886 * while we detach.
1887 * Any modifications would not be expected anymore, though.
1888 */
1889 if (drbd_bitmap_io_from_worker(device, drbd_bm_write,
1890 "detach", BM_LOCKED_TEST_ALLOWED)) {
1891 if (test_bit(WAS_READ_ERROR, &device->flags)) {
1892 drbd_md_set_flag(device, MDF_FULL_SYNC);
1893 drbd_md_sync(device);
1894 }
1895 }
1896 }
1897
1898 drbd_force_state(device, NS(disk, D_DISKLESS));
1899}
1900
1901static int do_md_sync(struct drbd_device *device)
1902{
1903 drbd_warn(device, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
1904 drbd_md_sync(device);
1905 return 0;
1906}
1907
1908/* only called from drbd_worker thread, no locking */
1909void __update_timing_details(
1910 struct drbd_thread_timing_details *tdp,
1911 unsigned int *cb_nr,
1912 void *cb,
1913 const char *fn, const unsigned int line)
1914{
1915 unsigned int i = *cb_nr % DRBD_THREAD_DETAILS_HIST;
1916 struct drbd_thread_timing_details *td = tdp + i;
1917
1918 td->start_jif = jiffies;
1919 td->cb_addr = cb;
1920 td->caller_fn = fn;
1921 td->line = line;
1922 td->cb_nr = *cb_nr;
1923
1924 i = (i+1) % DRBD_THREAD_DETAILS_HIST;
1925 td = tdp + i;
1926 memset(td, 0, sizeof(*td));
1927
1928 ++(*cb_nr);
1929}
1930
1931#define WORK_PENDING(work_bit, todo) (todo & (1UL << work_bit))
1932static void do_device_work(struct drbd_device *device, const unsigned long todo)
1933{
1934 if (WORK_PENDING(MD_SYNC, todo))
1935 do_md_sync(device);
1936 if (WORK_PENDING(RS_DONE, todo) ||
1937 WORK_PENDING(RS_PROGRESS, todo))
1938 update_on_disk_bitmap(device, WORK_PENDING(RS_DONE, todo));
1939 if (WORK_PENDING(GO_DISKLESS, todo))
1940 go_diskless(device);
1941 if (WORK_PENDING(DESTROY_DISK, todo))
1942 drbd_ldev_destroy(device);
1943 if (WORK_PENDING(RS_START, todo))
1944 do_start_resync(device);
1945}
1946
1947#define DRBD_DEVICE_WORK_MASK \
1948 ((1UL << GO_DISKLESS) \
1949 |(1UL << DESTROY_DISK) \
1950 |(1UL << MD_SYNC) \
1951 |(1UL << RS_START) \
1952 |(1UL << RS_PROGRESS) \
1953 |(1UL << RS_DONE) \
1954 )
1955
1956static unsigned long get_work_bits(unsigned long *flags)
1957{
1958 unsigned long old, new;
1959 do {
1960 old = *flags;
1961 new = old & ~DRBD_DEVICE_WORK_MASK;
1962 } while (cmpxchg(flags, old, new) != old);
1963 return old & DRBD_DEVICE_WORK_MASK;
1964}
1965
1966static void do_unqueued_work(struct drbd_connection *connection)
1967{
1968 struct drbd_peer_device *peer_device;
1969 int vnr;
1970
1971 rcu_read_lock();
1972 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1973 struct drbd_device *device = peer_device->device;
1974 unsigned long todo = get_work_bits(&device->flags);
1975 if (!todo)
1976 continue;
1977
1978 kref_get(&device->kref);
1979 rcu_read_unlock();
1980 do_device_work(device, todo);
1981 kref_put(&device->kref, drbd_destroy_device);
1982 rcu_read_lock();
1983 }
1984 rcu_read_unlock();
1985}
1986
1802static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) 1987static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list)
1803{ 1988{
1804 spin_lock_irq(&queue->q_lock); 1989 spin_lock_irq(&queue->q_lock);
1805 list_splice_init(&queue->q, work_list); 1990 list_splice_tail_init(&queue->q, work_list);
1806 spin_unlock_irq(&queue->q_lock); 1991 spin_unlock_irq(&queue->q_lock);
1807 return !list_empty(work_list); 1992 return !list_empty(work_list);
1808} 1993}
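
The __update_timing_details() helper added above keeps a small circular history of the callbacks the worker ran, zeroing the slot after the newest entry as an end marker. Below is a minimal userspace sketch of that pattern; the history size, names and main() are illustrative, not the driver's.

#include <string.h>
#include <stdio.h>

#define HIST 16

struct timing_detail {
	unsigned int cb_nr;
	const char *caller_fn;
	unsigned int line;
};

static struct timing_detail hist[HIST];
static unsigned int cb_nr;

/* Record one callback invocation, then clear the next slot so a reader
 * can tell where the history currently ends. */
static void record(const char *fn, unsigned int line)
{
	unsigned int i = cb_nr % HIST;

	hist[i].cb_nr = cb_nr;
	hist[i].caller_fn = fn;
	hist[i].line = line;
	memset(&hist[(i + 1) % HIST], 0, sizeof(hist[0]));
	cb_nr++;
}

int main(void)
{
	record("wait_for_work", 1);
	record("do_unqueued_work", 2);
	printf("%u callbacks recorded, last: %s\n",
	       cb_nr, hist[(cb_nr - 1) % HIST].caller_fn);
	return 0;
}
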
@@ -1851,7 +2036,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
1851 /* dequeue single item only, 2036 /* dequeue single item only,
1852 * we still use drbd_queue_work_front() in some places */ 2037 * we still use drbd_queue_work_front() in some places */
1853 if (!list_empty(&connection->sender_work.q)) 2038 if (!list_empty(&connection->sender_work.q))
1854 list_move(connection->sender_work.q.next, work_list); 2039 list_splice_tail_init(&connection->sender_work.q, work_list);
1855 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */ 2040 spin_unlock(&connection->sender_work.q_lock); /* FIXME get rid of this one? */
1856 if (!list_empty(work_list) || signal_pending(current)) { 2041 if (!list_empty(work_list) || signal_pending(current)) {
1857 spin_unlock_irq(&connection->resource->req_lock); 2042 spin_unlock_irq(&connection->resource->req_lock);
@@ -1873,6 +2058,14 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
1873 if (send_barrier) 2058 if (send_barrier)
1874 maybe_send_barrier(connection, 2059 maybe_send_barrier(connection,
1875 connection->send.current_epoch_nr + 1); 2060 connection->send.current_epoch_nr + 1);
2061
2062 if (test_bit(DEVICE_WORK_PENDING, &connection->flags))
2063 break;
2064
2065 /* drbd_send() may have called flush_signals() */
2066 if (get_t_state(&connection->worker) != RUNNING)
2067 break;
2068
1876 schedule(); 2069 schedule();
1877 /* may be woken up for other things but new work, too, 2070 /* may be woken up for other things but new work, too,
1878 * e.g. if the current epoch got closed. 2071 * e.g. if the current epoch got closed.
@@ -1906,10 +2099,15 @@ int drbd_worker(struct drbd_thread *thi)
1906 while (get_t_state(thi) == RUNNING) { 2099 while (get_t_state(thi) == RUNNING) {
1907 drbd_thread_current_set_cpu(thi); 2100 drbd_thread_current_set_cpu(thi);
1908 2101
1909 /* as long as we use drbd_queue_work_front(), 2102 if (list_empty(&work_list)) {
1910 * we may only dequeue single work items here, not batches. */ 2103 update_worker_timing_details(connection, wait_for_work);
1911 if (list_empty(&work_list))
1912 wait_for_work(connection, &work_list); 2104 wait_for_work(connection, &work_list);
2105 }
2106
2107 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2108 update_worker_timing_details(connection, do_unqueued_work);
2109 do_unqueued_work(connection);
2110 }
1913 2111
1914 if (signal_pending(current)) { 2112 if (signal_pending(current)) {
1915 flush_signals(current); 2113 flush_signals(current);
@@ -1926,6 +2124,7 @@ int drbd_worker(struct drbd_thread *thi)
1926 while (!list_empty(&work_list)) { 2124 while (!list_empty(&work_list)) {
1927 w = list_first_entry(&work_list, struct drbd_work, list); 2125 w = list_first_entry(&work_list, struct drbd_work, list);
1928 list_del_init(&w->list); 2126 list_del_init(&w->list);
2127 update_worker_timing_details(connection, w->cb);
1929 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0) 2128 if (w->cb(w, connection->cstate < C_WF_REPORT_PARAMS) == 0)
1930 continue; 2129 continue;
1931 if (connection->cstate >= C_WF_REPORT_PARAMS) 2130 if (connection->cstate >= C_WF_REPORT_PARAMS)
@@ -1934,13 +2133,18 @@ int drbd_worker(struct drbd_thread *thi)
1934 } 2133 }
1935 2134
1936 do { 2135 do {
2136 if (test_and_clear_bit(DEVICE_WORK_PENDING, &connection->flags)) {
2137 update_worker_timing_details(connection, do_unqueued_work);
2138 do_unqueued_work(connection);
2139 }
1937 while (!list_empty(&work_list)) { 2140 while (!list_empty(&work_list)) {
1938 w = list_first_entry(&work_list, struct drbd_work, list); 2141 w = list_first_entry(&work_list, struct drbd_work, list);
1939 list_del_init(&w->list); 2142 list_del_init(&w->list);
2143 update_worker_timing_details(connection, w->cb);
1940 w->cb(w, 1); 2144 w->cb(w, 1);
1941 } 2145 }
1942 dequeue_work_batch(&connection->sender_work, &work_list); 2146 dequeue_work_batch(&connection->sender_work, &work_list);
1943 } while (!list_empty(&work_list)); 2147 } while (!list_empty(&work_list) || test_bit(DEVICE_WORK_PENDING, &connection->flags));
1944 2148
1945 rcu_read_lock(); 2149 rcu_read_lock();
1946 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { 2150 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
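
The device-work path introduced above (get_work_bits() feeding do_unqueued_work()) claims all pending work bits in one atomic step, so each bit is observed and handled exactly once even if new bits are set concurrently. A standalone sketch of that claim-and-clear idiom follows, using C11 atomics instead of the kernel's cmpxchg(); the bit names and main() are made up for illustration.

#include <stdatomic.h>
#include <stdio.h>

#define GO_DISKLESS	0
#define MD_SYNC		1
#define RS_START	2
#define WORK_MASK	((1UL << GO_DISKLESS) | (1UL << MD_SYNC) | (1UL << RS_START))

/* Atomically grab every pending work bit and clear it in *flags,
 * mirroring the cmpxchg loop in get_work_bits() above. */
static unsigned long claim_work_bits(atomic_ulong *flags)
{
	unsigned long old = atomic_load(flags), new;

	do {
		new = old & ~WORK_MASK;
	} while (!atomic_compare_exchange_weak(flags, &old, new));
	return old & WORK_MASK;
}

int main(void)
{
	atomic_ulong flags = (1UL << MD_SYNC) | (1UL << RS_START);
	unsigned long todo = claim_work_bits(&flags);

	if (todo & (1UL << MD_SYNC))
		printf("would run do_md_sync()\n");
	if (todo & (1UL << RS_START))
		printf("would run do_start_resync()\n");
	printf("remaining flags: %lu\n", (unsigned long)atomic_load(&flags));
	return 0;
}

The point of the loop is that the bits are consumed and cleared in a single successful compare-and-swap, so a bit set after the swap is simply left for the next pass rather than lost.
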
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f63d358f3d93..0a581400de0f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -15,17 +15,22 @@
15#include <linux/numa.h> 15#include <linux/numa.h>
16 16
17#define PART_BITS 4 17#define PART_BITS 4
18#define VQ_NAME_LEN 16
18 19
19static int major; 20static int major;
20static DEFINE_IDA(vd_index_ida); 21static DEFINE_IDA(vd_index_ida);
21 22
22static struct workqueue_struct *virtblk_wq; 23static struct workqueue_struct *virtblk_wq;
23 24
25struct virtio_blk_vq {
26 struct virtqueue *vq;
27 spinlock_t lock;
28 char name[VQ_NAME_LEN];
29} ____cacheline_aligned_in_smp;
30
24struct virtio_blk 31struct virtio_blk
25{ 32{
26 struct virtio_device *vdev; 33 struct virtio_device *vdev;
27 struct virtqueue *vq;
28 spinlock_t vq_lock;
29 34
30 /* The disk structure for the kernel. */ 35 /* The disk structure for the kernel. */
31 struct gendisk *disk; 36 struct gendisk *disk;
@@ -47,6 +52,10 @@ struct virtio_blk
47 52
48 /* Ida index - used to track minor number allocations. */ 53 /* Ida index - used to track minor number allocations. */
49 int index; 54 int index;
55
56 /* num of vqs */
57 int num_vqs;
58 struct virtio_blk_vq *vqs;
50}; 59};
51 60
52struct virtblk_req 61struct virtblk_req
@@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq)
133{ 142{
134 struct virtio_blk *vblk = vq->vdev->priv; 143 struct virtio_blk *vblk = vq->vdev->priv;
135 bool req_done = false; 144 bool req_done = false;
145 int qid = vq->index;
136 struct virtblk_req *vbr; 146 struct virtblk_req *vbr;
137 unsigned long flags; 147 unsigned long flags;
138 unsigned int len; 148 unsigned int len;
139 149
140 spin_lock_irqsave(&vblk->vq_lock, flags); 150 spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
141 do { 151 do {
142 virtqueue_disable_cb(vq); 152 virtqueue_disable_cb(vq);
143 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { 153 while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
144 blk_mq_complete_request(vbr->req); 154 blk_mq_complete_request(vbr->req);
145 req_done = true; 155 req_done = true;
146 } 156 }
@@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq)
151 /* In case queue is stopped waiting for more buffers. */ 161 /* In case queue is stopped waiting for more buffers. */
152 if (req_done) 162 if (req_done)
153 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); 163 blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
154 spin_unlock_irqrestore(&vblk->vq_lock, flags); 164 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
155} 165}
156 166
157static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) 167static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
@@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
160 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 170 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
161 unsigned long flags; 171 unsigned long flags;
162 unsigned int num; 172 unsigned int num;
173 int qid = hctx->queue_num;
163 const bool last = (req->cmd_flags & REQ_END) != 0; 174 const bool last = (req->cmd_flags & REQ_END) != 0;
164 int err; 175 int err;
165 bool notify = false; 176 bool notify = false;
@@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
202 vbr->out_hdr.type |= VIRTIO_BLK_T_IN; 213 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
203 } 214 }
204 215
205 spin_lock_irqsave(&vblk->vq_lock, flags); 216 spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
206 err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); 217 err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
207 if (err) { 218 if (err) {
208 virtqueue_kick(vblk->vq); 219 virtqueue_kick(vblk->vqs[qid].vq);
209 blk_mq_stop_hw_queue(hctx); 220 blk_mq_stop_hw_queue(hctx);
210 spin_unlock_irqrestore(&vblk->vq_lock, flags); 221 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
211 /* Out of mem doesn't actually happen, since we fall back 222 /* Out of mem doesn't actually happen, since we fall back
212 * to direct descriptors */ 223 * to direct descriptors */
213 if (err == -ENOMEM || err == -ENOSPC) 224 if (err == -ENOMEM || err == -ENOSPC)
@@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
215 return BLK_MQ_RQ_QUEUE_ERROR; 226 return BLK_MQ_RQ_QUEUE_ERROR;
216 } 227 }
217 228
218 if (last && virtqueue_kick_prepare(vblk->vq)) 229 if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
219 notify = true; 230 notify = true;
220 spin_unlock_irqrestore(&vblk->vq_lock, flags); 231 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
221 232
222 if (notify) 233 if (notify)
223 virtqueue_notify(vblk->vq); 234 virtqueue_notify(vblk->vqs[qid].vq);
224 return BLK_MQ_RQ_QUEUE_OK; 235 return BLK_MQ_RQ_QUEUE_OK;
225} 236}
226 237
@@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev)
377static int init_vq(struct virtio_blk *vblk) 388static int init_vq(struct virtio_blk *vblk)
378{ 389{
379 int err = 0; 390 int err = 0;
391 int i;
392 vq_callback_t **callbacks;
393 const char **names;
394 struct virtqueue **vqs;
395 unsigned short num_vqs;
396 struct virtio_device *vdev = vblk->vdev;
397
398 err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
399 struct virtio_blk_config, num_queues,
400 &num_vqs);
401 if (err)
402 num_vqs = 1;
403
404 vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
405 if (!vblk->vqs) {
406 err = -ENOMEM;
407 goto out;
408 }
409
410 names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
411 if (!names)
412 goto err_names;
413
414 callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
415 if (!callbacks)
416 goto err_callbacks;
417
418 vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
419 if (!vqs)
420 goto err_vqs;
380 421
381 /* We expect one virtqueue, for output. */ 422 for (i = 0; i < num_vqs; i++) {
382 vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests"); 423 callbacks[i] = virtblk_done;
383 if (IS_ERR(vblk->vq)) 424 snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
384 err = PTR_ERR(vblk->vq); 425 names[i] = vblk->vqs[i].name;
426 }
427
428 /* Discover virtqueues and write information to configuration. */
429 err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
430 if (err)
431 goto err_find_vqs;
385 432
433 for (i = 0; i < num_vqs; i++) {
434 spin_lock_init(&vblk->vqs[i].lock);
435 vblk->vqs[i].vq = vqs[i];
436 }
437 vblk->num_vqs = num_vqs;
438
439 err_find_vqs:
440 kfree(vqs);
441 err_vqs:
442 kfree(callbacks);
443 err_callbacks:
444 kfree(names);
445 err_names:
446 if (err)
447 kfree(vblk->vqs);
448 out:
386 return err; 449 return err;
387} 450}
388 451
@@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev)
551 err = init_vq(vblk); 614 err = init_vq(vblk);
552 if (err) 615 if (err)
553 goto out_free_vblk; 616 goto out_free_vblk;
554 spin_lock_init(&vblk->vq_lock);
555 617
556 /* FIXME: How many partitions? How long is a piece of string? */ 618 /* FIXME: How many partitions? How long is a piece of string? */
557 vblk->disk = alloc_disk(1 << PART_BITS); 619 vblk->disk = alloc_disk(1 << PART_BITS);
@@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev)
562 624
563 /* Default queue sizing is to fill the ring. */ 625 /* Default queue sizing is to fill the ring. */
564 if (!virtblk_queue_depth) { 626 if (!virtblk_queue_depth) {
565 virtblk_queue_depth = vblk->vq->num_free; 627 virtblk_queue_depth = vblk->vqs[0].vq->num_free;
566 /* ... but without indirect descs, we use 2 descs per req */ 628 /* ... but without indirect descs, we use 2 descs per req */
567 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) 629 if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
568 virtblk_queue_depth /= 2; 630 virtblk_queue_depth /= 2;
@@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev)
570 632
571 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); 633 memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
572 vblk->tag_set.ops = &virtio_mq_ops; 634 vblk->tag_set.ops = &virtio_mq_ops;
573 vblk->tag_set.nr_hw_queues = 1;
574 vblk->tag_set.queue_depth = virtblk_queue_depth; 635 vblk->tag_set.queue_depth = virtblk_queue_depth;
575 vblk->tag_set.numa_node = NUMA_NO_NODE; 636 vblk->tag_set.numa_node = NUMA_NO_NODE;
576 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 637 vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev)
578 sizeof(struct virtblk_req) + 639 sizeof(struct virtblk_req) +
579 sizeof(struct scatterlist) * sg_elems; 640 sizeof(struct scatterlist) * sg_elems;
580 vblk->tag_set.driver_data = vblk; 641 vblk->tag_set.driver_data = vblk;
642 vblk->tag_set.nr_hw_queues = vblk->num_vqs;
581 643
582 err = blk_mq_alloc_tag_set(&vblk->tag_set); 644 err = blk_mq_alloc_tag_set(&vblk->tag_set);
583 if (err) 645 if (err)
@@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev)
727 refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount); 789 refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
728 put_disk(vblk->disk); 790 put_disk(vblk->disk);
729 vdev->config->del_vqs(vdev); 791 vdev->config->del_vqs(vdev);
792 kfree(vblk->vqs);
730 kfree(vblk); 793 kfree(vblk);
731 794
732 /* Only free device id if we don't have any users */ 795 /* Only free device id if we don't have any users */
@@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = {
777static unsigned int features[] = { 840static unsigned int features[] = {
778 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY, 841 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
779 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI, 842 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
780 VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE 843 VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
844 VIRTIO_BLK_F_MQ,
781}; 845};
782 846
783static struct virtio_driver virtio_blk = { 847static struct virtio_driver virtio_blk = {
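
The virtio_blk change above replaces the single vq_lock with one virtqueue and one lock per hardware queue, indexed by hctx->queue_num, so submissions on different queues no longer serialize on a global lock. A userspace model of that layout follows; the types, queue count and main() are illustrative and not the driver's.

#include <pthread.h>
#include <stdio.h>

#define NUM_QUEUES 4

struct blk_vq {
	pthread_spinlock_t lock;	/* was: one global vq_lock */
	int inflight;
};

static struct blk_vq vqs[NUM_QUEUES];

/* Submissions pick their queue by id and only take that queue's lock. */
static void submit(int qid)
{
	pthread_spin_lock(&vqs[qid].lock);
	vqs[qid].inflight++;
	pthread_spin_unlock(&vqs[qid].lock);
}

int main(void)
{
	for (int i = 0; i < NUM_QUEUES; i++)
		pthread_spin_init(&vqs[i].lock, PTHREAD_PROCESS_PRIVATE);
	submit(0);
	submit(3);
	printf("q0=%d q3=%d\n", vqs[0].inflight, vqs[3].inflight);
	return 0;
}
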
diff --git a/drivers/cpufreq/pmac64-cpufreq.c b/drivers/cpufreq/pmac64-cpufreq.c
index 8bc422977b5b..4ff86878727f 100644
--- a/drivers/cpufreq/pmac64-cpufreq.c
+++ b/drivers/cpufreq/pmac64-cpufreq.c
@@ -499,8 +499,7 @@ static int __init g5_pm72_cpufreq_init(struct device_node *cpunode)
499 } 499 }
500 500
501 /* Lookup the i2c hwclock */ 501 /* Lookup the i2c hwclock */
502 for (hwclock = NULL; 502 for_each_node_by_name(hwclock, "i2c-hwclock") {
503 (hwclock = of_find_node_by_name(hwclock, "i2c-hwclock")) != NULL;){
504 const char *loc = of_get_property(hwclock, 503 const char *loc = of_get_property(hwclock,
505 "hwctrl-location", NULL); 504 "hwctrl-location", NULL);
506 if (loc == NULL) 505 if (loc == NULL)
diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c
index 544f6d327ede..061407d59520 100644
--- a/drivers/crypto/nx/nx-842.c
+++ b/drivers/crypto/nx/nx-842.c
@@ -936,28 +936,14 @@ static int nx842_OF_upd(struct property *new_prop)
936 goto error_out; 936 goto error_out;
937 } 937 }
938 938
939 /* Set ptr to new property if provided */ 939 /*
940 if (new_prop) { 940 * If this is a property update, there are only certain properties that
941 /* Single property */ 941 * we care about. Bail if it isn't in the below list
942 if (!strncmp(new_prop->name, "status", new_prop->length)) { 942 */
943 status = new_prop; 943 if (new_prop && (strncmp(new_prop->name, "status", new_prop->length) ||
944 944 strncmp(new_prop->name, "ibm,max-sg-len", new_prop->length) ||
945 } else if (!strncmp(new_prop->name, "ibm,max-sg-len", 945 strncmp(new_prop->name, "ibm,max-sync-cop", new_prop->length)))
946 new_prop->length)) { 946 goto out;
947 maxsglen = new_prop;
948
949 } else if (!strncmp(new_prop->name, "ibm,max-sync-cop",
950 new_prop->length)) {
951 maxsyncop = new_prop;
952
953 } else {
954 /*
955 * Skip the update, the property being updated
956 * has no impact.
957 */
958 goto out;
959 }
960 }
961 947
962 /* Perform property updates */ 948 /* Perform property updates */
963 ret = nx842_OF_upd_status(new_devdata, status); 949 ret = nx842_OF_upd_status(new_devdata, status);
diff --git a/drivers/edac/cell_edac.c b/drivers/edac/cell_edac.c
index 374b57fc596d..a12c8552f6a6 100644
--- a/drivers/edac/cell_edac.c
+++ b/drivers/edac/cell_edac.c
@@ -134,8 +134,7 @@ static void cell_edac_init_csrows(struct mem_ctl_info *mci)
134 int j; 134 int j;
135 u32 nr_pages; 135 u32 nr_pages;
136 136
137 for (np = NULL; 137 for_each_node_by_name(np, "memory") {
138 (np = of_find_node_by_name(np, "memory")) != NULL;) {
139 struct resource r; 138 struct resource r;
140 139
141 /* We "know" that the Cell firmware only creates one entry 140 /* We "know" that the Cell firmware only creates one entry
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index d3d0e8cf27b4..d6c767ace916 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -382,6 +382,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
382 if (err) 382 if (err)
383 return err; 383 return err;
384 384
385 if (val > 255)
386 return -EINVAL;
387
385 data->vrm = val; 388 data->vrm = val;
386 return count; 389 return count;
387} 390}
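
This and the following hwmon patches add the same guard before storing the user-supplied VRM value: these drivers keep vrm in a u8, so without the check an out-of-range write would be truncated silently instead of rejected. A tiny standalone illustration, with invented names:

#include <stdio.h>

static int store_vrm(unsigned long val, unsigned char *vrm)
{
	if (val > 255)
		return -1;	/* the patches now reject this with -EINVAL */
	*vrm = val;		/* otherwise the value fits the u8 field */
	return 0;
}

int main(void)
{
	unsigned char vrm = 0;

	printf("store 300 -> %d (vrm stays %u)\n", store_vrm(300, &vrm), vrm);
	printf("store  91 -> %d (vrm now %u)\n", store_vrm(91, &vrm), vrm);
	return 0;
}

Without the guard, storing 300 would have left vrm at 44 (300 modulo 256), which is why the identical check is repeated across adm1026, asb100, dme1737, lm87, pc87360, vt1211, w83627hf, w83791d and w83793 below.
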
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index ca8430f92564..e67b9a50ac7c 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -1085,6 +1085,9 @@ static ssize_t store_vrm_reg(struct device *dev, struct device_attribute *attr,
1085 if (err) 1085 if (err)
1086 return err; 1086 return err;
1087 1087
1088 if (val > 255)
1089 return -EINVAL;
1090
1088 data->vrm = val; 1091 data->vrm = val;
1089 return count; 1092 return count;
1090} 1093}
diff --git a/drivers/hwmon/ads1015.c b/drivers/hwmon/ads1015.c
index 22e0c926989d..126516414c11 100644
--- a/drivers/hwmon/ads1015.c
+++ b/drivers/hwmon/ads1015.c
@@ -212,6 +212,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
212 dev_err(&client->dev, 212 dev_err(&client->dev,
213 "invalid gain on %s\n", 213 "invalid gain on %s\n",
214 node->full_name); 214 node->full_name);
215 return -EINVAL;
215 } 216 }
216 } 217 }
217 218
@@ -222,6 +223,7 @@ static int ads1015_get_channels_config_of(struct i2c_client *client)
222 dev_err(&client->dev, 223 dev_err(&client->dev,
223 "invalid data_rate on %s\n", 224 "invalid data_rate on %s\n",
224 node->full_name); 225 node->full_name);
226 return -EINVAL;
225 } 227 }
226 } 228 }
227 229
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index f96063680e58..272fcc837ecc 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -510,6 +510,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
510 err = kstrtoul(buf, 10, &val); 510 err = kstrtoul(buf, 10, &val);
511 if (err) 511 if (err)
512 return err; 512 return err;
513
514 if (val > 255)
515 return -EINVAL;
516
513 data->vrm = val; 517 data->vrm = val;
514 return count; 518 return count;
515} 519}
diff --git a/drivers/hwmon/dme1737.c b/drivers/hwmon/dme1737.c
index 4ae3fff13f44..bea0a344fab5 100644
--- a/drivers/hwmon/dme1737.c
+++ b/drivers/hwmon/dme1737.c
@@ -247,8 +247,8 @@ struct dme1737_data {
247 u8 pwm_acz[3]; 247 u8 pwm_acz[3];
248 u8 pwm_freq[6]; 248 u8 pwm_freq[6];
249 u8 pwm_rr[2]; 249 u8 pwm_rr[2];
250 u8 zone_low[3]; 250 s8 zone_low[3];
251 u8 zone_abs[3]; 251 s8 zone_abs[3];
252 u8 zone_hyst[2]; 252 u8 zone_hyst[2];
253 u32 alarms; 253 u32 alarms;
254}; 254};
@@ -277,7 +277,7 @@ static inline int IN_FROM_REG(int reg, int nominal, int res)
277 return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2)); 277 return (reg * nominal + (3 << (res - 3))) / (3 << (res - 2));
278} 278}
279 279
280static inline int IN_TO_REG(int val, int nominal) 280static inline int IN_TO_REG(long val, int nominal)
281{ 281{
282 return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255); 282 return clamp_val((val * 192 + nominal / 2) / nominal, 0, 255);
283} 283}
@@ -293,7 +293,7 @@ static inline int TEMP_FROM_REG(int reg, int res)
293 return (reg * 1000) >> (res - 8); 293 return (reg * 1000) >> (res - 8);
294} 294}
295 295
296static inline int TEMP_TO_REG(int val) 296static inline int TEMP_TO_REG(long val)
297{ 297{
298 return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127); 298 return clamp_val((val < 0 ? val - 500 : val + 500) / 1000, -128, 127);
299} 299}
@@ -308,7 +308,7 @@ static inline int TEMP_RANGE_FROM_REG(int reg)
308 return TEMP_RANGE[(reg >> 4) & 0x0f]; 308 return TEMP_RANGE[(reg >> 4) & 0x0f];
309} 309}
310 310
311static int TEMP_RANGE_TO_REG(int val, int reg) 311static int TEMP_RANGE_TO_REG(long val, int reg)
312{ 312{
313 int i; 313 int i;
314 314
@@ -331,7 +331,7 @@ static inline int TEMP_HYST_FROM_REG(int reg, int ix)
331 return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000; 331 return (((ix == 1) ? reg : reg >> 4) & 0x0f) * 1000;
332} 332}
333 333
334static inline int TEMP_HYST_TO_REG(int val, int ix, int reg) 334static inline int TEMP_HYST_TO_REG(long val, int ix, int reg)
335{ 335{
336 int hyst = clamp_val((val + 500) / 1000, 0, 15); 336 int hyst = clamp_val((val + 500) / 1000, 0, 15);
337 337
@@ -347,7 +347,7 @@ static inline int FAN_FROM_REG(int reg, int tpc)
347 return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg; 347 return (reg == 0 || reg == 0xffff) ? 0 : 90000 * 60 / reg;
348} 348}
349 349
350static inline int FAN_TO_REG(int val, int tpc) 350static inline int FAN_TO_REG(long val, int tpc)
351{ 351{
352 if (tpc) { 352 if (tpc) {
353 return clamp_val(val / tpc, 0, 0xffff); 353 return clamp_val(val / tpc, 0, 0xffff);
@@ -379,7 +379,7 @@ static inline int FAN_TYPE_FROM_REG(int reg)
379 return (edge > 0) ? 1 << (edge - 1) : 0; 379 return (edge > 0) ? 1 << (edge - 1) : 0;
380} 380}
381 381
382static inline int FAN_TYPE_TO_REG(int val, int reg) 382static inline int FAN_TYPE_TO_REG(long val, int reg)
383{ 383{
384 int edge = (val == 4) ? 3 : val; 384 int edge = (val == 4) ? 3 : val;
385 385
@@ -402,7 +402,7 @@ static int FAN_MAX_FROM_REG(int reg)
402 return 1000 + i * 500; 402 return 1000 + i * 500;
403} 403}
404 404
405static int FAN_MAX_TO_REG(int val) 405static int FAN_MAX_TO_REG(long val)
406{ 406{
407 int i; 407 int i;
408 408
@@ -460,7 +460,7 @@ static inline int PWM_ACZ_FROM_REG(int reg)
460 return acz[(reg >> 5) & 0x07]; 460 return acz[(reg >> 5) & 0x07];
461} 461}
462 462
463static inline int PWM_ACZ_TO_REG(int val, int reg) 463static inline int PWM_ACZ_TO_REG(long val, int reg)
464{ 464{
465 int acz = (val == 4) ? 2 : val - 1; 465 int acz = (val == 4) ? 2 : val - 1;
466 466
@@ -476,7 +476,7 @@ static inline int PWM_FREQ_FROM_REG(int reg)
476 return PWM_FREQ[reg & 0x0f]; 476 return PWM_FREQ[reg & 0x0f];
477} 477}
478 478
479static int PWM_FREQ_TO_REG(int val, int reg) 479static int PWM_FREQ_TO_REG(long val, int reg)
480{ 480{
481 int i; 481 int i;
482 482
@@ -510,7 +510,7 @@ static inline int PWM_RR_FROM_REG(int reg, int ix)
510 return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0; 510 return (rr & 0x08) ? PWM_RR[rr & 0x07] : 0;
511} 511}
512 512
513static int PWM_RR_TO_REG(int val, int ix, int reg) 513static int PWM_RR_TO_REG(long val, int ix, int reg)
514{ 514{
515 int i; 515 int i;
516 516
@@ -528,7 +528,7 @@ static inline int PWM_RR_EN_FROM_REG(int reg, int ix)
528 return PWM_RR_FROM_REG(reg, ix) ? 1 : 0; 528 return PWM_RR_FROM_REG(reg, ix) ? 1 : 0;
529} 529}
530 530
531static inline int PWM_RR_EN_TO_REG(int val, int ix, int reg) 531static inline int PWM_RR_EN_TO_REG(long val, int ix, int reg)
532{ 532{
533 int en = (ix == 1) ? 0x80 : 0x08; 533 int en = (ix == 1) ? 0x80 : 0x08;
534 534
@@ -1481,13 +1481,16 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
1481 const char *buf, size_t count) 1481 const char *buf, size_t count)
1482{ 1482{
1483 struct dme1737_data *data = dev_get_drvdata(dev); 1483 struct dme1737_data *data = dev_get_drvdata(dev);
1484 long val; 1484 unsigned long val;
1485 int err; 1485 int err;
1486 1486
1487 err = kstrtol(buf, 10, &val); 1487 err = kstrtoul(buf, 10, &val);
1488 if (err) 1488 if (err)
1489 return err; 1489 return err;
1490 1490
1491 if (val > 255)
1492 return -EINVAL;
1493
1491 data->vrm = val; 1494 data->vrm = val;
1492 return count; 1495 return count;
1493} 1496}
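
The dme1737 hunk above also switches zone_low and zone_abs from u8 to s8 because the zone temperature limits are signed register values. Roughly, TEMP_FROM_REG(reg, 8) is reg * 1000, so the storage type decides whether a negative limit survives the round trip. A standalone illustration (not driver code, and the resolution is simplified to 8 bits):

#include <stdio.h>

/* Approximation of dme1737 TEMP_FROM_REG(reg, 8): millidegrees C. */
static int temp_from_reg(int reg)
{
	return reg * 1000;
}

int main(void)
{
	unsigned char raw = 0xF6;	/* register encoding of -10 degC */

	printf("stored as u8: %d mdegC\n", temp_from_reg((unsigned char)raw));
	printf("stored as s8: %d mdegC\n", temp_from_reg((signed char)raw));
	return 0;
}

With u8 storage the -10 degC limit reads back as 246000 mdegC; with s8 it reads back as -10000 mdegC, which is the behaviour the type change restores.
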
diff --git a/drivers/hwmon/emc6w201.c b/drivers/hwmon/emc6w201.c
index e87da902f3ae..ada90716448d 100644
--- a/drivers/hwmon/emc6w201.c
+++ b/drivers/hwmon/emc6w201.c
@@ -252,12 +252,12 @@ static ssize_t set_temp(struct device *dev, struct device_attribute *devattr,
252 if (err < 0) 252 if (err < 0)
253 return err; 253 return err;
254 254
255 val /= 1000; 255 val = DIV_ROUND_CLOSEST(val, 1000);
256 reg = (sf == min) ? EMC6W201_REG_TEMP_LOW(nr) 256 reg = (sf == min) ? EMC6W201_REG_TEMP_LOW(nr)
257 : EMC6W201_REG_TEMP_HIGH(nr); 257 : EMC6W201_REG_TEMP_HIGH(nr);
258 258
259 mutex_lock(&data->update_lock); 259 mutex_lock(&data->update_lock);
260 data->temp[sf][nr] = clamp_val(val, -127, 128); 260 data->temp[sf][nr] = clamp_val(val, -127, 127);
261 err = emc6w201_write8(client, reg, data->temp[sf][nr]); 261 err = emc6w201_write8(client, reg, data->temp[sf][nr]);
262 mutex_unlock(&data->update_lock); 262 mutex_unlock(&data->update_lock);
263 263
diff --git a/drivers/hwmon/hih6130.c b/drivers/hwmon/hih6130.c
index 0e01c4e13e33..7b73d2002d3e 100644
--- a/drivers/hwmon/hih6130.c
+++ b/drivers/hwmon/hih6130.c
@@ -238,6 +238,9 @@ static int hih6130_probe(struct i2c_client *client,
238 hih6130->client = client; 238 hih6130->client = client;
239 mutex_init(&hih6130->lock); 239 mutex_init(&hih6130->lock);
240 240
241 if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_QUICK))
242 hih6130->write_length = 1;
243
241 hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name, 244 hwmon_dev = devm_hwmon_device_register_with_groups(dev, client->name,
242 hih6130, 245 hih6130,
243 hih6130_groups); 246 hih6130_groups);
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index ba1d83d48056..a5e295826aea 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -617,6 +617,10 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
617 err = kstrtoul(buf, 10, &val); 617 err = kstrtoul(buf, 10, &val);
618 if (err) 618 if (err)
619 return err; 619 return err;
620
621 if (val > 255)
622 return -EINVAL;
623
620 data->vrm = val; 624 data->vrm = val;
621 return count; 625 return count;
622} 626}
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index d2060e245ff5..cfaf70b9cba7 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -74,12 +74,9 @@ static inline int TEMP_FROM_REG(s16 reg)
74 return reg / 8 * 625 / 10; 74 return reg / 8 * 625 / 10;
75} 75}
76 76
77static inline s16 TEMP_TO_REG(int val) 77static inline s16 TEMP_TO_REG(long val)
78{ 78{
79 if (val <= -60000) 79 val = clamp_val(val, -60000, 160000);
80 return -60000 * 10 / 625 * 8;
81 if (val >= 160000)
82 return 160000 * 10 / 625 * 8;
83 return val * 10 / 625 * 8; 80 return val * 10 / 625 * 8;
84} 81}
85 82
@@ -206,10 +203,12 @@ static ssize_t set_temp_hyst(struct device *dev,
206 if (err) 203 if (err)
207 return err; 204 return err;
208 205
206 val = clamp_val(val, -120000, 220000);
209 mutex_lock(&data->update_lock); 207 mutex_lock(&data->update_lock);
210 data->temp[t_hyst] = TEMP_FROM_REG(data->temp[attr->index]) - val; 208 data->temp[t_hyst] =
209 TEMP_TO_REG(TEMP_FROM_REG(data->temp[attr->index]) - val);
211 i2c_smbus_write_word_swapped(client, LM92_REG_TEMP_HYST, 210 i2c_smbus_write_word_swapped(client, LM92_REG_TEMP_HYST,
212 TEMP_TO_REG(data->temp[t_hyst])); 211 data->temp[t_hyst]);
213 mutex_unlock(&data->update_lock); 212 mutex_unlock(&data->update_lock);
214 return count; 213 return count;
215} 214}
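
The lm92 rewrite above collapses the two end-of-range special cases in TEMP_TO_REG() into a single clamp before scaling. A standalone sketch of the resulting conversion; clamp_val here is a local stand-in for the kernel macro and main() is only a demonstration:

#include <stdio.h>

static long clamp_val(long v, long lo, long hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

/* Millidegrees C to register units, clamped to the representable range
 * first, as the patched TEMP_TO_REG() does. */
static short temp_to_reg(long val)
{
	val = clamp_val(val, -60000, 160000);
	return val * 10 / 625 * 8;
}

int main(void)
{
	printf("%d %d %d\n", temp_to_reg(-999999), temp_to_reg(25000),
	       temp_to_reg(999999));
	return 0;
}

The hysteresis store below uses the same idea: clamp the user value, convert once with TEMP_TO_REG(), and write the already-converted register value instead of converting twice.
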
diff --git a/drivers/hwmon/pc87360.c b/drivers/hwmon/pc87360.c
index 988181e4cfcd..145f674c1d87 100644
--- a/drivers/hwmon/pc87360.c
+++ b/drivers/hwmon/pc87360.c
@@ -615,6 +615,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
615 if (err) 615 if (err)
616 return err; 616 return err;
617 617
618 if (val > 255)
619 return -EINVAL;
620
618 data->vrm = val; 621 data->vrm = val;
619 return count; 622 return count;
620} 623}
diff --git a/drivers/hwmon/tmp103.c b/drivers/hwmon/tmp103.c
index c74d2da389d9..e42964f07f67 100644
--- a/drivers/hwmon/tmp103.c
+++ b/drivers/hwmon/tmp103.c
@@ -131,13 +131,6 @@ static int tmp103_probe(struct i2c_client *client,
131 struct regmap *regmap; 131 struct regmap *regmap;
132 int ret; 132 int ret;
133 133
134 if (!i2c_check_functionality(client->adapter,
135 I2C_FUNC_SMBUS_BYTE_DATA)) {
136 dev_err(&client->dev,
137 "adapter doesn't support SMBus byte transactions\n");
138 return -ENODEV;
139 }
140
141 regmap = devm_regmap_init_i2c(client, &tmp103_regmap_config); 134 regmap = devm_regmap_init_i2c(client, &tmp103_regmap_config);
142 if (IS_ERR(regmap)) { 135 if (IS_ERR(regmap)) {
143 dev_err(dev, "failed to allocate register map\n"); 136 dev_err(dev, "failed to allocate register map\n");
diff --git a/drivers/hwmon/vt1211.c b/drivers/hwmon/vt1211.c
index 344b22ec2553..3ea57c3504e2 100644
--- a/drivers/hwmon/vt1211.c
+++ b/drivers/hwmon/vt1211.c
@@ -879,6 +879,9 @@ static ssize_t set_vrm(struct device *dev, struct device_attribute *attr,
879 if (err) 879 if (err)
880 return err; 880 return err;
881 881
882 if (val > 255)
883 return -EINVAL;
884
882 data->vrm = val; 885 data->vrm = val;
883 886
884 return count; 887 return count;
diff --git a/drivers/hwmon/w83627hf.c b/drivers/hwmon/w83627hf.c
index c1726be3654c..2f55973a8c4c 100644
--- a/drivers/hwmon/w83627hf.c
+++ b/drivers/hwmon/w83627hf.c
@@ -820,6 +820,9 @@ store_vrm_reg(struct device *dev, struct device_attribute *attr, const char *buf
820 err = kstrtoul(buf, 10, &val); 820 err = kstrtoul(buf, 10, &val);
821 if (err) 821 if (err)
822 return err; 822 return err;
823
824 if (val > 255)
825 return -EINVAL;
823 data->vrm = val; 826 data->vrm = val;
824 827
825 return count; 828 return count;
diff --git a/drivers/hwmon/w83791d.c b/drivers/hwmon/w83791d.c
index cb3765fec98c..001df856913f 100644
--- a/drivers/hwmon/w83791d.c
+++ b/drivers/hwmon/w83791d.c
@@ -1181,6 +1181,9 @@ static ssize_t store_vrm_reg(struct device *dev,
1181 if (err) 1181 if (err)
1182 return err; 1182 return err;
1183 1183
1184 if (val > 255)
1185 return -EINVAL;
1186
1184 data->vrm = val; 1187 data->vrm = val;
1185 return count; 1188 return count;
1186} 1189}
diff --git a/drivers/hwmon/w83793.c b/drivers/hwmon/w83793.c
index 9d63d71214ca..816aa6caf5d5 100644
--- a/drivers/hwmon/w83793.c
+++ b/drivers/hwmon/w83793.c
@@ -353,6 +353,9 @@ store_vrm(struct device *dev, struct device_attribute *attr,
353 if (err) 353 if (err)
354 return err; 354 return err;
355 355
356 if (val > 255)
357 return -EINVAL;
358
356 data->vrm = val; 359 data->vrm = val;
357 return count; 360 return count;
358} 361}
diff --git a/drivers/hwspinlock/Kconfig b/drivers/hwspinlock/Kconfig
index 70637d23b1f9..3612cb5b30b2 100644
--- a/drivers/hwspinlock/Kconfig
+++ b/drivers/hwspinlock/Kconfig
@@ -10,7 +10,7 @@ menu "Hardware Spinlock drivers"
10 10
11config HWSPINLOCK_OMAP 11config HWSPINLOCK_OMAP
12 tristate "OMAP Hardware Spinlock device" 12 tristate "OMAP Hardware Spinlock device"
13 depends on ARCH_OMAP4 || SOC_OMAP5 13 depends on ARCH_OMAP4 || SOC_OMAP5 || SOC_DRA7XX || SOC_AM33XX || SOC_AM43XX
14 select HWSPINLOCK 14 select HWSPINLOCK
15 help 15 help
16 Say y here to support the OMAP Hardware Spinlock device (firstly 16 Say y here to support the OMAP Hardware Spinlock device (firstly
diff --git a/drivers/hwspinlock/omap_hwspinlock.c b/drivers/hwspinlock/omap_hwspinlock.c
index 292869cc9034..c1e2cd4d85fe 100644
--- a/drivers/hwspinlock/omap_hwspinlock.c
+++ b/drivers/hwspinlock/omap_hwspinlock.c
@@ -98,10 +98,29 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
98 if (!io_base) 98 if (!io_base)
99 return -ENOMEM; 99 return -ENOMEM;
100 100
101 /*
102 * make sure the module is enabled and clocked before reading
103 * the module SYSSTATUS register
104 */
105 pm_runtime_enable(&pdev->dev);
106 ret = pm_runtime_get_sync(&pdev->dev);
107 if (ret < 0) {
108 pm_runtime_put_noidle(&pdev->dev);
109 goto iounmap_base;
110 }
111
101 /* Determine number of locks */ 112 /* Determine number of locks */
102 i = readl(io_base + SYSSTATUS_OFFSET); 113 i = readl(io_base + SYSSTATUS_OFFSET);
103 i >>= SPINLOCK_NUMLOCKS_BIT_OFFSET; 114 i >>= SPINLOCK_NUMLOCKS_BIT_OFFSET;
104 115
116 /*
117 * runtime PM will make sure the clock of this module is
118 * enabled again iff at least one lock is requested
119 */
120 ret = pm_runtime_put(&pdev->dev);
121 if (ret < 0)
122 goto iounmap_base;
123
105 /* one of the four lsb's must be set, and nothing else */ 124 /* one of the four lsb's must be set, and nothing else */
106 if (hweight_long(i & 0xf) != 1 || i > 8) { 125 if (hweight_long(i & 0xf) != 1 || i > 8) {
107 ret = -EINVAL; 126 ret = -EINVAL;
@@ -121,12 +140,6 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
121 for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++) 140 for (i = 0, hwlock = &bank->lock[0]; i < num_locks; i++, hwlock++)
122 hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i; 141 hwlock->priv = io_base + LOCK_BASE_OFFSET + sizeof(u32) * i;
123 142
124 /*
125 * runtime PM will make sure the clock of this module is
126 * enabled iff at least one lock is requested
127 */
128 pm_runtime_enable(&pdev->dev);
129
130 ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops, 143 ret = hwspin_lock_register(bank, &pdev->dev, &omap_hwspinlock_ops,
131 pdata->base_id, num_locks); 144 pdata->base_id, num_locks);
132 if (ret) 145 if (ret)
@@ -135,9 +148,9 @@ static int omap_hwspinlock_probe(struct platform_device *pdev)
135 return 0; 148 return 0;
136 149
137reg_fail: 150reg_fail:
138 pm_runtime_disable(&pdev->dev);
139 kfree(bank); 151 kfree(bank);
140iounmap_base: 152iounmap_base:
153 pm_runtime_disable(&pdev->dev);
141 iounmap(io_base); 154 iounmap(io_base);
142 return ret; 155 return ret;
143} 156}
diff --git a/drivers/infiniband/core/agent.c b/drivers/infiniband/core/agent.c
index 2bc7f5af64f4..f6d29614cb01 100644
--- a/drivers/infiniband/core/agent.c
+++ b/drivers/infiniband/core/agent.c
@@ -94,14 +94,14 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
94 port_priv = ib_get_agent_port(device, port_num); 94 port_priv = ib_get_agent_port(device, port_num);
95 95
96 if (!port_priv) { 96 if (!port_priv) {
97 printk(KERN_ERR SPFX "Unable to find port agent\n"); 97 dev_err(&device->dev, "Unable to find port agent\n");
98 return; 98 return;
99 } 99 }
100 100
101 agent = port_priv->agent[qpn]; 101 agent = port_priv->agent[qpn];
102 ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num); 102 ah = ib_create_ah_from_wc(agent->qp->pd, wc, grh, port_num);
103 if (IS_ERR(ah)) { 103 if (IS_ERR(ah)) {
104 printk(KERN_ERR SPFX "ib_create_ah_from_wc error %ld\n", 104 dev_err(&device->dev, "ib_create_ah_from_wc error %ld\n",
105 PTR_ERR(ah)); 105 PTR_ERR(ah));
106 return; 106 return;
107 } 107 }
@@ -110,7 +110,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
110 IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA, 110 IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
111 GFP_KERNEL); 111 GFP_KERNEL);
112 if (IS_ERR(send_buf)) { 112 if (IS_ERR(send_buf)) {
113 printk(KERN_ERR SPFX "ib_create_send_mad error\n"); 113 dev_err(&device->dev, "ib_create_send_mad error\n");
114 goto err1; 114 goto err1;
115 } 115 }
116 116
@@ -125,7 +125,7 @@ void agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
125 } 125 }
126 126
127 if (ib_post_send_mad(send_buf, NULL)) { 127 if (ib_post_send_mad(send_buf, NULL)) {
128 printk(KERN_ERR SPFX "ib_post_send_mad error\n"); 128 dev_err(&device->dev, "ib_post_send_mad error\n");
129 goto err2; 129 goto err2;
130 } 130 }
131 return; 131 return;
@@ -151,7 +151,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
151 /* Create new device info */ 151 /* Create new device info */
152 port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); 152 port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
153 if (!port_priv) { 153 if (!port_priv) {
154 printk(KERN_ERR SPFX "No memory for ib_agent_port_private\n"); 154 dev_err(&device->dev, "No memory for ib_agent_port_private\n");
155 ret = -ENOMEM; 155 ret = -ENOMEM;
156 goto error1; 156 goto error1;
157 } 157 }
@@ -161,7 +161,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
161 port_priv->agent[0] = ib_register_mad_agent(device, port_num, 161 port_priv->agent[0] = ib_register_mad_agent(device, port_num,
162 IB_QPT_SMI, NULL, 0, 162 IB_QPT_SMI, NULL, 0,
163 &agent_send_handler, 163 &agent_send_handler,
164 NULL, NULL); 164 NULL, NULL, 0);
165 if (IS_ERR(port_priv->agent[0])) { 165 if (IS_ERR(port_priv->agent[0])) {
166 ret = PTR_ERR(port_priv->agent[0]); 166 ret = PTR_ERR(port_priv->agent[0]);
167 goto error2; 167 goto error2;
@@ -172,7 +172,7 @@ int ib_agent_port_open(struct ib_device *device, int port_num)
172 port_priv->agent[1] = ib_register_mad_agent(device, port_num, 172 port_priv->agent[1] = ib_register_mad_agent(device, port_num,
173 IB_QPT_GSI, NULL, 0, 173 IB_QPT_GSI, NULL, 0,
174 &agent_send_handler, 174 &agent_send_handler,
175 NULL, NULL); 175 NULL, NULL, 0);
176 if (IS_ERR(port_priv->agent[1])) { 176 if (IS_ERR(port_priv->agent[1])) {
177 ret = PTR_ERR(port_priv->agent[1]); 177 ret = PTR_ERR(port_priv->agent[1]);
178 goto error3; 178 goto error3;
@@ -202,7 +202,7 @@ int ib_agent_port_close(struct ib_device *device, int port_num)
202 port_priv = __ib_get_agent_port(device, port_num); 202 port_priv = __ib_get_agent_port(device, port_num);
203 if (port_priv == NULL) { 203 if (port_priv == NULL) {
204 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags); 204 spin_unlock_irqrestore(&ib_agent_port_list_lock, flags);
205 printk(KERN_ERR SPFX "Port %d not found\n", port_num); 205 dev_err(&device->dev, "Port %d not found\n", port_num);
206 return -ENODEV; 206 return -ENODEV;
207 } 207 }
208 list_del(&port_priv->port_list); 208 list_del(&port_priv->port_list);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c3239170d8b7..e28a494e2a3a 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3753,7 +3753,7 @@ static void cm_add_one(struct ib_device *ib_device)
3753 struct cm_port *port; 3753 struct cm_port *port;
3754 struct ib_mad_reg_req reg_req = { 3754 struct ib_mad_reg_req reg_req = {
3755 .mgmt_class = IB_MGMT_CLASS_CM, 3755 .mgmt_class = IB_MGMT_CLASS_CM,
3756 .mgmt_class_version = IB_CM_CLASS_VERSION 3756 .mgmt_class_version = IB_CM_CLASS_VERSION,
3757 }; 3757 };
3758 struct ib_port_modify port_modify = { 3758 struct ib_port_modify port_modify = {
3759 .set_port_cap_mask = IB_PORT_CM_SUP 3759 .set_port_cap_mask = IB_PORT_CM_SUP
@@ -3801,7 +3801,8 @@ static void cm_add_one(struct ib_device *ib_device)
3801 0, 3801 0,
3802 cm_send_handler, 3802 cm_send_handler,
3803 cm_recv_handler, 3803 cm_recv_handler,
3804 port); 3804 port,
3805 0);
3805 if (IS_ERR(port->mad_agent)) 3806 if (IS_ERR(port->mad_agent))
3806 goto error2; 3807 goto error2;
3807 3808
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 3d2e489ab732..ff9163dc1596 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -46,6 +46,7 @@
46#include <linux/completion.h> 46#include <linux/completion.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/sysctl.h>
49 50
50#include <rdma/iw_cm.h> 51#include <rdma/iw_cm.h>
51#include <rdma/ib_addr.h> 52#include <rdma/ib_addr.h>
@@ -65,6 +66,20 @@ struct iwcm_work {
65 struct list_head free_list; 66 struct list_head free_list;
66}; 67};
67 68
69static unsigned int default_backlog = 256;
70
71static struct ctl_table_header *iwcm_ctl_table_hdr;
72static struct ctl_table iwcm_ctl_table[] = {
73 {
74 .procname = "default_backlog",
75 .data = &default_backlog,
76 .maxlen = sizeof(default_backlog),
77 .mode = 0644,
78 .proc_handler = proc_dointvec,
79 },
80 { }
81};
82
68/* 83/*
69 * The following services provide a mechanism for pre-allocating iwcm_work 84 * The following services provide a mechanism for pre-allocating iwcm_work
70 * elements. The design pre-allocates them based on the cm_id type: 85 * elements. The design pre-allocates them based on the cm_id type:
@@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
425 440
426 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); 441 cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
427 442
443 if (!backlog)
444 backlog = default_backlog;
445
428 ret = alloc_work_entries(cm_id_priv, backlog); 446 ret = alloc_work_entries(cm_id_priv, backlog);
429 if (ret) 447 if (ret)
430 return ret; 448 return ret;
@@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void)
1030 if (!iwcm_wq) 1048 if (!iwcm_wq)
1031 return -ENOMEM; 1049 return -ENOMEM;
1032 1050
1051 iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm",
1052 iwcm_ctl_table);
1053 if (!iwcm_ctl_table_hdr) {
1054 pr_err("iw_cm: couldn't register sysctl paths\n");
1055 destroy_workqueue(iwcm_wq);
1056 return -ENOMEM;
1057 }
1058
1033 return 0; 1059 return 0;
1034} 1060}
1035 1061
1036static void __exit iw_cm_cleanup(void) 1062static void __exit iw_cm_cleanup(void)
1037{ 1063{
1064 unregister_net_sysctl_table(iwcm_ctl_table_hdr);
1038 destroy_workqueue(iwcm_wq); 1065 destroy_workqueue(iwcm_wq);
1039} 1066}
1040 1067
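
The iwcm change above turns a zero listen backlog into a tunable default (256, exposed as the net/iw_cm/default_backlog sysctl) instead of accepting a zero-length backlog. A trivial standalone sketch of that fallback; the function name is illustrative only:

#include <stdio.h>

static unsigned int default_backlog = 256;	/* in the patch: a sysctl knob */

static int effective_backlog(int backlog)
{
	if (!backlog)
		backlog = default_backlog;
	return backlog;
}

int main(void)
{
	printf("%d %d\n", effective_backlog(0), effective_backlog(16));
	return 0;
}
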
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index ab31f136d04b..74c30f4c557e 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -33,6 +33,9 @@
33 * SOFTWARE. 33 * SOFTWARE.
34 * 34 *
35 */ 35 */
36
37#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
38
36#include <linux/dma-mapping.h> 39#include <linux/dma-mapping.h>
37#include <linux/slab.h> 40#include <linux/slab.h>
38#include <linux/module.h> 41#include <linux/module.h>
@@ -195,7 +198,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
195 u8 rmpp_version, 198 u8 rmpp_version,
196 ib_mad_send_handler send_handler, 199 ib_mad_send_handler send_handler,
197 ib_mad_recv_handler recv_handler, 200 ib_mad_recv_handler recv_handler,
198 void *context) 201 void *context,
202 u32 registration_flags)
199{ 203{
200 struct ib_mad_port_private *port_priv; 204 struct ib_mad_port_private *port_priv;
201 struct ib_mad_agent *ret = ERR_PTR(-EINVAL); 205 struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
@@ -211,68 +215,109 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
211 215
212 /* Validate parameters */ 216 /* Validate parameters */
213 qpn = get_spl_qp_index(qp_type); 217 qpn = get_spl_qp_index(qp_type);
214 if (qpn == -1) 218 if (qpn == -1) {
219 dev_notice(&device->dev,
220 "ib_register_mad_agent: invalid QP Type %d\n",
221 qp_type);
215 goto error1; 222 goto error1;
223 }
216 224
217 if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) 225 if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
226 dev_notice(&device->dev,
227 "ib_register_mad_agent: invalid RMPP Version %u\n",
228 rmpp_version);
218 goto error1; 229 goto error1;
230 }
219 231
220 /* Validate MAD registration request if supplied */ 232 /* Validate MAD registration request if supplied */
221 if (mad_reg_req) { 233 if (mad_reg_req) {
222 if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) 234 if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
235 dev_notice(&device->dev,
236 "ib_register_mad_agent: invalid Class Version %u\n",
237 mad_reg_req->mgmt_class_version);
223 goto error1; 238 goto error1;
224 if (!recv_handler) 239 }
240 if (!recv_handler) {
241 dev_notice(&device->dev,
242 "ib_register_mad_agent: no recv_handler\n");
225 goto error1; 243 goto error1;
244 }
226 if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) { 245 if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
227 /* 246 /*
228 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only 247 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
229 * one in this range currently allowed 248 * one in this range currently allowed
230 */ 249 */
231 if (mad_reg_req->mgmt_class != 250 if (mad_reg_req->mgmt_class !=
232 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) 251 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
252 dev_notice(&device->dev,
253 "ib_register_mad_agent: Invalid Mgmt Class 0x%x\n",
254 mad_reg_req->mgmt_class);
233 goto error1; 255 goto error1;
256 }
234 } else if (mad_reg_req->mgmt_class == 0) { 257 } else if (mad_reg_req->mgmt_class == 0) {
235 /* 258 /*
236 * Class 0 is reserved in IBA and is used for 259 * Class 0 is reserved in IBA and is used for
237 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 260 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
238 */ 261 */
262 dev_notice(&device->dev,
263 "ib_register_mad_agent: Invalid Mgmt Class 0\n");
239 goto error1; 264 goto error1;
240 } else if (is_vendor_class(mad_reg_req->mgmt_class)) { 265 } else if (is_vendor_class(mad_reg_req->mgmt_class)) {
241 /* 266 /*
242 * If class is in "new" vendor range, 267 * If class is in "new" vendor range,
243 * ensure supplied OUI is not zero 268 * ensure supplied OUI is not zero
244 */ 269 */
245 if (!is_vendor_oui(mad_reg_req->oui)) 270 if (!is_vendor_oui(mad_reg_req->oui)) {
271 dev_notice(&device->dev,
272 "ib_register_mad_agent: No OUI specified for class 0x%x\n",
273 mad_reg_req->mgmt_class);
246 goto error1; 274 goto error1;
275 }
247 } 276 }
248 /* Make sure class supplied is consistent with RMPP */ 277 /* Make sure class supplied is consistent with RMPP */
249 if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) { 278 if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
250 if (rmpp_version) 279 if (rmpp_version) {
280 dev_notice(&device->dev,
281 "ib_register_mad_agent: RMPP version for non-RMPP class 0x%x\n",
282 mad_reg_req->mgmt_class);
251 goto error1; 283 goto error1;
284 }
252 } 285 }
286
253 /* Make sure class supplied is consistent with QP type */ 287 /* Make sure class supplied is consistent with QP type */
254 if (qp_type == IB_QPT_SMI) { 288 if (qp_type == IB_QPT_SMI) {
255 if ((mad_reg_req->mgmt_class != 289 if ((mad_reg_req->mgmt_class !=
256 IB_MGMT_CLASS_SUBN_LID_ROUTED) && 290 IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
257 (mad_reg_req->mgmt_class != 291 (mad_reg_req->mgmt_class !=
258 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) 292 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
293 dev_notice(&device->dev,
294 "ib_register_mad_agent: Invalid SM QP type: class 0x%x\n",
295 mad_reg_req->mgmt_class);
259 goto error1; 296 goto error1;
297 }
260 } else { 298 } else {
261 if ((mad_reg_req->mgmt_class == 299 if ((mad_reg_req->mgmt_class ==
262 IB_MGMT_CLASS_SUBN_LID_ROUTED) || 300 IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
263 (mad_reg_req->mgmt_class == 301 (mad_reg_req->mgmt_class ==
264 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) 302 IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
303 dev_notice(&device->dev,
304 "ib_register_mad_agent: Invalid GS QP type: class 0x%x\n",
305 mad_reg_req->mgmt_class);
265 goto error1; 306 goto error1;
307 }
266 } 308 }
267 } else { 309 } else {
268 /* No registration request supplied */ 310 /* No registration request supplied */
269 if (!send_handler) 311 if (!send_handler)
270 goto error1; 312 goto error1;
313 if (registration_flags & IB_MAD_USER_RMPP)
314 goto error1;
271 } 315 }
272 316
273 /* Validate device and port */ 317 /* Validate device and port */
274 port_priv = ib_get_mad_port(device, port_num); 318 port_priv = ib_get_mad_port(device, port_num);
275 if (!port_priv) { 319 if (!port_priv) {
320 dev_notice(&device->dev, "ib_register_mad_agent: Invalid port\n");
276 ret = ERR_PTR(-ENODEV); 321 ret = ERR_PTR(-ENODEV);
277 goto error1; 322 goto error1;
278 } 323 }
@@ -280,6 +325,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
280 /* Verify the QP requested is supported. For example, Ethernet devices 325 /* Verify the QP requested is supported. For example, Ethernet devices
281 * will not have QP0 */ 326 * will not have QP0 */
282 if (!port_priv->qp_info[qpn].qp) { 327 if (!port_priv->qp_info[qpn].qp) {
328 dev_notice(&device->dev,
329 "ib_register_mad_agent: QP %d not supported\n", qpn);
283 ret = ERR_PTR(-EPROTONOSUPPORT); 330 ret = ERR_PTR(-EPROTONOSUPPORT);
284 goto error1; 331 goto error1;
285 } 332 }
@@ -316,6 +363,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
316 mad_agent_priv->agent.context = context; 363 mad_agent_priv->agent.context = context;
317 mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp; 364 mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
318 mad_agent_priv->agent.port_num = port_num; 365 mad_agent_priv->agent.port_num = port_num;
366 mad_agent_priv->agent.flags = registration_flags;
319 spin_lock_init(&mad_agent_priv->lock); 367 spin_lock_init(&mad_agent_priv->lock);
320 INIT_LIST_HEAD(&mad_agent_priv->send_list); 368 INIT_LIST_HEAD(&mad_agent_priv->send_list);
321 INIT_LIST_HEAD(&mad_agent_priv->wait_list); 369 INIT_LIST_HEAD(&mad_agent_priv->wait_list);
@@ -706,7 +754,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
706 smi_handle_dr_smp_send(smp, device->node_type, port_num) == 754 smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
707 IB_SMI_DISCARD) { 755 IB_SMI_DISCARD) {
708 ret = -EINVAL; 756 ret = -EINVAL;
709 printk(KERN_ERR PFX "Invalid directed route\n"); 757 dev_err(&device->dev, "Invalid directed route\n");
710 goto out; 758 goto out;
711 } 759 }
712 760
@@ -718,7 +766,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
718 local = kmalloc(sizeof *local, GFP_ATOMIC); 766 local = kmalloc(sizeof *local, GFP_ATOMIC);
719 if (!local) { 767 if (!local) {
720 ret = -ENOMEM; 768 ret = -ENOMEM;
721 printk(KERN_ERR PFX "No memory for ib_mad_local_private\n"); 769 dev_err(&device->dev, "No memory for ib_mad_local_private\n");
722 goto out; 770 goto out;
723 } 771 }
724 local->mad_priv = NULL; 772 local->mad_priv = NULL;
@@ -726,7 +774,7 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
726 mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC); 774 mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
727 if (!mad_priv) { 775 if (!mad_priv) {
728 ret = -ENOMEM; 776 ret = -ENOMEM;
729 printk(KERN_ERR PFX "No memory for local response MAD\n"); 777 dev_err(&device->dev, "No memory for local response MAD\n");
730 kfree(local); 778 kfree(local);
731 goto out; 779 goto out;
732 } 780 }
@@ -837,9 +885,9 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
837 for (left = send_buf->data_len + pad; left > 0; left -= seg_size) { 885 for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
838 seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask); 886 seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
839 if (!seg) { 887 if (!seg) {
840 printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem " 888 dev_err(&send_buf->mad_agent->device->dev,
841 "alloc failed for len %zd, gfp %#x\n", 889 "alloc_send_rmpp_segs: RMPP mem alloc failed for len %zd, gfp %#x\n",
842 sizeof (*seg) + seg_size, gfp_mask); 890 sizeof (*seg) + seg_size, gfp_mask);
843 free_send_rmpp_list(send_wr); 891 free_send_rmpp_list(send_wr);
844 return -ENOMEM; 892 return -ENOMEM;
845 } 893 }
@@ -862,6 +910,12 @@ static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
862 return 0; 910 return 0;
863} 911}
864 912
913int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent)
914{
915 return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
916}
917EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
918
865struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, 919struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
866 u32 remote_qpn, u16 pkey_index, 920 u32 remote_qpn, u16 pkey_index,
867 int rmpp_active, 921 int rmpp_active,
@@ -878,10 +932,12 @@ struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
878 pad = get_pad_size(hdr_len, data_len); 932 pad = get_pad_size(hdr_len, data_len);
879 message_size = hdr_len + data_len + pad; 933 message_size = hdr_len + data_len + pad;
880 934
 881 if ((!mad_agent->rmpp_version &&
 882 (rmpp_active || message_size > sizeof(struct ib_mad))) ||
 883 (!rmpp_active && message_size > sizeof(struct ib_mad)))
 884 return ERR_PTR(-EINVAL);
 935 if (ib_mad_kernel_rmpp_agent(mad_agent)) {
 936 if (!rmpp_active && message_size > sizeof(struct ib_mad))
 937 return ERR_PTR(-EINVAL);
 938 } else
 939 if (rmpp_active || message_size > sizeof(struct ib_mad))
 940 return ERR_PTR(-EINVAL);
885 941
886 size = rmpp_active ? hdr_len : sizeof(struct ib_mad); 942 size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
887 buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask); 943 buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
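Editor's note: after this hunk, only agents for which the kernel runs RMPP (ib_mad_kernel_rmpp_agent()) may request a send buffer larger than a single MAD; every other agent, including the new user-RMPP ones that build each segment themselves, must keep message_size within sizeof(struct ib_mad) and pass rmpp_active = 0. A hedged sketch of the common single-MAD case (agent, remote_qpn and pkey_index come from elsewhere; the prototype is the one used throughout this file):

#include <linux/err.h>
#include <rdma/ib_mad.h>

/* Build and post one plain (non-RMPP) MAD on an already-registered agent. */
static int send_one_mad(struct ib_mad_agent *agent, u32 remote_qpn,
			u16 pkey_index)
{
	struct ib_mad_send_buf *msg;

	msg = ib_create_send_mad(agent, remote_qpn, pkey_index,
				 0,			/* rmpp_active */
				 IB_MGMT_MAD_HDR,	/* 24-byte header  */
				 IB_MGMT_MAD_DATA,	/* 232-byte payload */
				 GFP_KERNEL);
	if (IS_ERR(msg))
		return PTR_ERR(msg);

	/* ... fill msg->mad (class, method, TID, attribute) and set
	 * msg->ah to a valid address handle before posting ... */

	return ib_post_send_mad(msg, NULL);
}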
@@ -1135,7 +1191,7 @@ int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1135 &mad_agent_priv->send_list); 1191 &mad_agent_priv->send_list);
1136 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1192 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1137 1193
1138 if (mad_agent_priv->agent.rmpp_version) { 1194 if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1139 ret = ib_send_rmpp_mad(mad_send_wr); 1195 ret = ib_send_rmpp_mad(mad_send_wr);
1140 if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED) 1196 if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1141 ret = ib_send_mad(mad_send_wr); 1197 ret = ib_send_mad(mad_send_wr);
@@ -1199,7 +1255,8 @@ EXPORT_SYMBOL(ib_redirect_mad_qp);
1199int ib_process_mad_wc(struct ib_mad_agent *mad_agent, 1255int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
1200 struct ib_wc *wc) 1256 struct ib_wc *wc)
1201{ 1257{
1202 printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n"); 1258 dev_err(&mad_agent->device->dev,
1259 "ib_process_mad_wc() not implemented yet\n");
1203 return 0; 1260 return 0;
1204} 1261}
1205EXPORT_SYMBOL(ib_process_mad_wc); 1262EXPORT_SYMBOL(ib_process_mad_wc);
@@ -1211,7 +1268,7 @@ static int method_in_use(struct ib_mad_mgmt_method_table **method,
1211 1268
1212 for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) { 1269 for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1213 if ((*method)->agent[i]) { 1270 if ((*method)->agent[i]) {
1214 printk(KERN_ERR PFX "Method %d already in use\n", i); 1271 pr_err("Method %d already in use\n", i);
1215 return -EINVAL; 1272 return -EINVAL;
1216 } 1273 }
1217 } 1274 }
@@ -1223,8 +1280,7 @@ static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1223 /* Allocate management method table */ 1280 /* Allocate management method table */
1224 *method = kzalloc(sizeof **method, GFP_ATOMIC); 1281 *method = kzalloc(sizeof **method, GFP_ATOMIC);
1225 if (!*method) { 1282 if (!*method) {
1226 printk(KERN_ERR PFX "No memory for " 1283 pr_err("No memory for ib_mad_mgmt_method_table\n");
1227 "ib_mad_mgmt_method_table\n");
1228 return -ENOMEM; 1284 return -ENOMEM;
1229 } 1285 }
1230 1286
@@ -1319,8 +1375,8 @@ static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1319 /* Allocate management class table for "new" class version */ 1375 /* Allocate management class table for "new" class version */
1320 *class = kzalloc(sizeof **class, GFP_ATOMIC); 1376 *class = kzalloc(sizeof **class, GFP_ATOMIC);
1321 if (!*class) { 1377 if (!*class) {
1322 printk(KERN_ERR PFX "No memory for " 1378 dev_err(&agent_priv->agent.device->dev,
1323 "ib_mad_mgmt_class_table\n"); 1379 "No memory for ib_mad_mgmt_class_table\n");
1324 ret = -ENOMEM; 1380 ret = -ENOMEM;
1325 goto error1; 1381 goto error1;
1326 } 1382 }
@@ -1386,8 +1442,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1386 /* Allocate mgmt vendor class table for "new" class version */ 1442 /* Allocate mgmt vendor class table for "new" class version */
1387 vendor = kzalloc(sizeof *vendor, GFP_ATOMIC); 1443 vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1388 if (!vendor) { 1444 if (!vendor) {
1389 printk(KERN_ERR PFX "No memory for " 1445 dev_err(&agent_priv->agent.device->dev,
1390 "ib_mad_mgmt_vendor_class_table\n"); 1446 "No memory for ib_mad_mgmt_vendor_class_table\n");
1391 goto error1; 1447 goto error1;
1392 } 1448 }
1393 1449
@@ -1397,8 +1453,8 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1397 /* Allocate table for this management vendor class */ 1453 /* Allocate table for this management vendor class */
1398 vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC); 1454 vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1399 if (!vendor_class) { 1455 if (!vendor_class) {
1400 printk(KERN_ERR PFX "No memory for " 1456 dev_err(&agent_priv->agent.device->dev,
1401 "ib_mad_mgmt_vendor_class\n"); 1457 "No memory for ib_mad_mgmt_vendor_class\n");
1402 goto error2; 1458 goto error2;
1403 } 1459 }
1404 1460
@@ -1429,7 +1485,7 @@ static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1429 goto check_in_use; 1485 goto check_in_use;
1430 } 1486 }
1431 } 1487 }
1432 printk(KERN_ERR PFX "All OUI slots in use\n"); 1488 dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
1433 goto error3; 1489 goto error3;
1434 1490
1435check_in_use: 1491check_in_use:
@@ -1640,9 +1696,9 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
1640 if (mad_agent->agent.recv_handler) 1696 if (mad_agent->agent.recv_handler)
1641 atomic_inc(&mad_agent->refcount); 1697 atomic_inc(&mad_agent->refcount);
1642 else { 1698 else {
1643 printk(KERN_NOTICE PFX "No receive handler for client " 1699 dev_notice(&port_priv->device->dev,
1644 "%p on port %d\n", 1700 "No receive handler for client %p on port %d\n",
1645 &mad_agent->agent, port_priv->port_num); 1701 &mad_agent->agent, port_priv->port_num);
1646 mad_agent = NULL; 1702 mad_agent = NULL;
1647 } 1703 }
1648 } 1704 }
@@ -1658,8 +1714,8 @@ static int validate_mad(struct ib_mad *mad, u32 qp_num)
1658 1714
1659 /* Make sure MAD base version is understood */ 1715 /* Make sure MAD base version is understood */
1660 if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) { 1716 if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
1661 printk(KERN_ERR PFX "MAD received with unsupported base " 1717 pr_err("MAD received with unsupported base version %d\n",
1662 "version %d\n", mad->mad_hdr.base_version); 1718 mad->mad_hdr.base_version);
1663 goto out; 1719 goto out;
1664 } 1720 }
1665 1721
@@ -1685,6 +1741,7 @@ static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
1685 1741
1686 rmpp_mad = (struct ib_rmpp_mad *)mad_hdr; 1742 rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1687 return !mad_agent_priv->agent.rmpp_version || 1743 return !mad_agent_priv->agent.rmpp_version ||
1744 !ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1688 !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 1745 !(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1689 IB_MGMT_RMPP_FLAG_ACTIVE) || 1746 IB_MGMT_RMPP_FLAG_ACTIVE) ||
1690 (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA); 1747 (rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
@@ -1812,7 +1869,7 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1812 1869
1813 INIT_LIST_HEAD(&mad_recv_wc->rmpp_list); 1870 INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1814 list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list); 1871 list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1815 if (mad_agent_priv->agent.rmpp_version) { 1872 if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1816 mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv, 1873 mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1817 mad_recv_wc); 1874 mad_recv_wc);
1818 if (!mad_recv_wc) { 1875 if (!mad_recv_wc) {
@@ -1827,23 +1884,39 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1827 mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc); 1884 mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1828 if (!mad_send_wr) { 1885 if (!mad_send_wr) {
1829 spin_unlock_irqrestore(&mad_agent_priv->lock, flags); 1886 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1830 ib_free_recv_mad(mad_recv_wc);
1831 deref_mad_agent(mad_agent_priv);
1832 return;
1833 }
1834 ib_mark_mad_done(mad_send_wr);
1835 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1887 if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
1888 && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
1889 && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
1890 & IB_MGMT_RMPP_FLAG_ACTIVE)) {
1891 /* user rmpp is in effect
1892 * and this is an active RMPP MAD
1893 */
1894 mad_recv_wc->wc->wr_id = 0;
1895 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1896 mad_recv_wc);
1897 atomic_dec(&mad_agent_priv->refcount);
1898 } else {
1899 /* not user rmpp, revert to normal behavior and
1900 * drop the mad */
1901 ib_free_recv_mad(mad_recv_wc);
1902 deref_mad_agent(mad_agent_priv);
1903 return;
1904 }
1905 } else {
1906 ib_mark_mad_done(mad_send_wr);
1907 spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1836 1908
1837 /* Defined behavior is to complete response before request */ 1909 /* Defined behavior is to complete response before request */
1838 mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf; 1910 mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
1839 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 1911 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1840 mad_recv_wc); 1912 mad_recv_wc);
1841 atomic_dec(&mad_agent_priv->refcount); 1913 atomic_dec(&mad_agent_priv->refcount);
1842 1914
1843 mad_send_wc.status = IB_WC_SUCCESS; 1915 mad_send_wc.status = IB_WC_SUCCESS;
1844 mad_send_wc.vendor_err = 0; 1916 mad_send_wc.vendor_err = 0;
1845 mad_send_wc.send_buf = &mad_send_wr->send_buf; 1917 mad_send_wc.send_buf = &mad_send_wr->send_buf;
1846 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); 1918 ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1919 }
1847 } else { 1920 } else {
1848 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, 1921 mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1849 mad_recv_wc); 1922 mad_recv_wc);
@@ -1911,8 +1984,8 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1911 1984
1912 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); 1985 response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1913 if (!response) { 1986 if (!response) {
1914 printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory " 1987 dev_err(&port_priv->device->dev,
1915 "for response buffer\n"); 1988 "ib_mad_recv_done_handler no memory for response buffer\n");
1916 goto out; 1989 goto out;
1917 } 1990 }
1918 1991
@@ -2083,7 +2156,7 @@ void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2083 2156
2084 mad_agent_priv = mad_send_wr->mad_agent_priv; 2157 mad_agent_priv = mad_send_wr->mad_agent_priv;
2085 spin_lock_irqsave(&mad_agent_priv->lock, flags); 2158 spin_lock_irqsave(&mad_agent_priv->lock, flags);
2086 if (mad_agent_priv->agent.rmpp_version) { 2159 if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2087 ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc); 2160 ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2088 if (ret == IB_RMPP_RESULT_CONSUMED) 2161 if (ret == IB_RMPP_RESULT_CONSUMED)
2089 goto done; 2162 goto done;
@@ -2176,7 +2249,8 @@ retry:
2176 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr, 2249 ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
2177 &bad_send_wr); 2250 &bad_send_wr);
2178 if (ret) { 2251 if (ret) {
2179 printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret); 2252 dev_err(&port_priv->device->dev,
2253 "ib_post_send failed: %d\n", ret);
2180 mad_send_wr = queued_send_wr; 2254 mad_send_wr = queued_send_wr;
2181 wc->status = IB_WC_LOC_QP_OP_ERR; 2255 wc->status = IB_WC_LOC_QP_OP_ERR;
2182 goto retry; 2256 goto retry;
@@ -2248,8 +2322,9 @@ static void mad_error_handler(struct ib_mad_port_private *port_priv,
2248 IB_QP_STATE | IB_QP_CUR_STATE); 2322 IB_QP_STATE | IB_QP_CUR_STATE);
2249 kfree(attr); 2323 kfree(attr);
2250 if (ret) 2324 if (ret)
2251 printk(KERN_ERR PFX "mad_error_handler - " 2325 dev_err(&port_priv->device->dev,
2252 "ib_modify_qp to RTS : %d\n", ret); 2326 "mad_error_handler - ib_modify_qp to RTS : %d\n",
2327 ret);
2253 else 2328 else
2254 mark_sends_for_retry(qp_info); 2329 mark_sends_for_retry(qp_info);
2255 } 2330 }
@@ -2408,7 +2483,8 @@ static void local_completions(struct work_struct *work)
2408 if (local->mad_priv) { 2483 if (local->mad_priv) {
2409 recv_mad_agent = local->recv_mad_agent; 2484 recv_mad_agent = local->recv_mad_agent;
2410 if (!recv_mad_agent) { 2485 if (!recv_mad_agent) {
2411 printk(KERN_ERR PFX "No receive MAD agent for local completion\n"); 2486 dev_err(&mad_agent_priv->agent.device->dev,
2487 "No receive MAD agent for local completion\n");
2412 free_mad = 1; 2488 free_mad = 1;
2413 goto local_send_completion; 2489 goto local_send_completion;
2414 } 2490 }
@@ -2476,7 +2552,7 @@ static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2476 2552
2477 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms); 2553 mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2478 2554
2479 if (mad_send_wr->mad_agent_priv->agent.rmpp_version) { 2555 if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2480 ret = ib_retry_rmpp(mad_send_wr); 2556 ret = ib_retry_rmpp(mad_send_wr);
2481 switch (ret) { 2557 switch (ret) {
2482 case IB_RMPP_RESULT_UNHANDLED: 2558 case IB_RMPP_RESULT_UNHANDLED:
@@ -2589,7 +2665,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2589 } else { 2665 } else {
2590 mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL); 2666 mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
2591 if (!mad_priv) { 2667 if (!mad_priv) {
2592 printk(KERN_ERR PFX "No memory for receive buffer\n"); 2668 dev_err(&qp_info->port_priv->device->dev,
2669 "No memory for receive buffer\n");
2593 ret = -ENOMEM; 2670 ret = -ENOMEM;
2594 break; 2671 break;
2595 } 2672 }
@@ -2625,7 +2702,8 @@ static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2625 sizeof mad_priv->header, 2702 sizeof mad_priv->header,
2626 DMA_FROM_DEVICE); 2703 DMA_FROM_DEVICE);
2627 kmem_cache_free(ib_mad_cache, mad_priv); 2704 kmem_cache_free(ib_mad_cache, mad_priv);
2628 printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret); 2705 dev_err(&qp_info->port_priv->device->dev,
2706 "ib_post_recv failed: %d\n", ret);
2629 break; 2707 break;
2630 } 2708 }
2631 } while (post); 2709 } while (post);
@@ -2681,7 +2759,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2681 2759
2682 attr = kmalloc(sizeof *attr, GFP_KERNEL); 2760 attr = kmalloc(sizeof *attr, GFP_KERNEL);
2683 if (!attr) { 2761 if (!attr) {
2684 printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n"); 2762 dev_err(&port_priv->device->dev,
2763 "Couldn't kmalloc ib_qp_attr\n");
2685 return -ENOMEM; 2764 return -ENOMEM;
2686 } 2765 }
2687 2766
@@ -2705,16 +2784,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2705 ret = ib_modify_qp(qp, attr, IB_QP_STATE | 2784 ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2706 IB_QP_PKEY_INDEX | IB_QP_QKEY); 2785 IB_QP_PKEY_INDEX | IB_QP_QKEY);
2707 if (ret) { 2786 if (ret) {
2708 printk(KERN_ERR PFX "Couldn't change QP%d state to " 2787 dev_err(&port_priv->device->dev,
2709 "INIT: %d\n", i, ret); 2788 "Couldn't change QP%d state to INIT: %d\n",
2789 i, ret);
2710 goto out; 2790 goto out;
2711 } 2791 }
2712 2792
2713 attr->qp_state = IB_QPS_RTR; 2793 attr->qp_state = IB_QPS_RTR;
2714 ret = ib_modify_qp(qp, attr, IB_QP_STATE); 2794 ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2715 if (ret) { 2795 if (ret) {
2716 printk(KERN_ERR PFX "Couldn't change QP%d state to " 2796 dev_err(&port_priv->device->dev,
2717 "RTR: %d\n", i, ret); 2797 "Couldn't change QP%d state to RTR: %d\n",
2798 i, ret);
2718 goto out; 2799 goto out;
2719 } 2800 }
2720 2801
@@ -2722,16 +2803,18 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2722 attr->sq_psn = IB_MAD_SEND_Q_PSN; 2803 attr->sq_psn = IB_MAD_SEND_Q_PSN;
2723 ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN); 2804 ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2724 if (ret) { 2805 if (ret) {
2725 printk(KERN_ERR PFX "Couldn't change QP%d state to " 2806 dev_err(&port_priv->device->dev,
2726 "RTS: %d\n", i, ret); 2807 "Couldn't change QP%d state to RTS: %d\n",
2808 i, ret);
2727 goto out; 2809 goto out;
2728 } 2810 }
2729 } 2811 }
2730 2812
2731 ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP); 2813 ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2732 if (ret) { 2814 if (ret) {
2733 printk(KERN_ERR PFX "Failed to request completion " 2815 dev_err(&port_priv->device->dev,
2734 "notification: %d\n", ret); 2816 "Failed to request completion notification: %d\n",
2817 ret);
2735 goto out; 2818 goto out;
2736 } 2819 }
2737 2820
@@ -2741,7 +2824,8 @@ static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2741 2824
2742 ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL); 2825 ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2743 if (ret) { 2826 if (ret) {
2744 printk(KERN_ERR PFX "Couldn't post receive WRs\n"); 2827 dev_err(&port_priv->device->dev,
2828 "Couldn't post receive WRs\n");
2745 goto out; 2829 goto out;
2746 } 2830 }
2747 } 2831 }
@@ -2755,7 +2839,8 @@ static void qp_event_handler(struct ib_event *event, void *qp_context)
2755 struct ib_mad_qp_info *qp_info = qp_context; 2839 struct ib_mad_qp_info *qp_info = qp_context;
2756 2840
2757 /* It's worse than that! He's dead, Jim! */ 2841 /* It's worse than that! He's dead, Jim! */
2758 printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n", 2842 dev_err(&qp_info->port_priv->device->dev,
2843 "Fatal error (%d) on MAD QP (%d)\n",
2759 event->event, qp_info->qp->qp_num); 2844 event->event, qp_info->qp->qp_num);
2760} 2845}
2761 2846
@@ -2801,8 +2886,9 @@ static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2801 qp_init_attr.event_handler = qp_event_handler; 2886 qp_init_attr.event_handler = qp_event_handler;
2802 qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr); 2887 qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2803 if (IS_ERR(qp_info->qp)) { 2888 if (IS_ERR(qp_info->qp)) {
2804 printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n", 2889 dev_err(&qp_info->port_priv->device->dev,
2805 get_spl_qp_index(qp_type)); 2890 "Couldn't create ib_mad QP%d\n",
2891 get_spl_qp_index(qp_type));
2806 ret = PTR_ERR(qp_info->qp); 2892 ret = PTR_ERR(qp_info->qp);
2807 goto error; 2893 goto error;
2808 } 2894 }
@@ -2840,7 +2926,7 @@ static int ib_mad_port_open(struct ib_device *device,
2840 /* Create new device info */ 2926 /* Create new device info */
2841 port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL); 2927 port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2842 if (!port_priv) { 2928 if (!port_priv) {
2843 printk(KERN_ERR PFX "No memory for ib_mad_port_private\n"); 2929 dev_err(&device->dev, "No memory for ib_mad_port_private\n");
2844 return -ENOMEM; 2930 return -ENOMEM;
2845 } 2931 }
2846 2932
@@ -2860,21 +2946,21 @@ static int ib_mad_port_open(struct ib_device *device,
2860 ib_mad_thread_completion_handler, 2946 ib_mad_thread_completion_handler,
2861 NULL, port_priv, cq_size, 0); 2947 NULL, port_priv, cq_size, 0);
2862 if (IS_ERR(port_priv->cq)) { 2948 if (IS_ERR(port_priv->cq)) {
2863 printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n"); 2949 dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
2864 ret = PTR_ERR(port_priv->cq); 2950 ret = PTR_ERR(port_priv->cq);
2865 goto error3; 2951 goto error3;
2866 } 2952 }
2867 2953
2868 port_priv->pd = ib_alloc_pd(device); 2954 port_priv->pd = ib_alloc_pd(device);
2869 if (IS_ERR(port_priv->pd)) { 2955 if (IS_ERR(port_priv->pd)) {
2870 printk(KERN_ERR PFX "Couldn't create ib_mad PD\n"); 2956 dev_err(&device->dev, "Couldn't create ib_mad PD\n");
2871 ret = PTR_ERR(port_priv->pd); 2957 ret = PTR_ERR(port_priv->pd);
2872 goto error4; 2958 goto error4;
2873 } 2959 }
2874 2960
2875 port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE); 2961 port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
2876 if (IS_ERR(port_priv->mr)) { 2962 if (IS_ERR(port_priv->mr)) {
2877 printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n"); 2963 dev_err(&device->dev, "Couldn't get ib_mad DMA MR\n");
2878 ret = PTR_ERR(port_priv->mr); 2964 ret = PTR_ERR(port_priv->mr);
2879 goto error5; 2965 goto error5;
2880 } 2966 }
@@ -2902,7 +2988,7 @@ static int ib_mad_port_open(struct ib_device *device,
2902 2988
2903 ret = ib_mad_port_start(port_priv); 2989 ret = ib_mad_port_start(port_priv);
2904 if (ret) { 2990 if (ret) {
2905 printk(KERN_ERR PFX "Couldn't start port\n"); 2991 dev_err(&device->dev, "Couldn't start port\n");
2906 goto error9; 2992 goto error9;
2907 } 2993 }
2908 2994
@@ -2946,7 +3032,7 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
2946 port_priv = __ib_get_mad_port(device, port_num); 3032 port_priv = __ib_get_mad_port(device, port_num);
2947 if (port_priv == NULL) { 3033 if (port_priv == NULL) {
2948 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags); 3034 spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2949 printk(KERN_ERR PFX "Port %d not found\n", port_num); 3035 dev_err(&device->dev, "Port %d not found\n", port_num);
2950 return -ENODEV; 3036 return -ENODEV;
2951 } 3037 }
2952 list_del_init(&port_priv->port_list); 3038 list_del_init(&port_priv->port_list);
@@ -2984,14 +3070,12 @@ static void ib_mad_init_device(struct ib_device *device)
2984 3070
2985 for (i = start; i <= end; i++) { 3071 for (i = start; i <= end; i++) {
2986 if (ib_mad_port_open(device, i)) { 3072 if (ib_mad_port_open(device, i)) {
2987 printk(KERN_ERR PFX "Couldn't open %s port %d\n", 3073 dev_err(&device->dev, "Couldn't open port %d\n", i);
2988 device->name, i);
2989 goto error; 3074 goto error;
2990 } 3075 }
2991 if (ib_agent_port_open(device, i)) { 3076 if (ib_agent_port_open(device, i)) {
2992 printk(KERN_ERR PFX "Couldn't open %s port %d " 3077 dev_err(&device->dev,
2993 "for agents\n", 3078 "Couldn't open port %d for agents\n", i);
2994 device->name, i);
2995 goto error_agent; 3079 goto error_agent;
2996 } 3080 }
2997 } 3081 }
@@ -2999,20 +3083,17 @@ static void ib_mad_init_device(struct ib_device *device)
2999 3083
3000error_agent: 3084error_agent:
3001 if (ib_mad_port_close(device, i)) 3085 if (ib_mad_port_close(device, i))
3002 printk(KERN_ERR PFX "Couldn't close %s port %d\n", 3086 dev_err(&device->dev, "Couldn't close port %d\n", i);
3003 device->name, i);
3004 3087
3005error: 3088error:
3006 i--; 3089 i--;
3007 3090
3008 while (i >= start) { 3091 while (i >= start) {
3009 if (ib_agent_port_close(device, i)) 3092 if (ib_agent_port_close(device, i))
3010 printk(KERN_ERR PFX "Couldn't close %s port %d " 3093 dev_err(&device->dev,
3011 "for agents\n", 3094 "Couldn't close port %d for agents\n", i);
3012 device->name, i);
3013 if (ib_mad_port_close(device, i)) 3095 if (ib_mad_port_close(device, i))
3014 printk(KERN_ERR PFX "Couldn't close %s port %d\n", 3096 dev_err(&device->dev, "Couldn't close port %d\n", i);
3015 device->name, i);
3016 i--; 3097 i--;
3017 } 3098 }
3018} 3099}
@@ -3033,12 +3114,12 @@ static void ib_mad_remove_device(struct ib_device *device)
3033 } 3114 }
3034 for (i = 0; i < num_ports; i++, cur_port++) { 3115 for (i = 0; i < num_ports; i++, cur_port++) {
3035 if (ib_agent_port_close(device, cur_port)) 3116 if (ib_agent_port_close(device, cur_port))
3036 printk(KERN_ERR PFX "Couldn't close %s port %d " 3117 dev_err(&device->dev,
3037 "for agents\n", 3118 "Couldn't close port %d for agents\n",
3038 device->name, cur_port); 3119 cur_port);
3039 if (ib_mad_port_close(device, cur_port)) 3120 if (ib_mad_port_close(device, cur_port))
3040 printk(KERN_ERR PFX "Couldn't close %s port %d\n", 3121 dev_err(&device->dev, "Couldn't close port %d\n",
3041 device->name, cur_port); 3122 cur_port);
3042 } 3123 }
3043} 3124}
3044 3125
@@ -3064,7 +3145,7 @@ static int __init ib_mad_init_module(void)
3064 SLAB_HWCACHE_ALIGN, 3145 SLAB_HWCACHE_ALIGN,
3065 NULL); 3146 NULL);
3066 if (!ib_mad_cache) { 3147 if (!ib_mad_cache) {
3067 printk(KERN_ERR PFX "Couldn't create ib_mad cache\n"); 3148 pr_err("Couldn't create ib_mad cache\n");
3068 ret = -ENOMEM; 3149 ret = -ENOMEM;
3069 goto error1; 3150 goto error1;
3070 } 3151 }
@@ -3072,7 +3153,7 @@ static int __init ib_mad_init_module(void)
3072 INIT_LIST_HEAD(&ib_mad_port_list); 3153 INIT_LIST_HEAD(&ib_mad_port_list);
3073 3154
3074 if (ib_register_client(&mad_client)) { 3155 if (ib_register_client(&mad_client)) {
3075 printk(KERN_ERR PFX "Couldn't register ib_mad client\n"); 3156 pr_err("Couldn't register ib_mad client\n");
3076 ret = -EINVAL; 3157 ret = -EINVAL;
3077 goto error2; 3158 goto error2;
3078 } 3159 }
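Editor's note: the bulk of the mad.c diff is a logging conversion. A pr_fmt() definition replaces the hand-rolled PFX prefix (removed from mad_priv.h just below), dev_err()/dev_notice() are used wherever a struct ib_device, and therefore a struct device, is in reach, and pr_err() covers the paths that have none. The pattern in miniature (illustrative names only):

/* Must precede any header that pulls in printk.h. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/device.h>
#include <linux/printk.h>

static void __maybe_unused report(struct device *dev, int err)
{
	if (dev)
		dev_err(dev, "operation failed: %d\n", err);	/* names the device */
	else
		pr_err("operation failed: %d\n", err);		/* falls back to pr_fmt() */
}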
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 9430ab4969c5..d1a0b0ee9444 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -42,9 +42,6 @@
42#include <rdma/ib_mad.h> 42#include <rdma/ib_mad.h>
43#include <rdma/ib_smi.h> 43#include <rdma/ib_smi.h>
44 44
45
46#define PFX "ib_mad: "
47
48#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */ 45#define IB_MAD_QPS_CORE 2 /* Always QP0 and QP1 as a minimum */
49 46
50/* QP and CQ parameters */ 47/* QP and CQ parameters */
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index 233eaf541f55..c38f030f0dc9 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1184,7 +1184,7 @@ static void ib_sa_add_one(struct ib_device *device)
1184 sa_dev->port[i].agent = 1184 sa_dev->port[i].agent =
1185 ib_register_mad_agent(device, i + s, IB_QPT_GSI, 1185 ib_register_mad_agent(device, i + s, IB_QPT_GSI,
1186 NULL, 0, send_handler, 1186 NULL, 0, send_handler,
1187 recv_handler, sa_dev); 1187 recv_handler, sa_dev, 0);
1188 if (IS_ERR(sa_dev->port[i].agent)) 1188 if (IS_ERR(sa_dev->port[i].agent))
1189 goto err; 1189 goto err;
1190 1190
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 1acb99100556..928cdd20e2d1 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -33,6 +33,8 @@
33 * SOFTWARE. 33 * SOFTWARE.
34 */ 34 */
35 35
36#define pr_fmt(fmt) "user_mad: " fmt
37
36#include <linux/module.h> 38#include <linux/module.h>
37#include <linux/init.h> 39#include <linux/init.h>
38#include <linux/device.h> 40#include <linux/device.h>
@@ -504,13 +506,15 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
504 506
505 rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data; 507 rmpp_mad = (struct ib_rmpp_mad *) packet->mad.data;
506 hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class); 508 hdr_len = ib_get_mad_data_offset(rmpp_mad->mad_hdr.mgmt_class);
507 if (!ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)) { 509
508 copy_offset = IB_MGMT_MAD_HDR; 510 if (ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
509 rmpp_active = 0; 511 && ib_mad_kernel_rmpp_agent(agent)) {
510 } else {
511 copy_offset = IB_MGMT_RMPP_HDR; 512 copy_offset = IB_MGMT_RMPP_HDR;
512 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & 513 rmpp_active = ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
513 IB_MGMT_RMPP_FLAG_ACTIVE; 514 IB_MGMT_RMPP_FLAG_ACTIVE;
515 } else {
516 copy_offset = IB_MGMT_MAD_HDR;
517 rmpp_active = 0;
514 } 518 }
515 519
516 data_len = count - hdr_size(file) - hdr_len; 520 data_len = count - hdr_size(file) - hdr_len;
@@ -556,14 +560,22 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf,
556 rmpp_mad->mad_hdr.tid = *tid; 560 rmpp_mad->mad_hdr.tid = *tid;
557 } 561 }
558 562
559 spin_lock_irq(&file->send_lock); 563 if (!ib_mad_kernel_rmpp_agent(agent)
560 ret = is_duplicate(file, packet); 564 && ib_is_mad_class_rmpp(rmpp_mad->mad_hdr.mgmt_class)
561 if (!ret) 565 && (ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) & IB_MGMT_RMPP_FLAG_ACTIVE)) {
566 spin_lock_irq(&file->send_lock);
562 list_add_tail(&packet->list, &file->send_list); 567 list_add_tail(&packet->list, &file->send_list);
563 spin_unlock_irq(&file->send_lock); 568 spin_unlock_irq(&file->send_lock);
564 if (ret) { 569 } else {
565 ret = -EINVAL; 570 spin_lock_irq(&file->send_lock);
566 goto err_msg; 571 ret = is_duplicate(file, packet);
572 if (!ret)
573 list_add_tail(&packet->list, &file->send_list);
574 spin_unlock_irq(&file->send_lock);
575 if (ret) {
576 ret = -EINVAL;
577 goto err_msg;
578 }
567 } 579 }
568 580
569 ret = ib_post_send_mad(packet->msg, NULL); 581 ret = ib_post_send_mad(packet->msg, NULL);
@@ -614,6 +626,8 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
614 mutex_lock(&file->mutex); 626 mutex_lock(&file->mutex);
615 627
616 if (!file->port->ib_dev) { 628 if (!file->port->ib_dev) {
629 dev_notice(file->port->dev,
630 "ib_umad_reg_agent: invalid device\n");
617 ret = -EPIPE; 631 ret = -EPIPE;
618 goto out; 632 goto out;
619 } 633 }
@@ -624,6 +638,9 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
624 } 638 }
625 639
626 if (ureq.qpn != 0 && ureq.qpn != 1) { 640 if (ureq.qpn != 0 && ureq.qpn != 1) {
641 dev_notice(file->port->dev,
642 "ib_umad_reg_agent: invalid QPN %d specified\n",
643 ureq.qpn);
627 ret = -EINVAL; 644 ret = -EINVAL;
628 goto out; 645 goto out;
629 } 646 }
@@ -632,11 +649,15 @@ static int ib_umad_reg_agent(struct ib_umad_file *file, void __user *arg,
632 if (!__get_agent(file, agent_id)) 649 if (!__get_agent(file, agent_id))
633 goto found; 650 goto found;
634 651
652 dev_notice(file->port->dev,
653 "ib_umad_reg_agent: Max Agents (%u) reached\n",
654 IB_UMAD_MAX_AGENTS);
635 ret = -ENOMEM; 655 ret = -ENOMEM;
636 goto out; 656 goto out;
637 657
638found: 658found:
639 if (ureq.mgmt_class) { 659 if (ureq.mgmt_class) {
660 memset(&req, 0, sizeof(req));
640 req.mgmt_class = ureq.mgmt_class; 661 req.mgmt_class = ureq.mgmt_class;
641 req.mgmt_class_version = ureq.mgmt_class_version; 662 req.mgmt_class_version = ureq.mgmt_class_version;
642 memcpy(req.oui, ureq.oui, sizeof req.oui); 663 memcpy(req.oui, ureq.oui, sizeof req.oui);
@@ -657,7 +678,7 @@ found:
657 ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI, 678 ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
658 ureq.mgmt_class ? &req : NULL, 679 ureq.mgmt_class ? &req : NULL,
659 ureq.rmpp_version, 680 ureq.rmpp_version,
660 send_handler, recv_handler, file); 681 send_handler, recv_handler, file, 0);
661 if (IS_ERR(agent)) { 682 if (IS_ERR(agent)) {
662 ret = PTR_ERR(agent); 683 ret = PTR_ERR(agent);
663 agent = NULL; 684 agent = NULL;
@@ -673,10 +694,11 @@ found:
673 if (!file->already_used) { 694 if (!file->already_used) {
674 file->already_used = 1; 695 file->already_used = 1;
675 if (!file->use_pkey_index) { 696 if (!file->use_pkey_index) {
676 printk(KERN_WARNING "user_mad: process %s did not enable " 697 dev_warn(file->port->dev,
677 "P_Key index support.\n", current->comm); 698 "process %s did not enable P_Key index support.\n",
678 printk(KERN_WARNING "user_mad: Documentation/infiniband/user_mad.txt " 699 current->comm);
679 "has info on the new ABI.\n"); 700 dev_warn(file->port->dev,
701 " Documentation/infiniband/user_mad.txt has info on the new ABI.\n");
680 } 702 }
681 } 703 }
682 704
@@ -694,6 +716,119 @@ out:
694 return ret; 716 return ret;
695} 717}
696 718
719static int ib_umad_reg_agent2(struct ib_umad_file *file, void __user *arg)
720{
721 struct ib_user_mad_reg_req2 ureq;
722 struct ib_mad_reg_req req;
723 struct ib_mad_agent *agent = NULL;
724 int agent_id;
725 int ret;
726
727 mutex_lock(&file->port->file_mutex);
728 mutex_lock(&file->mutex);
729
730 if (!file->port->ib_dev) {
731 dev_notice(file->port->dev,
732 "ib_umad_reg_agent2: invalid device\n");
733 ret = -EPIPE;
734 goto out;
735 }
736
737 if (copy_from_user(&ureq, arg, sizeof(ureq))) {
738 ret = -EFAULT;
739 goto out;
740 }
741
742 if (ureq.qpn != 0 && ureq.qpn != 1) {
743 dev_notice(file->port->dev,
744 "ib_umad_reg_agent2: invalid QPN %d specified\n",
745 ureq.qpn);
746 ret = -EINVAL;
747 goto out;
748 }
749
750 if (ureq.flags & ~IB_USER_MAD_REG_FLAGS_CAP) {
751 dev_notice(file->port->dev,
752 "ib_umad_reg_agent2 failed: invalid registration flags specified 0x%x; supported 0x%x\n",
753 ureq.flags, IB_USER_MAD_REG_FLAGS_CAP);
754 ret = -EINVAL;
755
756 if (put_user((u32)IB_USER_MAD_REG_FLAGS_CAP,
757 (u32 __user *) (arg + offsetof(struct
758 ib_user_mad_reg_req2, flags))))
759 ret = -EFAULT;
760
761 goto out;
762 }
763
764 for (agent_id = 0; agent_id < IB_UMAD_MAX_AGENTS; ++agent_id)
765 if (!__get_agent(file, agent_id))
766 goto found;
767
768 dev_notice(file->port->dev,
769 "ib_umad_reg_agent2: Max Agents (%u) reached\n",
770 IB_UMAD_MAX_AGENTS);
771 ret = -ENOMEM;
772 goto out;
773
774found:
775 if (ureq.mgmt_class) {
776 memset(&req, 0, sizeof(req));
777 req.mgmt_class = ureq.mgmt_class;
778 req.mgmt_class_version = ureq.mgmt_class_version;
779 if (ureq.oui & 0xff000000) {
780 dev_notice(file->port->dev,
781 "ib_umad_reg_agent2 failed: oui invalid 0x%08x\n",
782 ureq.oui);
783 ret = -EINVAL;
784 goto out;
785 }
786 req.oui[2] = ureq.oui & 0x0000ff;
787 req.oui[1] = (ureq.oui & 0x00ff00) >> 8;
788 req.oui[0] = (ureq.oui & 0xff0000) >> 16;
789 memcpy(req.method_mask, ureq.method_mask,
790 sizeof(req.method_mask));
791 }
792
793 agent = ib_register_mad_agent(file->port->ib_dev, file->port->port_num,
794 ureq.qpn ? IB_QPT_GSI : IB_QPT_SMI,
795 ureq.mgmt_class ? &req : NULL,
796 ureq.rmpp_version,
797 send_handler, recv_handler, file,
798 ureq.flags);
799 if (IS_ERR(agent)) {
800 ret = PTR_ERR(agent);
801 agent = NULL;
802 goto out;
803 }
804
805 if (put_user(agent_id,
806 (u32 __user *)(arg +
807 offsetof(struct ib_user_mad_reg_req2, id)))) {
808 ret = -EFAULT;
809 goto out;
810 }
811
812 if (!file->already_used) {
813 file->already_used = 1;
814 file->use_pkey_index = 1;
815 }
816
817 file->agent[agent_id] = agent;
818 ret = 0;
819
820out:
821 mutex_unlock(&file->mutex);
822
823 if (ret && agent)
824 ib_unregister_mad_agent(agent);
825
826 mutex_unlock(&file->port->file_mutex);
827
828 return ret;
829}
830
831
697static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg) 832static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
698{ 833{
699 struct ib_mad_agent *agent = NULL; 834 struct ib_mad_agent *agent = NULL;
@@ -749,6 +884,8 @@ static long ib_umad_ioctl(struct file *filp, unsigned int cmd,
749 return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg); 884 return ib_umad_unreg_agent(filp->private_data, (__u32 __user *) arg);
750 case IB_USER_MAD_ENABLE_PKEY: 885 case IB_USER_MAD_ENABLE_PKEY:
751 return ib_umad_enable_pkey(filp->private_data); 886 return ib_umad_enable_pkey(filp->private_data);
887 case IB_USER_MAD_REGISTER_AGENT2:
888 return ib_umad_reg_agent2(filp->private_data, (void __user *) arg);
752 default: 889 default:
753 return -ENOIOCTLCMD; 890 return -ENOIOCTLCMD;
754 } 891 }
@@ -765,6 +902,8 @@ static long ib_umad_compat_ioctl(struct file *filp, unsigned int cmd,
765 return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg)); 902 return ib_umad_unreg_agent(filp->private_data, compat_ptr(arg));
766 case IB_USER_MAD_ENABLE_PKEY: 903 case IB_USER_MAD_ENABLE_PKEY:
767 return ib_umad_enable_pkey(filp->private_data); 904 return ib_umad_enable_pkey(filp->private_data);
905 case IB_USER_MAD_REGISTER_AGENT2:
906 return ib_umad_reg_agent2(filp->private_data, compat_ptr(arg));
768 default: 907 default:
769 return -ENOIOCTLCMD; 908 return -ENOIOCTLCMD;
770 } 909 }
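Editor's note: IB_USER_MAD_REGISTER_AGENT2 takes struct ib_user_mad_reg_req2, whose flags word is validated against IB_USER_MAD_REG_FLAGS_CAP (and written back on failure so userspace can discover what the kernel supports), with the agent id returned in the id field on success. A hedged userspace sketch follows; the flag name and struct layout are assumed from the ABI header that accompanies this series, and the device path and SA class choice are illustrative.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <rdma/ib_user_mad.h>	/* assumed location of the updated ABI header */

/* Register a GSI agent that handles RMPP itself (IB_USER_MAD_USER_RMPP). */
static int register_agent2(const char *umad_dev)
{
	struct ib_user_mad_reg_req2 req;
	int fd = open(umad_dev, O_RDWR);

	if (fd < 0)
		return -1;

	memset(&req, 0, sizeof(req));
	req.qpn			= 1;	/* GSI */
	req.mgmt_class		= 0x03;	/* SubnAdm, an RMPP class */
	req.mgmt_class_version	= 2;
	req.rmpp_version	= 1;
	req.flags		= IB_USER_MAD_USER_RMPP;	/* assumed flag name */

	if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT2, &req)) {
		/* On bad flags the kernel writes its capability mask back. */
		fprintf(stderr, "register failed, kernel caps 0x%x\n", req.flags);
		close(fd);
		return -1;
	}
	printf("registered, agent id %u\n", req.id);
	return fd;
}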
@@ -983,7 +1122,7 @@ static CLASS_ATTR_STRING(abi_version, S_IRUGO,
983 1122
984static dev_t overflow_maj; 1123static dev_t overflow_maj;
985static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS); 1124static DECLARE_BITMAP(overflow_map, IB_UMAD_MAX_PORTS);
986static int find_overflow_devnum(void) 1125static int find_overflow_devnum(struct ib_device *device)
987{ 1126{
988 int ret; 1127 int ret;
989 1128
@@ -991,7 +1130,8 @@ static int find_overflow_devnum(void)
991 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2, 1130 ret = alloc_chrdev_region(&overflow_maj, 0, IB_UMAD_MAX_PORTS * 2,
992 "infiniband_mad"); 1131 "infiniband_mad");
993 if (ret) { 1132 if (ret) {
994 printk(KERN_ERR "user_mad: couldn't register dynamic device number\n"); 1133 dev_err(&device->dev,
1134 "couldn't register dynamic device number\n");
995 return ret; 1135 return ret;
996 } 1136 }
997 } 1137 }
@@ -1014,7 +1154,7 @@ static int ib_umad_init_port(struct ib_device *device, int port_num,
1014 devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS); 1154 devnum = find_first_zero_bit(dev_map, IB_UMAD_MAX_PORTS);
1015 if (devnum >= IB_UMAD_MAX_PORTS) { 1155 if (devnum >= IB_UMAD_MAX_PORTS) {
1016 spin_unlock(&port_lock); 1156 spin_unlock(&port_lock);
1017 devnum = find_overflow_devnum(); 1157 devnum = find_overflow_devnum(device);
1018 if (devnum < 0) 1158 if (devnum < 0)
1019 return -1; 1159 return -1;
1020 1160
@@ -1200,14 +1340,14 @@ static int __init ib_umad_init(void)
1200 ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2, 1340 ret = register_chrdev_region(base_dev, IB_UMAD_MAX_PORTS * 2,
1201 "infiniband_mad"); 1341 "infiniband_mad");
1202 if (ret) { 1342 if (ret) {
1203 printk(KERN_ERR "user_mad: couldn't register device number\n"); 1343 pr_err("couldn't register device number\n");
1204 goto out; 1344 goto out;
1205 } 1345 }
1206 1346
1207 umad_class = class_create(THIS_MODULE, "infiniband_mad"); 1347 umad_class = class_create(THIS_MODULE, "infiniband_mad");
1208 if (IS_ERR(umad_class)) { 1348 if (IS_ERR(umad_class)) {
1209 ret = PTR_ERR(umad_class); 1349 ret = PTR_ERR(umad_class);
1210 printk(KERN_ERR "user_mad: couldn't create class infiniband_mad\n"); 1350 pr_err("couldn't create class infiniband_mad\n");
1211 goto out_chrdev; 1351 goto out_chrdev;
1212 } 1352 }
1213 1353
@@ -1215,13 +1355,13 @@ static int __init ib_umad_init(void)
1215 1355
1216 ret = class_create_file(umad_class, &class_attr_abi_version.attr); 1356 ret = class_create_file(umad_class, &class_attr_abi_version.attr);
1217 if (ret) { 1357 if (ret) {
1218 printk(KERN_ERR "user_mad: couldn't create abi_version attribute\n"); 1358 pr_err("couldn't create abi_version attribute\n");
1219 goto out_class; 1359 goto out_class;
1220 } 1360 }
1221 1361
1222 ret = ib_register_client(&umad_client); 1362 ret = ib_register_client(&umad_client);
1223 if (ret) { 1363 if (ret) {
1224 printk(KERN_ERR "user_mad: couldn't register ib_umad client\n"); 1364 pr_err("couldn't register ib_umad client\n");
1225 goto out_class; 1365 goto out_class;
1226 } 1366 }
1227 1367
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index a283274a5a09..643c08a025a5 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port);
221IB_UVERBS_DECLARE_CMD(alloc_pd); 221IB_UVERBS_DECLARE_CMD(alloc_pd);
222IB_UVERBS_DECLARE_CMD(dealloc_pd); 222IB_UVERBS_DECLARE_CMD(dealloc_pd);
223IB_UVERBS_DECLARE_CMD(reg_mr); 223IB_UVERBS_DECLARE_CMD(reg_mr);
224IB_UVERBS_DECLARE_CMD(rereg_mr);
224IB_UVERBS_DECLARE_CMD(dereg_mr); 225IB_UVERBS_DECLARE_CMD(dereg_mr);
225IB_UVERBS_DECLARE_CMD(alloc_mw); 226IB_UVERBS_DECLARE_CMD(alloc_mw);
226IB_UVERBS_DECLARE_CMD(dealloc_mw); 227IB_UVERBS_DECLARE_CMD(dealloc_mw);
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index ea6203ee7bcc..0600c50e6215 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1002,6 +1002,99 @@ err_free:
1002 return ret; 1002 return ret;
1003} 1003}
1004 1004
1005ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
1006 const char __user *buf, int in_len,
1007 int out_len)
1008{
1009 struct ib_uverbs_rereg_mr cmd;
1010 struct ib_uverbs_rereg_mr_resp resp;
1011 struct ib_udata udata;
1012 struct ib_pd *pd = NULL;
1013 struct ib_mr *mr;
1014 struct ib_pd *old_pd;
1015 int ret;
1016 struct ib_uobject *uobj;
1017
1018 if (out_len < sizeof(resp))
1019 return -ENOSPC;
1020
1021 if (copy_from_user(&cmd, buf, sizeof(cmd)))
1022 return -EFAULT;
1023
1024 INIT_UDATA(&udata, buf + sizeof(cmd),
1025 (unsigned long) cmd.response + sizeof(resp),
1026 in_len - sizeof(cmd), out_len - sizeof(resp));
1027
1028 if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags)
1029 return -EINVAL;
1030
1031 if ((cmd.flags & IB_MR_REREG_TRANS) &&
1032 (!cmd.start || !cmd.hca_va || 0 >= cmd.length ||
1033 (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
1034 return -EINVAL;
1035
1036 uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle,
1037 file->ucontext);
1038
1039 if (!uobj)
1040 return -EINVAL;
1041
1042 mr = uobj->object;
1043
1044 if (cmd.flags & IB_MR_REREG_ACCESS) {
1045 ret = ib_check_mr_access(cmd.access_flags);
1046 if (ret)
1047 goto put_uobjs;
1048 }
1049
1050 if (cmd.flags & IB_MR_REREG_PD) {
1051 pd = idr_read_pd(cmd.pd_handle, file->ucontext);
1052 if (!pd) {
1053 ret = -EINVAL;
1054 goto put_uobjs;
1055 }
1056 }
1057
1058 if (atomic_read(&mr->usecnt)) {
1059 ret = -EBUSY;
1060 goto put_uobj_pd;
1061 }
1062
1063 old_pd = mr->pd;
1064 ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start,
1065 cmd.length, cmd.hca_va,
1066 cmd.access_flags, pd, &udata);
1067 if (!ret) {
1068 if (cmd.flags & IB_MR_REREG_PD) {
1069 atomic_inc(&pd->usecnt);
1070 mr->pd = pd;
1071 atomic_dec(&old_pd->usecnt);
1072 }
1073 } else {
1074 goto put_uobj_pd;
1075 }
1076
1077 memset(&resp, 0, sizeof(resp));
1078 resp.lkey = mr->lkey;
1079 resp.rkey = mr->rkey;
1080
1081 if (copy_to_user((void __user *)(unsigned long)cmd.response,
1082 &resp, sizeof(resp)))
1083 ret = -EFAULT;
1084 else
1085 ret = in_len;
1086
1087put_uobj_pd:
1088 if (cmd.flags & IB_MR_REREG_PD)
1089 put_pd_read(pd);
1090
1091put_uobjs:
1092
1093 put_uobj_write(mr->uobject);
1094
1095 return ret;
1096}
1097
1005ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, 1098ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
1006 const char __user *buf, int in_len, 1099 const char __user *buf, int in_len,
1007 int out_len) 1100 int out_len)
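Editor's note: ib_uverbs_rereg_mr() forwards IB_USER_VERBS_CMD_REREG_MR to a driver's rereg_user_mr() method after doing the generic checks itself: flags must stay within IB_MR_REREG_SUPPORTED, a bound MR (usecnt != 0) is refused with -EBUSY, access flags go through ib_check_mr_access(), and on success the core swaps the PD pointers and usecnt counts. A hedged skeleton of a provider hook with that calling convention (struct mydrv_mr and the behaviour sketched in comments are hypothetical):

#include <rdma/ib_verbs.h>

struct mydrv_mr {			/* hypothetical driver-side MR state */
	struct ib_mr	ibmr;
	u64		iova;
	u64		len;
	int		access;
};

static int mydrv_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
			       u64 length, u64 virt_addr, int access_flags,
			       struct ib_pd *pd, struct ib_udata *udata)
{
	struct mydrv_mr *mr = container_of(ibmr, struct mydrv_mr, ibmr);

	if (flags & IB_MR_REREG_ACCESS)
		mr->access = access_flags;	/* already validated by the core */

	if (flags & IB_MR_REREG_TRANS) {
		/* re-pin the (start, length) range and retarget virt_addr */
		mr->iova = virt_addr;
		mr->len  = length;
	}

	if (flags & IB_MR_REREG_PD) {
		/* point the HW context at the new pd; ibmr->pd and the
		 * usecnt accounting are fixed up by ib_uverbs_rereg_mr() */
	}

	/* push the updated entry to hardware; any error is returned to
	 * userspace unchanged */
	return 0;
}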
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 08219fb3338b..c73b22a257fe 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
87 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, 87 [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd,
88 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, 88 [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd,
89 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, 89 [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr,
90 [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr,
90 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, 91 [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr,
91 [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, 92 [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw,
92 [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, 93 [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw,
diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c
index 49e0e8533f74..1b63185b4ad4 100644
--- a/drivers/infiniband/hw/amso1100/c2_cq.c
+++ b/drivers/infiniband/hw/amso1100/c2_cq.c
@@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq)
260 mq->msg_pool.host, dma_unmap_addr(mq, mapping)); 260 mq->msg_pool.host, dma_unmap_addr(mq, mapping));
261} 261}
262 262
263static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, 263static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq,
264 int msg_size) 264 size_t q_size, size_t msg_size)
265{ 265{
266 u8 *pool_start; 266 u8 *pool_start;
267 267
268 if (q_size > SIZE_MAX / msg_size)
269 return -EINVAL;
270
268 pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, 271 pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size,
269 &mq->host_dma, GFP_KERNEL); 272 &mq->host_dma, GFP_KERNEL);
270 if (!pool_start) 273 if (!pool_start)
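Editor's note: the amso1100 change makes q_size and msg_size size_t and rejects the request up front when q_size * msg_size would wrap, so dma_alloc_coherent() can never be asked for a truncated size. The guard in isolation, as a hedged helper (msg_size is a fixed nonzero constant in the driver, which is why the patch can omit the zero check added here):

#include <linux/kernel.h>	/* SIZE_MAX */
#include <linux/types.h>

/* True if a * b would overflow size_t; check before using the product
 * as an allocation length. */
static inline bool mul_overflows(size_t a, size_t b)
{
	return b && a > SIZE_MAX / b;
}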
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index fbe6051af254..c9df0549f51d 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -227,6 +227,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid)
227 227
228 chp = get_chp(dev, qid); 228 chp = get_chp(dev, qid);
229 if (chp) { 229 if (chp) {
230 t4_clear_cq_armed(&chp->cq);
230 spin_lock_irqsave(&chp->comp_handler_lock, flag); 231 spin_lock_irqsave(&chp->comp_handler_lock, flag);
231 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); 232 (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context);
232 spin_unlock_irqrestore(&chp->comp_handler_lock, flag); 233 spin_unlock_irqrestore(&chp->comp_handler_lock, flag);
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index c158fcc02bca..41cd6882b648 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1105,7 +1105,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1105 struct c4iw_cq *schp) 1105 struct c4iw_cq *schp)
1106{ 1106{
1107 int count; 1107 int count;
1108 int flushed; 1108 int rq_flushed, sq_flushed;
1109 unsigned long flag; 1109 unsigned long flag;
1110 1110
1111 PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); 1111 PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp);
@@ -1123,27 +1123,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp,
1123 1123
1124 c4iw_flush_hw_cq(rchp); 1124 c4iw_flush_hw_cq(rchp);
1125 c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); 1125 c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count);
1126 flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); 1126 rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count);
1127 spin_unlock(&qhp->lock); 1127 spin_unlock(&qhp->lock);
1128 spin_unlock_irqrestore(&rchp->lock, flag); 1128 spin_unlock_irqrestore(&rchp->lock, flag);
1129 if (flushed) {
1130 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1131 (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
1132 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1133 }
1134 1129
1135 /* locking hierarchy: cq lock first, then qp lock. */ 1130 /* locking hierarchy: cq lock first, then qp lock. */
1136 spin_lock_irqsave(&schp->lock, flag); 1131 spin_lock_irqsave(&schp->lock, flag);
1137 spin_lock(&qhp->lock); 1132 spin_lock(&qhp->lock);
1138 if (schp != rchp) 1133 if (schp != rchp)
1139 c4iw_flush_hw_cq(schp); 1134 c4iw_flush_hw_cq(schp);
1140 flushed = c4iw_flush_sq(qhp); 1135 sq_flushed = c4iw_flush_sq(qhp);
1141 spin_unlock(&qhp->lock); 1136 spin_unlock(&qhp->lock);
1142 spin_unlock_irqrestore(&schp->lock, flag); 1137 spin_unlock_irqrestore(&schp->lock, flag);
1143 if (flushed) { 1138
1144 spin_lock_irqsave(&schp->comp_handler_lock, flag); 1139 if (schp == rchp) {
1145 (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); 1140 if (t4_clear_cq_armed(&rchp->cq) &&
1146 spin_unlock_irqrestore(&schp->comp_handler_lock, flag); 1141 (rq_flushed || sq_flushed)) {
1142 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1143 (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1144 rchp->ibcq.cq_context);
1145 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1146 }
1147 } else {
1148 if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) {
1149 spin_lock_irqsave(&rchp->comp_handler_lock, flag);
1150 (*rchp->ibcq.comp_handler)(&rchp->ibcq,
1151 rchp->ibcq.cq_context);
1152 spin_unlock_irqrestore(&rchp->comp_handler_lock, flag);
1153 }
1154 if (t4_clear_cq_armed(&schp->cq) && sq_flushed) {
1155 spin_lock_irqsave(&schp->comp_handler_lock, flag);
1156 (*schp->ibcq.comp_handler)(&schp->ibcq,
1157 schp->ibcq.cq_context);
1158 spin_unlock_irqrestore(&schp->comp_handler_lock, flag);
1159 }
1147 } 1160 }
1148} 1161}
1149 1162
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index df5edfa31a8f..c04e5134b30c 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -524,6 +524,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq)
524 return !wq->rq.queue[wq->rq.size].status.db_off; 524 return !wq->rq.queue[wq->rq.size].status.db_off;
525} 525}
526 526
527enum t4_cq_flags {
528 CQ_ARMED = 1,
529};
530
527struct t4_cq { 531struct t4_cq {
528 struct t4_cqe *queue; 532 struct t4_cqe *queue;
529 dma_addr_t dma_addr; 533 dma_addr_t dma_addr;
@@ -544,12 +548,19 @@ struct t4_cq {
544 u16 cidx_inc; 548 u16 cidx_inc;
545 u8 gen; 549 u8 gen;
546 u8 error; 550 u8 error;
551 unsigned long flags;
547}; 552};
548 553
554static inline int t4_clear_cq_armed(struct t4_cq *cq)
555{
556 return test_and_clear_bit(CQ_ARMED, &cq->flags);
557}
558
549static inline int t4_arm_cq(struct t4_cq *cq, int se) 559static inline int t4_arm_cq(struct t4_cq *cq, int se)
550{ 560{
551 u32 val; 561 u32 val;
552 562
563 set_bit(CQ_ARMED, &cq->flags);
553 while (cq->cidx_inc > CIDXINC_MASK) { 564 while (cq->cidx_inc > CIDXINC_MASK) {
554 val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | 565 val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) |
555 INGRESSQID(cq->cqid); 566 INGRESSQID(cq->cqid);
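Note: the CQ_ARMED flag introduced above is set when the consumer arms the CQ and cleared with test_and_clear_bit() before a notification is delivered, so the flush/event paths only invoke the completion handler if someone actually re-armed since the last notification. A rough userspace sketch of that test-and-clear pattern with C11 atomics; cq_arm() and cq_should_notify() are stand-in names:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	struct cq {
		atomic_bool armed;	/* stands in for the CQ_ARMED bit in cq->flags */
	};

	static void cq_arm(struct cq *cq)
	{
		atomic_store(&cq->armed, true);		/* like set_bit() in t4_arm_cq() */
	}

	/* Returns true at most once per arm, like t4_clear_cq_armed(). */
	static bool cq_should_notify(struct cq *cq)
	{
		return atomic_exchange(&cq->armed, false);
	}

	int main(void)
	{
		struct cq cq = { .armed = false };

		cq_arm(&cq);
		printf("%d %d\n", cq_should_notify(&cq), cq_should_notify(&cq));	/* prints 1 0 */
		return 0;
	}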
diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c
index 43f2d0424d4f..e890e5ba0e01 100644
--- a/drivers/infiniband/hw/ipath/ipath_mad.c
+++ b/drivers/infiniband/hw/ipath/ipath_mad.c
@@ -726,7 +726,7 @@ bail:
726 * @dd: the infinipath device 726 * @dd: the infinipath device
727 * @pkeys: the PKEY table 727 * @pkeys: the PKEY table
728 */ 728 */
729static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) 729static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port)
730{ 730{
731 struct ipath_portdata *pd; 731 struct ipath_portdata *pd;
732 int i; 732 int i;
@@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
759 } 759 }
760 if (changed) { 760 if (changed) {
761 u64 pkey; 761 u64 pkey;
762 struct ib_event event;
762 763
763 pkey = (u64) dd->ipath_pkeys[0] | 764 pkey = (u64) dd->ipath_pkeys[0] |
764 ((u64) dd->ipath_pkeys[1] << 16) | 765 ((u64) dd->ipath_pkeys[1] << 16) |
@@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys)
768 (unsigned long long) pkey); 769 (unsigned long long) pkey);
769 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, 770 ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey,
770 pkey); 771 pkey);
772
773 event.event = IB_EVENT_PKEY_CHANGE;
774 event.device = &dd->verbs_dev->ibdev;
775 event.element.port_num = port;
776 ib_dispatch_event(&event);
771 } 777 }
772 return 0; 778 return 0;
773} 779}
774 780
775static int recv_subn_set_pkeytable(struct ib_smp *smp, 781static int recv_subn_set_pkeytable(struct ib_smp *smp,
776 struct ib_device *ibdev) 782 struct ib_device *ibdev, u8 port)
777{ 783{
778 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); 784 u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff);
779 __be16 *p = (__be16 *) smp->data; 785 __be16 *p = (__be16 *) smp->data;
@@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp,
784 for (i = 0; i < n; i++) 790 for (i = 0; i < n; i++)
785 q[i] = be16_to_cpu(p[i]); 791 q[i] = be16_to_cpu(p[i]);
786 792
787 if (startpx != 0 || set_pkeys(dev->dd, q) != 0) 793 if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0)
788 smp->status |= IB_SMP_INVALID_FIELD; 794 smp->status |= IB_SMP_INVALID_FIELD;
789 795
790 return recv_subn_get_pkeytable(smp, ibdev); 796 return recv_subn_get_pkeytable(smp, ibdev);
@@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
1342 ret = recv_subn_set_portinfo(smp, ibdev, port_num); 1348 ret = recv_subn_set_portinfo(smp, ibdev, port_num);
1343 goto bail; 1349 goto bail;
1344 case IB_SMP_ATTR_PKEY_TABLE: 1350 case IB_SMP_ATTR_PKEY_TABLE:
1345 ret = recv_subn_set_pkeytable(smp, ibdev); 1351 ret = recv_subn_set_pkeytable(smp, ibdev, port_num);
1346 goto bail; 1352 goto bail;
1347 case IB_SMP_ATTR_SM_INFO: 1353 case IB_SMP_ATTR_SM_INFO:
1348 if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { 1354 if (dev->port_cap_flags & IB_PORT_SM_DISABLED) {
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 287ad0564acd..82a7dd87089b 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -891,7 +891,7 @@ int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
891 agent = ib_register_mad_agent(&dev->ib_dev, p + 1, 891 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
892 q ? IB_QPT_GSI : IB_QPT_SMI, 892 q ? IB_QPT_GSI : IB_QPT_SMI,
893 NULL, 0, send_handler, 893 NULL, 0, send_handler,
894 NULL, NULL); 894 NULL, NULL, 0);
895 if (IS_ERR(agent)) { 895 if (IS_ERR(agent)) {
896 ret = PTR_ERR(agent); 896 ret = PTR_ERR(agent);
897 goto err; 897 goto err;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0f7027e7db13..e1e558a3d692 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -910,8 +910,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
910 const struct default_rules *pdefault_rules = default_table; 910 const struct default_rules *pdefault_rules = default_table;
911 u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); 911 u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
912 912
913 for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, 913 for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) {
914 pdefault_rules++) {
915 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; 914 __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
916 memset(&field_types, 0, sizeof(field_types)); 915 memset(&field_types, 0, sizeof(field_types));
917 916
@@ -965,8 +964,7 @@ static int __mlx4_ib_create_default_rules(
965 int size = 0; 964 int size = 0;
966 int i; 965 int i;
967 966
968 for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ 967 for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) {
969 sizeof(pdefault_rules->rules_create_list[0]); i++) {
970 int ret; 968 int ret;
971 union ib_flow_spec ib_spec; 969 union ib_flow_spec ib_spec;
972 switch (pdefault_rules->rules_create_list[i]) { 970 switch (pdefault_rules->rules_create_list[i]) {
@@ -2007,6 +2005,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2007 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | 2005 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2008 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | 2006 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2009 (1ull << IB_USER_VERBS_CMD_REG_MR) | 2007 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2008 (1ull << IB_USER_VERBS_CMD_REREG_MR) |
2010 (1ull << IB_USER_VERBS_CMD_DEREG_MR) | 2009 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2011 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | 2010 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2012 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | 2011 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
@@ -2059,6 +2058,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
2059 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; 2058 ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq;
2060 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; 2059 ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr;
2061 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; 2060 ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr;
2061 ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr;
2062 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; 2062 ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr;
2063 ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; 2063 ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr;
2064 ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; 2064 ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list;
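Note: the ARRAY_SIZE() conversions above only replace the open-coded sizeof division (the kernel macro also adds a compile-time check that its argument is an array). Outside the kernel the same helper is a one-line macro:

	#include <stddef.h>
	#include <stdio.h>

	#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

	int main(void)
	{
		int default_table[] = { 10, 20, 30, 40 };
		size_t i;

		for (i = 0; i < ARRAY_SIZE(default_table); i++)
			printf("%d\n", default_table[i]);
		return 0;
	}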
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 369da3ca5d64..e8cad3926bfc 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -788,5 +788,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
788void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); 788void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
789int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, 789int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
790 int is_attach); 790 int is_attach);
791int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
792 u64 start, u64 length, u64 virt_addr,
793 int mr_access_flags, struct ib_pd *pd,
794 struct ib_udata *udata);
791 795
792#endif /* MLX4_IB_H */ 796#endif /* MLX4_IB_H */
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index cb2a8727f3fb..9b0e80e59b08 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
144 if (!mr) 144 if (!mr)
145 return ERR_PTR(-ENOMEM); 145 return ERR_PTR(-ENOMEM);
146 146
147 /* Force registering the memory as writable. */
148 /* Used for memory re-registration. HCA protects the access */
147 mr->umem = ib_umem_get(pd->uobject->context, start, length, 149 mr->umem = ib_umem_get(pd->uobject->context, start, length,
148 access_flags, 0); 150 access_flags | IB_ACCESS_LOCAL_WRITE, 0);
149 if (IS_ERR(mr->umem)) { 151 if (IS_ERR(mr->umem)) {
150 err = PTR_ERR(mr->umem); 152 err = PTR_ERR(mr->umem);
151 goto err_free; 153 goto err_free;
@@ -183,6 +185,90 @@ err_free:
183 return ERR_PTR(err); 185 return ERR_PTR(err);
184} 186}
185 187
188int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
189 u64 start, u64 length, u64 virt_addr,
190 int mr_access_flags, struct ib_pd *pd,
191 struct ib_udata *udata)
192{
193 struct mlx4_ib_dev *dev = to_mdev(mr->device);
194 struct mlx4_ib_mr *mmr = to_mmr(mr);
195 struct mlx4_mpt_entry *mpt_entry;
196 struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
197 int err;
198
199 /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
200 * we assume that the calls can't run concurrently. Otherwise, a
201 * race exists.
202 */
203 err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
204
205 if (err)
206 return err;
207
208 if (flags & IB_MR_REREG_PD) {
209 err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
210 to_mpd(pd)->pdn);
211
212 if (err)
213 goto release_mpt_entry;
214 }
215
216 if (flags & IB_MR_REREG_ACCESS) {
217 err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
218 convert_access(mr_access_flags));
219
220 if (err)
221 goto release_mpt_entry;
222 }
223
224 if (flags & IB_MR_REREG_TRANS) {
225 int shift;
226 int err;
227 int n;
228
229 mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
230 ib_umem_release(mmr->umem);
231 mmr->umem = ib_umem_get(mr->uobject->context, start, length,
232 mr_access_flags |
233 IB_ACCESS_LOCAL_WRITE,
234 0);
235 if (IS_ERR(mmr->umem)) {
236 err = PTR_ERR(mmr->umem);
237 mmr->umem = NULL;
238 goto release_mpt_entry;
239 }
240 n = ib_umem_page_count(mmr->umem);
241 shift = ilog2(mmr->umem->page_size);
242
243 mmr->mmr.iova = virt_addr;
244 mmr->mmr.size = length;
245 err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
246 virt_addr, length, n, shift,
247 *pmpt_entry);
248 if (err) {
249 ib_umem_release(mmr->umem);
250 goto release_mpt_entry;
251 }
252
253 err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
254 if (err) {
255 mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
256 ib_umem_release(mmr->umem);
257 goto release_mpt_entry;
258 }
259 }
260
261 /* If we couldn't transfer the MR to the HCA, just remember to
262 * return a failure. But dereg_mr will free the resources.
263 */
264 err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
265
266release_mpt_entry:
267 mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
268
269 return err;
270}
271
186int mlx4_ib_dereg_mr(struct ib_mr *ibmr) 272int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
187{ 273{
188 struct mlx4_ib_mr *mr = to_mmr(ibmr); 274 struct mlx4_ib_mr *mr = to_mmr(ibmr);
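Note: mlx4_ib_rereg_user_mr() above applies only the pieces selected by the rereg flags (IB_MR_REREG_PD, IB_MR_REREG_ACCESS, IB_MR_REREG_TRANS) and leaves the rest of the MR untouched. A compact sketch of that flag-driven partial-update shape; the mr_state struct and names are made up for illustration:

	#include <stdio.h>

	#define REREG_TRANS	(1 << 0)
	#define REREG_PD	(1 << 1)
	#define REREG_ACCESS	(1 << 2)

	struct mr_state {
		int pd;
		int access;
		unsigned long iova, len;
	};

	/* Update only the fields selected by flags. */
	static void mr_rereg(struct mr_state *mr, int flags,
			     int pd, int access, unsigned long iova, unsigned long len)
	{
		if (flags & REREG_PD)
			mr->pd = pd;
		if (flags & REREG_ACCESS)
			mr->access = access;
		if (flags & REREG_TRANS) {
			mr->iova = iova;
			mr->len = len;
		}
	}

	int main(void)
	{
		struct mr_state mr = { .pd = 1, .access = 0, .iova = 0x1000, .len = 4096 };

		mr_rereg(&mr, REREG_ACCESS, 0, 0x7, 0, 0);	/* only access changes */
		printf("pd=%d access=%#x iova=%#lx len=%lu\n",
		       mr.pd, mr.access, mr.iova, mr.len);
		return 0;
	}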
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 7efe6e3f3542..8c574b63d77b 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2501,7 +2501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2501 spin_lock_irqsave(&qp->sq.lock, flags); 2501 spin_lock_irqsave(&qp->sq.lock, flags);
2502 2502
2503 for (nreq = 0; wr; nreq++, wr = wr->next) { 2503 for (nreq = 0; wr; nreq++, wr = wr->next) {
2504 if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { 2504 if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) {
2505 mlx5_ib_warn(dev, "\n"); 2505 mlx5_ib_warn(dev, "\n");
2506 err = -EINVAL; 2506 err = -EINVAL;
2507 *bad_wr = wr; 2507 *bad_wr = wr;
diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c
index b6f7f457fc55..8881fa376e06 100644
--- a/drivers/infiniband/hw/mthca/mthca_mad.c
+++ b/drivers/infiniband/hw/mthca/mthca_mad.c
@@ -294,7 +294,7 @@ int mthca_create_agents(struct mthca_dev *dev)
294 agent = ib_register_mad_agent(&dev->ib_dev, p + 1, 294 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
295 q ? IB_QPT_GSI : IB_QPT_SMI, 295 q ? IB_QPT_GSI : IB_QPT_SMI,
296 NULL, 0, send_handler, 296 NULL, 0, send_handler,
297 NULL, NULL); 297 NULL, NULL, 0);
298 if (IS_ERR(agent)) { 298 if (IS_ERR(agent)) {
299 ret = PTR_ERR(agent); 299 ret = PTR_ERR(agent);
300 goto err; 300 goto err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h
index 19011dbb930f..b43456ae124b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma.h
@@ -40,7 +40,7 @@
40#include <be_roce.h> 40#include <be_roce.h>
41#include "ocrdma_sli.h" 41#include "ocrdma_sli.h"
42 42
43#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" 43#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u"
44 44
45#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" 45#define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver"
46#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" 46#define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA"
@@ -137,6 +137,7 @@ struct mqe_ctx {
137 u16 cqe_status; 137 u16 cqe_status;
138 u16 ext_status; 138 u16 ext_status;
139 bool cmd_done; 139 bool cmd_done;
140 bool fw_error_state;
140}; 141};
141 142
142struct ocrdma_hw_mr { 143struct ocrdma_hw_mr {
@@ -235,7 +236,10 @@ struct ocrdma_dev {
235 struct list_head entry; 236 struct list_head entry;
236 struct rcu_head rcu; 237 struct rcu_head rcu;
237 int id; 238 int id;
238 u64 stag_arr[OCRDMA_MAX_STAG]; 239 u64 *stag_arr;
240 u8 sl; /* service level */
241 bool pfc_state;
242 atomic_t update_sl;
239 u16 pvid; 243 u16 pvid;
240 u32 asic_id; 244 u32 asic_id;
241 245
@@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev)
518 OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; 522 OCRDMA_SLI_ASIC_GEN_NUM_SHIFT;
519} 523}
520 524
525static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio)
526{
527 return *(pfc + prio);
528}
529
530static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio)
531{
532 return *(app_prio + prio);
533}
534
535static inline u8 ocrdma_is_enabled_and_synced(u32 state)
536{ /* May also be used to interpret TC-state, QCN-state
537 * Appl-state and Logical-link-state in future.
538 */
539 return (state & OCRDMA_STATE_FLAG_ENABLED) &&
540 (state & OCRDMA_STATE_FLAG_SYNC);
541}
542
521#endif 543#endif
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index d4cc01f10c01..40f8536c10b0 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -35,6 +35,8 @@
35#include "ocrdma_ah.h" 35#include "ocrdma_ah.h"
36#include "ocrdma_hw.h" 36#include "ocrdma_hw.h"
37 37
38#define OCRDMA_VID_PCP_SHIFT 0xD
39
38static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, 40static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
39 struct ib_ah_attr *attr, int pdid) 41 struct ib_ah_attr *attr, int pdid)
40{ 42{
@@ -55,7 +57,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
55 if (vlan_tag && (vlan_tag < 0x1000)) { 57 if (vlan_tag && (vlan_tag < 0x1000)) {
56 eth.eth_type = cpu_to_be16(0x8100); 58 eth.eth_type = cpu_to_be16(0x8100);
57 eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); 59 eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
58 vlan_tag |= (attr->sl & 7) << 13; 60 vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
59 eth.vlan_tag = cpu_to_be16(vlan_tag); 61 eth.vlan_tag = cpu_to_be16(vlan_tag);
60 eth_sz = sizeof(struct ocrdma_eth_vlan); 62 eth_sz = sizeof(struct ocrdma_eth_vlan);
61 vlan_enabled = true; 63 vlan_enabled = true;
@@ -100,6 +102,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
100 if (!(attr->ah_flags & IB_AH_GRH)) 102 if (!(attr->ah_flags & IB_AH_GRH))
101 return ERR_PTR(-EINVAL); 103 return ERR_PTR(-EINVAL);
102 104
105 if (atomic_cmpxchg(&dev->update_sl, 1, 0))
106 ocrdma_init_service_level(dev);
103 ah = kzalloc(sizeof(*ah), GFP_ATOMIC); 107 ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
104 if (!ah) 108 if (!ah)
105 return ERR_PTR(-ENOMEM); 109 return ERR_PTR(-ENOMEM);
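Note: the set_av_attr() change above places the DCBX-derived device service level (dev->sl), rather than the AH attribute's SL, into the VLAN priority bits. As a reminder of the 802.1Q layout the shift refers to, a tiny sketch of packing PCP into a tag; build_vlan_tci() is just an illustrative name:

	#include <stdint.h>
	#include <stdio.h>

	#define VID_PCP_SHIFT 13	/* PCP occupies bits 15:13 of the tag */

	static uint16_t build_vlan_tci(uint16_t vlan_id, uint8_t prio)
	{
		return (uint16_t)((vlan_id & 0x0fff) | ((prio & 0x7) << VID_PCP_SHIFT));
	}

	int main(void)
	{
		printf("0x%04x\n", build_vlan_tci(100, 5));	/* prints 0xa064 */
		return 0;
	}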
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 3bbf2010a821..dd35ae558ae1 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -525,7 +525,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev,
525 525
526 cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; 526 cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS;
527 cmd->eqn = eq->id; 527 cmd->eqn = eq->id;
528 cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe); 528 cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe);
529 529
530 ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE, 530 ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE,
531 cq->dma, PAGE_SIZE_4K); 531 cq->dma, PAGE_SIZE_4K);
@@ -661,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
661{ 661{
662 struct ocrdma_qp *qp = NULL; 662 struct ocrdma_qp *qp = NULL;
663 struct ocrdma_cq *cq = NULL; 663 struct ocrdma_cq *cq = NULL;
664 struct ib_event ib_evt = { 0 }; 664 struct ib_event ib_evt;
665 int cq_event = 0; 665 int cq_event = 0;
666 int qp_event = 1; 666 int qp_event = 1;
667 int srq_event = 0; 667 int srq_event = 0;
@@ -674,6 +674,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev,
674 if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) 674 if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID)
675 cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; 675 cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK];
676 676
677 memset(&ib_evt, 0, sizeof(ib_evt));
678
677 ib_evt.device = &dev->ibdev; 679 ib_evt.device = &dev->ibdev;
678 680
679 switch (type) { 681 switch (type) {
@@ -771,6 +773,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev,
771 OCRDMA_AE_PVID_MCQE_TAG_MASK) >> 773 OCRDMA_AE_PVID_MCQE_TAG_MASK) >>
772 OCRDMA_AE_PVID_MCQE_TAG_SHIFT); 774 OCRDMA_AE_PVID_MCQE_TAG_SHIFT);
773 break; 775 break;
776
777 case OCRDMA_ASYNC_EVENT_COS_VALUE:
778 atomic_set(&dev->update_sl, 1);
779 break;
774 default: 780 default:
775 /* Not interested evts. */ 781 /* Not interested evts. */
776 break; 782 break;
@@ -962,8 +968,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
962 msecs_to_jiffies(30000)); 968 msecs_to_jiffies(30000));
963 if (status) 969 if (status)
964 return 0; 970 return 0;
965 else 971 else {
972 dev->mqe_ctx.fw_error_state = true;
973 pr_err("%s(%d) mailbox timeout: fw not responding\n",
974 __func__, dev->id);
966 return -1; 975 return -1;
976 }
967} 977}
968 978
969/* issue a mailbox command on the MQ */ 979/* issue a mailbox command on the MQ */
@@ -975,6 +985,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
975 struct ocrdma_mbx_rsp *rsp = NULL; 985 struct ocrdma_mbx_rsp *rsp = NULL;
976 986
977 mutex_lock(&dev->mqe_ctx.lock); 987 mutex_lock(&dev->mqe_ctx.lock);
988 if (dev->mqe_ctx.fw_error_state)
989 goto mbx_err;
978 ocrdma_post_mqe(dev, mqe); 990 ocrdma_post_mqe(dev, mqe);
979 status = ocrdma_wait_mqe_cmpl(dev); 991 status = ocrdma_wait_mqe_cmpl(dev);
980 if (status) 992 if (status)
@@ -1078,7 +1090,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
1078 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; 1090 OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT;
1079 attr->max_mw = rsp->max_mw; 1091 attr->max_mw = rsp->max_mw;
1080 attr->max_mr = rsp->max_mr; 1092 attr->max_mr = rsp->max_mr;
1081 attr->max_mr_size = ~0ull; 1093 attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) |
1094 rsp->max_mr_size_lo;
1082 attr->max_fmr = 0; 1095 attr->max_fmr = 0;
1083 attr->max_pages_per_frmr = rsp->max_pages_per_frmr; 1096 attr->max_pages_per_frmr = rsp->max_pages_per_frmr;
1084 attr->max_num_mr_pbl = rsp->max_num_mr_pbl; 1097 attr->max_num_mr_pbl = rsp->max_num_mr_pbl;
@@ -1252,7 +1265,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev)
1252 ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; 1265 ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va;
1253 hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; 1266 hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs;
1254 1267
1255 dev->hba_port_num = hba_attribs->phy_port; 1268 dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv &
1269 OCRDMA_HBA_ATTRB_PTNUM_MASK)
1270 >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT;
1256 strncpy(dev->model_number, 1271 strncpy(dev->model_number,
1257 hba_attribs->controller_model_number, 31); 1272 hba_attribs->controller_model_number, 31);
1258 } 1273 }
@@ -1302,7 +1317,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed)
1302 goto mbx_err; 1317 goto mbx_err;
1303 1318
1304 rsp = (struct ocrdma_get_link_speed_rsp *)cmd; 1319 rsp = (struct ocrdma_get_link_speed_rsp *)cmd;
1305 *lnk_speed = rsp->phys_port_speed; 1320 *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK)
1321 >> OCRDMA_PHY_PS_SHIFT;
1306 1322
1307mbx_err: 1323mbx_err:
1308 kfree(cmd); 1324 kfree(cmd);
@@ -1328,11 +1344,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev)
1328 goto mbx_err; 1344 goto mbx_err;
1329 1345
1330 rsp = (struct ocrdma_get_phy_info_rsp *)cmd; 1346 rsp = (struct ocrdma_get_phy_info_rsp *)cmd;
1331 dev->phy.phy_type = le16_to_cpu(rsp->phy_type); 1347 dev->phy.phy_type =
1348 (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK);
1349 dev->phy.interface_type =
1350 (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK)
1351 >> OCRDMA_IF_TYPE_SHIFT;
1332 dev->phy.auto_speeds_supported = 1352 dev->phy.auto_speeds_supported =
1333 le16_to_cpu(rsp->auto_speeds_supported); 1353 (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK);
1334 dev->phy.fixed_speeds_supported = 1354 dev->phy.fixed_speeds_supported =
1335 le16_to_cpu(rsp->fixed_speeds_supported); 1355 (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK)
1356 >> OCRDMA_FSPEED_SUPP_SHIFT;
1336mbx_err: 1357mbx_err:
1337 kfree(cmd); 1358 kfree(cmd);
1338 return status; 1359 return status;
@@ -1457,8 +1478,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev)
1457 1478
1458 pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va; 1479 pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va;
1459 for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) { 1480 for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) {
1460 pbes[i].pa_lo = (u32) (pa & 0xffffffff); 1481 pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff);
1461 pbes[i].pa_hi = (u32) upper_32_bits(pa); 1482 pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa));
1462 pa += PAGE_SIZE; 1483 pa += PAGE_SIZE;
1463 } 1484 }
1464 cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF); 1485 cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF);
@@ -1501,6 +1522,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev)
1501 ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); 1522 ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1502 dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va, 1523 dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va,
1503 dev->av_tbl.pa); 1524 dev->av_tbl.pa);
1525 dev->av_tbl.va = NULL;
1504 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va, 1526 dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va,
1505 dev->av_tbl.pbl.pa); 1527 dev->av_tbl.pbl.pa);
1506 kfree(cmd); 1528 kfree(cmd);
@@ -1624,14 +1646,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
1624 cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << 1646 cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP <<
1625 OCRDMA_CREATE_CQ_TYPE_SHIFT; 1647 OCRDMA_CREATE_CQ_TYPE_SHIFT;
1626 cq->phase_change = false; 1648 cq->phase_change = false;
1627 cmd->cmd.cqe_count = (cq->len / cqe_size); 1649 cmd->cmd.pdid_cqecnt = (cq->len / cqe_size);
1628 } else { 1650 } else {
1629 cmd->cmd.cqe_count = (cq->len / cqe_size) - 1; 1651 cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1;
1630 cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID; 1652 cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID;
1631 cq->phase_change = true; 1653 cq->phase_change = true;
1632 } 1654 }
1633 1655
1634 cmd->cmd.pd_id = pd_id; /* valid only for v3 */ 1656 /* pd_id valid only for v3 */
1657 cmd->cmd.pdid_cqecnt |= (pd_id <<
1658 OCRDMA_CREATE_CQ_CMD_PDID_SHIFT);
1635 ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); 1659 ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size);
1636 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); 1660 status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd);
1637 if (status) 1661 if (status)
@@ -2206,7 +2230,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs,
2206 OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; 2230 OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK;
2207 qp->rq_cq = cq; 2231 qp->rq_cq = cq;
2208 2232
2209 if (pd->dpp_enabled && pd->num_dpp_qp) { 2233 if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp &&
2234 (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) {
2210 ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, 2235 ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq,
2211 dpp_cq_id); 2236 dpp_cq_id);
2212 } 2237 }
@@ -2264,6 +2289,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2264 2289
2265 if ((ah_attr->ah_flags & IB_AH_GRH) == 0) 2290 if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
2266 return -EINVAL; 2291 return -EINVAL;
2292 if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0))
2293 ocrdma_init_service_level(qp->dev);
2267 cmd->params.tclass_sq_psn |= 2294 cmd->params.tclass_sq_psn |=
2268 (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); 2295 (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT);
2269 cmd->params.rnt_rc_sl_fl |= 2296 cmd->params.rnt_rc_sl_fl |=
@@ -2297,6 +2324,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
2297 cmd->params.vlan_dmac_b4_to_b5 |= 2324 cmd->params.vlan_dmac_b4_to_b5 |=
2298 vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; 2325 vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;
2299 cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; 2326 cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID;
2327 cmd->params.rnt_rc_sl_fl |=
2328 (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
2300 } 2329 }
2301 return 0; 2330 return 0;
2302} 2331}
@@ -2604,6 +2633,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
2604 return status; 2633 return status;
2605} 2634}
2606 2635
2636static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
2637 struct ocrdma_dcbx_cfg *dcbxcfg)
2638{
2639 int status = 0;
2640 dma_addr_t pa;
2641 struct ocrdma_mqe cmd;
2642
2643 struct ocrdma_get_dcbx_cfg_req *req = NULL;
2644 struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL;
2645 struct pci_dev *pdev = dev->nic_info.pdev;
2646 struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge;
2647
2648 memset(&cmd, 0, sizeof(struct ocrdma_mqe));
2649 cmd.hdr.pyld_len = max_t (u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp),
2650 sizeof(struct ocrdma_get_dcbx_cfg_req));
2651 req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL);
2652 if (!req) {
2653 status = -ENOMEM;
2654 goto mem_err;
2655 }
2656
2657 cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) &
2658 OCRDMA_MQE_HDR_SGE_CNT_MASK;
2659 mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL);
2660 mqe_sge->pa_hi = (u32) upper_32_bits(pa);
2661 mqe_sge->len = cmd.hdr.pyld_len;
2662
2663 memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req));
2664 ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG,
2665 OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len);
2666 req->param_type = ptype;
2667
2668 status = ocrdma_mbx_cmd(dev, &cmd);
2669 if (status)
2670 goto mbx_err;
2671
2672 rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req;
2673 ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp));
2674 memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg));
2675
2676mbx_err:
2677 dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa);
2678mem_err:
2679 return status;
2680}
2681
2682#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08
2683#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05
2684
2685static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype,
2686 struct ocrdma_dcbx_cfg *dcbxcfg,
2687 u8 *srvc_lvl)
2688{
2689 int status = -EINVAL, indx, slindx;
2690 int ventry_cnt;
2691 struct ocrdma_app_parameter *app_param;
2692 u8 valid, proto_sel;
2693 u8 app_prio, pfc_prio;
2694 u16 proto;
2695
2696 if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) {
2697 pr_info("%s ocrdma%d DCBX is disabled\n",
2698 dev_name(&dev->nic_info.pdev->dev), dev->id);
2699 goto out;
2700 }
2701
2702 if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) {
2703 pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n",
2704 dev_name(&dev->nic_info.pdev->dev), dev->id,
2705 (ptype > 0 ? "operational" : "admin"),
2706 (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ?
2707 "enabled" : "disabled",
2708 (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ?
2709 "" : ", not sync'ed");
2710 goto out;
2711 } else {
2712 pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n",
2713 dev_name(&dev->nic_info.pdev->dev), dev->id);
2714 }
2715
2716 ventry_cnt = (dcbxcfg->tcv_aev_opv_st >>
2717 OCRDMA_DCBX_APP_ENTRY_SHIFT)
2718 & OCRDMA_DCBX_STATE_MASK;
2719
2720 for (indx = 0; indx < ventry_cnt; indx++) {
2721 app_param = &dcbxcfg->app_param[indx];
2722 valid = (app_param->valid_proto_app >>
2723 OCRDMA_APP_PARAM_VALID_SHIFT)
2724 & OCRDMA_APP_PARAM_VALID_MASK;
2725 proto_sel = (app_param->valid_proto_app
2726 >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT)
2727 & OCRDMA_APP_PARAM_PROTO_SEL_MASK;
2728 proto = app_param->valid_proto_app &
2729 OCRDMA_APP_PARAM_APP_PROTO_MASK;
2730
2731 if (
2732 valid && proto == OCRDMA_APP_PROTO_ROCE &&
2733 proto_sel == OCRDMA_PROTO_SELECT_L2) {
2734 for (slindx = 0; slindx <
2735 OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) {
2736 app_prio = ocrdma_get_app_prio(
2737 (u8 *)app_param->app_prio,
2738 slindx);
2739 pfc_prio = ocrdma_get_pfc_prio(
2740 (u8 *)dcbxcfg->pfc_prio,
2741 slindx);
2742
2743 if (app_prio && pfc_prio) {
2744 *srvc_lvl = slindx;
2745 status = 0;
2746 goto out;
2747 }
2748 }
2749 if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) {
2750 pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n",
2751 dev_name(&dev->nic_info.pdev->dev),
2752 dev->id, proto);
2753 }
2754 }
2755 }
2756
2757out:
2758 return status;
2759}
2760
2761void ocrdma_init_service_level(struct ocrdma_dev *dev)
2762{
2763 int status = 0, indx;
2764 struct ocrdma_dcbx_cfg dcbxcfg;
2765 u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL;
2766 int ptype = OCRDMA_PARAMETER_TYPE_OPER;
2767
2768 for (indx = 0; indx < 2; indx++) {
2769 status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg);
2770 if (status) {
2771 pr_err("%s(): status=%d\n", __func__, status);
2772 ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
2773 continue;
2774 }
2775
2776 status = ocrdma_parse_dcbxcfg_rsp(dev, ptype,
2777 &dcbxcfg, &srvc_lvl);
2778 if (status) {
2779 ptype = OCRDMA_PARAMETER_TYPE_ADMIN;
2780 continue;
2781 }
2782
2783 break;
2784 }
2785
2786 if (status)
2787 pr_info("%s ocrdma%d service level default\n",
2788 dev_name(&dev->nic_info.pdev->dev), dev->id);
2789 else
2790 pr_info("%s ocrdma%d service level %d\n",
2791 dev_name(&dev->nic_info.pdev->dev), dev->id,
2792 srvc_lvl);
2793
2794 dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state);
2795 dev->sl = srvc_lvl;
2796}
2797
2607int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) 2798int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah)
2608{ 2799{
2609 int i; 2800 int i;
@@ -2709,13 +2900,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev)
2709 goto conf_err; 2900 goto conf_err;
2710 status = ocrdma_mbx_get_phy_info(dev); 2901 status = ocrdma_mbx_get_phy_info(dev);
2711 if (status) 2902 if (status)
2712 goto conf_err; 2903 goto info_attrb_err;
2713 status = ocrdma_mbx_get_ctrl_attribs(dev); 2904 status = ocrdma_mbx_get_ctrl_attribs(dev);
2714 if (status) 2905 if (status)
2715 goto conf_err; 2906 goto info_attrb_err;
2716 2907
2717 return 0; 2908 return 0;
2718 2909
2910info_attrb_err:
2911 ocrdma_mbx_delete_ah_tbl(dev);
2719conf_err: 2912conf_err:
2720 ocrdma_destroy_mq(dev); 2913 ocrdma_destroy_mq(dev);
2721mq_err: 2914mq_err:
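Note: many of the ocrdma_hw.c changes above switch from discrete u16/u8 response fields to packed u32 words decoded with MASK/SHIFT pairs (for example pflt_pps_ld_pnum with OCRDMA_PHY_PS_MASK and OCRDMA_PHY_PS_SHIFT). The extraction pattern itself, shown with the same mask and shift values, is simply:

	#include <stdint.h>
	#include <stdio.h>

	#define PHY_PS_MASK	0x00FF0000u	/* port-speed byte inside the word */
	#define PHY_PS_SHIFT	16

	static uint8_t get_port_speed(uint32_t pflt_pps_ld_pnum)
	{
		return (uint8_t)((pflt_pps_ld_pnum & PHY_PS_MASK) >> PHY_PS_SHIFT);
	}

	int main(void)
	{
		printf("%u\n", get_port_speed(0x00200000));	/* prints 32 */
		return 0;
	}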
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
index e513f7293142..6eed8f191322 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h
@@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq);
135 135
136int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); 136int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset);
137char *port_speed_string(struct ocrdma_dev *dev); 137char *port_speed_string(struct ocrdma_dev *dev);
138void ocrdma_init_service_level(struct ocrdma_dev *);
139
138#endif /* __OCRDMA_HW_H__ */ 140#endif /* __OCRDMA_HW_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
index 7c504e079744..256a06bc0b68 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c
@@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
324 if (!dev->qp_tbl) 324 if (!dev->qp_tbl)
325 goto alloc_err; 325 goto alloc_err;
326 } 326 }
327
328 dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL);
329 if (dev->stag_arr == NULL)
330 goto alloc_err;
331
327 spin_lock_init(&dev->av_tbl.lock); 332 spin_lock_init(&dev->av_tbl.lock);
328 spin_lock_init(&dev->flush_q_lock); 333 spin_lock_init(&dev->flush_q_lock);
329 return 0; 334 return 0;
@@ -334,6 +339,7 @@ alloc_err:
334 339
335static void ocrdma_free_resources(struct ocrdma_dev *dev) 340static void ocrdma_free_resources(struct ocrdma_dev *dev)
336{ 341{
342 kfree(dev->stag_arr);
337 kfree(dev->qp_tbl); 343 kfree(dev->qp_tbl);
338 kfree(dev->cq_tbl); 344 kfree(dev->cq_tbl);
339 kfree(dev->sgid_tbl); 345 kfree(dev->sgid_tbl);
@@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
353{ 359{
354 struct ocrdma_dev *dev = dev_get_drvdata(device); 360 struct ocrdma_dev *dev = dev_get_drvdata(device);
355 361
356 return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); 362 return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]);
363}
364
365static ssize_t show_hca_type(struct device *device,
366 struct device_attribute *attr, char *buf)
367{
368 struct ocrdma_dev *dev = dev_get_drvdata(device);
369
370 return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]);
357} 371}
358 372
359static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); 373static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
360static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); 374static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL);
375static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL);
361 376
362static struct device_attribute *ocrdma_attributes[] = { 377static struct device_attribute *ocrdma_attributes[] = {
363 &dev_attr_hw_rev, 378 &dev_attr_hw_rev,
364 &dev_attr_fw_ver 379 &dev_attr_fw_ver,
380 &dev_attr_hca_type
365}; 381};
366 382
367static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) 383static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
@@ -372,6 +388,58 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev)
372 device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); 388 device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]);
373} 389}
374 390
391static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev,
392 struct net_device *net)
393{
394 struct in_device *in_dev;
395 union ib_gid gid;
396 in_dev = in_dev_get(net);
397 if (in_dev) {
398 for_ifa(in_dev) {
399 ipv6_addr_set_v4mapped(ifa->ifa_address,
400 (struct in6_addr *)&gid);
401 ocrdma_add_sgid(dev, &gid);
402 }
403 endfor_ifa(in_dev);
404 in_dev_put(in_dev);
405 }
406}
407
408static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev,
409 struct net_device *net)
410{
411#if IS_ENABLED(CONFIG_IPV6)
412 struct inet6_dev *in6_dev;
413 union ib_gid *pgid;
414 struct inet6_ifaddr *ifp;
415 in6_dev = in6_dev_get(net);
416 if (in6_dev) {
417 read_lock_bh(&in6_dev->lock);
418 list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
419 pgid = (union ib_gid *)&ifp->addr;
420 ocrdma_add_sgid(dev, pgid);
421 }
422 read_unlock_bh(&in6_dev->lock);
423 in6_dev_put(in6_dev);
424 }
425#endif
426}
427
428static void ocrdma_init_gid_table(struct ocrdma_dev *dev)
429{
430 struct net_device *net_dev;
431
432 for_each_netdev(&init_net, net_dev) {
433 struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ?
434 rdma_vlan_dev_real_dev(net_dev) : net_dev;
435
436 if (real_dev == dev->nic_info.netdev) {
437 ocrdma_init_ipv4_gids(dev, net_dev);
438 ocrdma_init_ipv6_gids(dev, net_dev);
439 }
440 }
441}
442
375static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) 443static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
376{ 444{
377 int status = 0, i; 445 int status = 0, i;
@@ -399,6 +467,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
399 if (status) 467 if (status)
400 goto alloc_err; 468 goto alloc_err;
401 469
470 ocrdma_init_service_level(dev);
471 ocrdma_init_gid_table(dev);
402 status = ocrdma_register_device(dev); 472 status = ocrdma_register_device(dev);
403 if (status) 473 if (status)
404 goto alloc_err; 474 goto alloc_err;
@@ -508,6 +578,12 @@ static int ocrdma_close(struct ocrdma_dev *dev)
508 return 0; 578 return 0;
509} 579}
510 580
581static void ocrdma_shutdown(struct ocrdma_dev *dev)
582{
583 ocrdma_close(dev);
584 ocrdma_remove(dev);
585}
586
511/* event handling via NIC driver ensures that all the NIC specific 587/* event handling via NIC driver ensures that all the NIC specific
512 * initialization done before RoCE driver notifies 588 * initialization done before RoCE driver notifies
513 * event to stack. 589 * event to stack.
@@ -521,6 +597,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event)
521 case BE_DEV_DOWN: 597 case BE_DEV_DOWN:
522 ocrdma_close(dev); 598 ocrdma_close(dev);
523 break; 599 break;
600 case BE_DEV_SHUTDOWN:
601 ocrdma_shutdown(dev);
602 break;
524 } 603 }
525} 604}
526 605
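Note: ocrdma_init_ipv4_gids() above seeds the GID table with IPv4-mapped IPv6 addresses (::ffff:a.b.c.d), the form RoCE GID tables use to carry IPv4 addresses. A userspace sketch of the mapping, assuming only the standard inet headers rather than the kernel's ipv6_addr_set_v4mapped():

	#include <arpa/inet.h>
	#include <netinet/in.h>
	#include <stdio.h>
	#include <string.h>

	/* Build the ::ffff:a.b.c.d form used for IPv4-based GIDs. */
	static void v4mapped(struct in6_addr *gid, struct in_addr v4)
	{
		memset(gid, 0, sizeof(*gid));
		gid->s6_addr[10] = 0xff;
		gid->s6_addr[11] = 0xff;
		memcpy(&gid->s6_addr[12], &v4, 4);
	}

	int main(void)
	{
		struct in_addr v4;
		struct in6_addr gid;
		char buf[INET6_ADDRSTRLEN];

		inet_pton(AF_INET, "192.0.2.1", &v4);
		v4mapped(&gid, v4);
		printf("%s\n", inet_ntop(AF_INET6, &gid, buf, sizeof(buf)));	/* ::ffff:192.0.2.1 */
		return 0;
	}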
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
index 96c9ee602ba4..904989ec5eaa 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h
@@ -44,35 +44,39 @@ enum {
44#define OCRDMA_SUBSYS_ROCE 10 44#define OCRDMA_SUBSYS_ROCE 10
45enum { 45enum {
46 OCRDMA_CMD_QUERY_CONFIG = 1, 46 OCRDMA_CMD_QUERY_CONFIG = 1,
47 OCRDMA_CMD_ALLOC_PD, 47 OCRDMA_CMD_ALLOC_PD = 2,
48 OCRDMA_CMD_DEALLOC_PD, 48 OCRDMA_CMD_DEALLOC_PD = 3,
49 49
50 OCRDMA_CMD_CREATE_AH_TBL, 50 OCRDMA_CMD_CREATE_AH_TBL = 4,
51 OCRDMA_CMD_DELETE_AH_TBL, 51 OCRDMA_CMD_DELETE_AH_TBL = 5,
52 52
53 OCRDMA_CMD_CREATE_QP, 53 OCRDMA_CMD_CREATE_QP = 6,
54 OCRDMA_CMD_QUERY_QP, 54 OCRDMA_CMD_QUERY_QP = 7,
55 OCRDMA_CMD_MODIFY_QP, 55 OCRDMA_CMD_MODIFY_QP = 8 ,
56 OCRDMA_CMD_DELETE_QP, 56 OCRDMA_CMD_DELETE_QP = 9,
57 57
58 OCRDMA_CMD_RSVD1, 58 OCRDMA_CMD_RSVD1 = 10,
59 OCRDMA_CMD_ALLOC_LKEY, 59 OCRDMA_CMD_ALLOC_LKEY = 11,
60 OCRDMA_CMD_DEALLOC_LKEY, 60 OCRDMA_CMD_DEALLOC_LKEY = 12,
61 OCRDMA_CMD_REGISTER_NSMR, 61 OCRDMA_CMD_REGISTER_NSMR = 13,
62 OCRDMA_CMD_REREGISTER_NSMR, 62 OCRDMA_CMD_REREGISTER_NSMR = 14,
63 OCRDMA_CMD_REGISTER_NSMR_CONT, 63 OCRDMA_CMD_REGISTER_NSMR_CONT = 15,
64 OCRDMA_CMD_QUERY_NSMR, 64 OCRDMA_CMD_QUERY_NSMR = 16,
65 OCRDMA_CMD_ALLOC_MW, 65 OCRDMA_CMD_ALLOC_MW = 17,
66 OCRDMA_CMD_QUERY_MW, 66 OCRDMA_CMD_QUERY_MW = 18,
67 67
68 OCRDMA_CMD_CREATE_SRQ, 68 OCRDMA_CMD_CREATE_SRQ = 19,
69 OCRDMA_CMD_QUERY_SRQ, 69 OCRDMA_CMD_QUERY_SRQ = 20,
70 OCRDMA_CMD_MODIFY_SRQ, 70 OCRDMA_CMD_MODIFY_SRQ = 21,
71 OCRDMA_CMD_DELETE_SRQ, 71 OCRDMA_CMD_DELETE_SRQ = 22,
72 72
73 OCRDMA_CMD_ATTACH_MCAST, 73 OCRDMA_CMD_ATTACH_MCAST = 23,
74 OCRDMA_CMD_DETACH_MCAST, 74 OCRDMA_CMD_DETACH_MCAST = 24,
75 OCRDMA_CMD_GET_RDMA_STATS, 75
76 OCRDMA_CMD_CREATE_RBQ = 25,
77 OCRDMA_CMD_DESTROY_RBQ = 26,
78
79 OCRDMA_CMD_GET_RDMA_STATS = 27,
76 80
77 OCRDMA_CMD_MAX 81 OCRDMA_CMD_MAX
78}; 82};
@@ -103,7 +107,7 @@ enum {
103 107
104#define OCRDMA_MAX_QP 2048 108#define OCRDMA_MAX_QP 2048
105#define OCRDMA_MAX_CQ 2048 109#define OCRDMA_MAX_CQ 2048
106#define OCRDMA_MAX_STAG 8192 110#define OCRDMA_MAX_STAG 16384
107 111
108enum { 112enum {
109 OCRDMA_DB_RQ_OFFSET = 0xE0, 113 OCRDMA_DB_RQ_OFFSET = 0xE0,
@@ -422,7 +426,12 @@ struct ocrdma_ae_qp_mcqe {
422 426
423#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 427#define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14
424#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 428#define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5
425#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3 429
430enum ocrdma_async_grp5_events {
431 OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01,
432 OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02,
433 OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03
434};
426 435
427enum OCRDMA_ASYNC_EVENT_TYPE { 436enum OCRDMA_ASYNC_EVENT_TYPE {
428 OCRDMA_CQ_ERROR = 0x00, 437 OCRDMA_CQ_ERROR = 0x00,
@@ -525,8 +534,8 @@ struct ocrdma_mbx_query_config {
525 u32 max_ird_ord_per_qp; 534 u32 max_ird_ord_per_qp;
526 u32 max_shared_ird_ord; 535 u32 max_shared_ird_ord;
527 u32 max_mr; 536 u32 max_mr;
528 u32 max_mr_size_lo;
529 u32 max_mr_size_hi; 537 u32 max_mr_size_hi;
538 u32 max_mr_size_lo;
530 u32 max_num_mr_pbl; 539 u32 max_num_mr_pbl;
531 u32 max_mw; 540 u32 max_mw;
532 u32 max_fmr; 541 u32 max_fmr;
@@ -580,17 +589,26 @@ enum {
580 OCRDMA_FN_MODE_RDMA = 0x4 589 OCRDMA_FN_MODE_RDMA = 0x4
581}; 590};
582 591
592enum {
593 OCRDMA_IF_TYPE_MASK = 0xFFFF0000,
594 OCRDMA_IF_TYPE_SHIFT = 0x10,
595 OCRDMA_PHY_TYPE_MASK = 0x0000FFFF,
596 OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000,
597 OCRDMA_FUTURE_DETAILS_SHIFT = 0x10,
598 OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF,
599 OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000,
600 OCRDMA_FSPEED_SUPP_SHIFT = 0x10,
601 OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF
602};
603
583struct ocrdma_get_phy_info_rsp { 604struct ocrdma_get_phy_info_rsp {
584 struct ocrdma_mqe_hdr hdr; 605 struct ocrdma_mqe_hdr hdr;
585 struct ocrdma_mbx_rsp rsp; 606 struct ocrdma_mbx_rsp rsp;
586 607
587 u16 phy_type; 608 u32 ityp_ptyp;
588 u16 interface_type;
589 u32 misc_params; 609 u32 misc_params;
590 u16 ext_phy_details; 610 u32 ftrdtl_exphydtl;
591 u16 rsvd; 611 u32 fspeed_aspeed;
592 u16 auto_speeds_supported;
593 u16 fixed_speeds_supported;
594 u32 future_use[2]; 612 u32 future_use[2];
595}; 613};
596 614
@@ -603,19 +621,34 @@ enum {
603 OCRDMA_PHY_SPEED_40GBPS = 0x20 621 OCRDMA_PHY_SPEED_40GBPS = 0x20
604}; 622};
605 623
624enum {
625 OCRDMA_PORT_NUM_MASK = 0x3F,
626 OCRDMA_PT_MASK = 0xC0,
627 OCRDMA_PT_SHIFT = 0x6,
628 OCRDMA_LINK_DUP_MASK = 0x0000FF00,
629 OCRDMA_LINK_DUP_SHIFT = 0x8,
630 OCRDMA_PHY_PS_MASK = 0x00FF0000,
631 OCRDMA_PHY_PS_SHIFT = 0x10,
632 OCRDMA_PHY_PFLT_MASK = 0xFF000000,
633 OCRDMA_PHY_PFLT_SHIFT = 0x18,
634 OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000,
635 OCRDMA_QOS_LNKSP_SHIFT = 0x10,
636 OCRDMA_LLST_MASK = 0xFF,
637 OCRDMA_PLFC_MASK = 0x00000400,
638 OCRDMA_PLFC_SHIFT = 0x8,
639 OCRDMA_PLRFC_MASK = 0x00000200,
640 OCRDMA_PLRFC_SHIFT = 0x8,
641 OCRDMA_PLTFC_MASK = 0x00000100,
642 OCRDMA_PLTFC_SHIFT = 0x8
643};
606 644
607struct ocrdma_get_link_speed_rsp { 645struct ocrdma_get_link_speed_rsp {
608 struct ocrdma_mqe_hdr hdr; 646 struct ocrdma_mqe_hdr hdr;
609 struct ocrdma_mbx_rsp rsp; 647 struct ocrdma_mbx_rsp rsp;
610 648
611 u8 pt_port_num; 649 u32 pflt_pps_ld_pnum;
612 u8 link_duplex; 650 u32 qos_lsp;
613 u8 phys_port_speed; 651 u32 res_lls;
614 u8 phys_port_fault;
615 u16 rsvd1;
616 u16 qos_lnk_speed;
617 u8 logical_lnk_status;
618 u8 rsvd2[3];
619}; 652};
620 653
621enum { 654enum {
@@ -666,8 +699,7 @@ struct ocrdma_create_cq_cmd {
666 u32 pgsz_pgcnt; 699 u32 pgsz_pgcnt;
667 u32 ev_cnt_flags; 700 u32 ev_cnt_flags;
668 u32 eqn; 701 u32 eqn;
669 u16 cqe_count; 702 u32 pdid_cqecnt;
670 u16 pd_id;
671 u32 rsvd6; 703 u32 rsvd6;
672 struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; 704 struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES];
673}; 705};
@@ -678,6 +710,10 @@ struct ocrdma_create_cq {
678}; 710};
679 711
680enum { 712enum {
713 OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10
714};
715
716enum {
681 OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF 717 OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF
682}; 718};
683 719
@@ -1231,7 +1267,6 @@ struct ocrdma_destroy_srq {
1231 1267
1232enum { 1268enum {
1233 OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16), 1269 OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16),
1234 OCRDMA_PD_MAX_DPP_ENABLED_QP = 8,
1235 OCRDMA_DPP_PAGE_SIZE = 4096 1270 OCRDMA_DPP_PAGE_SIZE = 4096
1236}; 1271};
1237 1272
@@ -1896,12 +1931,62 @@ struct ocrdma_rdma_stats_resp {
1896 struct ocrdma_rx_dbg_stats rx_dbg_stats; 1931 struct ocrdma_rx_dbg_stats rx_dbg_stats;
1897} __packed; 1932} __packed;
1898 1933
1934enum {
1935 OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF,
1936 OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00,
1937 OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08,
1938 OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF,
1939 OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000,
1940 OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10,
1941 OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000,
1942 OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18,
1943 OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF,
1944 OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00,
1945 OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08,
1946 OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000,
1947 OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10,
1948 OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000,
1949 OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18,
1950 OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF,
1951 OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000,
1952 OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10,
1953 OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000,
1954 OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18,
1955 OCRDMA_HBA_ATTRB_CV_MASK = 0xFF,
1956 OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00,
1957 OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08,
1958 OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000,
1959 OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10,
1960 OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000,
1961 OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18,
1962 OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000,
1963 OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E,
1964 OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF,
1965 OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00,
1966 OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08,
1967 OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF,
1968 OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000,
1969 OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10,
1970 OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF,
1971 OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000,
1972 OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10,
1973 OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF,
1974 OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00,
1975 OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08,
1976 OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000,
1977 OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10,
1978 OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000,
1979 OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18,
1980 OCRDMA_HBA_ATTRB_NETFIL_MASK =0xFF
1981};
1899 1982
1900struct mgmt_hba_attribs { 1983struct mgmt_hba_attribs {
1901 u8 flashrom_version_string[32]; 1984 u8 flashrom_version_string[32];
1902 u8 manufacturer_name[32]; 1985 u8 manufacturer_name[32];
1903 u32 supported_modes; 1986 u32 supported_modes;
1904 u32 rsvd0[3]; 1987 u32 rsvd_eprom_verhi_verlo;
1988 u32 mbx_ds_ver;
1989 u32 epfw_ds_ver;
1905 u8 ncsi_ver_string[12]; 1990 u8 ncsi_ver_string[12];
1906 u32 default_extended_timeout; 1991 u32 default_extended_timeout;
1907 u8 controller_model_number[32]; 1992 u8 controller_model_number[32];
@@ -1914,34 +1999,26 @@ struct mgmt_hba_attribs {
1914 u8 driver_version_string[32]; 1999 u8 driver_version_string[32];
1915 u8 fw_on_flash_version_string[32]; 2000 u8 fw_on_flash_version_string[32];
1916 u32 functionalities_supported; 2001 u32 functionalities_supported;
1917 u16 max_cdblength; 2002 u32 guid0_asicrev_cdblen;
1918 u8 asic_revision; 2003 u8 generational_guid[12];
1919 u8 generational_guid[16]; 2004 u32 portcnt_guid15;
1920 u8 hba_port_count; 2005 u32 mfuncdev_iscsi_ldtout;
1921 u16 default_link_down_timeout; 2006 u32 ptpnum_maxdoms_hbast_cv;
1922 u8 iscsi_ver_min_max;
1923 u8 multifunction_device;
1924 u8 cache_valid;
1925 u8 hba_status;
1926 u8 max_domains_supported;
1927 u8 phy_port;
1928 u32 firmware_post_status; 2007 u32 firmware_post_status;
1929 u32 hba_mtu[8]; 2008 u32 hba_mtu[8];
1930 u32 rsvd1[4]; 2009 u32 res_asicgen_iscsi_feaures;
2010 u32 rsvd1[3];
1931}; 2011};
1932 2012
1933struct mgmt_controller_attrib { 2013struct mgmt_controller_attrib {
1934 struct mgmt_hba_attribs hba_attribs; 2014 struct mgmt_hba_attribs hba_attribs;
1935 u16 pci_vendor_id; 2015 u32 pci_did_vid;
1936 u16 pci_device_id; 2016 u32 pci_ssid_svid;
1937 u16 pci_sub_vendor_id; 2017 u32 ityp_fnum_devnum_bnum;
1938 u16 pci_sub_system_id; 2018 u32 uid_hi;
1939 u8 pci_bus_number; 2019 u32 uid_lo;
1940 u8 pci_device_number; 2020 u32 res_nnetfil;
1941 u8 pci_function_number; 2021 u32 rsvd0[4];
1942 u8 interface_type;
1943 u64 unique_identifier;
1944 u32 rsvd0[5];
1945}; 2022};
1946 2023
1947struct ocrdma_get_ctrl_attribs_rsp { 2024struct ocrdma_get_ctrl_attribs_rsp {
@@ -1949,5 +2026,79 @@ struct ocrdma_get_ctrl_attribs_rsp {
1949 struct mgmt_controller_attrib ctrl_attribs; 2026 struct mgmt_controller_attrib ctrl_attribs;
1950}; 2027};
1951 2028
2029#define OCRDMA_SUBSYS_DCBX 0x10
2030
2031enum OCRDMA_DCBX_OPCODE {
2032 OCRDMA_CMD_GET_DCBX_CONFIG = 0x01
2033};
2034
2035enum OCRDMA_DCBX_PARAM_TYPE {
2036 OCRDMA_PARAMETER_TYPE_ADMIN = 0x00,
2037 OCRDMA_PARAMETER_TYPE_OPER = 0x01,
2038 OCRDMA_PARAMETER_TYPE_PEER = 0x02
2039};
2040
2041enum OCRDMA_DCBX_APP_PROTO {
2042 OCRDMA_APP_PROTO_ROCE = 0x8915
2043};
2044
2045enum OCRDMA_DCBX_PROTO {
2046 OCRDMA_PROTO_SELECT_L2 = 0x00,
2047 OCRDMA_PROTO_SELECT_L4 = 0x01
2048};
2049
2050enum OCRDMA_DCBX_APP_PARAM {
2051 OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF,
2052 OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF,
2053 OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10,
2054 OCRDMA_APP_PARAM_VALID_MASK = 0xFF,
2055 OCRDMA_APP_PARAM_VALID_SHIFT = 0x18
2056};
2057
2058enum OCRDMA_DCBX_STATE_FLAGS {
2059 OCRDMA_STATE_FLAG_ENABLED = 0x01,
2060 OCRDMA_STATE_FLAG_ADDVERTISED = 0x02,
2061 OCRDMA_STATE_FLAG_WILLING = 0x04,
2062 OCRDMA_STATE_FLAG_SYNC = 0x08,
2063 OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000,
2064 OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000
2065};
2066
2067enum OCRDMA_TCV_AEV_OPV_ST {
2068 OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF,
2069 OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18,
2070 OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10,
2071 OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08,
2072 OCRDMA_DCBX_STATE_MASK = 0xFF
2073};
2074
2075struct ocrdma_app_parameter {
2076 u32 valid_proto_app;
2077 u32 oui;
2078 u32 app_prio[2];
2079};
2080
2081struct ocrdma_dcbx_cfg {
2082 u32 tcv_aev_opv_st;
2083 u32 tc_state;
2084 u32 pfc_state;
2085 u32 qcn_state;
2086 u32 appl_state;
2087 u32 ll_state;
2088 u32 tc_bw[2];
2089 u32 tc_prio[8];
2090 u32 pfc_prio[2];
2091 struct ocrdma_app_parameter app_param[15];
2092};
2093
2094struct ocrdma_get_dcbx_cfg_req {
2095 struct ocrdma_mbx_hdr hdr;
2096 u32 param_type;
2097} __packed;
2098
2099struct ocrdma_get_dcbx_cfg_rsp {
2100 struct ocrdma_mbx_rsp hdr;
2101 struct ocrdma_dcbx_cfg cfg;
2102} __packed;
1952 2103
1953#endif /* __OCRDMA_SLI_H__ */ 2104#endif /* __OCRDMA_SLI_H__ */
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index edf6211d84b8..acb434d16903 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
69 memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], 69 memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
70 min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); 70 min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
71 ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); 71 ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
72 attr->max_mr_size = ~0ull; 72 attr->max_mr_size = dev->attr.max_mr_size;
73 attr->page_size_cap = 0xffff000; 73 attr->page_size_cap = 0xffff000;
74 attr->vendor_id = dev->nic_info.pdev->vendor; 74 attr->vendor_id = dev->nic_info.pdev->vendor;
75 attr->vendor_part_id = dev->nic_info.pdev->device; 75 attr->vendor_part_id = dev->nic_info.pdev->device;
76 attr->hw_ver = 0; 76 attr->hw_ver = dev->asic_id;
77 attr->max_qp = dev->attr.max_qp; 77 attr->max_qp = dev->attr.max_qp;
78 attr->max_ah = OCRDMA_MAX_AH; 78 attr->max_ah = OCRDMA_MAX_AH;
79 attr->max_qp_wr = dev->attr.max_wqe; 79 attr->max_qp_wr = dev->attr.max_wqe;
@@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
268 pd->dpp_enabled = 268 pd->dpp_enabled =
269 ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; 269 ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
270 pd->num_dpp_qp = 270 pd->num_dpp_qp =
271 pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; 271 pd->dpp_enabled ? (dev->nic_info.db_page_size /
272 dev->attr.wqe_size) : 0;
272 } 273 }
273 274
274retry: 275retry:
@@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
328 struct ocrdma_pd *pd = uctx->cntxt_pd; 329 struct ocrdma_pd *pd = uctx->cntxt_pd;
329 struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); 330 struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
330 331
331 BUG_ON(uctx->pd_in_use); 332 if (uctx->pd_in_use) {
333 pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
334 __func__, dev->id, pd->id);
335 }
332 uctx->cntxt_pd = NULL; 336 uctx->cntxt_pd = NULL;
333 status = _ocrdma_dealloc_pd(dev, pd); 337 status = _ocrdma_dealloc_pd(dev, pd);
334 return status; 338 return status;
@@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
843 if (mr->umem) 847 if (mr->umem)
844 ib_umem_release(mr->umem); 848 ib_umem_release(mr->umem);
845 kfree(mr); 849 kfree(mr);
850
851 /* Don't stop cleanup, in case FW is unresponsive */
852 if (dev->mqe_ctx.fw_error_state) {
853 status = 0;
854 pr_err("%s(%d) fw not responding.\n",
855 __func__, dev->id);
856 }
846 return status; 857 return status;
847} 858}
848 859
@@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
2054 } 2065 }
2055 2066
2056 while (wr) { 2067 while (wr) {
2068 if (qp->qp_type == IB_QPT_UD &&
2069 (wr->opcode != IB_WR_SEND &&
2070 wr->opcode != IB_WR_SEND_WITH_IMM)) {
2071 *bad_wr = wr;
2072 status = -EINVAL;
2073 break;
2074 }
2057 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || 2075 if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
2058 wr->num_sge > qp->sq.max_sges) { 2076 wr->num_sge > qp->sq.max_sges) {
2059 *bad_wr = wr; 2077 *bad_wr = wr;
@@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2488 *stop = true; 2506 *stop = true;
2489 expand = false; 2507 expand = false;
2490 } 2508 }
2509 } else if (is_hw_sq_empty(qp)) {
2510 /* Do nothing */
2511 expand = false;
2512 *polled = false;
2513 *stop = false;
2491 } else { 2514 } else {
2492 *polled = true; 2515 *polled = true;
2493 expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); 2516 expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
@@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2593 *stop = true; 2616 *stop = true;
2594 expand = false; 2617 expand = false;
2595 } 2618 }
2619 } else if (is_hw_rq_empty(qp)) {
2620 /* Do nothing */
2621 expand = false;
2622 *polled = false;
2623 *stop = false;
2596 } else { 2624 } else {
2597 *polled = true; 2625 *polled = true;
2598 expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); 2626 expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
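
The ocrdma_post_send() hunk above makes UD QPs reject anything other than SEND and SEND_WITH_IMM work requests before touching the send queue. A minimal kernel-style sketch of the same check, factored into a predicate (the helper name is illustrative, not part of the driver):

#include <rdma/ib_verbs.h>

/* Illustrative helper: UD QPs may only post SEND / SEND_WITH_IMM;
 * RDMA reads/writes and atomics require a connected QP. */
static bool wr_opcode_ok_for_ud(struct ib_qp *ibqp, struct ib_send_wr *wr)
{
	if (ibqp->qp_type != IB_QPT_UD)
		return true;
	return wr->opcode == IB_WR_SEND || wr->opcode == IB_WR_SEND_WITH_IMM;
}
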
diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c
index 22c720e5740d..636be117b578 100644
--- a/drivers/infiniband/hw/qib/qib_mad.c
+++ b/drivers/infiniband/hw/qib/qib_mad.c
@@ -2476,7 +2476,7 @@ int qib_create_agents(struct qib_ibdev *dev)
2476 ibp = &dd->pport[p].ibport_data; 2476 ibp = &dd->pport[p].ibport_data;
2477 agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI, 2477 agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
2478 NULL, 0, send_handler, 2478 NULL, 0, send_handler,
2479 NULL, NULL); 2479 NULL, NULL, 0);
2480 if (IS_ERR(agent)) { 2480 if (IS_ERR(agent)) {
2481 ret = PTR_ERR(agent); 2481 ret = PTR_ERR(agent);
2482 goto err; 2482 goto err;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index c639f90cfda4..3edce617c31b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -86,7 +86,6 @@ enum {
86 IPOIB_FLAG_INITIALIZED = 1, 86 IPOIB_FLAG_INITIALIZED = 1,
87 IPOIB_FLAG_ADMIN_UP = 2, 87 IPOIB_FLAG_ADMIN_UP = 2,
88 IPOIB_PKEY_ASSIGNED = 3, 88 IPOIB_PKEY_ASSIGNED = 3,
89 IPOIB_PKEY_STOP = 4,
90 IPOIB_FLAG_SUBINTERFACE = 5, 89 IPOIB_FLAG_SUBINTERFACE = 5,
91 IPOIB_MCAST_RUN = 6, 90 IPOIB_MCAST_RUN = 6,
92 IPOIB_STOP_REAPER = 7, 91 IPOIB_STOP_REAPER = 7,
@@ -312,7 +311,6 @@ struct ipoib_dev_priv {
312 struct list_head multicast_list; 311 struct list_head multicast_list;
313 struct rb_root multicast_tree; 312 struct rb_root multicast_tree;
314 313
315 struct delayed_work pkey_poll_task;
316 struct delayed_work mcast_task; 314 struct delayed_work mcast_task;
317 struct work_struct carrier_on_task; 315 struct work_struct carrier_on_task;
318 struct work_struct flush_light; 316 struct work_struct flush_light;
@@ -473,10 +471,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
473void ipoib_pkey_event(struct work_struct *work); 471void ipoib_pkey_event(struct work_struct *work);
474void ipoib_ib_dev_cleanup(struct net_device *dev); 472void ipoib_ib_dev_cleanup(struct net_device *dev);
475 473
476int ipoib_ib_dev_open(struct net_device *dev); 474int ipoib_ib_dev_open(struct net_device *dev, int flush);
477int ipoib_ib_dev_up(struct net_device *dev); 475int ipoib_ib_dev_up(struct net_device *dev);
478int ipoib_ib_dev_down(struct net_device *dev, int flush); 476int ipoib_ib_dev_down(struct net_device *dev, int flush);
479int ipoib_ib_dev_stop(struct net_device *dev, int flush); 477int ipoib_ib_dev_stop(struct net_device *dev, int flush);
478void ipoib_pkey_dev_check_presence(struct net_device *dev);
480 479
481int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); 480int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
482void ipoib_dev_cleanup(struct net_device *dev); 481void ipoib_dev_cleanup(struct net_device *dev);
@@ -532,8 +531,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf);
532 531
533void ipoib_setup(struct net_device *dev); 532void ipoib_setup(struct net_device *dev);
534 533
535void ipoib_pkey_poll(struct work_struct *work); 534void ipoib_pkey_open(struct ipoib_dev_priv *priv);
536int ipoib_pkey_dev_delay_open(struct net_device *dev);
537void ipoib_drain_cq(struct net_device *dev); 535void ipoib_drain_cq(struct net_device *dev);
538 536
539void ipoib_set_ethtool_ops(struct net_device *dev); 537void ipoib_set_ethtool_ops(struct net_device *dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index 50061854616e..6bd5740e2691 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev)
281{ 281{
282 struct ipoib_dev_priv *priv = netdev_priv(dev); 282 struct ipoib_dev_priv *priv = netdev_priv(dev);
283 283
284 if (priv->mcg_dentry) 284 debugfs_remove(priv->mcg_dentry);
285 debugfs_remove(priv->mcg_dentry); 285 debugfs_remove(priv->path_dentry);
286 if (priv->path_dentry)
287 debugfs_remove(priv->path_dentry);
288} 286}
289 287
290int ipoib_register_debugfs(void) 288int ipoib_register_debugfs(void)
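
The ipoib_delete_debug_files() hunk above drops the NULL checks because debugfs_remove() already ignores NULL and error-valued dentries. A one-line illustration of why the guards were redundant (helper name is illustrative):

#include <linux/debugfs.h>

/* debugfs_remove() is a no-op for a NULL (or error) dentry. */
static void demo_remove_debug_file(struct dentry *dentry)
{
	debugfs_remove(dentry);		/* safe even when dentry == NULL */
}
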
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 6a7003ddb0be..72626c348174 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx)
664 drain_tx_cq((struct net_device *)ctx); 664 drain_tx_cq((struct net_device *)ctx);
665} 665}
666 666
667int ipoib_ib_dev_open(struct net_device *dev) 667int ipoib_ib_dev_open(struct net_device *dev, int flush)
668{ 668{
669 struct ipoib_dev_priv *priv = netdev_priv(dev); 669 struct ipoib_dev_priv *priv = netdev_priv(dev);
670 int ret; 670 int ret;
671 671
672 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) { 672 ipoib_pkey_dev_check_presence(dev);
673 ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey); 673
674 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 674 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
675 ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey,
676 (!(priv->pkey & 0x7fff) ? "Invalid" : "not found"));
675 return -1; 677 return -1;
676 } 678 }
677 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
678 679
679 ret = ipoib_init_qp(dev); 680 ret = ipoib_init_qp(dev);
680 if (ret) { 681 if (ret) {
@@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev)
705dev_stop: 706dev_stop:
706 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) 707 if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
707 napi_enable(&priv->napi); 708 napi_enable(&priv->napi);
708 ipoib_ib_dev_stop(dev, 1); 709 ipoib_ib_dev_stop(dev, flush);
709 return -1; 710 return -1;
710} 711}
711 712
712static void ipoib_pkey_dev_check_presence(struct net_device *dev) 713void ipoib_pkey_dev_check_presence(struct net_device *dev)
713{ 714{
714 struct ipoib_dev_priv *priv = netdev_priv(dev); 715 struct ipoib_dev_priv *priv = netdev_priv(dev);
715 u16 pkey_index = 0;
716 716
717 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) 717 if (!(priv->pkey & 0x7fff) ||
718 ib_find_pkey(priv->ca, priv->port, priv->pkey,
719 &priv->pkey_index))
718 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 720 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
719 else 721 else
720 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 722 set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
@@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
745 clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); 747 clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
746 netif_carrier_off(dev); 748 netif_carrier_off(dev);
747 749
748 /* Shutdown the P_Key thread if still active */
749 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
750 mutex_lock(&pkey_mutex);
751 set_bit(IPOIB_PKEY_STOP, &priv->flags);
752 cancel_delayed_work_sync(&priv->pkey_poll_task);
753 mutex_unlock(&pkey_mutex);
754 }
755
756 ipoib_mcast_stop_thread(dev, flush); 750 ipoib_mcast_stop_thread(dev, flush);
757 ipoib_mcast_dev_flush(dev); 751 ipoib_mcast_dev_flush(dev);
758 752
@@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
924 (unsigned long) dev); 918 (unsigned long) dev);
925 919
926 if (dev->flags & IFF_UP) { 920 if (dev->flags & IFF_UP) {
927 if (ipoib_ib_dev_open(dev)) { 921 if (ipoib_ib_dev_open(dev, 1)) {
928 ipoib_transport_dev_cleanup(dev); 922 ipoib_transport_dev_cleanup(dev);
929 return -ENODEV; 923 return -ENODEV;
930 } 924 }
@@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv)
966 960
967 return 1; 961 return 1;
968} 962}
963/*
964 * returns 0 if pkey value was found in a different slot.
965 */
966static inline int update_child_pkey(struct ipoib_dev_priv *priv)
967{
968 u16 old_index = priv->pkey_index;
969
970 priv->pkey_index = 0;
971 ipoib_pkey_dev_check_presence(priv->dev);
972
973 if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
974 (old_index == priv->pkey_index))
975 return 1;
976 return 0;
977}
969 978
970static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, 979static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
971 enum ipoib_flush_level level) 980 enum ipoib_flush_level level)
972{ 981{
973 struct ipoib_dev_priv *cpriv; 982 struct ipoib_dev_priv *cpriv;
974 struct net_device *dev = priv->dev; 983 struct net_device *dev = priv->dev;
975 u16 new_index;
976 int result; 984 int result;
977 985
978 down_read(&priv->vlan_rwsem); 986 down_read(&priv->vlan_rwsem);
@@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
986 994
987 up_read(&priv->vlan_rwsem); 995 up_read(&priv->vlan_rwsem);
988 996
989 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { 997 if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) &&
990 /* for non-child devices must check/update the pkey value here */ 998 level != IPOIB_FLUSH_HEAVY) {
991 if (level == IPOIB_FLUSH_HEAVY &&
992 !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
993 update_parent_pkey(priv);
994 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); 999 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n");
995 return; 1000 return;
996 } 1001 }
997 1002
998 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { 1003 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) {
1004 /* interface is down. update pkey and leave. */
1005 if (level == IPOIB_FLUSH_HEAVY) {
1006 if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags))
1007 update_parent_pkey(priv);
1008 else
1009 update_child_pkey(priv);
1010 }
999 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); 1011 ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n");
1000 return; 1012 return;
1001 } 1013 }
@@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1005 * (parent) devices should always takes what present in pkey index 0 1017 * (parent) devices should always takes what present in pkey index 0
1006 */ 1018 */
1007 if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { 1019 if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
1008 if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { 1020 result = update_child_pkey(priv);
1009 clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); 1021 if (result) {
1010 ipoib_ib_dev_down(dev, 0); 1022 /* restart QP only if P_Key index is changed */
1011 ipoib_ib_dev_stop(dev, 0);
1012 if (ipoib_pkey_dev_delay_open(dev))
1013 return;
1014 }
1015 /* restart QP only if P_Key index is changed */
1016 if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) &&
1017 new_index == priv->pkey_index) {
1018 ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n"); 1023 ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n");
1019 return; 1024 return;
1020 } 1025 }
1021 priv->pkey_index = new_index; 1026
1022 } else { 1027 } else {
1023 result = update_parent_pkey(priv); 1028 result = update_parent_pkey(priv);
1024 /* restart QP only if P_Key value changed */ 1029 /* restart QP only if P_Key value changed */
@@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
1038 ipoib_ib_dev_down(dev, 0); 1043 ipoib_ib_dev_down(dev, 0);
1039 1044
1040 if (level == IPOIB_FLUSH_HEAVY) { 1045 if (level == IPOIB_FLUSH_HEAVY) {
1041 ipoib_ib_dev_stop(dev, 0); 1046 if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
1042 ipoib_ib_dev_open(dev); 1047 ipoib_ib_dev_stop(dev, 0);
1048 if (ipoib_ib_dev_open(dev, 0) != 0)
1049 return;
1050 if (netif_queue_stopped(dev))
1051 netif_start_queue(dev);
1043 } 1052 }
1044 1053
1045 /* 1054 /*
@@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
1094 ipoib_transport_dev_cleanup(dev); 1103 ipoib_transport_dev_cleanup(dev);
1095} 1104}
1096 1105
1097/*
1098 * Delayed P_Key Assigment Interim Support
1099 *
1100 * The following is initial implementation of delayed P_Key assigment
1101 * mechanism. It is using the same approach implemented for the multicast
1102 * group join. The single goal of this implementation is to quickly address
1103 * Bug #2507. This implementation will probably be removed when the P_Key
1104 * change async notification is available.
1105 */
1106
1107void ipoib_pkey_poll(struct work_struct *work)
1108{
1109 struct ipoib_dev_priv *priv =
1110 container_of(work, struct ipoib_dev_priv, pkey_poll_task.work);
1111 struct net_device *dev = priv->dev;
1112
1113 ipoib_pkey_dev_check_presence(dev);
1114
1115 if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
1116 ipoib_open(dev);
1117 else {
1118 mutex_lock(&pkey_mutex);
1119 if (!test_bit(IPOIB_PKEY_STOP, &priv->flags))
1120 queue_delayed_work(ipoib_workqueue,
1121 &priv->pkey_poll_task,
1122 HZ);
1123 mutex_unlock(&pkey_mutex);
1124 }
1125}
1126
1127int ipoib_pkey_dev_delay_open(struct net_device *dev)
1128{
1129 struct ipoib_dev_priv *priv = netdev_priv(dev);
1130
1131 /* Look for the interface pkey value in the IB Port P_Key table and */
1132 /* set the interface pkey assigment flag */
1133 ipoib_pkey_dev_check_presence(dev);
1134 1106
1135 /* P_Key value not assigned yet - start polling */
1136 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) {
1137 mutex_lock(&pkey_mutex);
1138 clear_bit(IPOIB_PKEY_STOP, &priv->flags);
1139 queue_delayed_work(ipoib_workqueue,
1140 &priv->pkey_poll_task,
1141 HZ);
1142 mutex_unlock(&pkey_mutex);
1143 return 1;
1144 }
1145
1146 return 0;
1147}
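
The rewritten ipoib_pkey_dev_check_presence() above treats a P_Key whose low 15 bits are zero as invalid before it even searches the port's P_Key table. A hedged sketch of the convention being relied on (the helper is illustrative, not part of the patch):

#include <linux/types.h>

/* Bit 15 of a P_Key is the membership bit and the low 15 bits are the key
 * value; a key value of 0 is reserved.  Hence 0x0000 and 0x8000 are both
 * invalid, while 0xffff/0x7fff are the default key with full/limited
 * membership. */
static inline bool pkey_value_valid(u16 pkey)
{
	return (pkey & 0x7fff) != 0;
}
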
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 4e675f4fecc9..1310acf6bf92 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev)
108 108
109 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); 109 set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
110 110
111 if (ipoib_pkey_dev_delay_open(dev)) 111 if (ipoib_ib_dev_open(dev, 1)) {
112 return 0; 112 if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
113 113 return 0;
114 if (ipoib_ib_dev_open(dev))
115 goto err_disable; 114 goto err_disable;
115 }
116 116
117 if (ipoib_ib_dev_up(dev)) 117 if (ipoib_ib_dev_up(dev))
118 goto err_stop; 118 goto err_stop;
@@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev)
1379 INIT_LIST_HEAD(&priv->dead_ahs); 1379 INIT_LIST_HEAD(&priv->dead_ahs);
1380 INIT_LIST_HEAD(&priv->multicast_list); 1380 INIT_LIST_HEAD(&priv->multicast_list);
1381 1381
1382 INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll);
1383 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); 1382 INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task);
1384 INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); 1383 INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
1385 INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); 1384 INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light);
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index eb7973957a6e..61ee91d88380 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -596,20 +596,28 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr,
596 struct iser_conn *ib_conn; 596 struct iser_conn *ib_conn;
597 struct iscsi_endpoint *ep; 597 struct iscsi_endpoint *ep;
598 598
599 ep = iscsi_create_endpoint(sizeof(*ib_conn)); 599 ep = iscsi_create_endpoint(0);
600 if (!ep) 600 if (!ep)
601 return ERR_PTR(-ENOMEM); 601 return ERR_PTR(-ENOMEM);
602 602
603 ib_conn = ep->dd_data; 603 ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL);
604 if (!ib_conn) {
605 err = -ENOMEM;
606 goto failure;
607 }
608
609 ep->dd_data = ib_conn;
604 ib_conn->ep = ep; 610 ib_conn->ep = ep;
605 iser_conn_init(ib_conn); 611 iser_conn_init(ib_conn);
606 612
607 err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, 613 err = iser_connect(ib_conn, NULL, dst_addr, non_blocking);
608 non_blocking);
609 if (err) 614 if (err)
610 return ERR_PTR(err); 615 goto failure;
611 616
612 return ep; 617 return ep;
618failure:
619 iscsi_destroy_endpoint(ep);
620 return ERR_PTR(err);
613} 621}
614 622
615static int 623static int
@@ -619,15 +627,16 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms)
619 int rc; 627 int rc;
620 628
621 ib_conn = ep->dd_data; 629 ib_conn = ep->dd_data;
622 rc = wait_event_interruptible_timeout(ib_conn->wait, 630 rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion,
623 ib_conn->state == ISER_CONN_UP, 631 msecs_to_jiffies(timeout_ms));
624 msecs_to_jiffies(timeout_ms));
625
626 /* if conn establishment failed, return error code to iscsi */ 632 /* if conn establishment failed, return error code to iscsi */
627 if (!rc && 633 if (rc == 0) {
628 (ib_conn->state == ISER_CONN_TERMINATING || 634 mutex_lock(&ib_conn->state_mutex);
629 ib_conn->state == ISER_CONN_DOWN)) 635 if (ib_conn->state == ISER_CONN_TERMINATING ||
630 rc = -1; 636 ib_conn->state == ISER_CONN_DOWN)
637 rc = -1;
638 mutex_unlock(&ib_conn->state_mutex);
639 }
631 640
632 iser_info("ib conn %p rc = %d\n", ib_conn, rc); 641 iser_info("ib conn %p rc = %d\n", ib_conn, rc);
633 642
@@ -646,19 +655,25 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep)
646 655
647 ib_conn = ep->dd_data; 656 ib_conn = ep->dd_data;
648 iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); 657 iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state);
658 mutex_lock(&ib_conn->state_mutex);
649 iser_conn_terminate(ib_conn); 659 iser_conn_terminate(ib_conn);
650 660
651 /* 661 /*
652 * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop 662 * if iser_conn and iscsi_conn are bound, we must wait for
653 * call and ISER_CONN_DOWN state before freeing the iser resources. 663 * iscsi_conn_stop and flush errors completion before freeing
654 * otherwise we are safe to free resources immediately. 664 * the iser resources. Otherwise we are safe to free resources
665 * immediately.
655 */ 666 */
656 if (ib_conn->iscsi_conn) { 667 if (ib_conn->iscsi_conn) {
657 INIT_WORK(&ib_conn->release_work, iser_release_work); 668 INIT_WORK(&ib_conn->release_work, iser_release_work);
658 queue_work(release_wq, &ib_conn->release_work); 669 queue_work(release_wq, &ib_conn->release_work);
670 mutex_unlock(&ib_conn->state_mutex);
659 } else { 671 } else {
672 ib_conn->state = ISER_CONN_DOWN;
673 mutex_unlock(&ib_conn->state_mutex);
660 iser_conn_release(ib_conn); 674 iser_conn_release(ib_conn);
661 } 675 }
676 iscsi_destroy_endpoint(ep);
662} 677}
663 678
664static umode_t iser_attr_is_visible(int param_type, int param) 679static umode_t iser_attr_is_visible(int param_type, int param)
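
In the iscsi_iser.c hunks above the connection is no longer embedded in the iSCSI endpoint (iscsi_create_endpoint(0) plus a separate kzalloc()), and ep_poll switches from an open-coded wait queue to a completion. A minimal kernel-style sketch of that completion pattern, with stand-in demo_* names:

#include <linux/completion.h>
#include <linux/jiffies.h>

struct demo_conn {
	struct completion up_completion;	/* fired when the CM reports ESTABLISHED */
};

static void demo_conn_init(struct demo_conn *conn)
{
	init_completion(&conn->up_completion);
}

static void demo_connected_handler(struct demo_conn *conn)
{
	complete(&conn->up_completion);		/* wakes the ep_poll-style waiter */
}

static long demo_wait_for_up(struct demo_conn *conn, int timeout_ms)
{
	/* >0: completed, 0: timed out, <0: interrupted */
	return wait_for_completion_interruptible_timeout(&conn->up_completion,
					msecs_to_jiffies(timeout_ms));
}
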
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 97cd385bf7f7..c877dad381cb 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -326,7 +326,6 @@ struct iser_conn {
326 struct iser_device *device; /* device context */ 326 struct iser_device *device; /* device context */
327 struct rdma_cm_id *cma_id; /* CMA ID */ 327 struct rdma_cm_id *cma_id; /* CMA ID */
328 struct ib_qp *qp; /* QP */ 328 struct ib_qp *qp; /* QP */
329 wait_queue_head_t wait; /* waitq for conn/disconn */
330 unsigned qp_max_recv_dtos; /* num of rx buffers */ 329 unsigned qp_max_recv_dtos; /* num of rx buffers */
331 unsigned qp_max_recv_dtos_mask; /* above minus 1 */ 330 unsigned qp_max_recv_dtos_mask; /* above minus 1 */
332 unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ 331 unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
@@ -335,6 +334,9 @@ struct iser_conn {
335 char name[ISER_OBJECT_NAME_SIZE]; 334 char name[ISER_OBJECT_NAME_SIZE];
336 struct work_struct release_work; 335 struct work_struct release_work;
337 struct completion stop_completion; 336 struct completion stop_completion;
337 struct mutex state_mutex;
338 struct completion flush_completion;
339 struct completion up_completion;
338 struct list_head conn_list; /* entry in ig conn list */ 340 struct list_head conn_list; /* entry in ig conn list */
339 341
340 char *login_buf; 342 char *login_buf;
@@ -448,8 +450,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task,
448 enum iser_data_dir cmd_dir); 450 enum iser_data_dir cmd_dir);
449 451
450int iser_connect(struct iser_conn *ib_conn, 452int iser_connect(struct iser_conn *ib_conn,
451 struct sockaddr_in *src_addr, 453 struct sockaddr *src_addr,
452 struct sockaddr_in *dst_addr, 454 struct sockaddr *dst_addr,
453 int non_blocking); 455 int non_blocking);
454 456
455int iser_reg_page_vec(struct iser_conn *ib_conn, 457int iser_reg_page_vec(struct iser_conn *ib_conn,
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index ea01075f9f9b..3ef167f97d6f 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -491,10 +491,9 @@ out_err:
491} 491}
492 492
493/** 493/**
494 * releases the QP objects, returns 0 on success, 494 * releases the QP object
495 * -1 on failure
496 */ 495 */
497static int iser_free_ib_conn_res(struct iser_conn *ib_conn) 496static void iser_free_ib_conn_res(struct iser_conn *ib_conn)
498{ 497{
499 int cq_index; 498 int cq_index;
500 BUG_ON(ib_conn == NULL); 499 BUG_ON(ib_conn == NULL);
@@ -513,8 +512,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
513 } 512 }
514 513
515 ib_conn->qp = NULL; 514 ib_conn->qp = NULL;
516
517 return 0;
518} 515}
519 516
520/** 517/**
@@ -568,31 +565,40 @@ static void iser_device_try_release(struct iser_device *device)
568 mutex_unlock(&ig.device_list_mutex); 565 mutex_unlock(&ig.device_list_mutex);
569} 566}
570 567
568/**
569 * Called with state mutex held
570 **/
571static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, 571static int iser_conn_state_comp_exch(struct iser_conn *ib_conn,
572 enum iser_ib_conn_state comp, 572 enum iser_ib_conn_state comp,
573 enum iser_ib_conn_state exch) 573 enum iser_ib_conn_state exch)
574{ 574{
575 int ret; 575 int ret;
576 576
577 spin_lock_bh(&ib_conn->lock);
578 if ((ret = (ib_conn->state == comp))) 577 if ((ret = (ib_conn->state == comp)))
579 ib_conn->state = exch; 578 ib_conn->state = exch;
580 spin_unlock_bh(&ib_conn->lock);
581 return ret; 579 return ret;
582} 580}
583 581
584void iser_release_work(struct work_struct *work) 582void iser_release_work(struct work_struct *work)
585{ 583{
586 struct iser_conn *ib_conn; 584 struct iser_conn *ib_conn;
585 int rc;
587 586
588 ib_conn = container_of(work, struct iser_conn, release_work); 587 ib_conn = container_of(work, struct iser_conn, release_work);
589 588
590 /* wait for .conn_stop callback */ 589 /* wait for .conn_stop callback */
591 wait_for_completion(&ib_conn->stop_completion); 590 rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ);
591 WARN_ON(rc == 0);
592 592
593 /* wait for the qp`s post send and post receive buffers to empty */ 593 /* wait for the qp`s post send and post receive buffers to empty */
594 wait_event_interruptible(ib_conn->wait, 594 rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ);
595 ib_conn->state == ISER_CONN_DOWN); 595 WARN_ON(rc == 0);
596
597 ib_conn->state = ISER_CONN_DOWN;
598
599 mutex_lock(&ib_conn->state_mutex);
600 ib_conn->state = ISER_CONN_DOWN;
601 mutex_unlock(&ib_conn->state_mutex);
596 602
597 iser_conn_release(ib_conn); 603 iser_conn_release(ib_conn);
598} 604}
@@ -604,23 +610,27 @@ void iser_conn_release(struct iser_conn *ib_conn)
604{ 610{
605 struct iser_device *device = ib_conn->device; 611 struct iser_device *device = ib_conn->device;
606 612
607 BUG_ON(ib_conn->state == ISER_CONN_UP);
608
609 mutex_lock(&ig.connlist_mutex); 613 mutex_lock(&ig.connlist_mutex);
610 list_del(&ib_conn->conn_list); 614 list_del(&ib_conn->conn_list);
611 mutex_unlock(&ig.connlist_mutex); 615 mutex_unlock(&ig.connlist_mutex);
616
617 mutex_lock(&ib_conn->state_mutex);
618 BUG_ON(ib_conn->state != ISER_CONN_DOWN);
619
612 iser_free_rx_descriptors(ib_conn); 620 iser_free_rx_descriptors(ib_conn);
613 iser_free_ib_conn_res(ib_conn); 621 iser_free_ib_conn_res(ib_conn);
614 ib_conn->device = NULL; 622 ib_conn->device = NULL;
615 /* on EVENT_ADDR_ERROR there's no device yet for this conn */ 623 /* on EVENT_ADDR_ERROR there's no device yet for this conn */
616 if (device != NULL) 624 if (device != NULL)
617 iser_device_try_release(device); 625 iser_device_try_release(device);
626 mutex_unlock(&ib_conn->state_mutex);
627
618 /* if cma handler context, the caller actually destroy the id */ 628 /* if cma handler context, the caller actually destroy the id */
619 if (ib_conn->cma_id != NULL) { 629 if (ib_conn->cma_id != NULL) {
620 rdma_destroy_id(ib_conn->cma_id); 630 rdma_destroy_id(ib_conn->cma_id);
621 ib_conn->cma_id = NULL; 631 ib_conn->cma_id = NULL;
622 } 632 }
623 iscsi_destroy_endpoint(ib_conn->ep); 633 kfree(ib_conn);
624} 634}
625 635
626/** 636/**
@@ -642,22 +652,31 @@ void iser_conn_terminate(struct iser_conn *ib_conn)
642 ib_conn,err); 652 ib_conn,err);
643} 653}
644 654
655/**
656 * Called with state mutex held
657 **/
645static void iser_connect_error(struct rdma_cm_id *cma_id) 658static void iser_connect_error(struct rdma_cm_id *cma_id)
646{ 659{
647 struct iser_conn *ib_conn; 660 struct iser_conn *ib_conn;
648 661
649 ib_conn = (struct iser_conn *)cma_id->context; 662 ib_conn = (struct iser_conn *)cma_id->context;
650
651 ib_conn->state = ISER_CONN_DOWN; 663 ib_conn->state = ISER_CONN_DOWN;
652 wake_up_interruptible(&ib_conn->wait);
653} 664}
654 665
666/**
667 * Called with state mutex held
668 **/
655static void iser_addr_handler(struct rdma_cm_id *cma_id) 669static void iser_addr_handler(struct rdma_cm_id *cma_id)
656{ 670{
657 struct iser_device *device; 671 struct iser_device *device;
658 struct iser_conn *ib_conn; 672 struct iser_conn *ib_conn;
659 int ret; 673 int ret;
660 674
675 ib_conn = (struct iser_conn *)cma_id->context;
676 if (ib_conn->state != ISER_CONN_PENDING)
677 /* bailout */
678 return;
679
661 device = iser_device_find_by_ib_device(cma_id); 680 device = iser_device_find_by_ib_device(cma_id);
662 if (!device) { 681 if (!device) {
663 iser_err("device lookup/creation failed\n"); 682 iser_err("device lookup/creation failed\n");
@@ -665,7 +684,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
665 return; 684 return;
666 } 685 }
667 686
668 ib_conn = (struct iser_conn *)cma_id->context;
669 ib_conn->device = device; 687 ib_conn->device = device;
670 688
671 /* connection T10-PI support */ 689 /* connection T10-PI support */
@@ -689,18 +707,27 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id)
689 } 707 }
690} 708}
691 709
710/**
711 * Called with state mutex held
712 **/
692static void iser_route_handler(struct rdma_cm_id *cma_id) 713static void iser_route_handler(struct rdma_cm_id *cma_id)
693{ 714{
694 struct rdma_conn_param conn_param; 715 struct rdma_conn_param conn_param;
695 int ret; 716 int ret;
696 struct iser_cm_hdr req_hdr; 717 struct iser_cm_hdr req_hdr;
718 struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context;
719 struct iser_device *device = ib_conn->device;
720
721 if (ib_conn->state != ISER_CONN_PENDING)
722 /* bailout */
723 return;
697 724
698 ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); 725 ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context);
699 if (ret) 726 if (ret)
700 goto failure; 727 goto failure;
701 728
702 memset(&conn_param, 0, sizeof conn_param); 729 memset(&conn_param, 0, sizeof conn_param);
703 conn_param.responder_resources = 4; 730 conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
704 conn_param.initiator_depth = 1; 731 conn_param.initiator_depth = 1;
705 conn_param.retry_count = 7; 732 conn_param.retry_count = 7;
706 conn_param.rnr_retry_count = 6; 733 conn_param.rnr_retry_count = 6;
@@ -728,12 +755,16 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id)
728 struct ib_qp_attr attr; 755 struct ib_qp_attr attr;
729 struct ib_qp_init_attr init_attr; 756 struct ib_qp_init_attr init_attr;
730 757
758 ib_conn = (struct iser_conn *)cma_id->context;
759 if (ib_conn->state != ISER_CONN_PENDING)
760 /* bailout */
761 return;
762
731 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); 763 (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
732 iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); 764 iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);
733 765
734 ib_conn = (struct iser_conn *)cma_id->context; 766 ib_conn->state = ISER_CONN_UP;
735 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) 767 complete(&ib_conn->up_completion);
736 wake_up_interruptible(&ib_conn->wait);
737} 768}
738 769
739static void iser_disconnected_handler(struct rdma_cm_id *cma_id) 770static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
@@ -752,19 +783,25 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
752 iser_err("iscsi_iser connection isn't bound\n"); 783 iser_err("iscsi_iser connection isn't bound\n");
753 } 784 }
754 785
755 /* Complete the termination process if no posts are pending */ 786 /* Complete the termination process if no posts are pending. This code
787 * block also exists in iser_handle_comp_error(), but it is needed here
788 * for cases of no flushes at all, e.g. discovery over rdma.
789 */
756 if (ib_conn->post_recv_buf_count == 0 && 790 if (ib_conn->post_recv_buf_count == 0 &&
757 (atomic_read(&ib_conn->post_send_buf_count) == 0)) { 791 (atomic_read(&ib_conn->post_send_buf_count) == 0)) {
758 ib_conn->state = ISER_CONN_DOWN; 792 complete(&ib_conn->flush_completion);
759 wake_up_interruptible(&ib_conn->wait);
760 } 793 }
761} 794}
762 795
763static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) 796static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
764{ 797{
798 struct iser_conn *ib_conn;
799
800 ib_conn = (struct iser_conn *)cma_id->context;
765 iser_info("event %d status %d conn %p id %p\n", 801 iser_info("event %d status %d conn %p id %p\n",
766 event->event, event->status, cma_id->context, cma_id); 802 event->event, event->status, cma_id->context, cma_id);
767 803
804 mutex_lock(&ib_conn->state_mutex);
768 switch (event->event) { 805 switch (event->event) {
769 case RDMA_CM_EVENT_ADDR_RESOLVED: 806 case RDMA_CM_EVENT_ADDR_RESOLVED:
770 iser_addr_handler(cma_id); 807 iser_addr_handler(cma_id);
@@ -785,24 +822,28 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve
785 case RDMA_CM_EVENT_DISCONNECTED: 822 case RDMA_CM_EVENT_DISCONNECTED:
786 case RDMA_CM_EVENT_DEVICE_REMOVAL: 823 case RDMA_CM_EVENT_DEVICE_REMOVAL:
787 case RDMA_CM_EVENT_ADDR_CHANGE: 824 case RDMA_CM_EVENT_ADDR_CHANGE:
825 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
788 iser_disconnected_handler(cma_id); 826 iser_disconnected_handler(cma_id);
789 break; 827 break;
790 default: 828 default:
791 iser_err("Unexpected RDMA CM event (%d)\n", event->event); 829 iser_err("Unexpected RDMA CM event (%d)\n", event->event);
792 break; 830 break;
793 } 831 }
832 mutex_unlock(&ib_conn->state_mutex);
794 return 0; 833 return 0;
795} 834}
796 835
797void iser_conn_init(struct iser_conn *ib_conn) 836void iser_conn_init(struct iser_conn *ib_conn)
798{ 837{
799 ib_conn->state = ISER_CONN_INIT; 838 ib_conn->state = ISER_CONN_INIT;
800 init_waitqueue_head(&ib_conn->wait);
801 ib_conn->post_recv_buf_count = 0; 839 ib_conn->post_recv_buf_count = 0;
802 atomic_set(&ib_conn->post_send_buf_count, 0); 840 atomic_set(&ib_conn->post_send_buf_count, 0);
803 init_completion(&ib_conn->stop_completion); 841 init_completion(&ib_conn->stop_completion);
842 init_completion(&ib_conn->flush_completion);
843 init_completion(&ib_conn->up_completion);
804 INIT_LIST_HEAD(&ib_conn->conn_list); 844 INIT_LIST_HEAD(&ib_conn->conn_list);
805 spin_lock_init(&ib_conn->lock); 845 spin_lock_init(&ib_conn->lock);
846 mutex_init(&ib_conn->state_mutex);
806} 847}
807 848
808 /** 849 /**
@@ -810,22 +851,21 @@ void iser_conn_init(struct iser_conn *ib_conn)
810 * sleeps until the connection is established or rejected 851 * sleeps until the connection is established or rejected
811 */ 852 */
812int iser_connect(struct iser_conn *ib_conn, 853int iser_connect(struct iser_conn *ib_conn,
813 struct sockaddr_in *src_addr, 854 struct sockaddr *src_addr,
814 struct sockaddr_in *dst_addr, 855 struct sockaddr *dst_addr,
815 int non_blocking) 856 int non_blocking)
816{ 857{
817 struct sockaddr *src, *dst;
818 int err = 0; 858 int err = 0;
819 859
820 sprintf(ib_conn->name, "%pI4:%d", 860 mutex_lock(&ib_conn->state_mutex);
821 &dst_addr->sin_addr.s_addr, dst_addr->sin_port); 861
862 sprintf(ib_conn->name, "%pISp", dst_addr);
863
864 iser_info("connecting to: %s\n", ib_conn->name);
822 865
823 /* the device is known only --after-- address resolution */ 866 /* the device is known only --after-- address resolution */
824 ib_conn->device = NULL; 867 ib_conn->device = NULL;
825 868
826 iser_info("connecting to: %pI4, port 0x%x\n",
827 &dst_addr->sin_addr, dst_addr->sin_port);
828
829 ib_conn->state = ISER_CONN_PENDING; 869 ib_conn->state = ISER_CONN_PENDING;
830 870
831 ib_conn->cma_id = rdma_create_id(iser_cma_handler, 871 ib_conn->cma_id = rdma_create_id(iser_cma_handler,
@@ -837,23 +877,21 @@ int iser_connect(struct iser_conn *ib_conn,
837 goto id_failure; 877 goto id_failure;
838 } 878 }
839 879
840 src = (struct sockaddr *)src_addr; 880 err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
841 dst = (struct sockaddr *)dst_addr;
842 err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000);
843 if (err) { 881 if (err) {
844 iser_err("rdma_resolve_addr failed: %d\n", err); 882 iser_err("rdma_resolve_addr failed: %d\n", err);
845 goto addr_failure; 883 goto addr_failure;
846 } 884 }
847 885
848 if (!non_blocking) { 886 if (!non_blocking) {
849 wait_event_interruptible(ib_conn->wait, 887 wait_for_completion_interruptible(&ib_conn->up_completion);
850 (ib_conn->state != ISER_CONN_PENDING));
851 888
852 if (ib_conn->state != ISER_CONN_UP) { 889 if (ib_conn->state != ISER_CONN_UP) {
853 err = -EIO; 890 err = -EIO;
854 goto connect_failure; 891 goto connect_failure;
855 } 892 }
856 } 893 }
894 mutex_unlock(&ib_conn->state_mutex);
857 895
858 mutex_lock(&ig.connlist_mutex); 896 mutex_lock(&ig.connlist_mutex);
859 list_add(&ib_conn->conn_list, &ig.connlist); 897 list_add(&ib_conn->conn_list, &ig.connlist);
@@ -865,6 +903,7 @@ id_failure:
865addr_failure: 903addr_failure:
866 ib_conn->state = ISER_CONN_DOWN; 904 ib_conn->state = ISER_CONN_DOWN;
867connect_failure: 905connect_failure:
906 mutex_unlock(&ib_conn->state_mutex);
868 iser_conn_release(ib_conn); 907 iser_conn_release(ib_conn);
869 return err; 908 return err;
870} 909}
@@ -1049,18 +1088,19 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
1049 1088
1050 if (ib_conn->post_recv_buf_count == 0 && 1089 if (ib_conn->post_recv_buf_count == 0 &&
1051 atomic_read(&ib_conn->post_send_buf_count) == 0) { 1090 atomic_read(&ib_conn->post_send_buf_count) == 0) {
1052 /* getting here when the state is UP means that the conn is * 1091 /**
1053 * being terminated asynchronously from the iSCSI layer's * 1092 * getting here when the state is UP means that the conn is
1054 * perspective. */ 1093 * being terminated asynchronously from the iSCSI layer's
1055 if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, 1094 * perspective. It is safe to peek at the connection state
1056 ISER_CONN_TERMINATING)) 1095 * since iscsi_conn_failure is allowed to be called twice.
1096 **/
1097 if (ib_conn->state == ISER_CONN_UP)
1057 iscsi_conn_failure(ib_conn->iscsi_conn, 1098 iscsi_conn_failure(ib_conn->iscsi_conn,
1058 ISCSI_ERR_CONN_FAILED); 1099 ISCSI_ERR_CONN_FAILED);
1059 1100
1060 /* no more non completed posts to the QP, complete the 1101 /* no more non completed posts to the QP, complete the
1061 * termination process w.o worrying on disconnect event */ 1102 * termination process w.o worrying on disconnect event */
1062 ib_conn->state = ISER_CONN_DOWN; 1103 complete(&ib_conn->flush_completion);
1063 wake_up_interruptible(&ib_conn->wait);
1064 } 1104 }
1065} 1105}
1066 1106
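
After the iser_verbs.c changes above, the RDMA CM callback body runs with ib_conn->state_mutex held, and the address/route/established handlers simply return when the connection is no longer ISER_CONN_PENDING. A hedged sketch of that lock-check-bail shape, using stand-in types and names:

#include <linux/mutex.h>

enum demo_conn_state { DEMO_PENDING, DEMO_UP, DEMO_TERMINATING, DEMO_DOWN };

struct demo_conn {
	struct mutex		state_mutex;	/* serializes CM events vs. teardown */
	enum demo_conn_state	state;
};

static void demo_cm_event(struct demo_conn *conn)
{
	mutex_lock(&conn->state_mutex);
	if (conn->state != DEMO_PENDING) {
		mutex_unlock(&conn->state_mutex);
		return;				/* stale/late event: bail out */
	}
	/* ... resolve address/route, create the QP, move state to DEMO_UP ... */
	mutex_unlock(&conn->state_mutex);
}
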
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index e3c2c5b4297f..62d2a18e1b41 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr);
130static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); 130static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
131 131
132static struct scsi_transport_template *ib_srp_transport_template; 132static struct scsi_transport_template *ib_srp_transport_template;
133static struct workqueue_struct *srp_remove_wq;
133 134
134static struct ib_client srp_client = { 135static struct ib_client srp_client = {
135 .name = "srp", 136 .name = "srp",
@@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target)
731 spin_unlock_irq(&target->lock); 732 spin_unlock_irq(&target->lock);
732 733
733 if (changed) 734 if (changed)
734 queue_work(system_long_wq, &target->remove_work); 735 queue_work(srp_remove_wq, &target->remove_work);
735 736
736 return changed; 737 return changed;
737} 738}
@@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp)
1643 SCSI_SENSE_BUFFERSIZE)); 1644 SCSI_SENSE_BUFFERSIZE));
1644 } 1645 }
1645 1646
1646 if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) 1647 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1647 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1648 else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER))
1649 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); 1648 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1649 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1650 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1651 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1652 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1653 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1654 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1650 1655
1651 srp_free_req(target, req, scmnd, 1656 srp_free_req(target, req, scmnd,
1652 be32_to_cpu(rsp->req_lim_delta)); 1657 be32_to_cpu(rsp->req_lim_delta));
@@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device)
3261 spin_unlock(&host->target_lock); 3266 spin_unlock(&host->target_lock);
3262 3267
3263 /* 3268 /*
3264 * Wait for target port removal tasks. 3269 * Wait for tl_err and target port removal tasks.
3265 */ 3270 */
3266 flush_workqueue(system_long_wq); 3271 flush_workqueue(system_long_wq);
3272 flush_workqueue(srp_remove_wq);
3267 3273
3268 kfree(host); 3274 kfree(host);
3269 } 3275 }
@@ -3313,16 +3319,22 @@ static int __init srp_init_module(void)
3313 indirect_sg_entries = cmd_sg_entries; 3319 indirect_sg_entries = cmd_sg_entries;
3314 } 3320 }
3315 3321
3322 srp_remove_wq = create_workqueue("srp_remove");
3323 if (!srp_remove_wq) {
3324 ret = -ENOMEM;
3325 goto out;
3326 }
3327
3328 ret = -ENOMEM;
3316 ib_srp_transport_template = 3329 ib_srp_transport_template =
3317 srp_attach_transport(&ib_srp_transport_functions); 3330 srp_attach_transport(&ib_srp_transport_functions);
3318 if (!ib_srp_transport_template) 3331 if (!ib_srp_transport_template)
3319 return -ENOMEM; 3332 goto destroy_wq;
3320 3333
3321 ret = class_register(&srp_class); 3334 ret = class_register(&srp_class);
3322 if (ret) { 3335 if (ret) {
3323 pr_err("couldn't register class infiniband_srp\n"); 3336 pr_err("couldn't register class infiniband_srp\n");
3324 srp_release_transport(ib_srp_transport_template); 3337 goto release_tr;
3325 return ret;
3326 } 3338 }
3327 3339
3328 ib_sa_register_client(&srp_sa_client); 3340 ib_sa_register_client(&srp_sa_client);
@@ -3330,13 +3342,22 @@ static int __init srp_init_module(void)
3330 ret = ib_register_client(&srp_client); 3342 ret = ib_register_client(&srp_client);
3331 if (ret) { 3343 if (ret) {
3332 pr_err("couldn't register IB client\n"); 3344 pr_err("couldn't register IB client\n");
3333 srp_release_transport(ib_srp_transport_template); 3345 goto unreg_sa;
3334 ib_sa_unregister_client(&srp_sa_client);
3335 class_unregister(&srp_class);
3336 return ret;
3337 } 3346 }
3338 3347
3339 return 0; 3348out:
3349 return ret;
3350
3351unreg_sa:
3352 ib_sa_unregister_client(&srp_sa_client);
3353 class_unregister(&srp_class);
3354
3355release_tr:
3356 srp_release_transport(ib_srp_transport_template);
3357
3358destroy_wq:
3359 destroy_workqueue(srp_remove_wq);
3360 goto out;
3340} 3361}
3341 3362
3342static void __exit srp_cleanup_module(void) 3363static void __exit srp_cleanup_module(void)
@@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void)
3345 ib_sa_unregister_client(&srp_sa_client); 3366 ib_sa_unregister_client(&srp_sa_client);
3346 class_unregister(&srp_class); 3367 class_unregister(&srp_class);
3347 srp_release_transport(ib_srp_transport_template); 3368 srp_release_transport(ib_srp_transport_template);
3369 destroy_workqueue(srp_remove_wq);
3348} 3370}
3349 3371
3350module_init(srp_init_module); 3372module_init(srp_init_module);
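
ib_srp.c now queues target removal on its own workqueue instead of system_long_wq, and srp_init_module() unwinds with gotos so a late failure releases everything set up earlier. A compressed, illustrative sketch of that init/exit shape (all names here are stand-ins):

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_remove_wq;

static int demo_register_everything(void)
{
	return 0;	/* stand-in for srp_attach_transport()/class_register()/... */
}

static int __init demo_init(void)
{
	int ret;

	demo_remove_wq = create_workqueue("demo_remove");
	if (!demo_remove_wq)
		return -ENOMEM;

	ret = demo_register_everything();
	if (ret)
		goto destroy_wq;

	return 0;

destroy_wq:
	destroy_workqueue(demo_remove_wq);
	return ret;
}

static void __exit demo_exit(void)
{
	/* teardown mirrors demo_init() in reverse order */
	destroy_workqueue(demo_remove_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
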
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index fe09f2788b15..d28a8c284da9 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler,
198 case IB_EVENT_PKEY_CHANGE: 198 case IB_EVENT_PKEY_CHANGE:
199 case IB_EVENT_SM_CHANGE: 199 case IB_EVENT_SM_CHANGE:
200 case IB_EVENT_CLIENT_REREGISTER: 200 case IB_EVENT_CLIENT_REREGISTER:
201 case IB_EVENT_GID_CHANGE:
201 /* Refresh port data asynchronously. */ 202 /* Refresh port data asynchronously. */
202 if (event->element.port_num <= sdev->device->phys_port_cnt) { 203 if (event->element.port_num <= sdev->device->phys_port_cnt) {
203 sport = &sdev->port[event->element.port_num - 1]; 204 sport = &sdev->port[event->element.port_num - 1];
@@ -563,7 +564,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
563 &reg_req, 0, 564 &reg_req, 0,
564 srpt_mad_send_handler, 565 srpt_mad_send_handler,
565 srpt_mad_recv_handler, 566 srpt_mad_recv_handler,
566 sport); 567 sport, 0);
567 if (IS_ERR(sport->mad_agent)) { 568 if (IS_ERR(sport->mad_agent)) {
568 ret = PTR_ERR(sport->mad_agent); 569 ret = PTR_ERR(sport->mad_agent);
569 sport->mad_agent = NULL; 570 sport->mad_agent = NULL;
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 443d03fbac47..8eeab72b93e2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -331,7 +331,7 @@ static int bch_allocator_thread(void *arg)
331 mutex_unlock(&ca->set->bucket_lock); 331 mutex_unlock(&ca->set->bucket_lock);
332 blkdev_issue_discard(ca->bdev, 332 blkdev_issue_discard(ca->bdev,
333 bucket_to_sector(ca->set, bucket), 333 bucket_to_sector(ca->set, bucket),
334 ca->sb.block_size, GFP_KERNEL, 0); 334 ca->sb.bucket_size, GFP_KERNEL, 0);
335 mutex_lock(&ca->set->bucket_lock); 335 mutex_lock(&ca->set->bucket_lock);
336 } 336 }
337 337
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index d2ebcf323094..04f7bc28ef83 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -477,9 +477,13 @@ struct gc_stat {
477 * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; 477 * CACHE_SET_STOPPING always gets set first when we're closing down a cache set;
478 * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. 478 * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e.
479 * flushing dirty data). 479 * flushing dirty data).
480 *
481 * CACHE_SET_RUNNING means all cache devices have been registered and journal
482 * replay is complete.
480 */ 483 */
481#define CACHE_SET_UNREGISTERING 0 484#define CACHE_SET_UNREGISTERING 0
482#define CACHE_SET_STOPPING 1 485#define CACHE_SET_STOPPING 1
486#define CACHE_SET_RUNNING 2
483 487
484struct cache_set { 488struct cache_set {
485 struct closure cl; 489 struct closure cl;
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 545416415305..646fe85261c1 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1182,7 +1182,7 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
1182{ 1182{
1183 uint64_t start_time; 1183 uint64_t start_time;
1184 bool used_mempool = false; 1184 bool used_mempool = false;
1185 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, 1185 struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOWAIT,
1186 order); 1186 order);
1187 if (!out) { 1187 if (!out) {
1188 struct page *outp; 1188 struct page *outp;
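
The __btree_sort() hunk above relaxes the first-chance buffer allocation to __GFP_NOWARN|GFP_NOWAIT; the surrounding code (not fully shown here) falls back to a mempool when that opportunistic allocation fails. A hedged sketch of the pattern, assuming a pre-created mempool:

#include <linux/types.h>
#include <linux/gfp.h>
#include <linux/mempool.h>

/* Try a cheap, non-sleeping, non-warning page allocation first; fall back
 * to a pre-sized mempool, which may sleep under GFP_NOIO but is backed by
 * reserved elements.  'pool' is assumed to have been created elsewhere. */
static void *alloc_sort_buffer(mempool_t *pool, unsigned int order,
			       bool *used_mempool)
{
	void *buf = (void *)__get_free_pages(__GFP_NOWARN | GFP_NOWAIT, order);

	if (buf) {
		*used_mempool = false;
		return buf;
	}

	*used_mempool = true;
	return mempool_alloc(pool, GFP_NOIO);
}
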
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index 5f6728d5d4dd..ae964624efb2 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -453,7 +453,7 @@ static inline bool bch_bkey_equal_header(const struct bkey *l,
453{ 453{
454 return (KEY_DIRTY(l) == KEY_DIRTY(r) && 454 return (KEY_DIRTY(l) == KEY_DIRTY(r) &&
455 KEY_PTRS(l) == KEY_PTRS(r) && 455 KEY_PTRS(l) == KEY_PTRS(r) &&
456 KEY_CSUM(l) == KEY_CSUM(l)); 456 KEY_CSUM(l) == KEY_CSUM(r));
457} 457}
458 458
459/* Keylists */ 459/* Keylists */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 7347b6100961..00cde40db572 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -117,9 +117,9 @@
117({ \ 117({ \
118 int _r, l = (b)->level - 1; \ 118 int _r, l = (b)->level - 1; \
119 bool _w = l <= (op)->lock; \ 119 bool _w = l <= (op)->lock; \
120 struct btree *_child = bch_btree_node_get((b)->c, op, key, l, _w);\ 120 struct btree *_child = bch_btree_node_get((b)->c, op, key, l, \
121 _w, b); \
121 if (!IS_ERR(_child)) { \ 122 if (!IS_ERR(_child)) { \
122 _child->parent = (b); \
123 _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \ 123 _r = bch_btree_ ## fn(_child, op, ##__VA_ARGS__); \
124 rw_unlock(_w, _child); \ 124 rw_unlock(_w, _child); \
125 } else \ 125 } else \
@@ -142,7 +142,6 @@
142 rw_lock(_w, _b, _b->level); \ 142 rw_lock(_w, _b, _b->level); \
143 if (_b == (c)->root && \ 143 if (_b == (c)->root && \
144 _w == insert_lock(op, _b)) { \ 144 _w == insert_lock(op, _b)) { \
145 _b->parent = NULL; \
146 _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \ 145 _r = bch_btree_ ## fn(_b, op, ##__VA_ARGS__); \
147 } \ 146 } \
148 rw_unlock(_w, _b); \ 147 rw_unlock(_w, _b); \
@@ -202,7 +201,7 @@ void bch_btree_node_read_done(struct btree *b)
202 struct bset *i = btree_bset_first(b); 201 struct bset *i = btree_bset_first(b);
203 struct btree_iter *iter; 202 struct btree_iter *iter;
204 203
205 iter = mempool_alloc(b->c->fill_iter, GFP_NOWAIT); 204 iter = mempool_alloc(b->c->fill_iter, GFP_NOIO);
206 iter->size = b->c->sb.bucket_size / b->c->sb.block_size; 205 iter->size = b->c->sb.bucket_size / b->c->sb.block_size;
207 iter->used = 0; 206 iter->used = 0;
208 207
@@ -421,7 +420,7 @@ static void do_btree_node_write(struct btree *b)
421 SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) + 420 SET_PTR_OFFSET(&k.key, 0, PTR_OFFSET(&k.key, 0) +
422 bset_sector_offset(&b->keys, i)); 421 bset_sector_offset(&b->keys, i));
423 422
424 if (!bio_alloc_pages(b->bio, GFP_NOIO)) { 423 if (!bio_alloc_pages(b->bio, __GFP_NOWARN|GFP_NOWAIT)) {
425 int j; 424 int j;
426 struct bio_vec *bv; 425 struct bio_vec *bv;
427 void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1)); 426 void *base = (void *) ((unsigned long) i & ~(PAGE_SIZE - 1));
@@ -967,7 +966,8 @@ err:
967 * level and op->lock. 966 * level and op->lock.
968 */ 967 */
969struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op, 968struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
970 struct bkey *k, int level, bool write) 969 struct bkey *k, int level, bool write,
970 struct btree *parent)
971{ 971{
972 int i = 0; 972 int i = 0;
973 struct btree *b; 973 struct btree *b;
@@ -1002,6 +1002,7 @@ retry:
1002 BUG_ON(b->level != level); 1002 BUG_ON(b->level != level);
1003 } 1003 }
1004 1004
1005 b->parent = parent;
1005 b->accessed = 1; 1006 b->accessed = 1;
1006 1007
1007 for (; i <= b->keys.nsets && b->keys.set[i].size; i++) { 1008 for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
@@ -1022,15 +1023,16 @@ retry:
1022 return b; 1023 return b;
1023} 1024}
1024 1025
1025static void btree_node_prefetch(struct cache_set *c, struct bkey *k, int level) 1026static void btree_node_prefetch(struct btree *parent, struct bkey *k)
1026{ 1027{
1027 struct btree *b; 1028 struct btree *b;
1028 1029
1029 mutex_lock(&c->bucket_lock); 1030 mutex_lock(&parent->c->bucket_lock);
1030 b = mca_alloc(c, NULL, k, level); 1031 b = mca_alloc(parent->c, NULL, k, parent->level - 1);
1031 mutex_unlock(&c->bucket_lock); 1032 mutex_unlock(&parent->c->bucket_lock);
1032 1033
1033 if (!IS_ERR_OR_NULL(b)) { 1034 if (!IS_ERR_OR_NULL(b)) {
1035 b->parent = parent;
1034 bch_btree_node_read(b); 1036 bch_btree_node_read(b);
1035 rw_unlock(true, b); 1037 rw_unlock(true, b);
1036 } 1038 }
@@ -1060,15 +1062,16 @@ static void btree_node_free(struct btree *b)
1060 mutex_unlock(&b->c->bucket_lock); 1062 mutex_unlock(&b->c->bucket_lock);
1061} 1063}
1062 1064
1063struct btree *bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, 1065struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
1064 int level) 1066 int level, bool wait,
1067 struct btree *parent)
1065{ 1068{
1066 BKEY_PADDED(key) k; 1069 BKEY_PADDED(key) k;
1067 struct btree *b = ERR_PTR(-EAGAIN); 1070 struct btree *b = ERR_PTR(-EAGAIN);
1068 1071
1069 mutex_lock(&c->bucket_lock); 1072 mutex_lock(&c->bucket_lock);
1070retry: 1073retry:
1071 if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, op != NULL)) 1074 if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait))
1072 goto err; 1075 goto err;
1073 1076
1074 bkey_put(c, &k.key); 1077 bkey_put(c, &k.key);
@@ -1085,6 +1088,7 @@ retry:
1085 } 1088 }
1086 1089
1087 b->accessed = 1; 1090 b->accessed = 1;
1091 b->parent = parent;
1088 bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb)); 1092 bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
1089 1093
1090 mutex_unlock(&c->bucket_lock); 1094 mutex_unlock(&c->bucket_lock);
@@ -1096,14 +1100,21 @@ err_free:
1096err: 1100err:
1097 mutex_unlock(&c->bucket_lock); 1101 mutex_unlock(&c->bucket_lock);
1098 1102
1099 trace_bcache_btree_node_alloc_fail(b); 1103 trace_bcache_btree_node_alloc_fail(c);
1100 return b; 1104 return b;
1101} 1105}
1102 1106
1107static struct btree *bch_btree_node_alloc(struct cache_set *c,
1108 struct btree_op *op, int level,
1109 struct btree *parent)
1110{
1111 return __bch_btree_node_alloc(c, op, level, op != NULL, parent);
1112}
1113
1103static struct btree *btree_node_alloc_replacement(struct btree *b, 1114static struct btree *btree_node_alloc_replacement(struct btree *b,
1104 struct btree_op *op) 1115 struct btree_op *op)
1105{ 1116{
1106 struct btree *n = bch_btree_node_alloc(b->c, op, b->level); 1117 struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
1107 if (!IS_ERR_OR_NULL(n)) { 1118 if (!IS_ERR_OR_NULL(n)) {
1108 mutex_lock(&n->write_lock); 1119 mutex_lock(&n->write_lock);
1109 bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); 1120 bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1403,6 +1414,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
1403 BUG_ON(btree_bset_first(new_nodes[0])->keys); 1414 BUG_ON(btree_bset_first(new_nodes[0])->keys);
1404 btree_node_free(new_nodes[0]); 1415 btree_node_free(new_nodes[0]);
1405 rw_unlock(true, new_nodes[0]); 1416 rw_unlock(true, new_nodes[0]);
1417 new_nodes[0] = NULL;
1406 1418
1407 for (i = 0; i < nodes; i++) { 1419 for (i = 0; i < nodes; i++) {
1408 if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key))) 1420 if (__bch_keylist_realloc(&keylist, bkey_u64s(&r[i].b->key)))
@@ -1516,7 +1528,7 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
1516 k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); 1528 k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad);
1517 if (k) { 1529 if (k) {
1518 r->b = bch_btree_node_get(b->c, op, k, b->level - 1, 1530 r->b = bch_btree_node_get(b->c, op, k, b->level - 1,
1519 true); 1531 true, b);
1520 if (IS_ERR(r->b)) { 1532 if (IS_ERR(r->b)) {
1521 ret = PTR_ERR(r->b); 1533 ret = PTR_ERR(r->b);
1522 break; 1534 break;
@@ -1811,7 +1823,7 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
1811 k = bch_btree_iter_next_filter(&iter, &b->keys, 1823 k = bch_btree_iter_next_filter(&iter, &b->keys,
1812 bch_ptr_bad); 1824 bch_ptr_bad);
1813 if (k) 1825 if (k)
1814 btree_node_prefetch(b->c, k, b->level - 1); 1826 btree_node_prefetch(b, k);
1815 1827
1816 if (p) 1828 if (p)
1817 ret = btree(check_recurse, p, b, op); 1829 ret = btree(check_recurse, p, b, op);
@@ -1976,12 +1988,12 @@ static int btree_split(struct btree *b, struct btree_op *op,
1976 1988
1977 trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); 1989 trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
1978 1990
1979 n2 = bch_btree_node_alloc(b->c, op, b->level); 1991 n2 = bch_btree_node_alloc(b->c, op, b->level, b->parent);
1980 if (IS_ERR(n2)) 1992 if (IS_ERR(n2))
1981 goto err_free1; 1993 goto err_free1;
1982 1994
1983 if (!b->parent) { 1995 if (!b->parent) {
1984 n3 = bch_btree_node_alloc(b->c, op, b->level + 1); 1996 n3 = bch_btree_node_alloc(b->c, op, b->level + 1, NULL);
1985 if (IS_ERR(n3)) 1997 if (IS_ERR(n3))
1986 goto err_free2; 1998 goto err_free2;
1987 } 1999 }
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 91dfa5e69685..5c391fa01bed 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -242,9 +242,10 @@ void __bch_btree_node_write(struct btree *, struct closure *);
242void bch_btree_node_write(struct btree *, struct closure *); 242void bch_btree_node_write(struct btree *, struct closure *);
243 243
244void bch_btree_set_root(struct btree *); 244void bch_btree_set_root(struct btree *);
245struct btree *bch_btree_node_alloc(struct cache_set *, struct btree_op *, int); 245struct btree *__bch_btree_node_alloc(struct cache_set *, struct btree_op *,
246 int, bool, struct btree *);
246struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *, 247struct btree *bch_btree_node_get(struct cache_set *, struct btree_op *,
247 struct bkey *, int, bool); 248 struct bkey *, int, bool, struct btree *);
248 249
249int bch_btree_insert_check_key(struct btree *, struct btree_op *, 250int bch_btree_insert_check_key(struct btree *, struct btree_op *,
250 struct bkey *); 251 struct bkey *);
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 3a0de4cf9771..243de0bf15cd 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -474,9 +474,8 @@ out:
474 return false; 474 return false;
475} 475}
476 476
477static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k) 477bool __bch_extent_invalid(struct cache_set *c, const struct bkey *k)
478{ 478{
479 struct btree *b = container_of(bk, struct btree, keys);
480 char buf[80]; 479 char buf[80];
481 480
482 if (!KEY_SIZE(k)) 481 if (!KEY_SIZE(k))
@@ -485,16 +484,22 @@ static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
485 if (KEY_SIZE(k) > KEY_OFFSET(k)) 484 if (KEY_SIZE(k) > KEY_OFFSET(k))
486 goto bad; 485 goto bad;
487 486
488 if (__ptr_invalid(b->c, k)) 487 if (__ptr_invalid(c, k))
489 goto bad; 488 goto bad;
490 489
491 return false; 490 return false;
492bad: 491bad:
493 bch_extent_to_text(buf, sizeof(buf), k); 492 bch_extent_to_text(buf, sizeof(buf), k);
494 cache_bug(b->c, "spotted extent %s: %s", buf, bch_ptr_status(b->c, k)); 493 cache_bug(c, "spotted extent %s: %s", buf, bch_ptr_status(c, k));
495 return true; 494 return true;
496} 495}
497 496
497static bool bch_extent_invalid(struct btree_keys *bk, const struct bkey *k)
498{
499 struct btree *b = container_of(bk, struct btree, keys);
500 return __bch_extent_invalid(b->c, k);
501}
502
498static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k, 503static bool bch_extent_bad_expensive(struct btree *b, const struct bkey *k,
499 unsigned ptr) 504 unsigned ptr)
500{ 505{
diff --git a/drivers/md/bcache/extents.h b/drivers/md/bcache/extents.h
index e4e23409782d..e2ed54054e7a 100644
--- a/drivers/md/bcache/extents.h
+++ b/drivers/md/bcache/extents.h
@@ -9,5 +9,6 @@ struct cache_set;
9 9
10void bch_extent_to_text(char *, size_t, const struct bkey *); 10void bch_extent_to_text(char *, size_t, const struct bkey *);
11bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *); 11bool __bch_btree_ptr_invalid(struct cache_set *, const struct bkey *);
12bool __bch_extent_invalid(struct cache_set *, const struct bkey *);
12 13
13#endif /* _BCACHE_EXTENTS_H */ 14#endif /* _BCACHE_EXTENTS_H */
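
The extents.c/extents.h changes split the extent validity check into a core helper that needs only the cache_set plus a thin btree_keys wrapper built on container_of(), so callers without a btree (such as journal replay) can reuse the check. A toy model of the same refactor (hypothetical names, userspace only):

#include <stddef.h>
#include <stdio.h>
#include <stdbool.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct cache_set { unsigned block_size; };
struct keys { int dummy; };
struct btree {
	struct cache_set *c;
	struct keys keys;
};

/* core check: needs only the cache_set, so journal replay can call it too */
static bool key_invalid_core(struct cache_set *c, unsigned key_size)
{
	return key_size == 0 || key_size > c->block_size;
}

/* original entry point, now a thin wrapper around the core check */
static bool key_invalid(struct keys *k, unsigned key_size)
{
	struct btree *b = container_of(k, struct btree, keys);

	return key_invalid_core(b->c, key_size);
}

int main(void)
{
	struct cache_set c = { .block_size = 8 };
	struct btree b = { .c = &c };

	printf("%d %d\n", key_invalid(&b.keys, 0), key_invalid_core(&c, 4));
	return 0;
}
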
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 59e82021b5bb..fe080ad0e558 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -7,6 +7,7 @@
7#include "bcache.h" 7#include "bcache.h"
8#include "btree.h" 8#include "btree.h"
9#include "debug.h" 9#include "debug.h"
10#include "extents.h"
10 11
11#include <trace/events/bcache.h> 12#include <trace/events/bcache.h>
12 13
@@ -189,11 +190,15 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
189 if (read_bucket(l)) 190 if (read_bucket(l))
190 goto bsearch; 191 goto bsearch;
191 192
192 if (list_empty(list)) 193 /* no journal entries on this device? */
194 if (l == ca->sb.njournal_buckets)
193 continue; 195 continue;
194bsearch: 196bsearch:
197 BUG_ON(list_empty(list));
198
195 /* Binary search */ 199 /* Binary search */
196 m = r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); 200 m = l;
201 r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
197 pr_debug("starting binary search, l %u r %u", l, r); 202 pr_debug("starting binary search, l %u r %u", l, r);
198 203
199 while (l + 1 < r) { 204 while (l + 1 < r) {
@@ -291,15 +296,16 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list)
291 296
292 for (k = i->j.start; 297 for (k = i->j.start;
293 k < bset_bkey_last(&i->j); 298 k < bset_bkey_last(&i->j);
294 k = bkey_next(k)) { 299 k = bkey_next(k))
295 unsigned j; 300 if (!__bch_extent_invalid(c, k)) {
301 unsigned j;
296 302
297 for (j = 0; j < KEY_PTRS(k); j++) 303 for (j = 0; j < KEY_PTRS(k); j++)
298 if (ptr_available(c, k, j)) 304 if (ptr_available(c, k, j))
299 atomic_inc(&PTR_BUCKET(c, k, j)->pin); 305 atomic_inc(&PTR_BUCKET(c, k, j)->pin);
300 306
301 bch_initial_mark_key(c, 0, k); 307 bch_initial_mark_key(c, 0, k);
302 } 308 }
303 } 309 }
304} 310}
305 311
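
In the journal.c hunk, the binary search is now seeded with the bucket that was just read (m = l) rather than with the next set bit, so that bucket can no longer be skipped, and an explicit end-of-device check replaces the list_empty() test. As a generic, standalone illustration of seeding a last-true binary search at a known-good index (placeholder names, not the bcache search itself):

#include <stdio.h>
#include <stdbool.h>

/* toy predicate: "bucket i still holds wanted entries" */
static bool bucket_ok(unsigned i)
{
	return i <= 5;	/* true for 0..5, false afterwards */
}

/*
 * Find the last index in [l, r) for which bucket_ok() holds, given that
 * it holds at l.  Seeding m with l keeps the known-good bucket in play.
 */
static unsigned last_ok(unsigned l, unsigned r)
{
	unsigned m = l;			/* known good */

	while (m + 1 < r) {
		unsigned mid = m + (r - m) / 2;

		if (bucket_ok(mid))
			m = mid;
		else
			r = mid;
	}
	return m;
}

int main(void)
{
	printf("last ok bucket: %u\n", last_ok(2, 16));	/* prints 5 */
	return 0;
}
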
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 15fff4f68a7c..62e6e98186b5 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -311,7 +311,8 @@ void bch_data_insert(struct closure *cl)
311{ 311{
312 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); 312 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
313 313
314 trace_bcache_write(op->bio, op->writeback, op->bypass); 314 trace_bcache_write(op->c, op->inode, op->bio,
315 op->writeback, op->bypass);
315 316
316 bch_keylist_init(&op->insert_keys); 317 bch_keylist_init(&op->insert_keys);
317 bio_get(op->bio); 318 bio_get(op->bio);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 926ded8ccbf5..d4713d098a39 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -733,8 +733,6 @@ static void bcache_device_detach(struct bcache_device *d)
733static void bcache_device_attach(struct bcache_device *d, struct cache_set *c, 733static void bcache_device_attach(struct bcache_device *d, struct cache_set *c,
734 unsigned id) 734 unsigned id)
735{ 735{
736 BUG_ON(test_bit(CACHE_SET_STOPPING, &c->flags));
737
738 d->id = id; 736 d->id = id;
739 d->c = c; 737 d->c = c;
740 c->devices[id] = d; 738 c->devices[id] = d;
@@ -927,6 +925,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
927 list_move(&dc->list, &uncached_devices); 925 list_move(&dc->list, &uncached_devices);
928 926
929 clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags); 927 clear_bit(BCACHE_DEV_DETACHING, &dc->disk.flags);
928 clear_bit(BCACHE_DEV_UNLINK_DONE, &dc->disk.flags);
930 929
931 mutex_unlock(&bch_register_lock); 930 mutex_unlock(&bch_register_lock);
932 931
@@ -1041,6 +1040,9 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c)
1041 */ 1040 */
1042 atomic_set(&dc->count, 1); 1041 atomic_set(&dc->count, 1);
1043 1042
1043 if (bch_cached_dev_writeback_start(dc))
1044 return -ENOMEM;
1045
1044 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { 1046 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
1045 bch_sectors_dirty_init(dc); 1047 bch_sectors_dirty_init(dc);
1046 atomic_set(&dc->has_dirty, 1); 1048 atomic_set(&dc->has_dirty, 1);
@@ -1070,7 +1072,8 @@ static void cached_dev_free(struct closure *cl)
1070 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl); 1072 struct cached_dev *dc = container_of(cl, struct cached_dev, disk.cl);
1071 1073
1072 cancel_delayed_work_sync(&dc->writeback_rate_update); 1074 cancel_delayed_work_sync(&dc->writeback_rate_update);
1073 kthread_stop(dc->writeback_thread); 1075 if (!IS_ERR_OR_NULL(dc->writeback_thread))
1076 kthread_stop(dc->writeback_thread);
1074 1077
1075 mutex_lock(&bch_register_lock); 1078 mutex_lock(&bch_register_lock);
1076 1079
@@ -1081,12 +1084,8 @@ static void cached_dev_free(struct closure *cl)
1081 1084
1082 mutex_unlock(&bch_register_lock); 1085 mutex_unlock(&bch_register_lock);
1083 1086
1084 if (!IS_ERR_OR_NULL(dc->bdev)) { 1087 if (!IS_ERR_OR_NULL(dc->bdev))
1085 if (dc->bdev->bd_disk)
1086 blk_sync_queue(bdev_get_queue(dc->bdev));
1087
1088 blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 1088 blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1089 }
1090 1089
1091 wake_up(&unregister_wait); 1090 wake_up(&unregister_wait);
1092 1091
@@ -1213,7 +1212,9 @@ void bch_flash_dev_release(struct kobject *kobj)
1213static void flash_dev_free(struct closure *cl) 1212static void flash_dev_free(struct closure *cl)
1214{ 1213{
1215 struct bcache_device *d = container_of(cl, struct bcache_device, cl); 1214 struct bcache_device *d = container_of(cl, struct bcache_device, cl);
1215 mutex_lock(&bch_register_lock);
1216 bcache_device_free(d); 1216 bcache_device_free(d);
1217 mutex_unlock(&bch_register_lock);
1217 kobject_put(&d->kobj); 1218 kobject_put(&d->kobj);
1218} 1219}
1219 1220
@@ -1221,7 +1222,9 @@ static void flash_dev_flush(struct closure *cl)
1221{ 1222{
1222 struct bcache_device *d = container_of(cl, struct bcache_device, cl); 1223 struct bcache_device *d = container_of(cl, struct bcache_device, cl);
1223 1224
1225 mutex_lock(&bch_register_lock);
1224 bcache_device_unlink(d); 1226 bcache_device_unlink(d);
1227 mutex_unlock(&bch_register_lock);
1225 kobject_del(&d->kobj); 1228 kobject_del(&d->kobj);
1226 continue_at(cl, flash_dev_free, system_wq); 1229 continue_at(cl, flash_dev_free, system_wq);
1227} 1230}
@@ -1277,6 +1280,9 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
1277 if (test_bit(CACHE_SET_STOPPING, &c->flags)) 1280 if (test_bit(CACHE_SET_STOPPING, &c->flags))
1278 return -EINTR; 1281 return -EINTR;
1279 1282
1283 if (!test_bit(CACHE_SET_RUNNING, &c->flags))
1284 return -EPERM;
1285
1280 u = uuid_find_empty(c); 1286 u = uuid_find_empty(c);
1281 if (!u) { 1287 if (!u) {
1282 pr_err("Can't create volume, no room for UUID"); 1288 pr_err("Can't create volume, no room for UUID");
@@ -1346,8 +1352,11 @@ static void cache_set_free(struct closure *cl)
1346 bch_journal_free(c); 1352 bch_journal_free(c);
1347 1353
1348 for_each_cache(ca, c, i) 1354 for_each_cache(ca, c, i)
1349 if (ca) 1355 if (ca) {
1356 ca->set = NULL;
1357 c->cache[ca->sb.nr_this_dev] = NULL;
1350 kobject_put(&ca->kobj); 1358 kobject_put(&ca->kobj);
1359 }
1351 1360
1352 bch_bset_sort_state_free(&c->sort); 1361 bch_bset_sort_state_free(&c->sort);
1353 free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c))); 1362 free_pages((unsigned long) c->uuids, ilog2(bucket_pages(c)));
@@ -1405,9 +1414,11 @@ static void cache_set_flush(struct closure *cl)
1405 if (ca->alloc_thread) 1414 if (ca->alloc_thread)
1406 kthread_stop(ca->alloc_thread); 1415 kthread_stop(ca->alloc_thread);
1407 1416
1408 cancel_delayed_work_sync(&c->journal.work); 1417 if (c->journal.cur) {
1409 /* flush last journal entry if needed */ 1418 cancel_delayed_work_sync(&c->journal.work);
1410 c->journal.work.work.func(&c->journal.work.work); 1419 /* flush last journal entry if needed */
1420 c->journal.work.work.func(&c->journal.work.work);
1421 }
1411 1422
1412 closure_return(cl); 1423 closure_return(cl);
1413} 1424}
@@ -1586,7 +1597,7 @@ static void run_cache_set(struct cache_set *c)
1586 goto err; 1597 goto err;
1587 1598
1588 err = "error reading btree root"; 1599 err = "error reading btree root";
1589 c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true); 1600 c->root = bch_btree_node_get(c, NULL, k, j->btree_level, true, NULL);
1590 if (IS_ERR_OR_NULL(c->root)) 1601 if (IS_ERR_OR_NULL(c->root))
1591 goto err; 1602 goto err;
1592 1603
@@ -1661,7 +1672,7 @@ static void run_cache_set(struct cache_set *c)
1661 goto err; 1672 goto err;
1662 1673
1663 err = "cannot allocate new btree root"; 1674 err = "cannot allocate new btree root";
1664 c->root = bch_btree_node_alloc(c, NULL, 0); 1675 c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL);
1665 if (IS_ERR_OR_NULL(c->root)) 1676 if (IS_ERR_OR_NULL(c->root))
1666 goto err; 1677 goto err;
1667 1678
@@ -1697,6 +1708,7 @@ static void run_cache_set(struct cache_set *c)
1697 1708
1698 flash_devs_run(c); 1709 flash_devs_run(c);
1699 1710
1711 set_bit(CACHE_SET_RUNNING, &c->flags);
1700 return; 1712 return;
1701err: 1713err:
1702 closure_sync(&cl); 1714 closure_sync(&cl);
@@ -1760,6 +1772,7 @@ found:
1760 pr_debug("set version = %llu", c->sb.version); 1772 pr_debug("set version = %llu", c->sb.version);
1761 } 1773 }
1762 1774
1775 kobject_get(&ca->kobj);
1763 ca->set = c; 1776 ca->set = c;
1764 ca->set->cache[ca->sb.nr_this_dev] = ca; 1777 ca->set->cache[ca->sb.nr_this_dev] = ca;
1765 c->cache_by_alloc[c->caches_loaded++] = ca; 1778 c->cache_by_alloc[c->caches_loaded++] = ca;
@@ -1780,8 +1793,10 @@ void bch_cache_release(struct kobject *kobj)
1780 struct cache *ca = container_of(kobj, struct cache, kobj); 1793 struct cache *ca = container_of(kobj, struct cache, kobj);
1781 unsigned i; 1794 unsigned i;
1782 1795
1783 if (ca->set) 1796 if (ca->set) {
1797 BUG_ON(ca->set->cache[ca->sb.nr_this_dev] != ca);
1784 ca->set->cache[ca->sb.nr_this_dev] = NULL; 1798 ca->set->cache[ca->sb.nr_this_dev] = NULL;
1799 }
1785 1800
1786 bio_split_pool_free(&ca->bio_split_hook); 1801 bio_split_pool_free(&ca->bio_split_hook);
1787 1802
@@ -1798,10 +1813,8 @@ void bch_cache_release(struct kobject *kobj)
1798 if (ca->sb_bio.bi_inline_vecs[0].bv_page) 1813 if (ca->sb_bio.bi_inline_vecs[0].bv_page)
1799 put_page(ca->sb_bio.bi_io_vec[0].bv_page); 1814 put_page(ca->sb_bio.bi_io_vec[0].bv_page);
1800 1815
1801 if (!IS_ERR_OR_NULL(ca->bdev)) { 1816 if (!IS_ERR_OR_NULL(ca->bdev))
1802 blk_sync_queue(bdev_get_queue(ca->bdev));
1803 blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); 1817 blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1804 }
1805 1818
1806 kfree(ca); 1819 kfree(ca);
1807 module_put(THIS_MODULE); 1820 module_put(THIS_MODULE);
@@ -1844,7 +1857,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
1844} 1857}
1845 1858
1846static void register_cache(struct cache_sb *sb, struct page *sb_page, 1859static void register_cache(struct cache_sb *sb, struct page *sb_page,
1847 struct block_device *bdev, struct cache *ca) 1860 struct block_device *bdev, struct cache *ca)
1848{ 1861{
1849 char name[BDEVNAME_SIZE]; 1862 char name[BDEVNAME_SIZE];
1850 const char *err = "cannot allocate memory"; 1863 const char *err = "cannot allocate memory";
@@ -1877,10 +1890,12 @@ static void register_cache(struct cache_sb *sb, struct page *sb_page,
1877 goto err; 1890 goto err;
1878 1891
1879 pr_info("registered cache device %s", bdevname(bdev, name)); 1892 pr_info("registered cache device %s", bdevname(bdev, name));
1893out:
1894 kobject_put(&ca->kobj);
1880 return; 1895 return;
1881err: 1896err:
1882 pr_notice("error opening %s: %s", bdevname(bdev, name), err); 1897 pr_notice("error opening %s: %s", bdevname(bdev, name), err);
1883 kobject_put(&ca->kobj); 1898 goto out;
1884} 1899}
1885 1900
1886/* Global interfaces/init */ 1901/* Global interfaces/init */
@@ -1945,10 +1960,12 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
1945 if (IS_ERR(bdev)) { 1960 if (IS_ERR(bdev)) {
1946 if (bdev == ERR_PTR(-EBUSY)) { 1961 if (bdev == ERR_PTR(-EBUSY)) {
1947 bdev = lookup_bdev(strim(path)); 1962 bdev = lookup_bdev(strim(path));
1963 mutex_lock(&bch_register_lock);
1948 if (!IS_ERR(bdev) && bch_is_open(bdev)) 1964 if (!IS_ERR(bdev) && bch_is_open(bdev))
1949 err = "device already registered"; 1965 err = "device already registered";
1950 else 1966 else
1951 err = "device busy"; 1967 err = "device busy";
1968 mutex_unlock(&bch_register_lock);
1952 } 1969 }
1953 goto err; 1970 goto err;
1954 } 1971 }
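
Among the super.c changes, register_cache() now funnels both its success and failure paths through one exit label that drops the registration reference, instead of duplicating the kobject_put(). A compact userspace model of that single-exit pattern (hypothetical names):

#include <stdio.h>
#include <stdlib.h>

struct cache { int refs; };

static void cache_put(struct cache *ca)
{
	if (--ca->refs == 0)
		free(ca);
}

/* every return path drops the registration reference exactly once */
static int register_cache(int fail)
{
	struct cache *ca = malloc(sizeof(*ca));
	int ret = 0;

	if (!ca)
		return -1;
	ca->refs = 1;

	if (fail) {
		ret = -1;
		goto err;
	}
	printf("registered\n");
out:
	cache_put(ca);		/* shared by success and failure */
	return ret;
err:
	printf("error opening device\n");
	goto out;
}

int main(void)
{
	register_cache(0);
	register_cache(1);
	return 0;
}
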
diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h
index ac7d0d1f70d7..98df7572b5f7 100644
--- a/drivers/md/bcache/util.h
+++ b/drivers/md/bcache/util.h
@@ -416,8 +416,8 @@ do { \
416 average_frequency, frequency_units); \ 416 average_frequency, frequency_units); \
417 __print_time_stat(stats, name, \ 417 __print_time_stat(stats, name, \
418 average_duration, duration_units); \ 418 average_duration, duration_units); \
419 __print_time_stat(stats, name, \ 419 sysfs_print(name ## _ ##max_duration ## _ ## duration_units, \
420 max_duration, duration_units); \ 420 div_u64((stats)->max_duration, NSEC_PER_ ## duration_units));\
421 \ 421 \
422 sysfs_print(name ## _last_ ## frequency_units, (stats)->last \ 422 sysfs_print(name ## _last_ ## frequency_units, (stats)->last \
423 ? div_s64(local_clock() - (stats)->last, \ 423 ? div_s64(local_clock() - (stats)->last, \
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index f4300e4c0114..f1986bcd1bf0 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -239,7 +239,7 @@ static void read_dirty(struct cached_dev *dc)
239 if (KEY_START(&w->key) != dc->last_read || 239 if (KEY_START(&w->key) != dc->last_read ||
240 jiffies_to_msecs(delay) > 50) 240 jiffies_to_msecs(delay) > 50)
241 while (!kthread_should_stop() && delay) 241 while (!kthread_should_stop() && delay)
242 delay = schedule_timeout_uninterruptible(delay); 242 delay = schedule_timeout_interruptible(delay);
243 243
244 dc->last_read = KEY_OFFSET(&w->key); 244 dc->last_read = KEY_OFFSET(&w->key);
245 245
@@ -436,7 +436,7 @@ static int bch_writeback_thread(void *arg)
436 while (delay && 436 while (delay &&
437 !kthread_should_stop() && 437 !kthread_should_stop() &&
438 !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags)) 438 !test_bit(BCACHE_DEV_DETACHING, &dc->disk.flags))
439 delay = schedule_timeout_uninterruptible(delay); 439 delay = schedule_timeout_interruptible(delay);
440 } 440 }
441 } 441 }
442 442
@@ -478,7 +478,7 @@ void bch_sectors_dirty_init(struct cached_dev *dc)
478 dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk); 478 dc->disk.sectors_dirty_last = bcache_dev_sectors_dirty(&dc->disk);
479} 479}
480 480
481int bch_cached_dev_writeback_init(struct cached_dev *dc) 481void bch_cached_dev_writeback_init(struct cached_dev *dc)
482{ 482{
483 sema_init(&dc->in_flight, 64); 483 sema_init(&dc->in_flight, 64);
484 init_rwsem(&dc->writeback_lock); 484 init_rwsem(&dc->writeback_lock);
@@ -494,14 +494,20 @@ int bch_cached_dev_writeback_init(struct cached_dev *dc)
494 dc->writeback_rate_d_term = 30; 494 dc->writeback_rate_d_term = 30;
495 dc->writeback_rate_p_term_inverse = 6000; 495 dc->writeback_rate_p_term_inverse = 6000;
496 496
497 INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
498}
499
500int bch_cached_dev_writeback_start(struct cached_dev *dc)
501{
497 dc->writeback_thread = kthread_create(bch_writeback_thread, dc, 502 dc->writeback_thread = kthread_create(bch_writeback_thread, dc,
498 "bcache_writeback"); 503 "bcache_writeback");
499 if (IS_ERR(dc->writeback_thread)) 504 if (IS_ERR(dc->writeback_thread))
500 return PTR_ERR(dc->writeback_thread); 505 return PTR_ERR(dc->writeback_thread);
501 506
502 INIT_DELAYED_WORK(&dc->writeback_rate_update, update_writeback_rate);
503 schedule_delayed_work(&dc->writeback_rate_update, 507 schedule_delayed_work(&dc->writeback_rate_update,
504 dc->writeback_rate_update_seconds * HZ); 508 dc->writeback_rate_update_seconds * HZ);
505 509
510 bch_writeback_queue(dc);
511
506 return 0; 512 return 0;
507} 513}
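
The writeback.c/writeback.h changes split setup into an init step that only fills in defaults and cannot fail, and a separate start step that creates the kthread and schedules the rate-update work and therefore can fail; bch_cached_dev_attach() only calls the start step once the device is actually attached. A minimal userspace sketch of the same init/start split (pthreads standing in for kthreads, names hypothetical):

#include <pthread.h>
#include <stdio.h>

struct dev {
	int rate_update_seconds;
	pthread_t writeback_thread;
	int thread_running;
};

static void *writeback_thread(void *arg)
{
	(void)arg;
	return NULL;	/* stand-in for the real writeback loop */
}

/* init: only fills in defaults, cannot fail */
static void writeback_init(struct dev *d)
{
	d->rate_update_seconds = 5;
	d->thread_running = 0;
}

/* start: may fail, so it is called later, when attaching the device */
static int writeback_start(struct dev *d)
{
	if (pthread_create(&d->writeback_thread, NULL, writeback_thread, d))
		return -1;
	d->thread_running = 1;
	return 0;
}

int main(void)
{
	struct dev d;

	writeback_init(&d);
	if (writeback_start(&d))
		return 1;
	pthread_join(d.writeback_thread, NULL);
	return 0;
}
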
diff --git a/drivers/md/bcache/writeback.h b/drivers/md/bcache/writeback.h
index e2f8598937ac..0a9dab187b79 100644
--- a/drivers/md/bcache/writeback.h
+++ b/drivers/md/bcache/writeback.h
@@ -85,6 +85,7 @@ static inline void bch_writeback_add(struct cached_dev *dc)
85void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int); 85void bcache_dev_sectors_dirty_add(struct cache_set *, unsigned, uint64_t, int);
86 86
87void bch_sectors_dirty_init(struct cached_dev *dc); 87void bch_sectors_dirty_init(struct cached_dev *dc);
88int bch_cached_dev_writeback_init(struct cached_dev *); 88void bch_cached_dev_writeback_init(struct cached_dev *);
89int bch_cached_dev_writeback_start(struct cached_dev *);
89 90
90#endif 91#endif
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index d2899e7eb3aa..06709257adde 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -330,7 +330,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
330 disk_super->discard_root = cpu_to_le64(cmd->discard_root); 330 disk_super->discard_root = cpu_to_le64(cmd->discard_root);
331 disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size); 331 disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
332 disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks)); 332 disk_super->discard_nr_blocks = cpu_to_le64(from_oblock(cmd->discard_nr_blocks));
333 disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); 333 disk_super->metadata_block_size = cpu_to_le32(DM_CACHE_METADATA_BLOCK_SIZE);
334 disk_super->data_block_size = cpu_to_le32(cmd->data_block_size); 334 disk_super->data_block_size = cpu_to_le32(cmd->data_block_size);
335 disk_super->cache_blocks = cpu_to_le32(0); 335 disk_super->cache_blocks = cpu_to_le32(0);
336 336
@@ -478,7 +478,7 @@ static int __create_persistent_data_objects(struct dm_cache_metadata *cmd,
478 bool may_format_device) 478 bool may_format_device)
479{ 479{
480 int r; 480 int r;
481 cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE, 481 cmd->bm = dm_block_manager_create(cmd->bdev, DM_CACHE_METADATA_BLOCK_SIZE << SECTOR_SHIFT,
482 CACHE_METADATA_CACHE_SIZE, 482 CACHE_METADATA_CACHE_SIZE,
483 CACHE_MAX_CONCURRENT_LOCKS); 483 CACHE_MAX_CONCURRENT_LOCKS);
484 if (IS_ERR(cmd->bm)) { 484 if (IS_ERR(cmd->bm)) {
diff --git a/drivers/md/dm-cache-metadata.h b/drivers/md/dm-cache-metadata.h
index cd70a78623a3..7383c90ccdb8 100644
--- a/drivers/md/dm-cache-metadata.h
+++ b/drivers/md/dm-cache-metadata.h
@@ -9,19 +9,17 @@
9 9
10#include "dm-cache-block-types.h" 10#include "dm-cache-block-types.h"
11#include "dm-cache-policy-internal.h" 11#include "dm-cache-policy-internal.h"
12#include "persistent-data/dm-space-map-metadata.h"
12 13
13/*----------------------------------------------------------------*/ 14/*----------------------------------------------------------------*/
14 15
15#define DM_CACHE_METADATA_BLOCK_SIZE 4096 16#define DM_CACHE_METADATA_BLOCK_SIZE DM_SM_METADATA_BLOCK_SIZE
16 17
17/* FIXME: remove this restriction */ 18/* FIXME: remove this restriction */
18/* 19/*
19 * The metadata device is currently limited in size. 20 * The metadata device is currently limited in size.
20 *
21 * We have one block of index, which can hold 255 index entries. Each
22 * index entry contains allocation info about 16k metadata blocks.
23 */ 21 */
24#define DM_CACHE_METADATA_MAX_SECTORS (255 * (1 << 14) * (DM_CACHE_METADATA_BLOCK_SIZE / (1 << SECTOR_SHIFT))) 22#define DM_CACHE_METADATA_MAX_SECTORS DM_SM_METADATA_MAX_SECTORS
25 23
26/* 24/*
27 * A metadata device larger than 16GB triggers a warning. 25 * A metadata device larger than 16GB triggers a warning.
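
The dm-cache-metadata changes redefine DM_CACHE_METADATA_BLOCK_SIZE in sectors (sharing the space-map constant), so the superblock field and status output take the value as-is while the block manager, which wants bytes, gets it shifted by SECTOR_SHIFT. A small worked example of the unit bookkeeping (illustrative values only):

#include <stdio.h>

#define SECTOR_SHIFT		9	/* 512-byte sectors */
#define METADATA_BLOCK_SECTORS	8	/* a 4 KiB block expressed in sectors */

int main(void)
{
	unsigned sectors = METADATA_BLOCK_SECTORS;
	unsigned bytes = sectors << SECTOR_SHIFT;	/* 8 * 512 = 4096 */

	/* the on-disk field and status output now carry sectors directly */
	printf("superblock field: %u sectors\n", sectors);
	/* the block manager still wants bytes, hence the shift at that call site */
	printf("block manager block size: %u bytes\n", bytes);
	return 0;
}
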
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index 2c63326638b6..1af40ee209e2 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -718,6 +718,22 @@ static int bio_triggers_commit(struct cache *cache, struct bio *bio)
718 return bio->bi_rw & (REQ_FLUSH | REQ_FUA); 718 return bio->bi_rw & (REQ_FLUSH | REQ_FUA);
719} 719}
720 720
721/*
722 * You must increment the deferred set whilst the prison cell is held. To
723 * encourage this, we ask for 'cell' to be passed in.
724 */
725static void inc_ds(struct cache *cache, struct bio *bio,
726 struct dm_bio_prison_cell *cell)
727{
728 size_t pb_data_size = get_per_bio_data_size(cache);
729 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
730
731 BUG_ON(!cell);
732 BUG_ON(pb->all_io_entry);
733
734 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
735}
736
721static void issue(struct cache *cache, struct bio *bio) 737static void issue(struct cache *cache, struct bio *bio)
722{ 738{
723 unsigned long flags; 739 unsigned long flags;
@@ -737,6 +753,12 @@ static void issue(struct cache *cache, struct bio *bio)
737 spin_unlock_irqrestore(&cache->lock, flags); 753 spin_unlock_irqrestore(&cache->lock, flags);
738} 754}
739 755
756static void inc_and_issue(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell *cell)
757{
758 inc_ds(cache, bio, cell);
759 issue(cache, bio);
760}
761
740static void defer_writethrough_bio(struct cache *cache, struct bio *bio) 762static void defer_writethrough_bio(struct cache *cache, struct bio *bio)
741{ 763{
742 unsigned long flags; 764 unsigned long flags;
@@ -1015,6 +1037,11 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio)
1015 1037
1016 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg); 1038 dm_hook_bio(&pb->hook_info, bio, overwrite_endio, mg);
1017 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock); 1039 remap_to_cache_dirty(mg->cache, bio, mg->new_oblock, mg->cblock);
1040
1041 /*
1042 * No need to inc_ds() here, since the cell will be held for the
1043 * duration of the io.
1044 */
1018 generic_make_request(bio); 1045 generic_make_request(bio);
1019} 1046}
1020 1047
@@ -1115,8 +1142,7 @@ static void check_for_quiesced_migrations(struct cache *cache,
1115 return; 1142 return;
1116 1143
1117 INIT_LIST_HEAD(&work); 1144 INIT_LIST_HEAD(&work);
1118 if (pb->all_io_entry) 1145 dm_deferred_entry_dec(pb->all_io_entry, &work);
1119 dm_deferred_entry_dec(pb->all_io_entry, &work);
1120 1146
1121 if (!list_empty(&work)) 1147 if (!list_empty(&work))
1122 queue_quiesced_migrations(cache, &work); 1148 queue_quiesced_migrations(cache, &work);
@@ -1252,6 +1278,11 @@ static void process_flush_bio(struct cache *cache, struct bio *bio)
1252 else 1278 else
1253 remap_to_cache(cache, bio, 0); 1279 remap_to_cache(cache, bio, 0);
1254 1280
1281 /*
1282 * REQ_FLUSH is not directed at any particular block so we don't
1283 * need to inc_ds(). REQ_FUA's are split into a write + REQ_FLUSH
1284 * by dm-core.
1285 */
1255 issue(cache, bio); 1286 issue(cache, bio);
1256} 1287}
1257 1288
@@ -1301,15 +1332,6 @@ static void inc_miss_counter(struct cache *cache, struct bio *bio)
1301 &cache->stats.read_miss : &cache->stats.write_miss); 1332 &cache->stats.read_miss : &cache->stats.write_miss);
1302} 1333}
1303 1334
1304static void issue_cache_bio(struct cache *cache, struct bio *bio,
1305 struct per_bio_data *pb,
1306 dm_oblock_t oblock, dm_cblock_t cblock)
1307{
1308 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
1309 remap_to_cache_dirty(cache, bio, oblock, cblock);
1310 issue(cache, bio);
1311}
1312
1313static void process_bio(struct cache *cache, struct prealloc *structs, 1335static void process_bio(struct cache *cache, struct prealloc *structs,
1314 struct bio *bio) 1336 struct bio *bio)
1315{ 1337{
@@ -1318,8 +1340,6 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
1318 dm_oblock_t block = get_bio_block(cache, bio); 1340 dm_oblock_t block = get_bio_block(cache, bio);
1319 struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell; 1341 struct dm_bio_prison_cell *cell_prealloc, *old_ocell, *new_ocell;
1320 struct policy_result lookup_result; 1342 struct policy_result lookup_result;
1321 size_t pb_data_size = get_per_bio_data_size(cache);
1322 struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size);
1323 bool discarded_block = is_discarded_oblock(cache, block); 1343 bool discarded_block = is_discarded_oblock(cache, block);
1324 bool passthrough = passthrough_mode(&cache->features); 1344 bool passthrough = passthrough_mode(&cache->features);
1325 bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache)); 1345 bool can_migrate = !passthrough && (discarded_block || spare_migration_bandwidth(cache));
@@ -1359,9 +1379,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
1359 1379
1360 } else { 1380 } else {
1361 /* FIXME: factor out issue_origin() */ 1381 /* FIXME: factor out issue_origin() */
1362 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
1363 remap_to_origin_clear_discard(cache, bio, block); 1382 remap_to_origin_clear_discard(cache, bio, block);
1364 issue(cache, bio); 1383 inc_and_issue(cache, bio, new_ocell);
1365 } 1384 }
1366 } else { 1385 } else {
1367 inc_hit_counter(cache, bio); 1386 inc_hit_counter(cache, bio);
@@ -1369,20 +1388,21 @@ static void process_bio(struct cache *cache, struct prealloc *structs,
1369 if (bio_data_dir(bio) == WRITE && 1388 if (bio_data_dir(bio) == WRITE &&
1370 writethrough_mode(&cache->features) && 1389 writethrough_mode(&cache->features) &&
1371 !is_dirty(cache, lookup_result.cblock)) { 1390 !is_dirty(cache, lookup_result.cblock)) {
1372 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
1373 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 1391 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
1374 issue(cache, bio); 1392 inc_and_issue(cache, bio, new_ocell);
1375 } else 1393
1376 issue_cache_bio(cache, bio, pb, block, lookup_result.cblock); 1394 } else {
1395 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
1396 inc_and_issue(cache, bio, new_ocell);
1397 }
1377 } 1398 }
1378 1399
1379 break; 1400 break;
1380 1401
1381 case POLICY_MISS: 1402 case POLICY_MISS:
1382 inc_miss_counter(cache, bio); 1403 inc_miss_counter(cache, bio);
1383 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
1384 remap_to_origin_clear_discard(cache, bio, block); 1404 remap_to_origin_clear_discard(cache, bio, block);
1385 issue(cache, bio); 1405 inc_and_issue(cache, bio, new_ocell);
1386 break; 1406 break;
1387 1407
1388 case POLICY_NEW: 1408 case POLICY_NEW:
@@ -1501,6 +1521,9 @@ static void process_deferred_flush_bios(struct cache *cache, bool submit_bios)
1501 bio_list_init(&cache->deferred_flush_bios); 1521 bio_list_init(&cache->deferred_flush_bios);
1502 spin_unlock_irqrestore(&cache->lock, flags); 1522 spin_unlock_irqrestore(&cache->lock, flags);
1503 1523
1524 /*
1525 * These bios have already been through inc_ds()
1526 */
1504 while ((bio = bio_list_pop(&bios))) 1527 while ((bio = bio_list_pop(&bios)))
1505 submit_bios ? generic_make_request(bio) : bio_io_error(bio); 1528 submit_bios ? generic_make_request(bio) : bio_io_error(bio);
1506} 1529}
@@ -1518,6 +1541,9 @@ static void process_deferred_writethrough_bios(struct cache *cache)
1518 bio_list_init(&cache->deferred_writethrough_bios); 1541 bio_list_init(&cache->deferred_writethrough_bios);
1519 spin_unlock_irqrestore(&cache->lock, flags); 1542 spin_unlock_irqrestore(&cache->lock, flags);
1520 1543
1544 /*
1545 * These bios have already been through inc_ds()
1546 */
1521 while ((bio = bio_list_pop(&bios))) 1547 while ((bio = bio_list_pop(&bios)))
1522 generic_make_request(bio); 1548 generic_make_request(bio);
1523} 1549}
@@ -1694,6 +1720,7 @@ static void do_worker(struct work_struct *ws)
1694 1720
1695 if (commit_if_needed(cache)) { 1721 if (commit_if_needed(cache)) {
1696 process_deferred_flush_bios(cache, false); 1722 process_deferred_flush_bios(cache, false);
1723 process_migrations(cache, &cache->need_commit_migrations, migration_failure);
1697 1724
1698 /* 1725 /*
1699 * FIXME: rollback metadata or just go into a 1726 * FIXME: rollback metadata or just go into a
@@ -2406,16 +2433,13 @@ out:
2406 return r; 2433 return r;
2407} 2434}
2408 2435
2409static int cache_map(struct dm_target *ti, struct bio *bio) 2436static int __cache_map(struct cache *cache, struct bio *bio, struct dm_bio_prison_cell **cell)
2410{ 2437{
2411 struct cache *cache = ti->private;
2412
2413 int r; 2438 int r;
2414 dm_oblock_t block = get_bio_block(cache, bio); 2439 dm_oblock_t block = get_bio_block(cache, bio);
2415 size_t pb_data_size = get_per_bio_data_size(cache); 2440 size_t pb_data_size = get_per_bio_data_size(cache);
2416 bool can_migrate = false; 2441 bool can_migrate = false;
2417 bool discarded_block; 2442 bool discarded_block;
2418 struct dm_bio_prison_cell *cell;
2419 struct policy_result lookup_result; 2443 struct policy_result lookup_result;
2420 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size); 2444 struct per_bio_data *pb = init_per_bio_data(bio, pb_data_size);
2421 2445
@@ -2437,15 +2461,15 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
2437 /* 2461 /*
2438 * Check to see if that block is currently migrating. 2462 * Check to see if that block is currently migrating.
2439 */ 2463 */
2440 cell = alloc_prison_cell(cache); 2464 *cell = alloc_prison_cell(cache);
2441 if (!cell) { 2465 if (!*cell) {
2442 defer_bio(cache, bio); 2466 defer_bio(cache, bio);
2443 return DM_MAPIO_SUBMITTED; 2467 return DM_MAPIO_SUBMITTED;
2444 } 2468 }
2445 2469
2446 r = bio_detain(cache, block, bio, cell, 2470 r = bio_detain(cache, block, bio, *cell,
2447 (cell_free_fn) free_prison_cell, 2471 (cell_free_fn) free_prison_cell,
2448 cache, &cell); 2472 cache, cell);
2449 if (r) { 2473 if (r) {
2450 if (r < 0) 2474 if (r < 0)
2451 defer_bio(cache, bio); 2475 defer_bio(cache, bio);
@@ -2458,11 +2482,12 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
2458 r = policy_map(cache->policy, block, false, can_migrate, discarded_block, 2482 r = policy_map(cache->policy, block, false, can_migrate, discarded_block,
2459 bio, &lookup_result); 2483 bio, &lookup_result);
2460 if (r == -EWOULDBLOCK) { 2484 if (r == -EWOULDBLOCK) {
2461 cell_defer(cache, cell, true); 2485 cell_defer(cache, *cell, true);
2462 return DM_MAPIO_SUBMITTED; 2486 return DM_MAPIO_SUBMITTED;
2463 2487
2464 } else if (r) { 2488 } else if (r) {
2465 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r); 2489 DMERR_LIMIT("Unexpected return from cache replacement policy: %d", r);
2490 cell_defer(cache, *cell, false);
2466 bio_io_error(bio); 2491 bio_io_error(bio);
2467 return DM_MAPIO_SUBMITTED; 2492 return DM_MAPIO_SUBMITTED;
2468 } 2493 }
@@ -2476,52 +2501,44 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
2476 * We need to invalidate this block, so 2501 * We need to invalidate this block, so
2477 * defer for the worker thread. 2502 * defer for the worker thread.
2478 */ 2503 */
2479 cell_defer(cache, cell, true); 2504 cell_defer(cache, *cell, true);
2480 r = DM_MAPIO_SUBMITTED; 2505 r = DM_MAPIO_SUBMITTED;
2481 2506
2482 } else { 2507 } else {
2483 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
2484 inc_miss_counter(cache, bio); 2508 inc_miss_counter(cache, bio);
2485 remap_to_origin_clear_discard(cache, bio, block); 2509 remap_to_origin_clear_discard(cache, bio, block);
2486
2487 cell_defer(cache, cell, false);
2488 } 2510 }
2489 2511
2490 } else { 2512 } else {
2491 inc_hit_counter(cache, bio); 2513 inc_hit_counter(cache, bio);
2492 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
2493
2494 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) && 2514 if (bio_data_dir(bio) == WRITE && writethrough_mode(&cache->features) &&
2495 !is_dirty(cache, lookup_result.cblock)) 2515 !is_dirty(cache, lookup_result.cblock))
2496 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock); 2516 remap_to_origin_then_cache(cache, bio, block, lookup_result.cblock);
2497 else 2517 else
2498 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock); 2518 remap_to_cache_dirty(cache, bio, block, lookup_result.cblock);
2499
2500 cell_defer(cache, cell, false);
2501 } 2519 }
2502 break; 2520 break;
2503 2521
2504 case POLICY_MISS: 2522 case POLICY_MISS:
2505 inc_miss_counter(cache, bio); 2523 inc_miss_counter(cache, bio);
2506 pb->all_io_entry = dm_deferred_entry_inc(cache->all_io_ds);
2507
2508 if (pb->req_nr != 0) { 2524 if (pb->req_nr != 0) {
2509 /* 2525 /*
2510 * This is a duplicate writethrough io that is no 2526 * This is a duplicate writethrough io that is no
2511 * longer needed because the block has been demoted. 2527 * longer needed because the block has been demoted.
2512 */ 2528 */
2513 bio_endio(bio, 0); 2529 bio_endio(bio, 0);
2514 cell_defer(cache, cell, false); 2530 cell_defer(cache, *cell, false);
2515 return DM_MAPIO_SUBMITTED; 2531 r = DM_MAPIO_SUBMITTED;
2516 } else { 2532
2533 } else
2517 remap_to_origin_clear_discard(cache, bio, block); 2534 remap_to_origin_clear_discard(cache, bio, block);
2518 cell_defer(cache, cell, false); 2535
2519 }
2520 break; 2536 break;
2521 2537
2522 default: 2538 default:
2523 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__, 2539 DMERR_LIMIT("%s: erroring bio: unknown policy op: %u", __func__,
2524 (unsigned) lookup_result.op); 2540 (unsigned) lookup_result.op);
2541 cell_defer(cache, *cell, false);
2525 bio_io_error(bio); 2542 bio_io_error(bio);
2526 r = DM_MAPIO_SUBMITTED; 2543 r = DM_MAPIO_SUBMITTED;
2527 } 2544 }
@@ -2529,6 +2546,21 @@ static int cache_map(struct dm_target *ti, struct bio *bio)
2529 return r; 2546 return r;
2530} 2547}
2531 2548
2549static int cache_map(struct dm_target *ti, struct bio *bio)
2550{
2551 int r;
2552 struct dm_bio_prison_cell *cell;
2553 struct cache *cache = ti->private;
2554
2555 r = __cache_map(cache, bio, &cell);
2556 if (r == DM_MAPIO_REMAPPED) {
2557 inc_ds(cache, bio, cell);
2558 cell_defer(cache, cell, false);
2559 }
2560
2561 return r;
2562}
2563
2532static int cache_end_io(struct dm_target *ti, struct bio *bio, int error) 2564static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
2533{ 2565{
2534 struct cache *cache = ti->private; 2566 struct cache *cache = ti->private;
@@ -2808,7 +2840,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
2808 residency = policy_residency(cache->policy); 2840 residency = policy_residency(cache->policy);
2809 2841
2810 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", 2842 DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
2811 (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), 2843 (unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
2812 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), 2844 (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
2813 (unsigned long long)nr_blocks_metadata, 2845 (unsigned long long)nr_blocks_metadata,
2814 cache->sectors_per_block, 2846 cache->sectors_per_block,
@@ -3062,7 +3094,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3062 */ 3094 */
3063 if (io_opt_sectors < cache->sectors_per_block || 3095 if (io_opt_sectors < cache->sectors_per_block ||
3064 do_div(io_opt_sectors, cache->sectors_per_block)) { 3096 do_div(io_opt_sectors, cache->sectors_per_block)) {
3065 blk_limits_io_min(limits, 0); 3097 blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
3066 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT); 3098 blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
3067 } 3099 }
3068 set_discard_limits(cache, limits); 3100 set_discard_limits(cache, limits);
@@ -3072,7 +3104,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
3072 3104
3073static struct target_type cache_target = { 3105static struct target_type cache_target = {
3074 .name = "cache", 3106 .name = "cache",
3075 .version = {1, 4, 0}, 3107 .version = {1, 5, 0},
3076 .module = THIS_MODULE, 3108 .module = THIS_MODULE,
3077 .ctr = cache_ctr, 3109 .ctr = cache_ctr,
3078 .dtr = cache_dtr, 3110 .dtr = cache_dtr,
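
The dm-cache-target.c changes enforce that the deferred set is incremented while the bio prison cell is still held: __cache_map() does the mapping and leaves the cell to the caller, and cache_map() performs inc_ds() before releasing the cell, and only when the bio was remapped. A toy model of that ordering contract (hypothetical names, no real locking):

#include <stdio.h>
#include <stdbool.h>

#define MAPPED		1
#define SUBMITTED	0

struct cell { bool held; };

static void cell_release(struct cell *c) { c->held = false; }

/* must only be called while the cell is still held */
static void inc_deferred_set(struct cell *c)
{
	if (!c->held)
		printf("bug: cell released too early\n");
}

/* core mapping logic: leaves the cell held when it remaps the bio */
static int map_core(struct cell *c, bool hit)
{
	c->held = true;
	if (!hit) {
		cell_release(c);	/* miss handling deferred to the worker */
		return SUBMITTED;
	}
	return MAPPED;
}

/* wrapper: increment the deferred set, then release, in that order */
static int map(struct cell *c, bool hit)
{
	int r = map_core(c, hit);

	if (r == MAPPED) {
		inc_deferred_set(c);
		cell_release(c);
	}
	return r;
}

int main(void)
{
	struct cell c;

	printf("%d %d\n", map(&c, true), map(&c, false));
	return 0;
}
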
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 4cba2d808afb..2785007e0e46 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,7 +59,7 @@ struct dm_crypt_io {
59 int error; 59 int error;
60 sector_t sector; 60 sector_t sector;
61 struct dm_crypt_io *base_io; 61 struct dm_crypt_io *base_io;
62}; 62} CRYPTO_MINALIGN_ATTR;
63 63
64struct dm_crypt_request { 64struct dm_crypt_request {
65 struct convert_context *ctx; 65 struct convert_context *ctx;
@@ -162,6 +162,8 @@ struct crypt_config {
162 */ 162 */
163 unsigned int dmreq_start; 163 unsigned int dmreq_start;
164 164
165 unsigned int per_bio_data_size;
166
165 unsigned long flags; 167 unsigned long flags;
166 unsigned int key_size; 168 unsigned int key_size;
167 unsigned int key_parts; /* independent parts in key buffer */ 169 unsigned int key_parts; /* independent parts in key buffer */
@@ -895,6 +897,15 @@ static void crypt_alloc_req(struct crypt_config *cc,
895 kcryptd_async_done, dmreq_of_req(cc, ctx->req)); 897 kcryptd_async_done, dmreq_of_req(cc, ctx->req));
896} 898}
897 899
900static void crypt_free_req(struct crypt_config *cc,
901 struct ablkcipher_request *req, struct bio *base_bio)
902{
903 struct dm_crypt_io *io = dm_per_bio_data(base_bio, cc->per_bio_data_size);
904
905 if ((struct ablkcipher_request *)(io + 1) != req)
906 mempool_free(req, cc->req_pool);
907}
908
898/* 909/*
899 * Encrypt / decrypt data from one bio to another one (can be the same one) 910 * Encrypt / decrypt data from one bio to another one (can be the same one)
900 */ 911 */
@@ -1008,12 +1019,9 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone)
1008 } 1019 }
1009} 1020}
1010 1021
1011static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc, 1022static void crypt_io_init(struct dm_crypt_io *io, struct crypt_config *cc,
1012 struct bio *bio, sector_t sector) 1023 struct bio *bio, sector_t sector)
1013{ 1024{
1014 struct dm_crypt_io *io;
1015
1016 io = mempool_alloc(cc->io_pool, GFP_NOIO);
1017 io->cc = cc; 1025 io->cc = cc;
1018 io->base_bio = bio; 1026 io->base_bio = bio;
1019 io->sector = sector; 1027 io->sector = sector;
@@ -1021,8 +1029,6 @@ static struct dm_crypt_io *crypt_io_alloc(struct crypt_config *cc,
1021 io->base_io = NULL; 1029 io->base_io = NULL;
1022 io->ctx.req = NULL; 1030 io->ctx.req = NULL;
1023 atomic_set(&io->io_pending, 0); 1031 atomic_set(&io->io_pending, 0);
1024
1025 return io;
1026} 1032}
1027 1033
1028static void crypt_inc_pending(struct dm_crypt_io *io) 1034static void crypt_inc_pending(struct dm_crypt_io *io)
@@ -1046,8 +1052,9 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
1046 return; 1052 return;
1047 1053
1048 if (io->ctx.req) 1054 if (io->ctx.req)
1049 mempool_free(io->ctx.req, cc->req_pool); 1055 crypt_free_req(cc, io->ctx.req, base_bio);
1050 mempool_free(io, cc->io_pool); 1056 if (io != dm_per_bio_data(base_bio, cc->per_bio_data_size))
1057 mempool_free(io, cc->io_pool);
1051 1058
1052 if (likely(!base_io)) 1059 if (likely(!base_io))
1053 bio_endio(base_bio, error); 1060 bio_endio(base_bio, error);
@@ -1255,8 +1262,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
1255 * between fragments, so switch to a new dm_crypt_io structure. 1262 * between fragments, so switch to a new dm_crypt_io structure.
1256 */ 1263 */
1257 if (unlikely(!crypt_finished && remaining)) { 1264 if (unlikely(!crypt_finished && remaining)) {
1258 new_io = crypt_io_alloc(io->cc, io->base_bio, 1265 new_io = mempool_alloc(cc->io_pool, GFP_NOIO);
1259 sector); 1266 crypt_io_init(new_io, io->cc, io->base_bio, sector);
1260 crypt_inc_pending(new_io); 1267 crypt_inc_pending(new_io);
1261 crypt_convert_init(cc, &new_io->ctx, NULL, 1268 crypt_convert_init(cc, &new_io->ctx, NULL,
1262 io->base_bio, sector); 1269 io->base_bio, sector);
@@ -1325,7 +1332,7 @@ static void kcryptd_async_done(struct crypto_async_request *async_req,
1325 if (error < 0) 1332 if (error < 0)
1326 io->error = -EIO; 1333 io->error = -EIO;
1327 1334
1328 mempool_free(req_of_dmreq(cc, dmreq), cc->req_pool); 1335 crypt_free_req(cc, req_of_dmreq(cc, dmreq), io->base_bio);
1329 1336
1330 if (!atomic_dec_and_test(&ctx->cc_pending)) 1337 if (!atomic_dec_and_test(&ctx->cc_pending))
1331 return; 1338 return;
@@ -1728,6 +1735,10 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
1728 goto bad; 1735 goto bad;
1729 } 1736 }
1730 1737
1738 cc->per_bio_data_size = ti->per_bio_data_size =
1739 sizeof(struct dm_crypt_io) + cc->dmreq_start +
1740 sizeof(struct dm_crypt_request) + cc->iv_size;
1741
1731 cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0); 1742 cc->page_pool = mempool_create_page_pool(MIN_POOL_PAGES, 0);
1732 if (!cc->page_pool) { 1743 if (!cc->page_pool) {
1733 ti->error = "Cannot allocate page mempool"; 1744 ti->error = "Cannot allocate page mempool";
@@ -1824,7 +1835,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
1824 return DM_MAPIO_REMAPPED; 1835 return DM_MAPIO_REMAPPED;
1825 } 1836 }
1826 1837
1827 io = crypt_io_alloc(cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); 1838 io = dm_per_bio_data(bio, cc->per_bio_data_size);
1839 crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
1840 io->ctx.req = (struct ablkcipher_request *)(io + 1);
1828 1841
1829 if (bio_data_dir(io->base_bio) == READ) { 1842 if (bio_data_dir(io->base_bio) == READ) {
1830 if (kcryptd_io_read(io, GFP_NOWAIT)) 1843 if (kcryptd_io_read(io, GFP_NOWAIT))
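
The dm-crypt.c changes move struct dm_crypt_io into per-bio data and place the cipher request directly after it in the same allocation (io->ctx.req = (struct ablkcipher_request *)(io + 1)), with per_bio_data_size sized to cover the io, the request and the IV; CRYPTO_MINALIGN_ATTR keeps that region suitably aligned. A loose userspace model of co-locating a request after its owning struct (alignment is trivial here because both structs only hold ints and a pointer; names are hypothetical):

#include <stdio.h>
#include <stdlib.h>

struct req { int iv[4]; };
struct io { int pending; struct req *req; };

int main(void)
{
	/* one allocation holds the io struct and its request back to back */
	size_t per_bio_size = sizeof(struct io) + sizeof(struct req);
	struct io *io = malloc(per_bio_size);

	if (!io)
		return 1;
	io->pending = 0;
	io->req = (struct req *)(io + 1);	/* points just past the io struct */

	io->req->iv[0] = 42;
	printf("req lives %ld bytes after io\n",
	       (long)((char *)io->req - (char *)io));
	free(io);
	return 0;
}
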
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index db404a0f7e2c..c09359db3a90 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -33,7 +33,6 @@ struct dm_io_client {
33struct io { 33struct io {
34 unsigned long error_bits; 34 unsigned long error_bits;
35 atomic_t count; 35 atomic_t count;
36 struct completion *wait;
37 struct dm_io_client *client; 36 struct dm_io_client *client;
38 io_notify_fn callback; 37 io_notify_fn callback;
39 void *context; 38 void *context;
@@ -112,28 +111,27 @@ static void retrieve_io_and_region_from_bio(struct bio *bio, struct io **io,
112 * We need an io object to keep track of the number of bios that 111 * We need an io object to keep track of the number of bios that
113 * have been dispatched for a particular io. 112 * have been dispatched for a particular io.
114 *---------------------------------------------------------------*/ 113 *---------------------------------------------------------------*/
115static void dec_count(struct io *io, unsigned int region, int error) 114static void complete_io(struct io *io)
116{ 115{
117 if (error) 116 unsigned long error_bits = io->error_bits;
118 set_bit(region, &io->error_bits); 117 io_notify_fn fn = io->callback;
118 void *context = io->context;
119 119
120 if (atomic_dec_and_test(&io->count)) { 120 if (io->vma_invalidate_size)
121 if (io->vma_invalidate_size) 121 invalidate_kernel_vmap_range(io->vma_invalidate_address,
122 invalidate_kernel_vmap_range(io->vma_invalidate_address, 122 io->vma_invalidate_size);
123 io->vma_invalidate_size);
124 123
125 if (io->wait) 124 mempool_free(io, io->client->pool);
126 complete(io->wait); 125 fn(error_bits, context);
126}
127 127
128 else { 128static void dec_count(struct io *io, unsigned int region, int error)
129 unsigned long r = io->error_bits; 129{
130 io_notify_fn fn = io->callback; 130 if (error)
131 void *context = io->context; 131 set_bit(region, &io->error_bits);
132 132
133 mempool_free(io, io->client->pool); 133 if (atomic_dec_and_test(&io->count))
134 fn(r, context); 134 complete_io(io);
135 }
136 }
137} 135}
138 136
139static void endio(struct bio *bio, int error) 137static void endio(struct bio *bio, int error)
@@ -376,41 +374,51 @@ static void dispatch_io(int rw, unsigned int num_regions,
376 dec_count(io, 0, 0); 374 dec_count(io, 0, 0);
377} 375}
378 376
377struct sync_io {
378 unsigned long error_bits;
379 struct completion wait;
380};
381
382static void sync_io_complete(unsigned long error, void *context)
383{
384 struct sync_io *sio = context;
385
386 sio->error_bits = error;
387 complete(&sio->wait);
388}
389
379static int sync_io(struct dm_io_client *client, unsigned int num_regions, 390static int sync_io(struct dm_io_client *client, unsigned int num_regions,
380 struct dm_io_region *where, int rw, struct dpages *dp, 391 struct dm_io_region *where, int rw, struct dpages *dp,
381 unsigned long *error_bits) 392 unsigned long *error_bits)
382{ 393{
383 /* 394 struct io *io;
384 * gcc <= 4.3 can't do the alignment for stack variables, so we must 395 struct sync_io sio;
385 * align it on our own.
386 * volatile prevents the optimizer from removing or reusing
387 * "io_" field from the stack frame (allowed in ANSI C).
388 */
389 volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1];
390 struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io));
391 DECLARE_COMPLETION_ONSTACK(wait);
392 396
393 if (num_regions > 1 && (rw & RW_MASK) != WRITE) { 397 if (num_regions > 1 && (rw & RW_MASK) != WRITE) {
394 WARN_ON(1); 398 WARN_ON(1);
395 return -EIO; 399 return -EIO;
396 } 400 }
397 401
402 init_completion(&sio.wait);
403
404 io = mempool_alloc(client->pool, GFP_NOIO);
398 io->error_bits = 0; 405 io->error_bits = 0;
399 atomic_set(&io->count, 1); /* see dispatch_io() */ 406 atomic_set(&io->count, 1); /* see dispatch_io() */
400 io->wait = &wait;
401 io->client = client; 407 io->client = client;
408 io->callback = sync_io_complete;
409 io->context = &sio;
402 410
403 io->vma_invalidate_address = dp->vma_invalidate_address; 411 io->vma_invalidate_address = dp->vma_invalidate_address;
404 io->vma_invalidate_size = dp->vma_invalidate_size; 412 io->vma_invalidate_size = dp->vma_invalidate_size;
405 413
406 dispatch_io(rw, num_regions, where, dp, io, 1); 414 dispatch_io(rw, num_regions, where, dp, io, 1);
407 415
408 wait_for_completion_io(&wait); 416 wait_for_completion_io(&sio.wait);
409 417
410 if (error_bits) 418 if (error_bits)
411 *error_bits = io->error_bits; 419 *error_bits = sio.error_bits;
412 420
413 return io->error_bits ? -EIO : 0; 421 return sio.error_bits ? -EIO : 0;
414} 422}
415 423
416static int async_io(struct dm_io_client *client, unsigned int num_regions, 424static int async_io(struct dm_io_client *client, unsigned int num_regions,
@@ -428,7 +436,6 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions,
428 io = mempool_alloc(client->pool, GFP_NOIO); 436 io = mempool_alloc(client->pool, GFP_NOIO);
429 io->error_bits = 0; 437 io->error_bits = 0;
430 atomic_set(&io->count, 1); /* see dispatch_io() */ 438 atomic_set(&io->count, 1); /* see dispatch_io() */
431 io->wait = NULL;
432 io->client = client; 439 io->client = client;
433 io->callback = fn; 440 io->callback = fn;
434 io->context = context; 441 io->context = context;
@@ -481,9 +488,9 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp,
481 * New collapsed (a)synchronous interface. 488 * New collapsed (a)synchronous interface.
482 * 489 *
483 * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug 490 * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug
484 * the queue with blk_unplug() some time later or set REQ_SYNC in 491 * the queue with blk_unplug() some time later or set REQ_SYNC in io_req->bi_rw.
485io_req->bi_rw. If you fail to do one of these, the IO will be submitted to 492 * If you fail to do one of these, the IO will be submitted to the disk after
486 * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. 493 * q->unplug_delay, which defaults to 3ms in blk-settings.c.
487 */ 494 */
488int dm_io(struct dm_io_request *io_req, unsigned num_regions, 495int dm_io(struct dm_io_request *io_req, unsigned num_regions,
489 struct dm_io_region *where, unsigned long *sync_error_bits) 496 struct dm_io_region *where, unsigned long *sync_error_bits)
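
The dm-io.c changes rebuild the synchronous path on top of the asynchronous one: the io is taken from the mempool, its callback is a small helper that records the error bits and completes an on-stack completion, and sync_io() simply waits on it, removing the old stack-allocated, manually aligned struct io. A single-threaded toy model of a sync wrapper around an async callback (in this model the callback has already run by the time the wait loop is reached; names are hypothetical):

#include <stdio.h>
#include <stdbool.h>

typedef void (*notify_fn)(unsigned long error, void *context);

struct io {
	unsigned long error_bits;
	notify_fn callback;
	void *context;
};

/* stand-in for dispatch_io() + endio(): finishes the io and fires the callback */
static void dispatch(struct io *io, unsigned long error)
{
	io->error_bits = error;
	io->callback(io->error_bits, io->context);
}

/* the synchronous caller's completion state lives on its own stack */
struct sync_io {
	unsigned long error_bits;
	bool done;
};

static void sync_io_complete(unsigned long error, void *context)
{
	struct sync_io *sio = context;

	sio->error_bits = error;
	sio->done = true;	/* the kernel code completes a struct completion here */
}

static int sync_io(unsigned long error)
{
	struct io io;
	struct sync_io sio = { 0, false };

	io.callback = sync_io_complete;
	io.context = &sio;
	dispatch(&io, error);
	while (!sio.done)
		;		/* wait_for_completion_io() in the real code */
	return sio.error_bits ? -1 : 0;
}

int main(void)
{
	printf("%d %d\n", sync_io(0), sync_io(1));
	return 0;
}
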
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f4167b013d99..833d7e752f06 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -373,8 +373,6 @@ static int __must_push_back(struct multipath *m)
373 dm_noflush_suspending(m->ti))); 373 dm_noflush_suspending(m->ti)));
374} 374}
375 375
376#define pg_ready(m) (!(m)->queue_io && !(m)->pg_init_required)
377
378/* 376/*
379 * Map cloned requests 377 * Map cloned requests
380 */ 378 */
@@ -402,11 +400,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone,
402 if (!__must_push_back(m)) 400 if (!__must_push_back(m))
403 r = -EIO; /* Failed */ 401 r = -EIO; /* Failed */
404 goto out_unlock; 402 goto out_unlock;
405 } 403 } else if (m->queue_io || m->pg_init_required) {
406 if (!pg_ready(m)) {
407 __pg_init_all_paths(m); 404 __pg_init_all_paths(m);
408 goto out_unlock; 405 goto out_unlock;
409 } 406 }
407
410 if (set_mapinfo(m, map_context) < 0) 408 if (set_mapinfo(m, map_context) < 0)
411 /* ENOMEM, requeue */ 409 /* ENOMEM, requeue */
412 goto out_unlock; 410 goto out_unlock;
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index 09a688b3d48c..50fca469cafd 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -137,13 +137,23 @@ static void switch_get_position(struct switch_ctx *sctx, unsigned long region_nr
137 *bit *= sctx->region_table_entry_bits; 137 *bit *= sctx->region_table_entry_bits;
138} 138}
139 139
140static unsigned switch_region_table_read(struct switch_ctx *sctx, unsigned long region_nr)
141{
142 unsigned long region_index;
143 unsigned bit;
144
145 switch_get_position(sctx, region_nr, &region_index, &bit);
146
147 return (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
148 ((1 << sctx->region_table_entry_bits) - 1);
149}
150
140/* 151/*
141 * Find which path to use at given offset. 152 * Find which path to use at given offset.
142 */ 153 */
143static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset) 154static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
144{ 155{
145 unsigned long region_index; 156 unsigned path_nr;
146 unsigned bit, path_nr;
147 sector_t p; 157 sector_t p;
148 158
149 p = offset; 159 p = offset;
@@ -152,9 +162,7 @@ static unsigned switch_get_path_nr(struct switch_ctx *sctx, sector_t offset)
152 else 162 else
153 sector_div(p, sctx->region_size); 163 sector_div(p, sctx->region_size);
154 164
155 switch_get_position(sctx, p, &region_index, &bit); 165 path_nr = switch_region_table_read(sctx, p);
156 path_nr = (ACCESS_ONCE(sctx->region_table[region_index]) >> bit) &
157 ((1 << sctx->region_table_entry_bits) - 1);
158 166
159 /* This can only happen if the processor uses non-atomic stores. */ 167 /* This can only happen if the processor uses non-atomic stores. */
160 if (unlikely(path_nr >= sctx->nr_paths)) 168 if (unlikely(path_nr >= sctx->nr_paths))
@@ -363,7 +371,7 @@ static __always_inline unsigned long parse_hex(const char **string)
363} 371}
364 372
365static int process_set_region_mappings(struct switch_ctx *sctx, 373static int process_set_region_mappings(struct switch_ctx *sctx,
366 unsigned argc, char **argv) 374 unsigned argc, char **argv)
367{ 375{
368 unsigned i; 376 unsigned i;
369 unsigned long region_index = 0; 377 unsigned long region_index = 0;
@@ -372,6 +380,51 @@ static int process_set_region_mappings(struct switch_ctx *sctx,
372 unsigned long path_nr; 380 unsigned long path_nr;
373 const char *string = argv[i]; 381 const char *string = argv[i];
374 382
383 if ((*string & 0xdf) == 'R') {
384 unsigned long cycle_length, num_write;
385
386 string++;
387 if (unlikely(*string == ',')) {
388 DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
389 return -EINVAL;
390 }
391 cycle_length = parse_hex(&string);
392 if (unlikely(*string != ',')) {
393 DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
394 return -EINVAL;
395 }
396 string++;
397 if (unlikely(!*string)) {
398 DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
399 return -EINVAL;
400 }
401 num_write = parse_hex(&string);
402 if (unlikely(*string)) {
403 DMWARN("invalid set_region_mappings argument: '%s'", argv[i]);
404 return -EINVAL;
405 }
406
407 if (unlikely(!cycle_length) || unlikely(cycle_length - 1 > region_index)) {
408 DMWARN("invalid set_region_mappings cycle length: %lu > %lu",
409 cycle_length - 1, region_index);
410 return -EINVAL;
411 }
412 if (unlikely(region_index + num_write < region_index) ||
413 unlikely(region_index + num_write >= sctx->nr_regions)) {
414 DMWARN("invalid set_region_mappings region number: %lu + %lu >= %lu",
415 region_index, num_write, sctx->nr_regions);
416 return -EINVAL;
417 }
418
419 while (num_write--) {
420 region_index++;
421 path_nr = switch_region_table_read(sctx, region_index - cycle_length);
422 switch_region_table_write(sctx, region_index, path_nr);
423 }
424
425 continue;
426 }
427
375 if (*string == ':') 428 if (*string == ':')
376 region_index++; 429 region_index++;
377 else { 430 else {
@@ -500,7 +553,7 @@ static int switch_iterate_devices(struct dm_target *ti,
500 553
501static struct target_type switch_target = { 554static struct target_type switch_target = {
502 .name = "switch", 555 .name = "switch",
503 .version = {1, 0, 0}, 556 .version = {1, 1, 0},
504 .module = THIS_MODULE, 557 .module = THIS_MODULE,
505 .ctr = switch_ctr, 558 .ctr = switch_ctr,
506 .dtr = switch_dtr, 559 .dtr = switch_dtr,
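
The dm-switch.c changes add an "R<cycle_length>,<num_write>" directive that repeats the previous cycle_length region mappings num_write more times, reading each source entry through the new switch_region_table_read() helper. A standalone model of the repetition step (validation and hex parsing omitted; names are hypothetical):

#include <stdio.h>

/*
 * Repeat the previous cycle_length table entries num_write times,
 * extending the table in place starting just after region_index.
 */
static unsigned long repeat_mappings(unsigned table[], unsigned long region_index,
				     unsigned long cycle_length, unsigned long num_write)
{
	while (num_write--) {
		region_index++;
		table[region_index] = table[region_index - cycle_length];
	}
	return region_index;
}

int main(void)
{
	unsigned table[12] = { 0, 1, 2 };	/* regions 0..2 already mapped */
	unsigned long idx = 2;
	unsigned long i;

	/* "R3,9": copy the last 3 entries over the next 9 regions */
	idx = repeat_mappings(table, idx, 3, 9);

	for (i = 0; i <= idx; i++)
		printf("%u ", table[i]);
	printf("\n");		/* 0 1 2 0 1 2 0 1 2 0 1 2 */
	return 0;
}
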
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 5f59f1e3e5b1..f9c6cb8dbcf8 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1386,6 +1386,14 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev,
1386 return q && !blk_queue_add_random(q); 1386 return q && !blk_queue_add_random(q);
1387} 1387}
1388 1388
1389static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev,
1390 sector_t start, sector_t len, void *data)
1391{
1392 struct request_queue *q = bdev_get_queue(dev->bdev);
1393
1394 return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
1395}
1396
1389static bool dm_table_all_devices_attribute(struct dm_table *t, 1397static bool dm_table_all_devices_attribute(struct dm_table *t,
1390 iterate_devices_callout_fn func) 1398 iterate_devices_callout_fn func)
1391{ 1399{
@@ -1430,6 +1438,43 @@ static bool dm_table_supports_write_same(struct dm_table *t)
1430 return true; 1438 return true;
1431} 1439}
1432 1440
1441static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
1442 sector_t start, sector_t len, void *data)
1443{
1444 struct request_queue *q = bdev_get_queue(dev->bdev);
1445
1446 return q && blk_queue_discard(q);
1447}
1448
1449static bool dm_table_supports_discards(struct dm_table *t)
1450{
1451 struct dm_target *ti;
1452 unsigned i = 0;
1453
1454 /*
1455 * Unless any target used by the table set discards_supported,
1456 * require at least one underlying device to support discards.
1457 * t->devices includes internal dm devices such as mirror logs
1458 * so we need to use iterate_devices here, which targets
1459 * supporting discard selectively must provide.
1460 */
1461 while (i < dm_table_get_num_targets(t)) {
1462 ti = dm_table_get_target(t, i++);
1463
1464 if (!ti->num_discard_bios)
1465 continue;
1466
1467 if (ti->discards_supported)
1468 return 1;
1469
1470 if (ti->type->iterate_devices &&
1471 ti->type->iterate_devices(ti, device_discard_capable, NULL))
1472 return 1;
1473 }
1474
1475 return 0;
1476}
1477
1433void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, 1478void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1434 struct queue_limits *limits) 1479 struct queue_limits *limits)
1435{ 1480{
@@ -1464,6 +1509,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
1464 if (!dm_table_supports_write_same(t)) 1509 if (!dm_table_supports_write_same(t))
1465 q->limits.max_write_same_sectors = 0; 1510 q->limits.max_write_same_sectors = 0;
1466 1511
1512 if (dm_table_all_devices_attribute(t, queue_supports_sg_merge))
1513 queue_flag_clear_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
1514 else
1515 queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);
1516
1467 dm_table_set_integrity(t); 1517 dm_table_set_integrity(t);
1468 1518
1469 /* 1519 /*
@@ -1636,39 +1686,3 @@ void dm_table_run_md_queue_async(struct dm_table *t)
1636} 1686}
1637EXPORT_SYMBOL(dm_table_run_md_queue_async); 1687EXPORT_SYMBOL(dm_table_run_md_queue_async);
1638 1688
1639static int device_discard_capable(struct dm_target *ti, struct dm_dev *dev,
1640 sector_t start, sector_t len, void *data)
1641{
1642 struct request_queue *q = bdev_get_queue(dev->bdev);
1643
1644 return q && blk_queue_discard(q);
1645}
1646
1647bool dm_table_supports_discards(struct dm_table *t)
1648{
1649 struct dm_target *ti;
1650 unsigned i = 0;
1651
1652 /*
1653 * Unless any target used by the table set discards_supported,
1654 * require at least one underlying device to support discards.
1655 * t->devices includes internal dm devices such as mirror logs
1656 * so we need to use iterate_devices here, which targets
1657 * supporting discard selectively must provide.
1658 */
1659 while (i < dm_table_get_num_targets(t)) {
1660 ti = dm_table_get_target(t, i++);
1661
1662 if (!ti->num_discard_bios)
1663 continue;
1664
1665 if (ti->discards_supported)
1666 return 1;
1667
1668 if (ti->type->iterate_devices &&
1669 ti->type->iterate_devices(ti, device_discard_capable, NULL))
1670 return 1;
1671 }
1672
1673 return 0;
1674}
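The discard and SG-merge checks added above both rely on the same callout idea: run a per-device predicate over each target's underlying devices and combine the answers ("any device supports discard" versus "all devices allow SG merging"). A stand-alone sketch of that pattern, with made-up names in place of the dm iterate_devices machinery:

#include <stdbool.h>
#include <stdio.h>

struct fake_dev { bool discard; bool sg_merge; };

typedef bool (*dev_predicate)(const struct fake_dev *d);

static bool dev_supports_discard(const struct fake_dev *d)  { return d->discard; }
static bool dev_supports_sg_merge(const struct fake_dev *d) { return d->sg_merge; }

/* "at least one device passes" -- how discard support is decided */
static bool any_device(const struct fake_dev *devs, int n, dev_predicate fn)
{
    for (int i = 0; i < n; i++)
        if (fn(&devs[i]))
            return true;
    return false;
}

/* "every device passes" -- how the NO_SG_MERGE flag is decided */
static bool all_devices(const struct fake_dev *devs, int n, dev_predicate fn)
{
    for (int i = 0; i < n; i++)
        if (!fn(&devs[i]))
            return false;
    return true;
}

int main(void)
{
    struct fake_dev devs[] = { { true, false }, { false, true } };

    printf("discards supported: %d\n", any_device(devs, 2, dev_supports_discard));   /* 1 */
    printf("sg merge allowed:   %d\n", all_devices(devs, 2, dev_supports_sg_merge)); /* 0 */
    return 0;
}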
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index fc9c848a60c9..4843801173fe 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -227,6 +227,7 @@ struct thin_c {
227 struct list_head list; 227 struct list_head list;
228 struct dm_dev *pool_dev; 228 struct dm_dev *pool_dev;
229 struct dm_dev *origin_dev; 229 struct dm_dev *origin_dev;
230 sector_t origin_size;
230 dm_thin_id dev_id; 231 dm_thin_id dev_id;
231 232
232 struct pool *pool; 233 struct pool *pool;
@@ -554,11 +555,16 @@ static void remap_and_issue(struct thin_c *tc, struct bio *bio,
554struct dm_thin_new_mapping { 555struct dm_thin_new_mapping {
555 struct list_head list; 556 struct list_head list;
556 557
557 bool quiesced:1;
558 bool prepared:1;
559 bool pass_discard:1; 558 bool pass_discard:1;
560 bool definitely_not_shared:1; 559 bool definitely_not_shared:1;
561 560
561 /*
562 * Track quiescing, copying and zeroing preparation actions. When this
563 * counter hits zero the block is prepared and can be inserted into the
564 * btree.
565 */
566 atomic_t prepare_actions;
567
562 int err; 568 int err;
563 struct thin_c *tc; 569 struct thin_c *tc;
564 dm_block_t virt_block; 570 dm_block_t virt_block;
@@ -575,43 +581,41 @@ struct dm_thin_new_mapping {
575 bio_end_io_t *saved_bi_end_io; 581 bio_end_io_t *saved_bi_end_io;
576}; 582};
577 583
578static void __maybe_add_mapping(struct dm_thin_new_mapping *m) 584static void __complete_mapping_preparation(struct dm_thin_new_mapping *m)
579{ 585{
580 struct pool *pool = m->tc->pool; 586 struct pool *pool = m->tc->pool;
581 587
582 if (m->quiesced && m->prepared) { 588 if (atomic_dec_and_test(&m->prepare_actions)) {
583 list_add_tail(&m->list, &pool->prepared_mappings); 589 list_add_tail(&m->list, &pool->prepared_mappings);
584 wake_worker(pool); 590 wake_worker(pool);
585 } 591 }
586} 592}
587 593
588static void copy_complete(int read_err, unsigned long write_err, void *context) 594static void complete_mapping_preparation(struct dm_thin_new_mapping *m)
589{ 595{
590 unsigned long flags; 596 unsigned long flags;
591 struct dm_thin_new_mapping *m = context;
592 struct pool *pool = m->tc->pool; 597 struct pool *pool = m->tc->pool;
593 598
594 m->err = read_err || write_err ? -EIO : 0;
595
596 spin_lock_irqsave(&pool->lock, flags); 599 spin_lock_irqsave(&pool->lock, flags);
597 m->prepared = true; 600 __complete_mapping_preparation(m);
598 __maybe_add_mapping(m);
599 spin_unlock_irqrestore(&pool->lock, flags); 601 spin_unlock_irqrestore(&pool->lock, flags);
600} 602}
601 603
604static void copy_complete(int read_err, unsigned long write_err, void *context)
605{
606 struct dm_thin_new_mapping *m = context;
607
608 m->err = read_err || write_err ? -EIO : 0;
609 complete_mapping_preparation(m);
610}
611
602static void overwrite_endio(struct bio *bio, int err) 612static void overwrite_endio(struct bio *bio, int err)
603{ 613{
604 unsigned long flags;
605 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); 614 struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
606 struct dm_thin_new_mapping *m = h->overwrite_mapping; 615 struct dm_thin_new_mapping *m = h->overwrite_mapping;
607 struct pool *pool = m->tc->pool;
608 616
609 m->err = err; 617 m->err = err;
610 618 complete_mapping_preparation(m);
611 spin_lock_irqsave(&pool->lock, flags);
612 m->prepared = true;
613 __maybe_add_mapping(m);
614 spin_unlock_irqrestore(&pool->lock, flags);
615} 619}
616 620
617/*----------------------------------------------------------------*/ 621/*----------------------------------------------------------------*/
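The prepare_actions counter introduced above replaces the quiesced/prepared booleans with a count of outstanding preparation steps; whichever step finishes last hands the mapping to the worker. A self-contained C11 sketch of that completion-counter pattern (purely illustrative, no locking shown):

#include <stdatomic.h>
#include <stdio.h>

struct mapping { atomic_int prepare_actions; };

static void complete_action(struct mapping *m)
{
    /* atomic_fetch_sub returns the old value; 1 means this was the last action */
    if (atomic_fetch_sub(&m->prepare_actions, 1) == 1)
        printf("mapping fully prepared, queue it for the worker\n");
}

int main(void)
{
    struct mapping m;

    atomic_init(&m.prepare_actions, 3);  /* quiesce + copy + the caller's own ref */
    complete_action(&m);                 /* quiesce finished */
    complete_action(&m);                 /* copy finished */
    complete_action(&m);                 /* caller drops its ref: last one fires */
    return 0;
}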
@@ -821,10 +825,31 @@ static struct dm_thin_new_mapping *get_next_mapping(struct pool *pool)
821 return m; 825 return m;
822} 826}
823 827
828static void ll_zero(struct thin_c *tc, struct dm_thin_new_mapping *m,
829 sector_t begin, sector_t end)
830{
831 int r;
832 struct dm_io_region to;
833
834 to.bdev = tc->pool_dev->bdev;
835 to.sector = begin;
836 to.count = end - begin;
837
838 r = dm_kcopyd_zero(tc->pool->copier, 1, &to, 0, copy_complete, m);
839 if (r < 0) {
840 DMERR_LIMIT("dm_kcopyd_zero() failed");
841 copy_complete(1, 1, m);
842 }
843}
844
845/*
846 * A partial copy also needs to zero the uncopied region.
847 */
824static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, 848static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
825 struct dm_dev *origin, dm_block_t data_origin, 849 struct dm_dev *origin, dm_block_t data_origin,
826 dm_block_t data_dest, 850 dm_block_t data_dest,
827 struct dm_bio_prison_cell *cell, struct bio *bio) 851 struct dm_bio_prison_cell *cell, struct bio *bio,
852 sector_t len)
828{ 853{
829 int r; 854 int r;
830 struct pool *pool = tc->pool; 855 struct pool *pool = tc->pool;
@@ -835,8 +860,15 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
835 m->data_block = data_dest; 860 m->data_block = data_dest;
836 m->cell = cell; 861 m->cell = cell;
837 862
863 /*
864 * quiesce action + copy action + an extra reference held for the
865 * duration of this function (we may need to inc later for a
866 * partial zero).
867 */
868 atomic_set(&m->prepare_actions, 3);
869
838 if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list)) 870 if (!dm_deferred_set_add_work(pool->shared_read_ds, &m->list))
839 m->quiesced = true; 871 complete_mapping_preparation(m); /* already quiesced */
840 872
841 /* 873 /*
842 * IO to pool_dev remaps to the pool target's data_dev. 874 * IO to pool_dev remaps to the pool target's data_dev.
@@ -857,20 +889,38 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block,
857 889
858 from.bdev = origin->bdev; 890 from.bdev = origin->bdev;
859 from.sector = data_origin * pool->sectors_per_block; 891 from.sector = data_origin * pool->sectors_per_block;
860 from.count = pool->sectors_per_block; 892 from.count = len;
861 893
862 to.bdev = tc->pool_dev->bdev; 894 to.bdev = tc->pool_dev->bdev;
863 to.sector = data_dest * pool->sectors_per_block; 895 to.sector = data_dest * pool->sectors_per_block;
864 to.count = pool->sectors_per_block; 896 to.count = len;
865 897
866 r = dm_kcopyd_copy(pool->copier, &from, 1, &to, 898 r = dm_kcopyd_copy(pool->copier, &from, 1, &to,
867 0, copy_complete, m); 899 0, copy_complete, m);
868 if (r < 0) { 900 if (r < 0) {
869 mempool_free(m, pool->mapping_pool);
870 DMERR_LIMIT("dm_kcopyd_copy() failed"); 901 DMERR_LIMIT("dm_kcopyd_copy() failed");
871 cell_error(pool, cell); 902 copy_complete(1, 1, m);
903
904 /*
905 * We allow the zero to be issued, to simplify the
906 * error path. Otherwise we'd need to start
907 * worrying about decrementing the prepare_actions
908 * counter.
909 */
910 }
911
912 /*
913 * Do we need to zero a tail region?
914 */
915 if (len < pool->sectors_per_block && pool->pf.zero_new_blocks) {
916 atomic_inc(&m->prepare_actions);
917 ll_zero(tc, m,
918 data_dest * pool->sectors_per_block + len,
919 (data_dest + 1) * pool->sectors_per_block);
872 } 920 }
873 } 921 }
922
923 complete_mapping_preparation(m); /* drop our ref */
874} 924}
875 925
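When schedule_copy() above copies fewer than a whole block, the uncopied tail of the destination block still has to be zeroed (when zeroing of new blocks is enabled). A tiny stand-alone example of that arithmetic with made-up numbers:

#include <stdio.h>

typedef unsigned long long sector_t;

int main(void)
{
    sector_t sectors_per_block = 128;   /* example pool block size */
    sector_t data_dest = 7;             /* destination block number */
    sector_t len = 100;                 /* sectors actually copied */

    sector_t zero_begin = data_dest * sectors_per_block + len;   /* 996 */
    sector_t zero_end   = (data_dest + 1) * sectors_per_block;   /* 1024 */

    /* the copy covered sectors 896..995, so 996..1023 must be zeroed */
    printf("zero [%llu, %llu): %llu sectors\n",
           zero_begin, zero_end, zero_end - zero_begin);
    return 0;
}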
876static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block, 926static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
@@ -878,15 +928,8 @@ static void schedule_internal_copy(struct thin_c *tc, dm_block_t virt_block,
878 struct dm_bio_prison_cell *cell, struct bio *bio) 928 struct dm_bio_prison_cell *cell, struct bio *bio)
879{ 929{
880 schedule_copy(tc, virt_block, tc->pool_dev, 930 schedule_copy(tc, virt_block, tc->pool_dev,
881 data_origin, data_dest, cell, bio); 931 data_origin, data_dest, cell, bio,
882} 932 tc->pool->sectors_per_block);
883
884static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
885 dm_block_t data_dest,
886 struct dm_bio_prison_cell *cell, struct bio *bio)
887{
888 schedule_copy(tc, virt_block, tc->origin_dev,
889 virt_block, data_dest, cell, bio);
890} 933}
891 934
892static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, 935static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
@@ -896,8 +939,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
896 struct pool *pool = tc->pool; 939 struct pool *pool = tc->pool;
897 struct dm_thin_new_mapping *m = get_next_mapping(pool); 940 struct dm_thin_new_mapping *m = get_next_mapping(pool);
898 941
899 m->quiesced = true; 942 atomic_set(&m->prepare_actions, 1); /* no need to quiesce */
900 m->prepared = false;
901 m->tc = tc; 943 m->tc = tc;
902 m->virt_block = virt_block; 944 m->virt_block = virt_block;
903 m->data_block = data_block; 945 m->data_block = data_block;
@@ -919,21 +961,33 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block,
919 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); 961 save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio);
920 inc_all_io_entry(pool, bio); 962 inc_all_io_entry(pool, bio);
921 remap_and_issue(tc, bio, data_block); 963 remap_and_issue(tc, bio, data_block);
922 } else {
923 int r;
924 struct dm_io_region to;
925 964
926 to.bdev = tc->pool_dev->bdev; 965 } else
927 to.sector = data_block * pool->sectors_per_block; 966 ll_zero(tc, m,
928 to.count = pool->sectors_per_block; 967 data_block * pool->sectors_per_block,
968 (data_block + 1) * pool->sectors_per_block);
969}
929 970
930 r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); 971static void schedule_external_copy(struct thin_c *tc, dm_block_t virt_block,
931 if (r < 0) { 972 dm_block_t data_dest,
932 mempool_free(m, pool->mapping_pool); 973 struct dm_bio_prison_cell *cell, struct bio *bio)
933 DMERR_LIMIT("dm_kcopyd_zero() failed"); 974{
934 cell_error(pool, cell); 975 struct pool *pool = tc->pool;
935 } 976 sector_t virt_block_begin = virt_block * pool->sectors_per_block;
936 } 977 sector_t virt_block_end = (virt_block + 1) * pool->sectors_per_block;
978
979 if (virt_block_end <= tc->origin_size)
980 schedule_copy(tc, virt_block, tc->origin_dev,
981 virt_block, data_dest, cell, bio,
982 pool->sectors_per_block);
983
984 else if (virt_block_begin < tc->origin_size)
985 schedule_copy(tc, virt_block, tc->origin_dev,
986 virt_block, data_dest, cell, bio,
987 tc->origin_size - virt_block_begin);
988
989 else
990 schedule_zero(tc, virt_block, data_dest, cell, bio);
937} 991}
938 992
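schedule_external_copy() above now chooses between a full copy, a partial copy (whose tail gets zeroed), and a plain zero, depending on where the block lies relative to the external origin's current size. A stand-alone sketch of that decision with illustrative numbers:

#include <stdio.h>

typedef unsigned long long sector_t;

static const char *external_copy_action(sector_t virt_block,
                                        sector_t sectors_per_block,
                                        sector_t origin_size)
{
    sector_t begin = virt_block * sectors_per_block;
    sector_t end   = (virt_block + 1) * sectors_per_block;

    if (end <= origin_size)
        return "full copy from origin";
    if (begin < origin_size)
        return "partial copy, zero the tail";
    return "no origin data, just zero";
}

int main(void)
{
    sector_t spb = 128, origin_size = 1000;

    printf("block 3: %s\n", external_copy_action(3, spb, origin_size)); /* full copy */
    printf("block 7: %s\n", external_copy_action(7, spb, origin_size)); /* partial copy */
    printf("block 9: %s\n", external_copy_action(9, spb, origin_size)); /* just zero */
    return 0;
}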
939/* 993/*
@@ -1315,7 +1369,18 @@ static void process_bio(struct thin_c *tc, struct bio *bio)
1315 inc_all_io_entry(pool, bio); 1369 inc_all_io_entry(pool, bio);
1316 cell_defer_no_holder(tc, cell); 1370 cell_defer_no_holder(tc, cell);
1317 1371
1318 remap_to_origin_and_issue(tc, bio); 1372 if (bio_end_sector(bio) <= tc->origin_size)
1373 remap_to_origin_and_issue(tc, bio);
1374
1375 else if (bio->bi_iter.bi_sector < tc->origin_size) {
1376 zero_fill_bio(bio);
1377 bio->bi_iter.bi_size = (tc->origin_size - bio->bi_iter.bi_sector) << SECTOR_SHIFT;
1378 remap_to_origin_and_issue(tc, bio);
1379
1380 } else {
1381 zero_fill_bio(bio);
1382 bio_endio(bio, 0);
1383 }
1319 } else 1384 } else
1320 provision_block(tc, bio, block, cell); 1385 provision_block(tc, bio, block, cell);
1321 break; 1386 break;
@@ -3112,7 +3177,7 @@ static void pool_io_hints(struct dm_target *ti, struct queue_limits *limits)
3112 */ 3177 */
3113 if (io_opt_sectors < pool->sectors_per_block || 3178 if (io_opt_sectors < pool->sectors_per_block ||
3114 do_div(io_opt_sectors, pool->sectors_per_block)) { 3179 do_div(io_opt_sectors, pool->sectors_per_block)) {
3115 blk_limits_io_min(limits, 0); 3180 blk_limits_io_min(limits, pool->sectors_per_block << SECTOR_SHIFT);
3116 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT); 3181 blk_limits_io_opt(limits, pool->sectors_per_block << SECTOR_SHIFT);
3117 } 3182 }
3118 3183
@@ -3141,7 +3206,7 @@ static struct target_type pool_target = {
3141 .name = "thin-pool", 3206 .name = "thin-pool",
3142 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | 3207 .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE |
3143 DM_TARGET_IMMUTABLE, 3208 DM_TARGET_IMMUTABLE,
3144 .version = {1, 12, 0}, 3209 .version = {1, 13, 0},
3145 .module = THIS_MODULE, 3210 .module = THIS_MODULE,
3146 .ctr = pool_ctr, 3211 .ctr = pool_ctr,
3147 .dtr = pool_dtr, 3212 .dtr = pool_dtr,
@@ -3361,8 +3426,7 @@ static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
3361 spin_lock_irqsave(&pool->lock, flags); 3426 spin_lock_irqsave(&pool->lock, flags);
3362 list_for_each_entry_safe(m, tmp, &work, list) { 3427 list_for_each_entry_safe(m, tmp, &work, list) {
3363 list_del(&m->list); 3428 list_del(&m->list);
3364 m->quiesced = true; 3429 __complete_mapping_preparation(m);
3365 __maybe_add_mapping(m);
3366 } 3430 }
3367 spin_unlock_irqrestore(&pool->lock, flags); 3431 spin_unlock_irqrestore(&pool->lock, flags);
3368 } 3432 }
@@ -3401,6 +3465,16 @@ static void thin_postsuspend(struct dm_target *ti)
3401 noflush_work(tc, do_noflush_stop); 3465 noflush_work(tc, do_noflush_stop);
3402} 3466}
3403 3467
3468static int thin_preresume(struct dm_target *ti)
3469{
3470 struct thin_c *tc = ti->private;
3471
3472 if (tc->origin_dev)
3473 tc->origin_size = get_dev_size(tc->origin_dev->bdev);
3474
3475 return 0;
3476}
3477
3404/* 3478/*
3405 * <nr mapped sectors> <highest mapped sector> 3479 * <nr mapped sectors> <highest mapped sector>
3406 */ 3480 */
@@ -3483,12 +3557,13 @@ static int thin_iterate_devices(struct dm_target *ti,
3483 3557
3484static struct target_type thin_target = { 3558static struct target_type thin_target = {
3485 .name = "thin", 3559 .name = "thin",
3486 .version = {1, 12, 0}, 3560 .version = {1, 13, 0},
3487 .module = THIS_MODULE, 3561 .module = THIS_MODULE,
3488 .ctr = thin_ctr, 3562 .ctr = thin_ctr,
3489 .dtr = thin_dtr, 3563 .dtr = thin_dtr,
3490 .map = thin_map, 3564 .map = thin_map,
3491 .end_io = thin_endio, 3565 .end_io = thin_endio,
3566 .preresume = thin_preresume,
3492 .presuspend = thin_presuspend, 3567 .presuspend = thin_presuspend,
3493 .postsuspend = thin_postsuspend, 3568 .postsuspend = thin_postsuspend,
3494 .status = thin_status, 3569 .status = thin_status,
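Inside the dm-thin changes above, process_bio() handles a read that straddles the end of a shrunken external origin by zero-filling the bio first and then trimming it so only the part the origin still covers is re-issued. A small worked example of the trim arithmetic (illustrative values):

#include <stdio.h>

#define SECTOR_SHIFT 9  /* 512-byte sectors */

int main(void)
{
    unsigned long long origin_size = 1000;       /* origin length, sectors */
    unsigned long long bi_sector   = 992;        /* read starts here */
    unsigned int bi_size = 16 << SECTOR_SHIFT;   /* a 16-sector read */

    if (bi_sector + (bi_size >> SECTOR_SHIFT) > origin_size &&
        bi_sector < origin_size) {
        /* keep only the sectors the origin still covers (8 of them here) */
        bi_size = (origin_size - bi_sector) << SECTOR_SHIFT;
    }

    printf("issue %u bytes (%u sectors) to the origin\n",
           bi_size, bi_size >> SECTOR_SHIFT);    /* 4096 bytes, 8 sectors */
    return 0;
}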
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index ed76126aac54..e81d2152fa68 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -72,7 +72,6 @@ int dm_table_any_busy_target(struct dm_table *t);
72unsigned dm_table_get_type(struct dm_table *t); 72unsigned dm_table_get_type(struct dm_table *t);
73struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); 73struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
74bool dm_table_request_based(struct dm_table *t); 74bool dm_table_request_based(struct dm_table *t);
75bool dm_table_supports_discards(struct dm_table *t);
76void dm_table_free_md_mempools(struct dm_table *t); 75void dm_table_free_md_mempools(struct dm_table *t);
77struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); 76struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
78 77
diff --git a/drivers/mfd/rtsx_usb.c b/drivers/mfd/rtsx_usb.c
index 6352bec8419a..71f387ce8cbd 100644
--- a/drivers/mfd/rtsx_usb.c
+++ b/drivers/mfd/rtsx_usb.c
@@ -744,6 +744,7 @@ static struct usb_device_id rtsx_usb_usb_ids[] = {
744 { USB_DEVICE(0x0BDA, 0x0140) }, 744 { USB_DEVICE(0x0BDA, 0x0140) },
745 { } 745 { }
746}; 746};
747MODULE_DEVICE_TABLE(usb, rtsx_usb_usb_ids);
747 748
748static struct usb_driver rtsx_usb_driver = { 749static struct usb_driver rtsx_usb_driver = {
749 .name = "rtsx_usb", 750 .name = "rtsx_usb",
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 452782bffebc..ede41f05c392 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2028,8 +2028,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
2028 /* complete ongoing async transfer before issuing discard */ 2028 /* complete ongoing async transfer before issuing discard */
2029 if (card->host->areq) 2029 if (card->host->areq)
2030 mmc_blk_issue_rw_rq(mq, NULL); 2030 mmc_blk_issue_rw_rq(mq, NULL);
2031 if (req->cmd_flags & REQ_SECURE && 2031 if (req->cmd_flags & REQ_SECURE)
2032 !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
2033 ret = mmc_blk_issue_secdiscard_rq(mq, req); 2032 ret = mmc_blk_issue_secdiscard_rq(mq, req);
2034 else 2033 else
2035 ret = mmc_blk_issue_discard_rq(mq, req); 2034 ret = mmc_blk_issue_discard_rq(mq, req);
@@ -2432,6 +2431,8 @@ static int mmc_blk_probe(struct mmc_card *card)
2432 if (!(card->csd.cmdclass & CCC_BLOCK_READ)) 2431 if (!(card->csd.cmdclass & CCC_BLOCK_READ))
2433 return -ENODEV; 2432 return -ENODEV;
2434 2433
2434 mmc_fixup_device(card, blk_fixups);
2435
2435 md = mmc_blk_alloc(card); 2436 md = mmc_blk_alloc(card);
2436 if (IS_ERR(md)) 2437 if (IS_ERR(md))
2437 return PTR_ERR(md); 2438 return PTR_ERR(md);
@@ -2446,7 +2447,6 @@ static int mmc_blk_probe(struct mmc_card *card)
2446 goto out; 2447 goto out;
2447 2448
2448 mmc_set_drvdata(card, md); 2449 mmc_set_drvdata(card, md);
2449 mmc_fixup_device(card, blk_fixups);
2450 2450
2451 if (mmc_add_disk(md)) 2451 if (mmc_add_disk(md))
2452 goto out; 2452 goto out;
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index d2dbf02022bd..8a1f1240e058 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -180,7 +180,6 @@ static int mmc_bus_resume(struct device *dev)
180#endif 180#endif
181 181
182#ifdef CONFIG_PM_RUNTIME 182#ifdef CONFIG_PM_RUNTIME
183
184static int mmc_runtime_suspend(struct device *dev) 183static int mmc_runtime_suspend(struct device *dev)
185{ 184{
186 struct mmc_card *card = mmc_dev_to_card(dev); 185 struct mmc_card *card = mmc_dev_to_card(dev);
@@ -196,17 +195,10 @@ static int mmc_runtime_resume(struct device *dev)
196 195
197 return host->bus_ops->runtime_resume(host); 196 return host->bus_ops->runtime_resume(host);
198} 197}
199
200static int mmc_runtime_idle(struct device *dev)
201{
202 return 0;
203}
204
205#endif /* !CONFIG_PM_RUNTIME */ 198#endif /* !CONFIG_PM_RUNTIME */
206 199
207static const struct dev_pm_ops mmc_bus_pm_ops = { 200static const struct dev_pm_ops mmc_bus_pm_ops = {
208 SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, 201 SET_RUNTIME_PM_OPS(mmc_runtime_suspend, mmc_runtime_resume, NULL)
209 mmc_runtime_idle)
210 SET_SYSTEM_SLEEP_PM_OPS(mmc_bus_suspend, mmc_bus_resume) 202 SET_SYSTEM_SLEEP_PM_OPS(mmc_bus_suspend, mmc_bus_resume)
211}; 203};
212 204
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 7dc0c85fdb60..d03a080fb9cd 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -2102,7 +2102,8 @@ EXPORT_SYMBOL(mmc_can_sanitize);
2102 2102
2103int mmc_can_secure_erase_trim(struct mmc_card *card) 2103int mmc_can_secure_erase_trim(struct mmc_card *card)
2104{ 2104{
2105 if (card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) 2105 if ((card->ext_csd.sec_feature_support & EXT_CSD_SEC_ER_EN) &&
2106 !(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
2106 return 1; 2107 return 1;
2107 return 0; 2108 return 0;
2108} 2109}
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 793c6f7ddb04..1eda8dd8c867 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -324,13 +324,12 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
324 } 324 }
325 } 325 }
326 326
327 /*
328 * The EXT_CSD format is meant to be forward compatible. As long
329 * as CSD_STRUCTURE does not change, all values for EXT_CSD_REV
330 * are authorized, see JEDEC JESD84-B50 section B.8.
331 */
327 card->ext_csd.rev = ext_csd[EXT_CSD_REV]; 332 card->ext_csd.rev = ext_csd[EXT_CSD_REV];
328 if (card->ext_csd.rev > 7) {
329 pr_err("%s: unrecognised EXT_CSD revision %d\n",
330 mmc_hostname(card->host), card->ext_csd.rev);
331 err = -EINVAL;
332 goto out;
333 }
334 333
335 card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0]; 334 card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
336 card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1]; 335 card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c
index 6c36fccaa1ec..dd1d1e0fe322 100644
--- a/drivers/mmc/core/quirks.c
+++ b/drivers/mmc/core/quirks.c
@@ -91,7 +91,7 @@ void mmc_fixup_device(struct mmc_card *card, const struct mmc_fixup *table)
91 (f->cis_device == card->cis.device || 91 (f->cis_device == card->cis.device ||
92 f->cis_device == (u16) SDIO_ANY_ID) && 92 f->cis_device == (u16) SDIO_ANY_ID) &&
93 rev >= f->rev_start && rev <= f->rev_end) { 93 rev >= f->rev_start && rev <= f->rev_end) {
94 dev_dbg(&card->dev, "calling %pF\n", f->vendor_fixup); 94 dev_dbg(&card->dev, "calling %pf\n", f->vendor_fixup);
95 f->vendor_fixup(card, f->data); 95 f->vendor_fixup(card, f->data);
96 } 96 }
97 } 97 }
diff --git a/drivers/mmc/core/sd_ops.c b/drivers/mmc/core/sd_ops.c
index 274ef00b4463..48d0c93ba25a 100644
--- a/drivers/mmc/core/sd_ops.c
+++ b/drivers/mmc/core/sd_ops.c
@@ -184,6 +184,9 @@ int mmc_send_app_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr)
184 mmc_delay(10); 184 mmc_delay(10);
185 } 185 }
186 186
187 if (!i)
188 pr_err("%s: card never left busy state\n", mmc_hostname(host));
189
187 if (rocr && !mmc_host_is_spi(host)) 190 if (rocr && !mmc_host_is_spi(host))
188 *rocr = cmd.resp[0]; 191 *rocr = cmd.resp[0];
189 192
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index a5652548230a..451135822464 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -290,6 +290,18 @@ config MMC_MOXART
290 be found on some embedded hardware such as UC-7112-LX. 290 be found on some embedded hardware such as UC-7112-LX.
291 If you have a controller with this interface, say Y here. 291 If you have a controller with this interface, say Y here.
292 292
293config MMC_SDHCI_ST
294 tristate "SDHCI support on STMicroelectronics SoC"
295 depends on ARCH_STI
296 depends on MMC_SDHCI_PLTFM
297 select MMC_SDHCI_IO_ACCESSORS
298 help
299 This selects the Secure Digital Host Controller Interface in
300 STMicroelectronics SoCs.
301
302 If you have a controller with this interface, say Y or M here.
303 If unsure, say N.
304
293config MMC_OMAP 305config MMC_OMAP
294 tristate "TI OMAP Multimedia Card Interface support" 306 tristate "TI OMAP Multimedia Card Interface support"
295 depends on ARCH_OMAP 307 depends on ARCH_OMAP
@@ -303,6 +315,7 @@ config MMC_OMAP
303 315
304config MMC_OMAP_HS 316config MMC_OMAP_HS
305 tristate "TI OMAP High Speed Multimedia Card Interface support" 317 tristate "TI OMAP High Speed Multimedia Card Interface support"
318 depends on HAS_DMA
306 depends on ARCH_OMAP2PLUS || COMPILE_TEST 319 depends on ARCH_OMAP2PLUS || COMPILE_TEST
307 help 320 help
308 This selects the TI OMAP High Speed Multimedia card Interface. 321 This selects the TI OMAP High Speed Multimedia card Interface.
@@ -343,7 +356,7 @@ config MMC_ATMELMCI
343 356
344config MMC_SDHCI_MSM 357config MMC_SDHCI_MSM
345 tristate "Qualcomm SDHCI Controller Support" 358 tristate "Qualcomm SDHCI Controller Support"
346 depends on ARCH_QCOM 359 depends on ARCH_QCOM || (ARM && COMPILE_TEST)
347 depends on MMC_SDHCI_PLTFM 360 depends on MMC_SDHCI_PLTFM
348 help 361 help
349 This selects the Secure Digital Host Controller Interface (SDHCI) 362 This selects the Secure Digital Host Controller Interface (SDHCI)
@@ -440,6 +453,7 @@ config MMC_SPI
440config MMC_S3C 453config MMC_S3C
441 tristate "Samsung S3C SD/MMC Card Interface support" 454 tristate "Samsung S3C SD/MMC Card Interface support"
442 depends on ARCH_S3C24XX 455 depends on ARCH_S3C24XX
456 depends on S3C24XX_DMAC
443 help 457 help
444 This selects a driver for the MCI interface found in 458 This selects a driver for the MCI interface found in
445 Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs. 459 Samsung's S3C2410, S3C2412, S3C2440, S3C2442 CPUs.
@@ -477,15 +491,6 @@ config MMC_S3C_DMA
477 working properly and needs to be debugged before this 491 working properly and needs to be debugged before this
478 option is useful. 492 option is useful.
479 493
480config MMC_S3C_PIODMA
481 bool "Support for both PIO and DMA"
482 help
483 Compile both the PIO and DMA transfer routines into the
484 driver and let the platform select at run-time which one
485 is best.
486
487 See notes for the DMA option.
488
489endchoice 494endchoice
490 495
491config MMC_SDRICOH_CS 496config MMC_SDRICOH_CS
@@ -623,7 +628,7 @@ config MMC_DW_PCI
623 628
624config MMC_SH_MMCIF 629config MMC_SH_MMCIF
625 tristate "SuperH Internal MMCIF support" 630 tristate "SuperH Internal MMCIF support"
626 depends on MMC_BLOCK 631 depends on MMC_BLOCK && HAS_DMA
627 depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST 632 depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
628 help 633 help
629 This selects the MMC Host Interface controller (MMCIF). 634 This selects the MMC Host Interface controller (MMCIF).
@@ -697,6 +702,7 @@ config MMC_WMT
697 702
698config MMC_USDHI6ROL0 703config MMC_USDHI6ROL0
699 tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support" 704 tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support"
705 depends on HAS_DMA
700 help 706 help
701 This selects support for the Renesas USDHI6ROL0 SD/SDIO 707 This selects support for the Renesas USDHI6ROL0 SD/SDIO
702 Host Controller 708 Host Controller
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 7f81ddf1dd2c..f211eede8db5 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_MMC_SDHCI_OF_HLWD) += sdhci-of-hlwd.o
68obj-$(CONFIG_MMC_SDHCI_BCM_KONA) += sdhci-bcm-kona.o 68obj-$(CONFIG_MMC_SDHCI_BCM_KONA) += sdhci-bcm-kona.o
69obj-$(CONFIG_MMC_SDHCI_BCM2835) += sdhci-bcm2835.o 69obj-$(CONFIG_MMC_SDHCI_BCM2835) += sdhci-bcm2835.o
70obj-$(CONFIG_MMC_SDHCI_MSM) += sdhci-msm.o 70obj-$(CONFIG_MMC_SDHCI_MSM) += sdhci-msm.o
71obj-$(CONFIG_MMC_SDHCI_ST) += sdhci-st.o
71 72
72ifeq ($(CONFIG_CB710_DEBUG),y) 73ifeq ($(CONFIG_CB710_DEBUG),y)
73 CFLAGS-cb710-mmc += -DDEBUG 74 CFLAGS-cb710-mmc += -DDEBUG
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index 1ac227c603b7..8f216edbdf08 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -111,8 +111,7 @@ static const u8 tuning_blk_pattern_8bit[] = {
111 0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 111 0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
112}; 112};
113 113
114static inline bool dw_mci_fifo_reset(struct dw_mci *host); 114static bool dw_mci_reset(struct dw_mci *host);
115static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host);
116 115
117#if defined(CONFIG_DEBUG_FS) 116#if defined(CONFIG_DEBUG_FS)
118static int dw_mci_req_show(struct seq_file *s, void *v) 117static int dw_mci_req_show(struct seq_file *s, void *v)
@@ -997,7 +996,8 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
997 int gpio_ro = mmc_gpio_get_ro(mmc); 996 int gpio_ro = mmc_gpio_get_ro(mmc);
998 997
999 /* Use platform get_ro function, else try on board write protect */ 998 /* Use platform get_ro function, else try on board write protect */
1000 if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) 999 if ((slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT) ||
1000 (slot->host->quirks & DW_MCI_QUIRK_NO_WRITE_PROTECT))
1001 read_only = 0; 1001 read_only = 0;
1002 else if (!IS_ERR_VALUE(gpio_ro)) 1002 else if (!IS_ERR_VALUE(gpio_ro))
1003 read_only = gpio_ro; 1003 read_only = gpio_ro;
@@ -1235,7 +1235,7 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data)
1235 * After an error, there may be data lingering 1235 * After an error, there may be data lingering
1236 * in the FIFO 1236 * in the FIFO
1237 */ 1237 */
1238 dw_mci_fifo_reset(host); 1238 dw_mci_reset(host);
1239 } else { 1239 } else {
1240 data->bytes_xfered = data->blocks * data->blksz; 1240 data->bytes_xfered = data->blocks * data->blksz;
1241 data->error = 0; 1241 data->error = 0;
@@ -1352,7 +1352,7 @@ static void dw_mci_tasklet_func(unsigned long priv)
1352 1352
1353 /* CMD error in data command */ 1353 /* CMD error in data command */
1354 if (mrq->cmd->error && mrq->data) 1354 if (mrq->cmd->error && mrq->data)
1355 dw_mci_fifo_reset(host); 1355 dw_mci_reset(host);
1356 1356
1357 host->cmd = NULL; 1357 host->cmd = NULL;
1358 host->data = NULL; 1358 host->data = NULL;
@@ -1963,14 +1963,8 @@ static void dw_mci_work_routine_card(struct work_struct *work)
1963 } 1963 }
1964 1964
1965 /* Power down slot */ 1965 /* Power down slot */
1966 if (present == 0) { 1966 if (present == 0)
1967 /* Clear down the FIFO */ 1967 dw_mci_reset(host);
1968 dw_mci_fifo_reset(host);
1969#ifdef CONFIG_MMC_DW_IDMAC
1970 dw_mci_idmac_reset(host);
1971#endif
1972
1973 }
1974 1968
1975 spin_unlock_bh(&host->lock); 1969 spin_unlock_bh(&host->lock);
1976 1970
@@ -2021,8 +2015,11 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
2021 2015
2022 /* get quirks */ 2016 /* get quirks */
2023 for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++) 2017 for (idx = 0; idx < ARRAY_SIZE(of_slot_quirks); idx++)
2024 if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) 2018 if (of_get_property(np, of_slot_quirks[idx].quirk, NULL)) {
2019 dev_warn(dev, "Slot quirk %s is deprecated\n",
2020 of_slot_quirks[idx].quirk);
2025 quirks |= of_slot_quirks[idx].id; 2021 quirks |= of_slot_quirks[idx].id;
2022 }
2026 2023
2027 return quirks; 2024 return quirks;
2028} 2025}
@@ -2208,8 +2205,11 @@ static bool dw_mci_ctrl_reset(struct dw_mci *host, u32 reset)
2208 return false; 2205 return false;
2209} 2206}
2210 2207
2211static inline bool dw_mci_fifo_reset(struct dw_mci *host) 2208static bool dw_mci_reset(struct dw_mci *host)
2212{ 2209{
2210 u32 flags = SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET;
2211 bool ret = false;
2212
2213 /* 2213 /*
2214 * Resetting generates a block interrupt, hence setting 2214 * Resetting generates a block interrupt, hence setting
2215 * the scatter-gather pointer to NULL. 2215 * the scatter-gather pointer to NULL.
@@ -2219,15 +2219,60 @@ static inline bool dw_mci_fifo_reset(struct dw_mci *host)
2219 host->sg = NULL; 2219 host->sg = NULL;
2220 } 2220 }
2221 2221
2222 return dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET); 2222 if (host->use_dma)
2223} 2223 flags |= SDMMC_CTRL_DMA_RESET;
2224 2224
2225static inline bool dw_mci_ctrl_all_reset(struct dw_mci *host) 2225 if (dw_mci_ctrl_reset(host, flags)) {
2226{ 2226 /*
2227 return dw_mci_ctrl_reset(host, 2227 * In all cases we clear the RAWINTS register to clear any
2228 SDMMC_CTRL_FIFO_RESET | 2228 * interrupts.
2229 SDMMC_CTRL_RESET | 2229 */
2230 SDMMC_CTRL_DMA_RESET); 2230 mci_writel(host, RINTSTS, 0xFFFFFFFF);
2231
2232 /* if using dma we wait for dma_req to clear */
2233 if (host->use_dma) {
2234 unsigned long timeout = jiffies + msecs_to_jiffies(500);
2235 u32 status;
2236 do {
2237 status = mci_readl(host, STATUS);
2238 if (!(status & SDMMC_STATUS_DMA_REQ))
2239 break;
2240 cpu_relax();
2241 } while (time_before(jiffies, timeout));
2242
2243 if (status & SDMMC_STATUS_DMA_REQ) {
2244 dev_err(host->dev,
2245 "%s: Timeout waiting for dma_req to "
2246 "clear during reset\n", __func__);
2247 goto ciu_out;
2248 }
2249
2250 /* when using DMA next we reset the fifo again */
2251 if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_FIFO_RESET))
2252 goto ciu_out;
2253 }
2254 } else {
2255 /* if the controller reset bit did clear, then set clock regs */
2256 if (!(mci_readl(host, CTRL) & SDMMC_CTRL_RESET)) {
2257 dev_err(host->dev, "%s: fifo/dma reset bits didn't "
2258 "clear but ciu was reset, doing clock update\n",
2259 __func__);
2260 goto ciu_out;
2261 }
2262 }
2263
2264#if IS_ENABLED(CONFIG_MMC_DW_IDMAC)
2265 /* It is also recommended that we reset and reprogram idmac */
2266 dw_mci_idmac_reset(host);
2267#endif
2268
2269 ret = true;
2270
2271ciu_out:
2272 /* After a CTRL reset we need to have CIU set clock registers */
2273 mci_send_cmd(host->cur_slot, SDMMC_CMD_UPD_CLK, 0);
2274
2275 return ret;
2231} 2276}
2232 2277
2233#ifdef CONFIG_OF 2278#ifdef CONFIG_OF
@@ -2238,6 +2283,9 @@ static struct dw_mci_of_quirks {
2238 { 2283 {
2239 .quirk = "broken-cd", 2284 .quirk = "broken-cd",
2240 .id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION, 2285 .id = DW_MCI_QUIRK_BROKEN_CARD_DETECTION,
2286 }, {
2287 .quirk = "disable-wp",
2288 .id = DW_MCI_QUIRK_NO_WRITE_PROTECT,
2241 }, 2289 },
2242}; 2290};
2243 2291
@@ -2425,7 +2473,7 @@ int dw_mci_probe(struct dw_mci *host)
2425 } 2473 }
2426 2474
2427 /* Reset all blocks */ 2475 /* Reset all blocks */
2428 if (!dw_mci_ctrl_all_reset(host)) 2476 if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS))
2429 return -ENODEV; 2477 return -ENODEV;
2430 2478
2431 host->dma_ops = host->pdata->dma_ops; 2479 host->dma_ops = host->pdata->dma_ops;
@@ -2612,7 +2660,7 @@ int dw_mci_resume(struct dw_mci *host)
2612 } 2660 }
2613 } 2661 }
2614 2662
2615 if (!dw_mci_ctrl_all_reset(host)) { 2663 if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) {
2616 ret = -ENODEV; 2664 ret = -ENODEV;
2617 return ret; 2665 return ret;
2618 } 2666 }
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 738fa241d058..08fd956d81f3 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -129,6 +129,7 @@
129#define SDMMC_CMD_INDX(n) ((n) & 0x1F) 129#define SDMMC_CMD_INDX(n) ((n) & 0x1F)
130/* Status register defines */ 130/* Status register defines */
131#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF) 131#define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF)
132#define SDMMC_STATUS_DMA_REQ BIT(31)
132/* FIFOTH register defines */ 133/* FIFOTH register defines */
133#define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \ 134#define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \
134 ((r) & 0xFFF) << 16 | \ 135 ((r) & 0xFFF) << 16 | \
@@ -150,6 +151,10 @@
150/* Card read threshold */ 151/* Card read threshold */
151#define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x)) 152#define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x))
152 153
154/* All ctrl reset bits */
155#define SDMMC_CTRL_ALL_RESET_FLAGS \
156 (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET)
157
153/* Register access macros */ 158/* Register access macros */
154#define mci_readl(dev, reg) \ 159#define mci_readl(dev, reg) \
155 __raw_readl((dev)->regs + SDMMC_##reg) 160 __raw_readl((dev)->regs + SDMMC_##reg)
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7ad463e9741c..e4d470704150 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -52,34 +52,53 @@ static unsigned int fmax = 515633;
52 * struct variant_data - MMCI variant-specific quirks 52 * struct variant_data - MMCI variant-specific quirks
53 * @clkreg: default value for MCICLOCK register 53 * @clkreg: default value for MCICLOCK register
54 * @clkreg_enable: enable value for MMCICLOCK register 54 * @clkreg_enable: enable value for MMCICLOCK register
55 * @clkreg_8bit_bus_enable: enable value for 8 bit bus
56 * @clkreg_neg_edge_enable: enable value for inverted data/cmd output
55 * @datalength_bits: number of bits in the MMCIDATALENGTH register 57 * @datalength_bits: number of bits in the MMCIDATALENGTH register
56 * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY 58 * @fifosize: number of bytes that can be written when MMCI_TXFIFOEMPTY
57 * is asserted (likewise for RX) 59 * is asserted (likewise for RX)
58 * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY 60 * @fifohalfsize: number of bytes that can be written when MCI_TXFIFOHALFEMPTY
59 * is asserted (likewise for RX) 61 * is asserted (likewise for RX)
62 * @data_cmd_enable: enable value for data commands.
60 * @sdio: variant supports SDIO 63 * @sdio: variant supports SDIO
61 * @st_clkdiv: true if using a ST-specific clock divider algorithm 64 * @st_clkdiv: true if using a ST-specific clock divider algorithm
65 * @datactrl_mask_ddrmode: ddr mode mask in datactrl register.
62 * @blksz_datactrl16: true if Block size is at b16..b30 position in datactrl register 66 * @blksz_datactrl16: true if Block size is at b16..b30 position in datactrl register
67 * @blksz_datactrl4: true if Block size is at b4..b16 position in datactrl
68 * register
63 * @pwrreg_powerup: power up value for MMCIPOWER register 69 * @pwrreg_powerup: power up value for MMCIPOWER register
70 * @f_max: maximum clk frequency supported by the controller.
64 * @signal_direction: input/out direction of bus signals can be indicated 71 * @signal_direction: input/out direction of bus signals can be indicated
65 * @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock 72 * @pwrreg_clkgate: MMCIPOWER register must be used to gate the clock
66 * @busy_detect: true if busy detection on dat0 is supported 73 * @busy_detect: true if busy detection on dat0 is supported
 67 * @pwrreg_nopower: bits in MMCIPOWER don't control ext. power supply 74 * @pwrreg_nopower: bits in MMCIPOWER don't control ext. power supply
75 * @explicit_mclk_control: enable explicit mclk control in driver.
76 * @qcom_fifo: enables qcom specific fifo pio read logic.
77 * @reversed_irq_handling: handle data irq before cmd irq.
68 */ 78 */
69struct variant_data { 79struct variant_data {
70 unsigned int clkreg; 80 unsigned int clkreg;
71 unsigned int clkreg_enable; 81 unsigned int clkreg_enable;
82 unsigned int clkreg_8bit_bus_enable;
83 unsigned int clkreg_neg_edge_enable;
72 unsigned int datalength_bits; 84 unsigned int datalength_bits;
73 unsigned int fifosize; 85 unsigned int fifosize;
74 unsigned int fifohalfsize; 86 unsigned int fifohalfsize;
87 unsigned int data_cmd_enable;
88 unsigned int datactrl_mask_ddrmode;
75 bool sdio; 89 bool sdio;
76 bool st_clkdiv; 90 bool st_clkdiv;
77 bool blksz_datactrl16; 91 bool blksz_datactrl16;
92 bool blksz_datactrl4;
78 u32 pwrreg_powerup; 93 u32 pwrreg_powerup;
94 u32 f_max;
79 bool signal_direction; 95 bool signal_direction;
80 bool pwrreg_clkgate; 96 bool pwrreg_clkgate;
81 bool busy_detect; 97 bool busy_detect;
82 bool pwrreg_nopower; 98 bool pwrreg_nopower;
99 bool explicit_mclk_control;
100 bool qcom_fifo;
101 bool reversed_irq_handling;
83}; 102};
84 103
85static struct variant_data variant_arm = { 104static struct variant_data variant_arm = {
@@ -87,6 +106,8 @@ static struct variant_data variant_arm = {
87 .fifohalfsize = 8 * 4, 106 .fifohalfsize = 8 * 4,
88 .datalength_bits = 16, 107 .datalength_bits = 16,
89 .pwrreg_powerup = MCI_PWR_UP, 108 .pwrreg_powerup = MCI_PWR_UP,
109 .f_max = 100000000,
110 .reversed_irq_handling = true,
90}; 111};
91 112
92static struct variant_data variant_arm_extended_fifo = { 113static struct variant_data variant_arm_extended_fifo = {
@@ -94,6 +115,7 @@ static struct variant_data variant_arm_extended_fifo = {
94 .fifohalfsize = 64 * 4, 115 .fifohalfsize = 64 * 4,
95 .datalength_bits = 16, 116 .datalength_bits = 16,
96 .pwrreg_powerup = MCI_PWR_UP, 117 .pwrreg_powerup = MCI_PWR_UP,
118 .f_max = 100000000,
97}; 119};
98 120
99static struct variant_data variant_arm_extended_fifo_hwfc = { 121static struct variant_data variant_arm_extended_fifo_hwfc = {
@@ -102,15 +124,18 @@ static struct variant_data variant_arm_extended_fifo_hwfc = {
102 .clkreg_enable = MCI_ARM_HWFCEN, 124 .clkreg_enable = MCI_ARM_HWFCEN,
103 .datalength_bits = 16, 125 .datalength_bits = 16,
104 .pwrreg_powerup = MCI_PWR_UP, 126 .pwrreg_powerup = MCI_PWR_UP,
127 .f_max = 100000000,
105}; 128};
106 129
107static struct variant_data variant_u300 = { 130static struct variant_data variant_u300 = {
108 .fifosize = 16 * 4, 131 .fifosize = 16 * 4,
109 .fifohalfsize = 8 * 4, 132 .fifohalfsize = 8 * 4,
110 .clkreg_enable = MCI_ST_U300_HWFCEN, 133 .clkreg_enable = MCI_ST_U300_HWFCEN,
134 .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
111 .datalength_bits = 16, 135 .datalength_bits = 16,
112 .sdio = true, 136 .sdio = true,
113 .pwrreg_powerup = MCI_PWR_ON, 137 .pwrreg_powerup = MCI_PWR_ON,
138 .f_max = 100000000,
114 .signal_direction = true, 139 .signal_direction = true,
115 .pwrreg_clkgate = true, 140 .pwrreg_clkgate = true,
116 .pwrreg_nopower = true, 141 .pwrreg_nopower = true,
@@ -124,6 +149,7 @@ static struct variant_data variant_nomadik = {
124 .sdio = true, 149 .sdio = true,
125 .st_clkdiv = true, 150 .st_clkdiv = true,
126 .pwrreg_powerup = MCI_PWR_ON, 151 .pwrreg_powerup = MCI_PWR_ON,
152 .f_max = 100000000,
127 .signal_direction = true, 153 .signal_direction = true,
128 .pwrreg_clkgate = true, 154 .pwrreg_clkgate = true,
129 .pwrreg_nopower = true, 155 .pwrreg_nopower = true,
@@ -134,10 +160,13 @@ static struct variant_data variant_ux500 = {
134 .fifohalfsize = 8 * 4, 160 .fifohalfsize = 8 * 4,
135 .clkreg = MCI_CLK_ENABLE, 161 .clkreg = MCI_CLK_ENABLE,
136 .clkreg_enable = MCI_ST_UX500_HWFCEN, 162 .clkreg_enable = MCI_ST_UX500_HWFCEN,
163 .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
164 .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE,
137 .datalength_bits = 24, 165 .datalength_bits = 24,
138 .sdio = true, 166 .sdio = true,
139 .st_clkdiv = true, 167 .st_clkdiv = true,
140 .pwrreg_powerup = MCI_PWR_ON, 168 .pwrreg_powerup = MCI_PWR_ON,
169 .f_max = 100000000,
141 .signal_direction = true, 170 .signal_direction = true,
142 .pwrreg_clkgate = true, 171 .pwrreg_clkgate = true,
143 .busy_detect = true, 172 .busy_detect = true,
@@ -149,17 +178,38 @@ static struct variant_data variant_ux500v2 = {
149 .fifohalfsize = 8 * 4, 178 .fifohalfsize = 8 * 4,
150 .clkreg = MCI_CLK_ENABLE, 179 .clkreg = MCI_CLK_ENABLE,
151 .clkreg_enable = MCI_ST_UX500_HWFCEN, 180 .clkreg_enable = MCI_ST_UX500_HWFCEN,
181 .clkreg_8bit_bus_enable = MCI_ST_8BIT_BUS,
182 .clkreg_neg_edge_enable = MCI_ST_UX500_NEG_EDGE,
183 .datactrl_mask_ddrmode = MCI_ST_DPSM_DDRMODE,
152 .datalength_bits = 24, 184 .datalength_bits = 24,
153 .sdio = true, 185 .sdio = true,
154 .st_clkdiv = true, 186 .st_clkdiv = true,
155 .blksz_datactrl16 = true, 187 .blksz_datactrl16 = true,
156 .pwrreg_powerup = MCI_PWR_ON, 188 .pwrreg_powerup = MCI_PWR_ON,
189 .f_max = 100000000,
157 .signal_direction = true, 190 .signal_direction = true,
158 .pwrreg_clkgate = true, 191 .pwrreg_clkgate = true,
159 .busy_detect = true, 192 .busy_detect = true,
160 .pwrreg_nopower = true, 193 .pwrreg_nopower = true,
161}; 194};
162 195
196static struct variant_data variant_qcom = {
197 .fifosize = 16 * 4,
198 .fifohalfsize = 8 * 4,
199 .clkreg = MCI_CLK_ENABLE,
200 .clkreg_enable = MCI_QCOM_CLK_FLOWENA |
201 MCI_QCOM_CLK_SELECT_IN_FBCLK,
202 .clkreg_8bit_bus_enable = MCI_QCOM_CLK_WIDEBUS_8,
203 .datactrl_mask_ddrmode = MCI_QCOM_CLK_SELECT_IN_DDR_MODE,
204 .data_cmd_enable = MCI_QCOM_CSPM_DATCMD,
205 .blksz_datactrl4 = true,
206 .datalength_bits = 24,
207 .pwrreg_powerup = MCI_PWR_UP,
208 .f_max = 208000000,
209 .explicit_mclk_control = true,
210 .qcom_fifo = true,
211};
212
163static int mmci_card_busy(struct mmc_host *mmc) 213static int mmci_card_busy(struct mmc_host *mmc)
164{ 214{
165 struct mmci_host *host = mmc_priv(mmc); 215 struct mmci_host *host = mmc_priv(mmc);
@@ -260,7 +310,9 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
260 host->cclk = 0; 310 host->cclk = 0;
261 311
262 if (desired) { 312 if (desired) {
263 if (desired >= host->mclk) { 313 if (variant->explicit_mclk_control) {
314 host->cclk = host->mclk;
315 } else if (desired >= host->mclk) {
264 clk = MCI_CLK_BYPASS; 316 clk = MCI_CLK_BYPASS;
265 if (variant->st_clkdiv) 317 if (variant->st_clkdiv)
266 clk |= MCI_ST_UX500_NEG_EDGE; 318 clk |= MCI_ST_UX500_NEG_EDGE;
@@ -299,11 +351,11 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
299 if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4) 351 if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_4)
300 clk |= MCI_4BIT_BUS; 352 clk |= MCI_4BIT_BUS;
301 if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8) 353 if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8)
302 clk |= MCI_ST_8BIT_BUS; 354 clk |= variant->clkreg_8bit_bus_enable;
303 355
304 if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 || 356 if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
305 host->mmc->ios.timing == MMC_TIMING_MMC_DDR52) 357 host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
306 clk |= MCI_ST_UX500_NEG_EDGE; 358 clk |= variant->clkreg_neg_edge_enable;
307 359
308 mmci_write_clkreg(host, clk); 360 mmci_write_clkreg(host, clk);
309} 361}
@@ -719,7 +771,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
719 data->bytes_xfered = 0; 771 data->bytes_xfered = 0;
720 772
721 clks = (unsigned long long)data->timeout_ns * host->cclk; 773 clks = (unsigned long long)data->timeout_ns * host->cclk;
722 do_div(clks, 1000000000UL); 774 do_div(clks, NSEC_PER_SEC);
723 775
724 timeout = data->timeout_clks + (unsigned int)clks; 776 timeout = data->timeout_clks + (unsigned int)clks;
725 777
@@ -732,6 +784,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
732 784
733 if (variant->blksz_datactrl16) 785 if (variant->blksz_datactrl16)
734 datactrl = MCI_DPSM_ENABLE | (data->blksz << 16); 786 datactrl = MCI_DPSM_ENABLE | (data->blksz << 16);
787 else if (variant->blksz_datactrl4)
788 datactrl = MCI_DPSM_ENABLE | (data->blksz << 4);
735 else 789 else
736 datactrl = MCI_DPSM_ENABLE | blksz_bits << 4; 790 datactrl = MCI_DPSM_ENABLE | blksz_bits << 4;
737 791
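mmci_start_data() above turns the card's data timeout, given in nanoseconds, into clock cycles at the current cclk (the literal divisor is now spelled NSEC_PER_SEC) and adds the card's own cycle-count timeout on top. A worked example with illustrative numbers:

#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
    unsigned long long timeout_ns = 100000000ULL;  /* 100 ms from the card */
    unsigned int cclk = 26000000;                  /* 26 MHz card clock */
    unsigned int timeout_clks = 0;                 /* extra cycles from the card */

    unsigned long long clks = timeout_ns * cclk;
    clks /= NSEC_PER_SEC;                          /* do_div() in the kernel */

    unsigned int timeout = timeout_clks + (unsigned int)clks;
    printf("data timeout: %u clock cycles\n", timeout);  /* 2600000 */
    return 0;
}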
@@ -767,7 +821,7 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
767 821
768 if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 || 822 if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
769 host->mmc->ios.timing == MMC_TIMING_MMC_DDR52) 823 host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
770 datactrl |= MCI_ST_DPSM_DDRMODE; 824 datactrl |= variant->datactrl_mask_ddrmode;
771 825
772 /* 826 /*
773 * Attempt to use DMA operation mode, if this 827 * Attempt to use DMA operation mode, if this
@@ -812,7 +866,7 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
812 866
813 if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) { 867 if (readl(base + MMCICOMMAND) & MCI_CPSM_ENABLE) {
814 writel(0, base + MMCICOMMAND); 868 writel(0, base + MMCICOMMAND);
815 udelay(1); 869 mmci_reg_delay(host);
816 } 870 }
817 871
818 c |= cmd->opcode | MCI_CPSM_ENABLE; 872 c |= cmd->opcode | MCI_CPSM_ENABLE;
@@ -824,6 +878,9 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
824 if (/*interrupt*/0) 878 if (/*interrupt*/0)
825 c |= MCI_CPSM_INTERRUPT; 879 c |= MCI_CPSM_INTERRUPT;
826 880
881 if (mmc_cmd_type(cmd) == MMC_CMD_ADTC)
882 c |= host->variant->data_cmd_enable;
883
827 host->cmd = cmd; 884 host->cmd = cmd;
828 885
829 writel(cmd->arg, base + MMCIARGUMENT); 886 writel(cmd->arg, base + MMCIARGUMENT);
@@ -834,6 +891,10 @@ static void
834mmci_data_irq(struct mmci_host *host, struct mmc_data *data, 891mmci_data_irq(struct mmci_host *host, struct mmc_data *data,
835 unsigned int status) 892 unsigned int status)
836{ 893{
894 /* Make sure we have data to handle */
895 if (!data)
896 return;
897
837 /* First check for errors */ 898 /* First check for errors */
838 if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR| 899 if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR|
839 MCI_TXUNDERRUN|MCI_RXOVERRUN)) { 900 MCI_TXUNDERRUN|MCI_RXOVERRUN)) {
@@ -902,9 +963,17 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
902 unsigned int status) 963 unsigned int status)
903{ 964{
904 void __iomem *base = host->base; 965 void __iomem *base = host->base;
905 bool sbc = (cmd == host->mrq->sbc); 966 bool sbc, busy_resp;
906 bool busy_resp = host->variant->busy_detect && 967
907 (cmd->flags & MMC_RSP_BUSY); 968 if (!cmd)
969 return;
970
971 sbc = (cmd == host->mrq->sbc);
972 busy_resp = host->variant->busy_detect && (cmd->flags & MMC_RSP_BUSY);
973
974 if (!((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT|
975 MCI_CMDSENT|MCI_CMDRESPEND)))
976 return;
908 977
909 /* Check if we need to wait for busy completion. */ 978 /* Check if we need to wait for busy completion. */
910 if (host->busy_status && (status & MCI_ST_CARDBUSY)) 979 if (host->busy_status && (status & MCI_ST_CARDBUSY))
@@ -957,15 +1026,34 @@ mmci_cmd_irq(struct mmci_host *host, struct mmc_command *cmd,
957 } 1026 }
958} 1027}
959 1028
1029static int mmci_get_rx_fifocnt(struct mmci_host *host, u32 status, int remain)
1030{
1031 return remain - (readl(host->base + MMCIFIFOCNT) << 2);
1032}
1033
1034static int mmci_qcom_get_rx_fifocnt(struct mmci_host *host, u32 status, int r)
1035{
1036 /*
1037 * On qcom SDCC4 only 8 words are used in each burst, so only 8 addresses
1038 * from the fifo range should be used
1039 */
1040 if (status & MCI_RXFIFOHALFFULL)
1041 return host->variant->fifohalfsize;
1042 else if (status & MCI_RXDATAAVLBL)
1043 return 4;
1044
1045 return 0;
1046}
1047
960static int mmci_pio_read(struct mmci_host *host, char *buffer, unsigned int remain) 1048static int mmci_pio_read(struct mmci_host *host, char *buffer, unsigned int remain)
961{ 1049{
962 void __iomem *base = host->base; 1050 void __iomem *base = host->base;
963 char *ptr = buffer; 1051 char *ptr = buffer;
964 u32 status; 1052 u32 status = readl(host->base + MMCISTATUS);
965 int host_remain = host->size; 1053 int host_remain = host->size;
966 1054
967 do { 1055 do {
968 int count = host_remain - (readl(base + MMCIFIFOCNT) << 2); 1056 int count = host->get_rx_fifocnt(host, status, host_remain);
969 1057
970 if (count > remain) 1058 if (count > remain)
971 count = remain; 1059 count = remain;
@@ -1132,9 +1220,6 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
1132 spin_lock(&host->lock); 1220 spin_lock(&host->lock);
1133 1221
1134 do { 1222 do {
1135 struct mmc_command *cmd;
1136 struct mmc_data *data;
1137
1138 status = readl(host->base + MMCISTATUS); 1223 status = readl(host->base + MMCISTATUS);
1139 1224
1140 if (host->singleirq) { 1225 if (host->singleirq) {
@@ -1154,16 +1239,13 @@ static irqreturn_t mmci_irq(int irq, void *dev_id)
1154 1239
1155 dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status); 1240 dev_dbg(mmc_dev(host->mmc), "irq0 (data+cmd) %08x\n", status);
1156 1241
1157 cmd = host->cmd; 1242 if (host->variant->reversed_irq_handling) {
1158 if ((status|host->busy_status) & (MCI_CMDCRCFAIL|MCI_CMDTIMEOUT| 1243 mmci_data_irq(host, host->data, status);
1159 MCI_CMDSENT|MCI_CMDRESPEND) && cmd) 1244 mmci_cmd_irq(host, host->cmd, status);
1160 mmci_cmd_irq(host, cmd, status); 1245 } else {
1161 1246 mmci_cmd_irq(host, host->cmd, status);
1162 data = host->data; 1247 mmci_data_irq(host, host->data, status);
1163 if (status & (MCI_DATACRCFAIL|MCI_DATATIMEOUT|MCI_STARTBITERR| 1248 }
1164 MCI_TXUNDERRUN|MCI_RXOVERRUN|MCI_DATAEND|
1165 MCI_DATABLOCKEND) && data)
1166 mmci_data_irq(host, data, status);
1167 1249
1168 /* Don't poll for busy completion in irq context. */ 1250 /* Don't poll for busy completion in irq context. */
1169 if (host->busy_status) 1251 if (host->busy_status)
@@ -1296,6 +1378,17 @@ static void mmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
1296 if (!ios->clock && variant->pwrreg_clkgate) 1378 if (!ios->clock && variant->pwrreg_clkgate)
1297 pwr &= ~MCI_PWR_ON; 1379 pwr &= ~MCI_PWR_ON;
1298 1380
1381 if (host->variant->explicit_mclk_control &&
1382 ios->clock != host->clock_cache) {
1383 ret = clk_set_rate(host->clk, ios->clock);
1384 if (ret < 0)
1385 dev_err(mmc_dev(host->mmc),
1386 "Error setting clock rate (%d)\n", ret);
1387 else
1388 host->mclk = clk_get_rate(host->clk);
1389 }
1390 host->clock_cache = ios->clock;
1391
1299 spin_lock_irqsave(&host->lock, flags); 1392 spin_lock_irqsave(&host->lock, flags);
1300 1393
1301 mmci_set_clkreg(host, ios->clock); 1394 mmci_set_clkreg(host, ios->clock);
@@ -1443,6 +1536,11 @@ static int mmci_probe(struct amba_device *dev,
1443 if (ret) 1536 if (ret)
1444 goto host_free; 1537 goto host_free;
1445 1538
1539 if (variant->qcom_fifo)
1540 host->get_rx_fifocnt = mmci_qcom_get_rx_fifocnt;
1541 else
1542 host->get_rx_fifocnt = mmci_get_rx_fifocnt;
1543
1446 host->plat = plat; 1544 host->plat = plat;
1447 host->variant = variant; 1545 host->variant = variant;
1448 host->mclk = clk_get_rate(host->clk); 1546 host->mclk = clk_get_rate(host->clk);
@@ -1451,8 +1549,8 @@ static int mmci_probe(struct amba_device *dev,
1451 * so we try to adjust the clock down to this, 1549 * so we try to adjust the clock down to this,
1452 * (if possible). 1550 * (if possible).
1453 */ 1551 */
1454 if (host->mclk > 100000000) { 1552 if (host->mclk > variant->f_max) {
1455 ret = clk_set_rate(host->clk, 100000000); 1553 ret = clk_set_rate(host->clk, variant->f_max);
1456 if (ret < 0) 1554 if (ret < 0)
1457 goto clk_disable; 1555 goto clk_disable;
1458 host->mclk = clk_get_rate(host->clk); 1556 host->mclk = clk_get_rate(host->clk);
@@ -1471,9 +1569,12 @@ static int mmci_probe(struct amba_device *dev,
1471 * The ARM and ST versions of the block have slightly different 1569 * The ARM and ST versions of the block have slightly different
1472 * clock divider equations which means that the minimum divider 1570 * clock divider equations which means that the minimum divider
1473 * differs too. 1571 * differs too.
1572 * On Qualcomm-like controllers, get the nearest minimum clock to 100 kHz
1474 */ 1573 */
1475 if (variant->st_clkdiv) 1574 if (variant->st_clkdiv)
1476 mmc->f_min = DIV_ROUND_UP(host->mclk, 257); 1575 mmc->f_min = DIV_ROUND_UP(host->mclk, 257);
1576 else if (variant->explicit_mclk_control)
1577 mmc->f_min = clk_round_rate(host->clk, 100000);
1477 else 1578 else
1478 mmc->f_min = DIV_ROUND_UP(host->mclk, 512); 1579 mmc->f_min = DIV_ROUND_UP(host->mclk, 512);
1479 /* 1580 /*
@@ -1483,9 +1584,14 @@ static int mmci_probe(struct amba_device *dev,
1483 * the block, of course. 1584 * the block, of course.
1484 */ 1585 */
1485 if (mmc->f_max) 1586 if (mmc->f_max)
1486 mmc->f_max = min(host->mclk, mmc->f_max); 1587 mmc->f_max = variant->explicit_mclk_control ?
1588 min(variant->f_max, mmc->f_max) :
1589 min(host->mclk, mmc->f_max);
1487 else 1590 else
1488 mmc->f_max = min(host->mclk, fmax); 1591 mmc->f_max = variant->explicit_mclk_control ?
1592 fmax : min(host->mclk, fmax);
1593
1594
1489 dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max); 1595 dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
1490 1596
1491 /* Get regulators and the supported OCR mask */ 1597 /* Get regulators and the supported OCR mask */
@@ -1752,6 +1858,12 @@ static struct amba_id mmci_ids[] = {
1752 .mask = 0xf0ffffff, 1858 .mask = 0xf0ffffff,
1753 .data = &variant_ux500v2, 1859 .data = &variant_ux500v2,
1754 }, 1860 },
1861 /* Qualcomm variants */
1862 {
1863 .id = 0x00051180,
1864 .mask = 0x000fffff,
1865 .data = &variant_qcom,
1866 },
1755 { 0, 0 }, 1867 { 0, 0 },
1756}; 1868};
1757 1869
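The mmci.c hunks above hinge on the new explicit_mclk_control variant flag: set_ios() programs the module clock directly through clk_set_rate(), f_min comes from clk_round_rate() near 100 kHz instead of a divider, and f_max is clamped to the variant ceiling. A minimal, standalone sketch of that frequency selection follows; the names (variant_sketch, pick_f_min, pick_f_max) are illustrative and clk_round_rate() is modelled as identity, so this is a reading aid rather than the driver's code.

/* Plain-C sketch (not kernel code) of the f_min/f_max selection above. */
#include <stdio.h>
#include <stdbool.h>

struct variant_sketch {
	bool st_clkdiv;
	bool explicit_mclk_control;
	unsigned int f_max;		/* variant ceiling, e.g. 208 MHz */
};

static unsigned int umin(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int div_round_up(unsigned int n, unsigned int d) { return (n + d - 1) / d; }
static unsigned int round_rate(unsigned int hz) { return hz; }	/* stand-in for clk_round_rate() */

static unsigned int pick_f_min(const struct variant_sketch *v, unsigned int mclk)
{
	if (v->st_clkdiv)
		return div_round_up(mclk, 257);
	if (v->explicit_mclk_control)
		return round_rate(100000);	/* nearest rate to 100 kHz */
	return div_round_up(mclk, 512);
}

static unsigned int pick_f_max(const struct variant_sketch *v, unsigned int mclk,
			       unsigned int dt_f_max, unsigned int plat_fmax)
{
	if (dt_f_max)
		return v->explicit_mclk_control ? umin(v->f_max, dt_f_max)
						: umin(mclk, dt_f_max);
	return v->explicit_mclk_control ? plat_fmax : umin(mclk, plat_fmax);
}

int main(void)
{
	struct variant_sketch qcom = { .explicit_mclk_control = true, .f_max = 208000000 };

	printf("f_min=%u f_max=%u\n",
	       pick_f_min(&qcom, 192000000),
	       pick_f_max(&qcom, 192000000, 0, 100000000));
	return 0;
}
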
diff --git a/drivers/mmc/host/mmci.h b/drivers/mmc/host/mmci.h
index 347d942d740b..a1f5e4f49e2a 100644
--- a/drivers/mmc/host/mmci.h
+++ b/drivers/mmc/host/mmci.h
@@ -41,6 +41,15 @@
41/* Modified PL180 on Versatile Express platform */ 41/* Modified PL180 on Versatile Express platform */
42#define MCI_ARM_HWFCEN (1 << 12) 42#define MCI_ARM_HWFCEN (1 << 12)
43 43
44/* Modified on Qualcomm Integrations */
45#define MCI_QCOM_CLK_WIDEBUS_8 (BIT(10) | BIT(11))
46#define MCI_QCOM_CLK_FLOWENA BIT(12)
47#define MCI_QCOM_CLK_INVERTOUT BIT(13)
48
49/* select in latch data and command in */
50#define MCI_QCOM_CLK_SELECT_IN_FBCLK BIT(15)
51#define MCI_QCOM_CLK_SELECT_IN_DDR_MODE (BIT(14) | BIT(15))
52
44#define MMCIARGUMENT 0x008 53#define MMCIARGUMENT 0x008
45#define MMCICOMMAND 0x00c 54#define MMCICOMMAND 0x00c
46#define MCI_CPSM_RESPONSE (1 << 6) 55#define MCI_CPSM_RESPONSE (1 << 6)
@@ -54,6 +63,14 @@
54#define MCI_ST_NIEN (1 << 13) 63#define MCI_ST_NIEN (1 << 13)
55#define MCI_ST_CE_ATACMD (1 << 14) 64#define MCI_ST_CE_ATACMD (1 << 14)
56 65
66/* Modified on Qualcomm Integrations */
67#define MCI_QCOM_CSPM_DATCMD BIT(12)
68#define MCI_QCOM_CSPM_MCIABORT BIT(13)
69#define MCI_QCOM_CSPM_CCSENABLE BIT(14)
70#define MCI_QCOM_CSPM_CCSDISABLE BIT(15)
71#define MCI_QCOM_CSPM_AUTO_CMD19 BIT(16)
72#define MCI_QCOM_CSPM_AUTO_CMD21 BIT(21)
73
57#define MMCIRESPCMD 0x010 74#define MMCIRESPCMD 0x010
58#define MMCIRESPONSE0 0x014 75#define MMCIRESPONSE0 0x014
59#define MMCIRESPONSE1 0x018 76#define MMCIRESPONSE1 0x018
@@ -191,6 +208,8 @@ struct mmci_host {
191 spinlock_t lock; 208 spinlock_t lock;
192 209
193 unsigned int mclk; 210 unsigned int mclk;
211 /* cached value of requested clk in set_ios */
212 unsigned int clock_cache;
194 unsigned int cclk; 213 unsigned int cclk;
195 u32 pwr_reg; 214 u32 pwr_reg;
196 u32 pwr_reg_add; 215 u32 pwr_reg_add;
@@ -210,6 +229,7 @@ struct mmci_host {
210 /* pio stuff */ 229 /* pio stuff */
211 struct sg_mapping_iter sg_miter; 230 struct sg_mapping_iter sg_miter;
212 unsigned int size; 231 unsigned int size;
232 int (*get_rx_fifocnt)(struct mmci_host *h, u32 status, int remain);
213 233
214#ifdef CONFIG_DMA_ENGINE 234#ifdef CONFIG_DMA_ENGINE
215 /* DMA stuff */ 235 /* DMA stuff */
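The clock_cache field added to struct mmci_host backs the set_ios() hunk further up: clk_set_rate() is only issued when the requested frequency actually changes. A small plain-C sketch of that guard, with set_rate()/get_rate() standing in for clk_set_rate()/clk_get_rate() and all other detail stripped:

#include <stdio.h>

static unsigned int hw_rate;

static int set_rate(unsigned int hz) { hw_rate = hz; return 0; }
static unsigned int get_rate(void) { return hw_rate; }

struct host_sketch {
	unsigned int mclk;
	unsigned int clock_cache;
	int set_rate_calls;
};

static void set_ios_clock(struct host_sketch *h, unsigned int requested)
{
	if (requested != h->clock_cache) {
		h->set_rate_calls++;
		if (set_rate(requested) == 0)
			h->mclk = get_rate();	/* re-read the achieved rate */
	}
	h->clock_cache = requested;
}

int main(void)
{
	struct host_sketch h = { 0 };

	set_ios_clock(&h, 400000);	/* programs the clock */
	set_ios_clock(&h, 400000);	/* cached, no re-programming */
	set_ios_clock(&h, 25000000);	/* programs again */
	printf("mclk=%u, clk_set_rate calls=%d\n", h.mclk, h.set_rate_calls);
	return 0;
}
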
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 74924a04026e..b4b1efbf6c16 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -13,7 +13,6 @@
13 * warranty of any kind, whether express or implied. 13 * warranty of any kind, whether express or implied.
14 */ 14 */
15 15
16#include <linux/version.h>
17#include <linux/module.h> 16#include <linux/module.h>
18#include <linux/init.h> 17#include <linux/init.h>
19#include <linux/platform_device.h> 18#include <linux/platform_device.h>
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index babfea03ba8a..140885a5a4e7 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -86,7 +86,8 @@ static int mxs_mmc_get_cd(struct mmc_host *mmc)
86 if (ret >= 0) 86 if (ret >= 0)
87 return ret; 87 return ret;
88 88
89 present = !(readl(ssp->base + HW_SSP_STATUS(ssp)) & 89 present = mmc->caps & MMC_CAP_NEEDS_POLL ||
90 !(readl(ssp->base + HW_SSP_STATUS(ssp)) &
90 BM_SSP_STATUS_CARD_DETECT); 91 BM_SSP_STATUS_CARD_DETECT);
91 92
92 if (mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH) 93 if (mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH)
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index 6b7b75585926..965672663ef0 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -29,6 +29,7 @@
29#include <linux/timer.h> 29#include <linux/timer.h>
30#include <linux/clk.h> 30#include <linux/clk.h>
31#include <linux/of.h> 31#include <linux/of.h>
32#include <linux/of_irq.h>
32#include <linux/of_gpio.h> 33#include <linux/of_gpio.h>
33#include <linux/of_device.h> 34#include <linux/of_device.h>
34#include <linux/omap-dmaengine.h> 35#include <linux/omap-dmaengine.h>
@@ -36,6 +37,7 @@
36#include <linux/mmc/core.h> 37#include <linux/mmc/core.h>
37#include <linux/mmc/mmc.h> 38#include <linux/mmc/mmc.h>
38#include <linux/io.h> 39#include <linux/io.h>
40#include <linux/irq.h>
39#include <linux/gpio.h> 41#include <linux/gpio.h>
40#include <linux/regulator/consumer.h> 42#include <linux/regulator/consumer.h>
41#include <linux/pinctrl/consumer.h> 43#include <linux/pinctrl/consumer.h>
@@ -54,6 +56,7 @@
54#define OMAP_HSMMC_RSP54 0x0118 56#define OMAP_HSMMC_RSP54 0x0118
55#define OMAP_HSMMC_RSP76 0x011C 57#define OMAP_HSMMC_RSP76 0x011C
56#define OMAP_HSMMC_DATA 0x0120 58#define OMAP_HSMMC_DATA 0x0120
59#define OMAP_HSMMC_PSTATE 0x0124
57#define OMAP_HSMMC_HCTL 0x0128 60#define OMAP_HSMMC_HCTL 0x0128
58#define OMAP_HSMMC_SYSCTL 0x012C 61#define OMAP_HSMMC_SYSCTL 0x012C
59#define OMAP_HSMMC_STAT 0x0130 62#define OMAP_HSMMC_STAT 0x0130
@@ -91,7 +94,10 @@
91#define BCE (1 << 1) 94#define BCE (1 << 1)
92#define FOUR_BIT (1 << 1) 95#define FOUR_BIT (1 << 1)
93#define HSPE (1 << 2) 96#define HSPE (1 << 2)
97#define IWE (1 << 24)
94#define DDR (1 << 19) 98#define DDR (1 << 19)
99#define CLKEXTFREE (1 << 16)
100#define CTPL (1 << 11)
95#define DW8 (1 << 5) 101#define DW8 (1 << 5)
96#define OD 0x1 102#define OD 0x1
97#define STAT_CLEAR 0xFFFFFFFF 103#define STAT_CLEAR 0xFFFFFFFF
@@ -101,11 +107,15 @@
101#define SRD (1 << 26) 107#define SRD (1 << 26)
102#define SOFTRESET (1 << 1) 108#define SOFTRESET (1 << 1)
103 109
110/* PSTATE */
111#define DLEV_DAT(x) (1 << (20 + (x)))
112
104/* Interrupt masks for IE and ISE register */ 113/* Interrupt masks for IE and ISE register */
105#define CC_EN (1 << 0) 114#define CC_EN (1 << 0)
106#define TC_EN (1 << 1) 115#define TC_EN (1 << 1)
107#define BWR_EN (1 << 4) 116#define BWR_EN (1 << 4)
108#define BRR_EN (1 << 5) 117#define BRR_EN (1 << 5)
118#define CIRQ_EN (1 << 8)
109#define ERR_EN (1 << 15) 119#define ERR_EN (1 << 15)
110#define CTO_EN (1 << 16) 120#define CTO_EN (1 << 16)
111#define CCRC_EN (1 << 17) 121#define CCRC_EN (1 << 17)
@@ -140,7 +150,6 @@
140#define VDD_3V0 3000000 /* 300000 uV */ 150#define VDD_3V0 3000000 /* 300000 uV */
141#define VDD_165_195 (ffs(MMC_VDD_165_195) - 1) 151#define VDD_165_195 (ffs(MMC_VDD_165_195) - 1)
142 152
143#define AUTO_CMD23 (1 << 1) /* Auto CMD23 support */
144/* 153/*
145 * One controller can have multiple slots, like on some omap boards using 154 * One controller can have multiple slots, like on some omap boards using
146 * omap.c controller driver. Luckily this is not currently done on any known 155 * omap.c controller driver. Luckily this is not currently done on any known
@@ -194,6 +203,7 @@ struct omap_hsmmc_host {
194 u32 sysctl; 203 u32 sysctl;
195 u32 capa; 204 u32 capa;
196 int irq; 205 int irq;
206 int wake_irq;
197 int use_dma, dma_ch; 207 int use_dma, dma_ch;
198 struct dma_chan *tx_chan; 208 struct dma_chan *tx_chan;
199 struct dma_chan *rx_chan; 209 struct dma_chan *rx_chan;
@@ -206,6 +216,9 @@ struct omap_hsmmc_host {
206 int req_in_progress; 216 int req_in_progress;
207 unsigned long clk_rate; 217 unsigned long clk_rate;
208 unsigned int flags; 218 unsigned int flags;
219#define AUTO_CMD23 (1 << 0) /* Auto CMD23 support */
220#define HSMMC_SDIO_IRQ_ENABLED (1 << 1) /* SDIO irq enabled */
221#define HSMMC_WAKE_IRQ_ENABLED (1 << 2)
209 struct omap_hsmmc_next next_data; 222 struct omap_hsmmc_next next_data;
210 struct omap_mmc_platform_data *pdata; 223 struct omap_mmc_platform_data *pdata;
211}; 224};
@@ -510,27 +523,40 @@ static void omap_hsmmc_stop_clock(struct omap_hsmmc_host *host)
510static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host, 523static void omap_hsmmc_enable_irq(struct omap_hsmmc_host *host,
511 struct mmc_command *cmd) 524 struct mmc_command *cmd)
512{ 525{
513 unsigned int irq_mask; 526 u32 irq_mask = INT_EN_MASK;
527 unsigned long flags;
514 528
515 if (host->use_dma) 529 if (host->use_dma)
516 irq_mask = INT_EN_MASK & ~(BRR_EN | BWR_EN); 530 irq_mask &= ~(BRR_EN | BWR_EN);
517 else
518 irq_mask = INT_EN_MASK;
519 531
520 /* Disable timeout for erases */ 532 /* Disable timeout for erases */
521 if (cmd->opcode == MMC_ERASE) 533 if (cmd->opcode == MMC_ERASE)
522 irq_mask &= ~DTO_EN; 534 irq_mask &= ~DTO_EN;
523 535
536 spin_lock_irqsave(&host->irq_lock, flags);
524 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); 537 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
525 OMAP_HSMMC_WRITE(host->base, ISE, irq_mask); 538 OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
539
540 /* latch pending CIRQ, but don't signal MMC core */
541 if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
542 irq_mask |= CIRQ_EN;
526 OMAP_HSMMC_WRITE(host->base, IE, irq_mask); 543 OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
544 spin_unlock_irqrestore(&host->irq_lock, flags);
527} 545}
528 546
529static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host) 547static void omap_hsmmc_disable_irq(struct omap_hsmmc_host *host)
530{ 548{
531 OMAP_HSMMC_WRITE(host->base, ISE, 0); 549 u32 irq_mask = 0;
532 OMAP_HSMMC_WRITE(host->base, IE, 0); 550 unsigned long flags;
551
552 spin_lock_irqsave(&host->irq_lock, flags);
553 /* no transfer running but need to keep cirq if enabled */
554 if (host->flags & HSMMC_SDIO_IRQ_ENABLED)
555 irq_mask |= CIRQ_EN;
556 OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
557 OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
533 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR); 558 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
559 spin_unlock_irqrestore(&host->irq_lock, flags);
534} 560}
535 561
536/* Calculate divisor for the given clock frequency */ 562/* Calculate divisor for the given clock frequency */
@@ -667,6 +693,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
667 capa = VS18; 693 capa = VS18;
668 } 694 }
669 695
696 if (host->mmc->caps & MMC_CAP_SDIO_IRQ)
697 hctl |= IWE;
698
670 OMAP_HSMMC_WRITE(host->base, HCTL, 699 OMAP_HSMMC_WRITE(host->base, HCTL,
671 OMAP_HSMMC_READ(host->base, HCTL) | hctl); 700 OMAP_HSMMC_READ(host->base, HCTL) | hctl);
672 701
@@ -681,7 +710,9 @@ static int omap_hsmmc_context_restore(struct omap_hsmmc_host *host)
681 && time_before(jiffies, timeout)) 710 && time_before(jiffies, timeout))
682 ; 711 ;
683 712
684 omap_hsmmc_disable_irq(host); 713 OMAP_HSMMC_WRITE(host->base, ISE, 0);
714 OMAP_HSMMC_WRITE(host->base, IE, 0);
715 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
685 716
686 /* Do not initialize card-specific things if the power is off */ 717 /* Do not initialize card-specific things if the power is off */
687 if (host->power_mode == MMC_POWER_OFF) 718 if (host->power_mode == MMC_POWER_OFF)
@@ -1118,8 +1149,12 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
1118 int status; 1149 int status;
1119 1150
1120 status = OMAP_HSMMC_READ(host->base, STAT); 1151 status = OMAP_HSMMC_READ(host->base, STAT);
1121 while (status & INT_EN_MASK && host->req_in_progress) { 1152 while (status & (INT_EN_MASK | CIRQ_EN)) {
1122 omap_hsmmc_do_irq(host, status); 1153 if (host->req_in_progress)
1154 omap_hsmmc_do_irq(host, status);
1155
1156 if (status & CIRQ_EN)
1157 mmc_signal_sdio_irq(host->mmc);
1123 1158
1124 /* Flush posted write */ 1159 /* Flush posted write */
1125 status = OMAP_HSMMC_READ(host->base, STAT); 1160 status = OMAP_HSMMC_READ(host->base, STAT);
@@ -1128,6 +1163,22 @@ static irqreturn_t omap_hsmmc_irq(int irq, void *dev_id)
1128 return IRQ_HANDLED; 1163 return IRQ_HANDLED;
1129} 1164}
1130 1165
1166static irqreturn_t omap_hsmmc_wake_irq(int irq, void *dev_id)
1167{
1168 struct omap_hsmmc_host *host = dev_id;
1169
1170 /* cirq is level triggered, disable to avoid infinite loop */
1171 spin_lock(&host->irq_lock);
1172 if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
1173 disable_irq_nosync(host->wake_irq);
1174 host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
1175 }
1176 spin_unlock(&host->irq_lock);
1177 pm_request_resume(host->dev); /* no use counter */
1178
1179 return IRQ_HANDLED;
1180}
1181
1131static void set_sd_bus_power(struct omap_hsmmc_host *host) 1182static void set_sd_bus_power(struct omap_hsmmc_host *host)
1132{ 1183{
1133 unsigned long i; 1184 unsigned long i;
@@ -1639,6 +1690,103 @@ static void omap_hsmmc_init_card(struct mmc_host *mmc, struct mmc_card *card)
1639 mmc_slot(host).init_card(card); 1690 mmc_slot(host).init_card(card);
1640} 1691}
1641 1692
1693static void omap_hsmmc_enable_sdio_irq(struct mmc_host *mmc, int enable)
1694{
1695 struct omap_hsmmc_host *host = mmc_priv(mmc);
1696 u32 irq_mask, con;
1697 unsigned long flags;
1698
1699 spin_lock_irqsave(&host->irq_lock, flags);
1700
1701 con = OMAP_HSMMC_READ(host->base, CON);
1702 irq_mask = OMAP_HSMMC_READ(host->base, ISE);
1703 if (enable) {
1704 host->flags |= HSMMC_SDIO_IRQ_ENABLED;
1705 irq_mask |= CIRQ_EN;
1706 con |= CTPL | CLKEXTFREE;
1707 } else {
1708 host->flags &= ~HSMMC_SDIO_IRQ_ENABLED;
1709 irq_mask &= ~CIRQ_EN;
1710 con &= ~(CTPL | CLKEXTFREE);
1711 }
1712 OMAP_HSMMC_WRITE(host->base, CON, con);
1713 OMAP_HSMMC_WRITE(host->base, IE, irq_mask);
1714
1715 /*
1716 * if enable, piggy back detection on current request
1717 * but always disable immediately
1718 */
1719 if (!host->req_in_progress || !enable)
1720 OMAP_HSMMC_WRITE(host->base, ISE, irq_mask);
1721
1722 /* flush posted write */
1723 OMAP_HSMMC_READ(host->base, IE);
1724
1725 spin_unlock_irqrestore(&host->irq_lock, flags);
1726}
1727
1728static int omap_hsmmc_configure_wake_irq(struct omap_hsmmc_host *host)
1729{
1730 struct mmc_host *mmc = host->mmc;
1731 int ret;
1732
1733 /*
1734 * For omaps with wake-up path, wakeirq will be irq from pinctrl and
1735 * for other omaps, wakeirq will be from GPIO (dat line remuxed to
1736 * gpio). wakeirq is needed to detect sdio irq in runtime suspend state
1737 * with functional clock disabled.
1738 */
1739 if (!host->dev->of_node || !host->wake_irq)
1740 return -ENODEV;
1741
1742 /* Prevent auto-enabling of IRQ */
1743 irq_set_status_flags(host->wake_irq, IRQ_NOAUTOEN);
1744 ret = devm_request_irq(host->dev, host->wake_irq, omap_hsmmc_wake_irq,
1745 IRQF_TRIGGER_LOW | IRQF_ONESHOT,
1746 mmc_hostname(mmc), host);
1747 if (ret) {
1748 dev_err(mmc_dev(host->mmc), "Unable to request wake IRQ\n");
1749 goto err;
1750 }
1751
1752 /*
1753 * Some omaps don't have wake-up path from deeper idle states
1754 * and need to remux SDIO DAT1 to GPIO for wake-up from idle.
1755 */
1756 if (host->pdata->controller_flags & OMAP_HSMMC_SWAKEUP_MISSING) {
1757 struct pinctrl *p = devm_pinctrl_get(host->dev);
1758 if (!p) {
1759 ret = -ENODEV;
1760 goto err_free_irq;
1761 }
1762 if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_DEFAULT))) {
1763 dev_info(host->dev, "missing default pinctrl state\n");
1764 devm_pinctrl_put(p);
1765 ret = -EINVAL;
1766 goto err_free_irq;
1767 }
1768
1769 if (IS_ERR(pinctrl_lookup_state(p, PINCTRL_STATE_IDLE))) {
1770 dev_info(host->dev, "missing idle pinctrl state\n");
1771 devm_pinctrl_put(p);
1772 ret = -EINVAL;
1773 goto err_free_irq;
1774 }
1775 devm_pinctrl_put(p);
1776 }
1777
1778 OMAP_HSMMC_WRITE(host->base, HCTL,
1779 OMAP_HSMMC_READ(host->base, HCTL) | IWE);
1780 return 0;
1781
1782err_free_irq:
1783 devm_free_irq(host->dev, host->wake_irq, host);
1784err:
1785 dev_warn(host->dev, "no SDIO IRQ support, falling back to polling\n");
1786 host->wake_irq = 0;
1787 return ret;
1788}
1789
1642static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host) 1790static void omap_hsmmc_conf_bus_power(struct omap_hsmmc_host *host)
1643{ 1791{
1644 u32 hctl, capa, value; 1792 u32 hctl, capa, value;
@@ -1691,7 +1839,7 @@ static const struct mmc_host_ops omap_hsmmc_ops = {
1691 .get_cd = omap_hsmmc_get_cd, 1839 .get_cd = omap_hsmmc_get_cd,
1692 .get_ro = omap_hsmmc_get_ro, 1840 .get_ro = omap_hsmmc_get_ro,
1693 .init_card = omap_hsmmc_init_card, 1841 .init_card = omap_hsmmc_init_card,
1694 /* NYET -- enable_sdio_irq */ 1842 .enable_sdio_irq = omap_hsmmc_enable_sdio_irq,
1695}; 1843};
1696 1844
1697#ifdef CONFIG_DEBUG_FS 1845#ifdef CONFIG_DEBUG_FS
@@ -1701,13 +1849,23 @@ static int omap_hsmmc_regs_show(struct seq_file *s, void *data)
1701 struct mmc_host *mmc = s->private; 1849 struct mmc_host *mmc = s->private;
1702 struct omap_hsmmc_host *host = mmc_priv(mmc); 1850 struct omap_hsmmc_host *host = mmc_priv(mmc);
1703 1851
1704 seq_printf(s, "mmc%d:\n ctx_loss:\t%d\n\nregs:\n", 1852 seq_printf(s, "mmc%d:\n", mmc->index);
1705 mmc->index, host->context_loss); 1853 seq_printf(s, "sdio irq mode\t%s\n",
1854 (mmc->caps & MMC_CAP_SDIO_IRQ) ? "interrupt" : "polling");
1706 1855
1707 pm_runtime_get_sync(host->dev); 1856 if (mmc->caps & MMC_CAP_SDIO_IRQ) {
1857 seq_printf(s, "sdio irq \t%s\n",
1858 (host->flags & HSMMC_SDIO_IRQ_ENABLED) ? "enabled"
1859 : "disabled");
1860 }
1861 seq_printf(s, "ctx_loss:\t%d\n", host->context_loss);
1708 1862
1863 pm_runtime_get_sync(host->dev);
1864 seq_puts(s, "\nregs:\n");
1709 seq_printf(s, "CON:\t\t0x%08x\n", 1865 seq_printf(s, "CON:\t\t0x%08x\n",
1710 OMAP_HSMMC_READ(host->base, CON)); 1866 OMAP_HSMMC_READ(host->base, CON));
1867 seq_printf(s, "PSTATE:\t\t0x%08x\n",
1868 OMAP_HSMMC_READ(host->base, PSTATE));
1711 seq_printf(s, "HCTL:\t\t0x%08x\n", 1869 seq_printf(s, "HCTL:\t\t0x%08x\n",
1712 OMAP_HSMMC_READ(host->base, HCTL)); 1870 OMAP_HSMMC_READ(host->base, HCTL));
1713 seq_printf(s, "SYSCTL:\t\t0x%08x\n", 1871 seq_printf(s, "SYSCTL:\t\t0x%08x\n",
@@ -1761,6 +1919,10 @@ static const struct omap_mmc_of_data omap3_pre_es3_mmc_of_data = {
1761static const struct omap_mmc_of_data omap4_mmc_of_data = { 1919static const struct omap_mmc_of_data omap4_mmc_of_data = {
1762 .reg_offset = 0x100, 1920 .reg_offset = 0x100,
1763}; 1921};
1922static const struct omap_mmc_of_data am33xx_mmc_of_data = {
1923 .reg_offset = 0x100,
1924 .controller_flags = OMAP_HSMMC_SWAKEUP_MISSING,
1925};
1764 1926
1765static const struct of_device_id omap_mmc_of_match[] = { 1927static const struct of_device_id omap_mmc_of_match[] = {
1766 { 1928 {
@@ -1777,6 +1939,10 @@ static const struct of_device_id omap_mmc_of_match[] = {
1777 .compatible = "ti,omap4-hsmmc", 1939 .compatible = "ti,omap4-hsmmc",
1778 .data = &omap4_mmc_of_data, 1940 .data = &omap4_mmc_of_data,
1779 }, 1941 },
1942 {
1943 .compatible = "ti,am33xx-hsmmc",
1944 .data = &am33xx_mmc_of_data,
1945 },
1780 {}, 1946 {},
1781}; 1947};
1782MODULE_DEVICE_TABLE(of, omap_mmc_of_match); 1948MODULE_DEVICE_TABLE(of, omap_mmc_of_match);
@@ -1850,7 +2016,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
1850 const struct of_device_id *match; 2016 const struct of_device_id *match;
1851 dma_cap_mask_t mask; 2017 dma_cap_mask_t mask;
1852 unsigned tx_req, rx_req; 2018 unsigned tx_req, rx_req;
1853 struct pinctrl *pinctrl;
1854 const struct omap_mmc_of_data *data; 2019 const struct omap_mmc_of_data *data;
1855 void __iomem *base; 2020 void __iomem *base;
1856 2021
@@ -1913,6 +2078,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
1913 2078
1914 platform_set_drvdata(pdev, host); 2079 platform_set_drvdata(pdev, host);
1915 2080
2081 if (pdev->dev.of_node)
2082 host->wake_irq = irq_of_parse_and_map(pdev->dev.of_node, 1);
2083
1916 mmc->ops = &omap_hsmmc_ops; 2084 mmc->ops = &omap_hsmmc_ops;
1917 2085
1918 mmc->f_min = OMAP_MMC_MIN_CLOCK; 2086 mmc->f_min = OMAP_MMC_MIN_CLOCK;
@@ -2061,10 +2229,17 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
2061 2229
2062 omap_hsmmc_disable_irq(host); 2230 omap_hsmmc_disable_irq(host);
2063 2231
2064 pinctrl = devm_pinctrl_get_select_default(&pdev->dev); 2232 /*
2065 if (IS_ERR(pinctrl)) 2233 * For now, only support SDIO interrupt if we have a separate
2066 dev_warn(&pdev->dev, 2234 * wake-up interrupt configured from device tree. This is because
2067 "pins are not configured from the driver\n"); 2235 * the wake-up interrupt is needed for idle state and some
2236 * platforms need special quirks. And we don't want to add new
2237 * legacy mux platform init code callbacks any longer as we
 2238 * are moving to DT-based booting anyway.
2239 */
2240 ret = omap_hsmmc_configure_wake_irq(host);
2241 if (!ret)
2242 mmc->caps |= MMC_CAP_SDIO_IRQ;
2068 2243
2069 omap_hsmmc_protect_card(host); 2244 omap_hsmmc_protect_card(host);
2070 2245
@@ -2170,11 +2345,18 @@ static int omap_hsmmc_suspend(struct device *dev)
2170 pm_runtime_get_sync(host->dev); 2345 pm_runtime_get_sync(host->dev);
2171 2346
2172 if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) { 2347 if (!(host->mmc->pm_flags & MMC_PM_KEEP_POWER)) {
2173 omap_hsmmc_disable_irq(host); 2348 OMAP_HSMMC_WRITE(host->base, ISE, 0);
2349 OMAP_HSMMC_WRITE(host->base, IE, 0);
2350 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
2174 OMAP_HSMMC_WRITE(host->base, HCTL, 2351 OMAP_HSMMC_WRITE(host->base, HCTL,
2175 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP); 2352 OMAP_HSMMC_READ(host->base, HCTL) & ~SDBP);
2176 } 2353 }
2177 2354
2355 /* do not wake up due to sdio irq */
2356 if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
2357 !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
2358 disable_irq(host->wake_irq);
2359
2178 if (host->dbclk) 2360 if (host->dbclk)
2179 clk_disable_unprepare(host->dbclk); 2361 clk_disable_unprepare(host->dbclk);
2180 2362
@@ -2200,6 +2382,10 @@ static int omap_hsmmc_resume(struct device *dev)
2200 2382
2201 omap_hsmmc_protect_card(host); 2383 omap_hsmmc_protect_card(host);
2202 2384
2385 if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
2386 !(host->mmc->pm_flags & MMC_PM_WAKE_SDIO_IRQ))
2387 enable_irq(host->wake_irq);
2388
2203 pm_runtime_mark_last_busy(host->dev); 2389 pm_runtime_mark_last_busy(host->dev);
2204 pm_runtime_put_autosuspend(host->dev); 2390 pm_runtime_put_autosuspend(host->dev);
2205 return 0; 2391 return 0;
@@ -2215,22 +2401,77 @@ static int omap_hsmmc_resume(struct device *dev)
2215static int omap_hsmmc_runtime_suspend(struct device *dev) 2401static int omap_hsmmc_runtime_suspend(struct device *dev)
2216{ 2402{
2217 struct omap_hsmmc_host *host; 2403 struct omap_hsmmc_host *host;
2404 unsigned long flags;
2405 int ret = 0;
2218 2406
2219 host = platform_get_drvdata(to_platform_device(dev)); 2407 host = platform_get_drvdata(to_platform_device(dev));
2220 omap_hsmmc_context_save(host); 2408 omap_hsmmc_context_save(host);
2221 dev_dbg(dev, "disabled\n"); 2409 dev_dbg(dev, "disabled\n");
2222 2410
2223 return 0; 2411 spin_lock_irqsave(&host->irq_lock, flags);
2412 if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
2413 (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
2414 /* disable sdio irq handling to prevent race */
2415 OMAP_HSMMC_WRITE(host->base, ISE, 0);
2416 OMAP_HSMMC_WRITE(host->base, IE, 0);
2417
2418 if (!(OMAP_HSMMC_READ(host->base, PSTATE) & DLEV_DAT(1))) {
2419 /*
2420 * dat1 line low, pending sdio irq
2421 * race condition: possible irq handler running on
2422 * multi-core, abort
2423 */
2424 dev_dbg(dev, "pending sdio irq, abort suspend\n");
2425 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
2426 OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
2427 OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
2428 pm_runtime_mark_last_busy(dev);
2429 ret = -EBUSY;
2430 goto abort;
2431 }
2432
2433 pinctrl_pm_select_idle_state(dev);
2434
2435 WARN_ON(host->flags & HSMMC_WAKE_IRQ_ENABLED);
2436 enable_irq(host->wake_irq);
2437 host->flags |= HSMMC_WAKE_IRQ_ENABLED;
2438 } else {
2439 pinctrl_pm_select_idle_state(dev);
2440 }
2441
2442abort:
2443 spin_unlock_irqrestore(&host->irq_lock, flags);
2444 return ret;
2224} 2445}
2225 2446
2226static int omap_hsmmc_runtime_resume(struct device *dev) 2447static int omap_hsmmc_runtime_resume(struct device *dev)
2227{ 2448{
2228 struct omap_hsmmc_host *host; 2449 struct omap_hsmmc_host *host;
2450 unsigned long flags;
2229 2451
2230 host = platform_get_drvdata(to_platform_device(dev)); 2452 host = platform_get_drvdata(to_platform_device(dev));
2231 omap_hsmmc_context_restore(host); 2453 omap_hsmmc_context_restore(host);
2232 dev_dbg(dev, "enabled\n"); 2454 dev_dbg(dev, "enabled\n");
2233 2455
2456 spin_lock_irqsave(&host->irq_lock, flags);
2457 if ((host->mmc->caps & MMC_CAP_SDIO_IRQ) &&
2458 (host->flags & HSMMC_SDIO_IRQ_ENABLED)) {
2459 /* sdio irq flag can't change while in runtime suspend */
2460 if (host->flags & HSMMC_WAKE_IRQ_ENABLED) {
2461 disable_irq_nosync(host->wake_irq);
2462 host->flags &= ~HSMMC_WAKE_IRQ_ENABLED;
2463 }
2464
2465 pinctrl_pm_select_default_state(host->dev);
2466
2467 /* irq lost, if pinmux incorrect */
2468 OMAP_HSMMC_WRITE(host->base, STAT, STAT_CLEAR);
2469 OMAP_HSMMC_WRITE(host->base, ISE, CIRQ_EN);
2470 OMAP_HSMMC_WRITE(host->base, IE, CIRQ_EN);
2471 } else {
2472 pinctrl_pm_select_default_state(host->dev);
2473 }
2474 spin_unlock_irqrestore(&host->irq_lock, flags);
2234 return 0; 2475 return 0;
2235} 2476}
2236 2477
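The omap_hsmmc changes above keep SDIO card interrupts working across runtime suspend by handing detection to a separate wake IRQ (pinctrl- or GPIO-based, depending on the SoC). The sketch below models only the suspend-time decision, with simplified fields in place of the real host structure: a DAT1 line already low means a card interrupt is pending and the suspend is refused with -EBUSY; otherwise the wake IRQ is armed.

#include <stdbool.h>
#include <stdio.h>
#include <errno.h>

struct hsmmc_sketch {
	bool sdio_irq_enabled;		/* HSMMC_SDIO_IRQ_ENABLED */
	bool wake_irq_enabled;		/* HSMMC_WAKE_IRQ_ENABLED */
	bool dat1_high;			/* PSTATE DLEV_DAT(1) */
};

static int runtime_suspend_sketch(struct hsmmc_sketch *h)
{
	if (!h->sdio_irq_enabled)
		return 0;		/* nothing special to do */

	if (!h->dat1_high)
		return -EBUSY;		/* card is already signalling an IRQ */

	h->wake_irq_enabled = true;	/* models enable_irq(host->wake_irq) */
	return 0;
}

int main(void)
{
	struct hsmmc_sketch h = { .sdio_irq_enabled = true, .dat1_high = false };

	printf("pending IRQ: suspend -> %d\n", runtime_suspend_sketch(&h));
	h.dat1_high = true;
	printf("idle DAT1:   suspend -> %d, wake irq %s\n",
	       runtime_suspend_sketch(&h),
	       h.wake_irq_enabled ? "armed" : "off");
	return 0;
}
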
diff --git a/drivers/mmc/host/s3cmci.c b/drivers/mmc/host/s3cmci.c
index f23782683a7c..e5516a226362 100644
--- a/drivers/mmc/host/s3cmci.c
+++ b/drivers/mmc/host/s3cmci.c
@@ -12,6 +12,7 @@
12 */ 12 */
13 13
14#include <linux/module.h> 14#include <linux/module.h>
15#include <linux/dmaengine.h>
15#include <linux/dma-mapping.h> 16#include <linux/dma-mapping.h>
16#include <linux/clk.h> 17#include <linux/clk.h>
17#include <linux/mmc/host.h> 18#include <linux/mmc/host.h>
@@ -27,6 +28,7 @@
27#include <mach/dma.h> 28#include <mach/dma.h>
28#include <mach/gpio-samsung.h> 29#include <mach/gpio-samsung.h>
29 30
31#include <linux/platform_data/dma-s3c24xx.h>
30#include <linux/platform_data/mmc-s3cmci.h> 32#include <linux/platform_data/mmc-s3cmci.h>
31 33
32#include "s3cmci.h" 34#include "s3cmci.h"
@@ -140,10 +142,6 @@ static const int dbgmap_debug = dbg_err | dbg_debug;
140 dev_dbg(&host->pdev->dev, args); \ 142 dev_dbg(&host->pdev->dev, args); \
141 } while (0) 143 } while (0)
142 144
143static struct s3c2410_dma_client s3cmci_dma_client = {
144 .name = "s3c-mci",
145};
146
147static void finalize_request(struct s3cmci_host *host); 145static void finalize_request(struct s3cmci_host *host);
148static void s3cmci_send_request(struct mmc_host *mmc); 146static void s3cmci_send_request(struct mmc_host *mmc);
149static void s3cmci_reset(struct s3cmci_host *host); 147static void s3cmci_reset(struct s3cmci_host *host);
@@ -256,25 +254,8 @@ static inline bool s3cmci_host_usedma(struct s3cmci_host *host)
256{ 254{
257#ifdef CONFIG_MMC_S3C_PIO 255#ifdef CONFIG_MMC_S3C_PIO
258 return false; 256 return false;
259#elif defined(CONFIG_MMC_S3C_DMA) 257#else /* CONFIG_MMC_S3C_DMA */
260 return true; 258 return true;
261#else
262 return host->dodma;
263#endif
264}
265
266/**
267 * s3cmci_host_canpio - return true if host has pio code available
268 *
269 * Return true if the driver has been compiled with the PIO support code
270 * available.
271 */
272static inline bool s3cmci_host_canpio(void)
273{
274#ifdef CONFIG_MMC_S3C_PIO
275 return true;
276#else
277 return false;
278#endif 259#endif
279} 260}
280 261
@@ -841,60 +822,24 @@ static irqreturn_t s3cmci_irq_cd(int irq, void *dev_id)
841 return IRQ_HANDLED; 822 return IRQ_HANDLED;
842} 823}
843 824
844static void s3cmci_dma_done_callback(struct s3c2410_dma_chan *dma_ch, 825static void s3cmci_dma_done_callback(void *arg)
845 void *buf_id, int size,
846 enum s3c2410_dma_buffresult result)
847{ 826{
848 struct s3cmci_host *host = buf_id; 827 struct s3cmci_host *host = arg;
849 unsigned long iflags; 828 unsigned long iflags;
850 u32 mci_csta, mci_dsta, mci_fsta, mci_dcnt;
851
852 mci_csta = readl(host->base + S3C2410_SDICMDSTAT);
853 mci_dsta = readl(host->base + S3C2410_SDIDSTA);
854 mci_fsta = readl(host->base + S3C2410_SDIFSTA);
855 mci_dcnt = readl(host->base + S3C2410_SDIDCNT);
856 829
857 BUG_ON(!host->mrq); 830 BUG_ON(!host->mrq);
858 BUG_ON(!host->mrq->data); 831 BUG_ON(!host->mrq->data);
859 BUG_ON(!host->dmatogo);
860 832
861 spin_lock_irqsave(&host->complete_lock, iflags); 833 spin_lock_irqsave(&host->complete_lock, iflags);
862 834
863 if (result != S3C2410_RES_OK) { 835 dbg(host, dbg_dma, "DMA FINISHED\n");
864 dbg(host, dbg_fail, "DMA FAILED: csta=0x%08x dsta=0x%08x "
865 "fsta=0x%08x dcnt:0x%08x result:0x%08x toGo:%u\n",
866 mci_csta, mci_dsta, mci_fsta,
867 mci_dcnt, result, host->dmatogo);
868
869 goto fail_request;
870 }
871
872 host->dmatogo--;
873 if (host->dmatogo) {
874 dbg(host, dbg_dma, "DMA DONE Size:%i DSTA:[%08x] "
875 "DCNT:[%08x] toGo:%u\n",
876 size, mci_dsta, mci_dcnt, host->dmatogo);
877
878 goto out;
879 }
880
881 dbg(host, dbg_dma, "DMA FINISHED Size:%i DSTA:%08x DCNT:%08x\n",
882 size, mci_dsta, mci_dcnt);
883 836
884 host->dma_complete = 1; 837 host->dma_complete = 1;
885 host->complete_what = COMPLETION_FINALIZE; 838 host->complete_what = COMPLETION_FINALIZE;
886 839
887out:
888 tasklet_schedule(&host->pio_tasklet); 840 tasklet_schedule(&host->pio_tasklet);
889 spin_unlock_irqrestore(&host->complete_lock, iflags); 841 spin_unlock_irqrestore(&host->complete_lock, iflags);
890 return;
891 842
892fail_request:
893 host->mrq->data->error = -EINVAL;
894 host->complete_what = COMPLETION_FINALIZE;
895 clear_imask(host);
896
897 goto out;
898} 843}
899 844
900static void finalize_request(struct s3cmci_host *host) 845static void finalize_request(struct s3cmci_host *host)
@@ -966,7 +911,7 @@ static void finalize_request(struct s3cmci_host *host)
966 * DMA channel and the fifo to clear out any garbage. */ 911 * DMA channel and the fifo to clear out any garbage. */
967 if (mrq->data->error != 0) { 912 if (mrq->data->error != 0) {
968 if (s3cmci_host_usedma(host)) 913 if (s3cmci_host_usedma(host))
969 s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); 914 dmaengine_terminate_all(host->dma);
970 915
971 if (host->is2440) { 916 if (host->is2440) {
972 /* Clear failure register and reset fifo. */ 917 /* Clear failure register and reset fifo. */
@@ -992,29 +937,6 @@ request_done:
992 mmc_request_done(host->mmc, mrq); 937 mmc_request_done(host->mmc, mrq);
993} 938}
994 939
995static void s3cmci_dma_setup(struct s3cmci_host *host,
996 enum dma_data_direction source)
997{
998 static enum dma_data_direction last_source = -1;
999 static int setup_ok;
1000
1001 if (last_source == source)
1002 return;
1003
1004 last_source = source;
1005
1006 s3c2410_dma_devconfig(host->dma, source,
1007 host->mem->start + host->sdidata);
1008
1009 if (!setup_ok) {
1010 s3c2410_dma_config(host->dma, 4);
1011 s3c2410_dma_set_buffdone_fn(host->dma,
1012 s3cmci_dma_done_callback);
1013 s3c2410_dma_setflags(host->dma, S3C2410_DMAF_AUTOSTART);
1014 setup_ok = 1;
1015 }
1016}
1017
1018static void s3cmci_send_command(struct s3cmci_host *host, 940static void s3cmci_send_command(struct s3cmci_host *host,
1019 struct mmc_command *cmd) 941 struct mmc_command *cmd)
1020{ 942{
@@ -1162,43 +1084,45 @@ static int s3cmci_prepare_pio(struct s3cmci_host *host, struct mmc_data *data)
1162 1084
1163static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data) 1085static int s3cmci_prepare_dma(struct s3cmci_host *host, struct mmc_data *data)
1164{ 1086{
1165 int dma_len, i;
1166 int rw = data->flags & MMC_DATA_WRITE; 1087 int rw = data->flags & MMC_DATA_WRITE;
1088 struct dma_async_tx_descriptor *desc;
1089 struct dma_slave_config conf = {
1090 .src_addr = host->mem->start + host->sdidata,
1091 .dst_addr = host->mem->start + host->sdidata,
1092 .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
1093 .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
1094 };
1167 1095
1168 BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR); 1096 BUG_ON((data->flags & BOTH_DIR) == BOTH_DIR);
1169 1097
1170 s3cmci_dma_setup(host, rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 1098 /* Restore prescaler value */
1171 s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); 1099 writel(host->prescaler, host->base + S3C2410_SDIPRE);
1172
1173 dma_len = dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
1174 rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
1175
1176 if (dma_len == 0)
1177 return -ENOMEM;
1178
1179 host->dma_complete = 0;
1180 host->dmatogo = dma_len;
1181
1182 for (i = 0; i < dma_len; i++) {
1183 int res;
1184
1185 dbg(host, dbg_dma, "enqueue %i: %08x@%u\n", i,
1186 sg_dma_address(&data->sg[i]),
1187 sg_dma_len(&data->sg[i]));
1188 1100
1189 res = s3c2410_dma_enqueue(host->dma, host, 1101 if (!rw)
1190 sg_dma_address(&data->sg[i]), 1102 conf.direction = DMA_DEV_TO_MEM;
1191 sg_dma_len(&data->sg[i])); 1103 else
1104 conf.direction = DMA_MEM_TO_DEV;
1192 1105
1193 if (res) { 1106 dma_map_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
1194 s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_FLUSH); 1107 rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
1195 return -EBUSY;
1196 }
1197 }
1198 1108
1199 s3c2410_dma_ctrl(host->dma, S3C2410_DMAOP_START); 1109 dmaengine_slave_config(host->dma, &conf);
1110 desc = dmaengine_prep_slave_sg(host->dma, data->sg, data->sg_len,
1111 conf.direction,
1112 DMA_CTRL_ACK | DMA_PREP_INTERRUPT);
1113 if (!desc)
1114 goto unmap_exit;
1115 desc->callback = s3cmci_dma_done_callback;
1116 desc->callback_param = host;
1117 dmaengine_submit(desc);
1118 dma_async_issue_pending(host->dma);
1200 1119
1201 return 0; 1120 return 0;
1121
1122unmap_exit:
1123 dma_unmap_sg(mmc_dev(host->mmc), data->sg, data->sg_len,
1124 rw ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
1125 return -ENOMEM;
1202} 1126}
1203 1127
1204static void s3cmci_send_request(struct mmc_host *mmc) 1128static void s3cmci_send_request(struct mmc_host *mmc)
@@ -1676,10 +1600,6 @@ static int s3cmci_probe(struct platform_device *pdev)
1676 host->complete_what = COMPLETION_NONE; 1600 host->complete_what = COMPLETION_NONE;
1677 host->pio_active = XFER_NONE; 1601 host->pio_active = XFER_NONE;
1678 1602
1679#ifdef CONFIG_MMC_S3C_PIODMA
1680 host->dodma = host->pdata->use_dma;
1681#endif
1682
1683 host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1603 host->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1684 if (!host->mem) { 1604 if (!host->mem) {
1685 dev_err(&pdev->dev, 1605 dev_err(&pdev->dev,
@@ -1765,17 +1685,17 @@ static int s3cmci_probe(struct platform_device *pdev)
1765 /* depending on the dma state, get a dma channel to use. */ 1685 /* depending on the dma state, get a dma channel to use. */
1766 1686
1767 if (s3cmci_host_usedma(host)) { 1687 if (s3cmci_host_usedma(host)) {
1768 host->dma = s3c2410_dma_request(DMACH_SDI, &s3cmci_dma_client, 1688 dma_cap_mask_t mask;
1769 host); 1689
1770 if (host->dma < 0) { 1690 dma_cap_zero(mask);
1691 dma_cap_set(DMA_SLAVE, mask);
1692
1693 host->dma = dma_request_slave_channel_compat(mask,
1694 s3c24xx_dma_filter, (void *)DMACH_SDI, &pdev->dev, "rx-tx");
1695 if (!host->dma) {
1771 dev_err(&pdev->dev, "cannot get DMA channel.\n"); 1696 dev_err(&pdev->dev, "cannot get DMA channel.\n");
1772 if (!s3cmci_host_canpio()) { 1697 ret = -EBUSY;
1773 ret = -EBUSY; 1698 goto probe_free_gpio_wp;
1774 goto probe_free_gpio_wp;
1775 } else {
1776 dev_warn(&pdev->dev, "falling back to PIO.\n");
1777 host->dodma = 0;
1778 }
1779 } 1699 }
1780 } 1700 }
1781 1701
@@ -1787,7 +1707,7 @@ static int s3cmci_probe(struct platform_device *pdev)
1787 goto probe_free_dma; 1707 goto probe_free_dma;
1788 } 1708 }
1789 1709
1790 ret = clk_enable(host->clk); 1710 ret = clk_prepare_enable(host->clk);
1791 if (ret) { 1711 if (ret) {
1792 dev_err(&pdev->dev, "failed to enable clock source.\n"); 1712 dev_err(&pdev->dev, "failed to enable clock source.\n");
1793 goto clk_free; 1713 goto clk_free;
@@ -1816,7 +1736,7 @@ static int s3cmci_probe(struct platform_device *pdev)
1816 mmc->max_segs = 128; 1736 mmc->max_segs = 128;
1817 1737
1818 dbg(host, dbg_debug, 1738 dbg(host, dbg_debug,
1819 "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%u.\n", 1739 "probe: mode:%s mapped mci_base:%p irq:%u irq_cd:%u dma:%p.\n",
1820 (host->is2440?"2440":""), 1740 (host->is2440?"2440":""),
1821 host->base, host->irq, host->irq_cd, host->dma); 1741 host->base, host->irq, host->irq_cd, host->dma);
1822 1742
@@ -1845,14 +1765,14 @@ static int s3cmci_probe(struct platform_device *pdev)
1845 s3cmci_cpufreq_deregister(host); 1765 s3cmci_cpufreq_deregister(host);
1846 1766
1847 free_dmabuf: 1767 free_dmabuf:
1848 clk_disable(host->clk); 1768 clk_disable_unprepare(host->clk);
1849 1769
1850 clk_free: 1770 clk_free:
1851 clk_put(host->clk); 1771 clk_put(host->clk);
1852 1772
1853 probe_free_dma: 1773 probe_free_dma:
1854 if (s3cmci_host_usedma(host)) 1774 if (s3cmci_host_usedma(host))
1855 s3c2410_dma_free(host->dma, &s3cmci_dma_client); 1775 dma_release_channel(host->dma);
1856 1776
1857 probe_free_gpio_wp: 1777 probe_free_gpio_wp:
1858 if (!host->pdata->no_wprotect) 1778 if (!host->pdata->no_wprotect)
@@ -1897,7 +1817,7 @@ static void s3cmci_shutdown(struct platform_device *pdev)
1897 s3cmci_debugfs_remove(host); 1817 s3cmci_debugfs_remove(host);
1898 s3cmci_cpufreq_deregister(host); 1818 s3cmci_cpufreq_deregister(host);
1899 mmc_remove_host(mmc); 1819 mmc_remove_host(mmc);
1900 clk_disable(host->clk); 1820 clk_disable_unprepare(host->clk);
1901} 1821}
1902 1822
1903static int s3cmci_remove(struct platform_device *pdev) 1823static int s3cmci_remove(struct platform_device *pdev)
@@ -1914,7 +1834,7 @@ static int s3cmci_remove(struct platform_device *pdev)
1914 tasklet_disable(&host->pio_tasklet); 1834 tasklet_disable(&host->pio_tasklet);
1915 1835
1916 if (s3cmci_host_usedma(host)) 1836 if (s3cmci_host_usedma(host))
1917 s3c2410_dma_free(host->dma, &s3cmci_dma_client); 1837 dma_release_channel(host->dma);
1918 1838
1919 free_irq(host->irq, host); 1839 free_irq(host->irq, host);
1920 1840
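The s3cmci conversion drops the private s3c2410 DMA calls for the generic dmaengine API: one dma_slave_config aimed at the SDI data FIFO, dmaengine_prep_slave_sg() over the whole scatterlist, and a single completion callback instead of per-buffer bookkeeping. The standalone sketch below only illustrates how that one config flips direction with the request; the FIFO address is a placeholder, not the real SDIDATA location.

#include <stdio.h>

enum dma_dir { DEV_TO_MEM, MEM_TO_DEV };

struct slave_conf_sketch {
	unsigned long src_addr;		/* FIFO for reads */
	unsigned long dst_addr;		/* FIFO for writes */
	unsigned int addr_width;	/* 4-byte FIFO accesses */
	enum dma_dir direction;
};

static struct slave_conf_sketch make_conf(unsigned long fifo, int is_write)
{
	struct slave_conf_sketch c = {
		.src_addr = fifo,
		.dst_addr = fifo,
		.addr_width = 4,
		.direction = is_write ? MEM_TO_DEV : DEV_TO_MEM,
	};
	return c;
}

int main(void)
{
	const unsigned long fifo = 0x1000;	/* placeholder FIFO address */
	struct slave_conf_sketch rd = make_conf(fifo, 0);
	struct slave_conf_sketch wr = make_conf(fifo, 1);

	printf("read  -> %s\nwrite -> %s\n",
	       rd.direction == DEV_TO_MEM ? "DEV_TO_MEM" : "MEM_TO_DEV",
	       wr.direction == MEM_TO_DEV ? "MEM_TO_DEV" : "DEV_TO_MEM");
	return 0;
}
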
diff --git a/drivers/mmc/host/s3cmci.h b/drivers/mmc/host/s3cmci.h
index c76b53dbeb61..cc2e46cb5c64 100644
--- a/drivers/mmc/host/s3cmci.h
+++ b/drivers/mmc/host/s3cmci.h
@@ -26,7 +26,7 @@ struct s3cmci_host {
26 void __iomem *base; 26 void __iomem *base;
27 int irq; 27 int irq;
28 int irq_cd; 28 int irq_cd;
29 int dma; 29 struct dma_chan *dma;
30 30
31 unsigned long clk_rate; 31 unsigned long clk_rate;
32 unsigned long clk_div; 32 unsigned long clk_div;
@@ -36,8 +36,6 @@ struct s3cmci_host {
36 int is2440; 36 int is2440;
37 unsigned sdiimsk; 37 unsigned sdiimsk;
38 unsigned sdidata; 38 unsigned sdidata;
39 int dodma;
40 int dmatogo;
41 39
42 bool irq_disabled; 40 bool irq_disabled;
43 bool irq_enabled; 41 bool irq_enabled;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index 8ce3c28cb76e..8c5337002c51 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -124,9 +124,11 @@ static const struct sdhci_acpi_chip sdhci_acpi_chip_int = {
124 124
125static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = { 125static const struct sdhci_acpi_slot sdhci_acpi_slot_int_emmc = {
126 .chip = &sdhci_acpi_chip_int, 126 .chip = &sdhci_acpi_chip_int,
127 .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | MMC_CAP_HW_RESET, 127 .caps = MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
128 MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR,
128 .caps2 = MMC_CAP2_HC_ERASE_SZ, 129 .caps2 = MMC_CAP2_HC_ERASE_SZ,
129 .flags = SDHCI_ACPI_RUNTIME_PM, 130 .flags = SDHCI_ACPI_RUNTIME_PM,
131 .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
130}; 132};
131 133
132static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = { 134static const struct sdhci_acpi_slot sdhci_acpi_slot_int_sdio = {
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index 40573a58486a..1a6661ed6205 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -16,7 +16,6 @@
16 16
17#include <linux/module.h> 17#include <linux/module.h>
18#include <linux/of_device.h> 18#include <linux/of_device.h>
19#include <linux/regulator/consumer.h>
20#include <linux/delay.h> 19#include <linux/delay.h>
21#include <linux/mmc/mmc.h> 20#include <linux/mmc/mmc.h>
22#include <linux/slab.h> 21#include <linux/slab.h>
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index 52c42fcc284c..c3a1debc9289 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -103,6 +103,10 @@ static const struct sdhci_pci_fixes sdhci_cafe = {
103 SDHCI_QUIRK_BROKEN_TIMEOUT_VAL, 103 SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
104}; 104};
105 105
106static const struct sdhci_pci_fixes sdhci_intel_qrk = {
107 .quirks = SDHCI_QUIRK_NO_HISPD_BIT,
108};
109
106static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot) 110static int mrst_hc_probe_slot(struct sdhci_pci_slot *slot)
107{ 111{
108 slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA; 112 slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA;
@@ -264,7 +268,7 @@ static void sdhci_pci_int_hw_reset(struct sdhci_host *host)
264static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot) 268static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
265{ 269{
266 slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE | 270 slot->host->mmc->caps |= MMC_CAP_8_BIT_DATA | MMC_CAP_NONREMOVABLE |
267 MMC_CAP_HW_RESET; 271 MMC_CAP_HW_RESET | MMC_CAP_1_8V_DDR;
268 slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ; 272 slot->host->mmc->caps2 |= MMC_CAP2_HC_ERASE_SZ;
269 slot->hw_reset = sdhci_pci_int_hw_reset; 273 slot->hw_reset = sdhci_pci_int_hw_reset;
270 return 0; 274 return 0;
@@ -279,6 +283,7 @@ static int byt_sdio_probe_slot(struct sdhci_pci_slot *slot)
279static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = { 283static const struct sdhci_pci_fixes sdhci_intel_byt_emmc = {
280 .allow_runtime_pm = true, 284 .allow_runtime_pm = true,
281 .probe_slot = byt_emmc_probe_slot, 285 .probe_slot = byt_emmc_probe_slot,
286 .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN,
282}; 287};
283 288
284static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = { 289static const struct sdhci_pci_fixes sdhci_intel_byt_sdio = {
@@ -753,6 +758,14 @@ static const struct pci_device_id pci_ids[] = {
753 758
754 { 759 {
755 .vendor = PCI_VENDOR_ID_INTEL, 760 .vendor = PCI_VENDOR_ID_INTEL,
761 .device = PCI_DEVICE_ID_INTEL_QRK_SD,
762 .subvendor = PCI_ANY_ID,
763 .subdevice = PCI_ANY_ID,
764 .driver_data = (kernel_ulong_t)&sdhci_intel_qrk,
765 },
766
767 {
768 .vendor = PCI_VENDOR_ID_INTEL,
756 .device = PCI_DEVICE_ID_INTEL_MRST_SD0, 769 .device = PCI_DEVICE_ID_INTEL_MRST_SD0,
757 .subvendor = PCI_ANY_ID, 770 .subvendor = PCI_ANY_ID,
758 .subdevice = PCI_ANY_ID, 771 .subdevice = PCI_ANY_ID,
@@ -1130,18 +1143,13 @@ static int sdhci_pci_suspend(struct device *dev)
1130 goto err_pci_suspend; 1143 goto err_pci_suspend;
1131 } 1144 }
1132 1145
1133 pci_save_state(pdev);
1134 if (pm_flags & MMC_PM_KEEP_POWER) { 1146 if (pm_flags & MMC_PM_KEEP_POWER) {
1135 if (pm_flags & MMC_PM_WAKE_SDIO_IRQ) { 1147 if (pm_flags & MMC_PM_WAKE_SDIO_IRQ)
1136 pci_pme_active(pdev, true); 1148 device_init_wakeup(dev, true);
1137 pci_enable_wake(pdev, PCI_D3hot, 1); 1149 else
1138 } 1150 device_init_wakeup(dev, false);
1139 pci_set_power_state(pdev, PCI_D3hot); 1151 } else
1140 } else { 1152 device_init_wakeup(dev, false);
1141 pci_enable_wake(pdev, PCI_D3hot, 0);
1142 pci_disable_device(pdev);
1143 pci_set_power_state(pdev, PCI_D3hot);
1144 }
1145 1153
1146 return 0; 1154 return 0;
1147 1155
@@ -1162,12 +1170,6 @@ static int sdhci_pci_resume(struct device *dev)
1162 if (!chip) 1170 if (!chip)
1163 return 0; 1171 return 0;
1164 1172
1165 pci_set_power_state(pdev, PCI_D0);
1166 pci_restore_state(pdev);
1167 ret = pci_enable_device(pdev);
1168 if (ret)
1169 return ret;
1170
1171 if (chip->fixes && chip->fixes->resume) { 1173 if (chip->fixes && chip->fixes->resume) {
1172 ret = chip->fixes->resume(chip); 1174 ret = chip->fixes->resume(chip);
1173 if (ret) 1175 if (ret)
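With PCI power-state handling left to the PCI core, sdhci_pci_suspend() above reduces its wake handling to a single question: should the device stay wakeup-capable? A tiny sketch of that decision, using illustrative flag values rather than the MMC_PM_* definitions:

#include <stdbool.h>
#include <stdio.h>

#define PM_KEEP_POWER    (1u << 0)	/* illustrative, not MMC_PM_KEEP_POWER */
#define PM_WAKE_SDIO_IRQ (1u << 1)	/* illustrative, not MMC_PM_WAKE_SDIO_IRQ */

static bool want_wakeup(unsigned int pm_flags)
{
	/* wakeup only when the card keeps power and wants SDIO-IRQ wake */
	return (pm_flags & PM_KEEP_POWER) && (pm_flags & PM_WAKE_SDIO_IRQ);
}

int main(void)
{
	printf("none: %d, keep-power: %d, keep-power+wake: %d\n",
	       want_wakeup(0),
	       want_wakeup(PM_KEEP_POWER),
	       want_wakeup(PM_KEEP_POWER | PM_WAKE_SDIO_IRQ));
	return 0;
}
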
diff --git a/drivers/mmc/host/sdhci-pci.h b/drivers/mmc/host/sdhci-pci.h
index 6d718719659e..c101477ef3be 100644
--- a/drivers/mmc/host/sdhci-pci.h
+++ b/drivers/mmc/host/sdhci-pci.h
@@ -17,6 +17,7 @@
17#define PCI_DEVICE_ID_INTEL_CLV_SDIO2 0x08fb 17#define PCI_DEVICE_ID_INTEL_CLV_SDIO2 0x08fb
18#define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5 18#define PCI_DEVICE_ID_INTEL_CLV_EMMC0 0x08e5
19#define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6 19#define PCI_DEVICE_ID_INTEL_CLV_EMMC1 0x08e6
20#define PCI_DEVICE_ID_INTEL_QRK_SD 0x08A7
20 21
21/* 22/*
22 * PCI registers 23 * PCI registers
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index f4f128947561..6f842fb8e6b8 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -288,15 +288,13 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
288 int ret; 288 int ret;
289 struct clk *clk; 289 struct clk *clk;
290 290
291 pxa = kzalloc(sizeof(struct sdhci_pxa), GFP_KERNEL); 291 pxa = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_pxa), GFP_KERNEL);
292 if (!pxa) 292 if (!pxa)
293 return -ENOMEM; 293 return -ENOMEM;
294 294
295 host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0); 295 host = sdhci_pltfm_init(pdev, &sdhci_pxav3_pdata, 0);
296 if (IS_ERR(host)) { 296 if (IS_ERR(host))
297 kfree(pxa);
298 return PTR_ERR(host); 297 return PTR_ERR(host);
299 }
300 298
301 if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) { 299 if (of_device_is_compatible(np, "marvell,armada-380-sdhci")) {
302 ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info()); 300 ret = mv_conf_mbus_windows(pdev, mv_mbus_dram_info());
@@ -308,7 +306,7 @@ static int sdhci_pxav3_probe(struct platform_device *pdev)
308 pltfm_host = sdhci_priv(host); 306 pltfm_host = sdhci_priv(host);
309 pltfm_host->priv = pxa; 307 pltfm_host->priv = pxa;
310 308
311 clk = clk_get(dev, NULL); 309 clk = devm_clk_get(dev, NULL);
312 if (IS_ERR(clk)) { 310 if (IS_ERR(clk)) {
313 dev_err(dev, "failed to get io clock\n"); 311 dev_err(dev, "failed to get io clock\n");
314 ret = PTR_ERR(clk); 312 ret = PTR_ERR(clk);
@@ -389,11 +387,9 @@ err_add_host:
389 pm_runtime_put_sync(&pdev->dev); 387 pm_runtime_put_sync(&pdev->dev);
390 pm_runtime_disable(&pdev->dev); 388 pm_runtime_disable(&pdev->dev);
391 clk_disable_unprepare(clk); 389 clk_disable_unprepare(clk);
392 clk_put(clk);
393err_clk_get: 390err_clk_get:
394err_mbus_win: 391err_mbus_win:
395 sdhci_pltfm_free(pdev); 392 sdhci_pltfm_free(pdev);
396 kfree(pxa);
397 return ret; 393 return ret;
398} 394}
399 395
@@ -401,17 +397,14 @@ static int sdhci_pxav3_remove(struct platform_device *pdev)
401{ 397{
402 struct sdhci_host *host = platform_get_drvdata(pdev); 398 struct sdhci_host *host = platform_get_drvdata(pdev);
403 struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); 399 struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
404 struct sdhci_pxa *pxa = pltfm_host->priv;
405 400
406 pm_runtime_get_sync(&pdev->dev); 401 pm_runtime_get_sync(&pdev->dev);
407 sdhci_remove_host(host, 1); 402 sdhci_remove_host(host, 1);
408 pm_runtime_disable(&pdev->dev); 403 pm_runtime_disable(&pdev->dev);
409 404
410 clk_disable_unprepare(pltfm_host->clk); 405 clk_disable_unprepare(pltfm_host->clk);
411 clk_put(pltfm_host->clk);
412 406
413 sdhci_pltfm_free(pdev); 407 sdhci_pltfm_free(pdev);
414 kfree(pxa);
415 408
416 return 0; 409 return 0;
417} 410}
diff --git a/drivers/mmc/host/sdhci-st.c b/drivers/mmc/host/sdhci-st.c
new file mode 100644
index 000000000000..328f348c7243
--- /dev/null
+++ b/drivers/mmc/host/sdhci-st.c
@@ -0,0 +1,176 @@
1/*
2 * Support for SDHCI on STMicroelectronics SoCs
3 *
4 * Copyright (C) 2014 STMicroelectronics Ltd
5 * Author: Giuseppe Cavallaro <peppe.cavallaro@st.com>
6 * Contributors: Peter Griffin <peter.griffin@linaro.org>
7 *
8 * Based on sdhci-cns3xxx.c
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 */
20
21#include <linux/io.h>
22#include <linux/of.h>
23#include <linux/module.h>
24#include <linux/err.h>
25#include <linux/mmc/host.h>
26
27#include "sdhci-pltfm.h"
28
29static u32 sdhci_st_readl(struct sdhci_host *host, int reg)
30{
31 u32 ret;
32
33 switch (reg) {
34 case SDHCI_CAPABILITIES:
35 ret = readl_relaxed(host->ioaddr + reg);
36 /* Support 3.3V and 1.8V */
37 ret &= ~SDHCI_CAN_VDD_300;
38 break;
39 default:
40 ret = readl_relaxed(host->ioaddr + reg);
41 }
42 return ret;
43}
44
45static const struct sdhci_ops sdhci_st_ops = {
46 .get_max_clock = sdhci_pltfm_clk_get_max_clock,
47 .set_clock = sdhci_set_clock,
48 .set_bus_width = sdhci_set_bus_width,
49 .read_l = sdhci_st_readl,
50 .reset = sdhci_reset,
51};
52
53static const struct sdhci_pltfm_data sdhci_st_pdata = {
54 .ops = &sdhci_st_ops,
55 .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
56 SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
57};
58
59
60static int sdhci_st_probe(struct platform_device *pdev)
61{
62 struct sdhci_host *host;
63 struct sdhci_pltfm_host *pltfm_host;
64 struct clk *clk;
65 int ret = 0;
66 u16 host_version;
67
68 clk = devm_clk_get(&pdev->dev, "mmc");
69 if (IS_ERR(clk)) {
70 dev_err(&pdev->dev, "Peripheral clk not found\n");
71 return PTR_ERR(clk);
72 }
73
74 host = sdhci_pltfm_init(pdev, &sdhci_st_pdata, 0);
75 if (IS_ERR(host)) {
76 dev_err(&pdev->dev, "Failed sdhci_pltfm_init\n");
77 return PTR_ERR(host);
78 }
79
80 ret = mmc_of_parse(host->mmc);
81
82 if (ret) {
83 dev_err(&pdev->dev, "Failed mmc_of_parse\n");
84 return ret;
85 }
86
87 clk_prepare_enable(clk);
88
89 pltfm_host = sdhci_priv(host);
90 pltfm_host->clk = clk;
91
92 ret = sdhci_add_host(host);
93 if (ret) {
94 dev_err(&pdev->dev, "Failed sdhci_add_host\n");
95 goto err_out;
96 }
97
98 platform_set_drvdata(pdev, host);
99
100 host_version = readw_relaxed((host->ioaddr + SDHCI_HOST_VERSION));
101
102 dev_info(&pdev->dev, "SDHCI ST Initialised: Host Version: 0x%x Vendor Version 0x%x\n",
103 ((host_version & SDHCI_SPEC_VER_MASK) >> SDHCI_SPEC_VER_SHIFT),
104 ((host_version & SDHCI_VENDOR_VER_MASK) >>
105 SDHCI_VENDOR_VER_SHIFT));
106
107 return 0;
108
109err_out:
110 clk_disable_unprepare(clk);
111 sdhci_pltfm_free(pdev);
112
113 return ret;
114}
115
116static int sdhci_st_remove(struct platform_device *pdev)
117{
118 struct sdhci_host *host = platform_get_drvdata(pdev);
119 struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
120
121 clk_disable_unprepare(pltfm_host->clk);
122
123 return sdhci_pltfm_unregister(pdev);
124}
125
126#ifdef CONFIG_PM_SLEEP
127static int sdhci_st_suspend(struct device *dev)
128{
129 struct sdhci_host *host = dev_get_drvdata(dev);
130 struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
131 int ret = sdhci_suspend_host(host);
132
133 if (ret)
134 goto out;
135
136 clk_disable_unprepare(pltfm_host->clk);
137out:
138 return ret;
139}
140
141static int sdhci_st_resume(struct device *dev)
142{
143 struct sdhci_host *host = dev_get_drvdata(dev);
144 struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
145
146 clk_prepare_enable(pltfm_host->clk);
147
148 return sdhci_resume_host(host);
149}
150#endif
151
152static SIMPLE_DEV_PM_OPS(sdhci_st_pmops, sdhci_st_suspend, sdhci_st_resume);
153
154static const struct of_device_id st_sdhci_match[] = {
155 { .compatible = "st,sdhci" },
156 {},
157};
158
159MODULE_DEVICE_TABLE(of, st_sdhci_match);
160
161static struct platform_driver sdhci_st_driver = {
162 .probe = sdhci_st_probe,
163 .remove = sdhci_st_remove,
164 .driver = {
165 .name = "sdhci-st",
166 .pm = &sdhci_st_pmops,
167 .of_match_table = of_match_ptr(st_sdhci_match),
168 },
169};
170
171module_platform_driver(sdhci_st_driver);
172
173MODULE_DESCRIPTION("SDHCI driver for STMicroelectronics SoCs");
174MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
175MODULE_LICENSE("GPL v2");
176MODULE_ALIAS("platform:st-sdhci");
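The only register-level quirk in the new sdhci-st.c is the read_l hook, which hides the 3.0 V capability so the core advertises 3.3 V and 1.8 V only. A standalone illustration, with simplified stand-ins for the SDHCI register offset and capability bits:

#include <stdint.h>
#include <stdio.h>

#define CAPS_REG	0x40		/* stand-in for SDHCI_CAPABILITIES */
#define CAN_VDD_330	(1u << 24)
#define CAN_VDD_300	(1u << 25)
#define CAN_VDD_180	(1u << 26)

static uint32_t st_read_sketch(int reg, uint32_t raw)
{
	if (reg == CAPS_REG)
		raw &= ~CAN_VDD_300;	/* hide the 3.0 V supply */
	return raw;
}

int main(void)
{
	uint32_t raw = CAN_VDD_330 | CAN_VDD_300 | CAN_VDD_180;

	printf("raw=0x%08x filtered=0x%08x\n",
	       (unsigned)raw, (unsigned)st_read_sketch(CAPS_REG, raw));
	return 0;
}
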
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index d93a063a36f3..33100d10d176 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -26,8 +26,6 @@
26#include <linux/mmc/host.h> 26#include <linux/mmc/host.h>
27#include <linux/mmc/slot-gpio.h> 27#include <linux/mmc/slot-gpio.h>
28 28
29#include <asm/gpio.h>
30
31#include "sdhci-pltfm.h" 29#include "sdhci-pltfm.h"
32 30
33/* Tegra SDHOST controller vendor register definitions */ 31/* Tegra SDHOST controller vendor register definitions */
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 47055f3f01b8..37b2a9ae52ef 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1223,8 +1223,16 @@ EXPORT_SYMBOL_GPL(sdhci_set_clock);
1223static void sdhci_set_power(struct sdhci_host *host, unsigned char mode, 1223static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
1224 unsigned short vdd) 1224 unsigned short vdd)
1225{ 1225{
1226 struct mmc_host *mmc = host->mmc;
1226 u8 pwr = 0; 1227 u8 pwr = 0;
1227 1228
1229 if (!IS_ERR(mmc->supply.vmmc)) {
1230 spin_unlock_irq(&host->lock);
1231 mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, vdd);
1232 spin_lock_irq(&host->lock);
1233 return;
1234 }
1235
1228 if (mode != MMC_POWER_OFF) { 1236 if (mode != MMC_POWER_OFF) {
1229 switch (1 << vdd) { 1237 switch (1 << vdd) {
1230 case MMC_VDD_165_195: 1238 case MMC_VDD_165_195:
@@ -1283,12 +1291,6 @@ static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
1283 if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER) 1291 if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
1284 mdelay(10); 1292 mdelay(10);
1285 } 1293 }
1286
1287 if (host->vmmc) {
1288 spin_unlock_irq(&host->lock);
1289 mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
1290 spin_lock_irq(&host->lock);
1291 }
1292} 1294}
1293 1295
1294/*****************************************************************************\ 1296/*****************************************************************************\
@@ -1440,13 +1442,15 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
1440{ 1442{
1441 unsigned long flags; 1443 unsigned long flags;
1442 u8 ctrl; 1444 u8 ctrl;
1445 struct mmc_host *mmc = host->mmc;
1443 1446
1444 spin_lock_irqsave(&host->lock, flags); 1447 spin_lock_irqsave(&host->lock, flags);
1445 1448
1446 if (host->flags & SDHCI_DEVICE_DEAD) { 1449 if (host->flags & SDHCI_DEVICE_DEAD) {
1447 spin_unlock_irqrestore(&host->lock, flags); 1450 spin_unlock_irqrestore(&host->lock, flags);
1448 if (host->vmmc && ios->power_mode == MMC_POWER_OFF) 1451 if (!IS_ERR(mmc->supply.vmmc) &&
1449 mmc_regulator_set_ocr(host->mmc, host->vmmc, 0); 1452 ios->power_mode == MMC_POWER_OFF)
1453 mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
1450 return; 1454 return;
1451 } 1455 }
1452 1456
@@ -1530,7 +1534,6 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
1530 host->ops->set_clock(host, host->clock); 1534 host->ops->set_clock(host, host->clock);
1531 } 1535 }
1532 1536
1533
1534 /* Reset SD Clock Enable */ 1537 /* Reset SD Clock Enable */
1535 clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL); 1538 clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
1536 clk &= ~SDHCI_CLOCK_CARD_EN; 1539 clk &= ~SDHCI_CLOCK_CARD_EN;
@@ -1707,6 +1710,7 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
1707static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host, 1710static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
1708 struct mmc_ios *ios) 1711 struct mmc_ios *ios)
1709{ 1712{
1713 struct mmc_host *mmc = host->mmc;
1710 u16 ctrl; 1714 u16 ctrl;
1711 int ret; 1715 int ret;
1712 1716
@@ -1725,11 +1729,12 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
1725 ctrl &= ~SDHCI_CTRL_VDD_180; 1729 ctrl &= ~SDHCI_CTRL_VDD_180;
1726 sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); 1730 sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
1727 1731
1728 if (host->vqmmc) { 1732 if (!IS_ERR(mmc->supply.vqmmc)) {
1729 ret = regulator_set_voltage(host->vqmmc, 2700000, 3600000); 1733 ret = regulator_set_voltage(mmc->supply.vqmmc, 2700000,
1734 3600000);
1730 if (ret) { 1735 if (ret) {
1731 pr_warning("%s: Switching to 3.3V signalling voltage " 1736 pr_warning("%s: Switching to 3.3V signalling voltage "
1732 " failed\n", mmc_hostname(host->mmc)); 1737 " failed\n", mmc_hostname(mmc));
1733 return -EIO; 1738 return -EIO;
1734 } 1739 }
1735 } 1740 }
@@ -1742,16 +1747,16 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
1742 return 0; 1747 return 0;
1743 1748
1744 pr_warning("%s: 3.3V regulator output did not became stable\n", 1749 pr_warning("%s: 3.3V regulator output did not became stable\n",
1745 mmc_hostname(host->mmc)); 1750 mmc_hostname(mmc));
1746 1751
1747 return -EAGAIN; 1752 return -EAGAIN;
1748 case MMC_SIGNAL_VOLTAGE_180: 1753 case MMC_SIGNAL_VOLTAGE_180:
1749 if (host->vqmmc) { 1754 if (!IS_ERR(mmc->supply.vqmmc)) {
1750 ret = regulator_set_voltage(host->vqmmc, 1755 ret = regulator_set_voltage(mmc->supply.vqmmc,
1751 1700000, 1950000); 1756 1700000, 1950000);
1752 if (ret) { 1757 if (ret) {
1753 pr_warning("%s: Switching to 1.8V signalling voltage " 1758 pr_warning("%s: Switching to 1.8V signalling voltage "
1754 " failed\n", mmc_hostname(host->mmc)); 1759 " failed\n", mmc_hostname(mmc));
1755 return -EIO; 1760 return -EIO;
1756 } 1761 }
1757 } 1762 }
@@ -1763,24 +1768,22 @@ static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
1763 ctrl |= SDHCI_CTRL_VDD_180; 1768 ctrl |= SDHCI_CTRL_VDD_180;
1764 sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2); 1769 sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
1765 1770
1766 /* Wait for 5ms */
1767 usleep_range(5000, 5500);
1768
1769 /* 1.8V regulator output should be stable within 5 ms */ 1771 /* 1.8V regulator output should be stable within 5 ms */
1770 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2); 1772 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
1771 if (ctrl & SDHCI_CTRL_VDD_180) 1773 if (ctrl & SDHCI_CTRL_VDD_180)
1772 return 0; 1774 return 0;
1773 1775
1774 pr_warning("%s: 1.8V regulator output did not became stable\n", 1776 pr_warning("%s: 1.8V regulator output did not became stable\n",
1775 mmc_hostname(host->mmc)); 1777 mmc_hostname(mmc));
1776 1778
1777 return -EAGAIN; 1779 return -EAGAIN;
1778 case MMC_SIGNAL_VOLTAGE_120: 1780 case MMC_SIGNAL_VOLTAGE_120:
1779 if (host->vqmmc) { 1781 if (!IS_ERR(mmc->supply.vqmmc)) {
1780 ret = regulator_set_voltage(host->vqmmc, 1100000, 1300000); 1782 ret = regulator_set_voltage(mmc->supply.vqmmc, 1100000,
1783 1300000);
1781 if (ret) { 1784 if (ret) {
1782 pr_warning("%s: Switching to 1.2V signalling voltage " 1785 pr_warning("%s: Switching to 1.2V signalling voltage "
1783 " failed\n", mmc_hostname(host->mmc)); 1786 " failed\n", mmc_hostname(mmc));
1784 return -EIO; 1787 return -EIO;
1785 } 1788 }
1786 } 1789 }
@@ -2643,7 +2646,6 @@ static void sdhci_runtime_pm_bus_off(struct sdhci_host *host)
2643int sdhci_runtime_suspend_host(struct sdhci_host *host) 2646int sdhci_runtime_suspend_host(struct sdhci_host *host)
2644{ 2647{
2645 unsigned long flags; 2648 unsigned long flags;
2646 int ret = 0;
2647 2649
2648 /* Disable tuning since we are suspending */ 2650 /* Disable tuning since we are suspending */
2649 if (host->flags & SDHCI_USING_RETUNING_TIMER) { 2651 if (host->flags & SDHCI_USING_RETUNING_TIMER) {
@@ -2663,14 +2665,14 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
2663 host->runtime_suspended = true; 2665 host->runtime_suspended = true;
2664 spin_unlock_irqrestore(&host->lock, flags); 2666 spin_unlock_irqrestore(&host->lock, flags);
2665 2667
2666 return ret; 2668 return 0;
2667} 2669}
2668EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host); 2670EXPORT_SYMBOL_GPL(sdhci_runtime_suspend_host);
2669 2671
2670int sdhci_runtime_resume_host(struct sdhci_host *host) 2672int sdhci_runtime_resume_host(struct sdhci_host *host)
2671{ 2673{
2672 unsigned long flags; 2674 unsigned long flags;
2673 int ret = 0, host_flags = host->flags; 2675 int host_flags = host->flags;
2674 2676
2675 if (host_flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) { 2677 if (host_flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
2676 if (host->ops->enable_dma) 2678 if (host->ops->enable_dma)
@@ -2709,7 +2711,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
2709 2711
2710 spin_unlock_irqrestore(&host->lock, flags); 2712 spin_unlock_irqrestore(&host->lock, flags);
2711 2713
2712 return ret; 2714 return 0;
2713} 2715}
2714EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host); 2716EXPORT_SYMBOL_GPL(sdhci_runtime_resume_host);
2715 2717
@@ -2820,12 +2822,12 @@ int sdhci_add_host(struct sdhci_host *host)
2820 * (128) and potentially one alignment transfer for 2822 * (128) and potentially one alignment transfer for
2821 * each of those entries. 2823 * each of those entries.
2822 */ 2824 */
2823 host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc), 2825 host->adma_desc = dma_alloc_coherent(mmc_dev(mmc),
2824 ADMA_SIZE, &host->adma_addr, 2826 ADMA_SIZE, &host->adma_addr,
2825 GFP_KERNEL); 2827 GFP_KERNEL);
2826 host->align_buffer = kmalloc(128 * 4, GFP_KERNEL); 2828 host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
2827 if (!host->adma_desc || !host->align_buffer) { 2829 if (!host->adma_desc || !host->align_buffer) {
2828 dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, 2830 dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
2829 host->adma_desc, host->adma_addr); 2831 host->adma_desc, host->adma_addr);
2830 kfree(host->align_buffer); 2832 kfree(host->align_buffer);
2831 pr_warning("%s: Unable to allocate ADMA " 2833 pr_warning("%s: Unable to allocate ADMA "
@@ -2838,7 +2840,7 @@ int sdhci_add_host(struct sdhci_host *host)
2838 pr_warning("%s: unable to allocate aligned ADMA descriptor\n", 2840 pr_warning("%s: unable to allocate aligned ADMA descriptor\n",
2839 mmc_hostname(mmc)); 2841 mmc_hostname(mmc));
2840 host->flags &= ~SDHCI_USE_ADMA; 2842 host->flags &= ~SDHCI_USE_ADMA;
2841 dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, 2843 dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
2842 host->adma_desc, host->adma_addr); 2844 host->adma_desc, host->adma_addr);
2843 kfree(host->align_buffer); 2845 kfree(host->align_buffer);
2844 host->adma_desc = NULL; 2846 host->adma_desc = NULL;
@@ -2853,7 +2855,7 @@ int sdhci_add_host(struct sdhci_host *host)
2853 */ 2855 */
2854 if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) { 2856 if (!(host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA))) {
2855 host->dma_mask = DMA_BIT_MASK(64); 2857 host->dma_mask = DMA_BIT_MASK(64);
2856 mmc_dev(host->mmc)->dma_mask = &host->dma_mask; 2858 mmc_dev(mmc)->dma_mask = &host->dma_mask;
2857 } 2859 }
2858 2860
2859 if (host->version >= SDHCI_SPEC_300) 2861 if (host->version >= SDHCI_SPEC_300)
@@ -2959,28 +2961,25 @@ int sdhci_add_host(struct sdhci_host *host)
2959 mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED; 2961 mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
2960 2962
2961 if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) && 2963 if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) &&
2962 !(host->mmc->caps & MMC_CAP_NONREMOVABLE)) 2964 !(mmc->caps & MMC_CAP_NONREMOVABLE))
2963 mmc->caps |= MMC_CAP_NEEDS_POLL; 2965 mmc->caps |= MMC_CAP_NEEDS_POLL;
2964 2966
2967 /* If there are external regulators, get them */
2968 if (mmc_regulator_get_supply(mmc) == -EPROBE_DEFER)
2969 return -EPROBE_DEFER;
2970
2965 /* If vqmmc regulator and no 1.8V signalling, then there's no UHS */ 2971 /* If vqmmc regulator and no 1.8V signalling, then there's no UHS */
2966 host->vqmmc = regulator_get_optional(mmc_dev(mmc), "vqmmc"); 2972 if (!IS_ERR(mmc->supply.vqmmc)) {
2967 if (IS_ERR_OR_NULL(host->vqmmc)) { 2973 ret = regulator_enable(mmc->supply.vqmmc);
2968 if (PTR_ERR(host->vqmmc) < 0) { 2974 if (!regulator_is_supported_voltage(mmc->supply.vqmmc, 1700000,
2969 pr_info("%s: no vqmmc regulator found\n", 2975 1950000))
2970 mmc_hostname(mmc));
2971 host->vqmmc = NULL;
2972 }
2973 } else {
2974 ret = regulator_enable(host->vqmmc);
2975 if (!regulator_is_supported_voltage(host->vqmmc, 1700000,
2976 1950000))
2977 caps[1] &= ~(SDHCI_SUPPORT_SDR104 | 2976 caps[1] &= ~(SDHCI_SUPPORT_SDR104 |
2978 SDHCI_SUPPORT_SDR50 | 2977 SDHCI_SUPPORT_SDR50 |
2979 SDHCI_SUPPORT_DDR50); 2978 SDHCI_SUPPORT_DDR50);
2980 if (ret) { 2979 if (ret) {
2981 pr_warn("%s: Failed to enable vqmmc regulator: %d\n", 2980 pr_warn("%s: Failed to enable vqmmc regulator: %d\n",
2982 mmc_hostname(mmc), ret); 2981 mmc_hostname(mmc), ret);
2983 host->vqmmc = NULL; 2982 mmc->supply.vqmmc = NULL;
2984 } 2983 }
2985 } 2984 }
2986 2985
@@ -3041,34 +3040,6 @@ int sdhci_add_host(struct sdhci_host *host)
3041 3040
3042 ocr_avail = 0; 3041 ocr_avail = 0;
3043 3042
3044 host->vmmc = regulator_get_optional(mmc_dev(mmc), "vmmc");
3045 if (IS_ERR_OR_NULL(host->vmmc)) {
3046 if (PTR_ERR(host->vmmc) < 0) {
3047 pr_info("%s: no vmmc regulator found\n",
3048 mmc_hostname(mmc));
3049 host->vmmc = NULL;
3050 }
3051 }
3052
3053#ifdef CONFIG_REGULATOR
3054 /*
3055 * Voltage range check makes sense only if regulator reports
3056 * any voltage value.
3057 */
3058 if (host->vmmc && regulator_get_voltage(host->vmmc) > 0) {
3059 ret = regulator_is_supported_voltage(host->vmmc, 2700000,
3060 3600000);
3061 if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_330)))
3062 caps[0] &= ~SDHCI_CAN_VDD_330;
3063 if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_300)))
3064 caps[0] &= ~SDHCI_CAN_VDD_300;
3065 ret = regulator_is_supported_voltage(host->vmmc, 1700000,
3066 1950000);
3067 if ((ret <= 0) || (!(caps[0] & SDHCI_CAN_VDD_180)))
3068 caps[0] &= ~SDHCI_CAN_VDD_180;
3069 }
3070#endif /* CONFIG_REGULATOR */
3071
3072 /* 3043 /*
3073 * According to SD Host Controller spec v3.00, if the Host System 3044 * According to SD Host Controller spec v3.00, if the Host System
3074 * can afford more than 150mA, Host Driver should set XPC to 1. Also 3045 * can afford more than 150mA, Host Driver should set XPC to 1. Also
@@ -3077,8 +3048,8 @@ int sdhci_add_host(struct sdhci_host *host)
3077 * value. 3048 * value.
3078 */ 3049 */
3079 max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT); 3050 max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
3080 if (!max_current_caps && host->vmmc) { 3051 if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) {
3081 u32 curr = regulator_get_current_limit(host->vmmc); 3052 u32 curr = regulator_get_current_limit(mmc->supply.vmmc);
3082 if (curr > 0) { 3053 if (curr > 0) {
3083 3054
3084 /* convert to SDHCI_MAX_CURRENT format */ 3055 /* convert to SDHCI_MAX_CURRENT format */
@@ -3118,8 +3089,12 @@ int sdhci_add_host(struct sdhci_host *host)
3118 SDHCI_MAX_CURRENT_MULTIPLIER; 3089 SDHCI_MAX_CURRENT_MULTIPLIER;
3119 } 3090 }
3120 3091
3092 /* If OCR set by external regulators, use it instead */
3093 if (mmc->ocr_avail)
3094 ocr_avail = mmc->ocr_avail;
3095
3121 if (host->ocr_mask) 3096 if (host->ocr_mask)
3122 ocr_avail = host->ocr_mask; 3097 ocr_avail &= host->ocr_mask;
3123 3098
3124 mmc->ocr_avail = ocr_avail; 3099 mmc->ocr_avail = ocr_avail;
3125 mmc->ocr_avail_sdio = ocr_avail; 3100 mmc->ocr_avail_sdio = ocr_avail;
@@ -3273,6 +3248,7 @@ EXPORT_SYMBOL_GPL(sdhci_add_host);
3273 3248
3274void sdhci_remove_host(struct sdhci_host *host, int dead) 3249void sdhci_remove_host(struct sdhci_host *host, int dead)
3275{ 3250{
3251 struct mmc_host *mmc = host->mmc;
3276 unsigned long flags; 3252 unsigned long flags;
3277 3253
3278 if (dead) { 3254 if (dead) {
@@ -3282,7 +3258,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
3282 3258
3283 if (host->mrq) { 3259 if (host->mrq) {
3284 pr_err("%s: Controller removed during " 3260 pr_err("%s: Controller removed during "
3285 " transfer!\n", mmc_hostname(host->mmc)); 3261 " transfer!\n", mmc_hostname(mmc));
3286 3262
3287 host->mrq->cmd->error = -ENOMEDIUM; 3263 host->mrq->cmd->error = -ENOMEDIUM;
3288 tasklet_schedule(&host->finish_tasklet); 3264 tasklet_schedule(&host->finish_tasklet);
@@ -3293,7 +3269,7 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
3293 3269
3294 sdhci_disable_card_detection(host); 3270 sdhci_disable_card_detection(host);
3295 3271
3296 mmc_remove_host(host->mmc); 3272 mmc_remove_host(mmc);
3297 3273
3298#ifdef SDHCI_USE_LEDS_CLASS 3274#ifdef SDHCI_USE_LEDS_CLASS
3299 led_classdev_unregister(&host->led); 3275 led_classdev_unregister(&host->led);
@@ -3310,18 +3286,14 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
3310 3286
3311 tasklet_kill(&host->finish_tasklet); 3287 tasklet_kill(&host->finish_tasklet);
3312 3288
3313 if (host->vmmc) { 3289 if (!IS_ERR(mmc->supply.vmmc))
3314 regulator_disable(host->vmmc); 3290 regulator_disable(mmc->supply.vmmc);
3315 regulator_put(host->vmmc);
3316 }
3317 3291
3318 if (host->vqmmc) { 3292 if (!IS_ERR(mmc->supply.vqmmc))
3319 regulator_disable(host->vqmmc); 3293 regulator_disable(mmc->supply.vqmmc);
3320 regulator_put(host->vqmmc);
3321 }
3322 3294
3323 if (host->adma_desc) 3295 if (host->adma_desc)
3324 dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE, 3296 dma_free_coherent(mmc_dev(mmc), ADMA_SIZE,
3325 host->adma_desc, host->adma_addr); 3297 host->adma_desc, host->adma_addr);
3326 kfree(host->align_buffer); 3298 kfree(host->align_buffer);
3327 3299
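
The sdhci changes above drop the driver's private vmmc/vqmmc handles in favour of the core-managed mmc->supply regulators. A minimal sketch of that usage pattern follows; the helper name and warning text are illustrative, and only mmc_regulator_get_supply(), IS_ERR() and regulator_enable() are taken from the patch itself.

	#include <linux/err.h>
	#include <linux/mmc/host.h>
	#include <linux/regulator/consumer.h>

	/* Hypothetical probe-time helper: fetch the core-managed supplies and
	 * enable vqmmc when present.  Absent regulators are ERR_PTR()s, so
	 * callers test IS_ERR() rather than NULL. */
	static int example_sdhci_get_supplies(struct mmc_host *mmc)
	{
		int ret;

		ret = mmc_regulator_get_supply(mmc);
		if (ret == -EPROBE_DEFER)
			return ret;	/* regulators not ready yet */

		if (!IS_ERR(mmc->supply.vqmmc)) {
			ret = regulator_enable(mmc->supply.vqmmc);
			if (ret)
				pr_warn("%s: failed to enable vqmmc: %d\n",
					mmc_hostname(mmc), ret);
		}
		return 0;
	}
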
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 656fbba4c422..d11708c815d7 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -386,7 +386,7 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
386 struct sh_mmcif_plat_data *pdata, 386 struct sh_mmcif_plat_data *pdata,
387 enum dma_transfer_direction direction) 387 enum dma_transfer_direction direction)
388{ 388{
389 struct dma_slave_config cfg; 389 struct dma_slave_config cfg = { 0, };
390 struct dma_chan *chan; 390 struct dma_chan *chan;
391 unsigned int slave_id; 391 unsigned int slave_id;
392 struct resource *res; 392 struct resource *res;
@@ -417,8 +417,15 @@ sh_mmcif_request_dma_one(struct sh_mmcif_host *host,
417 /* In the OF case the driver will get the slave ID from the DT */ 417 /* In the OF case the driver will get the slave ID from the DT */
418 cfg.slave_id = slave_id; 418 cfg.slave_id = slave_id;
419 cfg.direction = direction; 419 cfg.direction = direction;
420 cfg.dst_addr = res->start + MMCIF_CE_DATA; 420
421 cfg.src_addr = 0; 421 if (direction == DMA_DEV_TO_MEM) {
422 cfg.src_addr = res->start + MMCIF_CE_DATA;
423 cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
424 } else {
425 cfg.dst_addr = res->start + MMCIF_CE_DATA;
426 cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
427 }
428
422 ret = dmaengine_slave_config(chan, &cfg); 429 ret = dmaengine_slave_config(chan, &cfg);
423 if (ret < 0) { 430 if (ret < 0) {
424 dma_release_channel(chan); 431 dma_release_channel(chan);
@@ -1378,26 +1385,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
1378 dev_err(&pdev->dev, "Get irq error\n"); 1385 dev_err(&pdev->dev, "Get irq error\n");
1379 return -ENXIO; 1386 return -ENXIO;
1380 } 1387 }
1388
1381 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1389 res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1382 if (!res) { 1390 reg = devm_ioremap_resource(&pdev->dev, res);
1383 dev_err(&pdev->dev, "platform_get_resource error.\n"); 1391 if (IS_ERR(reg))
1384 return -ENXIO; 1392 return PTR_ERR(reg);
1385 }
1386 reg = ioremap(res->start, resource_size(res));
1387 if (!reg) {
1388 dev_err(&pdev->dev, "ioremap error.\n");
1389 return -ENOMEM;
1390 }
1391 1393
1392 mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev); 1394 mmc = mmc_alloc_host(sizeof(struct sh_mmcif_host), &pdev->dev);
1393 if (!mmc) { 1395 if (!mmc)
1394 ret = -ENOMEM; 1396 return -ENOMEM;
1395 goto ealloch;
1396 }
1397 1397
1398 ret = mmc_of_parse(mmc); 1398 ret = mmc_of_parse(mmc);
1399 if (ret < 0) 1399 if (ret < 0)
1400 goto eofparse; 1400 goto err_host;
1401 1401
1402 host = mmc_priv(mmc); 1402 host = mmc_priv(mmc);
1403 host->mmc = mmc; 1403 host->mmc = mmc;
@@ -1427,19 +1427,19 @@ static int sh_mmcif_probe(struct platform_device *pdev)
1427 pm_runtime_enable(&pdev->dev); 1427 pm_runtime_enable(&pdev->dev);
1428 host->power = false; 1428 host->power = false;
1429 1429
1430 host->hclk = clk_get(&pdev->dev, NULL); 1430 host->hclk = devm_clk_get(&pdev->dev, NULL);
1431 if (IS_ERR(host->hclk)) { 1431 if (IS_ERR(host->hclk)) {
1432 ret = PTR_ERR(host->hclk); 1432 ret = PTR_ERR(host->hclk);
1433 dev_err(&pdev->dev, "cannot get clock: %d\n", ret); 1433 dev_err(&pdev->dev, "cannot get clock: %d\n", ret);
1434 goto eclkget; 1434 goto err_pm;
1435 } 1435 }
1436 ret = sh_mmcif_clk_update(host); 1436 ret = sh_mmcif_clk_update(host);
1437 if (ret < 0) 1437 if (ret < 0)
1438 goto eclkupdate; 1438 goto err_pm;
1439 1439
1440 ret = pm_runtime_resume(&pdev->dev); 1440 ret = pm_runtime_resume(&pdev->dev);
1441 if (ret < 0) 1441 if (ret < 0)
1442 goto eresume; 1442 goto err_clk;
1443 1443
1444 INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work); 1444 INIT_DELAYED_WORK(&host->timeout_work, mmcif_timeout_work);
1445 1445
@@ -1447,65 +1447,55 @@ static int sh_mmcif_probe(struct platform_device *pdev)
1447 sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL); 1447 sh_mmcif_writel(host->addr, MMCIF_CE_INT_MASK, MASK_ALL);
1448 1448
1449 name = irq[1] < 0 ? dev_name(&pdev->dev) : "sh_mmc:error"; 1449 name = irq[1] < 0 ? dev_name(&pdev->dev) : "sh_mmc:error";
1450 ret = request_threaded_irq(irq[0], sh_mmcif_intr, sh_mmcif_irqt, 0, name, host); 1450 ret = devm_request_threaded_irq(&pdev->dev, irq[0], sh_mmcif_intr,
1451 sh_mmcif_irqt, 0, name, host);
1451 if (ret) { 1452 if (ret) {
1452 dev_err(&pdev->dev, "request_irq error (%s)\n", name); 1453 dev_err(&pdev->dev, "request_irq error (%s)\n", name);
1453 goto ereqirq0; 1454 goto err_clk;
1454 } 1455 }
1455 if (irq[1] >= 0) { 1456 if (irq[1] >= 0) {
1456 ret = request_threaded_irq(irq[1], sh_mmcif_intr, sh_mmcif_irqt, 1457 ret = devm_request_threaded_irq(&pdev->dev, irq[1],
1457 0, "sh_mmc:int", host); 1458 sh_mmcif_intr, sh_mmcif_irqt,
1459 0, "sh_mmc:int", host);
1458 if (ret) { 1460 if (ret) {
1459 dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n"); 1461 dev_err(&pdev->dev, "request_irq error (sh_mmc:int)\n");
1460 goto ereqirq1; 1462 goto err_clk;
1461 } 1463 }
1462 } 1464 }
1463 1465
1464 if (pd && pd->use_cd_gpio) { 1466 if (pd && pd->use_cd_gpio) {
1465 ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0); 1467 ret = mmc_gpio_request_cd(mmc, pd->cd_gpio, 0);
1466 if (ret < 0) 1468 if (ret < 0)
1467 goto erqcd; 1469 goto err_clk;
1468 } 1470 }
1469 1471
1470 mutex_init(&host->thread_lock); 1472 mutex_init(&host->thread_lock);
1471 1473
1472 clk_disable_unprepare(host->hclk);
1473 ret = mmc_add_host(mmc); 1474 ret = mmc_add_host(mmc);
1474 if (ret < 0) 1475 if (ret < 0)
1475 goto emmcaddh; 1476 goto err_clk;
1476 1477
1477 dev_pm_qos_expose_latency_limit(&pdev->dev, 100); 1478 dev_pm_qos_expose_latency_limit(&pdev->dev, 100);
1478 1479
1479 dev_info(&pdev->dev, "driver version %s\n", DRIVER_VERSION); 1480 dev_info(&pdev->dev, "Chip version 0x%04x, clock rate %luMHz\n",
1480 dev_dbg(&pdev->dev, "chip ver H'%04x\n", 1481 sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0xffff,
1481 sh_mmcif_readl(host->addr, MMCIF_CE_VERSION) & 0x0000ffff); 1482 clk_get_rate(host->hclk) / 1000000UL);
1483
1484 clk_disable_unprepare(host->hclk);
1482 return ret; 1485 return ret;
1483 1486
1484emmcaddh: 1487err_clk:
1485erqcd:
1486 if (irq[1] >= 0)
1487 free_irq(irq[1], host);
1488ereqirq1:
1489 free_irq(irq[0], host);
1490ereqirq0:
1491 pm_runtime_suspend(&pdev->dev);
1492eresume:
1493 clk_disable_unprepare(host->hclk); 1488 clk_disable_unprepare(host->hclk);
1494eclkupdate: 1489err_pm:
1495 clk_put(host->hclk);
1496eclkget:
1497 pm_runtime_disable(&pdev->dev); 1490 pm_runtime_disable(&pdev->dev);
1498eofparse: 1491err_host:
1499 mmc_free_host(mmc); 1492 mmc_free_host(mmc);
1500ealloch:
1501 iounmap(reg);
1502 return ret; 1493 return ret;
1503} 1494}
1504 1495
1505static int sh_mmcif_remove(struct platform_device *pdev) 1496static int sh_mmcif_remove(struct platform_device *pdev)
1506{ 1497{
1507 struct sh_mmcif_host *host = platform_get_drvdata(pdev); 1498 struct sh_mmcif_host *host = platform_get_drvdata(pdev);
1508 int irq[2];
1509 1499
1510 host->dying = true; 1500 host->dying = true;
1511 clk_prepare_enable(host->hclk); 1501 clk_prepare_enable(host->hclk);
@@ -1523,16 +1513,6 @@ static int sh_mmcif_remove(struct platform_device *pdev)
1523 */ 1513 */
1524 cancel_delayed_work_sync(&host->timeout_work); 1514 cancel_delayed_work_sync(&host->timeout_work);
1525 1515
1526 if (host->addr)
1527 iounmap(host->addr);
1528
1529 irq[0] = platform_get_irq(pdev, 0);
1530 irq[1] = platform_get_irq(pdev, 1);
1531
1532 free_irq(irq[0], host);
1533 if (irq[1] >= 0)
1534 free_irq(irq[1], host);
1535
1536 clk_disable_unprepare(host->hclk); 1516 clk_disable_unprepare(host->hclk);
1537 mmc_free_host(host->mmc); 1517 mmc_free_host(host->mmc);
1538 pm_runtime_put_sync(&pdev->dev); 1518 pm_runtime_put_sync(&pdev->dev);
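
The sh_mmcif hunk above zero-initialises the dma_slave_config and programs only the address and bus width for the direction actually requested, so no stale values survive from a previous configuration. A hedged sketch of that pattern, with a hypothetical helper name and FIFO address parameter:

	#include <linux/dmaengine.h>

	/* Illustrative helper (not from the patch): configure one slave
	 * channel, filling only the side that matters for the requested
	 * transfer direction. */
	static int example_mmcif_slave_config(struct dma_chan *chan,
					      dma_addr_t data_reg,
					      enum dma_transfer_direction dir)
	{
		struct dma_slave_config cfg = { 0 };

		cfg.direction = dir;
		if (dir == DMA_DEV_TO_MEM) {
			cfg.src_addr = data_reg;
			cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
		} else {
			cfg.dst_addr = data_reg;
			cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
		}
		return dmaengine_slave_config(chan, &cfg);
	}
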
diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c
index 03e7b280cb4c..eb8f1d5c34b1 100644
--- a/drivers/mmc/host/tmio_mmc_dma.c
+++ b/drivers/mmc/host/tmio_mmc_dma.c
@@ -294,6 +294,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
294 cfg.slave_id = pdata->dma->slave_id_tx; 294 cfg.slave_id = pdata->dma->slave_id_tx;
295 cfg.direction = DMA_MEM_TO_DEV; 295 cfg.direction = DMA_MEM_TO_DEV;
296 cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->pdata->bus_shift); 296 cfg.dst_addr = res->start + (CTL_SD_DATA_PORT << host->pdata->bus_shift);
297 cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
297 cfg.src_addr = 0; 298 cfg.src_addr = 0;
298 ret = dmaengine_slave_config(host->chan_tx, &cfg); 299 ret = dmaengine_slave_config(host->chan_tx, &cfg);
299 if (ret < 0) 300 if (ret < 0)
@@ -312,6 +313,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat
312 cfg.slave_id = pdata->dma->slave_id_rx; 313 cfg.slave_id = pdata->dma->slave_id_rx;
313 cfg.direction = DMA_DEV_TO_MEM; 314 cfg.direction = DMA_DEV_TO_MEM;
314 cfg.src_addr = cfg.dst_addr; 315 cfg.src_addr = cfg.dst_addr;
316 cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES;
315 cfg.dst_addr = 0; 317 cfg.dst_addr = 0;
316 ret = dmaengine_slave_config(host->chan_rx, &cfg); 318 ret = dmaengine_slave_config(host->chan_rx, &cfg);
317 if (ret < 0) 319 if (ret < 0)
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 282891a8e451..54181b4f6e9e 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -72,7 +72,6 @@
72#define BM_SPI_CS 0x20 72#define BM_SPI_CS 0x20
73#define BM_SD_POWER 0x40 73#define BM_SD_POWER 0x40
74#define BM_SOFT_RESET 0x80 74#define BM_SOFT_RESET 0x80
75#define BM_ONEBIT_MASK 0xFD
76 75
77/* SDMMC_BLKLEN bit fields */ 76/* SDMMC_BLKLEN bit fields */
78#define BLKL_CRCERR_ABORT 0x0800 77#define BLKL_CRCERR_ABORT 0x0800
@@ -120,6 +119,8 @@
120#define STS2_DATARSP_BUSY 0x20 119#define STS2_DATARSP_BUSY 0x20
121#define STS2_DIS_FORCECLK 0x80 120#define STS2_DIS_FORCECLK 0x80
122 121
122/* SDMMC_EXTCTRL bit fields */
123#define EXT_EIGHTBIT 0x04
123 124
124/* MMC/SD DMA Controller Registers */ 125/* MMC/SD DMA Controller Registers */
125#define SDDMA_GCR 0x100 126#define SDDMA_GCR 0x100
@@ -672,7 +673,7 @@ static void wmt_mci_request(struct mmc_host *mmc, struct mmc_request *req)
672static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) 673static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
673{ 674{
674 struct wmt_mci_priv *priv; 675 struct wmt_mci_priv *priv;
675 u32 reg_tmp; 676 u32 busmode, extctrl;
676 677
677 priv = mmc_priv(mmc); 678 priv = mmc_priv(mmc);
678 679
@@ -687,28 +688,26 @@ static void wmt_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
687 if (ios->clock != 0) 688 if (ios->clock != 0)
688 clk_set_rate(priv->clk_sdmmc, ios->clock); 689 clk_set_rate(priv->clk_sdmmc, ios->clock);
689 690
691 busmode = readb(priv->sdmmc_base + SDMMC_BUSMODE);
692 extctrl = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
693
694 busmode &= ~(BM_EIGHTBIT_MODE | BM_FOURBIT_MODE);
695 extctrl &= ~EXT_EIGHTBIT;
696
690 switch (ios->bus_width) { 697 switch (ios->bus_width) {
691 case MMC_BUS_WIDTH_8: 698 case MMC_BUS_WIDTH_8:
692 reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL); 699 busmode |= BM_EIGHTBIT_MODE;
693 writeb(reg_tmp | 0x04, priv->sdmmc_base + SDMMC_EXTCTRL); 700 extctrl |= EXT_EIGHTBIT;
694 break; 701 break;
695 case MMC_BUS_WIDTH_4: 702 case MMC_BUS_WIDTH_4:
696 reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE); 703 busmode |= BM_FOURBIT_MODE;
697 writeb(reg_tmp | BM_FOURBIT_MODE, priv->sdmmc_base +
698 SDMMC_BUSMODE);
699
700 reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
701 writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
702 break; 704 break;
703 case MMC_BUS_WIDTH_1: 705 case MMC_BUS_WIDTH_1:
704 reg_tmp = readb(priv->sdmmc_base + SDMMC_BUSMODE);
705 writeb(reg_tmp & BM_ONEBIT_MASK, priv->sdmmc_base +
706 SDMMC_BUSMODE);
707
708 reg_tmp = readb(priv->sdmmc_base + SDMMC_EXTCTRL);
709 writeb(reg_tmp & 0xFB, priv->sdmmc_base + SDMMC_EXTCTRL);
710 break; 706 break;
711 } 707 }
708
709 writeb(busmode, priv->sdmmc_base + SDMMC_BUSMODE);
710 writeb(extctrl, priv->sdmmc_base + SDMMC_EXTCTRL);
712} 711}
713 712
714static int wmt_mci_get_ro(struct mmc_host *mmc) 713static int wmt_mci_get_ro(struct mmc_host *mmc)
@@ -830,7 +829,7 @@ static int wmt_mci_probe(struct platform_device *pdev)
830 goto fail3; 829 goto fail3;
831 } 830 }
832 831
833 ret = request_irq(dma_irq, wmt_mci_dma_isr, 32, "sdmmc", priv); 832 ret = request_irq(dma_irq, wmt_mci_dma_isr, 0, "sdmmc", priv);
834 if (ret) { 833 if (ret) {
835 dev_err(&pdev->dev, "Register DMA IRQ fail\n"); 834 dev_err(&pdev->dev, "Register DMA IRQ fail\n");
836 goto fail4; 835 goto fail4;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index af7c40ac1455..e1a8f4e19983 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -581,7 +581,11 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
581 struct xgene_enet_desc_ring *ring; 581 struct xgene_enet_desc_ring *ring;
582 struct xgene_enet_pdata *pdata = netdev_priv(ndev); 582 struct xgene_enet_pdata *pdata = netdev_priv(ndev);
583 struct device *dev = ndev_to_dev(ndev); 583 struct device *dev = ndev_to_dev(ndev);
584 u32 size; 584 int size;
585
586 size = xgene_enet_get_ring_size(dev, cfgsize);
587 if (size < 0)
588 return NULL;
585 589
586 ring = devm_kzalloc(dev, sizeof(struct xgene_enet_desc_ring), 590 ring = devm_kzalloc(dev, sizeof(struct xgene_enet_desc_ring),
587 GFP_KERNEL); 591 GFP_KERNEL);
@@ -593,7 +597,6 @@ static struct xgene_enet_desc_ring *xgene_enet_create_desc_ring(
593 ring->cfgsize = cfgsize; 597 ring->cfgsize = cfgsize;
594 ring->id = ring_id; 598 ring->id = ring_id;
595 599
596 size = xgene_enet_get_ring_size(dev, cfgsize);
597 ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma, 600 ring->desc_addr = dma_zalloc_coherent(dev, size, &ring->dma,
598 GFP_KERNEL); 601 GFP_KERNEL);
599 if (!ring->desc_addr) { 602 if (!ring->desc_addr) {
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a3dd5dc64f4c..4296b3d26f02 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -14093,8 +14093,9 @@ static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev,
14093 14093
14094 spin_lock_bh(&tp->lock); 14094 spin_lock_bh(&tp->lock);
14095 if (!tp->hw_stats) { 14095 if (!tp->hw_stats) {
14096 *stats = tp->net_stats_prev;
14096 spin_unlock_bh(&tp->lock); 14097 spin_unlock_bh(&tp->lock);
14097 return &tp->net_stats_prev; 14098 return stats;
14098 } 14099 }
14099 14100
14100 tg3_get_nstats(tp, stats); 14101 tg3_get_nstats(tp, stats);
diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h
index 811f1351db7a..43e08d0bc3d3 100644
--- a/drivers/net/ethernet/emulex/benet/be.h
+++ b/drivers/net/ethernet/emulex/benet/be.h
@@ -897,5 +897,6 @@ void be_roce_dev_remove(struct be_adapter *);
897 */ 897 */
898void be_roce_dev_open(struct be_adapter *); 898void be_roce_dev_open(struct be_adapter *);
899void be_roce_dev_close(struct be_adapter *); 899void be_roce_dev_close(struct be_adapter *);
900void be_roce_dev_shutdown(struct be_adapter *);
900 901
901#endif /* BE_H */ 902#endif /* BE_H */
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index db4ff14ff18f..9cdeda54674a 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -5014,6 +5014,7 @@ static void be_shutdown(struct pci_dev *pdev)
5014 if (!adapter) 5014 if (!adapter)
5015 return; 5015 return;
5016 5016
5017 be_roce_dev_shutdown(adapter);
5017 cancel_delayed_work_sync(&adapter->work); 5018 cancel_delayed_work_sync(&adapter->work);
5018 cancel_delayed_work_sync(&adapter->func_recovery_work); 5019 cancel_delayed_work_sync(&adapter->func_recovery_work);
5019 5020
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c
index 5bf16603a3e9..ef4672dc7357 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.c
+++ b/drivers/net/ethernet/emulex/benet/be_roce.c
@@ -120,7 +120,8 @@ static void _be_roce_dev_open(struct be_adapter *adapter)
120{ 120{
121 if (ocrdma_drv && adapter->ocrdma_dev && 121 if (ocrdma_drv && adapter->ocrdma_dev &&
122 ocrdma_drv->state_change_handler) 122 ocrdma_drv->state_change_handler)
123 ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0); 123 ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
124 BE_DEV_UP);
124} 125}
125 126
126void be_roce_dev_open(struct be_adapter *adapter) 127void be_roce_dev_open(struct be_adapter *adapter)
@@ -136,7 +137,8 @@ static void _be_roce_dev_close(struct be_adapter *adapter)
136{ 137{
137 if (ocrdma_drv && adapter->ocrdma_dev && 138 if (ocrdma_drv && adapter->ocrdma_dev &&
138 ocrdma_drv->state_change_handler) 139 ocrdma_drv->state_change_handler)
139 ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1); 140 ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
141 BE_DEV_DOWN);
140} 142}
141 143
142void be_roce_dev_close(struct be_adapter *adapter) 144void be_roce_dev_close(struct be_adapter *adapter)
@@ -148,6 +150,18 @@ void be_roce_dev_close(struct be_adapter *adapter)
148 } 150 }
149} 151}
150 152
153void be_roce_dev_shutdown(struct be_adapter *adapter)
154{
155 if (be_roce_supported(adapter)) {
156 mutex_lock(&be_adapter_list_lock);
157 if (ocrdma_drv && adapter->ocrdma_dev &&
158 ocrdma_drv->state_change_handler)
159 ocrdma_drv->state_change_handler(adapter->ocrdma_dev,
160 BE_DEV_SHUTDOWN);
161 mutex_unlock(&be_adapter_list_lock);
162 }
163}
164
151int be_roce_register_driver(struct ocrdma_driver *drv) 165int be_roce_register_driver(struct ocrdma_driver *drv)
152{ 166{
153 struct be_adapter *dev; 167 struct be_adapter *dev;
diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h
index a3d9e96c18eb..e6f7eb1a7d87 100644
--- a/drivers/net/ethernet/emulex/benet/be_roce.h
+++ b/drivers/net/ethernet/emulex/benet/be_roce.h
@@ -62,7 +62,8 @@ struct ocrdma_driver {
62 62
63enum { 63enum {
64 BE_DEV_UP = 0, 64 BE_DEV_UP = 0,
65 BE_DEV_DOWN = 1 65 BE_DEV_DOWN = 1,
66 BE_DEV_SHUTDOWN = 2
66}; 67};
67 68
68/* APIs for RoCE driver to register callback handlers, 69/* APIs for RoCE driver to register callback handlers,
diff --git a/drivers/net/ethernet/ibm/ehea/Makefile b/drivers/net/ethernet/ibm/ehea/Makefile
index 775d9969b5c2..cd473e295242 100644
--- a/drivers/net/ethernet/ibm/ehea/Makefile
+++ b/drivers/net/ethernet/ibm/ehea/Makefile
@@ -1,6 +1,6 @@
1# 1#
2# Makefile for the eHEA ethernet device driver for IBM eServer System p 2# Makefile for the eHEA ethernet device driver for IBM eServer System p
3# 3#
4ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o ehea_phyp.o 4ehea-y = ehea_main.o ehea_phyp.o ehea_qmr.o ehea_ethtool.o
5obj-$(CONFIG_EHEA) += ehea.o 5obj-$(CONFIG_EHEA) += ehea.o
6 6
diff --git a/drivers/net/ethernet/intel/e1000e/manage.c b/drivers/net/ethernet/intel/e1000e/manage.c
index 58856032298d..06edfca1a35e 100644
--- a/drivers/net/ethernet/intel/e1000e/manage.c
+++ b/drivers/net/ethernet/intel/e1000e/manage.c
@@ -47,7 +47,7 @@ static u8 e1000_calculate_checksum(u8 *buffer, u32 length)
47 * e1000_mng_enable_host_if - Checks host interface is enabled 47 * e1000_mng_enable_host_if - Checks host interface is enabled
48 * @hw: pointer to the HW structure 48 * @hw: pointer to the HW structure
49 * 49 *
50 * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND 50 * Returns 0 upon success, else -E1000_ERR_HOST_INTERFACE_COMMAND
51 * 51 *
52 * This function checks whether the HOST IF is enabled for command operation 52 * This function checks whether the HOST IF is enabled for command operation
53 * and also checks whether the previous command is completed. It busy waits 53 * and also checks whether the previous command is completed. It busy waits
@@ -78,7 +78,7 @@ static s32 e1000_mng_enable_host_if(struct e1000_hw *hw)
78 } 78 }
79 79
80 if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { 80 if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) {
81 e_dbg("Previous command timeout failed .\n"); 81 e_dbg("Previous command timeout failed.\n");
82 return -E1000_ERR_HOST_INTERFACE_COMMAND; 82 return -E1000_ERR_HOST_INTERFACE_COMMAND;
83 } 83 }
84 84
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index 6938fc1ad877..5d01db1d789b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -33,6 +33,7 @@
33#include <scsi/fc/fc_fcoe.h> 33#include <scsi/fc/fc_fcoe.h>
34#include <scsi/libfc.h> 34#include <scsi/libfc.h>
35#include <scsi/libfcoe.h> 35#include <scsi/libfcoe.h>
36#include <uapi/linux/dcbnl.h>
36 37
37#include "i40e.h" 38#include "i40e.h"
38#include "i40e_fcoe.h" 39#include "i40e_fcoe.h"
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 51bc03072ed3..871474f6fe62 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -4415,13 +4415,13 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
4415 4415
4416 switch (vsi->back->hw.phy.link_info.link_speed) { 4416 switch (vsi->back->hw.phy.link_info.link_speed) {
4417 case I40E_LINK_SPEED_40GB: 4417 case I40E_LINK_SPEED_40GB:
4418 strncpy(speed, "40 Gbps", SPEED_SIZE); 4418 strlcpy(speed, "40 Gbps", SPEED_SIZE);
4419 break; 4419 break;
4420 case I40E_LINK_SPEED_10GB: 4420 case I40E_LINK_SPEED_10GB:
4421 strncpy(speed, "10 Gbps", SPEED_SIZE); 4421 strlcpy(speed, "10 Gbps", SPEED_SIZE);
4422 break; 4422 break;
4423 case I40E_LINK_SPEED_1GB: 4423 case I40E_LINK_SPEED_1GB:
4424 strncpy(speed, "1000 Mbps", SPEED_SIZE); 4424 strlcpy(speed, "1000 Mbps", SPEED_SIZE);
4425 break; 4425 break;
4426 default: 4426 default:
4427 break; 4427 break;
@@ -4429,16 +4429,16 @@ static void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
4429 4429
4430 switch (vsi->back->hw.fc.current_mode) { 4430 switch (vsi->back->hw.fc.current_mode) {
4431 case I40E_FC_FULL: 4431 case I40E_FC_FULL:
4432 strncpy(fc, "RX/TX", FC_SIZE); 4432 strlcpy(fc, "RX/TX", FC_SIZE);
4433 break; 4433 break;
4434 case I40E_FC_TX_PAUSE: 4434 case I40E_FC_TX_PAUSE:
4435 strncpy(fc, "TX", FC_SIZE); 4435 strlcpy(fc, "TX", FC_SIZE);
4436 break; 4436 break;
4437 case I40E_FC_RX_PAUSE: 4437 case I40E_FC_RX_PAUSE:
4438 strncpy(fc, "RX", FC_SIZE); 4438 strlcpy(fc, "RX", FC_SIZE);
4439 break; 4439 break;
4440 default: 4440 default:
4441 strncpy(fc, "None", FC_SIZE); 4441 strlcpy(fc, "None", FC_SIZE);
4442 break; 4442 break;
4443 } 4443 }
4444 4444
@@ -5839,7 +5839,7 @@ static void i40e_send_version(struct i40e_pf *pf)
5839 dv.minor_version = DRV_VERSION_MINOR; 5839 dv.minor_version = DRV_VERSION_MINOR;
5840 dv.build_version = DRV_VERSION_BUILD; 5840 dv.build_version = DRV_VERSION_BUILD;
5841 dv.subbuild_version = 0; 5841 dv.subbuild_version = 0;
5842 strncpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string)); 5842 strlcpy(dv.driver_string, DRV_VERSION, sizeof(dv.driver_string));
5843 i40e_aq_send_driver_version(&pf->hw, &dv, NULL); 5843 i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
5844} 5844}
5845 5845
@@ -6293,7 +6293,7 @@ static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
6293 6293
6294 if (alloc_qvectors) { 6294 if (alloc_qvectors) {
6295 /* allocate memory for q_vector pointers */ 6295 /* allocate memory for q_vector pointers */
6296 size = sizeof(struct i40e_q_vectors *) * vsi->num_q_vectors; 6296 size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
6297 vsi->q_vectors = kzalloc(size, GFP_KERNEL); 6297 vsi->q_vectors = kzalloc(size, GFP_KERNEL);
6298 if (!vsi->q_vectors) { 6298 if (!vsi->q_vectors) {
6299 ret = -ENOMEM; 6299 ret = -ENOMEM;
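
The strncpy()-to-strlcpy() conversions above matter because strncpy() leaves the destination unterminated when the source fills it exactly, while strlcpy() always writes a trailing NUL. A minimal illustration; the function name and buffer size are arbitrary:

	#include <linux/string.h>

	/* Illustrative only: strlcpy() terminates and truncates safely,
	 * whereas strncpy(buf, src, len) leaves buf without a terminator
	 * whenever strlen(src) >= len. */
	static void example_format_speed(char *buf, size_t len)
	{
		strlcpy(buf, "40 Gbps", len);
	}
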
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 97bda3dffd49..25c4f9a3011f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -251,9 +251,9 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
251 * 251 *
252 * Writes a 16 bit words buffer to the Shadow RAM using the admin command. 252 * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
253 **/ 253 **/
254i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer, 254static i40e_status i40e_write_nvm_aq(struct i40e_hw *hw, u8 module_pointer,
255 u32 offset, u16 words, void *data, 255 u32 offset, u16 words, void *data,
256 bool last_command) 256 bool last_command)
257{ 257{
258 i40e_status ret_code = I40E_ERR_NVM; 258 i40e_status ret_code = I40E_ERR_NVM;
259 259
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 5d940a26055c..65a4a0f88ea0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -1311,6 +1311,15 @@ static struct mlx4_cmd_info cmd_info[] = {
1311 .wrapper = mlx4_MAD_IFC_wrapper 1311 .wrapper = mlx4_MAD_IFC_wrapper
1312 }, 1312 },
1313 { 1313 {
1314 .opcode = MLX4_CMD_MAD_DEMUX,
1315 .has_inbox = false,
1316 .has_outbox = false,
1317 .out_is_imm = false,
1318 .encode_slave_id = false,
1319 .verify = NULL,
1320 .wrapper = mlx4_CMD_EPERM_wrapper
1321 },
1322 {
1314 .opcode = MLX4_CMD_QUERY_IF_STAT, 1323 .opcode = MLX4_CMD_QUERY_IF_STAT,
1315 .has_inbox = false, 1324 .has_inbox = false,
1316 .has_outbox = true, 1325 .has_outbox = true,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 688e1eabab29..494753e44ae3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags)
136 [7] = "FSM (MAC anti-spoofing) support", 136 [7] = "FSM (MAC anti-spoofing) support",
137 [8] = "Dynamic QP updates support", 137 [8] = "Dynamic QP updates support",
138 [9] = "Device managed flow steering IPoIB support", 138 [9] = "Device managed flow steering IPoIB support",
139 [10] = "TCP/IP offloads/flow-steering for VXLAN support" 139 [10] = "TCP/IP offloads/flow-steering for VXLAN support",
140 [11] = "MAD DEMUX (Secure-Host) support"
140 }; 141 };
141 int i; 142 int i;
142 143
@@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
571#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 572#define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0
572#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d 573#define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d
573#define QUERY_DEV_CAP_VXLAN 0x9e 574#define QUERY_DEV_CAP_VXLAN 0x9e
575#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0
574 576
575 dev_cap->flags2 = 0; 577 dev_cap->flags2 = 0;
576 mailbox = mlx4_alloc_cmd_mailbox(dev); 578 mailbox = mlx4_alloc_cmd_mailbox(dev);
@@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
748 MLX4_GET(dev_cap->max_counters, outbox, 750 MLX4_GET(dev_cap->max_counters, outbox,
749 QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); 751 QUERY_DEV_CAP_MAX_COUNTERS_OFFSET);
750 752
753 MLX4_GET(field32, outbox,
754 QUERY_DEV_CAP_MAD_DEMUX_OFFSET);
755 if (field32 & (1 << 0))
756 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX;
757
751 MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); 758 MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET);
752 if (field32 & (1 << 16)) 759 if (field32 & (1 << 16))
753 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; 760 dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP;
@@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work)
2016out: 2023out:
2017 mlx4_free_cmd_mailbox(dev, mailbox); 2024 mlx4_free_cmd_mailbox(dev, mailbox);
2018} 2025}
2026
2027static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev,
2028 struct mlx4_cmd_mailbox *mailbox)
2029{
2030#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10
2031#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20
2032#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40
2033#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70
2034
2035 u32 set_attr_mask, getresp_attr_mask;
2036 u32 trap_attr_mask, traprepress_attr_mask;
2037
2038 MLX4_GET(set_attr_mask, mailbox->buf,
2039 MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET);
2040 mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n",
2041 set_attr_mask);
2042
2043 MLX4_GET(getresp_attr_mask, mailbox->buf,
2044 MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET);
2045 mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n",
2046 getresp_attr_mask);
2047
2048 MLX4_GET(trap_attr_mask, mailbox->buf,
2049 MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET);
2050 mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n",
2051 trap_attr_mask);
2052
2053 MLX4_GET(traprepress_attr_mask, mailbox->buf,
2054 MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET);
2055 mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n",
2056 traprepress_attr_mask);
2057
2058 if (set_attr_mask && getresp_attr_mask && trap_attr_mask &&
2059 traprepress_attr_mask)
2060 return 1;
2061
2062 return 0;
2063}
2064
2065int mlx4_config_mad_demux(struct mlx4_dev *dev)
2066{
2067 struct mlx4_cmd_mailbox *mailbox;
2068 int secure_host_active;
2069 int err;
2070
2071 /* Check if mad_demux is supported */
2072 if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX))
2073 return 0;
2074
2075 mailbox = mlx4_alloc_cmd_mailbox(dev);
2076 if (IS_ERR(mailbox)) {
2077 mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX");
2078 return -ENOMEM;
2079 }
2080
2081 /* Query mad_demux to find out which MADs are handled by internal sma */
2082 err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */,
2083 MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX,
2084 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
2085 if (err) {
2086 mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n",
2087 err);
2088 goto out;
2089 }
2090
2091 secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox);
2092
2093 /* Config mad_demux to handle all MADs returned by the query above */
2094 err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */,
2095 MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX,
2096 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
2097 if (err) {
2098 mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err);
2099 goto out;
2100 }
2101
2102 if (secure_host_active)
2103 mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n");
2104out:
2105 mlx4_free_cmd_mailbox(dev, mailbox);
2106 return err;
2107}
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 80b8c5f30e4e..0158689906fd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -1853,6 +1853,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
1853 mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); 1853 mlx4_err(dev, "Failed to initialize multicast group table, aborting\n");
1854 goto err_mr_table_free; 1854 goto err_mr_table_free;
1855 } 1855 }
1856 err = mlx4_config_mad_demux(dev);
1857 if (err) {
1858 mlx4_err(dev, "Failed in config_mad_demux, aborting\n");
1859 goto err_mcg_table_free;
1860 }
1856 } 1861 }
1857 1862
1858 err = mlx4_init_eq_table(dev); 1863 err = mlx4_init_eq_table(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 13fbcd03c3e4..b508c7887ef8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -274,6 +274,8 @@ struct mlx4_icm_table {
274#define MLX4_MPT_FLAG_PHYSICAL (1 << 9) 274#define MLX4_MPT_FLAG_PHYSICAL (1 << 9)
275#define MLX4_MPT_FLAG_REGION (1 << 8) 275#define MLX4_MPT_FLAG_REGION (1 << 8)
276 276
277#define MLX4_MPT_PD_MASK (0x1FFFFUL)
278#define MLX4_MPT_PD_VF_MASK (0xFE0000UL)
277#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) 279#define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27)
278#define MLX4_MPT_PD_FLAG_RAE (1 << 28) 280#define MLX4_MPT_PD_FLAG_RAE (1 << 28)
279#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) 281#define MLX4_MPT_PD_FLAG_EN_INV (3 << 24)
@@ -1306,5 +1308,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev);
1306int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); 1308int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
1307/* Returns the VF index of slave */ 1309/* Returns the VF index of slave */
1308int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); 1310int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
1311int mlx4_config_mad_demux(struct mlx4_dev *dev);
1309 1312
1310#endif /* MLX4_H */ 1313#endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index 2839abb878a6..7d717eccb7b0 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
298 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); 298 MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
299} 299}
300 300
301int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
302 struct mlx4_mpt_entry ***mpt_entry)
303{
304 int err;
305 int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
306 struct mlx4_cmd_mailbox *mailbox = NULL;
307
308 /* Make sure that at this point we have single-threaded access only */
309
310 if (mmr->enabled != MLX4_MPT_EN_HW)
311 return -EINVAL;
312
313 err = mlx4_HW2SW_MPT(dev, NULL, key);
314
315 if (err) {
316 mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
317 mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
318 return err;
319 }
320
321 mmr->enabled = MLX4_MPT_EN_SW;
322
323 if (!mlx4_is_mfunc(dev)) {
324 **mpt_entry = mlx4_table_find(
325 &mlx4_priv(dev)->mr_table.dmpt_table,
326 key, NULL);
327 } else {
328 mailbox = mlx4_alloc_cmd_mailbox(dev);
329 if (IS_ERR_OR_NULL(mailbox))
330 return PTR_ERR(mailbox);
331
332 err = mlx4_cmd_box(dev, 0, mailbox->dma, key,
333 0, MLX4_CMD_QUERY_MPT,
334 MLX4_CMD_TIME_CLASS_B,
335 MLX4_CMD_WRAPPED);
336
337 if (err)
338 goto free_mailbox;
339
340 *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf;
341 }
342
343 if (!(*mpt_entry) || !(**mpt_entry)) {
344 err = -ENOMEM;
345 goto free_mailbox;
346 }
347
348 return 0;
349
350free_mailbox:
351 mlx4_free_cmd_mailbox(dev, mailbox);
352 return err;
353}
354EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt);
355
356int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
357 struct mlx4_mpt_entry **mpt_entry)
358{
359 int err;
360
361 if (!mlx4_is_mfunc(dev)) {
362 /* Make sure any changes to this entry are flushed */
363 wmb();
364
365 *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW;
366
367 /* Make sure the new status is written */
368 wmb();
369
370 err = mlx4_SYNC_TPT(dev);
371 } else {
372 int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
373
374 struct mlx4_cmd_mailbox *mailbox =
375 container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
376 buf);
377
378 err = mlx4_SW2HW_MPT(dev, mailbox, key);
379 }
380
381 mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
382 if (!err)
383 mmr->enabled = MLX4_MPT_EN_HW;
384 return err;
385}
386EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
387
388void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
389 struct mlx4_mpt_entry **mpt_entry)
390{
391 if (mlx4_is_mfunc(dev)) {
392 struct mlx4_cmd_mailbox *mailbox =
393 container_of((void *)mpt_entry, struct mlx4_cmd_mailbox,
394 buf);
395 mlx4_free_cmd_mailbox(dev, mailbox);
396 }
397}
398EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
399
400int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
401 u32 pdn)
402{
403 u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags);
404 /* The wrapper function will put the slave's id here */
405 if (mlx4_is_mfunc(dev))
406 pd_flags &= ~MLX4_MPT_PD_VF_MASK;
407 mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) |
408 (pdn & MLX4_MPT_PD_MASK)
409 | MLX4_MPT_PD_FLAG_EN_INV);
410 return 0;
411}
412EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd);
413
414int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
415 struct mlx4_mpt_entry *mpt_entry,
416 u32 access)
417{
418 u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) |
419 (access & MLX4_PERM_MASK);
420
421 mpt_entry->flags = cpu_to_be32(flags);
422 return 0;
423}
424EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access);
425
301static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, 426static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd,
302 u64 iova, u64 size, u32 access, int npages, 427 u64 iova, u64 size, u32 access, int npages,
303 int page_shift, struct mlx4_mr *mr) 428 int page_shift, struct mlx4_mr *mr)
@@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr)
463} 588}
464EXPORT_SYMBOL_GPL(mlx4_mr_free); 589EXPORT_SYMBOL_GPL(mlx4_mr_free);
465 590
591void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr)
592{
593 mlx4_mtt_cleanup(dev, &mr->mtt);
594}
595EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup);
596
597int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
598 u64 iova, u64 size, int npages,
599 int page_shift, struct mlx4_mpt_entry *mpt_entry)
600{
601 int err;
602
603 mpt_entry->start = cpu_to_be64(mr->iova);
604 mpt_entry->length = cpu_to_be64(mr->size);
605 mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
606
607 err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
608 if (err)
609 return err;
610
611 if (mr->mtt.order < 0) {
612 mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
613 mpt_entry->mtt_addr = 0;
614 } else {
615 mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
616 &mr->mtt));
617 if (mr->mtt.page_shift == 0)
618 mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
619 }
620 mr->enabled = MLX4_MPT_EN_SW;
621
622 return 0;
623}
624EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write);
625
466int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) 626int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
467{ 627{
468 struct mlx4_cmd_mailbox *mailbox; 628 struct mlx4_cmd_mailbox *mailbox;
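
The new mlx4_mr_hw_*() helpers exported above let a consumer (such as the mlx4 IB driver's memory-rereg path) pull an MPT out of HW ownership, edit it, and push it back. A hedged usage sketch follows; the caller-side names and values are assumed, and the prototypes are taken to live in <linux/mlx4/device.h>.

	#include <linux/mlx4/device.h>

	/* Assumed caller context: dev, mmr, new_pdn and new_access come from
	 * the consumer; only the mlx4_mr_hw_*() calls are from the patch. */
	static int example_rereg_mr(struct mlx4_dev *dev, struct mlx4_mr *mmr,
				    u32 new_pdn, u32 new_access)
	{
		struct mlx4_mpt_entry *mpt;
		struct mlx4_mpt_entry **pmpt_entry = &mpt;
		int err;

		/* HW2SW the MPT and get a pointer to its contents. */
		err = mlx4_mr_hw_get_mpt(dev, mmr, &pmpt_entry);
		if (err)
			return err;

		err = mlx4_mr_hw_change_pd(dev, *pmpt_entry, new_pdn);
		if (!err)
			err = mlx4_mr_hw_change_access(dev, *pmpt_entry,
						       new_access);
		if (!err)
			/* SW2HW (or SYNC_TPT on the PF) re-enables the MR. */
			err = mlx4_mr_hw_write_mpt(dev, mmr, pmpt_entry);

		mlx4_mr_hw_put_mpt(dev, pmpt_entry);
		return err;
	}
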
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0efc1368e5a8..1089367fed22 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
2613 if (err) 2613 if (err)
2614 return err; 2614 return err;
2615 2615
2616 if (mpt->com.from_state != RES_MPT_HW) { 2616 if (mpt->com.from_state == RES_MPT_MAPPED) {
2617 /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do
2618 * that, the VF must read the MPT. But since the MPT entry memory is not
2619 * in the VF's virtual memory space, it must use QUERY_MPT to obtain the
2620 * entry contents. To guarantee that the MPT cannot be changed, the driver
2621 * must perform HW2SW_MPT before this query and return the MPT entry to HW
 2622 * ownership following the change. The change here allows the VF to

2623 * perform QUERY_MPT also when the entry is in SW ownership.
2624 */
2625 struct mlx4_mpt_entry *mpt_entry = mlx4_table_find(
2626 &mlx4_priv(dev)->mr_table.dmpt_table,
2627 mpt->key, NULL);
2628
2629 if (NULL == mpt_entry || NULL == outbox->buf) {
2630 err = -EINVAL;
2631 goto out;
2632 }
2633
2634 memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry));
2635
2636 err = 0;
2637 } else if (mpt->com.from_state == RES_MPT_HW) {
2638 err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2639 } else {
2617 err = -EBUSY; 2640 err = -EBUSY;
2618 goto out; 2641 goto out;
2619 } 2642 }
2620 2643
2621 err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd);
2622 2644
2623out: 2645out:
2624 put_res(dev, slave, id, RES_MPT); 2646 put_res(dev, slave, id, RES_MPT);
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 69c26f04d8ce..679db026f4be 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -873,6 +873,10 @@ static int myri10ge_dma_test(struct myri10ge_priv *mgp, int test_type)
873 return -ENOMEM; 873 return -ENOMEM;
874 dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE, 874 dmatest_bus = pci_map_page(mgp->pdev, dmatest_page, 0, PAGE_SIZE,
875 DMA_BIDIRECTIONAL); 875 DMA_BIDIRECTIONAL);
876 if (unlikely(pci_dma_mapping_error(mgp->pdev, dmatest_bus))) {
877 __free_page(dmatest_page);
878 return -ENOMEM;
879 }
876 880
877 /* Run a small DMA test. 881 /* Run a small DMA test.
878 * The magic multipliers to the length tell the firmware 882 * The magic multipliers to the length tell the firmware
@@ -1294,6 +1298,7 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
1294 int bytes, int watchdog) 1298 int bytes, int watchdog)
1295{ 1299{
1296 struct page *page; 1300 struct page *page;
1301 dma_addr_t bus;
1297 int idx; 1302 int idx;
1298#if MYRI10GE_ALLOC_SIZE > 4096 1303#if MYRI10GE_ALLOC_SIZE > 4096
1299 int end_offset; 1304 int end_offset;
@@ -1318,11 +1323,21 @@ myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
1318 rx->watchdog_needed = 1; 1323 rx->watchdog_needed = 1;
1319 return; 1324 return;
1320 } 1325 }
1326
1327 bus = pci_map_page(mgp->pdev, page, 0,
1328 MYRI10GE_ALLOC_SIZE,
1329 PCI_DMA_FROMDEVICE);
1330 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
1331 __free_pages(page, MYRI10GE_ALLOC_ORDER);
1332 if (rx->fill_cnt - rx->cnt < 16)
1333 rx->watchdog_needed = 1;
1334 return;
1335 }
1336
1321 rx->page = page; 1337 rx->page = page;
1322 rx->page_offset = 0; 1338 rx->page_offset = 0;
1323 rx->bus = pci_map_page(mgp->pdev, page, 0, 1339 rx->bus = bus;
1324 MYRI10GE_ALLOC_SIZE, 1340
1325 PCI_DMA_FROMDEVICE);
1326 } 1341 }
1327 rx->info[idx].page = rx->page; 1342 rx->info[idx].page = rx->page;
1328 rx->info[idx].page_offset = rx->page_offset; 1343 rx->info[idx].page_offset = rx->page_offset;
@@ -2764,6 +2779,35 @@ myri10ge_submit_req(struct myri10ge_tx_buf *tx, struct mcp_kreq_ether_send *src,
2764 mb(); 2779 mb();
2765} 2780}
2766 2781
2782static void myri10ge_unmap_tx_dma(struct myri10ge_priv *mgp,
2783 struct myri10ge_tx_buf *tx, int idx)
2784{
2785 unsigned int len;
2786 int last_idx;
2787
2788 /* Free any DMA resources we've alloced and clear out the skb slot */
2789 last_idx = (idx + 1) & tx->mask;
2790 idx = tx->req & tx->mask;
2791 do {
2792 len = dma_unmap_len(&tx->info[idx], len);
2793 if (len) {
2794 if (tx->info[idx].skb != NULL)
2795 pci_unmap_single(mgp->pdev,
2796 dma_unmap_addr(&tx->info[idx],
2797 bus), len,
2798 PCI_DMA_TODEVICE);
2799 else
2800 pci_unmap_page(mgp->pdev,
2801 dma_unmap_addr(&tx->info[idx],
2802 bus), len,
2803 PCI_DMA_TODEVICE);
2804 dma_unmap_len_set(&tx->info[idx], len, 0);
2805 tx->info[idx].skb = NULL;
2806 }
2807 idx = (idx + 1) & tx->mask;
2808 } while (idx != last_idx);
2809}
2810
2767/* 2811/*
2768 * Transmit a packet. We need to split the packet so that a single 2812 * Transmit a packet. We need to split the packet so that a single
2769 * segment does not cross myri10ge->tx_boundary, so this makes segment 2813 * segment does not cross myri10ge->tx_boundary, so this makes segment
@@ -2787,7 +2831,7 @@ static netdev_tx_t myri10ge_xmit(struct sk_buff *skb,
2787 u32 low; 2831 u32 low;
2788 __be32 high_swapped; 2832 __be32 high_swapped;
2789 unsigned int len; 2833 unsigned int len;
2790 int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; 2834 int idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
2791 u16 pseudo_hdr_offset, cksum_offset, queue; 2835 u16 pseudo_hdr_offset, cksum_offset, queue;
2792 int cum_len, seglen, boundary, rdma_count; 2836 int cum_len, seglen, boundary, rdma_count;
2793 u8 flags, odd_flag; 2837 u8 flags, odd_flag;
@@ -2884,9 +2928,12 @@ again:
2884 2928
2885 /* map the skb for DMA */ 2929 /* map the skb for DMA */
2886 len = skb_headlen(skb); 2930 len = skb_headlen(skb);
2931 bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
2932 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus)))
2933 goto drop;
2934
2887 idx = tx->req & tx->mask; 2935 idx = tx->req & tx->mask;
2888 tx->info[idx].skb = skb; 2936 tx->info[idx].skb = skb;
2889 bus = pci_map_single(mgp->pdev, skb->data, len, PCI_DMA_TODEVICE);
2890 dma_unmap_addr_set(&tx->info[idx], bus, bus); 2937 dma_unmap_addr_set(&tx->info[idx], bus, bus);
2891 dma_unmap_len_set(&tx->info[idx], len, len); 2938 dma_unmap_len_set(&tx->info[idx], len, len);
2892 2939
@@ -2985,12 +3032,16 @@ again:
2985 break; 3032 break;
2986 3033
2987 /* map next fragment for DMA */ 3034 /* map next fragment for DMA */
2988 idx = (count + tx->req) & tx->mask;
2989 frag = &skb_shinfo(skb)->frags[frag_idx]; 3035 frag = &skb_shinfo(skb)->frags[frag_idx];
2990 frag_idx++; 3036 frag_idx++;
2991 len = skb_frag_size(frag); 3037 len = skb_frag_size(frag);
2992 bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, 3038 bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len,
2993 DMA_TO_DEVICE); 3039 DMA_TO_DEVICE);
3040 if (unlikely(pci_dma_mapping_error(mgp->pdev, bus))) {
3041 myri10ge_unmap_tx_dma(mgp, tx, idx);
3042 goto drop;
3043 }
3044 idx = (count + tx->req) & tx->mask;
2994 dma_unmap_addr_set(&tx->info[idx], bus, bus); 3045 dma_unmap_addr_set(&tx->info[idx], bus, bus);
2995 dma_unmap_len_set(&tx->info[idx], len, len); 3046 dma_unmap_len_set(&tx->info[idx], len, len);
2996 } 3047 }
@@ -3021,31 +3072,8 @@ again:
3021 return NETDEV_TX_OK; 3072 return NETDEV_TX_OK;
3022 3073
3023abort_linearize: 3074abort_linearize:
3024 /* Free any DMA resources we've alloced and clear out the skb 3075 myri10ge_unmap_tx_dma(mgp, tx, idx);
3025 * slot so as to not trip up assertions, and to avoid a
3026 * double-free if linearizing fails */
3027 3076
3028 last_idx = (idx + 1) & tx->mask;
3029 idx = tx->req & tx->mask;
3030 tx->info[idx].skb = NULL;
3031 do {
3032 len = dma_unmap_len(&tx->info[idx], len);
3033 if (len) {
3034 if (tx->info[idx].skb != NULL)
3035 pci_unmap_single(mgp->pdev,
3036 dma_unmap_addr(&tx->info[idx],
3037 bus), len,
3038 PCI_DMA_TODEVICE);
3039 else
3040 pci_unmap_page(mgp->pdev,
3041 dma_unmap_addr(&tx->info[idx],
3042 bus), len,
3043 PCI_DMA_TODEVICE);
3044 dma_unmap_len_set(&tx->info[idx], len, 0);
3045 tx->info[idx].skb = NULL;
3046 }
3047 idx = (idx + 1) & tx->mask;
3048 } while (idx != last_idx);
3049 if (skb_is_gso(skb)) { 3077 if (skb_is_gso(skb)) {
3050 netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n"); 3078 netdev_err(mgp->dev, "TSO but wanted to linearize?!?!?\n");
3051 goto drop; 3079 goto drop;
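
The hunks above factor the TX unmap loop into myri10ge_unmap_tx_dma() and check pci_map_single()/skb_frag_dma_map() for mapping errors, unwinding anything already mapped before dropping the packet. Below is a minimal user-space sketch of that map-then-unwind pattern; the types and the toy map_buffer() helper are hypothetical stand-ins for the DMA API, not the driver's code.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct buf_map { void *addr; size_t len; };	/* len == 0 => slot unused */

/* Toy "DMA map" that fails for NULL buffers, standing in for pci_map_*(). */
static bool map_buffer(void *addr, size_t len, struct buf_map *out)
{
	if (!addr)
		return false;
	out->addr = addr;
	out->len = len;
	return true;
}

static void unmap_buffer(struct buf_map *m)
{
	m->addr = NULL;
	m->len = 0;
}

/* Map every buffer; on the first failure, unwind what was already mapped. */
static int map_all(void **bufs, const size_t *lens, struct buf_map *maps, int n)
{
	for (int i = 0; i < n; i++) {
		if (!map_buffer(bufs[i], lens[i], &maps[i])) {
			while (--i >= 0)
				unmap_buffer(&maps[i]);
			return -1;	/* caller drops the packet */
		}
	}
	return 0;
}

int main(void)
{
	char a[16], b[32];
	void *bufs[] = { a, NULL, b };	/* the second buffer fails to map */
	size_t lens[] = { sizeof(a), 8, sizeof(b) };
	struct buf_map maps[3] = { { NULL, 0 } };

	printf("map_all() = %d\n", map_all(bufs, lens, maps, 3));	/* -1 */
	return 0;
}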
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index d813bfb1a847..23c89ab5a6ad 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -32,6 +32,11 @@ MODULE_DESCRIPTION("Sun LDOM virtual network driver");
32MODULE_LICENSE("GPL"); 32MODULE_LICENSE("GPL");
33MODULE_VERSION(DRV_MODULE_VERSION); 33MODULE_VERSION(DRV_MODULE_VERSION);
34 34
35/* Heuristic for the number of times to exponentially backoff and
36 * retry sending an LDC trigger when EAGAIN is encountered
37 */
38#define VNET_MAX_RETRIES 10
39
35/* Ordered from largest major to lowest */ 40/* Ordered from largest major to lowest */
36static struct vio_version vnet_versions[] = { 41static struct vio_version vnet_versions[] = {
37 { .major = 1, .minor = 0 }, 42 { .major = 1, .minor = 0 },
@@ -260,6 +265,7 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
260 .state = vio_dring_state, 265 .state = vio_dring_state,
261 }; 266 };
262 int err, delay; 267 int err, delay;
268 int retries = 0;
263 269
264 hdr.seq = dr->snd_nxt; 270 hdr.seq = dr->snd_nxt;
265 delay = 1; 271 delay = 1;
@@ -272,6 +278,13 @@ static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
272 udelay(delay); 278 udelay(delay);
273 if ((delay <<= 1) > 128) 279 if ((delay <<= 1) > 128)
274 delay = 128; 280 delay = 128;
281 if (retries++ > VNET_MAX_RETRIES) {
282 pr_info("ECONNRESET %x:%x:%x:%x:%x:%x\n",
283 port->raddr[0], port->raddr[1],
284 port->raddr[2], port->raddr[3],
285 port->raddr[4], port->raddr[5]);
286 err = -ECONNRESET;
287 }
275 } while (err == -EAGAIN); 288 } while (err == -EAGAIN);
276 289
277 return err; 290 return err;
@@ -475,8 +488,9 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
475 return 0; 488 return 0;
476} 489}
477 490
478static void maybe_tx_wakeup(struct vnet *vp) 491static void maybe_tx_wakeup(unsigned long param)
479{ 492{
493 struct vnet *vp = (struct vnet *)param;
480 struct net_device *dev = vp->dev; 494 struct net_device *dev = vp->dev;
481 495
482 netif_tx_lock(dev); 496 netif_tx_lock(dev);
@@ -573,8 +587,13 @@ static void vnet_event(void *arg, int event)
573 break; 587 break;
574 } 588 }
575 spin_unlock(&vio->lock); 589 spin_unlock(&vio->lock);
590 /* Kick off a tasklet to wake the queue. We cannot call
591 * maybe_tx_wakeup directly here because we could deadlock on
592 * netif_tx_lock() with dev_watchdog()
593 */
576 if (unlikely(tx_wakeup && err != -ECONNRESET)) 594 if (unlikely(tx_wakeup && err != -ECONNRESET))
577 maybe_tx_wakeup(port->vp); 595 tasklet_schedule(&port->vp->vnet_tx_wakeup);
596
578 local_irq_restore(flags); 597 local_irq_restore(flags);
579} 598}
580 599
@@ -593,6 +612,7 @@ static int __vnet_tx_trigger(struct vnet_port *port)
593 .end_idx = (u32) -1, 612 .end_idx = (u32) -1,
594 }; 613 };
595 int err, delay; 614 int err, delay;
615 int retries = 0;
596 616
597 hdr.seq = dr->snd_nxt; 617 hdr.seq = dr->snd_nxt;
598 delay = 1; 618 delay = 1;
@@ -605,6 +625,8 @@ static int __vnet_tx_trigger(struct vnet_port *port)
605 udelay(delay); 625 udelay(delay);
606 if ((delay <<= 1) > 128) 626 if ((delay <<= 1) > 128)
607 delay = 128; 627 delay = 128;
628 if (retries++ > VNET_MAX_RETRIES)
629 break;
608 } while (err == -EAGAIN); 630 } while (err == -EAGAIN);
609 631
610 return err; 632 return err;
@@ -691,7 +713,15 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
691 memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len); 713 memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
692 } 714 }
693 715
694 d->hdr.ack = VIO_ACK_ENABLE; 716 /* We don't rely on the ACKs to free the skb in vnet_start_xmit(),
717 * thus it is safe to not set VIO_ACK_ENABLE for each transmission:
718 * the protocol itself does not require it as long as the peer
719 * sends a VIO_SUBTYPE_ACK for VIO_DRING_STOPPED.
720 *
721 * An ACK for every packet in the ring is expensive as the
722 * sending of LDC messages is slow and affects performance.
723 */
724 d->hdr.ack = VIO_ACK_DISABLE;
695 d->size = len; 725 d->size = len;
696 d->ncookies = port->tx_bufs[dr->prod].ncookies; 726 d->ncookies = port->tx_bufs[dr->prod].ncookies;
697 for (i = 0; i < d->ncookies; i++) 727 for (i = 0; i < d->ncookies; i++)
@@ -1046,6 +1076,7 @@ static struct vnet *vnet_new(const u64 *local_mac)
1046 vp = netdev_priv(dev); 1076 vp = netdev_priv(dev);
1047 1077
1048 spin_lock_init(&vp->lock); 1078 spin_lock_init(&vp->lock);
1079 tasklet_init(&vp->vnet_tx_wakeup, maybe_tx_wakeup, (unsigned long)vp);
1049 vp->dev = dev; 1080 vp->dev = dev;
1050 1081
1051 INIT_LIST_HEAD(&vp->port_list); 1082 INIT_LIST_HEAD(&vp->port_list);
@@ -1105,6 +1136,7 @@ static void vnet_cleanup(void)
1105 vp = list_first_entry(&vnet_list, struct vnet, list); 1136 vp = list_first_entry(&vnet_list, struct vnet, list);
1106 list_del(&vp->list); 1137 list_del(&vp->list);
1107 dev = vp->dev; 1138 dev = vp->dev;
1139 tasklet_kill(&vp->vnet_tx_wakeup);
1108 /* vio_unregister_driver() should have cleaned up port_list */ 1140 /* vio_unregister_driver() should have cleaned up port_list */
1109 BUG_ON(!list_empty(&vp->port_list)); 1141 BUG_ON(!list_empty(&vp->port_list));
1110 unregister_netdev(dev); 1142 unregister_netdev(dev);
diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h
index d347a5bf24b0..de5c2c64996f 100644
--- a/drivers/net/ethernet/sun/sunvnet.h
+++ b/drivers/net/ethernet/sun/sunvnet.h
@@ -1,6 +1,8 @@
1#ifndef _SUNVNET_H 1#ifndef _SUNVNET_H
2#define _SUNVNET_H 2#define _SUNVNET_H
3 3
4#include <linux/interrupt.h>
5
4#define DESC_NCOOKIES(entry_size) \ 6#define DESC_NCOOKIES(entry_size) \
5 ((entry_size) - sizeof(struct vio_net_desc)) 7 ((entry_size) - sizeof(struct vio_net_desc))
6 8
@@ -78,6 +80,8 @@ struct vnet {
78 80
79 struct list_head list; 81 struct list_head list;
80 u64 local_mac; 82 u64 local_mac;
83
84 struct tasklet_struct vnet_tx_wakeup;
81}; 85};
82 86
83#endif /* _SUNVNET_H */ 87#endif /* _SUNVNET_H */
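
The sunvnet changes above cap the EAGAIN retry loops at VNET_MAX_RETRIES instead of letting them spin indefinitely. A self-contained user-space sketch of the same capped exponential backoff follows; try_send() is a made-up stand-in for the LDC trigger and usleep() replaces udelay().

#include <errno.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_RETRIES 10

/* Toy operation that keeps returning -EAGAIN a few times before succeeding. */
static int try_send(int attempt)
{
	return attempt < 4 ? -EAGAIN : 0;	/* succeed on the 5th attempt */
}

static int send_with_backoff(void)
{
	int err, delay = 1, retries = 0, attempt = 0;

	do {
		err = try_send(attempt++);
		if (err == -EAGAIN) {
			usleep(delay);		/* user-space stand-in for udelay() */
			if ((delay <<= 1) > 128)
				delay = 128;	/* cap the backoff interval */
			if (retries++ > MAX_RETRIES)
				break;		/* give up rather than spin forever */
		}
	} while (err == -EAGAIN);

	return err;
}

int main(void)
{
	printf("send_with_backoff() = %d\n", send_with_backoff());
	return 0;
}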
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 36f4459520c3..fda5891835d4 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1170,7 +1170,6 @@ static struct platform_driver temac_of_driver = {
1170 .probe = temac_of_probe, 1170 .probe = temac_of_probe,
1171 .remove = temac_of_remove, 1171 .remove = temac_of_remove,
1172 .driver = { 1172 .driver = {
1173 .owner = THIS_MODULE,
1174 .name = "xilinx_temac", 1173 .name = "xilinx_temac",
1175 .of_match_table = temac_of_match, 1174 .of_match_table = temac_of_match,
1176 }, 1175 },
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 30e8608ff050..c8fd94133ecd 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -1645,7 +1645,6 @@ static struct platform_driver axienet_of_driver = {
1645 .probe = axienet_of_probe, 1645 .probe = axienet_of_probe,
1646 .remove = axienet_of_remove, 1646 .remove = axienet_of_remove,
1647 .driver = { 1647 .driver = {
1648 .owner = THIS_MODULE,
1649 .name = "xilinx_axienet", 1648 .name = "xilinx_axienet",
1650 .of_match_table = axienet_of_match, 1649 .of_match_table = axienet_of_match,
1651 }, 1650 },
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 782bb9373cd8..28dbbdc393eb 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -1245,7 +1245,6 @@ MODULE_DEVICE_TABLE(of, xemaclite_of_match);
1245static struct platform_driver xemaclite_of_driver = { 1245static struct platform_driver xemaclite_of_driver = {
1246 .driver = { 1246 .driver = {
1247 .name = DRIVER_NAME, 1247 .name = DRIVER_NAME,
1248 .owner = THIS_MODULE,
1249 .of_match_table = xemaclite_of_match, 1248 .of_match_table = xemaclite_of_match,
1250 }, 1249 },
1251 .probe = xemaclite_of_probe, 1250 .probe = xemaclite_of_probe,
diff --git a/drivers/net/irda/donauboe.c b/drivers/net/irda/donauboe.c
index 768dfe9a9315..6d3e2093bf7f 100644
--- a/drivers/net/irda/donauboe.c
+++ b/drivers/net/irda/donauboe.c
@@ -1755,17 +1755,4 @@ static struct pci_driver donauboe_pci_driver = {
1755 .resume = toshoboe_wakeup 1755 .resume = toshoboe_wakeup
1756}; 1756};
1757 1757
1758static int __init 1758module_pci_driver(donauboe_pci_driver);
1759donauboe_init (void)
1760{
1761 return pci_register_driver(&donauboe_pci_driver);
1762}
1763
1764static void __exit
1765donauboe_cleanup (void)
1766{
1767 pci_unregister_driver(&donauboe_pci_driver);
1768}
1769
1770module_init(donauboe_init);
1771module_exit(donauboe_cleanup);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index ef8a5c20236a..60e4ca01ccbb 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -45,10 +45,9 @@ struct macvlan_port {
45 struct sk_buff_head bc_queue; 45 struct sk_buff_head bc_queue;
46 struct work_struct bc_work; 46 struct work_struct bc_work;
47 bool passthru; 47 bool passthru;
48 int count;
48}; 49};
49 50
50#define MACVLAN_PORT_IS_EMPTY(port) list_empty(&port->vlans)
51
52struct macvlan_skb_cb { 51struct macvlan_skb_cb {
53 const struct macvlan_dev *src; 52 const struct macvlan_dev *src;
54}; 53};
@@ -667,7 +666,8 @@ static void macvlan_uninit(struct net_device *dev)
667 666
668 free_percpu(vlan->pcpu_stats); 667 free_percpu(vlan->pcpu_stats);
669 668
670 if (MACVLAN_PORT_IS_EMPTY(port)) 669 port->count -= 1;
670 if (!port->count)
671 macvlan_port_destroy(port->dev); 671 macvlan_port_destroy(port->dev);
672} 672}
673 673
@@ -1020,12 +1020,13 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
1020 vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]); 1020 vlan->flags = nla_get_u16(data[IFLA_MACVLAN_FLAGS]);
1021 1021
1022 if (vlan->mode == MACVLAN_MODE_PASSTHRU) { 1022 if (vlan->mode == MACVLAN_MODE_PASSTHRU) {
1023 if (!MACVLAN_PORT_IS_EMPTY(port)) 1023 if (port->count)
1024 return -EINVAL; 1024 return -EINVAL;
1025 port->passthru = true; 1025 port->passthru = true;
1026 eth_hw_addr_inherit(dev, lowerdev); 1026 eth_hw_addr_inherit(dev, lowerdev);
1027 } 1027 }
1028 1028
1029 port->count += 1;
1029 err = register_netdevice(dev); 1030 err = register_netdevice(dev);
1030 if (err < 0) 1031 if (err < 0)
1031 goto destroy_port; 1032 goto destroy_port;
@@ -1043,7 +1044,8 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev,
1043unregister_netdev: 1044unregister_netdev:
1044 unregister_netdevice(dev); 1045 unregister_netdevice(dev);
1045destroy_port: 1046destroy_port:
1046 if (MACVLAN_PORT_IS_EMPTY(port)) 1047 port->count -= 1;
1048 if (!port->count)
1047 macvlan_port_destroy(lowerdev); 1049 macvlan_port_destroy(lowerdev);
1048 1050
1049 return err; 1051 return err;
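
The macvlan change above replaces the MACVLAN_PORT_IS_EMPTY() list check with an explicit port->count, taken before register_netdevice() and dropped on every teardown path, so the port is only destroyed once its last user is gone. A rough user-space sketch of that create-on-first-get, destroy-on-last-put pattern (hypothetical names, not the macvlan structures):

#include <stdio.h>
#include <stdlib.h>

struct port {
	int count;	/* number of live users of this port */
};

static struct port *the_port;

/* Create the shared object on first use and count every additional user. */
static struct port *port_get(void)
{
	if (!the_port) {
		the_port = calloc(1, sizeof(*the_port));
		if (!the_port)
			return NULL;
	}
	the_port->count++;
	return the_port;
}

/* Drop one user; the last put is the only one allowed to free the object. */
static void port_put(struct port *p)
{
	if (--p->count == 0) {
		free(p);
		the_port = NULL;
	}
}

int main(void)
{
	struct port *a = port_get();
	struct port *b = port_get();

	printf("count after two users: %d\n", a->count);	/* 2 */
	port_put(b);
	port_put(a);	/* last put frees the port */
	return 0;
}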
diff --git a/drivers/net/wireless/ath/carl9170/carl9170.h b/drivers/net/wireless/ath/carl9170/carl9170.h
index 8596aba34f96..237d0cda1bcb 100644
--- a/drivers/net/wireless/ath/carl9170/carl9170.h
+++ b/drivers/net/wireless/ath/carl9170/carl9170.h
@@ -256,6 +256,7 @@ struct ar9170 {
256 atomic_t rx_work_urbs; 256 atomic_t rx_work_urbs;
257 atomic_t rx_pool_urbs; 257 atomic_t rx_pool_urbs;
258 kernel_ulong_t features; 258 kernel_ulong_t features;
259 bool usb_ep_cmd_is_bulk;
259 260
260 /* firmware settings */ 261 /* firmware settings */
261 struct completion fw_load_wait; 262 struct completion fw_load_wait;
diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c
index f35c7f30f9a6..c9f93310c0d6 100644
--- a/drivers/net/wireless/ath/carl9170/usb.c
+++ b/drivers/net/wireless/ath/carl9170/usb.c
@@ -621,9 +621,16 @@ int __carl9170_exec_cmd(struct ar9170 *ar, struct carl9170_cmd *cmd,
621 goto err_free; 621 goto err_free;
622 } 622 }
623 623
624 usb_fill_int_urb(urb, ar->udev, usb_sndintpipe(ar->udev, 624 if (ar->usb_ep_cmd_is_bulk)
625 AR9170_USB_EP_CMD), cmd, cmd->hdr.len + 4, 625 usb_fill_bulk_urb(urb, ar->udev,
626 carl9170_usb_cmd_complete, ar, 1); 626 usb_sndbulkpipe(ar->udev, AR9170_USB_EP_CMD),
627 cmd, cmd->hdr.len + 4,
628 carl9170_usb_cmd_complete, ar);
629 else
630 usb_fill_int_urb(urb, ar->udev,
631 usb_sndintpipe(ar->udev, AR9170_USB_EP_CMD),
632 cmd, cmd->hdr.len + 4,
633 carl9170_usb_cmd_complete, ar, 1);
627 634
628 if (free_buf) 635 if (free_buf)
629 urb->transfer_flags |= URB_FREE_BUFFER; 636 urb->transfer_flags |= URB_FREE_BUFFER;
@@ -1032,9 +1039,10 @@ static void carl9170_usb_firmware_step2(const struct firmware *fw,
1032static int carl9170_usb_probe(struct usb_interface *intf, 1039static int carl9170_usb_probe(struct usb_interface *intf,
1033 const struct usb_device_id *id) 1040 const struct usb_device_id *id)
1034{ 1041{
1042 struct usb_endpoint_descriptor *ep;
1035 struct ar9170 *ar; 1043 struct ar9170 *ar;
1036 struct usb_device *udev; 1044 struct usb_device *udev;
1037 int err; 1045 int i, err;
1038 1046
1039 err = usb_reset_device(interface_to_usbdev(intf)); 1047 err = usb_reset_device(interface_to_usbdev(intf));
1040 if (err) 1048 if (err)
@@ -1050,6 +1058,21 @@ static int carl9170_usb_probe(struct usb_interface *intf,
1050 ar->intf = intf; 1058 ar->intf = intf;
1051 ar->features = id->driver_info; 1059 ar->features = id->driver_info;
1052 1060
1061 /* We need to remember the type of endpoint 4 because it differs
1062 * between high- and full-speed configuration. The high-speed
1063 * configuration specifies it as interrupt and the full-speed
1064 * configuration as bulk endpoint. This information is required
1065 * later when sending urbs to that endpoint.
1066 */
1067 for (i = 0; i < intf->cur_altsetting->desc.bNumEndpoints; ++i) {
1068 ep = &intf->cur_altsetting->endpoint[i].desc;
1069
1070 if (usb_endpoint_num(ep) == AR9170_USB_EP_CMD &&
1071 usb_endpoint_dir_out(ep) &&
1072 usb_endpoint_type(ep) == USB_ENDPOINT_XFER_BULK)
1073 ar->usb_ep_cmd_is_bulk = true;
1074 }
1075
1053 usb_set_intfdata(intf, ar); 1076 usb_set_intfdata(intf, ar);
1054 SET_IEEE80211_DEV(ar->hw, &intf->dev); 1077 SET_IEEE80211_DEV(ar->hw, &intf->dev);
1055 1078
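
The probe change above walks the interface's endpoint descriptors once and records whether command endpoint 4 is bulk (full-speed configuration) or interrupt (high-speed configuration), so __carl9170_exec_cmd() can build the matching URB type later. A simplified sketch of that descriptor scan, using made-up structures rather than the USB core's:

#include <stdbool.h>
#include <stdio.h>

#define EP_CMD		4
#define XFER_BULK	2
#define XFER_INT	3
#define DIR_OUT		0x00

/* Hypothetical, simplified endpoint descriptor for this sketch only. */
struct ep_desc {
	unsigned char number;
	unsigned char dir;		/* DIR_OUT, or 0x80 for IN */
	unsigned char xfer_type;	/* XFER_BULK or XFER_INT */
};

/* Decide once, at probe time, whether the command endpoint is bulk. */
static bool cmd_ep_is_bulk(const struct ep_desc *eps, int n)
{
	for (int i = 0; i < n; i++)
		if (eps[i].number == EP_CMD && eps[i].dir == DIR_OUT &&
		    eps[i].xfer_type == XFER_BULK)
			return true;
	return false;
}

int main(void)
{
	/* Full-speed style configuration: command endpoint exposed as bulk. */
	struct ep_desc eps[] = {
		{ 1, 0x80, XFER_BULK },
		{ 4, DIR_OUT, XFER_BULK },
	};
	bool bulk = cmd_ep_is_bulk(eps, 2);

	/* Every later command submit picks the matching transfer type. */
	printf("submit commands as %s transfers\n", bulk ? "bulk" : "interrupt");
	return 0;
}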
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
index 535c7eb01b3a..8f8b9373de95 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/msgbuf.c
@@ -1318,6 +1318,8 @@ int brcmf_proto_msgbuf_attach(struct brcmf_pub *drvr)
1318 msgbuf->nrof_flowrings = if_msgbuf->nrof_flowrings; 1318 msgbuf->nrof_flowrings = if_msgbuf->nrof_flowrings;
1319 msgbuf->flowring_dma_handle = kzalloc(msgbuf->nrof_flowrings * 1319 msgbuf->flowring_dma_handle = kzalloc(msgbuf->nrof_flowrings *
1320 sizeof(*msgbuf->flowring_dma_handle), GFP_ATOMIC); 1320 sizeof(*msgbuf->flowring_dma_handle), GFP_ATOMIC);
1321 if (!msgbuf->flowring_dma_handle)
1322 goto fail;
1321 1323
1322 msgbuf->rx_dataoffset = if_msgbuf->rx_dataoffset; 1324 msgbuf->rx_dataoffset = if_msgbuf->rx_dataoffset;
1323 msgbuf->max_rxbufpost = if_msgbuf->max_rxbufpost; 1325 msgbuf->max_rxbufpost = if_msgbuf->max_rxbufpost;
@@ -1362,6 +1364,7 @@ fail:
1362 kfree(msgbuf->flow_map); 1364 kfree(msgbuf->flow_map);
1363 kfree(msgbuf->txstatus_done_map); 1365 kfree(msgbuf->txstatus_done_map);
1364 brcmf_msgbuf_release_pktids(msgbuf); 1366 brcmf_msgbuf_release_pktids(msgbuf);
1367 kfree(msgbuf->flowring_dma_handle);
1365 if (msgbuf->ioctbuf) 1368 if (msgbuf->ioctbuf)
1366 dma_free_coherent(drvr->bus_if->dev, 1369 dma_free_coherent(drvr->bus_if->dev,
1367 BRCMF_TX_IOCTL_MAX_MSG_SIZE, 1370 BRCMF_TX_IOCTL_MAX_MSG_SIZE,
@@ -1391,6 +1394,7 @@ void brcmf_proto_msgbuf_detach(struct brcmf_pub *drvr)
1391 BRCMF_TX_IOCTL_MAX_MSG_SIZE, 1394 BRCMF_TX_IOCTL_MAX_MSG_SIZE,
1392 msgbuf->ioctbuf, msgbuf->ioctbuf_handle); 1395 msgbuf->ioctbuf, msgbuf->ioctbuf_handle);
1393 brcmf_msgbuf_release_pktids(msgbuf); 1396 brcmf_msgbuf_release_pktids(msgbuf);
1397 kfree(msgbuf->flowring_dma_handle);
1394 kfree(msgbuf); 1398 kfree(msgbuf);
1395 drvr->proto->pd = NULL; 1399 drvr->proto->pd = NULL;
1396 } 1400 }
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
index bc972c0ba5f8..e5101b287e4e 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/pcie.c
@@ -591,12 +591,13 @@ static void brcmf_pcie_handle_mb_data(struct brcmf_pciedev_info *devinfo)
591 } 591 }
592 if (dtoh_mb_data & BRCMF_D2H_DEV_DS_EXIT_NOTE) 592 if (dtoh_mb_data & BRCMF_D2H_DEV_DS_EXIT_NOTE)
593 brcmf_dbg(PCIE, "D2H_MB_DATA: DEEP SLEEP EXIT\n"); 593 brcmf_dbg(PCIE, "D2H_MB_DATA: DEEP SLEEP EXIT\n");
594 if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) 594 if (dtoh_mb_data & BRCMF_D2H_DEV_D3_ACK) {
595 brcmf_dbg(PCIE, "D2H_MB_DATA: D3 ACK\n"); 595 brcmf_dbg(PCIE, "D2H_MB_DATA: D3 ACK\n");
596 if (waitqueue_active(&devinfo->mbdata_resp_wait)) { 596 if (waitqueue_active(&devinfo->mbdata_resp_wait)) {
597 devinfo->mbdata_completed = true; 597 devinfo->mbdata_completed = true;
598 wake_up(&devinfo->mbdata_resp_wait); 598 wake_up(&devinfo->mbdata_resp_wait);
599 } 599 }
600 }
600} 601}
601 602
602 603
diff --git a/drivers/net/wireless/ipw2x00/ipw2200.c b/drivers/net/wireless/ipw2x00/ipw2200.c
index c5aa404069f3..389656bd1a74 100644
--- a/drivers/net/wireless/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/ipw2x00/ipw2200.c
@@ -9853,6 +9853,7 @@ static int ipw_wx_get_wireless_mode(struct net_device *dev,
9853 strncpy(extra, "unknown", MAX_WX_STRING); 9853 strncpy(extra, "unknown", MAX_WX_STRING);
9854 break; 9854 break;
9855 } 9855 }
9856 extra[MAX_WX_STRING - 1] = '\0';
9856 9857
9857 IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra); 9858 IPW_DEBUG_WX("PRIV GET MODE: %s\n", extra);
9858 9859
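
The one-line ipw2200 fix above exists because strncpy() does not NUL-terminate the destination when the source fills the buffer. A tiny stand-alone demonstration of the pattern:

#include <stdio.h>
#include <string.h>

#define MAX_STR 8

int main(void)
{
	char dst[MAX_STR];
	const char *src = "802.11abg";	/* longer than dst */

	/* strncpy() stops after MAX_STR bytes and adds no terminator when the
	 * source is at least that long, so dst would be unterminated here. */
	strncpy(dst, src, MAX_STR);
	dst[MAX_STR - 1] = '\0';	/* the fix applied above */

	printf("%s\n", dst);		/* safe to print now */
	return 0;
}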
diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
index 0d6a8b768a68..7c8796584c25 100644
--- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c
@@ -396,7 +396,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
396 else 396 else
397 hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT; 397 hw->wiphy->flags &= ~WIPHY_FLAG_PS_ON_BY_DEFAULT;
398 398
399 hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; 399 /* TODO: enable that only for firmwares that don't crash */
400 /* hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_SCHED_SCAN; */
400 hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX; 401 hw->wiphy->max_sched_scan_ssids = PROBE_OPTION_MAX;
401 hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES; 402 hw->wiphy->max_match_sets = IWL_SCAN_MAX_PROFILES;
402 /* we create the 802.11 header and zero length SSID IE. */ 403 /* we create the 802.11 header and zero length SSID IE. */
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ef3026f46a37..d4eb8d2e9cb7 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -165,6 +165,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
165 u16 dealloc_ring[MAX_PENDING_REQS]; 165 u16 dealloc_ring[MAX_PENDING_REQS];
166 struct task_struct *dealloc_task; 166 struct task_struct *dealloc_task;
167 wait_queue_head_t dealloc_wq; 167 wait_queue_head_t dealloc_wq;
168 atomic_t inflight_packets;
168 169
169 /* Use kthread for guest RX */ 170 /* Use kthread for guest RX */
170 struct task_struct *task; 171 struct task_struct *task;
@@ -329,4 +330,8 @@ extern unsigned int xenvif_max_queues;
329extern struct dentry *xen_netback_dbg_root; 330extern struct dentry *xen_netback_dbg_root;
330#endif 331#endif
331 332
333void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
334 struct sk_buff *skb);
335void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue);
336
332#endif /* __XEN_NETBACK__COMMON_H__ */ 337#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index bfd10cb9c8de..e29e15dca86e 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -43,6 +43,23 @@
43#define XENVIF_QUEUE_LENGTH 32 43#define XENVIF_QUEUE_LENGTH 32
44#define XENVIF_NAPI_WEIGHT 64 44#define XENVIF_NAPI_WEIGHT 64
45 45
46/* This function is used to set SKBTX_DEV_ZEROCOPY as well as
47 * increasing the inflight counter. We need to increase the inflight
48 * counter because core driver calls into xenvif_zerocopy_callback
49 * which calls xenvif_skb_zerocopy_complete.
50 */
51void xenvif_skb_zerocopy_prepare(struct xenvif_queue *queue,
52 struct sk_buff *skb)
53{
54 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
55 atomic_inc(&queue->inflight_packets);
56}
57
58void xenvif_skb_zerocopy_complete(struct xenvif_queue *queue)
59{
60 atomic_dec(&queue->inflight_packets);
61}
62
46static inline void xenvif_stop_queue(struct xenvif_queue *queue) 63static inline void xenvif_stop_queue(struct xenvif_queue *queue)
47{ 64{
48 struct net_device *dev = queue->vif->dev; 65 struct net_device *dev = queue->vif->dev;
@@ -524,9 +541,6 @@ int xenvif_init_queue(struct xenvif_queue *queue)
524 541
525 init_timer(&queue->rx_stalled); 542 init_timer(&queue->rx_stalled);
526 543
527 netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
528 XENVIF_NAPI_WEIGHT);
529
530 return 0; 544 return 0;
531} 545}
532 546
@@ -560,6 +574,7 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
560 574
561 init_waitqueue_head(&queue->wq); 575 init_waitqueue_head(&queue->wq);
562 init_waitqueue_head(&queue->dealloc_wq); 576 init_waitqueue_head(&queue->dealloc_wq);
577 atomic_set(&queue->inflight_packets, 0);
563 578
564 if (tx_evtchn == rx_evtchn) { 579 if (tx_evtchn == rx_evtchn) {
565 /* feature-split-event-channels == 0 */ 580 /* feature-split-event-channels == 0 */
@@ -614,6 +629,9 @@ int xenvif_connect(struct xenvif_queue *queue, unsigned long tx_ring_ref,
614 wake_up_process(queue->task); 629 wake_up_process(queue->task);
615 wake_up_process(queue->dealloc_task); 630 wake_up_process(queue->dealloc_task);
616 631
632 netif_napi_add(queue->vif->dev, &queue->napi, xenvif_poll,
633 XENVIF_NAPI_WEIGHT);
634
617 return 0; 635 return 0;
618 636
619err_rx_unbind: 637err_rx_unbind:
@@ -642,25 +660,6 @@ void xenvif_carrier_off(struct xenvif *vif)
642 rtnl_unlock(); 660 rtnl_unlock();
643} 661}
644 662
645static void xenvif_wait_unmap_timeout(struct xenvif_queue *queue,
646 unsigned int worst_case_skb_lifetime)
647{
648 int i, unmap_timeout = 0;
649
650 for (i = 0; i < MAX_PENDING_REQS; ++i) {
651 if (queue->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
652 unmap_timeout++;
653 schedule_timeout(msecs_to_jiffies(1000));
654 if (unmap_timeout > worst_case_skb_lifetime &&
655 net_ratelimit())
656 netdev_err(queue->vif->dev,
657 "Page still granted! Index: %x\n",
658 i);
659 i = -1;
660 }
661 }
662}
663
664void xenvif_disconnect(struct xenvif *vif) 663void xenvif_disconnect(struct xenvif *vif)
665{ 664{
666 struct xenvif_queue *queue = NULL; 665 struct xenvif_queue *queue = NULL;
@@ -672,6 +671,8 @@ void xenvif_disconnect(struct xenvif *vif)
672 for (queue_index = 0; queue_index < num_queues; ++queue_index) { 671 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
673 queue = &vif->queues[queue_index]; 672 queue = &vif->queues[queue_index];
674 673
674 netif_napi_del(&queue->napi);
675
675 if (queue->task) { 676 if (queue->task) {
676 del_timer_sync(&queue->rx_stalled); 677 del_timer_sync(&queue->rx_stalled);
677 kthread_stop(queue->task); 678 kthread_stop(queue->task);
@@ -704,7 +705,6 @@ void xenvif_disconnect(struct xenvif *vif)
704void xenvif_deinit_queue(struct xenvif_queue *queue) 705void xenvif_deinit_queue(struct xenvif_queue *queue)
705{ 706{
706 free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages); 707 free_xenballooned_pages(MAX_PENDING_REQS, queue->mmap_pages);
707 netif_napi_del(&queue->napi);
708} 708}
709 709
710void xenvif_free(struct xenvif *vif) 710void xenvif_free(struct xenvif *vif)
@@ -712,21 +712,11 @@ void xenvif_free(struct xenvif *vif)
712 struct xenvif_queue *queue = NULL; 712 struct xenvif_queue *queue = NULL;
713 unsigned int num_queues = vif->num_queues; 713 unsigned int num_queues = vif->num_queues;
714 unsigned int queue_index; 714 unsigned int queue_index;
715 /* Here we want to avoid timeout messages if an skb can be legitimately
716 * stuck somewhere else. Realistically this could be an another vif's
717 * internal or QDisc queue. That another vif also has this
718 * rx_drain_timeout_msecs timeout, so give it time to drain out.
719 * Although if that other guest wakes up just before its timeout happens
720 * and takes only one skb from QDisc, it can hold onto other skbs for a
721 * longer period.
722 */
723 unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000);
724 715
725 unregister_netdev(vif->dev); 716 unregister_netdev(vif->dev);
726 717
727 for (queue_index = 0; queue_index < num_queues; ++queue_index) { 718 for (queue_index = 0; queue_index < num_queues; ++queue_index) {
728 queue = &vif->queues[queue_index]; 719 queue = &vif->queues[queue_index];
729 xenvif_wait_unmap_timeout(queue, worst_case_skb_lifetime);
730 xenvif_deinit_queue(queue); 720 xenvif_deinit_queue(queue);
731 } 721 }
732 722
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4734472aa620..08f65996534c 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1525,10 +1525,12 @@ static int xenvif_handle_frag_list(struct xenvif_queue *queue, struct sk_buff *s
1525 /* remove traces of mapped pages and frag_list */ 1525 /* remove traces of mapped pages and frag_list */
1526 skb_frag_list_init(skb); 1526 skb_frag_list_init(skb);
1527 uarg = skb_shinfo(skb)->destructor_arg; 1527 uarg = skb_shinfo(skb)->destructor_arg;
1528 /* increase inflight counter to offset decrement in callback */
1529 atomic_inc(&queue->inflight_packets);
1528 uarg->callback(uarg, true); 1530 uarg->callback(uarg, true);
1529 skb_shinfo(skb)->destructor_arg = NULL; 1531 skb_shinfo(skb)->destructor_arg = NULL;
1530 1532
1531 skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1533 xenvif_skb_zerocopy_prepare(queue, nskb);
1532 kfree_skb(nskb); 1534 kfree_skb(nskb);
1533 1535
1534 return 0; 1536 return 0;
@@ -1589,7 +1591,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
1589 if (net_ratelimit()) 1591 if (net_ratelimit())
1590 netdev_err(queue->vif->dev, 1592 netdev_err(queue->vif->dev,
1591 "Not enough memory to consolidate frag_list!\n"); 1593 "Not enough memory to consolidate frag_list!\n");
1592 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1594 xenvif_skb_zerocopy_prepare(queue, skb);
1593 kfree_skb(skb); 1595 kfree_skb(skb);
1594 continue; 1596 continue;
1595 } 1597 }
@@ -1609,7 +1611,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
1609 "Can't setup checksum in net_tx_action\n"); 1611 "Can't setup checksum in net_tx_action\n");
1610 /* We have to set this flag to trigger the callback */ 1612 /* We have to set this flag to trigger the callback */
1611 if (skb_shinfo(skb)->destructor_arg) 1613 if (skb_shinfo(skb)->destructor_arg)
1612 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1614 xenvif_skb_zerocopy_prepare(queue, skb);
1613 kfree_skb(skb); 1615 kfree_skb(skb);
1614 continue; 1616 continue;
1615 } 1617 }
@@ -1641,7 +1643,7 @@ static int xenvif_tx_submit(struct xenvif_queue *queue)
1641 * skb. E.g. the __pskb_pull_tail earlier can do such thing. 1643 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
1642 */ 1644 */
1643 if (skb_shinfo(skb)->destructor_arg) { 1645 if (skb_shinfo(skb)->destructor_arg) {
1644 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 1646 xenvif_skb_zerocopy_prepare(queue, skb);
1645 queue->stats.tx_zerocopy_sent++; 1647 queue->stats.tx_zerocopy_sent++;
1646 } 1648 }
1647 1649
@@ -1681,6 +1683,7 @@ void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
1681 queue->stats.tx_zerocopy_success++; 1683 queue->stats.tx_zerocopy_success++;
1682 else 1684 else
1683 queue->stats.tx_zerocopy_fail++; 1685 queue->stats.tx_zerocopy_fail++;
1686 xenvif_skb_zerocopy_complete(queue);
1684} 1687}
1685 1688
1686static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue) 1689static inline void xenvif_tx_dealloc_action(struct xenvif_queue *queue)
@@ -2058,15 +2061,24 @@ int xenvif_kthread_guest_rx(void *data)
2058 return 0; 2061 return 0;
2059} 2062}
2060 2063
2064static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
2065{
2066 /* Dealloc thread must remain running until all inflight
2067 * packets complete.
2068 */
2069 return kthread_should_stop() &&
2070 !atomic_read(&queue->inflight_packets);
2071}
2072
2061int xenvif_dealloc_kthread(void *data) 2073int xenvif_dealloc_kthread(void *data)
2062{ 2074{
2063 struct xenvif_queue *queue = data; 2075 struct xenvif_queue *queue = data;
2064 2076
2065 while (!kthread_should_stop()) { 2077 for (;;) {
2066 wait_event_interruptible(queue->dealloc_wq, 2078 wait_event_interruptible(queue->dealloc_wq,
2067 tx_dealloc_work_todo(queue) || 2079 tx_dealloc_work_todo(queue) ||
2068 kthread_should_stop()); 2080 xenvif_dealloc_kthread_should_stop(queue));
2069 if (kthread_should_stop()) 2081 if (xenvif_dealloc_kthread_should_stop(queue))
2070 break; 2082 break;
2071 2083
2072 xenvif_tx_dealloc_action(queue); 2084 xenvif_tx_dealloc_action(queue);
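
The netback changes above add an inflight_packets counter and only let the dealloc kthread exit once a stop has been requested and that counter has drained to zero, because zerocopy completions can still arrive after the stop request. A condensed user-space model of the same exit condition, using C11 atomics and a polling loop in place of the kernel's atomic_t, wait queues, and kthread_should_stop():

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int inflight_packets;
static atomic_bool stop_requested;

/* The worker may only exit once stopping AND the in-flight count is zero. */
static bool should_stop(void)
{
	return atomic_load(&stop_requested) &&
	       atomic_load(&inflight_packets) == 0;
}

static void worker_iteration(void)
{
	/* Pretend one completion arrived and its resources were freed. */
	if (atomic_load(&inflight_packets) > 0)
		atomic_fetch_sub(&inflight_packets, 1);
}

int main(void)
{
	atomic_store(&inflight_packets, 3);
	atomic_store(&stop_requested, true);	/* stop while work is pending */

	while (!should_stop())
		worker_iteration();		/* drains the 3 packets first */

	printf("worker exited with %d in flight\n",
	       atomic_load(&inflight_packets));
	return 0;
}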
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 580517d857bf..9c47b897b6d2 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -116,6 +116,7 @@ static int xenvif_read_io_ring(struct seq_file *m, void *v)
116} 116}
117 117
118#define XENVIF_KICK_STR "kick" 118#define XENVIF_KICK_STR "kick"
119#define BUFFER_SIZE 32
119 120
120static ssize_t 121static ssize_t
121xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count, 122xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
@@ -124,22 +125,24 @@ xenvif_write_io_ring(struct file *filp, const char __user *buf, size_t count,
124 struct xenvif_queue *queue = 125 struct xenvif_queue *queue =
125 ((struct seq_file *)filp->private_data)->private; 126 ((struct seq_file *)filp->private_data)->private;
126 int len; 127 int len;
127 char write[sizeof(XENVIF_KICK_STR)]; 128 char write[BUFFER_SIZE];
128 129
129 /* don't allow partial writes and check the length */ 130 /* don't allow partial writes and check the length */
130 if (*ppos != 0) 131 if (*ppos != 0)
131 return 0; 132 return 0;
132 if (count < sizeof(XENVIF_KICK_STR) - 1) 133 if (count >= sizeof(write))
133 return -ENOSPC; 134 return -ENOSPC;
134 135
135 len = simple_write_to_buffer(write, 136 len = simple_write_to_buffer(write,
136 sizeof(write), 137 sizeof(write) - 1,
137 ppos, 138 ppos,
138 buf, 139 buf,
139 count); 140 count);
140 if (len < 0) 141 if (len < 0)
141 return len; 142 return len;
142 143
144 write[len] = '\0';
145
143 if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1)) 146 if (!strncmp(write, XENVIF_KICK_STR, sizeof(XENVIF_KICK_STR) - 1))
144 xenvif_interrupt(0, (void *)queue); 147 xenvif_interrupt(0, (void *)queue);
145 else { 148 else {
@@ -171,10 +174,9 @@ static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
171 .write = xenvif_write_io_ring, 174 .write = xenvif_write_io_ring,
172}; 175};
173 176
174static void xenvif_debugfs_addif(struct xenvif_queue *queue) 177static void xenvif_debugfs_addif(struct xenvif *vif)
175{ 178{
176 struct dentry *pfile; 179 struct dentry *pfile;
177 struct xenvif *vif = queue->vif;
178 int i; 180 int i;
179 181
180 if (IS_ERR_OR_NULL(xen_netback_dbg_root)) 182 if (IS_ERR_OR_NULL(xen_netback_dbg_root))
@@ -733,10 +735,11 @@ static void connect(struct backend_info *be)
733 be->vif->num_queues = queue_index; 735 be->vif->num_queues = queue_index;
734 goto err; 736 goto err;
735 } 737 }
738 }
739
736#ifdef CONFIG_DEBUG_FS 740#ifdef CONFIG_DEBUG_FS
737 xenvif_debugfs_addif(queue); 741 xenvif_debugfs_addif(be->vif);
738#endif /* CONFIG_DEBUG_FS */ 742#endif /* CONFIG_DEBUG_FS */
739 }
740 743
741 /* Initialisation completed, tell core driver the number of 744 /* Initialisation completed, tell core driver the number of
742 * active queues. 745 * active queues.
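
The xenvif_write_io_ring() fix above enlarges the scratch buffer, rejects writes that would not fit, and NUL-terminates the copied bytes before strncmp(). The same shape in plain user-space C, for illustration only:

#include <stdio.h>
#include <string.h>

#define KICK_STR "kick"

/* Reject inputs that would not fit, copy at most sizeof(buf) - 1 bytes, and
 * always NUL-terminate before any string comparison. */
static int handle_command(const char *input, size_t count)
{
	char buf[32];

	if (count >= sizeof(buf))
		return -1;		/* would not fit, refuse it */

	memcpy(buf, input, count);
	buf[count] = '\0';		/* the comparison below needs a NUL */

	if (!strncmp(buf, KICK_STR, sizeof(KICK_STR) - 1)) {
		puts("kick requested");
		return 0;
	}
	printf("unknown command: %s\n", buf);
	return -1;
}

int main(void)
{
	handle_command("kick", 4);
	handle_command("reset", 5);
	return 0;
}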
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index 2dcb0541012d..5160c4eb73c2 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -9,7 +9,8 @@ menu "Device Tree and Open Firmware support"
9 9
10config OF_SELFTEST 10config OF_SELFTEST
11 bool "Device Tree Runtime self tests" 11 bool "Device Tree Runtime self tests"
12 depends on OF_IRQ 12 depends on OF_IRQ && OF_EARLY_FLATTREE
13 select OF_DYNAMIC
13 help 14 help
14 This option builds in test cases for the device tree infrastructure 15 This option builds in test cases for the device tree infrastructure
15 that are executed once at boot time, and the results dumped to the 16 that are executed once at boot time, and the results dumped to the
diff --git a/drivers/of/Makefile b/drivers/of/Makefile
index 099b1fb00af4..2b6a7b129d10 100644
--- a/drivers/of/Makefile
+++ b/drivers/of/Makefile
@@ -1,11 +1,13 @@
1obj-y = base.o device.o platform.o 1obj-y = base.o device.o platform.o
2obj-$(CONFIG_OF_DYNAMIC) += dynamic.o
2obj-$(CONFIG_OF_FLATTREE) += fdt.o 3obj-$(CONFIG_OF_FLATTREE) += fdt.o
3obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o 4obj-$(CONFIG_OF_EARLY_FLATTREE) += fdt_address.o
4obj-$(CONFIG_OF_PROMTREE) += pdt.o 5obj-$(CONFIG_OF_PROMTREE) += pdt.o
5obj-$(CONFIG_OF_ADDRESS) += address.o 6obj-$(CONFIG_OF_ADDRESS) += address.o
6obj-$(CONFIG_OF_IRQ) += irq.o 7obj-$(CONFIG_OF_IRQ) += irq.o
7obj-$(CONFIG_OF_NET) += of_net.o 8obj-$(CONFIG_OF_NET) += of_net.o
8obj-$(CONFIG_OF_SELFTEST) += selftest.o 9obj-$(CONFIG_OF_SELFTEST) += of_selftest.o
10of_selftest-objs := selftest.o testcase-data/testcases.dtb.o
9obj-$(CONFIG_OF_MDIO) += of_mdio.o 11obj-$(CONFIG_OF_MDIO) += of_mdio.o
10obj-$(CONFIG_OF_PCI) += of_pci.o 12obj-$(CONFIG_OF_PCI) += of_pci.o
11obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o 13obj-$(CONFIG_OF_PCI_IRQ) += of_pci_irq.o
diff --git a/drivers/of/base.c b/drivers/of/base.c
index b9864806e9b8..d8574adf0d62 100644
--- a/drivers/of/base.c
+++ b/drivers/of/base.c
@@ -17,6 +17,7 @@
17 * as published by the Free Software Foundation; either version 17 * as published by the Free Software Foundation; either version
18 * 2 of the License, or (at your option) any later version. 18 * 2 of the License, or (at your option) any later version.
19 */ 19 */
20#include <linux/console.h>
20#include <linux/ctype.h> 21#include <linux/ctype.h>
21#include <linux/cpu.h> 22#include <linux/cpu.h>
22#include <linux/module.h> 23#include <linux/module.h>
@@ -35,15 +36,17 @@ struct device_node *of_allnodes;
35EXPORT_SYMBOL(of_allnodes); 36EXPORT_SYMBOL(of_allnodes);
36struct device_node *of_chosen; 37struct device_node *of_chosen;
37struct device_node *of_aliases; 38struct device_node *of_aliases;
38static struct device_node *of_stdout; 39struct device_node *of_stdout;
39 40
40static struct kset *of_kset; 41struct kset *of_kset;
41 42
42/* 43/*
43 * Used to protect the of_aliases; but also overloaded to hold off addition of 44 * Used to protect the of_aliases, to hold off addition of nodes to sysfs.
44 * nodes to sysfs 45 * This mutex must be held whenever modifications are being made to the
46 * device tree. The of_{attach,detach}_node() and
47 * of_{add,remove,update}_property() helpers make sure this happens.
45 */ 48 */
46DEFINE_MUTEX(of_aliases_mutex); 49DEFINE_MUTEX(of_mutex);
47 50
48/* use when traversing tree through the allnext, child, sibling, 51/* use when traversing tree through the allnext, child, sibling,
49 * or parent members of struct device_node. 52 * or parent members of struct device_node.
@@ -89,79 +92,7 @@ int __weak of_node_to_nid(struct device_node *np)
89} 92}
90#endif 93#endif
91 94
92#if defined(CONFIG_OF_DYNAMIC) 95#ifndef CONFIG_OF_DYNAMIC
93/**
94 * of_node_get - Increment refcount of a node
95 * @node: Node to inc refcount, NULL is supported to
96 * simplify writing of callers
97 *
98 * Returns node.
99 */
100struct device_node *of_node_get(struct device_node *node)
101{
102 if (node)
103 kobject_get(&node->kobj);
104 return node;
105}
106EXPORT_SYMBOL(of_node_get);
107
108static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
109{
110 return container_of(kobj, struct device_node, kobj);
111}
112
113/**
114 * of_node_release - release a dynamically allocated node
115 * @kref: kref element of the node to be released
116 *
117 * In of_node_put() this function is passed to kref_put()
118 * as the destructor.
119 */
120static void of_node_release(struct kobject *kobj)
121{
122 struct device_node *node = kobj_to_device_node(kobj);
123 struct property *prop = node->properties;
124
125 /* We should never be releasing nodes that haven't been detached. */
126 if (!of_node_check_flag(node, OF_DETACHED)) {
127 pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
128 dump_stack();
129 return;
130 }
131
132 if (!of_node_check_flag(node, OF_DYNAMIC))
133 return;
134
135 while (prop) {
136 struct property *next = prop->next;
137 kfree(prop->name);
138 kfree(prop->value);
139 kfree(prop);
140 prop = next;
141
142 if (!prop) {
143 prop = node->deadprops;
144 node->deadprops = NULL;
145 }
146 }
147 kfree(node->full_name);
148 kfree(node->data);
149 kfree(node);
150}
151
152/**
153 * of_node_put - Decrement refcount of a node
154 * @node: Node to dec refcount, NULL is supported to
155 * simplify writing of callers
156 *
157 */
158void of_node_put(struct device_node *node)
159{
160 if (node)
161 kobject_put(&node->kobj);
162}
163EXPORT_SYMBOL(of_node_put);
164#else
165static void of_node_release(struct kobject *kobj) 96static void of_node_release(struct kobject *kobj)
166{ 97{
167 /* Without CONFIG_OF_DYNAMIC, no nodes gets freed */ 98 /* Without CONFIG_OF_DYNAMIC, no nodes gets freed */
@@ -200,13 +131,16 @@ static const char *safe_name(struct kobject *kobj, const char *orig_name)
200 return name; 131 return name;
201} 132}
202 133
203static int __of_add_property_sysfs(struct device_node *np, struct property *pp) 134int __of_add_property_sysfs(struct device_node *np, struct property *pp)
204{ 135{
205 int rc; 136 int rc;
206 137
207 /* Important: Don't leak passwords */ 138 /* Important: Don't leak passwords */
208 bool secure = strncmp(pp->name, "security-", 9) == 0; 139 bool secure = strncmp(pp->name, "security-", 9) == 0;
209 140
141 if (!of_kset || !of_node_is_attached(np))
142 return 0;
143
210 sysfs_bin_attr_init(&pp->attr); 144 sysfs_bin_attr_init(&pp->attr);
211 pp->attr.attr.name = safe_name(&np->kobj, pp->name); 145 pp->attr.attr.name = safe_name(&np->kobj, pp->name);
212 pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO; 146 pp->attr.attr.mode = secure ? S_IRUSR : S_IRUGO;
@@ -218,12 +152,15 @@ static int __of_add_property_sysfs(struct device_node *np, struct property *pp)
218 return rc; 152 return rc;
219} 153}
220 154
221static int __of_node_add(struct device_node *np) 155int __of_attach_node_sysfs(struct device_node *np)
222{ 156{
223 const char *name; 157 const char *name;
224 struct property *pp; 158 struct property *pp;
225 int rc; 159 int rc;
226 160
161 if (!of_kset)
162 return 0;
163
227 np->kobj.kset = of_kset; 164 np->kobj.kset = of_kset;
228 if (!np->parent) { 165 if (!np->parent) {
229 /* Nodes without parents are new top level trees */ 166 /* Nodes without parents are new top level trees */
@@ -245,59 +182,20 @@ static int __of_node_add(struct device_node *np)
245 return 0; 182 return 0;
246} 183}
247 184
248int of_node_add(struct device_node *np)
249{
250 int rc = 0;
251
252 BUG_ON(!of_node_is_initialized(np));
253
254 /*
255 * Grab the mutex here so that in a race condition between of_init() and
256 * of_node_add(), node addition will still be consistent.
257 */
258 mutex_lock(&of_aliases_mutex);
259 if (of_kset)
260 rc = __of_node_add(np);
261 else
262 /* This scenario may be perfectly valid, but report it anyway */
263 pr_info("of_node_add(%s) before of_init()\n", np->full_name);
264 mutex_unlock(&of_aliases_mutex);
265 return rc;
266}
267
268#if defined(CONFIG_OF_DYNAMIC)
269static void of_node_remove(struct device_node *np)
270{
271 struct property *pp;
272
273 BUG_ON(!of_node_is_initialized(np));
274
275 /* only remove properties if on sysfs */
276 if (of_node_is_attached(np)) {
277 for_each_property_of_node(np, pp)
278 sysfs_remove_bin_file(&np->kobj, &pp->attr);
279 kobject_del(&np->kobj);
280 }
281
282 /* finally remove the kobj_init ref */
283 of_node_put(np);
284}
285#endif
286
287static int __init of_init(void) 185static int __init of_init(void)
288{ 186{
289 struct device_node *np; 187 struct device_node *np;
290 188
291 /* Create the kset, and register existing nodes */ 189 /* Create the kset, and register existing nodes */
292 mutex_lock(&of_aliases_mutex); 190 mutex_lock(&of_mutex);
293 of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj); 191 of_kset = kset_create_and_add("devicetree", NULL, firmware_kobj);
294 if (!of_kset) { 192 if (!of_kset) {
295 mutex_unlock(&of_aliases_mutex); 193 mutex_unlock(&of_mutex);
296 return -ENOMEM; 194 return -ENOMEM;
297 } 195 }
298 for_each_of_allnodes(np) 196 for_each_of_allnodes(np)
299 __of_node_add(np); 197 __of_attach_node_sysfs(np);
300 mutex_unlock(&of_aliases_mutex); 198 mutex_unlock(&of_mutex);
301 199
302 /* Symlink in /proc as required by userspace ABI */ 200 /* Symlink in /proc as required by userspace ABI */
303 if (of_allnodes) 201 if (of_allnodes)
@@ -369,8 +267,8 @@ EXPORT_SYMBOL(of_find_all_nodes);
369 * Find a property with a given name for a given node 267 * Find a property with a given name for a given node
370 * and return the value. 268 * and return the value.
371 */ 269 */
372static const void *__of_get_property(const struct device_node *np, 270const void *__of_get_property(const struct device_node *np,
373 const char *name, int *lenp) 271 const char *name, int *lenp)
374{ 272{
375 struct property *pp = __of_find_property(np, name, lenp); 273 struct property *pp = __of_find_property(np, name, lenp);
376 274
@@ -1748,32 +1646,10 @@ int of_count_phandle_with_args(const struct device_node *np, const char *list_na
1748} 1646}
1749EXPORT_SYMBOL(of_count_phandle_with_args); 1647EXPORT_SYMBOL(of_count_phandle_with_args);
1750 1648
1751#if defined(CONFIG_OF_DYNAMIC)
1752static int of_property_notify(int action, struct device_node *np,
1753 struct property *prop)
1754{
1755 struct of_prop_reconfig pr;
1756
1757 /* only call notifiers if the node is attached */
1758 if (!of_node_is_attached(np))
1759 return 0;
1760
1761 pr.dn = np;
1762 pr.prop = prop;
1763 return of_reconfig_notify(action, &pr);
1764}
1765#else
1766static int of_property_notify(int action, struct device_node *np,
1767 struct property *prop)
1768{
1769 return 0;
1770}
1771#endif
1772
1773/** 1649/**
1774 * __of_add_property - Add a property to a node without lock operations 1650 * __of_add_property - Add a property to a node without lock operations
1775 */ 1651 */
1776static int __of_add_property(struct device_node *np, struct property *prop) 1652int __of_add_property(struct device_node *np, struct property *prop)
1777{ 1653{
1778 struct property **next; 1654 struct property **next;
1779 1655
@@ -1799,22 +1675,49 @@ int of_add_property(struct device_node *np, struct property *prop)
1799 unsigned long flags; 1675 unsigned long flags;
1800 int rc; 1676 int rc;
1801 1677
1802 rc = of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop); 1678 mutex_lock(&of_mutex);
1803 if (rc)
1804 return rc;
1805 1679
1806 raw_spin_lock_irqsave(&devtree_lock, flags); 1680 raw_spin_lock_irqsave(&devtree_lock, flags);
1807 rc = __of_add_property(np, prop); 1681 rc = __of_add_property(np, prop);
1808 raw_spin_unlock_irqrestore(&devtree_lock, flags); 1682 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1809 if (rc)
1810 return rc;
1811 1683
1812 if (of_node_is_attached(np)) 1684 if (!rc)
1813 __of_add_property_sysfs(np, prop); 1685 __of_add_property_sysfs(np, prop);
1814 1686
1687 mutex_unlock(&of_mutex);
1688
1689 if (!rc)
1690 of_property_notify(OF_RECONFIG_ADD_PROPERTY, np, prop, NULL);
1691
1815 return rc; 1692 return rc;
1816} 1693}
1817 1694
1695int __of_remove_property(struct device_node *np, struct property *prop)
1696{
1697 struct property **next;
1698
1699 for (next = &np->properties; *next; next = &(*next)->next) {
1700 if (*next == prop)
1701 break;
1702 }
1703 if (*next == NULL)
1704 return -ENODEV;
1705
1706 /* found the node */
1707 *next = prop->next;
1708 prop->next = np->deadprops;
1709 np->deadprops = prop;
1710
1711 return 0;
1712}
1713
1714void __of_remove_property_sysfs(struct device_node *np, struct property *prop)
1715{
1716 /* at early boot, bail here and defer setup to of_init() */
1717 if (of_kset && of_node_is_attached(np))
1718 sysfs_remove_bin_file(&np->kobj, &prop->attr);
1719}
1720
1818/** 1721/**
1819 * of_remove_property - Remove a property from a node. 1722 * of_remove_property - Remove a property from a node.
1820 * 1723 *
@@ -1825,211 +1728,98 @@ int of_add_property(struct device_node *np, struct property *prop)
1825 */ 1728 */
1826int of_remove_property(struct device_node *np, struct property *prop) 1729int of_remove_property(struct device_node *np, struct property *prop)
1827{ 1730{
1828 struct property **next;
1829 unsigned long flags; 1731 unsigned long flags;
1830 int found = 0;
1831 int rc; 1732 int rc;
1832 1733
1833 rc = of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop); 1734 mutex_lock(&of_mutex);
1834 if (rc)
1835 return rc;
1836 1735
1837 raw_spin_lock_irqsave(&devtree_lock, flags); 1736 raw_spin_lock_irqsave(&devtree_lock, flags);
1838 next = &np->properties; 1737 rc = __of_remove_property(np, prop);
1839 while (*next) {
1840 if (*next == prop) {
1841 /* found the node */
1842 *next = prop->next;
1843 prop->next = np->deadprops;
1844 np->deadprops = prop;
1845 found = 1;
1846 break;
1847 }
1848 next = &(*next)->next;
1849 }
1850 raw_spin_unlock_irqrestore(&devtree_lock, flags); 1738 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1851 1739
1852 if (!found) 1740 if (!rc)
1853 return -ENODEV; 1741 __of_remove_property_sysfs(np, prop);
1854 1742
 1855 /* at early boot, bail here and defer setup to of_init() */ 1743 mutex_unlock(&of_mutex);
1856 if (!of_kset)
1857 return 0;
1858 1744
1859 sysfs_remove_bin_file(&np->kobj, &prop->attr); 1745 if (!rc)
1746 of_property_notify(OF_RECONFIG_REMOVE_PROPERTY, np, prop, NULL);
1860 1747
1861 return 0; 1748 return rc;
1862} 1749}
1863 1750
1864/* 1751int __of_update_property(struct device_node *np, struct property *newprop,
1865 * of_update_property - Update a property in a node, if the property does 1752 struct property **oldpropp)
1866 * not exist, add it.
1867 *
1868 * Note that we don't actually remove it, since we have given out
1869 * who-knows-how-many pointers to the data using get-property.
1870 * Instead we just move the property to the "dead properties" list,
1871 * and add the new property to the property list
1872 */
1873int of_update_property(struct device_node *np, struct property *newprop)
1874{ 1753{
1875 struct property **next, *oldprop; 1754 struct property **next, *oldprop;
1876 unsigned long flags;
1877 int rc;
1878
1879 rc = of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop);
1880 if (rc)
1881 return rc;
1882 1755
1883 if (!newprop->name) 1756 for (next = &np->properties; *next; next = &(*next)->next) {
1884 return -EINVAL; 1757 if (of_prop_cmp((*next)->name, newprop->name) == 0)
1758 break;
1759 }
1760 *oldpropp = oldprop = *next;
1885 1761
1886 raw_spin_lock_irqsave(&devtree_lock, flags); 1762 if (oldprop) {
1887 next = &np->properties;
1888 oldprop = __of_find_property(np, newprop->name, NULL);
1889 if (!oldprop) {
1890 /* add the new node */
1891 rc = __of_add_property(np, newprop);
1892 } else while (*next) {
1893 /* replace the node */ 1763 /* replace the node */
1894 if (*next == oldprop) { 1764 newprop->next = oldprop->next;
1895 newprop->next = oldprop->next; 1765 *next = newprop;
1896 *next = newprop; 1766 oldprop->next = np->deadprops;
1897 oldprop->next = np->deadprops; 1767 np->deadprops = oldprop;
1898 np->deadprops = oldprop; 1768 } else {
1899 break; 1769 /* new node */
1900 } 1770 newprop->next = NULL;
1901 next = &(*next)->next; 1771 *next = newprop;
1902 } 1772 }
1903 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1904 if (rc)
1905 return rc;
1906 1773
1774 return 0;
1775}
1776
1777void __of_update_property_sysfs(struct device_node *np, struct property *newprop,
1778 struct property *oldprop)
1779{
1907 /* At early boot, bail out and defer setup to of_init() */ 1780 /* At early boot, bail out and defer setup to of_init() */
1908 if (!of_kset) 1781 if (!of_kset)
1909 return 0; 1782 return;
1910 1783
1911 /* Update the sysfs attribute */
1912 if (oldprop) 1784 if (oldprop)
1913 sysfs_remove_bin_file(&np->kobj, &oldprop->attr); 1785 sysfs_remove_bin_file(&np->kobj, &oldprop->attr);
1914 __of_add_property_sysfs(np, newprop); 1786 __of_add_property_sysfs(np, newprop);
1915
1916 return 0;
1917} 1787}
1918 1788
1919#if defined(CONFIG_OF_DYNAMIC)
1920/* 1789/*
1921 * Support for dynamic device trees. 1790 * of_update_property - Update a property in a node, if the property does
1791 * not exist, add it.
1922 * 1792 *
1923 * On some platforms, the device tree can be manipulated at runtime. 1793 * Note that we don't actually remove it, since we have given out
1924 * The routines in this section support adding, removing and changing 1794 * who-knows-how-many pointers to the data using get-property.
1925 * device tree nodes. 1795 * Instead we just move the property to the "dead properties" list,
1926 */ 1796 * and add the new property to the property list
1927
1928static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
1929
1930int of_reconfig_notifier_register(struct notifier_block *nb)
1931{
1932 return blocking_notifier_chain_register(&of_reconfig_chain, nb);
1933}
1934EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
1935
1936int of_reconfig_notifier_unregister(struct notifier_block *nb)
1937{
1938 return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
1939}
1940EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
1941
1942int of_reconfig_notify(unsigned long action, void *p)
1943{
1944 int rc;
1945
1946 rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
1947 return notifier_to_errno(rc);
1948}
1949
1950/**
1951 * of_attach_node - Plug a device node into the tree and global list.
1952 */ 1797 */
1953int of_attach_node(struct device_node *np) 1798int of_update_property(struct device_node *np, struct property *newprop)
1954{ 1799{
1800 struct property *oldprop;
1955 unsigned long flags; 1801 unsigned long flags;
1956 int rc; 1802 int rc;
1957 1803
1958 rc = of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np); 1804 if (!newprop->name)
1959 if (rc) 1805 return -EINVAL;
1960 return rc;
1961
1962 raw_spin_lock_irqsave(&devtree_lock, flags);
1963 np->sibling = np->parent->child;
1964 np->allnext = np->parent->allnext;
1965 np->parent->allnext = np;
1966 np->parent->child = np;
1967 of_node_clear_flag(np, OF_DETACHED);
1968 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1969
1970 of_node_add(np);
1971 return 0;
1972}
1973
1974/**
1975 * of_detach_node - "Unplug" a node from the device tree.
1976 *
1977 * The caller must hold a reference to the node. The memory associated with
1978 * the node is not freed until its refcount goes to zero.
1979 */
1980int of_detach_node(struct device_node *np)
1981{
1982 struct device_node *parent;
1983 unsigned long flags;
1984 int rc = 0;
1985 1806
1986 rc = of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np); 1807 mutex_lock(&of_mutex);
1987 if (rc)
1988 return rc;
1989 1808
1990 raw_spin_lock_irqsave(&devtree_lock, flags); 1809 raw_spin_lock_irqsave(&devtree_lock, flags);
1810 rc = __of_update_property(np, newprop, &oldprop);
1811 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1991 1812
1992 if (of_node_check_flag(np, OF_DETACHED)) { 1813 if (!rc)
1993 /* someone already detached it */ 1814 __of_update_property_sysfs(np, newprop, oldprop);
1994 raw_spin_unlock_irqrestore(&devtree_lock, flags);
1995 return rc;
1996 }
1997
1998 parent = np->parent;
1999 if (!parent) {
2000 raw_spin_unlock_irqrestore(&devtree_lock, flags);
2001 return rc;
2002 }
2003 1815
2004 if (of_allnodes == np) 1816 mutex_unlock(&of_mutex);
2005 of_allnodes = np->allnext;
2006 else {
2007 struct device_node *prev;
2008 for (prev = of_allnodes;
2009 prev->allnext != np;
2010 prev = prev->allnext)
2011 ;
2012 prev->allnext = np->allnext;
2013 }
2014 1817
2015 if (parent->child == np) 1818 if (!rc)
2016 parent->child = np->sibling; 1819 of_property_notify(OF_RECONFIG_UPDATE_PROPERTY, np, newprop, oldprop);
2017 else {
2018 struct device_node *prevsib;
2019 for (prevsib = np->parent->child;
2020 prevsib->sibling != np;
2021 prevsib = prevsib->sibling)
2022 ;
2023 prevsib->sibling = np->sibling;
2024 }
2025
2026 of_node_set_flag(np, OF_DETACHED);
2027 raw_spin_unlock_irqrestore(&devtree_lock, flags);
2028 1820
2029 of_node_remove(np);
2030 return rc; 1821 return rc;
2031} 1822}
2032#endif /* defined(CONFIG_OF_DYNAMIC) */
2033 1823
2034static void of_alias_add(struct alias_prop *ap, struct device_node *np, 1824static void of_alias_add(struct alias_prop *ap, struct device_node *np,
2035 int id, const char *stem, int stem_len) 1825 int id, const char *stem, int stem_len)
@@ -2062,9 +1852,12 @@ void of_alias_scan(void * (*dt_alloc)(u64 size, u64 align))
2062 of_chosen = of_find_node_by_path("/chosen@0"); 1852 of_chosen = of_find_node_by_path("/chosen@0");
2063 1853
2064 if (of_chosen) { 1854 if (of_chosen) {
1855 /* linux,stdout-path and /aliases/stdout are for legacy compatibility */
2065 const char *name = of_get_property(of_chosen, "stdout-path", NULL); 1856 const char *name = of_get_property(of_chosen, "stdout-path", NULL);
2066 if (!name) 1857 if (!name)
2067 name = of_get_property(of_chosen, "linux,stdout-path", NULL); 1858 name = of_get_property(of_chosen, "linux,stdout-path", NULL);
1859 if (IS_ENABLED(CONFIG_PPC) && !name)
1860 name = of_get_property(of_aliases, "stdout", NULL);
2068 if (name) 1861 if (name)
2069 of_stdout = of_find_node_by_path(name); 1862 of_stdout = of_find_node_by_path(name);
2070 } 1863 }
@@ -2122,7 +1915,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
2122 struct alias_prop *app; 1915 struct alias_prop *app;
2123 int id = -ENODEV; 1916 int id = -ENODEV;
2124 1917
2125 mutex_lock(&of_aliases_mutex); 1918 mutex_lock(&of_mutex);
2126 list_for_each_entry(app, &aliases_lookup, link) { 1919 list_for_each_entry(app, &aliases_lookup, link) {
2127 if (strcmp(app->stem, stem) != 0) 1920 if (strcmp(app->stem, stem) != 0)
2128 continue; 1921 continue;
@@ -2132,7 +1925,7 @@ int of_alias_get_id(struct device_node *np, const char *stem)
2132 break; 1925 break;
2133 } 1926 }
2134 } 1927 }
2135 mutex_unlock(&of_aliases_mutex); 1928 mutex_unlock(&of_mutex);
2136 1929
2137 return id; 1930 return id;
2138} 1931}
@@ -2180,20 +1973,22 @@ const char *of_prop_next_string(struct property *prop, const char *cur)
2180EXPORT_SYMBOL_GPL(of_prop_next_string); 1973EXPORT_SYMBOL_GPL(of_prop_next_string);
2181 1974
2182/** 1975/**
2183 * of_device_is_stdout_path - check if a device node matches the 1976 * of_console_check() - Test and setup console for DT setup
2184 * linux,stdout-path property 1977 * @dn - Pointer to device node
2185 * 1978 * @name - Name to use for preferred console without index. ex. "ttyS"
2186 * Check if this device node matches the linux,stdout-path property 1979 * @index - Index to use for preferred console.
2187 * in the chosen node. return true if yes, false otherwise. 1980 *
1981 * Check if the given device node matches the stdout-path property in the
1982 * /chosen node. If it does then register it as the preferred console and return
1983 * TRUE. Otherwise return FALSE.
2188 */ 1984 */
2189int of_device_is_stdout_path(struct device_node *dn) 1985bool of_console_check(struct device_node *dn, char *name, int index)
2190{ 1986{
2191 if (!of_stdout) 1987 if (!dn || dn != of_stdout || console_set_on_cmdline)
2192 return false; 1988 return false;
2193 1989 return add_preferred_console(name, index, NULL);
2194 return of_stdout == dn;
2195} 1990}
2196EXPORT_SYMBOL_GPL(of_device_is_stdout_path); 1991EXPORT_SYMBOL_GPL(of_console_check);
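Note that the serial_core.c hunk near the end of this diff adds this call to uart_add_one_port(), so uart drivers get the stdout check for free. For a driver registering ports outside the uart core, a minimal sketch (driver and function names are hypothetical) might look like:

#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/serial_core.h>

static void foo_uart_check_console(struct platform_device *pdev,
				   struct uart_port *port)
{
	/* registers this port as preferred console when its node is the
	 * /chosen stdout and no console= argument overrode it */
	if (port->cons)
		of_console_check(pdev->dev.of_node, port->cons->name, port->line);
}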
2197 1992
2198/** 1993/**
2199 * of_find_next_cache_node - Find a node's subsidiary cache 1994 * of_find_next_cache_node - Find a node's subsidiary cache
diff --git a/drivers/of/device.c b/drivers/of/device.c
index dafb9736ab9b..46d6c75c1404 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -160,7 +160,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
160 add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen); 160 add_uevent_var(env, "OF_COMPATIBLE_N=%d", seen);
161 161
162 seen = 0; 162 seen = 0;
163 mutex_lock(&of_aliases_mutex); 163 mutex_lock(&of_mutex);
164 list_for_each_entry(app, &aliases_lookup, link) { 164 list_for_each_entry(app, &aliases_lookup, link) {
165 if (dev->of_node == app->np) { 165 if (dev->of_node == app->np) {
166 add_uevent_var(env, "OF_ALIAS_%d=%s", seen, 166 add_uevent_var(env, "OF_ALIAS_%d=%s", seen,
@@ -168,7 +168,7 @@ void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
168 seen++; 168 seen++;
169 } 169 }
170 } 170 }
171 mutex_unlock(&of_aliases_mutex); 171 mutex_unlock(&of_mutex);
172} 172}
173 173
174int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env) 174int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env)
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
new file mode 100644
index 000000000000..54fecc49a1fe
--- /dev/null
+++ b/drivers/of/dynamic.c
@@ -0,0 +1,660 @@
1/*
2 * Support for dynamic device trees.
3 *
4 * On some platforms, the device tree can be manipulated at runtime.
5 * The routines in this section support adding, removing and changing
6 * device tree nodes.
7 */
8
9#include <linux/of.h>
10#include <linux/spinlock.h>
11#include <linux/slab.h>
12#include <linux/string.h>
13#include <linux/proc_fs.h>
14
15#include "of_private.h"
16
17/**
18 * of_node_get() - Increment refcount of a node
19 * @node: Node to inc refcount, NULL is supported to simplify writing of
20 * callers
21 *
22 * Returns node.
23 */
24struct device_node *of_node_get(struct device_node *node)
25{
26 if (node)
27 kobject_get(&node->kobj);
28 return node;
29}
30EXPORT_SYMBOL(of_node_get);
31
32/**
33 * of_node_put() - Decrement refcount of a node
34 * @node: Node to dec refcount, NULL is supported to simplify writing of
35 * callers
36 */
37void of_node_put(struct device_node *node)
38{
39 if (node)
40 kobject_put(&node->kobj);
41}
42EXPORT_SYMBOL(of_node_put);
43
44void __of_detach_node_sysfs(struct device_node *np)
45{
46 struct property *pp;
47
48 BUG_ON(!of_node_is_initialized(np));
49 if (!of_kset)
50 return;
51
52 /* only remove properties if on sysfs */
53 if (of_node_is_attached(np)) {
54 for_each_property_of_node(np, pp)
55 sysfs_remove_bin_file(&np->kobj, &pp->attr);
56 kobject_del(&np->kobj);
57 }
58
59 /* finally remove the kobj_init ref */
60 of_node_put(np);
61}
62
63static BLOCKING_NOTIFIER_HEAD(of_reconfig_chain);
64
65int of_reconfig_notifier_register(struct notifier_block *nb)
66{
67 return blocking_notifier_chain_register(&of_reconfig_chain, nb);
68}
69EXPORT_SYMBOL_GPL(of_reconfig_notifier_register);
70
71int of_reconfig_notifier_unregister(struct notifier_block *nb)
72{
73 return blocking_notifier_chain_unregister(&of_reconfig_chain, nb);
74}
75EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
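As a rough sketch of how a subsystem might listen for these reconfiguration events (the notifier name and messages are hypothetical; only the node actions are handled, property actions pass a struct of_prop_reconfig instead):

#include <linux/notifier.h>
#include <linux/of.h>

static int example_of_reconfig(struct notifier_block *nb,
			       unsigned long action, void *arg)
{
	struct device_node *np = arg;

	if (action == OF_RECONFIG_ATTACH_NODE)
		pr_info("node %s attached\n", np->full_name);
	else if (action == OF_RECONFIG_DETACH_NODE)
		pr_info("node %s detached\n", np->full_name);

	return NOTIFY_OK;
}

static struct notifier_block example_of_nb = {
	.notifier_call = example_of_reconfig,
};

/* registration, e.g. from an initcall:
 *	of_reconfig_notifier_register(&example_of_nb);
 */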
76
77int of_reconfig_notify(unsigned long action, void *p)
78{
79 int rc;
80
81 rc = blocking_notifier_call_chain(&of_reconfig_chain, action, p);
82 return notifier_to_errno(rc);
83}
84
85int of_property_notify(int action, struct device_node *np,
86 struct property *prop, struct property *oldprop)
87{
88 struct of_prop_reconfig pr;
89
90 /* only call notifiers if the node is attached */
91 if (!of_node_is_attached(np))
92 return 0;
93
94 pr.dn = np;
95 pr.prop = prop;
96 pr.old_prop = oldprop;
97 return of_reconfig_notify(action, &pr);
98}
99
100void __of_attach_node(struct device_node *np)
101{
102 const __be32 *phandle;
103 int sz;
104
105 np->name = __of_get_property(np, "name", NULL) ? : "<NULL>";
106 np->type = __of_get_property(np, "device_type", NULL) ? : "<NULL>";
107
108 phandle = __of_get_property(np, "phandle", &sz);
109 if (!phandle)
110 phandle = __of_get_property(np, "linux,phandle", &sz);
111	if (IS_ENABLED(CONFIG_PPC_PSERIES) && !phandle)
112 phandle = __of_get_property(np, "ibm,phandle", &sz);
113 np->phandle = (phandle && (sz >= 4)) ? be32_to_cpup(phandle) : 0;
114
115 np->child = NULL;
116 np->sibling = np->parent->child;
117 np->allnext = np->parent->allnext;
118 np->parent->allnext = np;
119 np->parent->child = np;
120 of_node_clear_flag(np, OF_DETACHED);
121}
122
123/**
124 * of_attach_node() - Plug a device node into the tree and global list.
125 */
126int of_attach_node(struct device_node *np)
127{
128 unsigned long flags;
129
130 mutex_lock(&of_mutex);
131 raw_spin_lock_irqsave(&devtree_lock, flags);
132 __of_attach_node(np);
133 raw_spin_unlock_irqrestore(&devtree_lock, flags);
134
135 __of_attach_node_sysfs(np);
136 mutex_unlock(&of_mutex);
137
138 of_reconfig_notify(OF_RECONFIG_ATTACH_NODE, np);
139
140 return 0;
141}
142
143void __of_detach_node(struct device_node *np)
144{
145 struct device_node *parent;
146
147 if (WARN_ON(of_node_check_flag(np, OF_DETACHED)))
148 return;
149
150 parent = np->parent;
151 if (WARN_ON(!parent))
152 return;
153
154 if (of_allnodes == np)
155 of_allnodes = np->allnext;
156 else {
157 struct device_node *prev;
158 for (prev = of_allnodes;
159 prev->allnext != np;
160 prev = prev->allnext)
161 ;
162 prev->allnext = np->allnext;
163 }
164
165 if (parent->child == np)
166 parent->child = np->sibling;
167 else {
168 struct device_node *prevsib;
169 for (prevsib = np->parent->child;
170 prevsib->sibling != np;
171 prevsib = prevsib->sibling)
172 ;
173 prevsib->sibling = np->sibling;
174 }
175
176 of_node_set_flag(np, OF_DETACHED);
177}
178
179/**
180 * of_detach_node() - "Unplug" a node from the device tree.
181 *
182 * The caller must hold a reference to the node. The memory associated with
183 * the node is not freed until its refcount goes to zero.
184 */
185int of_detach_node(struct device_node *np)
186{
187 unsigned long flags;
188 int rc = 0;
189
190 mutex_lock(&of_mutex);
191 raw_spin_lock_irqsave(&devtree_lock, flags);
192 __of_detach_node(np);
193 raw_spin_unlock_irqrestore(&devtree_lock, flags);
194
195 __of_detach_node_sysfs(np);
196 mutex_unlock(&of_mutex);
197
198 of_reconfig_notify(OF_RECONFIG_DETACH_NODE, np);
199
200 return rc;
201}
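A minimal sketch of the intended calling pattern, assuming the node is located by path (the path and function name are hypothetical):

#include <linux/of.h>

/* Unplug a dynamically added node.  The reference taken by
 * of_find_node_by_path() is dropped afterwards; the node itself is only
 * freed once its refcount reaches zero. */
static int example_unplug(void)
{
	struct device_node *np;
	int rc;

	np = of_find_node_by_path("/example@0");
	if (!np)
		return -ENODEV;

	rc = of_detach_node(np);	/* emits OF_RECONFIG_DETACH_NODE */
	of_node_put(np);
	return rc;
}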
202
203/**
204 * of_node_release() - release a dynamically allocated node
205 * @kobj: kobject of the node to be released
206 *
207 * Called from of_node_put() via kobject_put() once the node's refcount drops to zero.
208 */
209void of_node_release(struct kobject *kobj)
210{
211 struct device_node *node = kobj_to_device_node(kobj);
212 struct property *prop = node->properties;
213
214 /* We should never be releasing nodes that haven't been detached. */
215 if (!of_node_check_flag(node, OF_DETACHED)) {
216 pr_err("ERROR: Bad of_node_put() on %s\n", node->full_name);
217 dump_stack();
218 return;
219 }
220
221 if (!of_node_check_flag(node, OF_DYNAMIC))
222 return;
223
224 while (prop) {
225 struct property *next = prop->next;
226 kfree(prop->name);
227 kfree(prop->value);
228 kfree(prop);
229 prop = next;
230
231 if (!prop) {
232 prop = node->deadprops;
233 node->deadprops = NULL;
234 }
235 }
236 kfree(node->full_name);
237 kfree(node->data);
238 kfree(node);
239}
240
241/**
242 * __of_prop_dup - Copy a property dynamically.
243 * @prop: Property to copy
244 * @allocflags: Allocation flags (typically pass GFP_KERNEL)
245 *
246 * Copy a property by dynamically allocating the memory of both the
247 * property structure and the property name & contents. The property's
248 * flags have the OF_DYNAMIC bit set so that we can differentiate between
249 * dynamically allocated properties and not.
250 * Returns the newly allocated property or NULL on out of memory error.
251 */
252struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags)
253{
254 struct property *new;
255
256 new = kzalloc(sizeof(*new), allocflags);
257 if (!new)
258 return NULL;
259
260 /*
261 * NOTE: There is no check for zero length value.
262 * In case of a boolean property, this will allocate a value
263 * of zero bytes. We do this to work around the use
264 * of of_get_property() calls on boolean values.
265 */
266 new->name = kstrdup(prop->name, allocflags);
267 new->value = kmemdup(prop->value, prop->length, allocflags);
268 new->length = prop->length;
269 if (!new->name || !new->value)
270 goto err_free;
271
272 /* mark the property as dynamic */
273 of_property_set_flag(new, OF_DYNAMIC);
274
275 return new;
276
277 err_free:
278 kfree(new->name);
279 kfree(new->value);
280 kfree(new);
281 return NULL;
282}
283
284/**
285 * __of_node_alloc() - Create an empty device node dynamically.
286 * @full_name: Full name of the new device node
287 * @allocflags: Allocation flags (typically pass GFP_KERNEL)
288 *
289 * Create an empty device tree node, suitable for further modification.
290 * The node data are dynamically allocated and all the node flags
291 * have the OF_DYNAMIC & OF_DETACHED bits set.
292 * Returns the newly allocated node or NULL on out of memory error.
293 */
294struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags)
295{
296 struct device_node *node;
297
298 node = kzalloc(sizeof(*node), allocflags);
299 if (!node)
300 return NULL;
301
302 node->full_name = kstrdup(full_name, allocflags);
303 of_node_set_flag(node, OF_DYNAMIC);
304 of_node_set_flag(node, OF_DETACHED);
305 if (!node->full_name)
306 goto err_free;
307
308 of_node_init(node);
309
310 return node;
311
312 err_free:
313 kfree(node->full_name);
314 kfree(node);
315 return NULL;
316}
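These two helpers are internal (declared in of_private.h) and are exercised by the selftest later in this diff; a sketch of that style of use, with hypothetical names, placeholder values and simplified error handling, might look like:

#include <linux/of.h>
#include <linux/slab.h>
#include "of_private.h"		/* __of_node_alloc(), __of_prop_dup() */

/* Build a detached node with one duplicated property, then plug it in
 * under @parent. */
static int example_build_node(struct device_node *parent)
{
	struct property tmpl = { .name = "status", .length = 5, .value = "okay" };
	struct device_node *np;
	struct property *prop;

	np = __of_node_alloc("/example@0", GFP_KERNEL);
	if (!np)
		return -ENOMEM;

	prop = __of_prop_dup(&tmpl, GFP_KERNEL);
	if (!prop) {
		of_node_put(np);	/* OF_DYNAMIC + OF_DETACHED: node is freed */
		return -ENOMEM;
	}

	prop->next = np->properties;
	np->properties = prop;
	np->parent = parent;

	return of_attach_node(np);	/* clears OF_DETACHED and notifies */
}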
317
318static void __of_changeset_entry_destroy(struct of_changeset_entry *ce)
319{
320 of_node_put(ce->np);
321 list_del(&ce->node);
322 kfree(ce);
323}
324
325#ifdef DEBUG
326static void __of_changeset_entry_dump(struct of_changeset_entry *ce)
327{
328 switch (ce->action) {
329 case OF_RECONFIG_ADD_PROPERTY:
330 pr_debug("%p: %s %s/%s\n",
331 ce, "ADD_PROPERTY ", ce->np->full_name,
332 ce->prop->name);
333 break;
334 case OF_RECONFIG_REMOVE_PROPERTY:
335 pr_debug("%p: %s %s/%s\n",
336 ce, "REMOVE_PROPERTY", ce->np->full_name,
337 ce->prop->name);
338 break;
339 case OF_RECONFIG_UPDATE_PROPERTY:
340 pr_debug("%p: %s %s/%s\n",
341 ce, "UPDATE_PROPERTY", ce->np->full_name,
342 ce->prop->name);
343 break;
344 case OF_RECONFIG_ATTACH_NODE:
345 pr_debug("%p: %s %s\n",
346 ce, "ATTACH_NODE ", ce->np->full_name);
347 break;
348 case OF_RECONFIG_DETACH_NODE:
349 pr_debug("%p: %s %s\n",
350 ce, "DETACH_NODE ", ce->np->full_name);
351 break;
352 }
353}
354#else
355static inline void __of_changeset_entry_dump(struct of_changeset_entry *ce)
356{
357 /* empty */
358}
359#endif
360
361static void __of_changeset_entry_invert(struct of_changeset_entry *ce,
362 struct of_changeset_entry *rce)
363{
364 memcpy(rce, ce, sizeof(*rce));
365
366 switch (ce->action) {
367 case OF_RECONFIG_ATTACH_NODE:
368 rce->action = OF_RECONFIG_DETACH_NODE;
369 break;
370 case OF_RECONFIG_DETACH_NODE:
371 rce->action = OF_RECONFIG_ATTACH_NODE;
372 break;
373 case OF_RECONFIG_ADD_PROPERTY:
374 rce->action = OF_RECONFIG_REMOVE_PROPERTY;
375 break;
376 case OF_RECONFIG_REMOVE_PROPERTY:
377 rce->action = OF_RECONFIG_ADD_PROPERTY;
378 break;
379 case OF_RECONFIG_UPDATE_PROPERTY:
380 rce->old_prop = ce->prop;
381 rce->prop = ce->old_prop;
382 break;
383 }
384}
385
386static void __of_changeset_entry_notify(struct of_changeset_entry *ce, bool revert)
387{
388 struct of_changeset_entry ce_inverted;
389 int ret;
390
391 if (revert) {
392 __of_changeset_entry_invert(ce, &ce_inverted);
393 ce = &ce_inverted;
394 }
395
396 switch (ce->action) {
397 case OF_RECONFIG_ATTACH_NODE:
398 case OF_RECONFIG_DETACH_NODE:
399 ret = of_reconfig_notify(ce->action, ce->np);
400 break;
401 case OF_RECONFIG_ADD_PROPERTY:
402 case OF_RECONFIG_REMOVE_PROPERTY:
403 case OF_RECONFIG_UPDATE_PROPERTY:
404 ret = of_property_notify(ce->action, ce->np, ce->prop, ce->old_prop);
405 break;
406 default:
407 pr_err("%s: invalid devicetree changeset action: %i\n", __func__,
408 (int)ce->action);
409 return;
410 }
411
412 if (ret)
413 pr_err("%s: notifier error @%s\n", __func__, ce->np->full_name);
414}
415
416static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
417{
418 struct property *old_prop, **propp;
419 unsigned long flags;
420 int ret = 0;
421
422 __of_changeset_entry_dump(ce);
423
424 raw_spin_lock_irqsave(&devtree_lock, flags);
425 switch (ce->action) {
426 case OF_RECONFIG_ATTACH_NODE:
427 __of_attach_node(ce->np);
428 break;
429 case OF_RECONFIG_DETACH_NODE:
430 __of_detach_node(ce->np);
431 break;
432 case OF_RECONFIG_ADD_PROPERTY:
433 /* If the property is in deadprops then it must be removed */
434 for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
435 if (*propp == ce->prop) {
436 *propp = ce->prop->next;
437 ce->prop->next = NULL;
438 break;
439 }
440 }
441
442 ret = __of_add_property(ce->np, ce->prop);
443 if (ret) {
444 pr_err("%s: add_property failed @%s/%s\n",
445 __func__, ce->np->full_name,
446 ce->prop->name);
447 break;
448 }
449 break;
450 case OF_RECONFIG_REMOVE_PROPERTY:
451 ret = __of_remove_property(ce->np, ce->prop);
452 if (ret) {
453 pr_err("%s: remove_property failed @%s/%s\n",
454 __func__, ce->np->full_name,
455 ce->prop->name);
456 break;
457 }
458 break;
459
460 case OF_RECONFIG_UPDATE_PROPERTY:
461 /* If the property is in deadprops then it must be removed */
462 for (propp = &ce->np->deadprops; *propp; propp = &(*propp)->next) {
463 if (*propp == ce->prop) {
464 *propp = ce->prop->next;
465 ce->prop->next = NULL;
466 break;
467 }
468 }
469
470 ret = __of_update_property(ce->np, ce->prop, &old_prop);
471 if (ret) {
472 pr_err("%s: update_property failed @%s/%s\n",
473 __func__, ce->np->full_name,
474 ce->prop->name);
475 break;
476 }
477 break;
478 default:
479 ret = -EINVAL;
480 }
481 raw_spin_unlock_irqrestore(&devtree_lock, flags);
482
483 if (ret)
484 return ret;
485
486 switch (ce->action) {
487 case OF_RECONFIG_ATTACH_NODE:
488 __of_attach_node_sysfs(ce->np);
489 break;
490 case OF_RECONFIG_DETACH_NODE:
491 __of_detach_node_sysfs(ce->np);
492 break;
493 case OF_RECONFIG_ADD_PROPERTY:
494 /* ignore duplicate names */
495 __of_add_property_sysfs(ce->np, ce->prop);
496 break;
497 case OF_RECONFIG_REMOVE_PROPERTY:
498 __of_remove_property_sysfs(ce->np, ce->prop);
499 break;
500 case OF_RECONFIG_UPDATE_PROPERTY:
501 __of_update_property_sysfs(ce->np, ce->prop, ce->old_prop);
502 break;
503 }
504
505 return 0;
506}
507
508static inline int __of_changeset_entry_revert(struct of_changeset_entry *ce)
509{
510 struct of_changeset_entry ce_inverted;
511
512 __of_changeset_entry_invert(ce, &ce_inverted);
513 return __of_changeset_entry_apply(&ce_inverted);
514}
515
516/**
517 * of_changeset_init - Initialize a changeset for use
518 *
519 * @ocs: changeset pointer
520 *
521 * Initialize a changeset structure
522 */
523void of_changeset_init(struct of_changeset *ocs)
524{
525 memset(ocs, 0, sizeof(*ocs));
526 INIT_LIST_HEAD(&ocs->entries);
527}
528
529/**
530 * of_changeset_destroy - Destroy a changeset
531 *
532 * @ocs: changeset pointer
533 *
534 * Destroys a changeset. Note that if a changeset is applied,
535 * its changes to the tree cannot be reverted.
536 */
537void of_changeset_destroy(struct of_changeset *ocs)
538{
539 struct of_changeset_entry *ce, *cen;
540
541 list_for_each_entry_safe_reverse(ce, cen, &ocs->entries, node)
542 __of_changeset_entry_destroy(ce);
543}
544
545/**
546 * of_changeset_apply - Applies a changeset
547 *
548 * @ocs: changeset pointer
549 *
550 * Applies a changeset to the live tree.
551 * Any side effects of live tree state changes are applied here on
552 * success, such as creation/destruction of devices and creation of
553 * sysfs properties and directories.
554 * Returns 0 on success, a negative error value in case of an error.
555 * On error the partially applied effects are reverted.
556 */
557int of_changeset_apply(struct of_changeset *ocs)
558{
559 struct of_changeset_entry *ce;
560 int ret;
561
562 /* perform the rest of the work */
563 pr_debug("of_changeset: applying...\n");
564 list_for_each_entry(ce, &ocs->entries, node) {
565 ret = __of_changeset_entry_apply(ce);
566 if (ret) {
567 pr_err("%s: Error applying changeset (%d)\n", __func__, ret);
568 list_for_each_entry_continue_reverse(ce, &ocs->entries, node)
569 __of_changeset_entry_revert(ce);
570 return ret;
571 }
572 }
573 pr_debug("of_changeset: applied, emitting notifiers.\n");
574
575 /* drop the global lock while emitting notifiers */
576 mutex_unlock(&of_mutex);
577 list_for_each_entry(ce, &ocs->entries, node)
578 __of_changeset_entry_notify(ce, 0);
579 mutex_lock(&of_mutex);
580 pr_debug("of_changeset: notifiers sent.\n");
581
582 return 0;
583}
584
585/**
586 * of_changeset_revert - Reverts an applied changeset
587 *
588 * @ocs: changeset pointer
589 *
590 * Reverts a changeset returning the state of the tree to what it
591 * was before the application.
592 * Any side-effects like creation/destruction of devices and
593 * removal of sysfs properties and directories are applied.
594 * Returns 0 on success, a negative error value in case of an error.
595 */
596int of_changeset_revert(struct of_changeset *ocs)
597{
598 struct of_changeset_entry *ce;
599 int ret;
600
601 pr_debug("of_changeset: reverting...\n");
602 list_for_each_entry_reverse(ce, &ocs->entries, node) {
603 ret = __of_changeset_entry_revert(ce);
604 if (ret) {
605 pr_err("%s: Error reverting changeset (%d)\n", __func__, ret);
606 list_for_each_entry_continue(ce, &ocs->entries, node)
607 __of_changeset_entry_apply(ce);
608 return ret;
609 }
610 }
611 pr_debug("of_changeset: reverted, emitting notifiers.\n");
612
613 /* drop the global lock while emitting notifiers */
614 mutex_unlock(&of_mutex);
615 list_for_each_entry_reverse(ce, &ocs->entries, node)
616 __of_changeset_entry_notify(ce, 1);
617 mutex_lock(&of_mutex);
618 pr_debug("of_changeset: notifiers sent.\n");
619
620 return 0;
621}
622
623/**
624 * of_changeset_action - Perform a changeset action
625 *
626 * @ocs: changeset pointer
627 * @action: action to perform
628 * @np: Pointer to device node
629 * @prop: Pointer to property
630 *
631 * On action being one of:
632 * + OF_RECONFIG_ATTACH_NODE
633 * + OF_RECONFIG_DETACH_NODE,
634 * + OF_RECONFIG_ADD_PROPERTY
635 * + OF_RECONFIG_REMOVE_PROPERTY,
636 * + OF_RECONFIG_UPDATE_PROPERTY
637 * Returns 0 on success, a negative error value in case of an error.
638 */
639int of_changeset_action(struct of_changeset *ocs, unsigned long action,
640 struct device_node *np, struct property *prop)
641{
642 struct of_changeset_entry *ce;
643
644 ce = kzalloc(sizeof(*ce), GFP_KERNEL);
645 if (!ce) {
646 pr_err("%s: Failed to allocate\n", __func__);
647 return -ENOMEM;
648 }
649 /* get a reference to the node */
650 ce->action = action;
651 ce->np = of_node_get(np);
652 ce->prop = prop;
653
654 if (action == OF_RECONFIG_UPDATE_PROPERTY && prop)
655 ce->old_prop = of_find_property(np, prop->name, NULL);
656
657 /* add it to the list */
658 list_add_tail(&ce->node, &ocs->entries);
659 return 0;
660}
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 9aa012e6ea0a..f46a24ffa3fe 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -923,24 +923,24 @@ int __init early_init_dt_scan_chosen(unsigned long node, const char *uname,
923} 923}
924 924
925#ifdef CONFIG_HAVE_MEMBLOCK 925#ifdef CONFIG_HAVE_MEMBLOCK
926#define MAX_PHYS_ADDR ((phys_addr_t)~0)
927
926void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size) 928void __init __weak early_init_dt_add_memory_arch(u64 base, u64 size)
927{ 929{
928 const u64 phys_offset = __pa(PAGE_OFFSET); 930 const u64 phys_offset = __pa(PAGE_OFFSET);
929 base &= PAGE_MASK; 931 base &= PAGE_MASK;
930 size &= PAGE_MASK; 932 size &= PAGE_MASK;
931 933
932 if (sizeof(phys_addr_t) < sizeof(u64)) { 934 if (base > MAX_PHYS_ADDR) {
933 if (base > ULONG_MAX) { 935 pr_warning("Ignoring memory block 0x%llx - 0x%llx\n",
934 pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", 936 base, base + size);
935 base, base + size); 937 return;
936 return; 938 }
937 }
938 939
939 if (base + size > ULONG_MAX) { 940 if (base + size > MAX_PHYS_ADDR) {
940 pr_warning("Ignoring memory range 0x%lx - 0x%llx\n", 941 pr_warning("Ignoring memory range 0x%lx - 0x%llx\n",
941 ULONG_MAX, base + size); 942 ULONG_MAX, base + size);
942 size = ULONG_MAX - base; 943 size = MAX_PHYS_ADDR - base;
943 }
944 } 944 }
945 945
946 if (base + size < phys_offset) { 946 if (base + size < phys_offset) {
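For illustration, assuming a 32-bit phys_addr_t, MAX_PHYS_ADDR is 0xffffffff: a block whose base is 0x100000000 is ignored outright, while a block at base 0xc0000000 with size 0x80000000 would end at 0x140000000 and is therefore clamped to size MAX_PHYS_ADDR - base = 0x3fffffff.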
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index ff350c8fa7ac..858e0a5d9a11 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -31,6 +31,63 @@ struct alias_prop {
31 char stem[0]; 31 char stem[0];
32}; 32};
33 33
34extern struct mutex of_aliases_mutex; 34extern struct mutex of_mutex;
35extern struct list_head aliases_lookup; 35extern struct list_head aliases_lookup;
36extern struct kset *of_kset;
37
38
39static inline struct device_node *kobj_to_device_node(struct kobject *kobj)
40{
41 return container_of(kobj, struct device_node, kobj);
42}
43
44#if defined(CONFIG_OF_DYNAMIC)
45extern int of_property_notify(int action, struct device_node *np,
46 struct property *prop, struct property *old_prop);
47extern void of_node_release(struct kobject *kobj);
48#else /* CONFIG_OF_DYNAMIC */
49static inline int of_property_notify(int action, struct device_node *np,
50 struct property *prop, struct property *old_prop)
51{
52 return 0;
53}
54#endif /* CONFIG_OF_DYNAMIC */
55
56/**
57 * General utilities for working with live trees.
58 *
59 * All functions with two leading underscores operate
60 * without taking node references, so you either have to
61 * own the devtree lock or work on detached trees only.
62 */
63struct property *__of_prop_dup(const struct property *prop, gfp_t allocflags);
64struct device_node *__of_node_alloc(const char *full_name, gfp_t allocflags);
65
66extern const void *__of_get_property(const struct device_node *np,
67 const char *name, int *lenp);
68extern int __of_add_property(struct device_node *np, struct property *prop);
69extern int __of_add_property_sysfs(struct device_node *np,
70 struct property *prop);
71extern int __of_remove_property(struct device_node *np, struct property *prop);
72extern void __of_remove_property_sysfs(struct device_node *np,
73 struct property *prop);
74extern int __of_update_property(struct device_node *np,
75 struct property *newprop, struct property **oldprop);
76extern void __of_update_property_sysfs(struct device_node *np,
77 struct property *newprop, struct property *oldprop);
78
79extern void __of_attach_node(struct device_node *np);
80extern int __of_attach_node_sysfs(struct device_node *np);
81extern void __of_detach_node(struct device_node *np);
82extern void __of_detach_node_sysfs(struct device_node *np);
83
84/* iterators for transactions, used for overlays */
85/* forward iterator */
86#define for_each_transaction_entry(_oft, _te) \
87 list_for_each_entry(_te, &(_oft)->te_list, node)
88
89/* reverse iterator */
90#define for_each_transaction_entry_reverse(_oft, _te) \
91 list_for_each_entry_reverse(_te, &(_oft)->te_list, node)
92
36#endif /* _LINUX_OF_PRIVATE_H */ 93#endif /* _LINUX_OF_PRIVATE_H */
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index 632aae861375..59fb12e84e6b 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -206,8 +206,16 @@ void __init fdt_init_reserved_mem(void)
206 for (i = 0; i < reserved_mem_count; i++) { 206 for (i = 0; i < reserved_mem_count; i++) {
207 struct reserved_mem *rmem = &reserved_mem[i]; 207 struct reserved_mem *rmem = &reserved_mem[i];
208 unsigned long node = rmem->fdt_node; 208 unsigned long node = rmem->fdt_node;
209 int len;
210 const __be32 *prop;
209 int err = 0; 211 int err = 0;
210 212
213 prop = of_get_flat_dt_prop(node, "phandle", &len);
214 if (!prop)
215 prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
216 if (prop)
217 rmem->phandle = of_read_number(prop, len/4);
218
211 if (rmem->size == 0) 219 if (rmem->size == 0)
212 err = __reserved_mem_alloc_size(node, rmem->name, 220 err = __reserved_mem_alloc_size(node, rmem->name,
213 &rmem->base, &rmem->size); 221 &rmem->base, &rmem->size);
@@ -215,3 +223,65 @@ void __init fdt_init_reserved_mem(void)
215 __reserved_mem_init_node(rmem); 223 __reserved_mem_init_node(rmem);
216 } 224 }
217} 225}
226
227static inline struct reserved_mem *__find_rmem(struct device_node *node)
228{
229 unsigned int i;
230
231 if (!node->phandle)
232 return NULL;
233
234 for (i = 0; i < reserved_mem_count; i++)
235 if (reserved_mem[i].phandle == node->phandle)
236 return &reserved_mem[i];
237 return NULL;
238}
239
240/**
241 * of_reserved_mem_device_init() - assign reserved memory region to given device
242 *
243 * This function assigns the memory region pointed to by the "memory-region"
244 * device tree property to the given device.
245 */
246void of_reserved_mem_device_init(struct device *dev)
247{
248 struct reserved_mem *rmem;
249 struct device_node *np;
250
251 np = of_parse_phandle(dev->of_node, "memory-region", 0);
252 if (!np)
253 return;
254
255 rmem = __find_rmem(np);
256 of_node_put(np);
257
258 if (!rmem || !rmem->ops || !rmem->ops->device_init)
259 return;
260
261 rmem->ops->device_init(rmem, dev);
262 dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
263}
264
265/**
266 * of_reserved_mem_device_release() - release reserved memory device structures
267 *
268 * This function releases structures allocated for memory region handling for
269 * the given device.
270 */
271void of_reserved_mem_device_release(struct device *dev)
272{
273 struct reserved_mem *rmem;
274 struct device_node *np;
275
276 np = of_parse_phandle(dev->of_node, "memory-region", 0);
277 if (!np)
278 return;
279
280 rmem = __find_rmem(np);
281 of_node_put(np);
282
283 if (!rmem || !rmem->ops || !rmem->ops->device_release)
284 return;
285
286 rmem->ops->device_release(rmem, dev);
287}
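A sketch of how a platform driver might use these hooks (driver names are hypothetical; a "memory-region" phandle is expected in the device's node):

#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>

static int foo_probe(struct platform_device *pdev)
{
	/* looks up the "memory-region" phandle and runs the region's
	 * device_init() op, e.g. to assign a CMA area to this device */
	of_reserved_mem_device_init(&pdev->dev);
	return 0;
}

static int foo_remove(struct platform_device *pdev)
{
	of_reserved_mem_device_release(&pdev->dev);
	return 0;
}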
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 500436f9be7f..0197725e033a 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -422,6 +422,7 @@ static int of_platform_bus_create(struct device_node *bus,
422 break; 422 break;
423 } 423 }
424 } 424 }
425 of_node_set_flag(bus, OF_POPULATED_BUS);
425 return rc; 426 return rc;
426} 427}
427 428
@@ -508,19 +509,13 @@ EXPORT_SYMBOL_GPL(of_platform_populate);
508 509
509static int of_platform_device_destroy(struct device *dev, void *data) 510static int of_platform_device_destroy(struct device *dev, void *data)
510{ 511{
511 bool *children_left = data;
512
513 /* Do not touch devices not populated from the device tree */ 512 /* Do not touch devices not populated from the device tree */
514 if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED)) { 513 if (!dev->of_node || !of_node_check_flag(dev->of_node, OF_POPULATED))
515 *children_left = true;
516 return 0; 514 return 0;
517 }
518 515
519 /* Recurse, but don't touch this device if it has any children left */ 516 /* Recurse for any nodes that were treated as busses */
520 if (of_platform_depopulate(dev) != 0) { 517 if (of_node_check_flag(dev->of_node, OF_POPULATED_BUS))
521 *children_left = true; 518 device_for_each_child(dev, NULL, of_platform_device_destroy);
522 return 0;
523 }
524 519
525 if (dev->bus == &platform_bus_type) 520 if (dev->bus == &platform_bus_type)
526 platform_device_unregister(to_platform_device(dev)); 521 platform_device_unregister(to_platform_device(dev));
@@ -528,19 +523,15 @@ static int of_platform_device_destroy(struct device *dev, void *data)
528 else if (dev->bus == &amba_bustype) 523 else if (dev->bus == &amba_bustype)
529 amba_device_unregister(to_amba_device(dev)); 524 amba_device_unregister(to_amba_device(dev));
530#endif 525#endif
531 else {
532 *children_left = true;
533 return 0;
534 }
535 526
536 of_node_clear_flag(dev->of_node, OF_POPULATED); 527 of_node_clear_flag(dev->of_node, OF_POPULATED);
537 528 of_node_clear_flag(dev->of_node, OF_POPULATED_BUS);
538 return 0; 529 return 0;
539} 530}
540 531
541/** 532/**
542 * of_platform_depopulate() - Remove devices populated from device tree 533 * of_platform_depopulate() - Remove devices populated from device tree
543 * @parent: device which childred will be removed 534 * @parent: device which children will be removed
544 * 535 *
545 * Complementary to of_platform_populate(), this function removes children 536 * Complementary to of_platform_populate(), this function removes children
546 * of the given device (and, recurrently, their children) that have been 537 * of the given device (and, recurrently, their children) that have been
@@ -550,14 +541,9 @@ static int of_platform_device_destroy(struct device *dev, void *data)
550 * Returns 0 when all children devices have been removed or 541 * Returns 0 when all children devices have been removed or
551 * -EBUSY when some children remained. 542 * -EBUSY when some children remained.
552 */ 543 */
553int of_platform_depopulate(struct device *parent) 544void of_platform_depopulate(struct device *parent)
554{ 545{
555 bool children_left = false; 546 device_for_each_child(parent, NULL, of_platform_device_destroy);
556
557 device_for_each_child(parent, &children_left,
558 of_platform_device_destroy);
559
560 return children_left ? -EBUSY : 0;
561} 547}
562EXPORT_SYMBOL_GPL(of_platform_depopulate); 548EXPORT_SYMBOL_GPL(of_platform_depopulate);
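With depopulate now returning void and OF_POPULATED_BUS handling the recursion, a typical pairing in a bus/MFD-style driver would look roughly like this (hypothetical names):

#include <linux/of_platform.h>
#include <linux/platform_device.h>

static int foo_bus_probe(struct platform_device *pdev)
{
	/* create platform/AMBA devices for all child nodes */
	return of_platform_populate(pdev->dev.of_node, NULL, NULL, &pdev->dev);
}

static int foo_bus_remove(struct platform_device *pdev)
{
	of_platform_depopulate(&pdev->dev);	/* no return value to check any more */
	return 0;
}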
563 549
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index 077314eebb95..d41002667833 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -9,6 +9,7 @@
9#include <linux/errno.h> 9#include <linux/errno.h>
10#include <linux/module.h> 10#include <linux/module.h>
11#include <linux/of.h> 11#include <linux/of.h>
12#include <linux/of_fdt.h>
12#include <linux/of_irq.h> 13#include <linux/of_irq.h>
13#include <linux/of_platform.h> 14#include <linux/of_platform.h>
14#include <linux/list.h> 15#include <linux/list.h>
@@ -16,11 +17,17 @@
16#include <linux/slab.h> 17#include <linux/slab.h>
17#include <linux/device.h> 18#include <linux/device.h>
18 19
20#include "of_private.h"
21
19static struct selftest_results { 22static struct selftest_results {
20 int passed; 23 int passed;
21 int failed; 24 int failed;
22} selftest_results; 25} selftest_results;
23 26
27#define NO_OF_NODES 2
28static struct device_node *nodes[NO_OF_NODES];
29static int last_node_index;
30
24#define selftest(result, fmt, ...) { \ 31#define selftest(result, fmt, ...) { \
25 if (!(result)) { \ 32 if (!(result)) { \
26 selftest_results.failed++; \ 33 selftest_results.failed++; \
@@ -266,6 +273,81 @@ static void __init of_selftest_property_match_string(void)
266 selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc); 273 selftest(rc == -EILSEQ, "unterminated string; rc=%i", rc);
267} 274}
268 275
276#define propcmp(p1, p2) (((p1)->length == (p2)->length) && \
277 (p1)->value && (p2)->value && \
278 !memcmp((p1)->value, (p2)->value, (p1)->length) && \
279 !strcmp((p1)->name, (p2)->name))
280static void __init of_selftest_property_copy(void)
281{
282#ifdef CONFIG_OF_DYNAMIC
283 struct property p1 = { .name = "p1", .length = 0, .value = "" };
284 struct property p2 = { .name = "p2", .length = 5, .value = "abcd" };
285 struct property *new;
286
287 new = __of_prop_dup(&p1, GFP_KERNEL);
288 selftest(new && propcmp(&p1, new), "empty property didn't copy correctly\n");
289 kfree(new->value);
290 kfree(new->name);
291 kfree(new);
292
293 new = __of_prop_dup(&p2, GFP_KERNEL);
294 selftest(new && propcmp(&p2, new), "non-empty property didn't copy correctly\n");
295 kfree(new->value);
296 kfree(new->name);
297 kfree(new);
298#endif
299}
300
301static void __init of_selftest_changeset(void)
302{
303#ifdef CONFIG_OF_DYNAMIC
304 struct property *ppadd, padd = { .name = "prop-add", .length = 0, .value = "" };
305 struct property *ppupdate, pupdate = { .name = "prop-update", .length = 5, .value = "abcd" };
306 struct property *ppremove;
307 struct device_node *n1, *n2, *n21, *nremove, *parent;
308 struct of_changeset chgset;
309
310 of_changeset_init(&chgset);
311 n1 = __of_node_alloc("/testcase-data/changeset/n1", GFP_KERNEL);
312 selftest(n1, "testcase setup failure\n");
313 n2 = __of_node_alloc("/testcase-data/changeset/n2", GFP_KERNEL);
314 selftest(n2, "testcase setup failure\n");
315 n21 = __of_node_alloc("/testcase-data/changeset/n2/n21", GFP_KERNEL);
316 selftest(n21, "testcase setup failure %p\n", n21);
317 nremove = of_find_node_by_path("/testcase-data/changeset/node-remove");
318 selftest(nremove, "testcase setup failure\n");
319 ppadd = __of_prop_dup(&padd, GFP_KERNEL);
320 selftest(ppadd, "testcase setup failure\n");
321 ppupdate = __of_prop_dup(&pupdate, GFP_KERNEL);
322 selftest(ppupdate, "testcase setup failure\n");
323 parent = nremove->parent;
324 n1->parent = parent;
325 n2->parent = parent;
326 n21->parent = n2;
327 n2->child = n21;
328 ppremove = of_find_property(parent, "prop-remove", NULL);
329 selftest(ppremove, "failed to find removal prop");
330
331 of_changeset_init(&chgset);
332 selftest(!of_changeset_attach_node(&chgset, n1), "fail attach n1\n");
333 selftest(!of_changeset_attach_node(&chgset, n2), "fail attach n2\n");
334 selftest(!of_changeset_detach_node(&chgset, nremove), "fail remove node\n");
335 selftest(!of_changeset_attach_node(&chgset, n21), "fail attach n21\n");
336 selftest(!of_changeset_add_property(&chgset, parent, ppadd), "fail add prop\n");
337 selftest(!of_changeset_update_property(&chgset, parent, ppupdate), "fail update prop\n");
338 selftest(!of_changeset_remove_property(&chgset, parent, ppremove), "fail remove prop\n");
339 mutex_lock(&of_mutex);
340 selftest(!of_changeset_apply(&chgset), "apply failed\n");
341 mutex_unlock(&of_mutex);
342
343 mutex_lock(&of_mutex);
344 selftest(!of_changeset_revert(&chgset), "revert failed\n");
345 mutex_unlock(&of_mutex);
346
347 of_changeset_destroy(&chgset);
348#endif
349}
350
269static void __init of_selftest_parse_interrupts(void) 351static void __init of_selftest_parse_interrupts(void)
270{ 352{
271 struct device_node *np; 353 struct device_node *np;
@@ -517,9 +599,156 @@ static void __init of_selftest_platform_populate(void)
517 } 599 }
518} 600}
519 601
602/**
603 * update_node_properties - adds the properties of np to the dup node
604 * (already present in the live tree) and updates the parent pointers
605 * of np's children to point to dup.
606 *
607 * @np: node already present in live tree
608 * @dup: node present in live tree to be updated
609 */
610static void update_node_properties(struct device_node *np,
611 struct device_node *dup)
612{
613 struct property *prop;
614 struct device_node *child;
615
616 for_each_property_of_node(np, prop)
617 of_add_property(dup, prop);
618
619 for_each_child_of_node(np, child)
620 child->parent = dup;
621}
622
623/**
624 * attach_node_and_children - attaches a node
625 * and its children to the live tree
626 *
627 * @np: Node to attach to live tree
628 */
629static int attach_node_and_children(struct device_node *np)
630{
631 struct device_node *next, *root = np, *dup;
632
633 if (!np) {
634 pr_warn("%s: No tree to attach; not running tests\n",
635 __func__);
636 return -ENODATA;
637 }
638
639
640 /* skip root node */
641 np = np->child;
642 /* storing a copy in temporary node */
643 dup = np;
644
645 while (dup) {
646 nodes[last_node_index++] = dup;
647 dup = dup->sibling;
648 }
649 dup = NULL;
650
651 while (np) {
652 next = np->allnext;
653 dup = of_find_node_by_path(np->full_name);
654 if (dup)
655 update_node_properties(np, dup);
656 else {
657 np->child = NULL;
658 if (np->parent == root)
659 np->parent = of_allnodes;
660 of_attach_node(np);
661 }
662 np = next;
663 }
664
665 return 0;
666}
667
668/**
669 * selftest_data_add - reads and copies data from the
670 * linked tree and attaches it to the live tree
671 */
672static int __init selftest_data_add(void)
673{
674 void *selftest_data;
675 struct device_node *selftest_data_node;
676 extern uint8_t __dtb_testcases_begin[];
677 extern uint8_t __dtb_testcases_end[];
678 const int size = __dtb_testcases_end - __dtb_testcases_begin;
679
680 if (!size || !of_allnodes) {
681 pr_warn("%s: No testcase data to attach; not running tests\n",
682 __func__);
683 return -ENODATA;
684 }
685
686 /* creating copy */
687 selftest_data = kmemdup(__dtb_testcases_begin, size, GFP_KERNEL);
688
689 if (!selftest_data) {
690 pr_warn("%s: Failed to allocate memory for selftest_data; "
691 "not running tests\n", __func__);
692 return -ENOMEM;
693 }
694 of_fdt_unflatten_tree(selftest_data, &selftest_data_node);
695
696 /* attach the sub-tree to live tree */
697 return attach_node_and_children(selftest_data_node);
698}
699
700/**
701 * detach_node_and_children - detaches a node
702 * and its children from the live tree
703 *
704 * @np: Node to detach from live tree
705 */
706static void detach_node_and_children(struct device_node *np)
707{
708 while (np->child)
709 detach_node_and_children(np->child);
710
711 while (np->sibling)
712 detach_node_and_children(np->sibling);
713
714 of_detach_node(np);
715}
716
717/**
718 * selftest_data_remove - removes the selftest data
719 * nodes from the live tree
720 */
721static void selftest_data_remove(void)
722{
723 struct device_node *np;
724 struct property *prop;
725
726 while (last_node_index >= 0) {
727 if (nodes[last_node_index]) {
728 np = of_find_node_by_path(nodes[last_node_index]->full_name);
729 if (strcmp(np->full_name, "/aliases") != 0) {
730 detach_node_and_children(np->child);
731 of_detach_node(np);
732 } else {
733 for_each_property_of_node(np, prop) {
734 if (strcmp(prop->name, "testcase-alias") == 0)
735 of_remove_property(np, prop);
736 }
737 }
738 }
739 last_node_index--;
740 }
741}
742
520static int __init of_selftest(void) 743static int __init of_selftest(void)
521{ 744{
522 struct device_node *np; 745 struct device_node *np;
746 int res;
747
748 /* adding data for selftest */
749 res = selftest_data_add();
750 if (res)
751 return res;
523 752
524 np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a"); 753 np = of_find_node_by_path("/testcase-data/phandle-tests/consumer-a");
525 if (!np) { 754 if (!np) {
@@ -533,12 +762,18 @@ static int __init of_selftest(void)
533 of_selftest_dynamic(); 762 of_selftest_dynamic();
534 of_selftest_parse_phandle_with_args(); 763 of_selftest_parse_phandle_with_args();
535 of_selftest_property_match_string(); 764 of_selftest_property_match_string();
765 of_selftest_property_copy();
766 of_selftest_changeset();
536 of_selftest_parse_interrupts(); 767 of_selftest_parse_interrupts();
537 of_selftest_parse_interrupts_extended(); 768 of_selftest_parse_interrupts_extended();
538 of_selftest_match_node(); 769 of_selftest_match_node();
539 of_selftest_platform_populate(); 770 of_selftest_platform_populate();
540 pr_info("end of selftest - %i passed, %i failed\n", 771 pr_info("end of selftest - %i passed, %i failed\n",
541 selftest_results.passed, selftest_results.failed); 772 selftest_results.passed, selftest_results.failed);
773
774 /* removing selftest data from live tree */
775 selftest_data_remove();
776
542 return 0; 777 return 0;
543} 778}
544late_initcall(of_selftest); 779late_initcall(of_selftest);
diff --git a/drivers/of/testcase-data/testcases.dts b/drivers/of/testcase-data/testcases.dts
new file mode 100644
index 000000000000..219ef9324e9c
--- /dev/null
+++ b/drivers/of/testcase-data/testcases.dts
@@ -0,0 +1,15 @@
1/dts-v1/;
2/ {
3 testcase-data {
4 changeset {
5 prop-update = "hello";
6 prop-remove = "world";
7 node-remove {
8 };
9 };
10 };
11};
12#include "tests-phandle.dtsi"
13#include "tests-interrupts.dtsi"
14#include "tests-match.dtsi"
15#include "tests-platform.dtsi"
diff --git a/drivers/of/testcase-data/testcases.dtsi b/drivers/of/testcase-data/testcases.dtsi
deleted file mode 100644
index 6d8d980ac858..000000000000
--- a/drivers/of/testcase-data/testcases.dtsi
+++ /dev/null
@@ -1,4 +0,0 @@
1#include "tests-phandle.dtsi"
2#include "tests-interrupts.dtsi"
3#include "tests-match.dtsi"
4#include "tests-platform.dtsi"
diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c
index 93aa29f6d39c..f2945fa73d4f 100644
--- a/drivers/pci/hotplug/rpaphp_core.c
+++ b/drivers/pci/hotplug/rpaphp_core.c
@@ -375,11 +375,11 @@ static void __exit cleanup_slots(void)
375 375
376static int __init rpaphp_init(void) 376static int __init rpaphp_init(void)
377{ 377{
378 struct device_node *dn = NULL; 378 struct device_node *dn;
379 379
380 info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 380 info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
381 381
382 while ((dn = of_find_node_by_name(dn, "pci"))) 382 for_each_node_by_name(dn, "pci")
383 rpaphp_add_slot(dn); 383 rpaphp_add_slot(dn);
384 384
385 return 0; 385 return 0;
diff --git a/drivers/scsi/cxgbi/cxgb3i/Kconfig b/drivers/scsi/cxgbi/cxgb3i/Kconfig
index 6bbc36fbd6ec..e4603985dce3 100644
--- a/drivers/scsi/cxgbi/cxgb3i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb3i/Kconfig
@@ -1,6 +1,6 @@
1config SCSI_CXGB3_ISCSI 1config SCSI_CXGB3_ISCSI
2 tristate "Chelsio T3 iSCSI support" 2 tristate "Chelsio T3 iSCSI support"
3 depends on PCI && INET 3 depends on PCI && INET && (IPV6 || IPV6=n)
4 select NETDEVICES 4 select NETDEVICES
5 select ETHERNET 5 select ETHERNET
6 select NET_VENDOR_CHELSIO 6 select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/cxgbi/cxgb4i/Kconfig b/drivers/scsi/cxgbi/cxgb4i/Kconfig
index 16b2c7d26617..8c4e423037b6 100644
--- a/drivers/scsi/cxgbi/cxgb4i/Kconfig
+++ b/drivers/scsi/cxgbi/cxgb4i/Kconfig
@@ -1,6 +1,6 @@
1config SCSI_CXGB4_ISCSI 1config SCSI_CXGB4_ISCSI
2 tristate "Chelsio T4 iSCSI support" 2 tristate "Chelsio T4 iSCSI support"
3 depends on PCI && INET 3 depends on PCI && INET && (IPV6 || IPV6=n)
4 select NETDEVICES 4 select NETDEVICES
5 select ETHERNET 5 select ETHERNET
6 select NET_VENDOR_CHELSIO 6 select NET_VENDOR_CHELSIO
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 43fea2219f83..ae45bd99baed 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -472,7 +472,8 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport)
472 if (delay > 0) 472 if (delay > 0)
473 queue_delayed_work(system_long_wq, &rport->reconnect_work, 473 queue_delayed_work(system_long_wq, &rport->reconnect_work,
474 1UL * delay * HZ); 474 1UL * delay * HZ);
475 if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { 475 if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) &&
476 srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
476 pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), 477 pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev),
477 rport->state); 478 rport->state);
478 scsi_target_block(&shost->shost_gendev); 479 scsi_target_block(&shost->shost_gendev);
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 0419b69e270f..4f485e88f60c 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -108,55 +108,23 @@ static void disable_tx_interrupt(struct ehv_bc_data *bc)
108 * 108 *
109 * The byte channel to be used for the console is specified via a "stdout" 109 * The byte channel to be used for the console is specified via a "stdout"
110 * property in the /chosen node. 110 * property in the /chosen node.
111 *
112 * For compatible with legacy device trees, we also look for a "stdout" alias.
113 */ 111 */
114static int find_console_handle(void) 112static int find_console_handle(void)
115{ 113{
116 struct device_node *np, *np2; 114 struct device_node *np = of_stdout;
117 const char *sprop = NULL; 115 const char *sprop = NULL;
118 const uint32_t *iprop; 116 const uint32_t *iprop;
119 117
120 np = of_find_node_by_path("/chosen");
121 if (np)
122 sprop = of_get_property(np, "stdout-path", NULL);
123
124 if (!np || !sprop) {
125 of_node_put(np);
126 np = of_find_node_by_name(NULL, "aliases");
127 if (np)
128 sprop = of_get_property(np, "stdout", NULL);
129 }
130
131 if (!sprop) {
132 of_node_put(np);
133 return 0;
134 }
135
136 /* We don't care what the aliased node is actually called. We only 118 /* We don't care what the aliased node is actually called. We only
137 * care if it's compatible with "epapr,hv-byte-channel", because that 119 * care if it's compatible with "epapr,hv-byte-channel", because that
138 * indicates that it's a byte channel node. We use a temporary 120 * indicates that it's a byte channel node.
139 * variable, 'np2', because we can't release 'np' until we're done with
140 * 'sprop'.
141 */ 121 */
142 np2 = of_find_node_by_path(sprop); 122 if (!np || !of_device_is_compatible(np, "epapr,hv-byte-channel"))
143 of_node_put(np);
144 np = np2;
145 if (!np) {
146 pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop);
147 return 0;
148 }
149
150 /* Is it a byte channel? */
151 if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) {
152 of_node_put(np);
153 return 0; 123 return 0;
154 }
155 124
156 stdout_irq = irq_of_parse_and_map(np, 0); 125 stdout_irq = irq_of_parse_and_map(np, 0);
157 if (stdout_irq == NO_IRQ) { 126 if (stdout_irq == NO_IRQ) {
158 pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop); 127 pr_err("ehv-bc: no 'interrupts' property in %s node\n", np->full_name);
159 of_node_put(np);
160 return 0; 128 return 0;
161 } 129 }
162 130
@@ -167,12 +135,9 @@ static int find_console_handle(void)
167 if (!iprop) { 135 if (!iprop) {
168 pr_err("ehv-bc: no 'hv-handle' property in %s node\n", 136 pr_err("ehv-bc: no 'hv-handle' property in %s node\n",
169 np->name); 137 np->name);
170 of_node_put(np);
171 return 0; 138 return 0;
172 } 139 }
173 stdout_bc = be32_to_cpu(*iprop); 140 stdout_bc = be32_to_cpu(*iprop);
174
175 of_node_put(np);
176 return 1; 141 return 1;
177} 142}
178 143
diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c
index a585079b4b38..a2cc5f834c63 100644
--- a/drivers/tty/hvc/hvc_opal.c
+++ b/drivers/tty/hvc/hvc_opal.c
@@ -342,22 +342,13 @@ static void udbg_init_opal_common(void)
342 342
343void __init hvc_opal_init_early(void) 343void __init hvc_opal_init_early(void)
344{ 344{
345 struct device_node *stdout_node = NULL; 345 struct device_node *stdout_node = of_node_get(of_stdout);
346 const __be32 *termno; 346 const __be32 *termno;
347 const char *name = NULL;
348 const struct hv_ops *ops; 347 const struct hv_ops *ops;
349 u32 index; 348 u32 index;
350 349
351 /* find the boot console from /chosen/stdout */ 350 /* If the console wasn't in /chosen, try /ibm,opal */
352 if (of_chosen) 351 if (!stdout_node) {
353 name = of_get_property(of_chosen, "linux,stdout-path", NULL);
354 if (name) {
355 stdout_node = of_find_node_by_path(name);
356 if (!stdout_node) {
357 pr_err("hvc_opal: Failed to locate default console!\n");
358 return;
359 }
360 } else {
361 struct device_node *opal, *np; 352 struct device_node *opal, *np;
362 353
363 /* Current OPAL takeover doesn't provide the stdout 354 /* Current OPAL takeover doesn't provide the stdout
diff --git a/drivers/tty/hvc/hvc_vio.c b/drivers/tty/hvc/hvc_vio.c
index b594abfbf21e..5618b5fc7500 100644
--- a/drivers/tty/hvc/hvc_vio.c
+++ b/drivers/tty/hvc/hvc_vio.c
@@ -404,42 +404,35 @@ module_exit(hvc_vio_exit);
404 404
405void __init hvc_vio_init_early(void) 405void __init hvc_vio_init_early(void)
406{ 406{
407 struct device_node *stdout_node;
408 const __be32 *termno; 407 const __be32 *termno;
409 const char *name; 408 const char *name;
410 const struct hv_ops *ops; 409 const struct hv_ops *ops;
411 410
412 /* find the boot console from /chosen/stdout */ 411 /* find the boot console from /chosen/stdout */
413 if (!of_chosen) 412 if (!of_stdout)
414 return; 413 return;
415 name = of_get_property(of_chosen, "linux,stdout-path", NULL); 414 name = of_get_property(of_stdout, "name", NULL);
416 if (name == NULL)
417 return;
418 stdout_node = of_find_node_by_path(name);
419 if (!stdout_node)
420 return;
421 name = of_get_property(stdout_node, "name", NULL);
422 if (!name) { 415 if (!name) {
423 printk(KERN_WARNING "stdout node missing 'name' property!\n"); 416 printk(KERN_WARNING "stdout node missing 'name' property!\n");
424 goto out; 417 return;
425 } 418 }
426 419
427 /* Check if it's a virtual terminal */ 420 /* Check if it's a virtual terminal */
428 if (strncmp(name, "vty", 3) != 0) 421 if (strncmp(name, "vty", 3) != 0)
429 goto out; 422 return;
430 termno = of_get_property(stdout_node, "reg", NULL); 423 termno = of_get_property(of_stdout, "reg", NULL);
431 if (termno == NULL) 424 if (termno == NULL)
432 goto out; 425 return;
433 hvterm_priv0.termno = of_read_number(termno, 1); 426 hvterm_priv0.termno = of_read_number(termno, 1);
434 spin_lock_init(&hvterm_priv0.buf_lock); 427 spin_lock_init(&hvterm_priv0.buf_lock);
435 hvterm_privs[0] = &hvterm_priv0; 428 hvterm_privs[0] = &hvterm_priv0;
436 429
437 /* Check the protocol */ 430 /* Check the protocol */
438 if (of_device_is_compatible(stdout_node, "hvterm1")) { 431 if (of_device_is_compatible(of_stdout, "hvterm1")) {
439 hvterm_priv0.proto = HV_PROTOCOL_RAW; 432 hvterm_priv0.proto = HV_PROTOCOL_RAW;
440 ops = &hvterm_raw_ops; 433 ops = &hvterm_raw_ops;
441 } 434 }
442 else if (of_device_is_compatible(stdout_node, "hvterm-protocol")) { 435 else if (of_device_is_compatible(of_stdout, "hvterm-protocol")) {
443 hvterm_priv0.proto = HV_PROTOCOL_HVSI; 436 hvterm_priv0.proto = HV_PROTOCOL_HVSI;
444 ops = &hvterm_hvsi_ops; 437 ops = &hvterm_hvsi_ops;
445 hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars, 438 hvsilib_init(&hvterm_priv0.hvsi, hvc_get_chars, hvc_put_chars,
@@ -447,7 +440,7 @@ void __init hvc_vio_init_early(void)
447 /* HVSI, perform the handshake now */ 440 /* HVSI, perform the handshake now */
448 hvsilib_establish(&hvterm_priv0.hvsi); 441 hvsilib_establish(&hvterm_priv0.hvsi);
449 } else 442 } else
450 goto out; 443 return;
451 udbg_putc = udbg_hvc_putc; 444 udbg_putc = udbg_hvc_putc;
452 udbg_getc = udbg_hvc_getc; 445 udbg_getc = udbg_hvc_getc;
453 udbg_getc_poll = udbg_hvc_getc_poll; 446 udbg_getc_poll = udbg_hvc_getc_poll;
@@ -456,14 +449,12 @@ void __init hvc_vio_init_early(void)
456 * backend for HVSI, only do udbg 449 * backend for HVSI, only do udbg
457 */ 450 */
458 if (hvterm_priv0.proto == HV_PROTOCOL_HVSI) 451 if (hvterm_priv0.proto == HV_PROTOCOL_HVSI)
459 goto out; 452 return;
460#endif 453#endif
461 /* Check whether the user has requested a different console. */ 454 /* Check whether the user has requested a different console. */
462 if (!strstr(cmd_line, "console=")) 455 if (!strstr(cmd_line, "console="))
463 add_preferred_console("hvc", 0, NULL); 456 add_preferred_console("hvc", 0, NULL);
464 hvc_instantiate(0, 0, ops); 457 hvc_instantiate(0, 0, ops);
465out:
466 of_node_put(stdout_node);
467} 458}
468 459
469/* call this from early_init() for a working debug console on 460/* call this from early_init() for a working debug console on
diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index f7ad5b903055..abbfedb84901 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -1653,8 +1653,7 @@ static int __init pmz_probe(void)
1653 /* 1653 /*
1654 * Find all escc chips in the system 1654 * Find all escc chips in the system
1655 */ 1655 */
1656 node_p = of_find_node_by_name(NULL, "escc"); 1656 for_each_node_by_name(node_p, "escc") {
1657 while (node_p) {
1658 /* 1657 /*
1659 * First get channel A/B node pointers 1658 * First get channel A/B node pointers
1660 * 1659 *
@@ -1672,7 +1671,7 @@ static int __init pmz_probe(void)
1672 of_node_put(node_b); 1671 of_node_put(node_b);
1673 printk(KERN_ERR "pmac_zilog: missing node %c for escc %s\n", 1672 printk(KERN_ERR "pmac_zilog: missing node %c for escc %s\n",
1674 (!node_a) ? 'a' : 'b', node_p->full_name); 1673 (!node_a) ? 'a' : 'b', node_p->full_name);
1675 goto next; 1674 continue;
1676 } 1675 }
1677 1676
1678 /* 1677 /*
@@ -1699,11 +1698,9 @@ static int __init pmz_probe(void)
1699 of_node_put(node_b); 1698 of_node_put(node_b);
1700 memset(&pmz_ports[count], 0, sizeof(struct uart_pmac_port)); 1699 memset(&pmz_ports[count], 0, sizeof(struct uart_pmac_port));
1701 memset(&pmz_ports[count+1], 0, sizeof(struct uart_pmac_port)); 1700 memset(&pmz_ports[count+1], 0, sizeof(struct uart_pmac_port));
1702 goto next; 1701 continue;
1703 } 1702 }
1704 count += 2; 1703 count += 2;
1705next:
1706 node_p = of_find_node_by_name(node_p, "escc");
1707 } 1704 }
1708 pmz_ports_count = count; 1705 pmz_ports_count = count;
1709 1706
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index 8bb19da01639..29a7be47389a 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -26,6 +26,7 @@
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include <linux/init.h> 27#include <linux/init.h>
28#include <linux/console.h> 28#include <linux/console.h>
29#include <linux/of.h>
29#include <linux/proc_fs.h> 30#include <linux/proc_fs.h>
30#include <linux/seq_file.h> 31#include <linux/seq_file.h>
31#include <linux/device.h> 32#include <linux/device.h>
@@ -2611,6 +2612,8 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
2611 spin_lock_init(&uport->lock); 2612 spin_lock_init(&uport->lock);
2612 lockdep_set_class(&uport->lock, &port_lock_key); 2613 lockdep_set_class(&uport->lock, &port_lock_key);
2613 } 2614 }
2615 if (uport->cons && uport->dev)
2616 of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
2614 2617
2615 uart_configure_port(drv, state, uport); 2618 uart_configure_port(drv, state, uport);
2616 2619
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index af7b204b9215..d8c57636b9ce 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE
8 depends on VFIO && SPAPR_TCE_IOMMU 8 depends on VFIO && SPAPR_TCE_IOMMU
9 default n 9 default n
10 10
11config VFIO_SPAPR_EEH
12 tristate
13 depends on EEH && VFIO_IOMMU_SPAPR_TCE
14 default n
15
11menuconfig VFIO 16menuconfig VFIO
12 tristate "VFIO Non-Privileged userspace driver framework" 17 tristate "VFIO Non-Privileged userspace driver framework"
13 depends on IOMMU_API 18 depends on IOMMU_API
14 select VFIO_IOMMU_TYPE1 if X86 19 select VFIO_IOMMU_TYPE1 if X86
15 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) 20 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES)
21 select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES)
16 select ANON_INODES 22 select ANON_INODES
17 help 23 help
18 VFIO provides a framework for secure userspace device drivers. 24 VFIO provides a framework for secure userspace device drivers.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50e30bc75e85..0b035b12600a 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,5 +1,5 @@
1obj-$(CONFIG_VFIO) += vfio.o 1obj-$(CONFIG_VFIO) += vfio.o
2obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o 2obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
3obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o 3obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
4obj-$(CONFIG_EEH) += vfio_spapr_eeh.o 4obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
5obj-$(CONFIG_VFIO_PCI) += pci/ 5obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index e2ee80f36e3e..f7825332a325 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
37MODULE_PARM_DESC(nointxmask, 37MODULE_PARM_DESC(nointxmask,
38 "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); 38 "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
39 39
40static DEFINE_MUTEX(driver_lock);
41
42static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
43
40static int vfio_pci_enable(struct vfio_pci_device *vdev) 44static int vfio_pci_enable(struct vfio_pci_device *vdev)
41{ 45{
42 struct pci_dev *pdev = vdev->pdev; 46 struct pci_dev *pdev = vdev->pdev;
@@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
44 u16 cmd; 48 u16 cmd;
45 u8 msix_pos; 49 u8 msix_pos;
46 50
51 /* Don't allow our initial saved state to include busmaster */
52 pci_clear_master(pdev);
53
47 ret = pci_enable_device(pdev); 54 ret = pci_enable_device(pdev);
48 if (ret) 55 if (ret)
49 return ret; 56 return ret;
@@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
99 struct pci_dev *pdev = vdev->pdev; 106 struct pci_dev *pdev = vdev->pdev;
100 int bar; 107 int bar;
101 108
102 pci_disable_device(pdev); 109 /* Stop the device from further DMA */
110 pci_clear_master(pdev);
103 111
104 vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | 112 vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE |
105 VFIO_IRQ_SET_ACTION_TRIGGER, 113 VFIO_IRQ_SET_ACTION_TRIGGER,
@@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
117 vdev->barmap[bar] = NULL; 125 vdev->barmap[bar] = NULL;
118 } 126 }
119 127
128 vdev->needs_reset = true;
129
120 /* 130 /*
121 * If we have saved state, restore it. If we can reset the device, 131 * If we have saved state, restore it. If we can reset the device,
122 * even better. Resetting with current state seems better than 132 * even better. Resetting with current state seems better than
@@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
128 __func__, dev_name(&pdev->dev)); 138 __func__, dev_name(&pdev->dev));
129 139
130 if (!vdev->reset_works) 140 if (!vdev->reset_works)
131 return; 141 goto out;
132 142
133 pci_save_state(pdev); 143 pci_save_state(pdev);
134 } 144 }
@@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
148 if (ret) 158 if (ret)
149 pr_warn("%s: Failed to reset device %s (%d)\n", 159 pr_warn("%s: Failed to reset device %s (%d)\n",
150 __func__, dev_name(&pdev->dev), ret); 160 __func__, dev_name(&pdev->dev), ret);
161 else
162 vdev->needs_reset = false;
151 } 163 }
152 164
153 pci_restore_state(pdev); 165 pci_restore_state(pdev);
166out:
167 pci_disable_device(pdev);
168
169 vfio_pci_try_bus_reset(vdev);
154} 170}
155 171
156static void vfio_pci_release(void *device_data) 172static void vfio_pci_release(void *device_data)
157{ 173{
158 struct vfio_pci_device *vdev = device_data; 174 struct vfio_pci_device *vdev = device_data;
159 175
160 if (atomic_dec_and_test(&vdev->refcnt)) { 176 mutex_lock(&driver_lock);
177
178 if (!(--vdev->refcnt)) {
161 vfio_spapr_pci_eeh_release(vdev->pdev); 179 vfio_spapr_pci_eeh_release(vdev->pdev);
162 vfio_pci_disable(vdev); 180 vfio_pci_disable(vdev);
163 } 181 }
164 182
183 mutex_unlock(&driver_lock);
184
165 module_put(THIS_MODULE); 185 module_put(THIS_MODULE);
166} 186}
167 187
168static int vfio_pci_open(void *device_data) 188static int vfio_pci_open(void *device_data)
169{ 189{
170 struct vfio_pci_device *vdev = device_data; 190 struct vfio_pci_device *vdev = device_data;
171 int ret; 191 int ret = 0;
172 192
173 if (!try_module_get(THIS_MODULE)) 193 if (!try_module_get(THIS_MODULE))
174 return -ENODEV; 194 return -ENODEV;
175 195
176 if (atomic_inc_return(&vdev->refcnt) == 1) { 196 mutex_lock(&driver_lock);
197
198 if (!vdev->refcnt) {
177 ret = vfio_pci_enable(vdev); 199 ret = vfio_pci_enable(vdev);
178 if (ret) 200 if (ret)
179 goto error; 201 goto error;
180 202
181 ret = vfio_spapr_pci_eeh_open(vdev->pdev); 203 vfio_spapr_pci_eeh_open(vdev->pdev);
182 if (ret) {
183 vfio_pci_disable(vdev);
184 goto error;
185 }
186 } 204 }
187 205 vdev->refcnt++;
188 return 0;
189error: 206error:
190 module_put(THIS_MODULE); 207 mutex_unlock(&driver_lock);
208 if (ret)
209 module_put(THIS_MODULE);
191 return ret; 210 return ret;
192} 211}
193 212
@@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
843 vdev->irq_type = VFIO_PCI_NUM_IRQS; 862 vdev->irq_type = VFIO_PCI_NUM_IRQS;
844 mutex_init(&vdev->igate); 863 mutex_init(&vdev->igate);
845 spin_lock_init(&vdev->irqlock); 864 spin_lock_init(&vdev->irqlock);
846 atomic_set(&vdev->refcnt, 0);
847 865
848 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); 866 ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev);
849 if (ret) { 867 if (ret) {
@@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev)
858{ 876{
859 struct vfio_pci_device *vdev; 877 struct vfio_pci_device *vdev;
860 878
879 mutex_lock(&driver_lock);
880
861 vdev = vfio_del_group_dev(&pdev->dev); 881 vdev = vfio_del_group_dev(&pdev->dev);
862 if (!vdev) 882 if (vdev) {
863 return; 883 iommu_group_put(pdev->dev.iommu_group);
884 kfree(vdev);
885 }
864 886
865 iommu_group_put(pdev->dev.iommu_group); 887 mutex_unlock(&driver_lock);
866 kfree(vdev);
867} 888}
868 889
869static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, 890static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = {
906 .err_handler = &vfio_err_handlers, 927 .err_handler = &vfio_err_handlers,
907}; 928};
908 929
930/*
931 * Test whether a reset is necessary and possible. We mark devices as
932 * needs_reset when they are released, but don't have a function-local reset
933 * available. If any of these exist in the affected devices, we want to do
934 * a bus/slot reset. We also need all of the affected devices to be unused,
935 * so we abort if any device has a non-zero refcnt. driver_lock prevents a
936 * device from being opened during the scan or unbound from vfio-pci.
937 */
938static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
939{
940 bool *needs_reset = data;
941 struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
942 int ret = -EBUSY;
943
944 if (pci_drv == &vfio_pci_driver) {
945 struct vfio_device *device;
946 struct vfio_pci_device *vdev;
947
948 device = vfio_device_get_from_dev(&pdev->dev);
949 if (!device)
950 return ret;
951
952 vdev = vfio_device_data(device);
953 if (vdev) {
954 if (vdev->needs_reset)
955 *needs_reset = true;
956
957 if (!vdev->refcnt)
958 ret = 0;
959 }
960
961 vfio_device_put(device);
962 }
963
964 /*
965 * TODO: vfio-core considers groups to be viable even if some devices
966 * are attached to known drivers, like pci-stub or pcieport. We can't
967 * freeze devices from being unbound to those drivers like we can
968 * here though, so it would be racy to test for them. We also can't
969 * use device_lock() to prevent changes as that would interfere with
970 * PCI-core taking device_lock during bus reset. For now, we require
971 * devices to be bound to vfio-pci to get a bus/slot reset on release.
972 */
973
974 return ret;
975}
976
977/* Clear needs_reset on all affected devices after successful bus/slot reset */
978static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
979{
980 struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
981
982 if (pci_drv == &vfio_pci_driver) {
983 struct vfio_device *device;
984 struct vfio_pci_device *vdev;
985
986 device = vfio_device_get_from_dev(&pdev->dev);
987 if (!device)
988 return 0;
989
990 vdev = vfio_device_data(device);
991 if (vdev)
992 vdev->needs_reset = false;
993
994 vfio_device_put(device);
995 }
996
997 return 0;
998}
999
1000/*
1001 * Attempt to do a bus/slot reset if there are devices affected by a reset for
1002 * this device that are needs_reset and all of the affected devices are unused
1003 * (!refcnt). Callers of this function are required to hold driver_lock such
1004 * that devices can not be unbound from vfio-pci or opened by a user while we
1005 * test for and perform a bus/slot reset.
1006 */
1007static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
1008{
1009 bool needs_reset = false, slot = false;
1010 int ret;
1011
1012 if (!pci_probe_reset_slot(vdev->pdev->slot))
1013 slot = true;
1014 else if (pci_probe_reset_bus(vdev->pdev->bus))
1015 return;
1016
1017 if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
1018 vfio_pci_test_bus_reset,
1019 &needs_reset, slot) || !needs_reset)
1020 return;
1021
1022 if (slot)
1023 ret = pci_try_reset_slot(vdev->pdev->slot);
1024 else
1025 ret = pci_try_reset_bus(vdev->pdev->bus);
1026
1027 if (ret)
1028 return;
1029
1030 vfio_pci_for_each_slot_or_bus(vdev->pdev,
1031 vfio_pci_clear_needs_reset, NULL, slot);
1032}
1033
909static void __exit vfio_pci_cleanup(void) 1034static void __exit vfio_pci_cleanup(void)
910{ 1035{
911 pci_unregister_driver(&vfio_pci_driver); 1036 pci_unregister_driver(&vfio_pci_driver);
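The vfio-pci hunks above replace the atomic refcnt with a plain integer serialized by a driver-wide mutex, so the first open enables the device and the last release can disable it and attempt a bus/slot reset without racing a concurrent open or unbind. Below is a minimal userspace analog of that first-open/last-release pattern, assuming pthreads in place of the kernel mutex API; the function names are illustrative, not taken from the patch.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t driver_lock = PTHREAD_MUTEX_INITIALIZER;
static int refcnt;		/* protected by driver_lock, like vdev->refcnt above */
static int needs_reset;		/* set when the device is disabled, cleared after reset */

static void device_enable(void)
{
	printf("enable device\n");
}

static void device_disable(void)
{
	printf("disable device\n");
	needs_reset = 1;
}

static void try_bus_reset(void)
{
	/* only meaningful while driver_lock is held and refcnt == 0 */
	if (needs_reset) {
		printf("bus/slot reset\n");
		needs_reset = 0;
	}
}

int device_open(void)
{
	pthread_mutex_lock(&driver_lock);
	if (!refcnt)
		device_enable();	/* first opener powers the device up */
	refcnt++;
	pthread_mutex_unlock(&driver_lock);
	return 0;
}

void device_release(void)
{
	pthread_mutex_lock(&driver_lock);
	if (!(--refcnt)) {		/* last user: quiesce, then maybe reset */
		device_disable();
		try_bus_reset();
	}
	pthread_mutex_unlock(&driver_lock);
}

int main(void)
{
	device_open();
	device_open();
	device_release();
	device_release();		/* second release hits the disable + reset path */
	return 0;
}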
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 9c6d5d0f3b02..671c17a6e6d0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -54,8 +54,9 @@ struct vfio_pci_device {
54 bool extended_caps; 54 bool extended_caps;
55 bool bardirty; 55 bool bardirty;
56 bool has_vga; 56 bool has_vga;
57 bool needs_reset;
57 struct pci_saved_state *pci_saved_state; 58 struct pci_saved_state *pci_saved_state;
58 atomic_t refcnt; 59 int refcnt;
59 struct eventfd_ctx *err_trigger; 60 struct eventfd_ctx *err_trigger;
60}; 61};
61 62
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index f834b4ce1431..86dfceb9201f 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -9,20 +9,27 @@
9 * published by the Free Software Foundation. 9 * published by the Free Software Foundation.
10 */ 10 */
11 11
12#include <linux/module.h>
12#include <linux/uaccess.h> 13#include <linux/uaccess.h>
13#include <linux/vfio.h> 14#include <linux/vfio.h>
14#include <asm/eeh.h> 15#include <asm/eeh.h>
15 16
17#define DRIVER_VERSION "0.1"
18#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation"
19#define DRIVER_DESC "VFIO IOMMU SPAPR EEH"
20
16/* We might build address mapping here for "fast" path later */ 21/* We might build address mapping here for "fast" path later */
17int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) 22void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
18{ 23{
19 return eeh_dev_open(pdev); 24 eeh_dev_open(pdev);
20} 25}
26EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
21 27
22void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) 28void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
23{ 29{
24 eeh_dev_release(pdev); 30 eeh_dev_release(pdev);
25} 31}
32EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
26 33
27long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, 34long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
28 unsigned int cmd, unsigned long arg) 35 unsigned int cmd, unsigned long arg)
@@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
85 92
86 return ret; 93 return ret;
87} 94}
95EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
96
97MODULE_VERSION(DRIVER_VERSION);
98MODULE_LICENSE("GPL v2");
99MODULE_AUTHOR(DRIVER_AUTHOR);
100MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index d2633ee099d9..b39e5000ff58 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -308,6 +308,7 @@ struct bio_integrity_payload {
308 308
309 unsigned short bip_slab; /* slab the bip came from */ 309 unsigned short bip_slab; /* slab the bip came from */
310 unsigned short bip_vcnt; /* # of integrity bio_vecs */ 310 unsigned short bip_vcnt; /* # of integrity bio_vecs */
311 unsigned short bip_max_vcnt; /* integrity bio_vec slots */
311 unsigned bip_owns_buf:1; /* should free bip_buf */ 312 unsigned bip_owns_buf:1; /* should free bip_buf */
312 313
313 struct work_struct bip_work; /* I/O completion */ 314 struct work_struct bip_work; /* I/O completion */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 8699bcf5f099..518b46555b80 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -21,6 +21,7 @@
21#include <linux/bsg.h> 21#include <linux/bsg.h>
22#include <linux/smp.h> 22#include <linux/smp.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/percpu-refcount.h>
24 25
25#include <asm/scatterlist.h> 26#include <asm/scatterlist.h>
26 27
@@ -470,6 +471,7 @@ struct request_queue {
470 struct mutex sysfs_lock; 471 struct mutex sysfs_lock;
471 472
472 int bypass_depth; 473 int bypass_depth;
474 int mq_freeze_depth;
473 475
474#if defined(CONFIG_BLK_DEV_BSG) 476#if defined(CONFIG_BLK_DEV_BSG)
475 bsg_job_fn *bsg_job_fn; 477 bsg_job_fn *bsg_job_fn;
@@ -483,7 +485,7 @@ struct request_queue {
483#endif 485#endif
484 struct rcu_head rcu_head; 486 struct rcu_head rcu_head;
485 wait_queue_head_t mq_freeze_wq; 487 wait_queue_head_t mq_freeze_wq;
486 struct percpu_counter mq_usage_counter; 488 struct percpu_ref mq_usage_counter;
487 struct list_head all_q_node; 489 struct list_head all_q_node;
488 490
489 struct blk_mq_tag_set *tag_set; 491 struct blk_mq_tag_set *tag_set;
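struct request_queue above trades its percpu_counter-based mq_usage_counter for a percpu_ref, which keeps the per-request get/put cheap and pushes the cost into the freeze path. A rough sketch of that get/put/kill pattern follows; it assumes the two-argument percpu_ref_init() of this kernel generation (later kernels add gfp and flag parameters) and is not the actual blk-mq code.

/* Sketch of the percpu_ref get/put/kill pattern, not the actual blk-mq code. */
#include <linux/percpu-refcount.h>
#include <linux/completion.h>
#include <linux/errno.h>

static struct percpu_ref usage;
static DECLARE_COMPLETION(usage_dead);

static void usage_release(struct percpu_ref *ref)
{
	complete(&usage_dead);		/* last reference dropped after percpu_ref_kill() */
}

static int usage_setup(void)
{
	/* two-argument form of this era; later kernels also take gfp/flags */
	return percpu_ref_init(&usage, usage_release);
}

static int do_one_request(void)
{
	if (!percpu_ref_tryget_live(&usage))
		return -ENODEV;		/* queue is frozen or going away */
	/* ... issue and complete the request ... */
	percpu_ref_put(&usage);
	return 0;
}

static void freeze_queue(void)
{
	percpu_ref_kill(&usage);	/* stop new gets, drop the base reference */
	wait_for_completion(&usage_dead);
}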
diff --git a/include/linux/drbd.h b/include/linux/drbd.h
index 3dbe9bd57a09..debb70d40547 100644
--- a/include/linux/drbd.h
+++ b/include/linux/drbd.h
@@ -52,7 +52,7 @@
52#endif 52#endif
53 53
54extern const char *drbd_buildtag(void); 54extern const char *drbd_buildtag(void);
55#define REL_VERSION "8.4.3" 55#define REL_VERSION "8.4.5"
56#define API_VERSION 1 56#define API_VERSION 1
57#define PRO_VERSION_MIN 86 57#define PRO_VERSION_MIN 86
58#define PRO_VERSION_MAX 101 58#define PRO_VERSION_MAX 101
@@ -245,7 +245,7 @@ enum drbd_disk_state {
245 D_DISKLESS, 245 D_DISKLESS,
246 D_ATTACHING, /* In the process of reading the meta-data */ 246 D_ATTACHING, /* In the process of reading the meta-data */
247 D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */ 247 D_FAILED, /* Becomes D_DISKLESS as soon as we told it the peer */
248 /* when >= D_FAILED it is legal to access mdev->bc */ 248 /* when >= D_FAILED it is legal to access mdev->ldev */
249 D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */ 249 D_NEGOTIATING, /* Late attaching state, we need to talk to the peer */
250 D_INCONSISTENT, 250 D_INCONSISTENT,
251 D_OUTDATED, 251 D_OUTDATED,
diff --git a/include/linux/drbd_genl.h b/include/linux/drbd_genl.h
index 4193f5f2636c..7b131ed8f9c6 100644
--- a/include/linux/drbd_genl.h
+++ b/include/linux/drbd_genl.h
@@ -171,6 +171,10 @@ GENL_struct(DRBD_NLA_NET_CONF, 5, net_conf,
171 __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative) 171 __flg_field(28, DRBD_GENLA_F_MANDATORY | DRBD_F_INVARIANT, tentative)
172 __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF) 172 __flg_field_def(29, DRBD_GENLA_F_MANDATORY, use_rle, DRBD_USE_RLE_DEF)
173 /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */ 173 /* 9: __u32_field_def(30, DRBD_GENLA_F_MANDATORY, fencing_policy, DRBD_FENCING_DEF) */
174 /* 9: __str_field_def(31, DRBD_GENLA_F_MANDATORY, name, SHARED_SECRET_MAX) */
175 /* 9: __u32_field(32, DRBD_F_REQUIRED | DRBD_F_INVARIANT, peer_node_id) */
176 __flg_field_def(33, 0 /* OPTIONAL */, csums_after_crash_only, DRBD_CSUMS_AFTER_CRASH_ONLY_DEF)
177 __u32_field_def(34, 0 /* OPTIONAL */, sock_check_timeo, DRBD_SOCKET_CHECK_TIMEO_DEF)
174) 178)
175 179
176GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms, 180GENL_struct(DRBD_NLA_SET_ROLE_PARMS, 6, set_role_parms,
diff --git a/include/linux/drbd_limits.h b/include/linux/drbd_limits.h
index 17e50bb00521..8ac8c5d9a3ad 100644
--- a/include/linux/drbd_limits.h
+++ b/include/linux/drbd_limits.h
@@ -214,6 +214,7 @@
214#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0 214#define DRBD_ALLOW_TWO_PRIMARIES_DEF 0
215#define DRBD_ALWAYS_ASBP_DEF 0 215#define DRBD_ALWAYS_ASBP_DEF 0
216#define DRBD_USE_RLE_DEF 1 216#define DRBD_USE_RLE_DEF 1
217#define DRBD_CSUMS_AFTER_CRASH_ONLY_DEF 0
217 218
218#define DRBD_AL_STRIPES_MIN 1 219#define DRBD_AL_STRIPES_MIN 1
219#define DRBD_AL_STRIPES_MAX 1024 220#define DRBD_AL_STRIPES_MAX 1024
@@ -224,4 +225,9 @@
224#define DRBD_AL_STRIPE_SIZE_MAX 16777216 225#define DRBD_AL_STRIPE_SIZE_MAX 16777216
225#define DRBD_AL_STRIPE_SIZE_DEF 32 226#define DRBD_AL_STRIPE_SIZE_DEF 32
226#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */ 227#define DRBD_AL_STRIPE_SIZE_SCALE 'k' /* kilobytes */
228
229#define DRBD_SOCKET_CHECK_TIMEO_MIN 0
230#define DRBD_SOCKET_CHECK_TIMEO_MAX DRBD_PING_TIMEO_MAX
231#define DRBD_SOCKET_CHECK_TIMEO_DEF 0
232#define DRBD_SOCKET_CHECK_TIMEO_SCALE '1'
227#endif 233#endif
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index c8450366c130..379c02648ab3 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -116,6 +116,7 @@ enum {
116 /* special QP and management commands */ 116 /* special QP and management commands */
117 MLX4_CMD_CONF_SPECIAL_QP = 0x23, 117 MLX4_CMD_CONF_SPECIAL_QP = 0x23,
118 MLX4_CMD_MAD_IFC = 0x24, 118 MLX4_CMD_MAD_IFC = 0x24,
119 MLX4_CMD_MAD_DEMUX = 0x203,
119 120
120 /* multicast commands */ 121 /* multicast commands */
121 MLX4_CMD_READ_MCG = 0x25, 122 MLX4_CMD_READ_MCG = 0x25,
@@ -186,6 +187,12 @@ enum {
186}; 187};
187 188
188enum { 189enum {
190 MLX4_CMD_MAD_DEMUX_CONFIG = 0,
191 MLX4_CMD_MAD_DEMUX_QUERY_STATE = 1,
192 MLX4_CMD_MAD_DEMUX_QUERY_RESTR = 2, /* Query mad demux restrictions */
193};
194
195enum {
189 MLX4_CMD_WRAPPED, 196 MLX4_CMD_WRAPPED,
190 MLX4_CMD_NATIVE 197 MLX4_CMD_NATIVE
191}; 198};
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e15b1544ea83..071f6b234604 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -183,6 +183,7 @@ enum {
183 MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, 183 MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8,
184 MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9, 184 MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9,
185 MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10, 185 MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10,
186 MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11,
186}; 187};
187 188
188enum { 189enum {
@@ -273,6 +274,7 @@ enum {
273 MLX4_PERM_REMOTE_WRITE = 1 << 13, 274 MLX4_PERM_REMOTE_WRITE = 1 << 13,
274 MLX4_PERM_ATOMIC = 1 << 14, 275 MLX4_PERM_ATOMIC = 1 << 14,
275 MLX4_PERM_BIND_MW = 1 << 15, 276 MLX4_PERM_BIND_MW = 1 << 15,
277 MLX4_PERM_MASK = 0xFC00
276}; 278};
277 279
278enum { 280enum {
@@ -1254,6 +1256,21 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port);
1254int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); 1256int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port);
1255int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, 1257int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port,
1256 int enable); 1258 int enable);
1259int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
1260 struct mlx4_mpt_entry ***mpt_entry);
1261int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
1262 struct mlx4_mpt_entry **mpt_entry);
1263int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
1264 u32 pdn);
1265int mlx4_mr_hw_change_access(struct mlx4_dev *dev,
1266 struct mlx4_mpt_entry *mpt_entry,
1267 u32 access);
1268void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev,
1269 struct mlx4_mpt_entry **mpt_entry);
1270void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr);
1271int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
1272 u64 iova, u64 size, int npages,
1273 int page_shift, struct mlx4_mpt_entry *mpt_entry);
1257 1274
1258/* Returns true if running in low memory profile (kdump kernel) */ 1275/* Returns true if running in low memory profile (kdump kernel) */
1259static inline bool mlx4_low_memory_profile(void) 1276static inline bool mlx4_low_memory_profile(void)
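The new mlx4_mr_hw_* helpers declared above let the IB driver rework an existing memory region's MPT entry in place, in support of the rereg_user_mr verb added later in this diff. The sketch below shows one plausible composition of those calls; the ordering is inferred from the prototypes, not taken from the mlx4 sources.

/* Illustrative composition of the helpers above; not taken from the mlx4 driver. */
#include <linux/mlx4/device.h>

static int change_mr_pd_and_access(struct mlx4_dev *dev, struct mlx4_mr *mmr,
				   u32 new_pdn, u32 new_access)
{
	struct mlx4_mpt_entry **mpt_entry;
	int err;

	err = mlx4_mr_hw_get_mpt(dev, mmr, &mpt_entry);
	if (err)
		return err;

	err = mlx4_mr_hw_change_pd(dev, *mpt_entry, new_pdn);
	if (!err)
		err = mlx4_mr_hw_change_access(dev, *mpt_entry, new_access);
	if (!err)
		err = mlx4_mr_hw_write_mpt(dev, mmr, mpt_entry);

	mlx4_mr_hw_put_mpt(dev, mpt_entry);
	return err;
}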
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index babaea93bca6..29ce014ab421 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -213,6 +213,8 @@ struct dw_mci_dma_ops {
213#define DW_MCI_QUIRK_HIGHSPEED BIT(2) 213#define DW_MCI_QUIRK_HIGHSPEED BIT(2)
214/* Unreliable card detection */ 214/* Unreliable card detection */
215#define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3) 215#define DW_MCI_QUIRK_BROKEN_CARD_DETECTION BIT(3)
216/* No write protect */
217#define DW_MCI_QUIRK_NO_WRITE_PROTECT BIT(4)
216 218
217/* Slot level quirks */ 219/* Slot level quirks */
218/* This slot has no write protect */ 220/* This slot has no write protect */
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 08abe9941884..09ebe57d5ce9 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -104,9 +104,6 @@ struct sdhci_host {
104 104
105 const struct sdhci_ops *ops; /* Low level hw interface */ 105 const struct sdhci_ops *ops; /* Low level hw interface */
106 106
107 struct regulator *vmmc; /* Power regulator (vmmc) */
108 struct regulator *vqmmc; /* Signaling regulator (vccq) */
109
110 /* Internal data */ 107 /* Internal data */
111 struct mmc_host *mmc; /* MMC structure */ 108 struct mmc_host *mmc; /* MMC structure */
112 u64 dma_mask; /* custom DMA mask */ 109 u64 dma_mask; /* custom DMA mask */
diff --git a/include/linux/of.h b/include/linux/of.h
index 196b34c1ef4e..6c4363b8ddc3 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -74,8 +74,6 @@ struct of_phandle_args {
74 uint32_t args[MAX_PHANDLE_ARGS]; 74 uint32_t args[MAX_PHANDLE_ARGS];
75}; 75};
76 76
77extern int of_node_add(struct device_node *node);
78
79/* initialize a node */ 77/* initialize a node */
80extern struct kobj_type of_node_ktype; 78extern struct kobj_type of_node_ktype;
81static inline void of_node_init(struct device_node *node) 79static inline void of_node_init(struct device_node *node)
@@ -113,6 +111,7 @@ static inline void of_node_put(struct device_node *node) { }
113extern struct device_node *of_allnodes; 111extern struct device_node *of_allnodes;
114extern struct device_node *of_chosen; 112extern struct device_node *of_chosen;
115extern struct device_node *of_aliases; 113extern struct device_node *of_aliases;
114extern struct device_node *of_stdout;
116extern raw_spinlock_t devtree_lock; 115extern raw_spinlock_t devtree_lock;
117 116
118static inline bool of_have_populated_dt(void) 117static inline bool of_have_populated_dt(void)
@@ -204,6 +203,7 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size)
204#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */ 203#define OF_DYNAMIC 1 /* node and properties were allocated via kmalloc */
205#define OF_DETACHED 2 /* node has been detached from the device tree */ 204#define OF_DETACHED 2 /* node has been detached from the device tree */
206#define OF_POPULATED 3 /* device already created for the node */ 205#define OF_POPULATED 3 /* device already created for the node */
206#define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */
207 207
208#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) 208#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
209#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) 209#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
@@ -322,6 +322,7 @@ extern int of_update_property(struct device_node *np, struct property *newprop);
322struct of_prop_reconfig { 322struct of_prop_reconfig {
323 struct device_node *dn; 323 struct device_node *dn;
324 struct property *prop; 324 struct property *prop;
325 struct property *old_prop;
325}; 326};
326 327
327extern int of_reconfig_notifier_register(struct notifier_block *); 328extern int of_reconfig_notifier_register(struct notifier_block *);
@@ -352,7 +353,7 @@ const __be32 *of_prop_next_u32(struct property *prop, const __be32 *cur,
352 */ 353 */
353const char *of_prop_next_string(struct property *prop, const char *cur); 354const char *of_prop_next_string(struct property *prop, const char *cur);
354 355
355int of_device_is_stdout_path(struct device_node *dn); 356bool of_console_check(struct device_node *dn, char *name, int index);
356 357
357#else /* CONFIG_OF */ 358#else /* CONFIG_OF */
358 359
@@ -564,9 +565,9 @@ static inline int of_machine_is_compatible(const char *compat)
564 return 0; 565 return 0;
565} 566}
566 567
567static inline int of_device_is_stdout_path(struct device_node *dn) 568static inline bool of_console_check(const struct device_node *dn, const char *name, int index)
568{ 569{
569 return 0; 570 return false;
570} 571}
571 572
572static inline const __be32 *of_prop_next_u32(struct property *prop, 573static inline const __be32 *of_prop_next_u32(struct property *prop,
@@ -786,4 +787,80 @@ typedef void (*of_init_fn_1)(struct device_node *);
786#define OF_DECLARE_2(table, name, compat, fn) \ 787#define OF_DECLARE_2(table, name, compat, fn) \
787 _OF_DECLARE(table, name, compat, fn, of_init_fn_2) 788 _OF_DECLARE(table, name, compat, fn, of_init_fn_2)
788 789
790/**
791 * struct of_changeset_entry - Holds a changeset entry
792 *
793 * @node: list_head for the log list
794 * @action: notifier action
795 * @np: pointer to the device node affected
796 * @prop: pointer to the property affected
797 * @old_prop: hold a pointer to the original property
798 *
799 * Every modification of the device tree during a changeset
800 * is held in a list of of_changeset_entry structures.
801 * That way we can recover from a partial application, or we can
802 * revert the changeset
803 */
804struct of_changeset_entry {
805 struct list_head node;
806 unsigned long action;
807 struct device_node *np;
808 struct property *prop;
809 struct property *old_prop;
810};
811
812/**
813 * struct of_changeset - changeset tracker structure
814 *
815 * @entries: list_head for the changeset entries
816 *
817 * changesets are a convenient way to apply bulk changes to the
818 * live tree. In case of an error, changes are rolled-back.
819 * changesets live on after initial application, and if not
820 * destroyed after use, they can be reverted in one single call.
821 */
822struct of_changeset {
823 struct list_head entries;
824};
825
826#ifdef CONFIG_OF_DYNAMIC
827extern void of_changeset_init(struct of_changeset *ocs);
828extern void of_changeset_destroy(struct of_changeset *ocs);
829extern int of_changeset_apply(struct of_changeset *ocs);
830extern int of_changeset_revert(struct of_changeset *ocs);
831extern int of_changeset_action(struct of_changeset *ocs,
832 unsigned long action, struct device_node *np,
833 struct property *prop);
834
835static inline int of_changeset_attach_node(struct of_changeset *ocs,
836 struct device_node *np)
837{
838 return of_changeset_action(ocs, OF_RECONFIG_ATTACH_NODE, np, NULL);
839}
840
841static inline int of_changeset_detach_node(struct of_changeset *ocs,
842 struct device_node *np)
843{
844 return of_changeset_action(ocs, OF_RECONFIG_DETACH_NODE, np, NULL);
845}
846
847static inline int of_changeset_add_property(struct of_changeset *ocs,
848 struct device_node *np, struct property *prop)
849{
850 return of_changeset_action(ocs, OF_RECONFIG_ADD_PROPERTY, np, prop);
851}
852
853static inline int of_changeset_remove_property(struct of_changeset *ocs,
854 struct device_node *np, struct property *prop)
855{
856 return of_changeset_action(ocs, OF_RECONFIG_REMOVE_PROPERTY, np, prop);
857}
858
859static inline int of_changeset_update_property(struct of_changeset *ocs,
860 struct device_node *np, struct property *prop)
861{
862 return of_changeset_action(ocs, OF_RECONFIG_UPDATE_PROPERTY, np, prop);
863}
864#endif
865
789#endif /* _LINUX_OF_H */ 866#endif /* _LINUX_OF_H */
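The changeset helpers above (available under CONFIG_OF_DYNAMIC) batch device-tree edits so they either apply as a whole or can be rolled back. A minimal sketch of the intended call sequence, grounded only in the declarations shown here; whatever locking the OF core expects around apply is omitted.

/* Sketch of the changeset flow declared above; error handling kept minimal. */
#include <linux/of.h>

static int add_node_with_prop(struct device_node *np, struct property *prop)
{
	struct of_changeset ocs;
	int ret;

	of_changeset_init(&ocs);

	ret = of_changeset_attach_node(&ocs, np);
	if (!ret)
		ret = of_changeset_add_property(&ocs, np, prop);
	if (!ret)
		ret = of_changeset_apply(&ocs);	/* all entries take effect, or none */

	if (ret)
		of_changeset_destroy(&ocs);	/* drop the recorded entries on failure */
	return ret;
}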
diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h
index d96e1badbee0..c2b0627a2317 100644
--- a/include/linux/of_platform.h
+++ b/include/linux/of_platform.h
@@ -72,7 +72,7 @@ extern int of_platform_populate(struct device_node *root,
72 const struct of_device_id *matches, 72 const struct of_device_id *matches,
73 const struct of_dev_auxdata *lookup, 73 const struct of_dev_auxdata *lookup,
74 struct device *parent); 74 struct device *parent);
75extern int of_platform_depopulate(struct device *parent); 75extern void of_platform_depopulate(struct device *parent);
76#else 76#else
77static inline int of_platform_populate(struct device_node *root, 77static inline int of_platform_populate(struct device_node *root,
78 const struct of_device_id *matches, 78 const struct of_device_id *matches,
@@ -81,10 +81,7 @@ static inline int of_platform_populate(struct device_node *root,
81{ 81{
82 return -ENODEV; 82 return -ENODEV;
83} 83}
84static inline int of_platform_depopulate(struct device *parent) 84static inline void of_platform_depopulate(struct device *parent) { }
85{
86 return -ENODEV;
87}
88#endif 85#endif
89 86
90#endif /* _LINUX_OF_PLATFORM_H */ 87#endif /* _LINUX_OF_PLATFORM_H */
diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h
index 4669ddfdd5af..5b5efae09135 100644
--- a/include/linux/of_reserved_mem.h
+++ b/include/linux/of_reserved_mem.h
@@ -8,6 +8,7 @@ struct reserved_mem_ops;
8struct reserved_mem { 8struct reserved_mem {
9 const char *name; 9 const char *name;
10 unsigned long fdt_node; 10 unsigned long fdt_node;
11 unsigned long phandle;
11 const struct reserved_mem_ops *ops; 12 const struct reserved_mem_ops *ops;
12 phys_addr_t base; 13 phys_addr_t base;
13 phys_addr_t size; 14 phys_addr_t size;
@@ -27,10 +28,16 @@ typedef int (*reservedmem_of_init_fn)(struct reserved_mem *rmem);
27 _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn) 28 _OF_DECLARE(reservedmem, name, compat, init, reservedmem_of_init_fn)
28 29
29#ifdef CONFIG_OF_RESERVED_MEM 30#ifdef CONFIG_OF_RESERVED_MEM
31void of_reserved_mem_device_init(struct device *dev);
32void of_reserved_mem_device_release(struct device *dev);
33
30void fdt_init_reserved_mem(void); 34void fdt_init_reserved_mem(void);
31void fdt_reserved_mem_save_node(unsigned long node, const char *uname, 35void fdt_reserved_mem_save_node(unsigned long node, const char *uname,
32 phys_addr_t base, phys_addr_t size); 36 phys_addr_t base, phys_addr_t size);
33#else 37#else
38static inline void of_reserved_mem_device_init(struct device *dev) { }
39static inline void of_reserved_mem_device_release(struct device *pdev) { }
40
34static inline void fdt_init_reserved_mem(void) { } 41static inline void fdt_init_reserved_mem(void) { }
35static inline void fdt_reserved_mem_save_node(unsigned long node, 42static inline void fdt_reserved_mem_save_node(unsigned long node,
36 const char *uname, phys_addr_t base, phys_addr_t size) { } 43 const char *uname, phys_addr_t base, phys_addr_t size) { }
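The new of_reserved_mem_device_init/release hooks give a driver a way to attach the reserved-memory region named by its device node and to drop it again on remove. Below is a hedged sketch of where those calls would sit in a platform driver; the foo_* names are hypothetical.

/* Illustrative placement only; the foo_* names are not from this patch. */
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/of_reserved_mem.h>

static int foo_probe(struct platform_device *pdev)
{
	/* hook up the memory-region the device tree assigns to this device, if any */
	of_reserved_mem_device_init(&pdev->dev);
	return 0;
}

static int foo_remove(struct platform_device *pdev)
{
	of_reserved_mem_device_release(&pdev->dev);
	return 0;
}

static struct platform_driver foo_driver = {
	.probe	= foo_probe,
	.remove	= foo_remove,
	.driver	= { .name = "foo" },
};
module_platform_driver(foo_driver);

MODULE_LICENSE("GPL");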
diff --git a/include/linux/platform_data/mmc-omap.h b/include/linux/platform_data/mmc-omap.h
index 2bf1b30cb5dc..51e70cf25cbc 100644
--- a/include/linux/platform_data/mmc-omap.h
+++ b/include/linux/platform_data/mmc-omap.h
@@ -28,6 +28,7 @@
28 */ 28 */
29#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT BIT(0) 29#define OMAP_HSMMC_SUPPORTS_DUAL_VOLT BIT(0)
30#define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ BIT(1) 30#define OMAP_HSMMC_BROKEN_MULTIBLOCK_READ BIT(1)
31#define OMAP_HSMMC_SWAKEUP_MISSING BIT(2)
31 32
32struct mmc_card; 33struct mmc_card;
33 34
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 0990997a5304..d78125f73ac4 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -10,6 +10,9 @@
10extern const char linux_banner[]; 10extern const char linux_banner[];
11extern const char linux_proc_banner[]; 11extern const char linux_proc_banner[];
12 12
13extern char *log_buf_addr_get(void);
14extern u32 log_buf_len_get(void);
15
13static inline int printk_get_level(const char *buffer) 16static inline int printk_get_level(const char *buffer)
14{ 17{
15 if (buffer[0] == KERN_SOH_ASCII && buffer[1]) { 18 if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 9cda293c867d..36826c0166c5 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -21,7 +21,7 @@
21#include <linux/rculist.h> 21#include <linux/rculist.h>
22 22
23struct rhash_head { 23struct rhash_head {
24 struct rhash_head *next; 24 struct rhash_head __rcu *next;
25}; 25};
26 26
27#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) 27#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL)
@@ -97,7 +97,7 @@ u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr);
97void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t); 97void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node, gfp_t);
98bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t); 98bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node, gfp_t);
99void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, 99void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
100 struct rhash_head **pprev, gfp_t flags); 100 struct rhash_head __rcu **pprev, gfp_t flags);
101 101
102bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); 102bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
103bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); 103bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);
@@ -117,18 +117,12 @@ void rhashtable_destroy(const struct rhashtable *ht);
117#define rht_dereference_rcu(p, ht) \ 117#define rht_dereference_rcu(p, ht) \
118 rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) 118 rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
119 119
120/* Internal, use rht_obj() instead */
121#define rht_entry(ptr, type, member) container_of(ptr, type, member) 120#define rht_entry(ptr, type, member) container_of(ptr, type, member)
122#define rht_entry_safe(ptr, type, member) \ 121#define rht_entry_safe(ptr, type, member) \
123({ \ 122({ \
124 typeof(ptr) __ptr = (ptr); \ 123 typeof(ptr) __ptr = (ptr); \
125 __ptr ? rht_entry(__ptr, type, member) : NULL; \ 124 __ptr ? rht_entry(__ptr, type, member) : NULL; \
126}) 125})
127#define rht_entry_safe_rcu(ptr, type, member) \
128({ \
129 typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \
130 __ptr ? container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member) : NULL; \
131})
132 126
133#define rht_next_entry_safe(pos, ht, member) \ 127#define rht_next_entry_safe(pos, ht, member) \
134({ \ 128({ \
@@ -205,9 +199,10 @@ void rhashtable_destroy(const struct rhashtable *ht);
205 * traversal is guarded by rcu_read_lock(). 199 * traversal is guarded by rcu_read_lock().
206 */ 200 */
207#define rht_for_each_entry_rcu(pos, head, member) \ 201#define rht_for_each_entry_rcu(pos, head, member) \
208 for (pos = rht_entry_safe_rcu(head, typeof(*(pos)), member); \ 202 for (pos = rht_entry_safe(rcu_dereference_raw(head), \
203 typeof(*(pos)), member); \
209 pos; \ 204 pos; \
210 pos = rht_entry_safe_rcu((pos)->member.next, \ 205 pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \
211 typeof(*(pos)), member)) 206 typeof(*(pos)), member))
212 207
213#endif /* _LINUX_RHASHTABLE_H */ 208#endif /* _LINUX_RHASHTABLE_H */
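With the __rcu annotations above, rht_for_each_entry_rcu() now resolves each next pointer with rcu_dereference_raw(), so lockless walkers must run inside rcu_read_lock(). A small sketch of such a walk over one bucket head follows, assuming an object type that embeds a struct rhash_head; how the bucket head is obtained is left out.

/* Sketch of an RCU-side walk; how "head" (a bucket head) is obtained is omitted. */
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>

struct object {
	int key;
	struct rhash_head node;		/* linkage used by the hash table */
};

static int count_matches(struct rhash_head __rcu *head, int key)
{
	struct object *obj;
	int n = 0;

	rcu_read_lock();
	rht_for_each_entry_rcu(obj, head, node)
		if (obj->key == key)
			n++;
	rcu_read_unlock();

	return n;
}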
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 25a0fbd4b998..d3204115f15d 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -98,16 +98,16 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
98extern long vfio_external_check_extension(struct vfio_group *group, 98extern long vfio_external_check_extension(struct vfio_group *group,
99 unsigned long arg); 99 unsigned long arg);
100 100
101struct pci_dev;
101#ifdef CONFIG_EEH 102#ifdef CONFIG_EEH
102extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); 103extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
103extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); 104extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
104extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, 105extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
105 unsigned int cmd, 106 unsigned int cmd,
106 unsigned long arg); 107 unsigned long arg);
107#else 108#else
108static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) 109static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
109{ 110{
110 return 0;
111} 111}
112 112
113static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) 113static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 7a4313887568..5fbe6568c3cf 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -62,6 +62,7 @@ struct inet_connection_sock_af_ops {
62 void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); 62 void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
63 int (*bind_conflict)(const struct sock *sk, 63 int (*bind_conflict)(const struct sock *sk,
64 const struct inet_bind_bucket *tb, bool relax); 64 const struct inet_bind_bucket *tb, bool relax);
65 void (*mtu_reduced)(struct sock *sk);
65}; 66};
66 67
67/** inet_connection_sock - INET connection oriented sock 68/** inet_connection_sock - INET connection oriented sock
diff --git a/include/net/sock.h b/include/net/sock.h
index 38805fa02e48..7f2ab72f321a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -987,7 +987,6 @@ struct proto {
987 struct sk_buff *skb); 987 struct sk_buff *skb);
988 988
989 void (*release_cb)(struct sock *sk); 989 void (*release_cb)(struct sock *sk);
990 void (*mtu_reduced)(struct sock *sk);
991 990
992 /* Keeping track of sk's, looking them up, and port selection methods. */ 991 /* Keeping track of sk's, looking them up, and port selection methods. */
993 void (*hash)(struct sock *sk); 992 void (*hash)(struct sock *sk);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index dafa1cbc149b..590e01a476ac 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -417,7 +417,7 @@ void tcp_update_metrics(struct sock *sk);
417void tcp_init_metrics(struct sock *sk); 417void tcp_init_metrics(struct sock *sk);
418void tcp_metrics_init(void); 418void tcp_metrics_init(void);
419bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, 419bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
420 bool paws_check); 420 bool paws_check, bool timestamps);
421bool tcp_remember_stamp(struct sock *sk); 421bool tcp_remember_stamp(struct sock *sk);
422bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw); 422bool tcp_tw_remember_stamp(struct inet_timewait_sock *tw);
423void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst); 423void tcp_fetch_timewait_stamp(struct sock *sk, struct dst_entry *dst);
@@ -448,6 +448,7 @@ const u8 *tcp_parse_md5sig_option(const struct tcphdr *th);
448 */ 448 */
449 449
450void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); 450void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
451void tcp_v4_mtu_reduced(struct sock *sk);
451int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); 452int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
452struct sock *tcp_create_openreq_child(struct sock *sk, 453struct sock *tcp_create_openreq_child(struct sock *sk,
453 struct request_sock *req, 454 struct request_sock *req,
@@ -705,8 +706,10 @@ struct tcp_skb_cb {
705#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ 706#define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */
706#define TCPCB_LOST 0x04 /* SKB is lost */ 707#define TCPCB_LOST 0x04 /* SKB is lost */
707#define TCPCB_TAGBITS 0x07 /* All tag bits */ 708#define TCPCB_TAGBITS 0x07 /* All tag bits */
709#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */
708#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ 710#define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */
709#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS) 711#define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \
712 TCPCB_REPAIRED)
710 713
711 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */ 714 __u8 ip_dsfield; /* IPv4 tos or IPv6 dsfield */
712 /* 1 byte hole */ 715 /* 1 byte hole */
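The mtu_reduced callback moves from struct proto into inet_connection_sock_af_ops, and tcp_v4_mtu_reduced becomes visible through tcp.h. A partial, illustrative initializer shows how the per-family ops would then be wired; only the .mtu_reduced line reflects this diff, the rest is placeholder wiring.

/* Partial, illustrative initializer; only .mtu_reduced reflects this diff. */
#include <net/inet_connection_sock.h>
#include <net/tcp.h>
#include <net/ip.h>

static const struct inet_connection_sock_af_ops hypothetical_ipv4_af_ops = {
	.queue_xmit	= ip_queue_xmit,
	.send_check	= tcp_v4_send_check,
	.mtu_reduced	= tcp_v4_mtu_reduced,	/* moved here from struct proto */
};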
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 3d81b90cc315..9bb99e983f58 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -40,6 +40,7 @@
40#include <linux/list.h> 40#include <linux/list.h>
41 41
42#include <rdma/ib_verbs.h> 42#include <rdma/ib_verbs.h>
43#include <uapi/rdma/ib_user_mad.h>
43 44
44/* Management base version */ 45/* Management base version */
45#define IB_MGMT_BASE_VERSION 1 46#define IB_MGMT_BASE_VERSION 1
@@ -355,9 +356,13 @@ typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent,
355 * @hi_tid: Access layer assigned transaction ID for this client. 356 * @hi_tid: Access layer assigned transaction ID for this client.
356 * Unsolicited MADs sent by this client will have the upper 32-bits 357 * Unsolicited MADs sent by this client will have the upper 32-bits
357 * of their TID set to this value. 358 * of their TID set to this value.
359 * @flags: registration flags
358 * @port_num: Port number on which QP is registered 360 * @port_num: Port number on which QP is registered
359 * @rmpp_version: If set, indicates the RMPP version used by this agent. 361 * @rmpp_version: If set, indicates the RMPP version used by this agent.
360 */ 362 */
363enum {
364 IB_MAD_USER_RMPP = IB_USER_MAD_USER_RMPP,
365};
361struct ib_mad_agent { 366struct ib_mad_agent {
362 struct ib_device *device; 367 struct ib_device *device;
363 struct ib_qp *qp; 368 struct ib_qp *qp;
@@ -367,6 +372,7 @@ struct ib_mad_agent {
367 ib_mad_snoop_handler snoop_handler; 372 ib_mad_snoop_handler snoop_handler;
368 void *context; 373 void *context;
369 u32 hi_tid; 374 u32 hi_tid;
375 u32 flags;
370 u8 port_num; 376 u8 port_num;
371 u8 rmpp_version; 377 u8 rmpp_version;
372}; 378};
@@ -426,6 +432,7 @@ struct ib_mad_recv_wc {
426 * in the range from 0x30 to 0x4f. Otherwise not used. 432 * in the range from 0x30 to 0x4f. Otherwise not used.
427 * @method_mask: The caller will receive unsolicited MADs for any method 433 * @method_mask: The caller will receive unsolicited MADs for any method
428 * where @method_mask = 1. 434 * where @method_mask = 1.
435 *
429 */ 436 */
430struct ib_mad_reg_req { 437struct ib_mad_reg_req {
431 u8 mgmt_class; 438 u8 mgmt_class;
@@ -451,6 +458,7 @@ struct ib_mad_reg_req {
451 * @recv_handler: The completion callback routine invoked for a received 458 * @recv_handler: The completion callback routine invoked for a received
452 * MAD. 459 * MAD.
453 * @context: User specified context associated with the registration. 460 * @context: User specified context associated with the registration.
461 * @registration_flags: Registration flags to set for this agent
454 */ 462 */
455struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, 463struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
456 u8 port_num, 464 u8 port_num,
@@ -459,7 +467,8 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
459 u8 rmpp_version, 467 u8 rmpp_version,
460 ib_mad_send_handler send_handler, 468 ib_mad_send_handler send_handler,
461 ib_mad_recv_handler recv_handler, 469 ib_mad_recv_handler recv_handler,
462 void *context); 470 void *context,
471 u32 registration_flags);
463 472
464enum ib_mad_snoop_flags { 473enum ib_mad_snoop_flags {
465 /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ 474 /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/
@@ -661,4 +670,11 @@ void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num);
661 */ 670 */
662void ib_free_send_mad(struct ib_mad_send_buf *send_buf); 671void ib_free_send_mad(struct ib_mad_send_buf *send_buf);
663 672
673/**
674 * ib_mad_kernel_rmpp_agent - Returns if the agent is performing RMPP.
675 * @agent: the agent in question
676 * @return: true if agent is performing rmpp, false otherwise.
677 */
678int ib_mad_kernel_rmpp_agent(struct ib_mad_agent *agent);
679
664#endif /* IB_MAD_H */ 680#endif /* IB_MAD_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 7ccef342f724..ed44cc07a7b3 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1097,7 +1097,8 @@ struct ib_mr_attr {
1097enum ib_mr_rereg_flags { 1097enum ib_mr_rereg_flags {
1098 IB_MR_REREG_TRANS = 1, 1098 IB_MR_REREG_TRANS = 1,
1099 IB_MR_REREG_PD = (1<<1), 1099 IB_MR_REREG_PD = (1<<1),
1100 IB_MR_REREG_ACCESS = (1<<2) 1100 IB_MR_REREG_ACCESS = (1<<2),
1101 IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1)
1101}; 1102};
1102 1103
1103/** 1104/**
@@ -1547,6 +1548,13 @@ struct ib_device {
1547 u64 virt_addr, 1548 u64 virt_addr,
1548 int mr_access_flags, 1549 int mr_access_flags,
1549 struct ib_udata *udata); 1550 struct ib_udata *udata);
1551 int (*rereg_user_mr)(struct ib_mr *mr,
1552 int flags,
1553 u64 start, u64 length,
1554 u64 virt_addr,
1555 int mr_access_flags,
1556 struct ib_pd *pd,
1557 struct ib_udata *udata);
1550 int (*query_mr)(struct ib_mr *mr, 1558 int (*query_mr)(struct ib_mr *mr,
1551 struct ib_mr_attr *mr_attr); 1559 struct ib_mr_attr *mr_attr);
1552 int (*dereg_mr)(struct ib_mr *mr); 1560 int (*dereg_mr)(struct ib_mr *mr);
diff --git a/include/scsi/sg.h b/include/scsi/sg.h
index 9859355a7cf9..750e5db7c6bf 100644
--- a/include/scsi/sg.h
+++ b/include/scsi/sg.h
@@ -86,7 +86,9 @@ typedef struct sg_io_hdr
86#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */ 86#define SG_FLAG_MMAP_IO 4 /* request memory mapped IO */
87#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */ 87#define SG_FLAG_NO_DXFER 0x10000 /* no transfer of kernel buffers to/from */
88 /* user space (debug indirect IO) */ 88 /* user space (debug indirect IO) */
89#define SG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */ 89/* defaults:: for sg driver: Q_AT_HEAD; for block layer: Q_AT_TAIL */
90#define SG_FLAG_Q_AT_TAIL 0x10
91#define SG_FLAG_Q_AT_HEAD 0x20
90 92
91/* following 'info' values are "or"-ed together */ 93/* following 'info' values are "or"-ed together */
92#define SG_INFO_OK_MASK 0x1 94#define SG_INFO_OK_MASK 0x1
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index c9c3c044b32f..981acf74b14f 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -148,11 +148,13 @@ TRACE_EVENT(bcache_read,
148); 148);
149 149
150TRACE_EVENT(bcache_write, 150TRACE_EVENT(bcache_write,
151 TP_PROTO(struct bio *bio, bool writeback, bool bypass), 151 TP_PROTO(struct cache_set *c, u64 inode, struct bio *bio,
152 TP_ARGS(bio, writeback, bypass), 152 bool writeback, bool bypass),
153 TP_ARGS(c, inode, bio, writeback, bypass),
153 154
154 TP_STRUCT__entry( 155 TP_STRUCT__entry(
155 __field(dev_t, dev ) 156 __array(char, uuid, 16 )
157 __field(u64, inode )
156 __field(sector_t, sector ) 158 __field(sector_t, sector )
157 __field(unsigned int, nr_sector ) 159 __field(unsigned int, nr_sector )
158 __array(char, rwbs, 6 ) 160 __array(char, rwbs, 6 )
@@ -161,7 +163,8 @@ TRACE_EVENT(bcache_write,
161 ), 163 ),
162 164
163 TP_fast_assign( 165 TP_fast_assign(
164 __entry->dev = bio->bi_bdev->bd_dev; 166 memcpy(__entry->uuid, c->sb.set_uuid, 16);
167 __entry->inode = inode;
165 __entry->sector = bio->bi_iter.bi_sector; 168 __entry->sector = bio->bi_iter.bi_sector;
166 __entry->nr_sector = bio->bi_iter.bi_size >> 9; 169 __entry->nr_sector = bio->bi_iter.bi_size >> 9;
167 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size); 170 blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_iter.bi_size);
@@ -169,8 +172,8 @@ TRACE_EVENT(bcache_write,
169 __entry->bypass = bypass; 172 __entry->bypass = bypass;
170 ), 173 ),
171 174
172 TP_printk("%d,%d %s %llu + %u hit %u bypass %u", 175 TP_printk("%pU inode %llu %s %llu + %u hit %u bypass %u",
173 MAJOR(__entry->dev), MINOR(__entry->dev), 176 __entry->uuid, __entry->inode,
174 __entry->rwbs, (unsigned long long)__entry->sector, 177 __entry->rwbs, (unsigned long long)__entry->sector,
175 __entry->nr_sector, __entry->writeback, __entry->bypass) 178 __entry->nr_sector, __entry->writeback, __entry->bypass)
176); 179);
@@ -258,9 +261,9 @@ DEFINE_EVENT(btree_node, bcache_btree_node_alloc,
258 TP_ARGS(b) 261 TP_ARGS(b)
259); 262);
260 263
261DEFINE_EVENT(btree_node, bcache_btree_node_alloc_fail, 264DEFINE_EVENT(cache_set, bcache_btree_node_alloc_fail,
262 TP_PROTO(struct btree *b), 265 TP_PROTO(struct cache_set *c),
263 TP_ARGS(b) 266 TP_ARGS(c)
264); 267);
265 268
266DEFINE_EVENT(btree_node, bcache_btree_node_free, 269DEFINE_EVENT(btree_node, bcache_btree_node_free,
diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
new file mode 100644
index 000000000000..b59b065e9e5d
--- /dev/null
+++ b/include/trace/events/thp.h
@@ -0,0 +1,88 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM thp
3
4#if !defined(_TRACE_THP_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_THP_H
6
7#include <linux/types.h>
8#include <linux/tracepoint.h>
9
10TRACE_EVENT(hugepage_invalidate,
11
12 TP_PROTO(unsigned long addr, unsigned long pte),
13 TP_ARGS(addr, pte),
14 TP_STRUCT__entry(
15 __field(unsigned long, addr)
16 __field(unsigned long, pte)
17 ),
18
19 TP_fast_assign(
20 __entry->addr = addr;
21 __entry->pte = pte;
22 ),
23
24 TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx",
25 __entry->addr, __entry->pte)
26);
27
28TRACE_EVENT(hugepage_set_pmd,
29
30 TP_PROTO(unsigned long addr, unsigned long pmd),
31 TP_ARGS(addr, pmd),
32 TP_STRUCT__entry(
33 __field(unsigned long, addr)
34 __field(unsigned long, pmd)
35 ),
36
37 TP_fast_assign(
38 __entry->addr = addr;
39 __entry->pmd = pmd;
40 ),
41
42 TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, __entry->pmd)
43);
44
45
46TRACE_EVENT(hugepage_update,
47
48 TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, unsigned long set),
49 TP_ARGS(addr, pte, clr, set),
50 TP_STRUCT__entry(
51 __field(unsigned long, addr)
52 __field(unsigned long, pte)
53 __field(unsigned long, clr)
54 __field(unsigned long, set)
55 ),
56
57 TP_fast_assign(
58 __entry->addr = addr;
59 __entry->pte = pte;
60 __entry->clr = clr;
61 __entry->set = set;
62
63 ),
64
65 TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
66);
67TRACE_EVENT(hugepage_splitting,
68
69 TP_PROTO(unsigned long addr, unsigned long pte),
70 TP_ARGS(addr, pte),
71 TP_STRUCT__entry(
72 __field(unsigned long, addr)
73 __field(unsigned long, pte)
74 ),
75
76 TP_fast_assign(
77 __entry->addr = addr;
78 __entry->pte = pte;
79 ),
80
81 TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx",
82 __entry->addr, __entry->pte)
83);
84
85#endif /* _TRACE_THP_H */
86
87/* This part must be outside protection */
88#include <trace/define_trace.h>
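The TRACE_EVENT() definitions in the new header only turn into callable trace_*() functions once a single compilation unit instantiates them. A minimal sketch of that standard pattern, assuming the events are emitted from hugepage management code as their names suggest:

/* In exactly one .c file: instantiate the tracepoints, then fire them. */
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>

static void example_set_pmd(unsigned long addr, unsigned long pmd)
{
	/* matches TP_PROTO(unsigned long addr, unsigned long pmd) above */
	trace_hugepage_set_pmd(addr, pmd);
}

static void example_update(unsigned long addr, unsigned long pte,
			   unsigned long clr, unsigned long set)
{
	trace_hugepage_update(addr, pte, clr, set);
}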
diff --git a/include/uapi/linux/bsg.h b/include/uapi/linux/bsg.h
index 7a12e1c0f371..02986cf8b6f1 100644
--- a/include/uapi/linux/bsg.h
+++ b/include/uapi/linux/bsg.h
@@ -10,12 +10,13 @@
10#define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2 10#define BSG_SUB_PROTOCOL_SCSI_TRANSPORT 2
11 11
12/* 12/*
13 * For flags member below 13 * For flag constants below:
14 * sg.h sg_io_hdr also has bits defined for it's flags member. However 14 * sg.h sg_io_hdr also has bits defined for it's flags member. These
15 * none of these bits are implemented/used by bsg. The bits below are 15 * two flag values (0x10 and 0x20) have the same meaning in sg.h . For
16 * allocated to not conflict with sg.h ones anyway. 16 * bsg the BSG_FLAG_Q_AT_HEAD flag is ignored since it is the default.
17 */ 17 */
18#define BSG_FLAG_Q_AT_TAIL 0x10 /* default, == 0 at this bit, is Q_AT_HEAD */ 18#define BSG_FLAG_Q_AT_TAIL 0x10 /* default is Q_AT_HEAD */
19#define BSG_FLAG_Q_AT_HEAD 0x20
19 20
20struct sg_io_v4 { 21struct sg_io_v4 {
21 __s32 guard; /* [i] 'Q' to differentiate from v3 */ 22 __s32 guard; /* [i] 'Q' to differentiate from v3 */
diff --git a/include/uapi/linux/virtio_blk.h b/include/uapi/linux/virtio_blk.h
index 6d8e61c48563..9ad67b267584 100644
--- a/include/uapi/linux/virtio_blk.h
+++ b/include/uapi/linux/virtio_blk.h
@@ -40,6 +40,7 @@
40#define VIRTIO_BLK_F_WCE 9 /* Writeback mode enabled after reset */ 40#define VIRTIO_BLK_F_WCE 9 /* Writeback mode enabled after reset */
41#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */ 41#define VIRTIO_BLK_F_TOPOLOGY 10 /* Topology information is available */
42#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ 42#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */
43#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */
43 44
44#ifndef __KERNEL__ 45#ifndef __KERNEL__
45/* Old (deprecated) name for VIRTIO_BLK_F_WCE. */ 46/* Old (deprecated) name for VIRTIO_BLK_F_WCE. */
@@ -77,6 +78,10 @@ struct virtio_blk_config {
77 78
78 /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ 79 /* writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */
79 __u8 wce; 80 __u8 wce;
81 __u8 unused;
82
83 /* number of vqs, only available when VIRTIO_BLK_F_MQ is set */
84 __u16 num_queues;
80} __attribute__((packed)); 85} __attribute__((packed));
81 86
82/* 87/*
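VIRTIO_BLK_F_MQ and the num_queues config field let a device advertise more than one virtqueue. A hedged sketch of how a driver would read that field only when the feature was negotiated, using the generic virtio config accessors rather than the real virtio_blk probe code:

/* Sketch: read num_queues only if VIRTIO_BLK_F_MQ was negotiated. */
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_blk.h>

static u16 blk_num_queues(struct virtio_device *vdev)
{
	u16 num_vqs = 1;	/* default when the feature is absent */

	if (virtio_has_feature(vdev, VIRTIO_BLK_F_MQ))
		virtio_cread(vdev, struct virtio_blk_config, num_queues,
			     &num_vqs);
	return num_vqs;
}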
diff --git a/include/uapi/rdma/ib_user_mad.h b/include/uapi/rdma/ib_user_mad.h
index d6fce1cbdb90..09f809f323ea 100644
--- a/include/uapi/rdma/ib_user_mad.h
+++ b/include/uapi/rdma/ib_user_mad.h
@@ -191,6 +191,45 @@ struct ib_user_mad_reg_req {
191 __u8 rmpp_version; 191 __u8 rmpp_version;
192}; 192};
193 193
194/**
195 * ib_user_mad_reg_req2 - MAD registration request
196 *
197 * @id - Set by the _kernel_; used by userspace to identify the
198 * registered agent in future requests.
199 * @qpn - Queue pair number; must be 0 or 1.
200 * @mgmt_class - Indicates which management class of MADs should be
201 * received by the caller. This field is only required if
202 * the user wishes to receive unsolicited MADs, otherwise
203 * it should be 0.
204 * @mgmt_class_version - Indicates which version of MADs for the given
205 * management class to receive.
206 * @res - Ignored.
207 * @flags - additional registration flags; Must be in the set of
208 * flags defined in IB_USER_MAD_REG_FLAGS_CAP
209 * @method_mask - The caller wishes to receive unsolicited MADs for the
210 * methods whose bit(s) is(are) set.
211 * @oui - Indicates IEEE OUI to use when mgmt_class is a vendor
212 * class in the range from 0x30 to 0x4f. Otherwise not
213 * used.
214 * @rmpp_version - If set, indicates the RMPP version to use.
215 */
216enum {
217 IB_USER_MAD_USER_RMPP = (1 << 0),
218};
219#define IB_USER_MAD_REG_FLAGS_CAP (IB_USER_MAD_USER_RMPP)
220struct ib_user_mad_reg_req2 {
221 __u32 id;
222 __u32 qpn;
223 __u8 mgmt_class;
224 __u8 mgmt_class_version;
225 __u16 res;
226 __u32 flags;
227 __u64 method_mask[2];
228 __u32 oui;
229 __u8 rmpp_version;
230 __u8 reserved[3];
231};
232
194#define IB_IOCTL_MAGIC 0x1b 233#define IB_IOCTL_MAGIC 0x1b
195 234
196#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ 235#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \
@@ -200,4 +239,7 @@ struct ib_user_mad_reg_req {
200 239
201#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3) 240#define IB_USER_MAD_ENABLE_PKEY _IO(IB_IOCTL_MAGIC, 3)
202 241
242#define IB_USER_MAD_REGISTER_AGENT2 _IOWR(IB_IOCTL_MAGIC, 4, \
243 struct ib_user_mad_reg_req2)
244
203#endif /* IB_USER_MAD_H */ 245#endif /* IB_USER_MAD_H */
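The ib_user_mad_reg_req2 structure and the IB_USER_MAD_REGISTER_AGENT2 ioctl above let userspace opt into IB_USER_MAD_USER_RMPP. A userspace sketch of issuing the new ioctl follows; the device path, management class, and version are illustrative values, not requirements of the interface.

/* Userspace sketch; the device path and class/version values are examples only. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <rdma/ib_user_mad.h>

int main(void)
{
	struct ib_user_mad_reg_req2 req;
	int fd = open("/dev/infiniband/umad0", O_RDWR);

	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&req, 0, sizeof(req));
	req.qpn = 1;				/* GSI QP */
	req.mgmt_class = 0x03;			/* e.g. the SA management class */
	req.mgmt_class_version = 2;
	req.flags = IB_USER_MAD_USER_RMPP;	/* must stay within IB_USER_MAD_REG_FLAGS_CAP */

	if (ioctl(fd, IB_USER_MAD_REGISTER_AGENT2, &req) < 0)
		perror("IB_USER_MAD_REGISTER_AGENT2");
	else
		printf("registered agent id %u\n", req.id);

	return 0;
}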
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index cbfdd4ca9510..26daf55ff76e 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -276,6 +276,22 @@ struct ib_uverbs_reg_mr_resp {
276 __u32 rkey; 276 __u32 rkey;
277}; 277};
278 278
279struct ib_uverbs_rereg_mr {
280 __u64 response;
281 __u32 mr_handle;
282 __u32 flags;
283 __u64 start;
284 __u64 length;
285 __u64 hca_va;
286 __u32 pd_handle;
287 __u32 access_flags;
288};
289
290struct ib_uverbs_rereg_mr_resp {
291 __u32 lkey;
292 __u32 rkey;
293};
294
279struct ib_uverbs_dereg_mr { 295struct ib_uverbs_dereg_mr {
280 __u32 mr_handle; 296 __u32 mr_handle;
281}; 297};
diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h
index 99b80abf360a..3066718eb120 100644
--- a/include/uapi/rdma/rdma_user_cm.h
+++ b/include/uapi/rdma/rdma_user_cm.h
@@ -34,6 +34,7 @@
34#define RDMA_USER_CM_H 34#define RDMA_USER_CM_H
35 35
36#include <linux/types.h> 36#include <linux/types.h>
37#include <linux/socket.h>
37#include <linux/in6.h> 38#include <linux/in6.h>
38#include <rdma/ib_user_verbs.h> 39#include <rdma/ib_user_verbs.h>
39#include <rdma/ib_user_sa.h> 40#include <rdma/ib_user_sa.h>
diff --git a/init/Kconfig b/init/Kconfig
index 44f9ed3dae22..e84c6423a2e5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -268,7 +268,7 @@ config CROSS_MEMORY_ATTACH
268 help 268 help
269 Enabling this option adds the system calls process_vm_readv and 269 Enabling this option adds the system calls process_vm_readv and
270 process_vm_writev which allow a process with the correct privileges 270 process_vm_writev which allow a process with the correct privileges
271 to directly read from or write to to another process's address space. 271 to directly read from or write to another process' address space.
272 See the man page for more details. 272 See the man page for more details.
273 273
274config FHANDLE 274config FHANDLE
diff --git a/kernel/fork.c b/kernel/fork.c
index 1380d8ace334..0cf9cdb6e491 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1105,7 +1105,7 @@ static void copy_seccomp(struct task_struct *p)
1105 * needed because this new task is not yet running and cannot 1105 * needed because this new task is not yet running and cannot
1106 * be racing exec. 1106 * be racing exec.
1107 */ 1107 */
1108 BUG_ON(!spin_is_locked(&current->sighand->siglock)); 1108 assert_spin_locked(&current->sighand->siglock);
1109 1109
1110 /* Ref-count the new filter user, and assign it. */ 1110 /* Ref-count the new filter user, and assign it. */
1111 get_seccomp_filter(current); 1111 get_seccomp_filter(current);
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index de1a6bb6861d..e04c455a0e38 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -272,6 +272,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
272static char *log_buf = __log_buf; 272static char *log_buf = __log_buf;
273static u32 log_buf_len = __LOG_BUF_LEN; 273static u32 log_buf_len = __LOG_BUF_LEN;
274 274
275/* Return log buffer address */
276char *log_buf_addr_get(void)
277{
278 return log_buf;
279}
280
281/* Return log buffer size */
282u32 log_buf_len_get(void)
283{
284 return log_buf_len;
285}
286
275/* human readable text of the record */ 287/* human readable text of the record */
276static char *log_text(const struct printk_log *msg) 288static char *log_text(const struct printk_log *msg)
277{ 289{
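The two accessors expose the printk ring buffer location and size to other kernel code. A minimal sketch of a hypothetical consumer follows; only the two getters come from this patch, the surrounding structure and function are illustrative.

#include <linux/printk.h>
#include <linux/types.h>

/* Hypothetical consumer: remember where the log buffer lives so a dump
 * facility can copy it out later. */
struct log_buf_location {
	char *addr;
	u32 len;
};

static void record_log_buf(struct log_buf_location *loc)
{
	loc->addr = log_buf_addr_get();
	loc->len = log_buf_len_get();
}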
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 25b0043f4755..44eb005c6695 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -203,7 +203,7 @@ static u32 seccomp_run_filters(int syscall)
203 203
204static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode) 204static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
205{ 205{
206 BUG_ON(!spin_is_locked(&current->sighand->siglock)); 206 assert_spin_locked(&current->sighand->siglock);
207 207
208 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode) 208 if (current->seccomp.mode && current->seccomp.mode != seccomp_mode)
209 return false; 209 return false;
@@ -214,7 +214,7 @@ static inline bool seccomp_may_assign_mode(unsigned long seccomp_mode)
214static inline void seccomp_assign_mode(struct task_struct *task, 214static inline void seccomp_assign_mode(struct task_struct *task,
215 unsigned long seccomp_mode) 215 unsigned long seccomp_mode)
216{ 216{
217 BUG_ON(!spin_is_locked(&task->sighand->siglock)); 217 assert_spin_locked(&task->sighand->siglock);
218 218
219 task->seccomp.mode = seccomp_mode; 219 task->seccomp.mode = seccomp_mode;
220 /* 220 /*
@@ -253,7 +253,7 @@ static inline pid_t seccomp_can_sync_threads(void)
253 struct task_struct *thread, *caller; 253 struct task_struct *thread, *caller;
254 254
255 BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex)); 255 BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
256 BUG_ON(!spin_is_locked(&current->sighand->siglock)); 256 assert_spin_locked(&current->sighand->siglock);
257 257
258 /* Validate all threads being eligible for synchronization. */ 258 /* Validate all threads being eligible for synchronization. */
259 caller = current; 259 caller = current;
@@ -294,7 +294,7 @@ static inline void seccomp_sync_threads(void)
294 struct task_struct *thread, *caller; 294 struct task_struct *thread, *caller;
295 295
296 BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex)); 296 BUG_ON(!mutex_is_locked(&current->signal->cred_guard_mutex));
297 BUG_ON(!spin_is_locked(&current->sighand->siglock)); 297 assert_spin_locked(&current->sighand->siglock);
298 298
299 /* Synchronize all threads. */ 299 /* Synchronize all threads. */
300 caller = current; 300 caller = current;
@@ -464,7 +464,7 @@ static long seccomp_attach_filter(unsigned int flags,
464 unsigned long total_insns; 464 unsigned long total_insns;
465 struct seccomp_filter *walker; 465 struct seccomp_filter *walker;
466 466
467 BUG_ON(!spin_is_locked(&current->sighand->siglock)); 467 assert_spin_locked(&current->sighand->siglock);
468 468
469 /* Validate resulting filter length. */ 469 /* Validate resulting filter length. */
470 total_insns = filter->prog->len; 470 total_insns = filter->prog->len;
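All of the BUG_ON(!spin_is_locked(...)) sites above become assert_spin_locked(...). A plausible motivation (stated here as background, not quoted from the patch) is that spin_is_locked() can return false on uniprocessor builds where spinlocks compile away, so the BUG_ON form may fire even though the caller does hold the lock, whereas assert_spin_locked() is the canonical way to express the same contract and copes with those configurations. The resulting idiom, as a minimal sketch:

#include <linux/spinlock.h>

/* Sketch of the lock-contract idiom: the caller must hold the lock.
 * The assertion documents the requirement and, where the configuration
 * supports it, also checks it. */
static void update_counter_locked(spinlock_t *lock, unsigned long *counter)
{
	assert_spin_locked(lock);	/* caller holds lock */
	(*counter)++;
}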
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f36b02838a47..fb4a9c2cf8d9 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -338,10 +338,11 @@ EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
338 338
339static inline void update_vsyscall(struct timekeeper *tk) 339static inline void update_vsyscall(struct timekeeper *tk)
340{ 340{
341 struct timespec xt; 341 struct timespec xt, wm;
342 342
343 xt = timespec64_to_timespec(tk_xtime(tk)); 343 xt = timespec64_to_timespec(tk_xtime(tk));
344 update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->tkr.clock, tk->tkr.mult, 344 wm = timespec64_to_timespec(tk->wall_to_monotonic);
345 update_vsyscall_old(&xt, &wm, tk->tkr.clock, tk->tkr.mult,
345 tk->tkr.cycle_last); 346 tk->tkr.cycle_last);
346} 347}
347 348
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index cb45f59685e6..07c28323f88f 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -143,6 +143,30 @@ config DEBUG_INFO_REDUCED
143 DEBUG_INFO build and compile times are reduced too. 143 DEBUG_INFO build and compile times are reduced too.
144 Only works with newer gcc versions. 144 Only works with newer gcc versions.
145 145
146config DEBUG_INFO_SPLIT
147 bool "Produce split debuginfo in .dwo files"
148 depends on DEBUG_INFO
149 help
150 Generate debug info into separate .dwo files. This significantly
151 reduces the build directory size for builds with DEBUG_INFO,
152 because it stores the information only once on disk in .dwo
153 files instead of multiple times in object files and executables.
154 In addition the debug information is also compressed.
155
156 Requires recent gcc (4.7+) and recent gdb/binutils.
157 Any tool that packages or reads debug information would need
158 to know about the .dwo files and include them.
159 Incompatible with older versions of ccache.
160
161config DEBUG_INFO_DWARF4
162 bool "Generate dwarf4 debuginfo"
163 depends on DEBUG_INFO
164 help
165 Generate dwarf4 debug info. This requires recent versions
166 of gcc and gdb. It makes the debug information larger.
167 But it significantly improves the success of resolving
168 variables in gdb on optimized code.
169
146config ENABLE_WARN_DEPRECATED 170config ENABLE_WARN_DEPRECATED
147 bool "Enable __deprecated logic" 171 bool "Enable __deprecated logic"
148 default y 172 default y
diff --git a/lib/lru_cache.c b/lib/lru_cache.c
index 4a83ecd03650..852c81e3ba9a 100644
--- a/lib/lru_cache.c
+++ b/lib/lru_cache.c
@@ -169,7 +169,7 @@ out_fail:
169 return NULL; 169 return NULL;
170} 170}
171 171
172void lc_free_by_index(struct lru_cache *lc, unsigned i) 172static void lc_free_by_index(struct lru_cache *lc, unsigned i)
173{ 173{
174 void *p = lc->lc_element[i]; 174 void *p = lc->lc_element[i];
175 WARN_ON(!p); 175 WARN_ON(!p);
@@ -643,9 +643,10 @@ void lc_set(struct lru_cache *lc, unsigned int enr, int index)
643 * lc_dump - Dump a complete LRU cache to seq in textual form. 643 * lc_dump - Dump a complete LRU cache to seq in textual form.
644 * @lc: the lru cache to operate on 644 * @lc: the lru cache to operate on
645 * @seq: the &struct seq_file pointer to seq_printf into 645 * @seq: the &struct seq_file pointer to seq_printf into
646 * @utext: user supplied "heading" or other info 646 * @utext: user supplied additional "heading" or other info
647 * @detail: function pointer the user may provide to dump further details 647 * @detail: function pointer the user may provide to dump further details
648 * of the object the lc_element is embedded in. 648 * of the object the lc_element is embedded in. May be NULL.
649 * Note: a leading space ' ' and trailing newline '\n' is implied.
649 */ 650 */
650void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext, 651void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext,
651 void (*detail) (struct seq_file *, struct lc_element *)) 652 void (*detail) (struct seq_file *, struct lc_element *))
@@ -654,16 +655,18 @@ void lc_seq_dump_details(struct seq_file *seq, struct lru_cache *lc, char *utext
654 struct lc_element *e; 655 struct lc_element *e;
655 int i; 656 int i;
656 657
657 seq_printf(seq, "\tnn: lc_number refcnt %s\n ", utext); 658 seq_printf(seq, "\tnn: lc_number (new nr) refcnt %s\n ", utext);
658 for (i = 0; i < nr_elements; i++) { 659 for (i = 0; i < nr_elements; i++) {
659 e = lc_element_by_index(lc, i); 660 e = lc_element_by_index(lc, i);
660 if (e->lc_number == LC_FREE) { 661 if (e->lc_number != e->lc_new_number)
661 seq_printf(seq, "\t%2d: FREE\n", i); 662 seq_printf(seq, "\t%5d: %6d %8d %6d ",
662 } else { 663 i, e->lc_number, e->lc_new_number, e->refcnt);
663 seq_printf(seq, "\t%2d: %4u %4u ", i, 664 else
664 e->lc_number, e->refcnt); 665 seq_printf(seq, "\t%5d: %6d %-8s %6d ",
666 i, e->lc_number, "-\"-", e->refcnt);
667 if (detail)
665 detail(seq, e); 668 detail(seq, e);
666 } 669 seq_putc(seq, '\n');
667 } 670 }
668} 671}
669 672
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index e6940cf16628..a2c78810ebc1 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -38,16 +38,10 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht)
38EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); 38EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held);
39#endif 39#endif
40 40
41/** 41static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
42 * rht_obj - cast hash head to outer object
43 * @ht: hash table
44 * @he: hashed node
45 */
46void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he)
47{ 42{
48 return (void *) he - ht->p.head_offset; 43 return (void *) he - ht->p.head_offset;
49} 44}
50EXPORT_SYMBOL_GPL(rht_obj);
51 45
52static u32 __hashfn(const struct rhashtable *ht, const void *key, 46static u32 __hashfn(const struct rhashtable *ht, const void *key,
53 u32 len, u32 hsize) 47 u32 len, u32 hsize)
@@ -386,7 +380,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert);
386 * deletion when combined with walking or lookup. 380 * deletion when combined with walking or lookup.
387 */ 381 */
388void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, 382void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj,
389 struct rhash_head **pprev, gfp_t flags) 383 struct rhash_head __rcu **pprev, gfp_t flags)
390{ 384{
391 struct bucket_table *tbl = rht_dereference(ht->tbl, ht); 385 struct bucket_table *tbl = rht_dereference(ht->tbl, ht);
392 386
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 9aae6f47433f..9eebfadeeee1 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -275,6 +275,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
275 ret = res_counter_memparse_write_strategy(buf, &val); 275 ret = res_counter_memparse_write_strategy(buf, &val);
276 if (ret) 276 if (ret)
277 break; 277 break;
278 val = ALIGN(val, 1ULL << huge_page_shift(&hstates[idx]));
278 ret = res_counter_set_limit(&h_cg->hugepage[idx], val); 279 ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
279 break; 280 break;
280 default: 281 default:
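The added ALIGN() rounds the requested limit up to a whole number of huge pages before it is stored. A small worked example, with values chosen only for illustration:

#include <linux/kernel.h>	/* ALIGN() */

/* ALIGN(x, a) rounds x up to the next multiple of a (a power of two):
 *   ALIGN(x, a) == (x + a - 1) & ~(a - 1)
 * With 2 MiB huge pages and a 5 MiB request:
 *   ALIGN(0x500000, 0x200000) == (0x500000 + 0x1fffff) & ~0x1fffff
 *                             == 0x600000  (6 MiB, three huge pages) */
static unsigned long long example_limit(void)
{
	return ALIGN(5ULL << 20, 1ULL << 21);	/* == 6 MiB */
}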
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 4c5b8ba0f84f..e4853b50cf40 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -833,7 +833,6 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
833 loff_t *l) 833 loff_t *l)
834{ 834{
835 struct hlist_node *e = state->node; 835 struct hlist_node *e = state->node;
836 struct lec_arp_table *tmp;
837 836
838 if (!e) 837 if (!e)
839 e = tbl->first; 838 e = tbl->first;
@@ -842,9 +841,7 @@ static void *lec_tbl_walk(struct lec_state *state, struct hlist_head *tbl,
842 --*l; 841 --*l;
843 } 842 }
844 843
845 tmp = container_of(e, struct lec_arp_table, next); 844 for (; e; e = e->next) {
846
847 hlist_for_each_entry_from(tmp, next) {
848 if (--*l < 0) 845 if (--*l < 0)
849 break; 846 break;
850 } 847 }
diff --git a/net/atm/svc.c b/net/atm/svc.c
index d8e5d0c2ebbc..1ba23f5018e7 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -50,12 +50,12 @@ static void svc_disconnect(struct atm_vcc *vcc)
50 50
51 pr_debug("%p\n", vcc); 51 pr_debug("%p\n", vcc);
52 if (test_bit(ATM_VF_REGIS, &vcc->flags)) { 52 if (test_bit(ATM_VF_REGIS, &vcc->flags)) {
53 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
54 sigd_enq(vcc, as_close, NULL, NULL, NULL); 53 sigd_enq(vcc, as_close, NULL, NULL, NULL);
55 while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) { 54 for (;;) {
55 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
56 if (test_bit(ATM_VF_RELEASED, &vcc->flags) || !sigd)
57 break;
56 schedule(); 58 schedule();
57 prepare_to_wait(sk_sleep(sk), &wait,
58 TASK_UNINTERRUPTIBLE);
59 } 59 }
60 finish_wait(sk_sleep(sk), &wait); 60 finish_wait(sk_sleep(sk), &wait);
61 } 61 }
@@ -126,11 +126,12 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr,
126 } 126 }
127 vcc->local = *addr; 127 vcc->local = *addr;
128 set_bit(ATM_VF_WAITING, &vcc->flags); 128 set_bit(ATM_VF_WAITING, &vcc->flags);
129 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
130 sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local); 129 sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local);
131 while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { 130 for (;;) {
132 schedule();
133 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); 131 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
132 if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
133 break;
134 schedule();
134 } 135 }
135 finish_wait(sk_sleep(sk), &wait); 136 finish_wait(sk_sleep(sk), &wait);
136 clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */ 137 clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */
@@ -202,15 +203,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
202 } 203 }
203 vcc->remote = *addr; 204 vcc->remote = *addr;
204 set_bit(ATM_VF_WAITING, &vcc->flags); 205 set_bit(ATM_VF_WAITING, &vcc->flags);
205 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
206 sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote); 206 sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote);
207 if (flags & O_NONBLOCK) { 207 if (flags & O_NONBLOCK) {
208 finish_wait(sk_sleep(sk), &wait);
209 sock->state = SS_CONNECTING; 208 sock->state = SS_CONNECTING;
210 error = -EINPROGRESS; 209 error = -EINPROGRESS;
211 goto out; 210 goto out;
212 } 211 }
213 error = 0; 212 error = 0;
213 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
214 while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { 214 while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
215 schedule(); 215 schedule();
216 if (!signal_pending(current)) { 216 if (!signal_pending(current)) {
@@ -297,11 +297,12 @@ static int svc_listen(struct socket *sock, int backlog)
297 goto out; 297 goto out;
298 } 298 }
299 set_bit(ATM_VF_WAITING, &vcc->flags); 299 set_bit(ATM_VF_WAITING, &vcc->flags);
300 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
301 sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local); 300 sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local);
302 while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { 301 for (;;) {
303 schedule();
304 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); 302 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
303 if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
304 break;
305 schedule();
305 } 306 }
306 finish_wait(sk_sleep(sk), &wait); 307 finish_wait(sk_sleep(sk), &wait);
307 if (!sigd) { 308 if (!sigd) {
@@ -387,15 +388,15 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
387 } 388 }
388 /* wait should be short, so we ignore the non-blocking flag */ 389 /* wait should be short, so we ignore the non-blocking flag */
389 set_bit(ATM_VF_WAITING, &new_vcc->flags); 390 set_bit(ATM_VF_WAITING, &new_vcc->flags);
390 prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
391 TASK_UNINTERRUPTIBLE);
392 sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL); 391 sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL);
393 while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) { 392 for (;;) {
393 prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
394 TASK_UNINTERRUPTIBLE);
395 if (!test_bit(ATM_VF_WAITING, &new_vcc->flags) || !sigd)
396 break;
394 release_sock(sk); 397 release_sock(sk);
395 schedule(); 398 schedule();
396 lock_sock(sk); 399 lock_sock(sk);
397 prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
398 TASK_UNINTERRUPTIBLE);
399 } 400 }
400 finish_wait(sk_sleep(sk_atm(new_vcc)), &wait); 401 finish_wait(sk_sleep(sk_atm(new_vcc)), &wait);
401 if (!sigd) { 402 if (!sigd) {
@@ -433,12 +434,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
433 DEFINE_WAIT(wait); 434 DEFINE_WAIT(wait);
434 435
435 set_bit(ATM_VF_WAITING, &vcc->flags); 436 set_bit(ATM_VF_WAITING, &vcc->flags);
436 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
437 sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0); 437 sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0);
438 while (test_bit(ATM_VF_WAITING, &vcc->flags) && 438 for (;;) {
439 !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
440 schedule();
441 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE); 439 prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
440 if (!test_bit(ATM_VF_WAITING, &vcc->flags) ||
441 test_bit(ATM_VF_RELEASED, &vcc->flags) || !sigd) {
442 break;
443 }
444 schedule();
442 } 445 }
443 finish_wait(sk_sleep(sk), &wait); 446 finish_wait(sk_sleep(sk), &wait);
444 if (!sigd) 447 if (!sigd)
@@ -529,18 +532,18 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr,
529 532
530 lock_sock(sk); 533 lock_sock(sk);
531 set_bit(ATM_VF_WAITING, &vcc->flags); 534 set_bit(ATM_VF_WAITING, &vcc->flags);
532 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
533 sigd_enq(vcc, as_addparty, NULL, NULL, 535 sigd_enq(vcc, as_addparty, NULL, NULL,
534 (struct sockaddr_atmsvc *) sockaddr); 536 (struct sockaddr_atmsvc *) sockaddr);
535 if (flags & O_NONBLOCK) { 537 if (flags & O_NONBLOCK) {
536 finish_wait(sk_sleep(sk), &wait);
537 error = -EINPROGRESS; 538 error = -EINPROGRESS;
538 goto out; 539 goto out;
539 } 540 }
540 pr_debug("added wait queue\n"); 541 pr_debug("added wait queue\n");
541 while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { 542 for (;;) {
542 schedule();
543 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 543 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
544 if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
545 break;
546 schedule();
544 } 547 }
545 finish_wait(sk_sleep(sk), &wait); 548 finish_wait(sk_sleep(sk), &wait);
546 error = xchg(&sk->sk_err_soft, 0); 549 error = xchg(&sk->sk_err_soft, 0);
@@ -558,11 +561,12 @@ static int svc_dropparty(struct socket *sock, int ep_ref)
558 561
559 lock_sock(sk); 562 lock_sock(sk);
560 set_bit(ATM_VF_WAITING, &vcc->flags); 563 set_bit(ATM_VF_WAITING, &vcc->flags);
561 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
562 sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref); 564 sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref);
563 while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) { 565 for (;;) {
564 schedule();
565 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); 566 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
567 if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
568 break;
569 schedule();
566 } 570 }
567 finish_wait(sk_sleep(sk), &wait); 571 finish_wait(sk_sleep(sk), &wait);
568 if (!sigd) { 572 if (!sigd) {
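Every hunk in svc.c applies the same transformation: the request is queued to sigd first, and prepare_to_wait() is then called at the top of each loop iteration, immediately before the wake-up condition is rechecked, instead of once before queuing and again after schedule(). The common shape, extracted as a sketch (the helper name is illustrative, not from the patch):

#include <linux/wait.h>
#include <linux/sched.h>

/* Generic shape of the converted wait loops in svc.c. */
static void svc_wait_for_sigd(struct sock *sk, struct atm_vcc *vcc)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
		if (!test_bit(ATM_VF_WAITING, &vcc->flags) || !sigd)
			break;		/* reply arrived or the daemon is gone */
		schedule();
	}
	finish_wait(sk_sleep(sk), &wait);
}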
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 181b70ebd964..541f26a67ba2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1188,13 +1188,6 @@ new_segment:
1188 goto wait_for_memory; 1188 goto wait_for_memory;
1189 1189
1190 /* 1190 /*
1191 * All packets are restored as if they have
1192 * already been sent.
1193 */
1194 if (tp->repair)
1195 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1196
1197 /*
1198 * Check whether we can use HW checksum. 1191 * Check whether we can use HW checksum.
1199 */ 1192 */
1200 if (sk->sk_route_caps & NETIF_F_ALL_CSUM) 1193 if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -1203,6 +1196,13 @@ new_segment:
1203 skb_entail(sk, skb); 1196 skb_entail(sk, skb);
1204 copy = size_goal; 1197 copy = size_goal;
1205 max = size_goal; 1198 max = size_goal;
1199
1200 /* All packets are restored as if they have
1201 * already been sent. skb_mstamp isn't set to
1202 * avoid wrong rtt estimation.
1203 */
1204 if (tp->repair)
1205 TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED;
1206 } 1206 }
1207 1207
1208 /* Try to append data to the end of skb. */ 1208 /* Try to append data to the end of skb. */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a3d47af01906..a906e0200ff2 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2687,7 +2687,6 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
2687 */ 2687 */
2688static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) 2688static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2689{ 2689{
2690 struct inet_connection_sock *icsk = inet_csk(sk);
2691 struct tcp_sock *tp = tcp_sk(sk); 2690 struct tcp_sock *tp = tcp_sk(sk);
2692 bool recovered = !before(tp->snd_una, tp->high_seq); 2691 bool recovered = !before(tp->snd_una, tp->high_seq);
2693 2692
@@ -2713,12 +2712,9 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
2713 2712
2714 if (recovered) { 2713 if (recovered) {
2715 /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */ 2714 /* F-RTO RFC5682 sec 3.1 step 2.a and 1st part of step 3.a */
2716 icsk->icsk_retransmits = 0;
2717 tcp_try_undo_recovery(sk); 2715 tcp_try_undo_recovery(sk);
2718 return; 2716 return;
2719 } 2717 }
2720 if (flag & FLAG_DATA_ACKED)
2721 icsk->icsk_retransmits = 0;
2722 if (tcp_is_reno(tp)) { 2718 if (tcp_is_reno(tp)) {
2723 /* A Reno DUPACK means new data in F-RTO step 2.b above are 2719 /* A Reno DUPACK means new data in F-RTO step 2.b above are
2724 * delivered. Lower inflight to clock out (re)transmissions. 2720 * delivered. Lower inflight to clock out (re)transmissions.
@@ -3050,10 +3046,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3050 first_ackt.v64 = 0; 3046 first_ackt.v64 = 0;
3051 3047
3052 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { 3048 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3049 struct skb_shared_info *shinfo = skb_shinfo(skb);
3053 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 3050 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3054 u8 sacked = scb->sacked; 3051 u8 sacked = scb->sacked;
3055 u32 acked_pcount; 3052 u32 acked_pcount;
3056 3053
3054 if (unlikely(shinfo->tx_flags & SKBTX_ACK_TSTAMP) &&
3055 between(shinfo->tskey, prior_snd_una, tp->snd_una - 1))
3056 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3057
3057 /* Determine how many packets and what bytes were acked, tso and else */ 3058 /* Determine how many packets and what bytes were acked, tso and else */
3058 if (after(scb->end_seq, tp->snd_una)) { 3059 if (after(scb->end_seq, tp->snd_una)) {
3059 if (tcp_skb_pcount(skb) == 1 || 3060 if (tcp_skb_pcount(skb) == 1 ||
@@ -3107,11 +3108,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3107 tp->retrans_stamp = 0; 3108 tp->retrans_stamp = 0;
3108 } 3109 }
3109 3110
3110 if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_ACK_TSTAMP) &&
3111 between(skb_shinfo(skb)->tskey, prior_snd_una,
3112 tp->snd_una + 1))
3113 __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK);
3114
3115 if (!fully_acked) 3111 if (!fully_acked)
3116 break; 3112 break;
3117 3113
@@ -3405,8 +3401,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3405 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) 3401 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
3406 tcp_rearm_rto(sk); 3402 tcp_rearm_rto(sk);
3407 3403
3408 if (after(ack, prior_snd_una)) 3404 if (after(ack, prior_snd_una)) {
3409 flag |= FLAG_SND_UNA_ADVANCED; 3405 flag |= FLAG_SND_UNA_ADVANCED;
3406 icsk->icsk_retransmits = 0;
3407 }
3410 3408
3411 prior_fackets = tp->fackets_out; 3409 prior_fackets = tp->fackets_out;
3412 3410
@@ -5979,12 +5977,14 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
5979 * timewait bucket, so that all the necessary checks 5977 * timewait bucket, so that all the necessary checks
5980 * are made in the function processing timewait state. 5978 * are made in the function processing timewait state.
5981 */ 5979 */
5982 if (tmp_opt.saw_tstamp && tcp_death_row.sysctl_tw_recycle) { 5980 if (tcp_death_row.sysctl_tw_recycle) {
5983 bool strict; 5981 bool strict;
5984 5982
5985 dst = af_ops->route_req(sk, &fl, req, &strict); 5983 dst = af_ops->route_req(sk, &fl, req, &strict);
5984
5986 if (dst && strict && 5985 if (dst && strict &&
5987 !tcp_peer_is_proven(req, dst, true)) { 5986 !tcp_peer_is_proven(req, dst, true,
5987 tmp_opt.saw_tstamp)) {
5988 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED); 5988 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
5989 goto drop_and_release; 5989 goto drop_and_release;
5990 } 5990 }
@@ -5993,7 +5993,8 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
5993 else if (!sysctl_tcp_syncookies && 5993 else if (!sysctl_tcp_syncookies &&
5994 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < 5994 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
5995 (sysctl_max_syn_backlog >> 2)) && 5995 (sysctl_max_syn_backlog >> 2)) &&
5996 !tcp_peer_is_proven(req, dst, false)) { 5996 !tcp_peer_is_proven(req, dst, false,
5997 tmp_opt.saw_tstamp)) {
5997 /* Without syncookies last quarter of 5998 /* Without syncookies last quarter of
5998 * backlog is filled with destinations, 5999 * backlog is filled with destinations,
5999 * proven to be alive. 6000 * proven to be alive.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index dceff5fe8e66..cd17f009aede 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -271,7 +271,7 @@ EXPORT_SYMBOL(tcp_v4_connect);
271 * It can be called through tcp_release_cb() if socket was owned by user 271 * It can be called through tcp_release_cb() if socket was owned by user
272 * at the time tcp_v4_err() was called to handle ICMP message. 272 * at the time tcp_v4_err() was called to handle ICMP message.
273 */ 273 */
274static void tcp_v4_mtu_reduced(struct sock *sk) 274void tcp_v4_mtu_reduced(struct sock *sk)
275{ 275{
276 struct dst_entry *dst; 276 struct dst_entry *dst;
277 struct inet_sock *inet = inet_sk(sk); 277 struct inet_sock *inet = inet_sk(sk);
@@ -302,6 +302,7 @@ static void tcp_v4_mtu_reduced(struct sock *sk)
302 tcp_simple_retransmit(sk); 302 tcp_simple_retransmit(sk);
303 } /* else let the usual retransmit timer handle it */ 303 } /* else let the usual retransmit timer handle it */
304} 304}
305EXPORT_SYMBOL(tcp_v4_mtu_reduced);
305 306
306static void do_redirect(struct sk_buff *skb, struct sock *sk) 307static void do_redirect(struct sk_buff *skb, struct sock *sk)
307{ 308{
@@ -1787,6 +1788,7 @@ const struct inet_connection_sock_af_ops ipv4_specific = {
1787 .compat_setsockopt = compat_ip_setsockopt, 1788 .compat_setsockopt = compat_ip_setsockopt,
1788 .compat_getsockopt = compat_ip_getsockopt, 1789 .compat_getsockopt = compat_ip_getsockopt,
1789#endif 1790#endif
1791 .mtu_reduced = tcp_v4_mtu_reduced,
1790}; 1792};
1791EXPORT_SYMBOL(ipv4_specific); 1793EXPORT_SYMBOL(ipv4_specific);
1792 1794
@@ -2406,7 +2408,6 @@ struct proto tcp_prot = {
2406 .sendpage = tcp_sendpage, 2408 .sendpage = tcp_sendpage,
2407 .backlog_rcv = tcp_v4_do_rcv, 2409 .backlog_rcv = tcp_v4_do_rcv,
2408 .release_cb = tcp_release_cb, 2410 .release_cb = tcp_release_cb,
2409 .mtu_reduced = tcp_v4_mtu_reduced,
2410 .hash = inet_hash, 2411 .hash = inet_hash,
2411 .unhash = inet_unhash, 2412 .unhash = inet_unhash,
2412 .get_port = inet_csk_get_port, 2413 .get_port = inet_csk_get_port,
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0d54e59b9ea8..ed9c9a91851c 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -576,7 +576,8 @@ reset:
576 tp->snd_cwnd_stamp = tcp_time_stamp; 576 tp->snd_cwnd_stamp = tcp_time_stamp;
577} 577}
578 578
579bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool paws_check) 579bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst,
580 bool paws_check, bool timestamps)
580{ 581{
581 struct tcp_metrics_block *tm; 582 struct tcp_metrics_block *tm;
582 bool ret; 583 bool ret;
@@ -589,7 +590,8 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst, bool pa
589 if (paws_check) { 590 if (paws_check) {
590 if (tm && 591 if (tm &&
591 (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL && 592 (u32)get_seconds() - tm->tcpm_ts_stamp < TCP_PAWS_MSL &&
592 (s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW) 593 ((s32)(tm->tcpm_ts - req->ts_recent) > TCP_PAWS_WINDOW ||
594 !timestamps))
593 ret = false; 595 ret = false;
594 else 596 else
595 ret = true; 597 ret = true;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8fcfc91964ec..5a7c41fbc6d3 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -800,7 +800,7 @@ void tcp_release_cb(struct sock *sk)
800 __sock_put(sk); 800 __sock_put(sk);
801 } 801 }
802 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) { 802 if (flags & (1UL << TCP_MTU_REDUCED_DEFERRED)) {
803 sk->sk_prot->mtu_reduced(sk); 803 inet_csk(sk)->icsk_af_ops->mtu_reduced(sk);
804 __sock_put(sk); 804 __sock_put(sk);
805 } 805 }
806} 806}
@@ -1069,6 +1069,21 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de
1069 tcp_verify_left_out(tp); 1069 tcp_verify_left_out(tp);
1070} 1070}
1071 1071
1072static void tcp_fragment_tstamp(struct sk_buff *skb, struct sk_buff *skb2)
1073{
1074 struct skb_shared_info *shinfo = skb_shinfo(skb);
1075
1076 if (unlikely(shinfo->tx_flags & SKBTX_ANY_TSTAMP) &&
1077 !before(shinfo->tskey, TCP_SKB_CB(skb2)->seq)) {
1078 struct skb_shared_info *shinfo2 = skb_shinfo(skb2);
1079 u8 tsflags = shinfo->tx_flags & SKBTX_ANY_TSTAMP;
1080
1081 shinfo->tx_flags &= ~tsflags;
1082 shinfo2->tx_flags |= tsflags;
1083 swap(shinfo->tskey, shinfo2->tskey);
1084 }
1085}
1086
1072/* Function to create two new TCP segments. Shrinks the given segment 1087/* Function to create two new TCP segments. Shrinks the given segment
1073 * to the specified size and appends a new segment with the rest of the 1088 * to the specified size and appends a new segment with the rest of the
1074 * packet to the list. This won't be called frequently, I hope. 1089 * packet to the list. This won't be called frequently, I hope.
@@ -1136,6 +1151,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
1136 */ 1151 */
1137 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; 1152 TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
1138 buff->tstamp = skb->tstamp; 1153 buff->tstamp = skb->tstamp;
1154 tcp_fragment_tstamp(skb, buff);
1139 1155
1140 old_factor = tcp_skb_pcount(skb); 1156 old_factor = tcp_skb_pcount(skb);
1141 1157
@@ -1652,6 +1668,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1652 1668
1653 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; 1669 buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL;
1654 skb_split(skb, buff, len); 1670 skb_split(skb, buff, len);
1671 tcp_fragment_tstamp(skb, buff);
1655 1672
1656 /* Fix up tso_factor for both original and new SKB. */ 1673 /* Fix up tso_factor for both original and new SKB. */
1657 tcp_set_skb_tso_segs(sk, skb, mss_now); 1674 tcp_set_skb_tso_segs(sk, skb, mss_now);
@@ -1917,8 +1934,11 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
1917 tso_segs = tcp_init_tso_segs(sk, skb, mss_now); 1934 tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
1918 BUG_ON(!tso_segs); 1935 BUG_ON(!tso_segs);
1919 1936
1920 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) 1937 if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) {
1938 /* "when" is used as a start point for the retransmit timer */
1939 TCP_SKB_CB(skb)->when = tcp_time_stamp;
1921 goto repair; /* Skip network transmission */ 1940 goto repair; /* Skip network transmission */
1941 }
1922 1942
1923 cwnd_quota = tcp_cwnd_test(tp, skb); 1943 cwnd_quota = tcp_cwnd_test(tp, skb);
1924 if (!cwnd_quota) { 1944 if (!cwnd_quota) {
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2e9ba035fb5f..6163f851dc01 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -101,19 +101,19 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net,
101 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { 101 for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) {
102 if (local == t->parms.iph.saddr && 102 if (local == t->parms.iph.saddr &&
103 remote == t->parms.iph.daddr && 103 remote == t->parms.iph.daddr &&
104 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 104 (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
105 (t->dev->flags & IFF_UP)) 105 (t->dev->flags & IFF_UP))
106 return t; 106 return t;
107 } 107 }
108 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { 108 for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) {
109 if (remote == t->parms.iph.daddr && 109 if (remote == t->parms.iph.daddr &&
110 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 110 (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
111 (t->dev->flags & IFF_UP)) 111 (t->dev->flags & IFF_UP))
112 return t; 112 return t;
113 } 113 }
114 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { 114 for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) {
115 if (local == t->parms.iph.saddr && 115 if (local == t->parms.iph.saddr &&
116 (!dev || !t->parms.link || dev->iflink == t->parms.link) && 116 (!dev || !t->parms.link || dev->ifindex == t->parms.link) &&
117 (t->dev->flags & IFF_UP)) 117 (t->dev->flags & IFF_UP))
118 return t; 118 return t;
119 } 119 }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f2ce95502392..29964c3d363c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1595,6 +1595,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific = {
1595 .compat_setsockopt = compat_ipv6_setsockopt, 1595 .compat_setsockopt = compat_ipv6_setsockopt,
1596 .compat_getsockopt = compat_ipv6_getsockopt, 1596 .compat_getsockopt = compat_ipv6_getsockopt,
1597#endif 1597#endif
1598 .mtu_reduced = tcp_v6_mtu_reduced,
1598}; 1599};
1599 1600
1600#ifdef CONFIG_TCP_MD5SIG 1601#ifdef CONFIG_TCP_MD5SIG
@@ -1625,6 +1626,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
1625 .compat_setsockopt = compat_ipv6_setsockopt, 1626 .compat_setsockopt = compat_ipv6_setsockopt,
1626 .compat_getsockopt = compat_ipv6_getsockopt, 1627 .compat_getsockopt = compat_ipv6_getsockopt,
1627#endif 1628#endif
1629 .mtu_reduced = tcp_v4_mtu_reduced,
1628}; 1630};
1629 1631
1630#ifdef CONFIG_TCP_MD5SIG 1632#ifdef CONFIG_TCP_MD5SIG
@@ -1864,7 +1866,6 @@ struct proto tcpv6_prot = {
1864 .sendpage = tcp_sendpage, 1866 .sendpage = tcp_sendpage,
1865 .backlog_rcv = tcp_v6_do_rcv, 1867 .backlog_rcv = tcp_v6_do_rcv,
1866 .release_cb = tcp_release_cb, 1868 .release_cb = tcp_release_cb,
1867 .mtu_reduced = tcp_v6_mtu_reduced,
1868 .hash = tcp_v6_hash, 1869 .hash = tcp_v6_hash,
1869 .unhash = inet_unhash, 1870 .unhash = inet_unhash,
1870 .get_port = inet_csk_get_port, 1871 .get_port = inet_csk_get_port,
diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c
index 9ea0c933b9ff..a37998c6273d 100644
--- a/net/irda/irlap_frame.c
+++ b/net/irda/irlap_frame.c
@@ -622,7 +622,7 @@ void irlap_send_rd_frame(struct irlap_cb *self)
622 frame = (struct rd_frame *)skb_put(tx_skb, 2); 622 frame = (struct rd_frame *)skb_put(tx_skb, 2);
623 623
624 frame->caddr = self->caddr; 624 frame->caddr = self->caddr;
625 frame->caddr = RD_RSP | PF_BIT; 625 frame->control = RD_RSP | PF_BIT;
626 626
627 irlap_queue_xmit(self, tx_skb); 627 irlap_queue_xmit(self, tx_skb);
628} 628}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2e152e5f2186..c416725d28c4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2921,6 +2921,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
2921} 2921}
2922 2922
2923static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) 2923static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
2924 __acquires(RCU)
2924{ 2925{
2925 rcu_read_lock(); 2926 rcu_read_lock();
2926 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN; 2927 return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
@@ -2970,6 +2971,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2970} 2971}
2971 2972
2972static void netlink_seq_stop(struct seq_file *seq, void *v) 2973static void netlink_seq_stop(struct seq_file *seq, void *v)
2974 __releases(RCU)
2973{ 2975{
2974 rcu_read_unlock(); 2976 rcu_read_unlock();
2975} 2977}
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 702fb21bfe15..6d8f2ec481d9 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -137,8 +137,10 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
137 vport->ops = ops; 137 vport->ops = ops;
138 INIT_HLIST_NODE(&vport->dp_hash_node); 138 INIT_HLIST_NODE(&vport->dp_hash_node);
139 139
140 if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) 140 if (ovs_vport_set_upcall_portids(vport, parms->upcall_portids)) {
141 kfree(vport);
141 return ERR_PTR(-EINVAL); 142 return ERR_PTR(-EINVAL);
143 }
142 144
143 vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); 145 vport->percpu_stats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
144 if (!vport->percpu_stats) { 146 if (!vport->percpu_stats) {
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 122f95c95869..8a9a4e1c7eab 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -215,11 +215,13 @@ else
215arg-check = $(if $(strip $(cmd_$@)),,1) 215arg-check = $(if $(strip $(cmd_$@)),,1)
216endif 216endif
217 217
218# >'< substitution is for echo to work, 218# Replace >$< with >$$< to preserve $ when reloading the .cmd file
219# >$< substitution to preserve $ when reloading .cmd file 219# (needed for make)
220# note: when using inline perl scripts [perl -e '...$$t=1;...'] 220# Replace >#< with >\#< to avoid starting a comment in the .cmd file
221# in $(cmd_xxx) double $$ your perl vars 221# (needed for make)
222make-cmd = $(subst \\,\\\\,$(subst \#,\\\#,$(subst $$,$$$$,$(call escsq,$(cmd_$(1)))))) 222# Replace >'< with >'\''< to be able to enclose the whole string in '...'
223# (needed for the shell)
224make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1)))))
223 225
224# Find any prerequisites that is newer than target or that does not exist. 226# Find any prerequisites that is newer than target or that does not exist.
225# PHONY targets skipped in both cases. 227# PHONY targets skipped in both cases.
@@ -230,7 +232,7 @@ any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^)
230if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ 232if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
231 @set -e; \ 233 @set -e; \
232 $(echo-cmd) $(cmd_$(1)); \ 234 $(echo-cmd) $(cmd_$(1)); \
233 echo 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) 235 printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
234 236
235# Execute the command and also postprocess generated .d dependencies file. 237# Execute the command and also postprocess generated .d dependencies file.
236if_changed_dep = $(if $(strip $(any-prereq) $(arg-check) ), \ 238if_changed_dep = $(if $(strip $(any-prereq) $(arg-check) ), \
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index 686cb0d31c7c..a651cee84f2a 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean
@@ -40,8 +40,8 @@ subdir-ymn := $(addprefix $(obj)/,$(subdir-ymn))
40# build a list of files to remove, usually relative to the current 40# build a list of files to remove, usually relative to the current
41# directory 41# directory
42 42
43__clean-files := $(extra-y) $(always) \ 43__clean-files := $(extra-y) $(extra-m) $(extra-) \
44 $(targets) $(clean-files) \ 44 $(always) $(targets) $(clean-files) \
45 $(host-progs) \ 45 $(host-progs) \
46 $(hostprogs-y) $(hostprogs-m) $(hostprogs-) 46 $(hostprogs-y) $(hostprogs-m) $(hostprogs-)
47 47
diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 65643506c71c..f734033af219 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -26,16 +26,6 @@ warning-1 += $(call cc-option, -Wmissing-include-dirs)
26warning-1 += $(call cc-option, -Wunused-but-set-variable) 26warning-1 += $(call cc-option, -Wunused-but-set-variable)
27warning-1 += $(call cc-disable-warning, missing-field-initializers) 27warning-1 += $(call cc-disable-warning, missing-field-initializers)
28 28
29# Clang
30warning-1 += $(call cc-disable-warning, initializer-overrides)
31warning-1 += $(call cc-disable-warning, unused-value)
32warning-1 += $(call cc-disable-warning, format)
33warning-1 += $(call cc-disable-warning, unknown-warning-option)
34warning-1 += $(call cc-disable-warning, sign-compare)
35warning-1 += $(call cc-disable-warning, format-zero-length)
36warning-1 += $(call cc-disable-warning, uninitialized)
37warning-1 += $(call cc-option, -fcatch-undefined-behavior)
38
39warning-2 := -Waggregate-return 29warning-2 := -Waggregate-return
40warning-2 += -Wcast-align 30warning-2 += -Wcast-align
41warning-2 += -Wdisabled-optimization 31warning-2 += -Wdisabled-optimization
@@ -64,4 +54,15 @@ ifeq ("$(strip $(warning))","")
64endif 54endif
65 55
66KBUILD_CFLAGS += $(warning) 56KBUILD_CFLAGS += $(warning)
57else
58
59ifeq ($(COMPILER),clang)
60KBUILD_CFLAGS += $(call cc-disable-warning, initializer-overrides)
61KBUILD_CFLAGS += $(call cc-disable-warning, unused-value)
62KBUILD_CFLAGS += $(call cc-disable-warning, format)
63KBUILD_CFLAGS += $(call cc-disable-warning, unknown-warning-option)
64KBUILD_CFLAGS += $(call cc-disable-warning, sign-compare)
65KBUILD_CFLAGS += $(call cc-disable-warning, format-zero-length)
66KBUILD_CFLAGS += $(call cc-disable-warning, uninitialized)
67endif
67endif 68endif
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 66893643fd7d..ab5980f91714 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -20,21 +20,12 @@
20# Will compile qconf as a C++ program, and menu as a C program. 20# Will compile qconf as a C++ program, and menu as a C program.
21# They are linked as C++ code to the executable qconf 21# They are linked as C++ code to the executable qconf
22 22
23# hostprogs-y := conf
24# conf-objs := conf.o libkconfig.so
25# libkconfig-objs := expr.o type.o
26# Will create a shared library named libkconfig.so that consists of
27# expr.o and type.o (they are both compiled as C code and the object files
28# are made as position independent code).
29# conf.c is compiled as a C program, and conf.o is linked together with
30# libkconfig.so as the executable conf.
31# Note: Shared libraries consisting of C++ files are not supported
32
33__hostprogs := $(sort $(hostprogs-y) $(hostprogs-m)) 23__hostprogs := $(sort $(hostprogs-y) $(hostprogs-m))
34 24
35# C code 25# C code
36# Executables compiled from a single .c file 26# Executables compiled from a single .c file
37host-csingle := $(foreach m,$(__hostprogs),$(if $($(m)-objs),,$(m))) 27host-csingle := $(foreach m,$(__hostprogs), \
28 $(if $($(m)-objs)$($(m)-cxxobjs),,$(m)))
38 29
39# C executables linked based on several .o files 30# C executables linked based on several .o files
40host-cmulti := $(foreach m,$(__hostprogs),\ 31host-cmulti := $(foreach m,$(__hostprogs),\
@@ -44,33 +35,17 @@ host-cmulti := $(foreach m,$(__hostprogs),\
44host-cobjs := $(sort $(foreach m,$(__hostprogs),$($(m)-objs))) 35host-cobjs := $(sort $(foreach m,$(__hostprogs),$($(m)-objs)))
45 36
46# C++ code 37# C++ code
47# C++ executables compiled from at least on .cc file 38# C++ executables compiled from at least one .cc file
48# and zero or more .c files 39# and zero or more .c files
49host-cxxmulti := $(foreach m,$(__hostprogs),$(if $($(m)-cxxobjs),$(m))) 40host-cxxmulti := $(foreach m,$(__hostprogs),$(if $($(m)-cxxobjs),$(m)))
50 41
51# C++ Object (.o) files compiled from .cc files 42# C++ Object (.o) files compiled from .cc files
52host-cxxobjs := $(sort $(foreach m,$(host-cxxmulti),$($(m)-cxxobjs))) 43host-cxxobjs := $(sort $(foreach m,$(host-cxxmulti),$($(m)-cxxobjs)))
53 44
54# Shared libaries (only .c supported)
55# Shared libraries (.so) - all .so files referenced in "xxx-objs"
56host-cshlib := $(sort $(filter %.so, $(host-cobjs)))
57# Remove .so files from "xxx-objs"
58host-cobjs := $(filter-out %.so,$(host-cobjs))
59
60#Object (.o) files used by the shared libaries
61host-cshobjs := $(sort $(foreach m,$(host-cshlib),$($(m:.so=-objs))))
62
63# output directory for programs/.o files 45# output directory for programs/.o files
64# hostprogs-y := tools/build may have been specified. Retrieve directory 46# hostprogs-y := tools/build may have been specified.
65host-objdirs := $(foreach f,$(__hostprogs), $(if $(dir $(f)),$(dir $(f)))) 47# Retrieve also directory of .o files from prog-objs or prog-cxxobjs notation
66# directory of .o files from prog-objs notation 48host-objdirs := $(dir $(__hostprogs) $(host-cobjs) $(host-cxxobjs))
67host-objdirs += $(foreach f,$(host-cmulti), \
68 $(foreach m,$($(f)-objs), \
69 $(if $(dir $(m)),$(dir $(m)))))
70# directory of .o files from prog-cxxobjs notation
71host-objdirs += $(foreach f,$(host-cxxmulti), \
72 $(foreach m,$($(f)-cxxobjs), \
73 $(if $(dir $(m)),$(dir $(m)))))
74 49
75host-objdirs := $(strip $(sort $(filter-out ./,$(host-objdirs)))) 50host-objdirs := $(strip $(sort $(filter-out ./,$(host-objdirs))))
76 51
@@ -81,8 +56,6 @@ host-cmulti := $(addprefix $(obj)/,$(host-cmulti))
81host-cobjs := $(addprefix $(obj)/,$(host-cobjs)) 56host-cobjs := $(addprefix $(obj)/,$(host-cobjs))
82host-cxxmulti := $(addprefix $(obj)/,$(host-cxxmulti)) 57host-cxxmulti := $(addprefix $(obj)/,$(host-cxxmulti))
83host-cxxobjs := $(addprefix $(obj)/,$(host-cxxobjs)) 58host-cxxobjs := $(addprefix $(obj)/,$(host-cxxobjs))
84host-cshlib := $(addprefix $(obj)/,$(host-cshlib))
85host-cshobjs := $(addprefix $(obj)/,$(host-cshobjs))
86host-objdirs := $(addprefix $(obj)/,$(host-objdirs)) 59host-objdirs := $(addprefix $(obj)/,$(host-objdirs))
87 60
88obj-dirs += $(host-objdirs) 61obj-dirs += $(host-objdirs)
@@ -123,7 +96,7 @@ quiet_cmd_host-cmulti = HOSTLD $@
123 cmd_host-cmulti = $(HOSTCC) $(HOSTLDFLAGS) -o $@ \ 96 cmd_host-cmulti = $(HOSTCC) $(HOSTLDFLAGS) -o $@ \
124 $(addprefix $(obj)/,$($(@F)-objs)) \ 97 $(addprefix $(obj)/,$($(@F)-objs)) \
125 $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F)) 98 $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
126$(host-cmulti): $(obj)/%: $(host-cobjs) $(host-cshlib) FORCE 99$(host-cmulti): $(obj)/%: $(host-cobjs) FORCE
127 $(call if_changed,host-cmulti) 100 $(call if_changed,host-cmulti)
128 101
129# Create .o file from a single .c file 102# Create .o file from a single .c file
@@ -140,7 +113,7 @@ quiet_cmd_host-cxxmulti = HOSTLD $@
140 $(foreach o,objs cxxobjs,\ 113 $(foreach o,objs cxxobjs,\
141 $(addprefix $(obj)/,$($(@F)-$(o)))) \ 114 $(addprefix $(obj)/,$($(@F)-$(o)))) \
142 $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F)) 115 $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
143$(host-cxxmulti): $(obj)/%: $(host-cobjs) $(host-cxxobjs) $(host-cshlib) FORCE 116$(host-cxxmulti): $(obj)/%: $(host-cobjs) $(host-cxxobjs) FORCE
144 $(call if_changed,host-cxxmulti) 117 $(call if_changed,host-cxxmulti)
145 118
146# Create .o file from a single .cc (C++) file 119# Create .o file from a single .cc (C++) file
@@ -149,21 +122,5 @@ quiet_cmd_host-cxxobjs = HOSTCXX $@
149$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE 122$(host-cxxobjs): $(obj)/%.o: $(src)/%.cc FORCE
150 $(call if_changed_dep,host-cxxobjs) 123 $(call if_changed_dep,host-cxxobjs)
151 124
152# Compile .c file, create position independent .o file
153# host-cshobjs -> .o
154quiet_cmd_host-cshobjs = HOSTCC -fPIC $@
155 cmd_host-cshobjs = $(HOSTCC) $(hostc_flags) -fPIC -c -o $@ $<
156$(host-cshobjs): $(obj)/%.o: $(src)/%.c FORCE
157 $(call if_changed_dep,host-cshobjs)
158
159# Link a shared library, based on position independent .o files
160# *.o -> .so shared library (host-cshlib)
161quiet_cmd_host-cshlib = HOSTLLD -shared $@
162 cmd_host-cshlib = $(HOSTCC) $(HOSTLDFLAGS) -shared -o $@ \
163 $(addprefix $(obj)/,$($(@F:.so=-objs))) \
164 $(HOST_LOADLIBES) $(HOSTLOADLIBES_$(@F))
165$(host-cshlib): $(obj)/%: $(host-cshobjs) FORCE
166 $(call if_changed,host-cshlib)
167
168targets += $(host-csingle) $(host-cmulti) $(host-cobjs)\ 125targets += $(host-csingle) $(host-cmulti) $(host-cobjs)\
169 $(host-cxxmulti) $(host-cxxobjs) $(host-cshlib) $(host-cshobjs) 126 $(host-cxxmulti) $(host-cxxobjs)
diff --git a/scripts/coccinelle/api/alloc/alloc_cast.cocci b/scripts/coccinelle/api/alloc/alloc_cast.cocci
new file mode 100644
index 000000000000..6c308ee19b32
--- /dev/null
+++ b/scripts/coccinelle/api/alloc/alloc_cast.cocci
@@ -0,0 +1,72 @@
1/// Remove casting the values returned by memory allocation functions
2/// like kmalloc, kzalloc, kmem_cache_alloc, kmem_cache_zalloc etc.
3///
4//# This makes an effort to find cases of casting of values returned by
5//# kmalloc, kzalloc, kcalloc, kmem_cache_alloc, kmem_cache_zalloc,
6//# kmem_cache_alloc_node, kmalloc_node and kzalloc_node and removes
7//# the casting as it is not required. The result in the patch case may
8//# need some reformatting.
9//
10// Confidence: High
11// Copyright: 2014, Himangi Saraogi GPLv2.
12// Comments:
13// Options: --no-includes --include-headers
14//
15
16virtual context
17virtual patch
18virtual org
19virtual report
20
21//----------------------------------------------------------
22// For context mode
23//----------------------------------------------------------
24
25@depends on context@
26type T;
27@@
28
29* (T *)
30 \(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\|
31 kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...)
32
33//----------------------------------------------------------
34// For patch mode
35//----------------------------------------------------------
36
37@depends on patch@
38type T;
39@@
40
41- (T *)
42 (\(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\|
43 kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...))
44
45//----------------------------------------------------------
46// For org and report mode
47//----------------------------------------------------------
48
49@r depends on org || report@
50type T;
51position p;
52@@
53
54 (T@p *)\(kmalloc\|kzalloc\|kcalloc\|kmem_cache_alloc\|kmem_cache_zalloc\|
55 kmem_cache_alloc_node\|kmalloc_node\|kzalloc_node\)(...)
56
57@script:python depends on org@
58p << r.p;
59t << r.T;
60@@
61
62coccilib.org.print_safe_todo(p[0], t)
63
64@script:python depends on report@
65p << r.p;
66t << r.T;
67@@
68
69msg="WARNING: casting value returned by memory allocation function to (%s *) is useless." % (t)
70coccilib.report.print_report(p[0], msg)
71
72
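The kind of change this rule produces, shown on a made-up call site (struct foo and the helper are illustrative):

#include <linux/slab.h>

struct foo { int x; };

static struct foo *alloc_foo(void)
{
	struct foo *p;

	/* before: p = (struct foo *)kzalloc(sizeof(*p), GFP_KERNEL);  (useless cast) */
	p = kzalloc(sizeof(*p), GFP_KERNEL);	/* after the semantic patch */
	return p;
}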
diff --git a/scripts/coccinelle/misc/array_size.cocci b/scripts/coccinelle/misc/array_size.cocci
new file mode 100644
index 000000000000..81e279cd347b
--- /dev/null
+++ b/scripts/coccinelle/misc/array_size.cocci
@@ -0,0 +1,87 @@
1/// Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element
2///
3//# This makes an effort to find cases where ARRAY_SIZE can be used such as
4//# where there is a division of sizeof the array by the sizeof its first
5//# element or by any indexed element or the element type. It replaces the
6//# division of the two sizeofs by ARRAY_SIZE.
7//
8// Confidence: High
9// Copyright: (C) 2014 Himangi Saraogi. GPLv2.
10// Comments:
11// Options: --no-includes --include-headers
12
13virtual patch
14virtual context
15virtual org
16virtual report
17
18@i@
19@@
20
21#include <linux/kernel.h>
22
23//----------------------------------------------------------
24// For context mode
25//----------------------------------------------------------
26
27@depends on i&&context@
28type T;
29T[] E;
30@@
31(
32* (sizeof(E)/sizeof(*E))
33|
34* (sizeof(E)/sizeof(E[...]))
35|
36* (sizeof(E)/sizeof(T))
37)
38
39//----------------------------------------------------------
40// For patch mode
41//----------------------------------------------------------
42
43@depends on i&&patch@
44type T;
45T[] E;
46@@
47(
48- (sizeof(E)/sizeof(*E))
49+ ARRAY_SIZE(E)
50|
51- (sizeof(E)/sizeof(E[...]))
52+ ARRAY_SIZE(E)
53|
54- (sizeof(E)/sizeof(T))
55+ ARRAY_SIZE(E)
56)
57
58//----------------------------------------------------------
59// For org and report mode
60//----------------------------------------------------------
61
62@r@
63type T;
64T[] E;
65position p;
66@@
67(
68 (sizeof(E)@p /sizeof(*E))
69|
70 (sizeof(E)@p /sizeof(E[...]))
71|
72 (sizeof(E)@p /sizeof(T))
73)
74
75@script:python depends on i&&org@
76p << r.p;
77@@
78
79coccilib.org.print_todo(p[0], "WARNING should use ARRAY_SIZE")
80
81@script:python depends on i&&report@
82p << r.p;
83@@
84
85msg="WARNING: Use ARRAY_SIZE"
86coccilib.report.print_report(p[0], msg)
87
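An example of the transformation on a made-up array:

#include <linux/kernel.h>	/* ARRAY_SIZE() */

static int vals[16];

/* before: flagged by the rule */
static size_t n_before = sizeof(vals) / sizeof(vals[0]);
/* after the semantic patch */
static size_t n_after = ARRAY_SIZE(vals);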
diff --git a/scripts/coccinelle/misc/badty.cocci b/scripts/coccinelle/misc/badty.cocci
new file mode 100644
index 000000000000..2fc06fc71927
--- /dev/null
+++ b/scripts/coccinelle/misc/badty.cocci
@@ -0,0 +1,76 @@
1/// Correct the size argument given to memory allocation functions
2///
3//# This makes an effort to find cases where the argument to sizeof is wrong
4//# in memory allocation functions by checking the type of the allocated memory
5//# when it is a double pointer and ensuring the sizeof argument takes a pointer
6//# to the memory being allocated. There are false positives in cases the
7//# sizeof argument is not used in constructing the return value. The result
8//# may need some reformatting.
9//
10// Confidence: Moderate
11// Copyright: (C) 2014 Himangi Saraogi. GPLv2.
12// Comments:
13// Options:
14
15virtual patch
16virtual context
17virtual org
18virtual report
19
20//----------------------------------------------------------
21// For context mode
22//----------------------------------------------------------
23
24@depends on context disable sizeof_type_expr@
25type T;
26T **x;
27@@
28
29 x =
30 <+...sizeof(
31* T
32 )...+>
33
34//----------------------------------------------------------
35// For patch mode
36//----------------------------------------------------------
37
38@depends on patch disable sizeof_type_expr@
39type T;
40T **x;
41@@
42
43 x =
44 <+...sizeof(
45- T
46+ *x
47 )...+>
48
49//----------------------------------------------------------
50// For org and report mode
51//----------------------------------------------------------
52
53@r disable sizeof_type_expr@
54type T;
55T **x;
56position p;
57@@
58
59 x =
60 <+...sizeof(
61 T@p
62 )...+>
63
64@script:python depends on org@
65p << r.p;
66@@
67
68coccilib.org.print_todo(p[0], "WARNING sizeof argument should be pointer type, not structure type")
69
70@script:python depends on report@
71p << r.p;
72@@
73
74msg="WARNING: Use correct pointer type argument for sizeof"
75coccilib.report.print_report(p[0], msg)
76
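An example of what the rule fixes on a made-up allocation: when the target is a double pointer, the sizeof should be taken from the pointed-to object (itself a pointer), not from the element type. The helper is illustrative.

#include <linux/slab.h>

static int **alloc_table(size_t n)
{
	int **table;

	/* before: table = kmalloc(n * sizeof(int), GFP_KERNEL);  (element size, not pointer size) */
	table = kmalloc(n * sizeof(*table), GFP_KERNEL);	/* after the semantic patch */
	return table;
}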
diff --git a/scripts/coccinelle/api/alloc/drop_kmalloc_cast.cocci b/scripts/coccinelle/misc/bugon.cocci
index bd5d08b882ee..556456ca761c 100644
--- a/scripts/coccinelle/api/alloc/drop_kmalloc_cast.cocci
+++ b/scripts/coccinelle/misc/bugon.cocci
@@ -1,20 +1,17 @@
+/// Use BUG_ON instead of a if condition followed by BUG.
 ///
-/// Casting (void *) value returned by kmalloc is useless
-/// as mentioned in Documentation/CodingStyle, Chap 14.
-///
-// Confidence: High
-// Copyright: 2009,2010 Nicolas Palix, DIKU. GPLv2.
-// URL: http://coccinelle.lip6.fr/
-// Options: --no-includes --include-headers
-//
-// Keywords: kmalloc, kzalloc, kcalloc
-// Version min: < 2.6.12 kmalloc
-// Version min: < 2.6.12 kcalloc
-// Version min: 2.6.14 kzalloc
+//# This makes an effort to find cases where BUG() follows an if
+//# condition on an expression and replaces the if condition and BUG()
+//# with a BUG_ON having the conditional expression of the if statement
+//# as argument.
 //
+// Confidence: High
+// Copyright: (C) 2014 Himangi Saraogi. GPLv2.
+// Comments:
+// Options: --no-includes, --include-headers
 
-virtual context
 virtual patch
+virtual context
 virtual org
 virtual report
 
@@ -23,45 +20,43 @@ virtual report
 //----------------------------------------------------------
 
 @depends on context@
-type T;
+expression e;
 @@
 
-* (T *)
- \(kmalloc\|kzalloc\|kcalloc\)(...)
+*if (e) BUG();
 
 //----------------------------------------------------------
 // For patch mode
 //----------------------------------------------------------
 
 @depends on patch@
-type T;
+expression e;
 @@
 
-- (T *)
- \(kmalloc\|kzalloc\|kcalloc\)(...)
+-if (e) BUG();
++BUG_ON(e);
 
 //----------------------------------------------------------
 // For org and report mode
 //----------------------------------------------------------
 
-@r depends on org || report@
-type T;
+@r@
+expression e;
 position p;
 @@
 
- (T@p *)\(kmalloc\|kzalloc\|kcalloc\)(...)
+ if (e) BUG@p ();
 
 @script:python depends on org@
 p << r.p;
-t << r.T;
 @@
 
-coccilib.org.print_safe_todo(p[0], t)
+coccilib.org.print_todo(p[0], "WARNING use BUG_ON")
 
 @script:python depends on report@
 p << r.p;
-t << r.T;
 @@
 
-msg="WARNING: casting value returned by k[cmz]alloc to (%s *) is useless." % (t)
+msg="WARNING: Use BUG_ON"
 coccilib.report.print_report(p[0], msg)
+
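As a quick illustration of the before/after shapes this script handles, here is a minimal user-space sketch; BUG() and BUG_ON() are replaced by stand-in macros so it compiles outside the kernel, and the helper function is hypothetical.

#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-ins for the kernel macros, for illustration only. */
#define BUG()		abort()
#define BUG_ON(cond)	do { if (cond) abort(); } while (0)

static int register_widget(void *w)	/* hypothetical helper */
{
	/* Before: the shape bugon.cocci matches. */
	if (w == NULL)
		BUG();

	/* After: what patch mode produces. */
	BUG_ON(w == NULL);

	return 0;
}

int main(void)
{
	int dummy;

	printf("%d\n", register_widget(&dummy));
	return 0;
}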
diff --git a/scripts/coccinelle/null/badzero.cocci b/scripts/coccinelle/null/badzero.cocci
index d79baf7220e7..5551da2b4fe3 100644
--- a/scripts/coccinelle/null/badzero.cocci
+++ b/scripts/coccinelle/null/badzero.cocci
@@ -10,7 +10,7 @@
 // Copyright: (C) 2012 Julia Lawall, INRIA/LIP6. GPLv2.
 // Copyright: (C) 2012 Gilles Muller, INRIA/LiP6. GPLv2.
 // URL: http://coccinelle.lip6.fr/
-// Comments:
+// Comments: Requires Coccinelle version 1.0.0-rc20 or later
 // Options:
 
 virtual patch
@@ -19,6 +19,7 @@ virtual org
 virtual report
 
 @initialize:ocaml@
+@@
 let negtable = Hashtbl.create 101
 
 @depends on patch@
diff --git a/sound/ppc/pmac.c b/sound/ppc/pmac.c
index 7a43c0c38316..8a431bcb056c 100644
--- a/sound/ppc/pmac.c
+++ b/sound/ppc/pmac.c
@@ -992,9 +992,9 @@ static int snd_pmac_detect(struct snd_pmac *chip)
 		return -ENODEV;
 
 	if (!sound) {
-		sound = of_find_node_by_name(NULL, "sound");
-		while (sound && sound->parent != chip->node)
-			sound = of_find_node_by_name(sound, "sound");
+		for_each_node_by_name(sound, "sound")
+			if (sound->parent == chip->node)
+				break;
 	}
 	if (! sound) {
 		of_node_put(chip->node);